diff options
author | Frédéric Buclin <LpSolit@gmail.com> | 2011-11-19 00:57:57 +0100 |
---|---|---|
committer | Frédéric Buclin <LpSolit@gmail.com> | 2011-11-19 00:57:57 +0100 |
commit | b4e1dbdc5fb05f082be6fe240a4c316b527fdd15 (patch) | |
tree | e359ba0e65e3c1d8e5078ab6cde7c7de1dc45282 | |
parent | 8e99c55e0d31e85dd9c56399ca30590a47e9536d (diff) | |
download | bugzilla-b4e1dbdc5fb05f082be6fe240a4c316b527fdd15.tar.gz bugzilla-b4e1dbdc5fb05f082be6fe240a4c316b527fdd15.tar.xz |
Bug 703788: Improve performance of diff_arrays() with large arrays
r/a=mkanat
-rw-r--r-- | Bugzilla/Util.pm | 55 | ||||
-rw-r--r-- | t/007util.t | 17 |
2 files changed, 44 insertions, 28 deletions
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index 1f698f80e..b75c5c340 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -305,36 +305,39 @@ sub use_attachbase { sub diff_arrays { my ($old_ref, $new_ref, $attrib) = @_; - - my @old = @$old_ref; - my @new = @$new_ref; $attrib ||= 'name'; - # For each pair of (old, new) entries: - # If object arrays were passed then an attribute should be defined; - # If they're equal, set them to empty. When done, @old contains entries - # that were removed; @new contains ones that got added. - foreach my $oldv (@old) { - foreach my $newv (@new) { - next if ($newv eq '' or $oldv eq ''); - if (blessed($oldv) and blessed($newv)) { - if ($oldv->$attrib eq $newv->$attrib) { - $newv = $oldv = ''; - } - } - else { - if ($oldv eq $newv) { - $newv = $oldv = '' - } - } - } + my (%counts, %pos); + # We are going to alter the old array. + my @old = @$old_ref; + my $i = 0; + + # $counts{foo}-- means old, $counts{foo}++ means new. + # If $counts{foo} becomes positive, then we are adding new items, + # else we simply cancel one old existing item. Remaining items + # in the old list have been removed. + foreach (@old) { + next unless defined $_; + my $value = blessed($_) ? $_->$attrib : $_; + $counts{$value}--; + push @{$pos{$value}}, $i++; } - - my @removed; my @added; - @removed = grep { $_ ne '' } @old; - @added = grep { $_ ne '' } @new; - + foreach (@$new_ref) { + next unless defined $_; + my $value = blessed($_) ? $_->$attrib : $_; + if (++$counts{$value} > 0) { + # Ignore empty strings, but objects having an empty string + # as attribute are fine. + push(@added, $_) unless ($value eq '' && !blessed($_)); + } + else { + my $old_pos = shift @{$pos{$value}}; + $old[$old_pos] = undef; + } + } + # Ignore cancelled items as well as empty strings. + my @removed = grep { defined $_ && $_ ne '' } @old; return (\@removed, \@added); } diff --git a/t/007util.t b/t/007util.t index 742c2b2d2..b32a1b90c 100644 --- a/t/007util.t +++ b/t/007util.t @@ -18,7 +18,7 @@ # # Contributor(s): Zach Lipton <zach@zachlipton.com> # Max Kanat-Alexander <mkanat@bugzilla.org> - +# Frédéric Buclin <LpSolit@gmail.com> ################# #Bugzilla Test 7# @@ -26,7 +26,7 @@ use lib 't'; use Support::Files; -use Test::More tests => 13; +use Test::More tests => 15; BEGIN { use_ok(Bugzilla); @@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) { is(Bugzilla::Util::email_filter($input), $email_strings{$input}, "email_filter('$input')"); } + +# diff_arrays(): +my @old_array = qw(alpha beta alpha gamma gamma beta alpha delta epsilon gamma); +my @new_array = qw(alpha alpha beta gamma epsilon delta beta delta); +# The order is not relevant when comparing both arrays for matching items, +# i.e. (foo bar) and (bar foo) are the same arrays (same items). +# But when returning data, we try to respect the initial order. +# We remove the leftmost items first, and return what's left. This means: +# Removed (in this order): gamma alpha gamma. +# Added (in this order): delta +my ($removed, $added) = diff_arrays(\@old_array, \@new_array); +is_deeply($removed, [qw(gamma alpha gamma)], 'diff_array(\@old, \@new) (check removal)'); +is_deeply($added, [qw(delta)], 'diff_array(\@old, \@new) (check addition)'); |