From 3972541751851348ee9895c841cfc9ff2919a608 Mon Sep 17 00:00:00 2001 From: Frédéric Buclin Date: Sat, 19 Nov 2011 01:00:50 +0100 Subject: Bug 703788: Improve performance of diff_arrays() with large arrays r/a=mkanat --- Bugzilla/Util.pm | 55 +++++++++++++++++++++++++++++-------------------------- t/007util.t | 17 +++++++++++++++-- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index ac6848bfa..b3f8a1ce0 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -308,36 +308,39 @@ sub use_attachbase { sub diff_arrays { my ($old_ref, $new_ref, $attrib) = @_; - - my @old = @$old_ref; - my @new = @$new_ref; $attrib ||= 'name'; - # For each pair of (old, new) entries: - # If object arrays were passed then an attribute should be defined; - # If they're equal, set them to empty. When done, @old contains entries - # that were removed; @new contains ones that got added. - foreach my $oldv (@old) { - foreach my $newv (@new) { - next if ($newv eq '' or $oldv eq ''); - if (blessed($oldv) and blessed($newv)) { - if ($oldv->$attrib eq $newv->$attrib) { - $newv = $oldv = ''; - } - } - else { - if ($oldv eq $newv) { - $newv = $oldv = '' - } - } - } + my (%counts, %pos); + # We are going to alter the old array. + my @old = @$old_ref; + my $i = 0; + + # $counts{foo}-- means old, $counts{foo}++ means new. + # If $counts{foo} becomes positive, then we are adding new items, + # else we simply cancel one old existing item. Remaining items + # in the old list have been removed. + foreach (@old) { + next unless defined $_; + my $value = blessed($_) ? $_->$attrib : $_; + $counts{$value}--; + push @{$pos{$value}}, $i++; } - - my @removed; my @added; - @removed = grep { $_ ne '' } @old; - @added = grep { $_ ne '' } @new; - + foreach (@$new_ref) { + next unless defined $_; + my $value = blessed($_) ? $_->$attrib : $_; + if (++$counts{$value} > 0) { + # Ignore empty strings, but objects having an empty string + # as attribute are fine. + push(@added, $_) unless ($value eq '' && !blessed($_)); + } + else { + my $old_pos = shift @{$pos{$value}}; + $old[$old_pos] = undef; + } + } + # Ignore cancelled items as well as empty strings. + my @removed = grep { defined $_ && $_ ne '' } @old; return (\@removed, \@added); } diff --git a/t/007util.t b/t/007util.t index 742c2b2d2..b32a1b90c 100644 --- a/t/007util.t +++ b/t/007util.t @@ -18,7 +18,7 @@ # # Contributor(s): Zach Lipton # Max Kanat-Alexander - +# Frédéric Buclin ################# #Bugzilla Test 7# @@ -26,7 +26,7 @@ use lib 't'; use Support::Files; -use Test::More tests => 13; +use Test::More tests => 15; BEGIN { use_ok(Bugzilla); @@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) { is(Bugzilla::Util::email_filter($input), $email_strings{$input}, "email_filter('$input')"); } + +# diff_arrays(): +my @old_array = qw(alpha beta alpha gamma gamma beta alpha delta epsilon gamma); +my @new_array = qw(alpha alpha beta gamma epsilon delta beta delta); +# The order is not relevant when comparing both arrays for matching items, +# i.e. (foo bar) and (bar foo) are the same arrays (same items). +# But when returning data, we try to respect the initial order. +# We remove the leftmost items first, and return what's left. This means: +# Removed (in this order): gamma alpha gamma. +# Added (in this order): delta +my ($removed, $added) = diff_arrays(\@old_array, \@new_array); +is_deeply($removed, [qw(gamma alpha gamma)], 'diff_array(\@old, \@new) (check removal)'); +is_deeply($added, [qw(delta)], 'diff_array(\@old, \@new) (check addition)'); -- cgit v1.2.3-24-g4f1b