summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrédéric Buclin <LpSolit@gmail.com>2011-11-19 00:57:57 +0100
committerFrédéric Buclin <LpSolit@gmail.com>2011-11-19 00:57:57 +0100
commitb4e1dbdc5fb05f082be6fe240a4c316b527fdd15 (patch)
treee359ba0e65e3c1d8e5078ab6cde7c7de1dc45282
parent8e99c55e0d31e85dd9c56399ca30590a47e9536d (diff)
downloadbugzilla-b4e1dbdc5fb05f082be6fe240a4c316b527fdd15.tar.gz
bugzilla-b4e1dbdc5fb05f082be6fe240a4c316b527fdd15.tar.xz
Bug 703788: Improve performance of diff_arrays() with large arrays
r/a=mkanat
-rw-r--r--Bugzilla/Util.pm55
-rw-r--r--t/007util.t17
2 files changed, 44 insertions, 28 deletions
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index 1f698f80e..b75c5c340 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -305,36 +305,39 @@ sub use_attachbase {
sub diff_arrays {
my ($old_ref, $new_ref, $attrib) = @_;
-
- my @old = @$old_ref;
- my @new = @$new_ref;
$attrib ||= 'name';
- # For each pair of (old, new) entries:
- # If object arrays were passed then an attribute should be defined;
- # If they're equal, set them to empty. When done, @old contains entries
- # that were removed; @new contains ones that got added.
- foreach my $oldv (@old) {
- foreach my $newv (@new) {
- next if ($newv eq '' or $oldv eq '');
- if (blessed($oldv) and blessed($newv)) {
- if ($oldv->$attrib eq $newv->$attrib) {
- $newv = $oldv = '';
- }
- }
- else {
- if ($oldv eq $newv) {
- $newv = $oldv = ''
- }
- }
- }
+ my (%counts, %pos);
+ # We are going to alter the old array.
+ my @old = @$old_ref;
+ my $i = 0;
+
+ # $counts{foo}-- means old, $counts{foo}++ means new.
+ # If $counts{foo} becomes positive, then we are adding new items,
+ # else we simply cancel one old existing item. Remaining items
+ # in the old list have been removed.
+ foreach (@old) {
+ next unless defined $_;
+ my $value = blessed($_) ? $_->$attrib : $_;
+ $counts{$value}--;
+ push @{$pos{$value}}, $i++;
}
-
- my @removed;
my @added;
- @removed = grep { $_ ne '' } @old;
- @added = grep { $_ ne '' } @new;
-
+ foreach (@$new_ref) {
+ next unless defined $_;
+ my $value = blessed($_) ? $_->$attrib : $_;
+ if (++$counts{$value} > 0) {
+ # Ignore empty strings, but objects having an empty string
+ # as attribute are fine.
+ push(@added, $_) unless ($value eq '' && !blessed($_));
+ }
+ else {
+ my $old_pos = shift @{$pos{$value}};
+ $old[$old_pos] = undef;
+ }
+ }
+ # Ignore cancelled items as well as empty strings.
+ my @removed = grep { defined $_ && $_ ne '' } @old;
return (\@removed, \@added);
}
diff --git a/t/007util.t b/t/007util.t
index 742c2b2d2..b32a1b90c 100644
--- a/t/007util.t
+++ b/t/007util.t
@@ -18,7 +18,7 @@
#
# Contributor(s): Zach Lipton <zach@zachlipton.com>
# Max Kanat-Alexander <mkanat@bugzilla.org>
-
+# Frédéric Buclin <LpSolit@gmail.com>
#################
#Bugzilla Test 7#
@@ -26,7 +26,7 @@
use lib 't';
use Support::Files;
-use Test::More tests => 13;
+use Test::More tests => 15;
BEGIN {
use_ok(Bugzilla);
@@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) {
is(Bugzilla::Util::email_filter($input), $email_strings{$input},
"email_filter('$input')");
}
+
+# diff_arrays():
+my @old_array = qw(alpha beta alpha gamma gamma beta alpha delta epsilon gamma);
+my @new_array = qw(alpha alpha beta gamma epsilon delta beta delta);
+# The order is not relevant when comparing both arrays for matching items,
+# i.e. (foo bar) and (bar foo) are the same arrays (same items).
+# But when returning data, we try to respect the initial order.
+# We remove the leftmost items first, and return what's left. This means:
+# Removed (in this order): gamma alpha gamma.
+# Added (in this order): delta
+my ($removed, $added) = diff_arrays(\@old_array, \@new_array);
+is_deeply($removed, [qw(gamma alpha gamma)], 'diff_array(\@old, \@new) (check removal)');
+is_deeply($added, [qw(delta)], 'diff_array(\@old, \@new) (check addition)');