From 3972541751851348ee9895c841cfc9ff2919a608 Mon Sep 17 00:00:00 2001
From: Frédéric Buclin <LpSolit@gmail.com>
Date: Sat, 19 Nov 2011 01:00:50 +0100
Subject: Bug 703788: Improve performance of diff_arrays() with large arrays
 r/a=mkanat

---
 Bugzilla/Util.pm | 55 +++++++++++++++++++++++++++++--------------------------
 t/007util.t      | 17 +++++++++++++++--
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index ac6848bfa..b3f8a1ce0 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -308,36 +308,39 @@ sub use_attachbase {
 
 sub diff_arrays {
     my ($old_ref, $new_ref, $attrib) = @_;
-
-    my @old = @$old_ref;
-    my @new = @$new_ref;
     $attrib ||= 'name';
 
-    # For each pair of (old, new) entries:
-    # If object arrays were passed then an attribute should be defined;
-    # If they're equal, set them to empty. When done, @old contains entries
-    # that were removed; @new contains ones that got added.
-    foreach my $oldv (@old) {
-        foreach my $newv (@new) {
-            next if ($newv eq '' or $oldv eq '');
-            if (blessed($oldv) and blessed($newv)) {
-                if ($oldv->$attrib eq $newv->$attrib) {
-                    $newv = $oldv = '';
-                }
-            }
-            else {
-                if ($oldv eq $newv) {
-                    $newv = $oldv = ''
-                }
-            }
-        }
+    my (%counts, %pos);
+    # We are going to alter the old array.
+    my @old = @$old_ref;
+    my $i = 0;
+
+    # $counts{foo}-- means old, $counts{foo}++ means new.
+    # If $counts{foo} becomes positive, then we are adding new items,
+    # else we simply cancel one old existing item. Remaining items
+    # in the old list have been removed.
+    foreach (@old) {
+        next unless defined $_;
+        my $value = blessed($_) ? $_->$attrib : $_;
+        $counts{$value}--;
+        push @{$pos{$value}}, $i++;
     }
-
-    my @removed;
     my @added;
-    @removed = grep { $_ ne '' } @old;
-    @added   = grep { $_ ne '' } @new;
-
+    foreach (@$new_ref) {
+        next unless defined $_;
+        my $value = blessed($_) ? $_->$attrib : $_;
+        if (++$counts{$value} > 0) {
+            # Ignore empty strings, but objects having an empty string
+            # as attribute are fine.
+            push(@added, $_) unless ($value eq '' && !blessed($_));
+        }
+        else {
+            my $old_pos = shift @{$pos{$value}};
+            $old[$old_pos] = undef;
+        }
+    }
+    # Ignore cancelled items as well as empty strings.
+    my @removed = grep { defined $_ && $_ ne '' } @old;
     return (\@removed, \@added);
 }
 
diff --git a/t/007util.t b/t/007util.t
index 742c2b2d2..b32a1b90c 100644
--- a/t/007util.t
+++ b/t/007util.t
@@ -18,7 +18,7 @@
 # 
 # Contributor(s): Zach Lipton <zach@zachlipton.com>
 #                 Max Kanat-Alexander <mkanat@bugzilla.org>
-
+#                 Frédéric Buclin <LpSolit@gmail.com>
 
 #################
 #Bugzilla Test 7#
@@ -26,7 +26,7 @@
 
 use lib 't';
 use Support::Files;
-use Test::More tests => 13;
+use Test::More tests => 15;
 
 BEGIN { 
     use_ok(Bugzilla);
@@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) {
     is(Bugzilla::Util::email_filter($input), $email_strings{$input}, 
        "email_filter('$input')");
 }
+
+# diff_arrays():
+my @old_array = qw(alpha beta alpha gamma gamma beta alpha delta epsilon gamma);
+my @new_array = qw(alpha alpha beta gamma epsilon delta beta delta);
+# The order is not relevant when comparing both arrays for matching items,
+# i.e. (foo bar) and (bar foo) are the same arrays (same items).
+# But when returning data, we try to respect the initial order.
+# We remove the leftmost items first, and return what's left. This means:
+# Removed (in this order): gamma alpha gamma.
+# Added (in this order): delta
+my ($removed, $added) = diff_arrays(\@old_array, \@new_array);
+is_deeply($removed, [qw(gamma alpha gamma)], 'diff_array(\@old, \@new) (check removal)');
+is_deeply($added, [qw(delta)], 'diff_array(\@old, \@new) (check addition)');
-- 
cgit v1.2.3-24-g4f1b