From ac3b5ab852234a4fd6a09030fb259c1c6d236dd4 Mon Sep 17 00:00:00 2001
From: "tara%tequilarista.org" <>
Date: Tue, 24 Oct 2000 05:01:30 +0000
Subject: Landing duplicates stuff from gervase markham

---
 checksetup.pl   | 45 ++++++++++++++++++++++++++++++-
 collectstats.pl | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 defparams.pl    | 44 ++++++++++++++++++++++++++++++
 process_bug.cgi |  6 ++++-
 4 files changed, 177 insertions(+), 2 deletions(-)
diff --git a/checksetup.pl b/checksetup.pl
index 6f9fe1c1b..f978f8600 100755
--- a/checksetup.pl
+++ b/checksetup.pl
@@ -835,9 +835,12 @@ $table{shadowlog} =
      ts timestamp,
      reflected tinyint not null,
      command mediumtext not null,
-
      index(reflected)';
 
+# GRM: each row records that bug "dupe" was marked a duplicate of bug "dupe_of".
+$table{duplicates} =
+    'dupe_of mediumint(9) not null,
+     dupe mediumint(9) not null primary key';
 
 
 ###########################################################################
@@ -1745,6 +1748,46 @@ if ( CountIndexes('keywords') != 3 ) {
 
 }    
 
+# 2000-07-15 Added duplicates table so Bugzilla tracks duplicates in a better
+# way than it used to. This code searches the comments to populate the table
+# initially. It's executed if the table is empty; if it's empty because there
+# are no dupes (as opposed to having just created the table) it won't have
+# any effect anyway, so it doesn't matter.
+
+# Ask the database for the row count. The statement handle has to be stored
+# in $sth, or execute() just re-runs whatever query $sth held before.
+$sth = $dbh->prepare("SELECT COUNT(*) FROM duplicates");
+$sth->execute();
+
+if (!$sth->fetchrow_arrayref()->[0]) {
+    print("Populating duplicates table...\n");
+
+    $sth = $dbh->prepare("SELECT longdescs.bug_id, thetext FROM longdescs left JOIN bugs using(bug_id) WHERE (thetext " .
+            "regexp 'This bug has been marked as a duplicate of') AND (resolution = 'DUPLICATE') ORDER" .
+            " BY longdescs.bug_when");
+    $sth->execute();
+
+    # Rows come back in bug_when order, so a later dupe resolution for a bug
+    # overwrites an earlier one and only the last number found is kept.
+    my %dupes;
+    while (my ($dupe, $text) = $sth->fetchrow_array()) {
+        # Capture the number with a match. The marker can sit inside a longer
+        # multi-line comment, and a substitution would keep the other lines.
+        # \d+ also takes bug numbers of any length.
+        if ($text =~ /This bug has been marked as a duplicate of (\d+)/) {
+            $dupes{$dupe} = $1;
+        }
+    }
+
+    # Placeholders, in column order (dupe_of, dupe), keep values out of the SQL.
+    my $insert = $dbh->prepare("INSERT INTO duplicates VALUES (?, ?)");
+    foreach my $key (keys(%dupes)) {
+        $insert->execute($dupes{$key}, $key);
+    }
+
+    $regenerateshadow = 1;
+}
+
 #
 # If you had to change the --TABLE-- definition in any way, then add your
 # differential change code *** A B O V E *** this comment.
diff --git a/collectstats.pl b/collectstats.pl
index 12bf9fce4..47ba0cb5e 100755
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -20,9 +20,11 @@
 #
 # Contributor(s): Terry Weissman <terry@mozilla.org>,
 #                 Harrison Page <harrison@netscape.com>
+#                 Gervase Markham <gerv@gerv.net>
 
 # Run me out of cron at midnight to collect Bugzilla statistics.
 
+use DB_File;
 use diagnostics;
 use strict;
 use vars @::legal_product,
@@ -43,6 +45,8 @@ foreach (@myproducts) {
     &collect_stats ($dir, $_);
 }
 
+&calculate_dupes();
+
 sub check_data_dir {
     my $dir = shift;
 
@@ -100,6 +104,86 @@ FIN
     }
 }
 
+# calculate_dupes()
+#
+# Count how many duplicates resolve to each bug, directly or through a chain
+# of duplicates, and save the totals in today's DBM file
+# (data/mining/dupes<YYYYMMDD>.db) so that later runs can read them back in
+# to do changed counters.
+#
+# Reads the duplicates table through SendSQL/FetchSQLData. Takes no
+# arguments; dies if the DBM file cannot be opened.
+sub calculate_dupes {
+    # Name the columns instead of using "SELECT *", so the (dupe_of, dupe)
+    # order relied on below does not depend on the table's column layout.
+    SendSQL("SELECT dupe_of, dupe FROM duplicates");
+
+    my %dupes;    # bug number => bug which that bug is a direct dupe of
+    my %count;    # bug number => number of dupes; bound to the DBM file
+
+    # Called with parentheses so today() gets an empty argument list rather
+    # than inheriting our @_.
+    my $dbfile = "data/mining/dupes" . today() . ".db";
+
+    # Save %count here in a date-named file so we can read it back in to do
+    # changed counters. First, delete it if it exists, so we don't add to
+    # the contents of an old file. unlink() does this without spawning a
+    # shell for "rm -f", and a failure is reported instead of ignored.
+    if (-e $dbfile) {
+        unlink($dbfile) || warn "Can't remove old DBM dupes file: $!";
+    }
+
+    dbmopen(%count, $dbfile, 0644) || die "Can't open DBM dupes file: $!";
+
+    # Fill %dupes straight from the duplicates table.
+    while (my ($dupe_of, $dupe) = FetchSQLData()) {
+        $dupes{$dupe} = $dupe_of;
+    }
+
+    # Total up the number of bugs which are dupes of a given bug; %count
+    # then holds the number of immediate dupes of each bug. A missing
+    # entry counts up from zero, so no explicit initialisation is needed.
+    foreach my $dupe (keys(%dupes)) {
+        $count{$dupes{$dupe}}++;
+    }
+
+    # Now we collapse the dupe tree by iterating over %count until there is
+    # no further change.
+    my $changed = 1;
+    while ($changed) {
+        $changed = 0;
+
+        foreach my $bug (keys(%count)) {
+            # Only a bug which is itself a dupe, and still has a count, has
+            # anything to hand on.
+            next unless defined($dupes{$bug}) && $count{$bug} > 0;
+
+            my $target = $dupes{$bug};
+
+            # Add that count onto the bug it is a dupe of, and zero the
+            # count. A target whose own count is zero is left alone: that
+            # check is what keeps duplicate loops (A is a dupe of B, B is a
+            # dupe of A) from being passed round forever.
+            if ($count{$target} != 0) {
+                $count{$target} += $count{$bug};
+                $count{$bug} = 0;
+                $changed = 1;
+            }
+        }
+    }
+
+    # Remove the values for which the count is zero, so only bugs that
+    # actually have dupes are stored.
+    foreach my $bug (keys(%count)) {
+        if ($count{$bug} == 0) {
+            delete $count{$bug};
+        }
+    }
+
+    # Unbind %count from the DBM file.
+    dbmclose(%count);
+}
+
 sub today {
     my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
     return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
diff --git a/defparams.pl b/defparams.pl
index 7407b65c2..cc47f4eb5 100644
--- a/defparams.pl
+++ b/defparams.pl
@@ -284,6 +284,50 @@ information about what Bugzilla is and what it can do, see
 <A HREF=\"http://www.mozilla.org/bugs/\"><B>bug pages</B></A>.");
 
 
+DefParam("mostfreqhtml",
+         "The HTML which appears at the top of the list of most-frequently-reported bugs. Use it to explain the page, set a maintainer etc.",
+         "l",
+         q{
+<br><p>
+
+<b>What are "most frequent bugs"?</b>
+
+<blockquote>The Most Frequent Bugs page lists the known open bugs which 
+are reported most frequently in recent builds of Mozilla. It is automatically
+generated from the Bugzilla database.
+This information is provided in order to assist in minimizing
+the amount of duplicate bugs entered into Bugzilla which in turn cuts down
+on development time.
+</blockquote>
+
+<b>How do I use this list?</b>
+
+<ul>
+<li>Review the most frequent bugs list.</li>
+<li>If problem is listed:</li>
+
+<ul>
+<li>Click on Bug # link to confirm that you have found the same bug and comment
+if you have additional information. Or move on with your testing
+of the product.</li>
+</ul>
+
+<li>If problem not listed:</li>
+
+<ul>
+<li>Go to the <a href="http://bugzilla.mozilla.org/query.cgi">Bugzilla Query/Search</a>
+page to try and locate a similar bug that has already been written.</li>
+<li>If you find your bug in Bugzilla, feel free to comment with any new or
+additional data you may have.</li>
+<li>If you cannot find your problem already documented in Bugzilla, go to the
+<a href="http://www.mozilla.org/quality/help/bug-form.html">Bugzilla Helper</a> and post a new bug.</li>
+</ul>
+
+</ul>
+<br>
+});
+
+
 DefParam("mybugstemplate",
          "This is the URL to use to bring up a simple 'all of my bugs' list for a user.  %userid% will get replaced with the login name of a user.",
          "t",
diff --git a/process_bug.cgi b/process_bug.cgi
index 3664e427f..9f47b05de 100755
--- a/process_bug.cgi
+++ b/process_bug.cgi
@@ -498,8 +498,12 @@ SWITCH: for ($::FORM{'knob'}) {
         last SWITCH;
     };   
     /^reopen$/  && CheckonComment( "reopen" ) && do {
+		SendSQL("SELECT resolution FROM bugs WHERE bug_id = $::FORM{'id'}");
         ChangeStatus('REOPENED');
         ChangeResolution('');
+		if (FetchOneColumn() eq 'DUPLICATE') {
+			SendSQL("DELETE FROM duplicates WHERE dupe = $::FORM{'id'}");
+		}		
         last SWITCH;
     };
     /^verify$/ && CheckonComment( "verify" ) && do {
@@ -539,8 +543,8 @@ SWITCH: for ($::FORM{'knob'}) {
         if ( Param('strictvaluechecks') ) {
           CheckFormFieldDefined(\%::FORM,'comment');
         }
+		SendSQL("INSERT INTO duplicates VALUES ($num, $::FORM{'id'})");
         $::FORM{'comment'} .= "\n\n*** This bug has been marked as a duplicate of $num ***";
-
         print "<TABLE BORDER=1><TD><H2>Notation added to bug $num</H2>\n";
         system("./processmail", $num, $::FORM{'who'});
         print "<TD><A HREF=\"show_bug.cgi?id=$num\">Go To BUG# $num</A></TABLE>\n";
-- 
cgit v1.2.3-24-g4f1b