summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: justdave%syndicomm.com <> 2001-04-07 03:19:44 +0200
committer: justdave%syndicomm.com <> 2001-04-07 03:19:44 +0200
commit: 6ed055cfc3fb1f7821fdfbccbda100a489f57ffa (patch)
tree: 3ac41f59af043caf91c74107fa83f996d6f5d5b6
parent: 9818692e383d603ed728df6c16c02150854b6d3c (diff)
download: bugzilla-6ed055cfc3fb1f7821fdfbccbda100a489f57ffa.tar.gz
bugzilla-6ed055cfc3fb1f7821fdfbccbda100a489f57ffa.tar.xz
Fix for bug 72721 (duplicates.cgi performs poorly with lots of bugs) and bug 69054 (DB_File not portable). The dependence on DB_File has been removed; the scripts now use AnyDBM_File, which comes standard with Perl. duplicates.cgi now runs its queries against the shadow database if it's available, among many other improvements.
Patch by gervase.markham@univ.ox.ac.uk (Gervase Markham) r= justdave
-rwxr-xr-x collectstats.pl 15
-rw-r--r-- defparams.pl 8
-rwxr-xr-x duplicates.cgi 213
3 files changed, 147 insertions, 89 deletions
diff --git a/collectstats.pl b/collectstats.pl
index e645165e1..d6a97e4e2 100755
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -24,7 +24,7 @@
# Run me out of cron at midnight to collect Bugzilla statistics.
-use DB_File;
+use AnyDBM_File;
use diagnostics;
use strict;
use vars @::legal_product;
@@ -125,16 +125,16 @@ sub calculate_dupes {
my $key;
my $changed = 1;
- my $today = &today;
+ my $today = &today_dash;
# Save % count here in a date-named file
# so we can read it back in to do changed counters
# First, delete it if it exists, so we don't add to the contents of an old file
- if (-e "data/mining/dupes$today.db") {
- system("rm -f data/mining/dupes$today.db");
+ if (-e "data/mining/dupes$today") {
+ system("rm -f data/mining/dupes$today");
}
- dbmopen(%count, "data/mining/dupes$today.db", 0644) || die "Can't open DBM dupes file: $!";
+ dbmopen(%count, "data/mining/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
# Create a hash with key "a bug number", value "bug which that bug is a
# direct dupe of" - straight from the duplicates table.
@@ -194,3 +194,8 @@ sub today {
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
}
+sub today_dash {
+ my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
+ return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
+}
+
diff --git a/defparams.pl b/defparams.pl
index c384eb973..ff60338b0 100644
--- a/defparams.pl
+++ b/defparams.pl
@@ -120,10 +120,6 @@ sub check_shadowdb {
# t -- A short text entry field (suitable for a single line)
# l -- A long text field (suitable for many lines)
# b -- A boolean value (either 1 or 0)
-# i -- An integer.
-# defenum -- This param defines an enum that defines a column in one of
-# the database tables. The name of the parameter is of the form
-# "tablename.columnname".
DefParam("maintainer",
"The email address of the person who maintains this installation of Bugzilla.",
@@ -334,6 +330,10 @@ additional data you may have.</li>
<br>
});
+DefParam("mostfreqthreshold",
+ "The minimum number of duplicates a bug needs to show up on the <A HREF=\"duplicates.cgi\">most frequently reported bugs page</a>. If you have a large database and this page takes a long time to load, try increasing this number.",
+ "t",
+ "2");
DefParam("mybugstemplate",
"This is the URL to use to bring up a simple 'all of my bugs' list for a user. %userid% will get replaced with the login name of a user.",
diff --git a/duplicates.cgi b/duplicates.cgi
index 6bb4e20ab..03a366763 100755
--- a/duplicates.cgi
+++ b/duplicates.cgi
@@ -25,82 +25,119 @@
use diagnostics;
use strict;
use CGI "param";
-use DB_File;
+use AnyDBM_File;
require "globals.pl";
require "CGI.pl";
-ConnectToDatabase();
+ConnectToDatabase(1);
GetVersionTable();
+my %dbmcount;
my %count;
my $dobefore = 0;
my $before = "";
my %before;
-my $changedsince;
-my $maxrows = 500; # arbitrary limit on max number of rows
+# Get params from URL
-my $today = &days_ago(0);
+my $changedsince = 7; # default one week
+my $maxrows = 100; # arbitrary limit on max number of rows
+my $sortby = "dup_count"; # default to sorting by dup count
-# Open today's record of dupes
-if (-e "data/mining/dupes$today.db")
-{
- dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open today's dupes file: $!";
-}
-else
+if (defined(param("sortby")))
{
- # Try yesterday's, then (in case today's hasn't been created yet) :-)
- $today = &days_ago(1);
- if (-e "data/mining/dupes$today.db")
- {
- dbmopen(%count, "data/mining/dupes${today}.db", 0644) || die "Can't open yesterday's dupes file: $!";
- }
- else
- {
- die "There are no duplicate statistics for today or yesterday.";
- }
+ $sortby = param("sortby");
}
# Check for changedsince param, and see if it's a positive integer
if (defined(param("changedsince")) && param("changedsince") =~ /^\d{1,4}$/)
{
- $changedsince = param("changedsince");
+ $changedsince = param("changedsince");
}
-else
+
+# check for max rows param, and see if it's a positive integer
+if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/)
{
- # Otherwise, default to one week
- $changedsince = "7";
+ $maxrows = param("maxrows");
}
-$before = &days_ago($changedsince);
+# Start the page
+print "Content-type: text/html\n";
+print "\n";
+PutHeader("Most Frequently Reported Bugs");
-# check for max rows parameter
-if (defined(param("maxrows")) && param("maxrows") =~ /^\d{1,4}$/)
+# Open today's record of dupes
+my $today = &days_ago(0);
+
+if (-e "data/mining/dupes$today.db")
{
- $maxrows = param("maxrows");
+ dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
+ &die_politely("Can't open today's dupes file: $!");
}
+else
+{
+ # Try yesterday's, then (in case today's hasn't been created yet)
+ $today = &days_ago(1);
+ if (-e "data/mining/dupes$today.db")
+ {
+ dbmopen(%dbmcount, "data/mining/dupes$today", 0644) ||
+ &die_politely("Can't open yesterday's dupes file: $!");
+ }
+ else
+ {
+ &die_politely("There are no duplicate statistics for today or yesterday.");
+ }
+}
+
+# Copy hash (so we don't mess up the on-disk file when we remove entries)
+%count = %dbmcount;
+my $key;
+my $value;
+my $threshold = Param("mostfreqthreshold");
-if (-e "data/mining/dupes${before}.db")
+# Remove all those dupes under the threshold (for performance reasons)
+while (($key, $value) = each %count)
{
- dbmopen(%before, "data/mining/dupes${before}.db", 0644) && ($dobefore = 1);
+ if ($value < $threshold)
+ {
+ delete $count{$key};
+ }
}
-print "Content-type: text/html\n";
-print "\n";
-PutHeader("Most Frequently Reported Bugs");
+# Try and open the database from "changedsince" days ago
+$before = &days_ago($changedsince);
+
+if (-e "data/mining/dupes$before.db")
+{
+ dbmopen(%before, "data/mining/dupes$before", 0644) && ($dobefore = 1);
+}
print Param("mostfreqhtml");
print "
<table BORDER>
-
<tr BGCOLOR=\"#CCCCCC\">
-<td><center><b>Bug #</b></center></td>
-<td><center><b>Dupe<br>Count</b></center></td>\n";
+
+<td><center><b>
+<a href=\"duplicates.cgi?sortby=bug_no&maxrows=$maxrows&changedsince=$changedsince\">Bug #</a>
+</b></center></td>
+<td><center><b>
+<a href=\"duplicates.cgi?sortby=dup_count&maxrows=$maxrows&changedsince=$changedsince\">Dupe<br>Count</a>
+</b></center></td>\n";
+
+my %delta;
if ($dobefore)
{
- print "<td><center><b>Change in last<br>$changedsince day(s)</b></center></td> ";
+ print "<td><center><b>
+ <a href=\"duplicates.cgi?sortby=delta&maxrows=$maxrows&changedsince=$changedsince\">Change in
+ last<br>$changedsince day(s)</a></b></center></td>";
+
+ # Calculate the deltas if we are doing a "before"
+ foreach (keys(%count))
+ {
+ $delta{$_} = $count{$_} - $before{$_};
+ }
}
print "
@@ -111,78 +148,94 @@ print "
<td><center><b>Summary</b></center></td>
</tr>\n\n";
-my %delta;
+# Sort, if required
+my @sortedcount;
-# Calculate the deltas if we are doing a "before"
-if ($dobefore)
+if ($sortby eq "delta")
{
- foreach (keys(%count))
- {
- $delta{$_} = $count{$_} - $before{$_};
- }
+ @sortedcount = sort by_delta keys(%count);
}
-
-# Offer the option of sorting on total count, or on the delta
-my @sortedcount;
-
-if (defined(param("sortby")) && param("sortby") == "delta")
+elsif ($sortby eq "bug_no")
{
- @sortedcount = sort by_delta keys(%count);
+ @sortedcount = sort by_bug_no keys(%count);
}
-else
+elsif ($sortby eq "dup_count")
{
- @sortedcount = sort by_dup_count keys(%count);
+ @sortedcount = sort by_dup_count keys(%count);
}
my $i = 0;
foreach (@sortedcount)
{
- my $id = $_;
- SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " .
+ my $id = $_;
+ SendSQL("SELECT component, bug_severity, op_sys, target_milestone, short_desc, groupset " .
" FROM bugs WHERE bug_id = $id");
- my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData();
+ my ($component, $severity, $op_sys, $milestone, $summary, $groupset) = FetchSQLData();
next unless $groupset == 0;
$summary = html_quote($summary);
- print "<tr>";
- print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">';
- print $id . "</A></center></td>";
- print "<td><center>$count{$id}</center></td>";
- if ($dobefore)
- {
- print "<td><center>$delta{$id}</center></td>";
- }
- print "<td>$component</td>\n ";
- print "<td><center>$severity</center></td>";
- print "<td><center>$op_sys</center></td>";
- print "<td><center>$milestone</center></td>";
- print "<td>$summary</td>";
- print "</tr>\n";
-
- $i++;
- if ($i == $maxrows)
- {
- last;
- }
+ print "<tr>";
+ print '<td><center><A HREF="show_bug.cgi?id=' . $id . '">';
+ print $id . "</A></center></td>";
+ print "<td><center>$count{$id}</center></td>";
+ if ($dobefore)
+ {
+ print "<td><center>$delta{$id}</center></td>";
+ }
+ print "<td>$component</td>\n ";
+ print "<td><center>$severity</center></td>";
+ print "<td><center>$op_sys</center></td>";
+ print "<td><center>$milestone</center></td>";
+ print "<td>$summary</td>";
+ print "</tr>\n";
+
+ $i++;
+ if ($i == $maxrows)
+ {
+ last;
+ }
}
print "</table><br><br>";
PutFooter();
+sub by_bug_no
+{
+ return ($a <=> $b);
+}
+
sub by_dup_count
{
- return -($count{$a} <=> $count{$b});
+ return -($count{$a} <=> $count{$b});
}
sub by_delta
{
- return -($delta{$a} <=> $delta{$b});
+ return -($delta{$a} <=> $delta{$b});
}
sub days_ago
{
- my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5];
- return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
+ my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5];
+ return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
}
+sub die_politely {
+ my $msg = shift;
+
+ print <<FIN;
+<p>
+<table border=1 cellpadding=10>
+<tr>
+<td align=center>
+<font color=blue>$msg</font>
+</td>
+</tr>
+</table>
+<p>
+FIN
+
+ PutFooter();
+ exit;
+}