summaryrefslogtreecommitdiffstats
path: root/collectstats.pl
diff options
context:
space:
mode:
authortara%tequilarista.org <>2000-10-24 07:01:30 +0200
committertara%tequilarista.org <>2000-10-24 07:01:30 +0200
commitac3b5ab852234a4fd6a09030fb259c1c6d236dd4 (patch)
treec4ea3b3e21f9f9b428ce75d3a471cf8f7b778c95 /collectstats.pl
parentcf9b879f29f0d01c93882cc40f6c1ce63d870efc (diff)
downloadbugzilla-ac3b5ab852234a4fd6a09030fb259c1c6d236dd4.tar.gz
bugzilla-ac3b5ab852234a4fd6a09030fb259c1c6d236dd4.tar.xz
Landing duplicates stuff from gervase markham
Diffstat (limited to 'collectstats.pl')
-rwxr-xr-xcollectstats.pl84
1 files changed, 84 insertions, 0 deletions
diff --git a/collectstats.pl b/collectstats.pl
index 12bf9fce4..47ba0cb5e 100755
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -20,9 +20,11 @@
#
# Contributor(s): Terry Weissman <terry@mozilla.org>,
# Harrison Page <harrison@netscape.com>
+# Gervase Markham <gerv@gerv.net>
# Run me out of cron at midnight to collect Bugzilla statistics.
+use DB_File;
use diagnostics;
use strict;
use vars @::legal_product,
@@ -43,6 +45,8 @@ foreach (@myproducts) {
&collect_stats ($dir, $_);
}
+&calculate_dupes();
+
sub check_data_dir {
my $dir = shift;
@@ -100,6 +104,86 @@ FIN
}
}
+# Count, for every bug, how many other bugs are duplicates of it -- either
+# directly or through a chain of intermediate duplicates -- and store the
+# totals in a DBM file named after today's date
+# (data/mining/dupes<today>.db), keyed by bug number.
+#
+# Takes no arguments and returns nothing meaningful. Dies if the DBM file
+# cannot be opened.
+sub calculate_dupes {
+    my $today = &today();
+    my $file = "data/mining/dupes$today.db";
+
+    # Create a hash with key "a bug number", value "bug which that bug is a
+    # direct dupe of" - straight from the duplicates table.
+    # NOTE(review): this relies on the table's column order being
+    # (dupe_of, dupe) -- confirm against the schema.
+    SendSQL("SELECT * FROM duplicates");
+
+    my %dupes;
+    while (my @row = FetchSQLData())
+    {
+        my ($dupe_of, $dupe) = @row;
+        $dupes{$dupe} = $dupe_of;
+    }
+
+    # Credit every dupe to the bug at the end of its "dupe of" chain.
+    # Working per dupe (rather than repeatedly shuffling counts between
+    # bugs) makes the result independent of hash ordering. The totals are
+    # built in an ordinary in-memory hash so that each increment is not a
+    # write to the DBM file.
+    my %total;
+    foreach my $dupe (keys(%dupes))
+    {
+        my $root = $dupes{$dupe};
+        my %seen = ($dupe => 1, $root => 1);
+
+        # Walk up the chain. If the data contains a cycle, stop at the last
+        # bug reached before the chain would revisit one already seen, so
+        # the walk always terminates.
+        while (defined($dupes{$root}) && !$seen{$dupes{$root}})
+        {
+            $root = $dupes{$root};
+            $seen{$root} = 1;
+        }
+
+        $total{$root}++;
+    }
+
+    # Save the counts in a date-named file so they can be read back in
+    # later to do changed counters. First remove any file left over from an
+    # earlier run today, so we don't add to the contents of an old file.
+    if (-e $file && !unlink($file))
+    {
+        warn "Can't remove old DBM dupes file $file: $!";
+    }
+
+    my %count;
+    dbmopen(%count, $file, 0644) || die "Can't open DBM dupes file: $!";
+
+    # List assignment to the tied hash empties it before storing, so stale
+    # entries cannot survive even if the unlink above failed. Only bugs
+    # with at least one dupe appear in %total, so no zero counts are
+    # written.
+    %count = %total;
+
+    dbmclose(%count);
+}
+
sub today {
my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;