summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xcollectstats.pl111
1 files changed, 64 insertions, 47 deletions
diff --git a/collectstats.pl b/collectstats.pl
index bcb0fac5b..e550c1613 100755
--- a/collectstats.pl
+++ b/collectstats.pl
@@ -30,12 +30,14 @@
# To run new charts for a specific date, pass it in on the command line in
# ISO (2004-08-14) format.
-use AnyDBM_File;
use strict;
+use lib qw(. lib);
+
+use AnyDBM_File;
use IO::Handle;
+use List::Util qw(first);
use Cwd;
-use lib qw(. lib);
use Bugzilla;
use Bugzilla::Constants;
@@ -111,6 +113,37 @@ my @resolutions = @{$fields->{'resolution'}};
# Exclude "" from the resolution list.
@resolutions = grep {$_} @resolutions;
+# --regenerate was taking an enormous amount of time to query everything
+# per bug, per day. Instead, we now just get all the data out of the DB
+# at once and stuff it into some data structures.
+my (%bug_status, %bug_resolution, %removed);
+if ($regenerate) {
+ %bug_resolution = @{ $dbh->selectcol_arrayref(
+ 'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) };
+ %bug_status = @{ $dbh->selectcol_arrayref(
+ 'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) };
+
+ my $removed_sth = $dbh->prepare(
+ q{SELECT bugs_activity.bug_id, bugs_activity.removed,}
+ . $dbh->sql_to_days('bugs_activity.bug_when')
+ . q{FROM bugs_activity
+ WHERE bugs_activity.fieldid = ?
+ ORDER BY bugs_activity.bug_when});
+
+ %removed = (bug_status => {}, resolution => {});
+ foreach my $field (qw(bug_status resolution)) {
+ my $field_id = Bugzilla::Field->check($field)->id;
+ my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id);
+ my $hash = $removed{$field};
+ foreach my $row (@$rows) {
+ my ($bug_id, $removed, $when) = @$row;
+ $hash->{$bug_id} ||= [];
+ push(@{ $hash->{$bug_id} }, { when => int($when),
+ removed => $removed });
+ }
+ }
+}
+
my $tstart = time;
foreach (@myproducts) {
my $dir = "$datadir/mining";
@@ -118,7 +151,7 @@ foreach (@myproducts) {
&check_data_dir ($dir);
if ($regenerate) {
- &regenerate_stats($dir, $_);
+ regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed);
} else {
&collect_stats($dir, $_);
}
@@ -343,8 +376,7 @@ sub calculate_dupes {
# This regenerates all statistics from the database.
sub regenerate_stats {
- my $dir = shift;
- my $product = shift;
+ my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;
my $dbh = Bugzilla->dbh;
my $when = localtime(time());
@@ -356,8 +388,6 @@ sub regenerate_stats {
$file_product =~ s/\//-/gs;
my $file = join '/', $dir, $file_product;
- my @bugs;
-
my $and_product = "";
my $from_product = "";
@@ -387,7 +417,6 @@ sub regenerate_stats {
}
if (open DATA, ">$file") {
- DATA->autoflush(1);
my $fields = join('|', ('DATE', @statuses, @resolutions));
print DATA <<FIN;
# Bugzilla Daily Bug Stats
@@ -400,6 +429,7 @@ sub regenerate_stats {
FIN
# For each day, generate a line of statistics.
my $total_days = $end - $start;
+ my @bugs;
for (my $day = $start + 1; $day <= $end; $day++) {
# Some output feedback
my $percent_done = ($day - $start - 1) * 100 / $total_days;
@@ -416,56 +446,25 @@ FIN
$and_product . q{ ORDER BY bug_id};
my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
-
push(@bugs, @$bug_ids);
- # For each bug that existed on that day, determine its status
- # at the beginning of the day. If there were no status
- # changes on or after that day, the status was the same as it
- # is today, which can be found in the bugs table. Otherwise,
- # the status was equal to the first "previous value" entry in
- # the bugs_activity table for that bug made on or after that
- # day.
my %bugcount;
foreach (@statuses) { $bugcount{$_} = 0; }
foreach (@resolutions) { $bugcount{$_} = 0; }
# Get information on bug states and resolutions.
- $query = qq{SELECT bugs_activity.removed
- FROM bugs_activity
- INNER JOIN fielddefs
- ON bugs_activity.fieldid = fielddefs.id
- WHERE fielddefs.name = ?
- AND bugs_activity.bug_id = ?
- AND bugs_activity.bug_when >= } .
- $dbh->sql_from_days($day) .
- " ORDER BY bugs_activity.bug_when " .
- $dbh->sql_limit(1);
-
- my $sth_bug = $dbh->prepare($query);
- my $sth_status = $dbh->prepare(q{SELECT bug_status
- FROM bugs
- WHERE bug_id = ?});
-
- my $sth_reso = $dbh->prepare(q{SELECT resolution
- FROM bugs
- WHERE bug_id = ?});
-
for my $bug (@bugs) {
- my $status = $dbh->selectrow_array($sth_bug, undef,
- 'bug_status', $bug);
- unless ($status) {
- $status = $dbh->selectrow_array($sth_status, undef, $bug);
- }
+ my $status = _get_value(
+ $removed->{'bug_status'}->{$bug},
+ $bug_status, $day, $bug);
if (defined $bugcount{$status}) {
$bugcount{$status}++;
}
- my $resolution = $dbh->selectrow_array($sth_bug, undef,
- 'resolution', $bug);
- unless ($resolution) {
- $resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
- }
-
+
+ my $resolution = _get_value(
+ $removed->{'resolution'}->{$bug},
+ $bug_resolution, $day, $bug);
+
if (defined $bugcount{$resolution}) {
$bugcount{$resolution}++;
}
@@ -490,6 +489,24 @@ FIN
}
}
+# A helper for --regenerate.
+# For each bug that exists on a day, we determine its status/resolution
+# at the beginning of the day. If there were no status/resolution
+# changes on or after that day, the status was the same as it
+# is today (the "current" value). Otherwise, the status was equal to the
+# first "previous value" entry in the bugs_activity table for that
+# bug made on or after that day.
+sub _get_value {
+ my ($removed, $current, $day, $bug) = @_;
+
+ # Get the first change that's on or after this day.
+ my $item = first { $_->{when} >= $day } @{ $removed || [] };
+
+ # If there's no change on or after this day, then we just return the
+ # current value.
+ return $item ? $item->{removed} : $current->{$bug};
+}
+
sub today {
my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;