From 3bc08bc506e5d25165909cf1f5ea2328ea0eeb4f Mon Sep 17 00:00:00 2001 From: "mkanat%bugzilla.org" <> Date: Mon, 31 Aug 2009 21:12:32 +0000 Subject: Bug 286625: Make collectstats.pl --regenerate WAY faster and make it correctly count the empty resolution. Patch by Max Kanat-Alexander r=LpSolit, a=LpSolit --- collectstats.pl | 111 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 47 deletions(-) (limited to 'collectstats.pl') diff --git a/collectstats.pl b/collectstats.pl index bcb0fac5b..e550c1613 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -30,12 +30,14 @@ # To run new charts for a specific date, pass it in on the command line in # ISO (2004-08-14) format. -use AnyDBM_File; use strict; +use lib qw(. lib); + +use AnyDBM_File; use IO::Handle; +use List::Util qw(first); use Cwd; -use lib qw(. lib); use Bugzilla; use Bugzilla::Constants; @@ -111,6 +113,37 @@ my @resolutions = @{$fields->{'resolution'}}; # Exclude "" from the resolution list. @resolutions = grep {$_} @resolutions; +# --regenerate was taking an enormous amount of time to query everything +# per bug, per day. Instead, we now just get all the data out of the DB +# at once and stuff it into some data structures. +my (%bug_status, %bug_resolution, %removed); +if ($regenerate) { + %bug_resolution = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) }; + %bug_status = @{ $dbh->selectcol_arrayref( + 'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) }; + + my $removed_sth = $dbh->prepare( + q{SELECT bugs_activity.bug_id, bugs_activity.removed,} + . $dbh->sql_to_days('bugs_activity.bug_when') + . q{FROM bugs_activity + WHERE bugs_activity.fieldid = ? + ORDER BY bugs_activity.bug_when}); + + %removed = (bug_status => {}, resolution => {}); + foreach my $field (qw(bug_status resolution)) { + my $field_id = Bugzilla::Field->check($field)->id; + my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id); + my $hash = $removed{$field}; + foreach my $row (@$rows) { + my ($bug_id, $removed, $when) = @$row; + $hash->{$bug_id} ||= []; + push(@{ $hash->{$bug_id} }, { when => int($when), + removed => $removed }); + } + } +} + my $tstart = time; foreach (@myproducts) { my $dir = "$datadir/mining"; @@ -118,7 +151,7 @@ foreach (@myproducts) { &check_data_dir ($dir); if ($regenerate) { - ®enerate_stats($dir, $_); + regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed); } else { &collect_stats($dir, $_); } @@ -343,8 +376,7 @@ sub calculate_dupes { # This regenerates all statistics from the database. sub regenerate_stats { - my $dir = shift; - my $product = shift; + my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_; my $dbh = Bugzilla->dbh; my $when = localtime(time()); @@ -356,8 +388,6 @@ sub regenerate_stats { $file_product =~ s/\//-/gs; my $file = join '/', $dir, $file_product; - my @bugs; - my $and_product = ""; my $from_product = ""; @@ -387,7 +417,6 @@ sub regenerate_stats { } if (open DATA, ">$file") { - DATA->autoflush(1); my $fields = join('|', ('DATE', @statuses, @resolutions)); print DATA <selectcol_arrayref($query, undef, @values); - push(@bugs, @$bug_ids); - # For each bug that existed on that day, determine its status - # at the beginning of the day. If there were no status - # changes on or after that day, the status was the same as it - # is today, which can be found in the bugs table. Otherwise, - # the status was equal to the first "previous value" entry in - # the bugs_activity table for that bug made on or after that - # day. my %bugcount; foreach (@statuses) { $bugcount{$_} = 0; } foreach (@resolutions) { $bugcount{$_} = 0; } # Get information on bug states and resolutions. - $query = qq{SELECT bugs_activity.removed - FROM bugs_activity - INNER JOIN fielddefs - ON bugs_activity.fieldid = fielddefs.id - WHERE fielddefs.name = ? - AND bugs_activity.bug_id = ? - AND bugs_activity.bug_when >= } . - $dbh->sql_from_days($day) . - " ORDER BY bugs_activity.bug_when " . - $dbh->sql_limit(1); - - my $sth_bug = $dbh->prepare($query); - my $sth_status = $dbh->prepare(q{SELECT bug_status - FROM bugs - WHERE bug_id = ?}); - - my $sth_reso = $dbh->prepare(q{SELECT resolution - FROM bugs - WHERE bug_id = ?}); - for my $bug (@bugs) { - my $status = $dbh->selectrow_array($sth_bug, undef, - 'bug_status', $bug); - unless ($status) { - $status = $dbh->selectrow_array($sth_status, undef, $bug); - } + my $status = _get_value( + $removed->{'bug_status'}->{$bug}, + $bug_status, $day, $bug); if (defined $bugcount{$status}) { $bugcount{$status}++; } - my $resolution = $dbh->selectrow_array($sth_bug, undef, - 'resolution', $bug); - unless ($resolution) { - $resolution = $dbh->selectrow_array($sth_reso, undef, $bug); - } - + + my $resolution = _get_value( + $removed->{'resolution'}->{$bug}, + $bug_resolution, $day, $bug); + if (defined $bugcount{$resolution}) { $bugcount{$resolution}++; } @@ -490,6 +489,24 @@ FIN } } +# A helper for --regenerate. +# For each bug that exists on a day, we determine its status/resolution +# at the beginning of the day. If there were no status/resolution +# changes on or after that day, the status was the same as it +# is today (the "current" value). Otherwise, the status was equal to the +# first "previous value" entry in the bugs_activity table for that +# bug made on or after that day. +sub _get_value { + my ($removed, $current, $day, $bug) = @_; + + # Get the first change that's on or after this day. + my $item = first { $_->{when} >= $day } @{ $removed || [] }; + + # If there's no change on or after this day, then we just return the + # current value. + return $item ? $item->{removed} : $current->{$bug}; +} + sub today { my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; -- cgit v1.2.3-24-g4f1b