From 0becc20d415469cb8f9dfb258e066c961fd6de58 Mon Sep 17 00:00:00 2001 From: "gerv%gerv.net" <> Date: Tue, 1 Apr 2003 06:33:06 +0000 Subject: Bug 80157 - Add "regenerate" option to collectstats.pl. Patch by rwalters@qualcomm.com, with tweaks by jean_seb@hybride.com, dberlin@dberlin.org and gerv@mozilla.org; r=gerv, a=justdave. --- collectstats.pl | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 202 insertions(+), 5 deletions(-) (limited to 'collectstats.pl') diff --git a/collectstats.pl b/collectstats.pl index 5d72827b2..67e4c13b7 100755 --- a/collectstats.pl +++ b/collectstats.pl @@ -20,20 +20,27 @@ # # Contributor(s): Terry Weissman , # Harrison Page -# Gervase Markham +# Gervase Markham +# Richard Walters +# Jean-Sebastien Guay # Run me out of cron at midnight to collect Bugzilla statistics. use AnyDBM_File; use strict; +use IO::Handle; use vars @::legal_product; require "globals.pl"; use Bugzilla; -# tidy up after graphing module +# Turn off output buffering (probably needed when displaying output feedback +# in the regenerate mode.) +$| = 1; + +# Tidy up after graphing module if (chdir("graphs")) { unlink <./*.gif>; unlink <./*.png>; @@ -45,15 +52,29 @@ GetVersionTable(); Bugzilla->switch_to_shadow_db(); +# To recreate the daily statistics, run "collectstats.pl --regenerate" . +my $regenerate = 0; +if ($#ARGV >= 0 && $ARGV[0] eq "--regenerate") { + $regenerate = 1; +} + my @myproducts; push( @myproducts, "-All-", @::legal_product ); +my $tstart = time; foreach (@myproducts) { my $dir = "data/mining"; &check_data_dir ($dir); - &collect_stats ($dir, $_); + + if ($regenerate) { + ®enerate_stats($dir, $_); + } else { + &collect_stats($dir, $_); + } } +my $tend = time; +print "Total time taken " . delta_time($tstart, $tend) . "\n"; &calculate_dupes(); @@ -78,8 +99,8 @@ sub check_data_dir { my $dir = shift; if (! -d $dir) { - mkdir $dir, 0777; - chmod 0777, $dir; + mkdir $dir, 0755; + chmod 0755, $dir; } } @@ -135,6 +156,7 @@ FIN print DATA (join '|', @row) . "\n"; close DATA; + chmod 0644, $file; } else { print "$0: $file, $!"; } @@ -213,6 +235,165 @@ sub calculate_dupes { dbmclose(%count); } +# This regenerates all statistics from the database. +sub regenerate_stats { + my $dir = shift; + my $product = shift; + my $when = localtime(time()); + + my $tstart = time(); + + # NB: Need to mangle the product for the filename, but use the real + # product name in the query + my $file_product = $product; + $file_product =~ s/\//-/gs; + my $file = join '/', $dir, $file_product; + + my @bugs; + + my $and_product = ""; + my $from_product = ""; + + if ($product ne '-All-') { + $and_product = "AND bugs.product_id = products.id " . + "AND products.name = " . SqlQuote($product) . " "; + $from_product = ", products"; + } + + # Determine the start date from the date the first bug in the + # database was created, and the end date from the current day. + # If there were no bugs in the search, return early. + SendSQL("SELECT to_days(creation_ts) AS start, " . + "to_days(current_date) AS end, " . + "to_days('1970-01-01') " . + "FROM bugs $from_product WHERE to_days(creation_ts) != 'NULL' " . + $and_product . + "ORDER BY start LIMIT 1"); + + my ($start, $end, $base) = FetchSQLData(); + if (!defined $start) { + return; + } + + if (open DATA, ">$file") { + DATA->autoflush(1); + print DATA <= from_days(" . ($day - 2) . ") " . + $and_product . + " ORDER BY bug_id"); + + my @row; + while (@row = FetchSQLData()) { + push @bugs, $row[0]; + } + + # For each bug that existed on that day, determine its status + # at the beginning of the day. If there were no status + # changes on or after that day, the status was the same as it + # is today, which can be found in the bugs table. Otherwise, + # the status was equal to the first "previous value" entry in + # the bugs_activity table for that bug made on or after that + # day. + my %bugcount; + my @logstates = qw(NEW ASSIGNED REOPENED UNCONFIRMED RESOLVED + VERIFIED CLOSED); + my @logresolutions = qw(FIXED INVALID WONTFIX LATER REMIND + DUPLICATE WORKSFORME MOVED); + foreach (@logstates) { + $bugcount{$_} = 0; + } + + foreach (@logresolutions) { + $bugcount{$_} = 0; + } + + for my $bug (@bugs) { + # First, get information on various bug states. + SendSQL("SELECT bugs_activity.removed " . + "FROM bugs_activity,fielddefs " . + "WHERE bugs_activity.fieldid = fielddefs.fieldid " . + "AND fielddefs.name = 'bug_status' " . + "AND bugs_activity.bug_id = $bug " . + "AND bugs_activity.bug_when >= from_days($day) " . + "ORDER BY bugs_activity.bug_when LIMIT 1"); + + my $status; + if (@row = FetchSQLData()) { + $status = $row[0]; + } else { + SendSQL("SELECT bug_status FROM bugs WHERE bug_id = $bug"); + $status = FetchOneColumn(); + } + + if (defined $bugcount{$status}) { + $bugcount{$status}++; + } + + # Next, get information on various bug resolutions. + SendSQL("SELECT bugs_activity.removed " . + "FROM bugs_activity,fielddefs " . + "WHERE bugs_activity.fieldid = fielddefs.fieldid " . + "AND fielddefs.name = 'resolution' " . + "AND bugs_activity.bug_id = $bug " . + "AND bugs_activity.bug_when >= from_days($day) " . + "ORDER BY bugs_activity.bug_when LIMIT 1"); + + if (@row = FetchSQLData()) { + $status = $row[0]; + } else { + SendSQL("SELECT resolution FROM bugs WHERE bug_id = $bug"); + $status = FetchOneColumn(); + } + + if (defined $bugcount{$status}) { + $bugcount{$status}++; + } + } + + # Generate a line of output containing the date and counts + # of bugs in each state. + my $date = sqlday($day, $base); + print DATA "$date"; + foreach (@logstates) { + print DATA "|$bugcount{$_}"; + } + + foreach (@logresolutions) { + print DATA "|$bugcount{$_}"; + } + + print DATA "\n"; + } + + # Finish up output feedback for this product. + my $tend = time; + print "\rRegenerating $product \[100.0\%] - " . + delta_time($tstart, $tend) . "\n"; + + close DATA; + chmod 0640, $file; + } +} + sub today { my ($dom, $mon, $year) = (localtime(time))[3, 4, 5]; return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; @@ -223,3 +404,19 @@ sub today_dash { return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; } +sub sqlday { + my ($day, $base) = @_; + $day = ($day - $base) * 86400; + my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5]; + return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom; +} + +sub delta_time { + my $tstart = shift; + my $tend = shift; + my $delta = $tend - $tstart; + my $hours = int($delta/3600); + my $minutes = int($delta/60) - ($hours * 60); + my $seconds = $delta - ($minutes * 60) - ($hours * 3600); + return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds); +} -- cgit v1.2.3-24-g4f1b