From 841b5d3961f31277c424a4432fc51f0ac4bf093f Mon Sep 17 00:00:00 2001 From: "mkanat%bugzilla.org" <> Date: Sun, 6 Sep 2009 22:45:51 +0000 Subject: Bug 176002: Move duplicate statistics into the db Patch by Max Kanat-Alexander r=LpSolit, a=LpSolit --- duplicates.cgi | 182 +++++++++++++++++++++++++-------------------------------- 1 file changed, 81 insertions(+), 101 deletions(-) (limited to 'duplicates.cgi') diff --git a/duplicates.cgi b/duplicates.cgi index af239d632..4c0509864 100755 --- a/duplicates.cgi +++ b/duplicates.cgi @@ -18,15 +18,11 @@ # Copyright (C) 1998 Netscape Communications Corporation. All # Rights Reserved. # -# Contributor(s): Gervase Markham -# -# Generates mostfreq list from data collected by collectstats.pl. - +# Contributor(s): +# Gervase Markham +# Max Kanat-Alexander use strict; - -use AnyDBM_File; - use lib qw(. lib); use Bugzilla; @@ -34,28 +30,52 @@ use Bugzilla::Constants; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::Search; +use Bugzilla::Field; use Bugzilla::Product; +############### +# Subroutines # +############### + +# $counts is a count of exactly how many direct duplicates there are for +# each bug we're considering. $dups is a map of duplicates, from one +# bug_id to another. We go through the duplicates map ($dups) and if one bug +# in $count is a duplicate of another bug in $count, we add their counts +# together under the target bug. +sub add_indirect_dups { + my ($counts, $dups) = @_; + + foreach my $add_from (keys %$dups) { + my $add_to = walk_dup_chain($dups, $add_from); + my $add_amount = delete $counts->{$add_from} || 0; + $counts->{$add_to} += $add_amount; + } +} + +sub walk_dup_chain { + my ($dups, $from_id) = @_; + my $to_id = $dups->{$from_id}; + while (my $bug_id = $dups->{$to_id}) { + last if $bug_id == $from_id; # avoid duplicate loops + $to_id = $bug_id; + } + # Optimize for future calls to add_indirect_dups. + $dups->{$from_id} = $to_id; + return $to_id; +} + +############### +# Main Script # +############### + my $cgi = Bugzilla->cgi; my $template = Bugzilla->template; my $vars = {}; -# collectstats.pl uses duplicates.cgi to generate the RDF duplicates stats. -# However, this conflicts with requirelogin if it's enabled; so we make -# logging-in optional if we are running from the command line. -if ($::ENV{'GATEWAY_INTERFACE'} eq "cmdline") { - Bugzilla->login(LOGIN_OPTIONAL); -} -else { - Bugzilla->login(); -} +Bugzilla->login(); my $dbh = Bugzilla->switch_to_shadow_db(); -my %dbmcount; -my %count; -my %before; - # Get params from URL sub formvalue { my ($name, $default) = (@_); @@ -70,6 +90,10 @@ my $reverse = formvalue("reverse") ? 1 : 0; my @query_products = $cgi->param('product'); my $sortvisible = formvalue("sortvisible"); my @buglist = (split(/[:,]/, formvalue("bug_id"))); +detaint_natural($_) foreach @buglist; +# If we got any non-numeric items, they will now be undef. Remove them from +# the list. +@buglist = grep($_, @buglist); # Make sure all products are valid. foreach my $p (@query_products) { @@ -79,54 +103,6 @@ foreach my $p (@query_products) { # Small backwards-compatibility hack, dated 2002-04-10. $sortby = "count" if $sortby eq "dup_count"; -# Open today's record of dupes -my $today = days_ago(0); -my $yesterday = days_ago(1); - -# We don't know the exact file name, because the extension depends on the -# underlying dbm library, which could be anything. We can't glob, because -# perl < 5.6 considers if (<*>) { ... } to be tainted -# Instead, just check the return value for today's data and yesterday's, -# and ignore file not found errors - -use Errno; -use Fcntl; - -my $datadir = bz_locations()->{'datadir'}; - -if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$today", - O_RDONLY, 0644)) { - if ($!{ENOENT}) { - if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$yesterday", - O_RDONLY, 0644)) { - my $vars = { today => $today }; - if ($!{ENOENT}) { - ThrowUserError("no_dupe_stats", $vars); - } else { - $vars->{'error_msg'} = $!; - ThrowUserError("no_dupe_stats_error_yesterday", $vars); - } - } - } else { - ThrowUserError("no_dupe_stats_error_today", - { error_msg => $! }); - } -} - -# Copy hash (so we don't mess up the on-disk file when we remove entries) -%count = %dbmcount; - -# Remove all those dupes under the threshold parameter. -# We do this, before the sorting, for performance reasons. -my $threshold = Bugzilla->params->{"mostfreqthreshold"}; - -while (my ($key, $value) = each %count) { - delete $count{$key} if ($value < $threshold); - - # If there's a buglist, restrict the bugs to that list. - delete $count{$key} if $sortvisible && (lsearch(\@buglist, $key) == -1); -} - my $origmaxrows = $maxrows; detaint_natural($maxrows) || ThrowUserError("invalid_maxrows", { maxrows => $origmaxrows}); @@ -136,34 +112,45 @@ detaint_natural($changedsince) || ThrowUserError("invalid_changedsince", { changedsince => $origchangedsince }); -# Try and open the database from "changedsince" days ago -my $dobefore = 0; -my %delta; -my $whenever = days_ago($changedsince); - -if (!tie(%before, 'AnyDBM_File', "$datadir/duplicates/dupes$whenever", - O_RDONLY, 0644)) { - # Ignore file not found errors - if (!$!{ENOENT}) { - ThrowUserError("no_dupe_stats_error_whenever", - { error_msg => $!, - changedsince => $changedsince, - whenever => $whenever, - }); - } -} else { - # Calculate the deltas - ($delta{$_} = $count{$_} - ($before{$_} || 0)) foreach (keys(%count)); - $dobefore = 1; +my %total_dups = @{$dbh->selectcol_arrayref( + "SELECT dupe_of, COUNT(dupe) + FROM duplicates + GROUP BY dupe_of", {Columns => [1,2]})}; + +my %dupe_relation = @{$dbh->selectcol_arrayref( + "SELECT dupe, dupe_of FROM duplicates + WHERE dupe IN (SELECT dupe_of FROM duplicates)", + {Columns => [1,2]})}; +add_indirect_dups(\%total_dups, \%dupe_relation); + +my $reso_field_id = get_field_id('resolution'); +my %since_dups = @{$dbh->selectcol_arrayref( + "SELECT dupe_of, COUNT(dupe) + FROM duplicates INNER JOIN bugs_activity + ON bugs_activity.bug_id = duplicates.dupe + WHERE added = 'DUPLICATE' AND fieldid = ? AND " + . $dbh->sql_to_days('bug_when') . " >= (" + . $dbh->sql_to_days('NOW()') . " - ?) + GROUP BY dupe_of", {Columns=>[1,2]}, + $reso_field_id, $changedsince)}; +add_indirect_dups(\%since_dups, \%dupe_relation); + +my (@bugs, @bug_ids); + +foreach my $id (keys %total_dups) { + if ($total_dups{$id} < Bugzilla->params->{'mostfreqthreshold'}) { + delete $total_dups{$id}; + next; + } + if ($sortvisible and @buglist and !grep($_ == $id, @buglist)) { + delete $total_dups{$id}; + } } -my @bugs; -my @bug_ids; - -if (scalar(%count)) { +if (scalar %total_dups) { # use Bugzilla::Search so that we get the security checking - my $params = new Bugzilla::CGI({ 'bug_id' => [keys %count] }); + my $params = new Bugzilla::CGI({ 'bug_id' => [keys %total_dups] }); if ($openonly) { $params->param('resolution', '---'); @@ -221,8 +208,8 @@ if (scalar(%count)) { $short_desc, $bug_status, $resolution) = @$result; push (@bugs, { id => $id, - count => $count{$id}, - delta => $delta{$id}, + count => $total_dups{$id}, + delta => $since_dups{$id} || 0, component => $component, bug_severity => $bug_severity, op_sys => $op_sys, @@ -237,7 +224,6 @@ if (scalar(%count)) { $vars->{'bugs'} = \@bugs; $vars->{'bug_ids'} = \@bug_ids; -$vars->{'dobefore'} = $dobefore; $vars->{'sortby'} = $sortby; $vars->{'sortvisible'} = $sortvisible; $vars->{'changedsince'} = $changedsince; @@ -264,9 +250,3 @@ print $cgi->header( # Generate and return the UI (HTML page) from the appropriate template. $template->process($format->{'template'}, $vars) || ThrowTemplateError($template->error()); - - -sub days_ago { - my ($dom, $mon, $year) = (localtime(time - ($_[0]*24*60*60)))[3, 4, 5]; - return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom; -} -- cgit v1.2.3-24-g4f1b