From d8643908a7d6243c361e670573af763067db408d Mon Sep 17 00:00:00 2001 From: Frédéric Buclin Date: Wed, 16 Jan 2013 19:05:22 +0100 Subject: Bug 819432: Execute queries in two steps to improve performance r=dkl a=LpSolit --- Bugzilla/Search.pm | 180 ++++++++++++++++++++++++++- buglist.cgi | 42 +++---- collectstats.pl | 3 +- report.cgi | 7 +- template/en/default/list/list.html.tmpl | 11 +- template/en/default/reports/report.html.tmpl | 4 +- whine.pl | 9 +- xt/lib/Bugzilla/Test/Search/FieldTest.pm | 11 +- 8 files changed, 218 insertions(+), 49 deletions(-) diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm index b05da1a04..1c6af782e 100644 --- a/Bugzilla/Search.pm +++ b/Bugzilla/Search.pm @@ -32,9 +32,10 @@ use Data::Dumper; use Date::Format; use Date::Parse; use Scalar::Util qw(blessed); -use List::MoreUtils qw(all part uniq); +use List::MoreUtils qw(all firstidx part uniq); use POSIX qw(INT_MAX); use Storable qw(dclone); +use Time::HiRes qw(gettimeofday tv_interval); # Description Of Boolean Charts # ----------------------------- @@ -686,7 +687,70 @@ sub new { # Public Accessors # #################### -sub sql { +sub data { + my $self = shift; + return $self->{data} if $self->{data}; + my $dbh = Bugzilla->dbh; + + # If all fields belong to the 'bugs' table, there is no need to split + # the original query into two pieces. Else we override the 'fields' + # argument to first get bug IDs based on the search criteria defined + # by the caller, and the desired fields are collected in the 2nd query. + my @orig_fields = $self->_input_columns; + my $all_in_bugs_table = 1; + foreach my $field (@orig_fields) { + next if $self->COLUMNS->{$field}->{name} =~ /^bugs\.\w+$/; + $self->{fields} = ['bug_id']; + $all_in_bugs_table = 0; + last; + } + + my $start_time = [gettimeofday()]; + my $sql = $self->_sql; + # Do we just want bug IDs to pass to the 2nd query or all the data immediately? + my $func = $all_in_bugs_table ? 'selectall_arrayref' : 'selectcol_arrayref'; + my $bug_ids = $dbh->$func($sql); + my @extra_data = ({sql => $sql, time => tv_interval($start_time)}); + # Restore the original 'fields' argument, just in case. + $self->{fields} = \@orig_fields unless $all_in_bugs_table; + + # If there are no bugs found, or all fields are in the 'bugs' table, + # there is no need for another query. + if (!scalar @$bug_ids || $all_in_bugs_table) { + $self->{data} = $bug_ids; + return wantarray ? ($self->{data}, \@extra_data) : $self->{data}; + } + + # Make sure the bug_id will be returned. If not, append it to the list. + my $pos = firstidx { $_ eq 'bug_id' } @orig_fields; + if ($pos < 0) { + push(@orig_fields, 'bug_id'); + $pos = $#orig_fields; + } + + # Now create a query with the buglist above as the single criteria + # and the fields that the caller wants. No need to redo security checks; + # the list has already been validated above. + my $search = $self->new('fields' => \@orig_fields, + 'params' => {bug_id => $bug_ids, bug_id_type => 'anyexact'}, + 'sharer' => $self->_sharer_id, + 'user' => $self->_user, + 'allow_unlimited' => 1, + '_no_security_check' => 1); + + $start_time = [gettimeofday()]; + $sql = $search->_sql; + my $unsorted_data = $dbh->selectall_arrayref($sql); + push(@extra_data, {sql => $sql, time => tv_interval($start_time)}); + # Let's sort the data. We didn't do it in the query itself because + # we already know in which order to sort bugs thanks to the first query, + # and this avoids additional table joins in the SQL query. + my %data = map { $_->[$pos] => $_ } @$unsorted_data; + $self->{data} = [map { $data{$_} } @$bug_ids]; + return wantarray ? ($self->{data}, \@extra_data) : $self->{data}; +} + +sub _sql { my ($self) = @_; return $self->{sql} if $self->{sql}; my $dbh = Bugzilla->dbh; @@ -720,7 +784,7 @@ sub search_description { # Make sure that the description has actually been generated if # people are asking for the whole thing. else { - $self->sql; + $self->_sql; } return $self->{'search_description'}; } @@ -1116,6 +1180,7 @@ sub _standard_joins { my ($self) = @_; my $user = $self->_user; my @joins; + return () if $self->{_no_security_check}; my $security_join = { table => 'bug_group_map', @@ -1192,6 +1257,7 @@ sub _translate_join { # group security. sub _standard_where { my ($self) = @_; + return ('1=1') if $self->{_no_security_check}; # If replication lags badly between the shadow db and the main DB, # it's possible for bugs to show up in searches before their group # controls are properly set. To prevent this, when initially creating @@ -2960,6 +3026,112 @@ sub _translate_old_column { 1; +__END__ + +=head1 NAME + +Bugzilla::Search - Provides methods to run queries against bugs. + +=head1 SYNOPSIS + + use Bugzilla::Search; + + my $search = new Bugzilla::Search({'fields' => \@fields, + 'params' => \%search_criteria, + 'sharer' => $sharer_id, + 'user' => $user_obj, + 'allow_unlimited' => 1}); + + my $data = $search->data; + my ($data, $extra_data) = $search->data; + +=head1 DESCRIPTION + +Search.pm represents a search object. It's the single way to collect +data about bugs in a secure way. The list of bugs matching criteria +defined by the caller are filtered based on the user privileges. + +=head1 METHODS + +=head2 new + +=over + +=item B + +Create a Bugzilla::Search object. + +=item B + +=over + +=item C + +An arrayref representing the bug attributes for which data is desired. +Legal attributes are listed in the fielddefs DB table. At least one field +must be defined, typically the 'bug_id' field. + +=item C + +A hashref representing search criteria. Each key => value pair represents +a search criteria, where the key is the search field and the value is the +value for this field. At least one search criteria must be defined if the +'search_allow_no_criteria' parameter is turned off, else an error is thrown. + +=item C + +When a saved search is shared by a user, this is his user ID. + +=item C + +A L object representing the user to whom the data is addressed. +All security checks are done based on this user object, so it's not safe +to share results of the query with other users as not all users have the +same privileges or have the same role for all bugs in the list. If this +parameter is not defined, then the currently logged in user is taken into +account. If no user is logged in, then only public bugs will be returned. + +=item C + +If set to a true value, the number of bugs retrieved by the query is not +limited. + +=back + +=item B + +A L object. + +=back + +=head2 data + +=over + +=item B + +Returns bugs matching search criteria passed to C. + +=item B + +None + +=item B + +In scalar context, this method returns a reference to a list of bugs. +Each item of the list represents a bug, which is itself a reference to +a list where each item represents a bug attribute, in the same order as +specified in the C parameter of C. + +In list context, this methods also returns a reference to a list containing +references to hashes. For each hash, two keys are defined: C contains +the SQL query which has been executed, and C