diff options
-rw-r--r-- | Bugzilla/DB.pm | 15 | ||||
-rw-r--r-- | Bugzilla/Search.pm | 120 | ||||
-rw-r--r-- | xt/lib/Bugzilla/Test/Search/Constants.pm | 16 |
3 files changed, 81 insertions, 70 deletions
diff --git a/Bugzilla/DB.pm b/Bugzilla/DB.pm index 31051dd8e..117c3a7b0 100644 --- a/Bugzilla/DB.pm +++ b/Bugzilla/DB.pm @@ -78,6 +78,21 @@ use constant ENUM_DEFAULTS => { # Used by Bugzilla::Bug::possible_duplicates. use constant FULLTEXT_OR => ''; +# These are used in regular expressions to mean "the start or end of a word". +# +# We don't use [[:<:]] and [[:>:]], even though they mean +# "start and end of a word" and are supported by both MySQL and PostgreSQL, +# because they don't work if your search starts or ends with a non-alphanumeric +# character, and there's a fair chance somebody will want to use the "word" +# search to search flags for something like "review+". +# +# We do use [:almum:] because it is supported by at least MySQL and +# PostgreSQL, and hopefully will get us as much Unicode support as possible, +# depending on how well the regexp engines of the various databases support +# Unicode. +use constant WORD_START => '(^|[^[:alnum:]])'; +use constant WORD_END => '($|[^[:alnum:]])'; + ##################################################################### # Connection Methods ##################################################################### diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm index 563e75901..80e8dce43 100644 --- a/Bugzilla/Search.pm +++ b/Bugzilla/Search.pm @@ -1050,7 +1050,9 @@ END sub _sql_where { my ($self, $where_terms) = @_; - return join(' AND ', $self->_standard_where, @$where_terms); + # The newline and this particular spacing makes the resulting + # SQL a bit more readable for debugging. + return join("\n AND ", $self->_standard_where, @$where_terms); } ################################ @@ -1676,48 +1678,7 @@ sub SqlifyDate { sub build_subselect { my ($outer, $inner, $table, $cond) = @_; - my $q = "SELECT $inner FROM $table WHERE $cond"; - #return "$outer IN ($q)"; - my $dbh = Bugzilla->dbh; - my $list = $dbh->selectcol_arrayref($q); - return "1=2" unless @$list; # Could use boolean type on dbs which support it - return $dbh->sql_in($outer, $list);} - -sub GetByWordList { - my ($field, $strs) = (@_); - my @list; - my $dbh = Bugzilla->dbh; - return [] unless defined $strs; - - foreach my $w (split(/[\s,]+/, $strs)) { - my $word = $w; - if ($word ne "") { - $word =~ tr/A-Z/a-z/; - $word = $dbh->quote('(^|[^a-z0-9])' . quotemeta($word) . '($|[^a-z0-9])'); - trick_taint($word); - push(@list, $dbh->sql_regexp($field, $word)); - } - } - - return \@list; -} - -# Support for "any/all/nowordssubstr" comparison type ("words as substrings") -sub GetByWordListSubstr { - my ($field, $strs) = (@_); - my @list; - my $dbh = Bugzilla->dbh; - my $sql_word; - - foreach my $word (split(/[\s,]+/, $strs)) { - if ($word ne "") { - $sql_word = $dbh->quote($word); - trick_taint($sql_word); - push(@list, $dbh->sql_iposition($sql_word, $field) . " > 0"); - } - } - - return \@list; + return "$outer IN (SELECT $inner FROM $table WHERE $cond)"; } sub pronoun { @@ -1741,6 +1702,10 @@ sub pronoun { return 0; } +# Used by anyexact to get the list of input values. This allows us to +# support values with commas inside of them in the standard charts, and +# still accept string values for the boolean charts (and split them on +# commas). sub _all_values { my ($self, $args, $split_on) = @_; $split_on ||= qr/[\s,]+/; @@ -1764,6 +1729,50 @@ sub _all_values { return @array; } +# Support for "any/all/nowordssubstr" comparison type ("words as substrings") +sub _substring_terms { + my ($self, $args) = @_; + my $dbh = Bugzilla->dbh; + + # We don't have to (or want to) use _all_values, because we'd just + # split each term on spaces and commas anyway. + my @words = split(/[\s,]+/, $args->{value}); + @words = grep { defined $_ and $_ ne '' } @words; + @words = map { $dbh->quote($_) } @words; + my @terms = map { $dbh->sql_iposition($_, $args->{full_field}) . " > 0" } + @words; + return @terms; +} + +sub _word_terms { + my ($self, $args) = @_; + my $dbh = Bugzilla->dbh; + + my @values = split(/[\s,]+/, $args->{value}); + @values = grep { defined $_ and $_ ne '' } @values; + my @substring_terms = $self->_substring_terms($args); + + my @terms; + my $start = $dbh->WORD_START; + my $end = $dbh->WORD_END; + foreach my $word (@values) { + my $regex = $start . quotemeta($word) . $end; + my $quoted = $dbh->quote($regex); + # We don't have to check the regexp, because we escaped it, so we're + # sure it's valid. + my $regex_term = $dbh->sql_regexp($args->{full_field}, $quoted, + 'no check'); + # Regular expressions are slow--substring searches are faster. + # If we're searching for a word, we're also certain that the + # substring will appear in the value. So we limit first by + # substring and then by a regex that will match just words. + my $substring_term = shift @substring_terms; + push(@terms, "$substring_term AND $regex_term"); + } + + return @terms; +} + ###################### # Public Subroutines # ###################### @@ -2338,9 +2347,6 @@ sub _flagtypes_name { }; push(@$joins, $flagtypes_join); - # Generate the condition by running the operator-specific - # function. Afterwards the condition resides in the $args->{term} - # variable. my $full_field = $dbh->sql_string_concat("$flagtypes.name", "$flags.status"); $args->{full_field} = $full_field; @@ -2710,16 +2716,15 @@ sub _anywordsubstr { my ($self, $args) = @_; my ($full_field, $value) = @$args{qw(full_field value)}; - my $list = GetByWordListSubstr($full_field, $value); - $args->{term} = join(" OR ", @$list); + my @terms = $self->_substring_terms($args); + $args->{term} = join("\n\tOR ", @terms); } sub _allwordssubstr { my ($self, $args) = @_; - my ($full_field, $value) = @$args{qw(full_field value)}; - my $list = GetByWordListSubstr($full_field, $value); - $args->{term} = join(" AND ", @$list); + my @terms = $self->_substring_terms($args); + $args->{term} = join("\n\tAND ", @terms); } sub _nowordssubstr { @@ -2731,18 +2736,19 @@ sub _nowordssubstr { sub _anywords { my ($self, $args) = @_; - my ($full_field, $value) = @$args{qw(full_field value)}; - my $list = GetByWordList($full_field, $value); - $args->{term} = join(" OR ", @$list); + my @terms = $self->_word_terms($args); + # Because _word_terms uses AND, we need to parenthesize its terms + # if there are more than one. + @terms = map("($_)", @terms) if scalar(@terms) > 1; + $args->{term} = join("\n\tOR ", @terms); } sub _allwords { my ($self, $args) = @_; - my ($full_field, $value) = @$args{qw(full_field value)}; - my $list = GetByWordList($full_field, $value); - $args->{term} = join(" AND ", @$list); + my @terms = $self->_word_terms($args); + $args->{term} = join("\n\tAND ", @terms); } sub _nowords { diff --git a/xt/lib/Bugzilla/Test/Search/Constants.pm b/xt/lib/Bugzilla/Test/Search/Constants.pm index b03ed30db..248b59f53 100644 --- a/xt/lib/Bugzilla/Test/Search/Constants.pm +++ b/xt/lib/Bugzilla/Test/Search/Constants.pm @@ -374,7 +374,6 @@ use constant KNOWN_BROKEN => { # Same for attach_data.thedata. 'allwords-<1>' => { ALLWORDS_BROKEN, - 'attach_data.thedata' => { contains => [1] }, 'flagtypes.name' => { contains => [1] }, }, @@ -384,19 +383,16 @@ use constant KNOWN_BROKEN => { # attachments. nowords => { NOWORDS_BROKEN, - 'attach_data.thedata' => { contains => [1,5] }, }, # anywords searches don't work on decimal values. # attach_data doesn't work (perhaps because it's the entire # data, or some problem with the regex?). anywords => { - 'attach_data.thedata' => { contains => [1] }, work_time => { contains => [1] }, }, 'anywords-<1> <2>' => { percentage_complete => { contains => [2] }, - 'attach_data.thedata' => { contains => [1,2] }, work_time => { contains => [1,2] }, }, @@ -477,11 +473,8 @@ use constant KNOWN_BROKEN => { # where MySQL isn't, but the result is still a bit surprising to the user. use constant PG_BROKEN => { 'attach_data.thedata' => { - allwords => { }, - allwordssubstr => { }, - anywords => { }, - notregexp => { contains => [5] }, - nowords => { contains => [5] }, + notregexp => { contains => [5] }, + nowords => { contains => [5] }, }, percentage_complete => { 'allwordssubstr-<1>' => { contains => [3] }, @@ -562,7 +555,6 @@ use constant NEGATIVE_BROKEN_NOT => ( use constant BROKEN_NOT => { allwords => { COMMON_BROKEN_NOT, - "attach_data.thedata" => { contains => [1,5] }, bug_group => { contains => [1] }, cc => { contains => [1] }, "flagtypes.name" => { contains => [1,5] }, @@ -617,13 +609,12 @@ use constant BROKEN_NOT => { }, anywords => { COMMON_BROKEN_NOT, - "attach_data.thedata" => { contains => [1, 5] }, "work_time" => { contains => [1, 2] }, "work_time" => { contains => [1] }, FIELD_TYPE_MULTI_SELECT, { contains => [5] }, }, 'anywords-<1> <2>' => { - 'attach_data.thedata' => { contains => [1,2,5] }, + 'attach_data.thedata' => { contains => [5] }, "percentage_complete" => { contains => [1,3,4,5] }, work_time => { contains => [1,2] }, }, @@ -727,7 +718,6 @@ use constant BROKEN_NOT => { notsubstring => { NEGATIVE_BROKEN_NOT }, nowords => { NEGATIVE_BROKEN_NOT, - "attach_data.thedata" => { contains => [1] }, "work_time" => { contains => [2, 3, 4] }, "cc" => { contains => [5] }, "flagtypes.name" => { }, |