summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Bugzilla/DB.pm15
-rw-r--r--Bugzilla/Search.pm120
-rw-r--r--xt/lib/Bugzilla/Test/Search/Constants.pm16
3 files changed, 81 insertions, 70 deletions
diff --git a/Bugzilla/DB.pm b/Bugzilla/DB.pm
index 31051dd8e..117c3a7b0 100644
--- a/Bugzilla/DB.pm
+++ b/Bugzilla/DB.pm
@@ -78,6 +78,21 @@ use constant ENUM_DEFAULTS => {
# Used by Bugzilla::Bug::possible_duplicates.
use constant FULLTEXT_OR => '';
+# These are used in regular expressions to mean "the start or end of a word".
+#
+# We don't use [[:<:]] and [[:>:]], even though they mean
+# "start and end of a word" and are supported by both MySQL and PostgreSQL,
+# because they don't work if your search starts or ends with a non-alphanumeric
+# character, and there's a fair chance somebody will want to use the "word"
+# search to search flags for something like "review+".
+#
+# We do use [:almum:] because it is supported by at least MySQL and
+# PostgreSQL, and hopefully will get us as much Unicode support as possible,
+# depending on how well the regexp engines of the various databases support
+# Unicode.
+use constant WORD_START => '(^|[^[:alnum:]])';
+use constant WORD_END => '($|[^[:alnum:]])';
+
#####################################################################
# Connection Methods
#####################################################################
diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm
index 563e75901..80e8dce43 100644
--- a/Bugzilla/Search.pm
+++ b/Bugzilla/Search.pm
@@ -1050,7 +1050,9 @@ END
sub _sql_where {
my ($self, $where_terms) = @_;
- return join(' AND ', $self->_standard_where, @$where_terms);
+ # The newline and this particular spacing makes the resulting
+ # SQL a bit more readable for debugging.
+ return join("\n AND ", $self->_standard_where, @$where_terms);
}
################################
@@ -1676,48 +1678,7 @@ sub SqlifyDate {
sub build_subselect {
my ($outer, $inner, $table, $cond) = @_;
- my $q = "SELECT $inner FROM $table WHERE $cond";
- #return "$outer IN ($q)";
- my $dbh = Bugzilla->dbh;
- my $list = $dbh->selectcol_arrayref($q);
- return "1=2" unless @$list; # Could use boolean type on dbs which support it
- return $dbh->sql_in($outer, $list);}
-
-sub GetByWordList {
- my ($field, $strs) = (@_);
- my @list;
- my $dbh = Bugzilla->dbh;
- return [] unless defined $strs;
-
- foreach my $w (split(/[\s,]+/, $strs)) {
- my $word = $w;
- if ($word ne "") {
- $word =~ tr/A-Z/a-z/;
- $word = $dbh->quote('(^|[^a-z0-9])' . quotemeta($word) . '($|[^a-z0-9])');
- trick_taint($word);
- push(@list, $dbh->sql_regexp($field, $word));
- }
- }
-
- return \@list;
-}
-
-# Support for "any/all/nowordssubstr" comparison type ("words as substrings")
-sub GetByWordListSubstr {
- my ($field, $strs) = (@_);
- my @list;
- my $dbh = Bugzilla->dbh;
- my $sql_word;
-
- foreach my $word (split(/[\s,]+/, $strs)) {
- if ($word ne "") {
- $sql_word = $dbh->quote($word);
- trick_taint($sql_word);
- push(@list, $dbh->sql_iposition($sql_word, $field) . " > 0");
- }
- }
-
- return \@list;
+ return "$outer IN (SELECT $inner FROM $table WHERE $cond)";
}
sub pronoun {
@@ -1741,6 +1702,10 @@ sub pronoun {
return 0;
}
+# Used by anyexact to get the list of input values. This allows us to
+# support values with commas inside of them in the standard charts, and
+# still accept string values for the boolean charts (and split them on
+# commas).
sub _all_values {
my ($self, $args, $split_on) = @_;
$split_on ||= qr/[\s,]+/;
@@ -1764,6 +1729,50 @@ sub _all_values {
return @array;
}
+# Support for "any/all/nowordssubstr" comparison type ("words as substrings")
+sub _substring_terms {
+ my ($self, $args) = @_;
+ my $dbh = Bugzilla->dbh;
+
+ # We don't have to (or want to) use _all_values, because we'd just
+ # split each term on spaces and commas anyway.
+ my @words = split(/[\s,]+/, $args->{value});
+ @words = grep { defined $_ and $_ ne '' } @words;
+ @words = map { $dbh->quote($_) } @words;
+ my @terms = map { $dbh->sql_iposition($_, $args->{full_field}) . " > 0" }
+ @words;
+ return @terms;
+}
+
+sub _word_terms {
+ my ($self, $args) = @_;
+ my $dbh = Bugzilla->dbh;
+
+ my @values = split(/[\s,]+/, $args->{value});
+ @values = grep { defined $_ and $_ ne '' } @values;
+ my @substring_terms = $self->_substring_terms($args);
+
+ my @terms;
+ my $start = $dbh->WORD_START;
+ my $end = $dbh->WORD_END;
+ foreach my $word (@values) {
+ my $regex = $start . quotemeta($word) . $end;
+ my $quoted = $dbh->quote($regex);
+ # We don't have to check the regexp, because we escaped it, so we're
+ # sure it's valid.
+ my $regex_term = $dbh->sql_regexp($args->{full_field}, $quoted,
+ 'no check');
+ # Regular expressions are slow--substring searches are faster.
+ # If we're searching for a word, we're also certain that the
+ # substring will appear in the value. So we limit first by
+ # substring and then by a regex that will match just words.
+ my $substring_term = shift @substring_terms;
+ push(@terms, "$substring_term AND $regex_term");
+ }
+
+ return @terms;
+}
+
######################
# Public Subroutines #
######################
@@ -2338,9 +2347,6 @@ sub _flagtypes_name {
};
push(@$joins, $flagtypes_join);
- # Generate the condition by running the operator-specific
- # function. Afterwards the condition resides in the $args->{term}
- # variable.
my $full_field = $dbh->sql_string_concat("$flagtypes.name",
"$flags.status");
$args->{full_field} = $full_field;
@@ -2710,16 +2716,15 @@ sub _anywordsubstr {
my ($self, $args) = @_;
my ($full_field, $value) = @$args{qw(full_field value)};
- my $list = GetByWordListSubstr($full_field, $value);
- $args->{term} = join(" OR ", @$list);
+ my @terms = $self->_substring_terms($args);
+ $args->{term} = join("\n\tOR ", @terms);
}
sub _allwordssubstr {
my ($self, $args) = @_;
- my ($full_field, $value) = @$args{qw(full_field value)};
- my $list = GetByWordListSubstr($full_field, $value);
- $args->{term} = join(" AND ", @$list);
+ my @terms = $self->_substring_terms($args);
+ $args->{term} = join("\n\tAND ", @terms);
}
sub _nowordssubstr {
@@ -2731,18 +2736,19 @@ sub _nowordssubstr {
sub _anywords {
my ($self, $args) = @_;
- my ($full_field, $value) = @$args{qw(full_field value)};
- my $list = GetByWordList($full_field, $value);
- $args->{term} = join(" OR ", @$list);
+ my @terms = $self->_word_terms($args);
+ # Because _word_terms uses AND, we need to parenthesize its terms
+ # if there are more than one.
+ @terms = map("($_)", @terms) if scalar(@terms) > 1;
+ $args->{term} = join("\n\tOR ", @terms);
}
sub _allwords {
my ($self, $args) = @_;
- my ($full_field, $value) = @$args{qw(full_field value)};
- my $list = GetByWordList($full_field, $value);
- $args->{term} = join(" AND ", @$list);
+ my @terms = $self->_word_terms($args);
+ $args->{term} = join("\n\tAND ", @terms);
}
sub _nowords {
diff --git a/xt/lib/Bugzilla/Test/Search/Constants.pm b/xt/lib/Bugzilla/Test/Search/Constants.pm
index b03ed30db..248b59f53 100644
--- a/xt/lib/Bugzilla/Test/Search/Constants.pm
+++ b/xt/lib/Bugzilla/Test/Search/Constants.pm
@@ -374,7 +374,6 @@ use constant KNOWN_BROKEN => {
# Same for attach_data.thedata.
'allwords-<1>' => {
ALLWORDS_BROKEN,
- 'attach_data.thedata' => { contains => [1] },
'flagtypes.name' => { contains => [1] },
},
@@ -384,19 +383,16 @@ use constant KNOWN_BROKEN => {
# attachments.
nowords => {
NOWORDS_BROKEN,
- 'attach_data.thedata' => { contains => [1,5] },
},
# anywords searches don't work on decimal values.
# attach_data doesn't work (perhaps because it's the entire
# data, or some problem with the regex?).
anywords => {
- 'attach_data.thedata' => { contains => [1] },
work_time => { contains => [1] },
},
'anywords-<1> <2>' => {
percentage_complete => { contains => [2] },
- 'attach_data.thedata' => { contains => [1,2] },
work_time => { contains => [1,2] },
},
@@ -477,11 +473,8 @@ use constant KNOWN_BROKEN => {
# where MySQL isn't, but the result is still a bit surprising to the user.
use constant PG_BROKEN => {
'attach_data.thedata' => {
- allwords => { },
- allwordssubstr => { },
- anywords => { },
- notregexp => { contains => [5] },
- nowords => { contains => [5] },
+ notregexp => { contains => [5] },
+ nowords => { contains => [5] },
},
percentage_complete => {
'allwordssubstr-<1>' => { contains => [3] },
@@ -562,7 +555,6 @@ use constant NEGATIVE_BROKEN_NOT => (
use constant BROKEN_NOT => {
allwords => {
COMMON_BROKEN_NOT,
- "attach_data.thedata" => { contains => [1,5] },
bug_group => { contains => [1] },
cc => { contains => [1] },
"flagtypes.name" => { contains => [1,5] },
@@ -617,13 +609,12 @@ use constant BROKEN_NOT => {
},
anywords => {
COMMON_BROKEN_NOT,
- "attach_data.thedata" => { contains => [1, 5] },
"work_time" => { contains => [1, 2] },
"work_time" => { contains => [1] },
FIELD_TYPE_MULTI_SELECT, { contains => [5] },
},
'anywords-<1> <2>' => {
- 'attach_data.thedata' => { contains => [1,2,5] },
+ 'attach_data.thedata' => { contains => [5] },
"percentage_complete" => { contains => [1,3,4,5] },
work_time => { contains => [1,2] },
},
@@ -727,7 +718,6 @@ use constant BROKEN_NOT => {
notsubstring => { NEGATIVE_BROKEN_NOT },
nowords => {
NEGATIVE_BROKEN_NOT,
- "attach_data.thedata" => { contains => [1] },
"work_time" => { contains => [2, 3, 4] },
"cc" => { contains => [5] },
"flagtypes.name" => { },