summaryrefslogtreecommitdiffstats
path: root/Bugzilla/Search
diff options
context:
space:
mode:
authorFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:50:02 +0200
committerFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:50:02 +0200
commitf21583b6ab8b870aaa3f8be48426d4e0f6a7ae48 (patch)
treed70984b1fbb9dc6d3576d0b0e57781fff1146c20 /Bugzilla/Search
parentb7fb96faa4f99f335695c11fbeed9f3a8bc78aa9 (diff)
downloadbugzilla-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar.gz
bugzilla-f21583b6ab8b870aaa3f8be48426d4e0f6a7ae48.tar.xz
Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString
Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit
Diffstat (limited to 'Bugzilla/Search')
-rw-r--r--Bugzilla/Search/Quicksearch.pm173
1 files changed, 94 insertions, 79 deletions
diff --git a/Bugzilla/Search/Quicksearch.pm b/Bugzilla/Search/Quicksearch.pm
index 01e475463..cd4deb94c 100644
--- a/Bugzilla/Search/Quicksearch.pm
+++ b/Bugzilla/Search/Quicksearch.pm
@@ -19,6 +19,7 @@ use Bugzilla::Util;
use List::Util qw(min max);
use List::MoreUtils qw(firstidx);
+use Text::ParseWords qw(parse_line);
use base qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
@@ -129,54 +130,95 @@ sub quicksearch {
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring);
- $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
-
if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring);
}
else {
_handle_alias($searchstring);
- # Globally translate " AND ", " OR ", " NOT " to space, pipe, dash.
- $searchstring =~ s/\s+AND\s+/ /g;
- $searchstring =~ s/\s+OR\s+/|/g;
- $searchstring =~ s/\s+NOT\s+/ -/g;
+ # Retain backslashes and quotes, to know which strings are quoted,
+ # and which ones are not.
+ my @words = parse_line('\s+', 1, $searchstring);
+ # If parse_line() returns no data, this means strings are badly quoted.
+ # Rather than trying to guess what the user wanted to do, we throw an error.
+ scalar(@words)
+ || ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
+
+ # A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
+ ThrowUserError('quicksearch_invalid_query')
+ if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
+
+ my (@qswords, @or_group);
+ while (scalar @words) {
+ my $word = shift @words;
+ # AND is the default word separator, similar to a whitespace,
+ # but |a AND OR b| is not a valid combination.
+ if ($word eq 'AND') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
+ if $words[0] eq 'OR';
+ }
+ # |a OR AND b| is not a valid combination.
+ # |a OR OR b| is equivalent to |a OR b| and so is harmless.
+ elsif ($word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
+ if $words[0] eq 'AND';
+ }
+ # NOT negates the following word.
+ # |NOT AND| and |NOT OR| are not valid combinations.
+ # |NOT NOT| is fine but has no effect as they cancel themselves.
+ elsif ($word eq 'NOT') {
+ $word = shift @words;
+ next if $word eq 'NOT';
+ if ($word eq 'AND' || $word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
+ }
+ unshift(@words, "-$word");
+ }
+ else {
+ # OR groups words together, as OR has higher precedence than AND.
+ push(@or_group, $word);
+ # If the next word is not OR, then we are not in a OR group,
+ # or we are leaving it.
+ if (!defined $words[0] || $words[0] ne 'OR') {
+ push(@qswords, join('|', @or_group));
+ @or_group = ();
+ }
+ }
+ }
- my @words = splitString($searchstring);
- _handle_status_and_resolution(\@words);
+ _handle_status_and_resolution(\@qswords);
my (@unknownFields, %ambiguous_fields);
+ $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words.
- foreach my $qsword (@words) {
- my $negate = substr($qsword, 0, 1) eq '-';
- if ($negate) {
- $qsword = substr($qsword, 1);
- }
+ foreach my $qsword (@qswords) {
+ my @or_operand = parse_line('\|', 1, $qsword);
+ foreach my $term (@or_operand) {
+ my $negate = substr($term, 0, 1) eq '-';
+ if ($negate) {
+ $term = substr($term, 1);
+ }
- # No special first char
- if (!_handle_special_first_chars($qsword, $negate)) {
- # Split by '|' to get all operands for a boolean OR.
- foreach my $or_operand (split(/\|/, $qsword)) {
- if (!_handle_field_names($or_operand, $negate,
- \@unknownFields,
- \%ambiguous_fields))
- {
- # Having ruled out the special cases, we may now split
- # by comma, which is another legal boolean OR indicator.
- foreach my $word (split(/,/, $or_operand)) {
- if (!_special_field_syntax($word, $negate)) {
- _default_quicksearch_word($word, $negate);
- }
- _handle_urls($word, $negate);
- }
+ next if _handle_special_first_chars($term, $negate);
+ next if _handle_field_names($term, $negate, \@unknownFields,
+ \%ambiguous_fields);
+
+ # Having ruled out the special cases, we may now split
+ # by comma, which is another legal boolean OR indicator.
+ # Remove quotes from quoted words, if any.
+ @words = parse_line(',', 0, $term);
+ foreach my $word (@words) {
+ if (!_special_field_syntax($word, $negate)) {
+ _default_quicksearch_word($word, $negate);
}
+ _handle_urls($word, $negate);
}
}
$chart++;
$and = 0;
$or = 0;
- } # foreach (@words)
+ }
# Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
@@ -302,7 +344,7 @@ sub _handle_special_first_chars {
my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1);
- my @subWords = split(/[\|,]/, $baseWord);
+ my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate);
@@ -334,7 +376,7 @@ sub _handle_special_first_chars {
sub _handle_field_names {
my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_;
-
+
# Flag and requestee shortcut
if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) {
addChart('flagtypes.name', 'substring', $1, $negate);
@@ -342,32 +384,40 @@ sub _handle_field_names {
addChart('requestees.login_name', 'substring', $2, $negate);
return 1;
}
-
- # generic field1,field2,field3:value1,value2 notation
- if ($or_operand =~ /^([^:]+):([^:]+)$/) {
- my @fields = split(/,/, $1);
- my @values = split(/,/, $2);
+
+ # Generic field1,field2,field3:value1,value2 notation.
+ # We have to correctly ignore commas and colons in quotes.
+ my @field_values = parse_line(':', 1, $or_operand);
+ if (scalar @field_values == 2) {
+ my @fields = parse_line(',', 1, $field_values[0]);
+ my @values = parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) {
my $translated = _translate_field_name($field);
# Skip and record any unknown fields
if (!defined $translated) {
push(@$unknownFields, $field);
- next;
}
# If we got back an array, that means the substring is
# ambiguous and could match more than field name
elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated;
- next;
}
- foreach my $value (@values) {
- my $operator = FIELD_OPERATOR->{$translated} || 'substring';
- addChart($translated, $operator, $value, $negate);
+ else {
+ foreach my $value (@values) {
+ my $operator = FIELD_OPERATOR->{$translated} || 'substring';
+ # If the string was quoted to protect some special
+ # characters such as commas and colons, we need
+ # to remove quotes.
+ if ($value =~ /^(["'])(.+)\g1$/) {
+ $value = $2;
+ $value =~ s/\\(["'])/$1/g;
+ }
+ addChart($translated, $operator, $value, $negate);
+ }
}
}
return 1;
}
-
return 0;
}
@@ -500,41 +550,6 @@ sub _handle_urls {
# Helpers
###########################################################################
-# Split string on whitespace, retaining quoted strings as one
-sub splitString {
- my $string = shift;
- my @quoteparts;
- my @parts;
- my $i = 0;
-
- # Now split on quote sign; be tolerant about unclosed quotes
- @quoteparts = split(/"/, $string);
- foreach my $part (@quoteparts) {
- # After every odd quote, quote special chars
- if ($i++ %2) {
- $part = url_quote($part);
- # Protect the minus sign from being considered
- # as negation, in quotes.
- $part =~ s/(?<=^)\-/%2D/;
- }
- }
- # Join again
- $string = join('"', @quoteparts);
-
- # Now split on unescaped whitespace
- @parts = split(/\s+/, $string);
- foreach (@parts) {
- # Protect plus signs from becoming a blank.
- # If "+" appears as the first character, leave it alone
- # as it has a special meaning. Strings which start with
- # "+" must be quoted.
- s/(?<!^)\+/%2B/g;
- # Remove quotes
- s/"//g;
- }
- return @parts;
-}
-
# Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase {
my ($phrase) = @_;
@@ -600,7 +615,7 @@ sub makeChart {
my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field);
$cgi->param("type$expr", $type);
- $cgi->param("value$expr", url_decode($value));
+ $cgi->param("value$expr", $value);
}
1;