From 8f4fd271de4edbf1a206dfa0a98f8276f6189709 Mon Sep 17 00:00:00 2001 From: Frédéric Buclin Date: Thu, 29 Mar 2012 19:56:41 +0200 Subject: Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit --- Bugzilla/Search/Quicksearch.pm | 173 +++++++++++++----------- Bugzilla/Util.pm | 17 +-- template/en/default/global/user-error.html.tmpl | 21 +++ template/en/default/pages/quicksearch.html.tmpl | 83 ++++++++++-- 4 files changed, 191 insertions(+), 103 deletions(-) diff --git a/Bugzilla/Search/Quicksearch.pm b/Bugzilla/Search/Quicksearch.pm index 8425a2be2..7b951d5ae 100644 --- a/Bugzilla/Search/Quicksearch.pm +++ b/Bugzilla/Search/Quicksearch.pm @@ -32,6 +32,7 @@ use Bugzilla::Util; use List::Util qw(min max); use List::MoreUtils qw(firstidx); +use Text::ParseWords qw(parse_line); use base qw(Exporter); @Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch); @@ -142,54 +143,95 @@ sub quicksearch { $searchstring =~ s/(^[\s,]+|[\s,]+$)//g; ThrowUserError('buglist_parameters_required') unless ($searchstring); - $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0; - if ($searchstring =~ m/^[0-9,\s]*$/) { _bug_numbers_only($searchstring); } else { _handle_alias($searchstring); - # Globally translate " AND ", " OR ", " NOT " to space, pipe, dash. - $searchstring =~ s/\s+AND\s+/ /g; - $searchstring =~ s/\s+OR\s+/|/g; - $searchstring =~ s/\s+NOT\s+/ -/g; + # Retain backslashes and quotes, to know which strings are quoted, + # and which ones are not. + my @words = parse_line('\s+', 1, $searchstring); + # If parse_line() returns no data, this means strings are badly quoted. + # Rather than trying to guess what the user wanted to do, we throw an error. + scalar(@words) + || ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring}); + + # A query cannot start with AND or OR, nor can it end with AND, OR or NOT. + ThrowUserError('quicksearch_invalid_query') + if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/); + + my (@qswords, @or_group); + while (scalar @words) { + my $word = shift @words; + # AND is the default word separator, similar to a whitespace, + # but |a AND OR b| is not a valid combination. + if ($word eq 'AND') { + ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']}) + if $words[0] eq 'OR'; + } + # |a OR AND b| is not a valid combination. + # |a OR OR b| is equivalent to |a OR b| and so is harmless. + elsif ($word eq 'OR') { + ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']}) + if $words[0] eq 'AND'; + } + # NOT negates the following word. + # |NOT AND| and |NOT OR| are not valid combinations. + # |NOT NOT| is fine but has no effect as they cancel themselves. + elsif ($word eq 'NOT') { + $word = shift @words; + next if $word eq 'NOT'; + if ($word eq 'AND' || $word eq 'OR') { + ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]}); + } + unshift(@words, "-$word"); + } + else { + # OR groups words together, as OR has higher precedence than AND. + push(@or_group, $word); + # If the next word is not OR, then we are not in a OR group, + # or we are leaving it. + if (!defined $words[0] || $words[0] ne 'OR') { + push(@qswords, join('|', @or_group)); + @or_group = (); + } + } + } - my @words = splitString($searchstring); - _handle_status_and_resolution(\@words); + _handle_status_and_resolution(\@qswords); my (@unknownFields, %ambiguous_fields); + $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0; # Loop over all main-level QuickSearch words. - foreach my $qsword (@words) { - my $negate = substr($qsword, 0, 1) eq '-'; - if ($negate) { - $qsword = substr($qsword, 1); - } + foreach my $qsword (@qswords) { + my @or_operand = parse_line('\|', 1, $qsword); + foreach my $term (@or_operand) { + my $negate = substr($term, 0, 1) eq '-'; + if ($negate) { + $term = substr($term, 1); + } - # No special first char - if (!_handle_special_first_chars($qsword, $negate)) { - # Split by '|' to get all operands for a boolean OR. - foreach my $or_operand (split(/\|/, $qsword)) { - if (!_handle_field_names($or_operand, $negate, - \@unknownFields, - \%ambiguous_fields)) - { - # Having ruled out the special cases, we may now split - # by comma, which is another legal boolean OR indicator. - foreach my $word (split(/,/, $or_operand)) { - if (!_special_field_syntax($word, $negate)) { - _default_quicksearch_word($word, $negate); - } - _handle_urls($word, $negate); - } + next if _handle_special_first_chars($term, $negate); + next if _handle_field_names($term, $negate, \@unknownFields, + \%ambiguous_fields); + + # Having ruled out the special cases, we may now split + # by comma, which is another legal boolean OR indicator. + # Remove quotes from quoted words, if any. + @words = parse_line(',', 0, $term); + foreach my $word (@words) { + if (!_special_field_syntax($word, $negate)) { + _default_quicksearch_word($word, $negate); } + _handle_urls($word, $negate); } } $chart++; $and = 0; $or = 0; - } # foreach (@words) + } # Inform user about any unknown fields if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) { @@ -315,7 +357,7 @@ sub _handle_special_first_chars { my $firstChar = substr($qsword, 0, 1); my $baseWord = substr($qsword, 1); - my @subWords = split(/[\|,]/, $baseWord); + my @subWords = split(/,/, $baseWord); if ($firstChar eq '#') { addChart('short_desc', 'substring', $baseWord, $negate); @@ -347,7 +389,7 @@ sub _handle_special_first_chars { sub _handle_field_names { my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_; - + # Flag and requestee shortcut if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) { addChart('flagtypes.name', 'substring', $1, $negate); @@ -355,32 +397,40 @@ sub _handle_field_names { addChart('requestees.login_name', 'substring', $2, $negate); return 1; } - - # generic field1,field2,field3:value1,value2 notation - if ($or_operand =~ /^([^:]+):([^:]+)$/) { - my @fields = split(/,/, $1); - my @values = split(/,/, $2); + + # Generic field1,field2,field3:value1,value2 notation. + # We have to correctly ignore commas and colons in quotes. + my @field_values = parse_line(':', 1, $or_operand); + if (scalar @field_values == 2) { + my @fields = parse_line(',', 1, $field_values[0]); + my @values = parse_line(',', 1, $field_values[1]); foreach my $field (@fields) { my $translated = _translate_field_name($field); # Skip and record any unknown fields if (!defined $translated) { push(@$unknownFields, $field); - next; } # If we got back an array, that means the substring is # ambiguous and could match more than field name elsif (ref $translated) { $ambiguous_fields->{$field} = $translated; - next; } - foreach my $value (@values) { - my $operator = FIELD_OPERATOR->{$translated} || 'substring'; - addChart($translated, $operator, $value, $negate); + else { + foreach my $value (@values) { + my $operator = FIELD_OPERATOR->{$translated} || 'substring'; + # If the string was quoted to protect some special + # characters such as commas and colons, we need + # to remove quotes. + if ($value =~ /^(["'])(.+)\g1$/) { + $value = $2; + $value =~ s/\\(["'])/$1/g; + } + addChart($translated, $operator, $value, $negate); + } } } return 1; } - return 0; } @@ -513,41 +563,6 @@ sub _handle_urls { # Helpers ########################################################################### -# Split string on whitespace, retaining quoted strings as one -sub splitString { - my $string = shift; - my @quoteparts; - my @parts; - my $i = 0; - - # Now split on quote sign; be tolerant about unclosed quotes - @quoteparts = split(/"/, $string); - foreach my $part (@quoteparts) { - # After every odd quote, quote special chars - if ($i++ %2) { - $part = url_quote($part); - # Protect the minus sign from being considered - # as negation, in quotes. - $part =~ s/(?<=^)\-/%2D/; - } - } - # Join again - $string = join('"', @quoteparts); - - # Now split on unescaped whitespace - @parts = split(/\s+/, $string); - foreach (@parts) { - # Protect plus signs from becoming a blank. - # If "+" appears as the first character, leave it alone - # as it has a special meaning. Strings which start with - # "+" must be quoted. - s/(?cgi; $cgi->param("field$expr", $field); $cgi->param("type$expr", $type); - $cgi->param("value$expr", url_decode($value)); + $cgi->param("value$expr", $value); } 1; diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index 6d8622e04..7ecaddc88 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -34,7 +34,7 @@ use base qw(Exporter); @Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed html_quote url_quote xml_quote - css_class_quote html_light_quote url_decode + css_class_quote html_light_quote i_am_cgi correct_urlbase remote_ip do_ssl_redirect_if_required use_attachbase diff_arrays on_main_db @@ -243,14 +243,6 @@ sub xml_quote { return $var; } -sub url_decode { - my ($todecode) = (@_); - $todecode =~ tr/+/ /; # pluses become spaces - $todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge; - utf8::decode($todecode) if Bugzilla->params->{'utf8'}; - return $todecode; -} - sub i_am_cgi { # I use SERVER_SOFTWARE because it's required to be # defined for all requests in the CGI spec. @@ -756,9 +748,6 @@ Bugzilla::Util - Generic utility functions for bugzilla xml_quote($var); email_filter($var); - # Functions for decoding - $rv = url_decode($var); - # Functions that tell you about your environment my $is_cgi = i_am_cgi(); my $urlbase = correct_urlbase(); @@ -870,10 +859,6 @@ This is similar to C, except that ' is escaped to '. This is kept separate from html_quote partly for compatibility with previous code (for ') and partly for future handling of non-ASCII characters. -=item C - -Converts the %xx encoding from the given URL back to its original form. - =item C Removes the hostname from email addresses in the string, if the user diff --git a/template/en/default/global/user-error.html.tmpl b/template/en/default/global/user-error.html.tmpl index feff9f042..3d1ac5c53 100644 --- a/template/en/default/global/user-error.html.tmpl +++ b/template/en/default/global/user-error.html.tmpl @@ -1448,6 +1448,27 @@ The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %] characters long. + [% ELSIF error == "quicksearch_invalid_query" %] + [% title = "Invalid Query" %] + Your query is invalid. + [% IF operators %] + The [% operators.shift FILTER html %] operator cannot be followed by + [%+ operators.shift FILTER html %]. + [% ELSE %] + A query cannot start with AND or OR, nor can it end with AND, OR or NOT. + They are reserved operators and must be quoted if you want to look for + these strings. + [% END %] + + [% ELSIF error == "quicksearch_unbalanced_quotes" %] + [% title = "Badly Formatted Query" %] + [% terms.Bugzilla %] is unable to parse your query correctly: + [% string FILTER html %].
+ If you use quotes to enclose strings, make sure both quotes are present. + If you really want to look for a quote in a string, type \" instead of ". + For instance, "I'm so \"special\", really" (with quotes) will be + interpreted as I'm so "special", really. + [% ELSIF error == "quicksearch_unknown_field" %] [% title = "QuickSearch Error" %] There is a problem with your search: diff --git a/template/en/default/pages/quicksearch.html.tmpl b/template/en/default/pages/quicksearch.html.tmpl index e6398eade..901f05467 100644 --- a/template/en/default/pages/quicksearch.html.tmpl +++ b/template/en/default/pages/quicksearch.html.tmpl @@ -38,7 +38,16 @@ -

The Basics

+ + +

The Basics

  • If you just put a word or series of words in the search box, @@ -89,8 +98,32 @@ any of those values will be searched for.
-

You may also want to read up on the Advanced - Features.

+

Examples of Simple Queries

+ +

Here are some examples of how to write some simple queries. + Examples for more complex queries can be + found lower in this page.

+ +
    +
  • All open [% terms.bugs %] where userA@company.com is in the CC list + (no need to mention open [% terms.bugs %], this is the default):
    + cc:userA@company.com
  • +
  • All unconfirmed [% terms.bugs %] in product productA (putting the + [%+ terms.bug %] status at the first position make it being automagically + considered as [% terms.abug %] status):
    + UNCONFIRMED product:productA +
  • All open and closed [% terms.bugs %] reported by userB@company.com + (we must specify ALL as the first word, else only open [% terms.bugs %] + are taken into account):
    + ALL reporter:userB@company.com +
  • All open [% terms.bugs %] with severity blocker or critical with the + target milestone set to 2.5:
    + severity:blocker,critical milestone:2.5 +
  • All open [% terms.bugs %] in the component Research & Development + with priority P1 or P2 (we must use quotes for the component as its name + contains whitespaces):
    + component:"Research & Development" priority:P1,P2
  • +

Fields You Can Search On

@@ -143,15 +176,18 @@ -

Advanced Features

+

Advanced Features

  • If you want to search for a phrase or something that - contains spaces, you can put it in quotes, like: - "this is a phrase". You can also use quotes to search for + contains spaces, commas, colons or quotes, you must put it in quotes, like: + "yes, this is a phrase". You must also use quotes to search for characters that would otherwise be interpreted specially by quicksearch. - For example, "this|thing" would search for the literal phrase - this|thing.
  • + For example, "this|that" would search for the literal string + this|that and would not be parsed as "this OR that". + Also, "-field:value" would search for the literal phrase + -field:value and would not be parsed as + "NOT field:value".
  • You can use AND, NOT, and OR in searches. @@ -181,6 +217,12 @@
+

You cannot use | nor OR to enumerate possible values for a given field. + You must use commas instead. So field:value1,value2 does what + you expect, but field:value1|value2 would be treated as + field:value1 OR value2, which means value2 is not bound to + the given field.

+

OR has higher precedence than AND; AND is the top level operation. For example:

Searching for url|location bar|field -focus means @@ -271,4 +313,29 @@ +

Examples of Complex Queries

+ +

It is pretty easy to write rather complex queries without too much effort. + For very complex queries, you have to use the + Advanced Search form.

+ +
    +
  • All [% terms.bugs %] reported by userA@company.com or assigned to him + (the initial @ is a shortcut for the assignee, see the + Advanced Shortcuts section above):
    + ALL @userA@company.com OR reporter:userA@company.com
  • +
  • All open [% terms.bugs %] in product productA with either severity + blocker, critical or major, or with priority P1, or with the blocker+ + flag set, and which are neither assigned to userB@company.com nor to + userC@company.com (we make the assumption that there are only two users + matching userB and userC, else we would write the whole login name):
    + :productA sev:blocker,critical,major OR pri:P1 OR flag:blocker+ -assign:userB,userC
  • +
  • All FIXED [% terms.bugs %] with the blocker+ flag set, but without + the approval+ nor approval? flags set:
    + FIXED flag:blocker+ -flag:approval+ -flag:approval?
  • +
  • [% terms.Bugs %] with That's a "unusual" issue in the + [%+ terms.bug %] summary (double quotes are escaped using \"):
    + summary:"That's a \"unusual\" issue"
  • +
+ [% PROCESS global/footer.html.tmpl %] -- cgit v1.2.3-24-g4f1b