diff options
author | Frédéric Buclin <LpSolit@gmail.com> | 2012-03-29 19:56:41 +0200 |
---|---|---|
committer | Frédéric Buclin <LpSolit@gmail.com> | 2012-03-29 19:56:41 +0200 |
commit | 8f4fd271de4edbf1a206dfa0a98f8276f6189709 (patch) | |
tree | c7f97377b18dd5b1845c80fffe79c0b413b23863 | |
parent | c89d334af4b6d288f6294cfe8acfa939744fcec6 (diff) | |
download | bugzilla-8f4fd271de4edbf1a206dfa0a98f8276f6189709.tar.gz bugzilla-8f4fd271de4edbf1a206dfa0a98f8276f6189709.tar.xz |
Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString
Also fixes QS with accented characters (bug 730207)
r=dkl a=LpSolit
-rw-r--r-- | Bugzilla/Search/Quicksearch.pm | 173 | ||||
-rw-r--r-- | Bugzilla/Util.pm | 17 | ||||
-rw-r--r-- | template/en/default/global/user-error.html.tmpl | 21 | ||||
-rw-r--r-- | template/en/default/pages/quicksearch.html.tmpl | 83 |
4 files changed, 191 insertions, 103 deletions
diff --git a/Bugzilla/Search/Quicksearch.pm b/Bugzilla/Search/Quicksearch.pm index 8425a2be2..7b951d5ae 100644 --- a/Bugzilla/Search/Quicksearch.pm +++ b/Bugzilla/Search/Quicksearch.pm @@ -32,6 +32,7 @@ use Bugzilla::Util; use List::Util qw(min max); use List::MoreUtils qw(firstidx); +use Text::ParseWords qw(parse_line); use base qw(Exporter); @Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch); @@ -142,54 +143,95 @@ sub quicksearch { $searchstring =~ s/(^[\s,]+|[\s,]+$)//g; ThrowUserError('buglist_parameters_required') unless ($searchstring); - $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0; - if ($searchstring =~ m/^[0-9,\s]*$/) { _bug_numbers_only($searchstring); } else { _handle_alias($searchstring); - # Globally translate " AND ", " OR ", " NOT " to space, pipe, dash. - $searchstring =~ s/\s+AND\s+/ /g; - $searchstring =~ s/\s+OR\s+/|/g; - $searchstring =~ s/\s+NOT\s+/ -/g; + # Retain backslashes and quotes, to know which strings are quoted, + # and which ones are not. + my @words = parse_line('\s+', 1, $searchstring); + # If parse_line() returns no data, this means strings are badly quoted. + # Rather than trying to guess what the user wanted to do, we throw an error. + scalar(@words) + || ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring}); + + # A query cannot start with AND or OR, nor can it end with AND, OR or NOT. + ThrowUserError('quicksearch_invalid_query') + if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/); + + my (@qswords, @or_group); + while (scalar @words) { + my $word = shift @words; + # AND is the default word separator, similar to a whitespace, + # but |a AND OR b| is not a valid combination. + if ($word eq 'AND') { + ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']}) + if $words[0] eq 'OR'; + } + # |a OR AND b| is not a valid combination. + # |a OR OR b| is equivalent to |a OR b| and so is harmless. + elsif ($word eq 'OR') { + ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']}) + if $words[0] eq 'AND'; + } + # NOT negates the following word. + # |NOT AND| and |NOT OR| are not valid combinations. + # |NOT NOT| is fine but has no effect as they cancel themselves. + elsif ($word eq 'NOT') { + $word = shift @words; + next if $word eq 'NOT'; + if ($word eq 'AND' || $word eq 'OR') { + ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]}); + } + unshift(@words, "-$word"); + } + else { + # OR groups words together, as OR has higher precedence than AND. + push(@or_group, $word); + # If the next word is not OR, then we are not in a OR group, + # or we are leaving it. + if (!defined $words[0] || $words[0] ne 'OR') { + push(@qswords, join('|', @or_group)); + @or_group = (); + } + } + } - my @words = splitString($searchstring); - _handle_status_and_resolution(\@words); + _handle_status_and_resolution(\@qswords); my (@unknownFields, %ambiguous_fields); + $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0; # Loop over all main-level QuickSearch words. - foreach my $qsword (@words) { - my $negate = substr($qsword, 0, 1) eq '-'; - if ($negate) { - $qsword = substr($qsword, 1); - } + foreach my $qsword (@qswords) { + my @or_operand = parse_line('\|', 1, $qsword); + foreach my $term (@or_operand) { + my $negate = substr($term, 0, 1) eq '-'; + if ($negate) { + $term = substr($term, 1); + } - # No special first char - if (!_handle_special_first_chars($qsword, $negate)) { - # Split by '|' to get all operands for a boolean OR. - foreach my $or_operand (split(/\|/, $qsword)) { - if (!_handle_field_names($or_operand, $negate, - \@unknownFields, - \%ambiguous_fields)) - { - # Having ruled out the special cases, we may now split - # by comma, which is another legal boolean OR indicator. - foreach my $word (split(/,/, $or_operand)) { - if (!_special_field_syntax($word, $negate)) { - _default_quicksearch_word($word, $negate); - } - _handle_urls($word, $negate); - } + next if _handle_special_first_chars($term, $negate); + next if _handle_field_names($term, $negate, \@unknownFields, + \%ambiguous_fields); + + # Having ruled out the special cases, we may now split + # by comma, which is another legal boolean OR indicator. + # Remove quotes from quoted words, if any. + @words = parse_line(',', 0, $term); + foreach my $word (@words) { + if (!_special_field_syntax($word, $negate)) { + _default_quicksearch_word($word, $negate); } + _handle_urls($word, $negate); } } $chart++; $and = 0; $or = 0; - } # foreach (@words) + } # Inform user about any unknown fields if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) { @@ -315,7 +357,7 @@ sub _handle_special_first_chars { my $firstChar = substr($qsword, 0, 1); my $baseWord = substr($qsword, 1); - my @subWords = split(/[\|,]/, $baseWord); + my @subWords = split(/,/, $baseWord); if ($firstChar eq '#') { addChart('short_desc', 'substring', $baseWord, $negate); @@ -347,7 +389,7 @@ sub _handle_special_first_chars { sub _handle_field_names { my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_; - + # Flag and requestee shortcut if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) { addChart('flagtypes.name', 'substring', $1, $negate); @@ -355,32 +397,40 @@ sub _handle_field_names { addChart('requestees.login_name', 'substring', $2, $negate); return 1; } - - # generic field1,field2,field3:value1,value2 notation - if ($or_operand =~ /^([^:]+):([^:]+)$/) { - my @fields = split(/,/, $1); - my @values = split(/,/, $2); + + # Generic field1,field2,field3:value1,value2 notation. + # We have to correctly ignore commas and colons in quotes. + my @field_values = parse_line(':', 1, $or_operand); + if (scalar @field_values == 2) { + my @fields = parse_line(',', 1, $field_values[0]); + my @values = parse_line(',', 1, $field_values[1]); foreach my $field (@fields) { my $translated = _translate_field_name($field); # Skip and record any unknown fields if (!defined $translated) { push(@$unknownFields, $field); - next; } # If we got back an array, that means the substring is # ambiguous and could match more than field name elsif (ref $translated) { $ambiguous_fields->{$field} = $translated; - next; } - foreach my $value (@values) { - my $operator = FIELD_OPERATOR->{$translated} || 'substring'; - addChart($translated, $operator, $value, $negate); + else { + foreach my $value (@values) { + my $operator = FIELD_OPERATOR->{$translated} || 'substring'; + # If the string was quoted to protect some special + # characters such as commas and colons, we need + # to remove quotes. + if ($value =~ /^(["'])(.+)\g1$/) { + $value = $2; + $value =~ s/\\(["'])/$1/g; + } + addChart($translated, $operator, $value, $negate); + } } } return 1; } - return 0; } @@ -513,41 +563,6 @@ sub _handle_urls { # Helpers ########################################################################### -# Split string on whitespace, retaining quoted strings as one -sub splitString { - my $string = shift; - my @quoteparts; - my @parts; - my $i = 0; - - # Now split on quote sign; be tolerant about unclosed quotes - @quoteparts = split(/"/, $string); - foreach my $part (@quoteparts) { - # After every odd quote, quote special chars - if ($i++ %2) { - $part = url_quote($part); - # Protect the minus sign from being considered - # as negation, in quotes. - $part =~ s/(?<=^)\-/%2D/; - } - } - # Join again - $string = join('"', @quoteparts); - - # Now split on unescaped whitespace - @parts = split(/\s+/, $string); - foreach (@parts) { - # Protect plus signs from becoming a blank. - # If "+" appears as the first character, leave it alone - # as it has a special meaning. Strings which start with - # "+" must be quoted. - s/(?<!^)\+/%2B/g; - # Remove quotes - s/"//g; - } - return @parts; -} - # Quote and escape a phrase appropriately for a "content matches" search. sub _matches_phrase { my ($phrase) = @_; @@ -613,7 +628,7 @@ sub makeChart { my $cgi = Bugzilla->cgi; $cgi->param("field$expr", $field); $cgi->param("type$expr", $type); - $cgi->param("value$expr", url_decode($value)); + $cgi->param("value$expr", $value); } 1; diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index 6d8622e04..7ecaddc88 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -34,7 +34,7 @@ use base qw(Exporter); @Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed html_quote url_quote xml_quote - css_class_quote html_light_quote url_decode + css_class_quote html_light_quote i_am_cgi correct_urlbase remote_ip do_ssl_redirect_if_required use_attachbase diff_arrays on_main_db @@ -243,14 +243,6 @@ sub xml_quote { return $var; } -sub url_decode { - my ($todecode) = (@_); - $todecode =~ tr/+/ /; # pluses become spaces - $todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge; - utf8::decode($todecode) if Bugzilla->params->{'utf8'}; - return $todecode; -} - sub i_am_cgi { # I use SERVER_SOFTWARE because it's required to be # defined for all requests in the CGI spec. @@ -756,9 +748,6 @@ Bugzilla::Util - Generic utility functions for bugzilla xml_quote($var); email_filter($var); - # Functions for decoding - $rv = url_decode($var); - # Functions that tell you about your environment my $is_cgi = i_am_cgi(); my $urlbase = correct_urlbase(); @@ -870,10 +859,6 @@ This is similar to C<html_quote>, except that ' is escaped to '. This is kept separate from html_quote partly for compatibility with previous code (for ') and partly for future handling of non-ASCII characters. -=item C<url_decode($val)> - -Converts the %xx encoding from the given URL back to its original form. - =item C<email_filter> Removes the hostname from email addresses in the string, if the user diff --git a/template/en/default/global/user-error.html.tmpl b/template/en/default/global/user-error.html.tmpl index feff9f042..3d1ac5c53 100644 --- a/template/en/default/global/user-error.html.tmpl +++ b/template/en/default/global/user-error.html.tmpl @@ -1448,6 +1448,27 @@ The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %] characters long. + [% ELSIF error == "quicksearch_invalid_query" %] + [% title = "Invalid Query" %] + Your query is invalid. + [% IF operators %] + The [% operators.shift FILTER html %] operator cannot be followed by + [%+ operators.shift FILTER html %]. + [% ELSE %] + A query cannot start with AND or OR, nor can it end with AND, OR or NOT. + They are reserved operators and must be quoted if you want to look for + these strings. + [% END %] + + [% ELSIF error == "quicksearch_unbalanced_quotes" %] + [% title = "Badly Formatted Query" %] + [% terms.Bugzilla %] is unable to parse your query correctly: + <em>[% string FILTER html %]</em>.<br> + If you use quotes to enclose strings, make sure both quotes are present. + If you really want to look for a quote in a string, type \" instead of ". + For instance, <em>"I'm so \"special\", really"</em> (with quotes) will be + interpreted as <em>I'm so "special", really</em>. + [% ELSIF error == "quicksearch_unknown_field" %] [% title = "QuickSearch Error" %] There is a problem with your search: diff --git a/template/en/default/pages/quicksearch.html.tmpl b/template/en/default/pages/quicksearch.html.tmpl index e6398eade..901f05467 100644 --- a/template/en/default/pages/quicksearch.html.tmpl +++ b/template/en/default/pages/quicksearch.html.tmpl @@ -38,7 +38,16 @@ <input type="submit" value="Search" id="find"> </form> -<h2>The Basics</h2> +<ul> + <li><a href="#basics">The Basics</a></li> + <li><a href="#basic_examples">Examples of Simple Queries</a></li> + <li><a href="#fields">Fields You Can Search On</a></li> + <li><a href="#advanced_features">Advanced Features</a></li> + <li><a href="#shortcuts">Advanced Shortcuts</a></li> + <li><a href="#advanced_examples">Examples of Complex Queries</a></li> +</ul> + +<h2 id="basics">The Basics</h2> <ul class="qs_help"> <li>If you just put a word or series of words in the search box, @@ -89,8 +98,32 @@ <em>any</em> of those values will be searched for.</li> </ul> -<p>You may also want to read up on the <a href="#advanced">Advanced - Features</a>.</p> +<h2 id="basic_examples">Examples of Simple Queries</h2> + +<p>Here are some examples of how to write some simple queries. + <a href="#advanced_examples">Examples for more complex queries</a> can be + found lower in this page.</p> + +<ul class="qs_help"> + <li>All open [% terms.bugs %] where userA@company.com is in the CC list + (no need to mention open [% terms.bugs %], this is the default):<br> + <kbd>cc:userA@company.com</kbd></li> + <li>All unconfirmed [% terms.bugs %] in product productA (putting the + [%+ terms.bug %] status at the first position make it being automagically + considered as [% terms.abug %] status):<br> + <kbd>UNCONFIRMED product:productA</kbd> + <li>All open and closed [% terms.bugs %] reported by userB@company.com + (we must specify ALL as the first word, else only open [% terms.bugs %] + are taken into account):<br> + <kbd>ALL reporter:userB@company.com</kbd> + <li>All open [% terms.bugs %] with severity blocker or critical with the + target milestone set to 2.5:<br> + <kbd>severity:blocker,critical milestone:2.5</kbd> + <li>All open [% terms.bugs %] in the component Research & Development + with priority P1 or P2 (we must use quotes for the component as its name + contains whitespaces):<br> + <kbd>component:"Research & Development" priority:P1,P2</kbd></li> +</ul> <h2 id="fields">Fields You Can Search On</h2> @@ -143,15 +176,18 @@ </tbody> </table> -<h2 id="advanced">Advanced Features</h2> +<h2 id="advanced_features">Advanced Features</h2> <ul class="qs_help"> <li>If you want to search for a <strong>phrase</strong> or something that - contains spaces, you can put it in quotes, like: - <kbd>"this is a phrase"</kbd>. You can also use quotes to search for + contains spaces, commas, colons or quotes, you must put it in quotes, like: + <kbd>"yes, this is a phrase"</kbd>. You must also use quotes to search for characters that would otherwise be interpreted specially by quicksearch. - For example, <kbd>"this|thing"</kbd> would search for the literal phrase - <em>this|thing</em>.</li> + For example, <kbd>"this|that"</kbd> would search for the literal string + <em>this|that</em> and would not be parsed as <kbd>"this OR that"</kbd>. + Also, <kbd>"-field:value"</kbd> would search for the literal phrase + <em>-field:value</em> and would not be parsed as + <kbd>"NOT field:value"</kbd>.</li> <li>You can use <strong>AND</strong>, <strong>NOT</strong>, and <strong>OR</strong> in searches. @@ -181,6 +217,12 @@ </li> </ul> + <p>You cannot use | nor OR to enumerate possible values for a given field. + You must use commas instead. So <kbd>field:value1,value2</kbd> does what + you expect, but <kbd>field:value1|value2</kbd> would be treated as + <kbd>field:value1 OR value2</kbd>, which means value2 is not bound to + the given field.</p> + <p>OR has higher precedence than AND; AND is the top level operation. For example:</p> <p>Searching for <em><kbd>url|location bar|field -focus</kbd></em> means @@ -271,4 +313,29 @@ </tbody> </table> +<h2 id="advanced_examples">Examples of Complex Queries</h2> + +<p>It is pretty easy to write rather complex queries without too much effort. + For very complex queries, you have to use the + <a href="query.cgi?format=advanced">Advanced Search</a> form.</p> + +<ul class="qs_help"> + <li>All [% terms.bugs %] reported by userA@company.com or assigned to him + (the initial @ is a shortcut for the assignee, see the + <a href="#shortcuts">Advanced Shortcuts</a> section above):<br> + <kbd>ALL @userA@company.com OR reporter:userA@company.com</kbd></li> + <li>All open [% terms.bugs %] in product productA with either severity + blocker, critical or major, or with priority P1, or with the blocker+ + flag set, and which are neither assigned to userB@company.com nor to + userC@company.com (we make the assumption that there are only two users + matching userB and userC, else we would write the whole login name):<br> + <kbd>:productA sev:blocker,critical,major OR pri:P1 OR flag:blocker+ -assign:userB,userC</kbd></li> + <li>All FIXED [% terms.bugs %] with the blocker+ flag set, but without + the approval+ nor approval? flags set:<br> + <kbd>FIXED flag:blocker+ -flag:approval+ -flag:approval?</kbd></li> + <li>[% terms.Bugs %] with <em>That's a "unusual" issue</em> in the + [%+ terms.bug %] summary (double quotes are escaped using <em>\"</em>):<br> + <kbd>summary:"That's a \"unusual\" issue"</kbd></li> +</ul> + [% PROCESS global/footer.html.tmpl %] |