summaryrefslogtreecommitdiffstats
path: root/Bugzilla
diff options
context:
space:
mode:
authorFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:56:41 +0200
committerFrédéric Buclin <LpSolit@gmail.com>2012-03-29 19:56:41 +0200
commit8f4fd271de4edbf1a206dfa0a98f8276f6189709 (patch)
treec7f97377b18dd5b1845c80fffe79c0b413b23863 /Bugzilla
parentc89d334af4b6d288f6294cfe8acfa939744fcec6 (diff)
downloadbugzilla-8f4fd271de4edbf1a206dfa0a98f8276f6189709.tar.gz
bugzilla-8f4fd271de4edbf1a206dfa0a98f8276f6189709.tar.xz
Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString
Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit
Diffstat (limited to 'Bugzilla')
-rw-r--r--Bugzilla/Search/Quicksearch.pm173
-rw-r--r--Bugzilla/Util.pm17
2 files changed, 95 insertions, 95 deletions
diff --git a/Bugzilla/Search/Quicksearch.pm b/Bugzilla/Search/Quicksearch.pm
index 8425a2be2..7b951d5ae 100644
--- a/Bugzilla/Search/Quicksearch.pm
+++ b/Bugzilla/Search/Quicksearch.pm
@@ -32,6 +32,7 @@ use Bugzilla::Util;
use List::Util qw(min max);
use List::MoreUtils qw(firstidx);
+use Text::ParseWords qw(parse_line);
use base qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
@@ -142,54 +143,95 @@ sub quicksearch {
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring);
- $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
-
if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring);
}
else {
_handle_alias($searchstring);
- # Globally translate " AND ", " OR ", " NOT " to space, pipe, dash.
- $searchstring =~ s/\s+AND\s+/ /g;
- $searchstring =~ s/\s+OR\s+/|/g;
- $searchstring =~ s/\s+NOT\s+/ -/g;
+ # Retain backslashes and quotes, to know which strings are quoted,
+ # and which ones are not.
+ my @words = parse_line('\s+', 1, $searchstring);
+ # If parse_line() returns no data, this means strings are badly quoted.
+ # Rather than trying to guess what the user wanted to do, we throw an error.
+ scalar(@words)
+ || ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
+
+ # A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
+ ThrowUserError('quicksearch_invalid_query')
+ if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
+
+ my (@qswords, @or_group);
+ while (scalar @words) {
+ my $word = shift @words;
+ # AND is the default word separator, similar to a whitespace,
+ # but |a AND OR b| is not a valid combination.
+ if ($word eq 'AND') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
+ if $words[0] eq 'OR';
+ }
+ # |a OR AND b| is not a valid combination.
+ # |a OR OR b| is equivalent to |a OR b| and so is harmless.
+ elsif ($word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
+ if $words[0] eq 'AND';
+ }
+ # NOT negates the following word.
+ # |NOT AND| and |NOT OR| are not valid combinations.
+ # |NOT NOT| is fine but has no effect as they cancel themselves.
+ elsif ($word eq 'NOT') {
+ $word = shift @words;
+ next if $word eq 'NOT';
+ if ($word eq 'AND' || $word eq 'OR') {
+ ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
+ }
+ unshift(@words, "-$word");
+ }
+ else {
+ # OR groups words together, as OR has higher precedence than AND.
+ push(@or_group, $word);
+ # If the next word is not OR, then we are not in a OR group,
+ # or we are leaving it.
+ if (!defined $words[0] || $words[0] ne 'OR') {
+ push(@qswords, join('|', @or_group));
+ @or_group = ();
+ }
+ }
+ }
- my @words = splitString($searchstring);
- _handle_status_and_resolution(\@words);
+ _handle_status_and_resolution(\@qswords);
my (@unknownFields, %ambiguous_fields);
+ $fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words.
- foreach my $qsword (@words) {
- my $negate = substr($qsword, 0, 1) eq '-';
- if ($negate) {
- $qsword = substr($qsword, 1);
- }
+ foreach my $qsword (@qswords) {
+ my @or_operand = parse_line('\|', 1, $qsword);
+ foreach my $term (@or_operand) {
+ my $negate = substr($term, 0, 1) eq '-';
+ if ($negate) {
+ $term = substr($term, 1);
+ }
- # No special first char
- if (!_handle_special_first_chars($qsword, $negate)) {
- # Split by '|' to get all operands for a boolean OR.
- foreach my $or_operand (split(/\|/, $qsword)) {
- if (!_handle_field_names($or_operand, $negate,
- \@unknownFields,
- \%ambiguous_fields))
- {
- # Having ruled out the special cases, we may now split
- # by comma, which is another legal boolean OR indicator.
- foreach my $word (split(/,/, $or_operand)) {
- if (!_special_field_syntax($word, $negate)) {
- _default_quicksearch_word($word, $negate);
- }
- _handle_urls($word, $negate);
- }
+ next if _handle_special_first_chars($term, $negate);
+ next if _handle_field_names($term, $negate, \@unknownFields,
+ \%ambiguous_fields);
+
+ # Having ruled out the special cases, we may now split
+ # by comma, which is another legal boolean OR indicator.
+ # Remove quotes from quoted words, if any.
+ @words = parse_line(',', 0, $term);
+ foreach my $word (@words) {
+ if (!_special_field_syntax($word, $negate)) {
+ _default_quicksearch_word($word, $negate);
}
+ _handle_urls($word, $negate);
}
}
$chart++;
$and = 0;
$or = 0;
- } # foreach (@words)
+ }
# Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
@@ -315,7 +357,7 @@ sub _handle_special_first_chars {
my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1);
- my @subWords = split(/[\|,]/, $baseWord);
+ my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate);
@@ -347,7 +389,7 @@ sub _handle_special_first_chars {
sub _handle_field_names {
my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_;
-
+
# Flag and requestee shortcut
if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) {
addChart('flagtypes.name', 'substring', $1, $negate);
@@ -355,32 +397,40 @@ sub _handle_field_names {
addChart('requestees.login_name', 'substring', $2, $negate);
return 1;
}
-
- # generic field1,field2,field3:value1,value2 notation
- if ($or_operand =~ /^([^:]+):([^:]+)$/) {
- my @fields = split(/,/, $1);
- my @values = split(/,/, $2);
+
+ # Generic field1,field2,field3:value1,value2 notation.
+ # We have to correctly ignore commas and colons in quotes.
+ my @field_values = parse_line(':', 1, $or_operand);
+ if (scalar @field_values == 2) {
+ my @fields = parse_line(',', 1, $field_values[0]);
+ my @values = parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) {
my $translated = _translate_field_name($field);
# Skip and record any unknown fields
if (!defined $translated) {
push(@$unknownFields, $field);
- next;
}
# If we got back an array, that means the substring is
# ambiguous and could match more than field name
elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated;
- next;
}
- foreach my $value (@values) {
- my $operator = FIELD_OPERATOR->{$translated} || 'substring';
- addChart($translated, $operator, $value, $negate);
+ else {
+ foreach my $value (@values) {
+ my $operator = FIELD_OPERATOR->{$translated} || 'substring';
+ # If the string was quoted to protect some special
+ # characters such as commas and colons, we need
+ # to remove quotes.
+ if ($value =~ /^(["'])(.+)\g1$/) {
+ $value = $2;
+ $value =~ s/\\(["'])/$1/g;
+ }
+ addChart($translated, $operator, $value, $negate);
+ }
}
}
return 1;
}
-
return 0;
}
@@ -513,41 +563,6 @@ sub _handle_urls {
# Helpers
###########################################################################
-# Split string on whitespace, retaining quoted strings as one
-sub splitString {
- my $string = shift;
- my @quoteparts;
- my @parts;
- my $i = 0;
-
- # Now split on quote sign; be tolerant about unclosed quotes
- @quoteparts = split(/"/, $string);
- foreach my $part (@quoteparts) {
- # After every odd quote, quote special chars
- if ($i++ %2) {
- $part = url_quote($part);
- # Protect the minus sign from being considered
- # as negation, in quotes.
- $part =~ s/(?<=^)\-/%2D/;
- }
- }
- # Join again
- $string = join('"', @quoteparts);
-
- # Now split on unescaped whitespace
- @parts = split(/\s+/, $string);
- foreach (@parts) {
- # Protect plus signs from becoming a blank.
- # If "+" appears as the first character, leave it alone
- # as it has a special meaning. Strings which start with
- # "+" must be quoted.
- s/(?<!^)\+/%2B/g;
- # Remove quotes
- s/"//g;
- }
- return @parts;
-}
-
# Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase {
my ($phrase) = @_;
@@ -613,7 +628,7 @@ sub makeChart {
my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field);
$cgi->param("type$expr", $type);
- $cgi->param("value$expr", url_decode($value));
+ $cgi->param("value$expr", $value);
}
1;
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index 6d8622e04..7ecaddc88 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -34,7 +34,7 @@ use base qw(Exporter);
@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural
detaint_signed
html_quote url_quote xml_quote
- css_class_quote html_light_quote url_decode
+ css_class_quote html_light_quote
i_am_cgi correct_urlbase remote_ip
do_ssl_redirect_if_required use_attachbase
diff_arrays on_main_db
@@ -243,14 +243,6 @@ sub xml_quote {
return $var;
}
-sub url_decode {
- my ($todecode) = (@_);
- $todecode =~ tr/+/ /; # pluses become spaces
- $todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge;
- utf8::decode($todecode) if Bugzilla->params->{'utf8'};
- return $todecode;
-}
-
sub i_am_cgi {
# I use SERVER_SOFTWARE because it's required to be
# defined for all requests in the CGI spec.
@@ -756,9 +748,6 @@ Bugzilla::Util - Generic utility functions for bugzilla
xml_quote($var);
email_filter($var);
- # Functions for decoding
- $rv = url_decode($var);
-
# Functions that tell you about your environment
my $is_cgi = i_am_cgi();
my $urlbase = correct_urlbase();
@@ -870,10 +859,6 @@ This is similar to C<html_quote>, except that ' is escaped to &apos;. This
is kept separate from html_quote partly for compatibility with previous code
(for &apos;) and partly for future handling of non-ASCII characters.
-=item C<url_decode($val)>
-
-Converts the %xx encoding from the given URL back to its original form.
-
=item C<email_filter>
Removes the hostname from email addresses in the string, if the user