diff options
Diffstat (limited to 'Bugzilla/Elastic')
-rw-r--r-- | Bugzilla/Elastic/Indexer.pm | 29 | ||||
-rw-r--r-- | Bugzilla/Elastic/Role/HasClient.pm | 2 | ||||
-rw-r--r-- | Bugzilla/Elastic/Role/Search.pm | 16 | ||||
-rw-r--r-- | Bugzilla/Elastic/Search.pm | 425 | ||||
-rw-r--r-- | Bugzilla/Elastic/Search/FakeCGI.pm | 43 |
5 files changed, 18 insertions, 497 deletions
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm index dd71a7198..82f946af9 100644 --- a/Bugzilla/Elastic/Indexer.pm +++ b/Bugzilla/Elastic/Indexer.pm @@ -23,7 +23,7 @@ has 'mtime' => ( has 'shadow_dbh' => ( is => 'lazy' ); has 'debug_sql' => ( - is => 'ro', + is => 'ro', default => 0, ); @@ -40,24 +40,24 @@ sub create_index { index => $self->index_name, body => { settings => { - number_of_shards => 2, + number_of_shards => 1, analysis => { - filter => { - asciifolding_original => { - type => "asciifolding", - preserve_original => \1, - }, - }, analyzer => { folding => { + type => 'standard', tokenizer => 'standard', - filter => ['standard', 'lowercase', 'asciifolding_original'], + filter => [ 'lowercase', 'asciifolding' ] }, bz_text_analyzer => { type => 'standard', filter => ['lowercase', 'stop'], max_token_length => '20' }, + bz_substring_analyzer => { + type => 'custom', + filter => ['lowercase'], + tokenizer => 'bz_ngram_tokenizer', + }, bz_equals_analyzer => { type => 'custom', filter => ['lowercase'], @@ -71,20 +71,25 @@ sub create_index { whiteboard_shingle_words => { type => 'custom', tokenizer => 'whiteboard_words_pattern', - filter => ['stop', 'shingle', 'lowercase'] + filter => ['stop', 'shingle'] }, whiteboard_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'lowercase'] + filter => ['stop'] }, whiteboard_shingle_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'shingle', 'lowercase'] + filter => ['stop', 'shingle'] } }, tokenizer => { + bz_ngram_tokenizer => { + type => 'nGram', + min_ngram => 2, + max_ngram => 25, + }, whiteboard_tokens_pattern => { type => 'pattern', pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' diff --git a/Bugzilla/Elastic/Role/HasClient.pm b/Bugzilla/Elastic/Role/HasClient.pm index 8e2687880..3d52d513a 100644 --- a/Bugzilla/Elastic/Role/HasClient.pm +++ b/Bugzilla/Elastic/Role/HasClient.pm @@ -17,7 +17,7 @@ sub _build_client { my ($self) = @_; return Search::Elasticsearch->new( - nodes => [ split(/\s+/, Bugzilla->params->{elasticsearch_nodes}) ], + nodes => Bugzilla->params->{elasticsearch_nodes}, cxn_pool => 'Sniff', ); } diff --git a/Bugzilla/Elastic/Role/Search.pm b/Bugzilla/Elastic/Role/Search.pm deleted file mode 100644 index 9446e0da8..000000000 --- a/Bugzilla/Elastic/Role/Search.pm +++ /dev/null @@ -1,16 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This Source Code Form is "Incompatible With Secondary Licenses", as -# defined by the Mozilla Public License, v. 2.0. -package Bugzilla::Elastic::Role::Search; - -use 5.10.1; -use strict; -use warnings; -use Role::Tiny; - -requires qw(data search_description invalid_order_columns order); - -1; diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm deleted file mode 100644 index 5c60f2353..000000000 --- a/Bugzilla/Elastic/Search.pm +++ /dev/null @@ -1,425 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This Source Code Form is "Incompatible With Secondary Licenses", as -# defined by the Mozilla Public License, v. 2.0. -package Bugzilla::Elastic::Search; - -use 5.10.1; -use Moo; -use Bugzilla::Search; -use Bugzilla::Search::Quicksearch; -use Bugzilla::Util qw(trick_taint); -use namespace::clean; - -use Bugzilla::Elastic::Search::FakeCGI; - - -has 'quicksearch' => ( is => 'ro' ); -has 'limit' => ( is => 'ro', predicate => 'has_limit' ); -has 'offset' => ( is => 'ro', predicate => 'has_offset' ); -has 'fields' => ( is => 'ro', isa => \&_arrayref_of_fields, default => sub { [] } ); -has 'params' => ( is => 'lazy' ); -has 'clause' => ( is => 'lazy' ); -has 'es_query' => ( is => 'lazy' ); -has 'search_description' => (is => 'lazy'); -has 'query_time' => ( is => 'rwp' ); - -has '_input_order' => ( is => 'ro', init_arg => 'order', required => 1); -has '_order' => ( is => 'lazy', init_arg => undef ); -has 'invalid_order_columns' => ( is => 'lazy' ); - -with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; -with 'Bugzilla::Elastic::Role::Search'; - -my @SUPPORTED_FIELDS = qw( - bug_id product component short_desc - priority status_whiteboard bug_status resolution - keywords alias assigned_to reporter delta_ts - longdesc cf_crash_signature classification bug_severity - commenter -); -my %IS_SUPPORTED_FIELD = map { $_ => 1 } @SUPPORTED_FIELDS; - -$IS_SUPPORTED_FIELD{relevance} = 1; - -my @NORMAL_FIELDS = qw( - priority - bug_severity - bug_status - resolution - product - component - classification - short_desc - assigned_to - reporter -); - -my %SORT_MAP = ( - bug_id => '_id', - relevance => '_score', - map { $_ => "$_.eq" } @NORMAL_FIELDS, -); - -my %EQUALS_MAP = ( - map { $_ => "$_.eq" } @NORMAL_FIELDS, -); - -sub _arrayref_of_fields { - my $f = $_; - foreach my $field (@$f) { - Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field) - unless $IS_SUPPORTED_FIELD{$field}; - } -} - -sub data { - my ($self) = @_; - my $body = $self->es_query; - my $result = eval { - $self->client->search( - index => $self->index_name, - type => 'bug', - body => $body, - ); - }; - if (!$result) { - die $@; - } - $self->_set_query_time($result->{took} / 1000); - my (@ids, %hits); - my $fields = $self->fields; - foreach my $hit (@{ $result->{hits}{hits} }) { - push @ids, $hit->{_id}; - my $source = $hit->{_source}; - $source->{relevance} = $hit->{_score}; - foreach my $val (values %$source) { - next unless defined $val; - trick_taint($val); - } - trick_taint($hit->{_id}); - if ($source) { - $hits{$hit->{_id}} = [ @$source{@$fields} ]; - } - else { - $hits{$hit->{_id}} = $hit->{_id}; - } - } - my $visible_ids = Bugzilla->user->visible_bugs(\@ids); - - return [ map { $hits{$_} } @$visible_ids ]; -} - -sub _valid_order { - my ($self) = @_; - - return grep { $IS_SUPPORTED_FIELD{$_->[0]} } @{$self->_order}; -} - -sub order { - my ($self) = @_; - - return map { $_->[0] } $self->_valid_order; -} - -sub _quicksearch_to_params { - my ($quicksearch) = @_; - no warnings 'redefine'; - my $cgi = Bugzilla::Elastic::Search::FakeCGI->new; - local *Bugzilla::cgi = sub { $cgi }; - local $Bugzilla::Search::Quicksearch::ELASTIC = 1; - quicksearch($quicksearch); - - return $cgi->params; -} - -sub _build_fields { return \@SUPPORTED_FIELDS } - -sub _build__order { - my ($self) = @_; - - my @order; - foreach my $order (@{$self->_input_order}) { - if ($order =~ /^(.+)\s+(asc|desc)$/i) { - push @order, [ $1, lc $2 ]; - } - else { - push @order, [ $order ]; - } - } - return \@order; -} - -sub _build_invalid_order_columns { - my ($self) = @_; - - return [ map { $_->[0] } grep { !$IS_SUPPORTED_FIELD{$_->[0]} } @{ $self->_order } ]; -} - -sub _build_params { - my ($self) = @_; - - return _quicksearch_to_params($self->quicksearch); -} - -sub _build_clause { - my ($self) = @_; - my $search = Bugzilla::Search->new(params => $self->params); - - return $search->_params_to_data_structure; -} - -sub _build_search_description { - my ($self) = @_; - - return [_describe($self->clause)]; -} - -sub _describe { - my ($thing) = @_; - - state $class_to_func = { - 'Bugzilla::Search::Condition' => \&_describe_condition, - 'Bugzilla::Search::Clause' => \&_describe_clause - }; - - my $func = $class_to_func->{ref $thing} or die "nothing for $thing\n"; - - return $func->($thing); -} - -sub _describe_clause { - my ($clause) = @_; - - return map { _describe($_) } @{$clause->children}; -} - -sub _describe_condition { - my ($cond) = @_; - - return { field => $cond->field, type => $cond->operator, value => _describe_value($cond->value) }; -} - -sub _describe_value { - my ($val) = @_; - - return ref($val) ? join(", ", @$val) : $val; -} - -sub _build_es_query { - my ($self) = @_; - my @extra; - - if ($self->_valid_order) { - my @sort = map { - my $f = $SORT_MAP{$_->[0]} // $_->[0]; - @$_ > 1 ? { $f => lc $_[1] } : $f; - } $self->_valid_order; - push @extra, sort => \@sort; - } - if ($self->has_offset) { - push @extra, from => $self->offset; - } - my $max_limit = Bugzilla->params->{max_search_results}; - my $limit = Bugzilla->params->{default_search_limit}; - if ($self->has_limit) { - if ($self->limit) { - my $l = $self->limit; - $limit = $l < $max_limit ? $l : $max_limit; - } - else { - $limit = $max_limit; - } - } - push @extra, size => $limit; - return { - _source => @{$self->fields} ? \1 : \0, - query => _query($self->clause), - @extra, - }; -} - -sub _query { - my ($thing) = @_; - state $class_to_func = { - 'Bugzilla::Search::Condition' => \&_query_condition, - 'Bugzilla::Search::Clause' => \&_query_clause, - }; - - my $func = $class_to_func->{ref $thing} or die "nothing for $thing\n"; - - return $func->($thing); -} - -sub _query_condition { - my ($cond) = @_; - state $operator_to_es = { - equals => \&_operator_equals, - substring => \&_operator_substring, - anyexact => \&_operator_anyexact, - anywords => \&_operator_anywords, - allwords => \&_operator_allwords, - }; - - my $field = $cond->field; - my $operator = $cond->operator; - my $value = $cond->value; - - if ($field eq 'resolution') { - $value = [ map { $_ eq '---' ? '' : $_ } ref $value ? @$value : $value ]; - } - - unless ($IS_SUPPORTED_FIELD{$field}) { - Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field); - } - - my $op = $operator_to_es->{$operator} - or Bugzilla::Elastic::Search::UnsupportedOperator->throw(operator => $operator); - - my $result; - if (ref $op) { - $result = $op->($field, $value); - } else { - $result = { $op => { $field => $value } }; - } - - return $result; -} - -# is equal to any of the strings -sub _operator_anyexact { - my ($field, $value) = @_; - my @values = ref $value ? @$value : split(/\s*,\s*/, $value); - if (@values == 1) { - return _operator_equals($field, $values[0]); - } - else { - return { - terms => { - $EQUALS_MAP{$field} // $field => [map { lc } @values], - minimum_should_match => 1, - }, - }; - } -} - -# contains any of the words -sub _operator_anywords { - my ($field, $value) = @_; - return { - match => { - $field => { query => $value, operator => "or" } - }, - }; -} - -# contains all of the words -sub _operator_allwords { - my ($field, $value) = @_; - return { - match => { - $field => { query => $value, operator => "and" } - }, - }; -} - -sub _operator_equals { - my ($field, $value) = @_; - return { - match => { - $EQUALS_MAP{$field} // $field => $value, - }, - }; -} - -sub _operator_substring { - my ($field, $value) = @_; - my $is_insider = Bugzilla->user->is_insider; - - if ($field eq 'longdesc') { - return { - has_child => { - type => 'comment', - query => { - bool => { - must => [ - { match => { body => { query => $value, operator => "and" } } }, - $is_insider ? () : { term => { is_private => \0 } }, - ], - }, - }, - }, - } - } - elsif ($field eq 'reporter' or $field eq 'assigned_to') { - return { - prefix => { - $EQUALS_MAP{$field} // $field => lc $value, - } - } - } - else { - return { - wildcard => { - $EQUALS_MAP{$field} // $field => lc "*$value*", - } - }; - } -} - -sub _query_clause { - my ($clause) = @_; - - state $joiner_to_func = { - AND => \&_join_and, - OR => \&_join_or, - }; - - my @children = grep { !$_->isa('Bugzilla::Search::Clause') || @{$_->children} } @{$clause->children}; - if (@children == 1) { - return _query($children[0]); - } - - return $joiner_to_func->{$clause->joiner}->([ map { _query($_) } @children ]); -} - -sub _join_and { - my ($children) = @_; - return { bool => { must => $children } }, -} - -sub _join_or { - my ($children) = @_; - return { bool => { should => $children } }; -} - -# Exceptions -BEGIN { - package Bugzilla::Elastic::Search::Redirect; - use Moo; - - with 'Throwable'; - - has 'redirect_args' => (is => 'ro', required => 1); - - package Bugzilla::Elastic::Search::UnsupportedField; - use Moo; - use overload q{""} => sub { "Unsupported field: ", $_[0]->field }, fallback => 1; - - with 'Throwable'; - - has 'field' => (is => 'ro', required => 1); - - - package Bugzilla::Elastic::Search::UnsupportedOperator; - use Moo; - - with 'Throwable'; - - has 'operator' => (is => 'ro', required => 1); -} - -1; diff --git a/Bugzilla/Elastic/Search/FakeCGI.pm b/Bugzilla/Elastic/Search/FakeCGI.pm deleted file mode 100644 index 827c96c52..000000000 --- a/Bugzilla/Elastic/Search/FakeCGI.pm +++ /dev/null @@ -1,43 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This Source Code Form is "Incompatible With Secondary Licenses", as -# defined by the Mozilla Public License, v. 2.0. -package Bugzilla::Elastic::Search::FakeCGI; -use 5.10.1; -use Moo; -use namespace::clean; - -has 'params' => (is => 'ro', default => sub { {} }); - -# we pretend to be Bugzilla::CGI at times. -sub canonicalise_query { - return Bugzilla::CGI::canonicalise_query(@_); -} - -sub delete { - my ($self, $key) = @_; - delete $self->params->{$key}; -} - -sub redirect { - my ($self, @args) = @_; - - Bugzilla::Elastic::Search::Redirect->throw(redirect_args => \@args); -} - -sub param { - my ($self, $key, $val, @rest) = @_; - if (@_ > 3) { - $self->params->{$key} = [$val, @rest]; - } elsif (@_ == 3) { - $self->params->{$key} = $val; - } elsif (@_ == 2) { - return $self->params->{$key}; - } else { - return $self->params - } -} - -1; |