summaryrefslogtreecommitdiffstats
path: root/Bugzilla/Elastic
diff options
context:
space:
mode:
Diffstat (limited to 'Bugzilla/Elastic')
-rw-r--r--Bugzilla/Elastic/Indexer.pm29
-rw-r--r--Bugzilla/Elastic/Role/HasClient.pm2
-rw-r--r--Bugzilla/Elastic/Role/Search.pm16
-rw-r--r--Bugzilla/Elastic/Search.pm425
-rw-r--r--Bugzilla/Elastic/Search/FakeCGI.pm43
5 files changed, 497 insertions, 18 deletions
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm
index 82f946af9..dd71a7198 100644
--- a/Bugzilla/Elastic/Indexer.pm
+++ b/Bugzilla/Elastic/Indexer.pm
@@ -23,7 +23,7 @@ has 'mtime' => (
has 'shadow_dbh' => ( is => 'lazy' );
has 'debug_sql' => (
- is => 'ro',
+ is => 'ro',
default => 0,
);
@@ -40,24 +40,24 @@ sub create_index {
index => $self->index_name,
body => {
settings => {
- number_of_shards => 1,
+ number_of_shards => 2,
analysis => {
+ filter => {
+ asciifolding_original => {
+ type => "asciifolding",
+ preserve_original => \1,
+ },
+ },
analyzer => {
folding => {
- type => 'standard',
tokenizer => 'standard',
- filter => [ 'lowercase', 'asciifolding' ]
+ filter => ['standard', 'lowercase', 'asciifolding_original'],
},
bz_text_analyzer => {
type => 'standard',
filter => ['lowercase', 'stop'],
max_token_length => '20'
},
- bz_substring_analyzer => {
- type => 'custom',
- filter => ['lowercase'],
- tokenizer => 'bz_ngram_tokenizer',
- },
bz_equals_analyzer => {
type => 'custom',
filter => ['lowercase'],
@@ -71,25 +71,20 @@ sub create_index {
whiteboard_shingle_words => {
type => 'custom',
tokenizer => 'whiteboard_words_pattern',
- filter => ['stop', 'shingle']
+ filter => ['stop', 'shingle', 'lowercase']
},
whiteboard_tokens => {
type => 'custom',
tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop']
+ filter => ['stop', 'lowercase']
},
whiteboard_shingle_tokens => {
type => 'custom',
tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop', 'shingle']
+ filter => ['stop', 'shingle', 'lowercase']
}
},
tokenizer => {
- bz_ngram_tokenizer => {
- type => 'nGram',
- min_ngram => 2,
- max_ngram => 25,
- },
whiteboard_tokens_pattern => {
type => 'pattern',
pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
diff --git a/Bugzilla/Elastic/Role/HasClient.pm b/Bugzilla/Elastic/Role/HasClient.pm
index 3d52d513a..8e2687880 100644
--- a/Bugzilla/Elastic/Role/HasClient.pm
+++ b/Bugzilla/Elastic/Role/HasClient.pm
@@ -17,7 +17,7 @@ sub _build_client {
my ($self) = @_;
return Search::Elasticsearch->new(
- nodes => Bugzilla->params->{elasticsearch_nodes},
+ nodes => [ split(/\s+/, Bugzilla->params->{elasticsearch_nodes}) ],
cxn_pool => 'Sniff',
);
}
diff --git a/Bugzilla/Elastic/Role/Search.pm b/Bugzilla/Elastic/Role/Search.pm
new file mode 100644
index 000000000..9446e0da8
--- /dev/null
+++ b/Bugzilla/Elastic/Role/Search.pm
@@ -0,0 +1,16 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This Source Code Form is "Incompatible With Secondary Licenses", as
+# defined by the Mozilla Public License, v. 2.0.
+package Bugzilla::Elastic::Role::Search;
+
+use 5.10.1;
+use strict;
+use warnings;
+use Role::Tiny;
+
+# Any class consuming this role must implement these four methods.
+requires qw(data search_description invalid_order_columns order);
+
+1;
diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm
new file mode 100644
index 000000000..5c60f2353
--- /dev/null
+++ b/Bugzilla/Elastic/Search.pm
@@ -0,0 +1,425 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This Source Code Form is "Incompatible With Secondary Licenses", as
+# defined by the Mozilla Public License, v. 2.0.
+package Bugzilla::Elastic::Search;
+
+use 5.10.1;
+use Moo;
+use Bugzilla::Search;
+use Bugzilla::Search::Quicksearch;
+use Bugzilla::Util qw(trick_taint);
+use namespace::clean;
+
+use Bugzilla::Elastic::Search::FakeCGI;
+
+
+# Constructor inputs.
+# quicksearch: the quicksearch text; converted to search params by _build_params.
+has 'quicksearch' => ( is => 'ro' );
+# limit / offset: optional paging; has_limit / has_offset report whether the
+# caller supplied them (consulted in _build_es_query).
+has 'limit' => ( is => 'ro', predicate => 'has_limit' );
+has 'offset' => ( is => 'ro', predicate => 'has_offset' );
+# fields: columns returned per hit by data(); checked by _arrayref_of_fields,
+# defaults to an empty list.
+has 'fields' => ( is => 'ro', isa => \&_arrayref_of_fields, default => sub { [] } );
+# Lazily derived values; each is produced by the matching _build_* sub below.
+has 'params' => ( is => 'lazy' );
+has 'clause' => ( is => 'lazy' );
+has 'es_query' => ( is => 'lazy' );
+has 'search_description' => (is => 'lazy');
+# query_time: set privately by data() from the "took" value of the response.
+has 'query_time' => ( is => 'rwp' );
+
+# The constructor argument is called 'order'; it is stored as _input_order
+# and parsed into [column, direction] pairs by _build__order.
+has '_input_order' => ( is => 'ro', init_arg => 'order', required => 1);
+has '_order' => ( is => 'lazy', init_arg => undef );
+# Names of requested order columns that are not in %IS_SUPPORTED_FIELD.
+has 'invalid_order_columns' => ( is => 'lazy' );
+
+with 'Bugzilla::Elastic::Role::HasClient';
+with 'Bugzilla::Elastic::Role::HasIndexName';
+with 'Bugzilla::Elastic::Role::Search';
+
+# Field names this search backend accepts: in the 'fields' attribute, in
+# search conditions (_query_condition) and as sort columns (_valid_order).
+my @SUPPORTED_FIELDS = qw(
+ bug_id product component short_desc
+ priority status_whiteboard bug_status resolution
+ keywords alias assigned_to reporter delta_ts
+ longdesc cf_crash_signature classification bug_severity
+ commenter
+);
+# Lookup form of the list above.
+my %IS_SUPPORTED_FIELD = map { $_ => 1 } @SUPPORTED_FIELDS;
+
+# 'relevance' is a pseudo-field: data() fills it from each hit's _score and
+# %SORT_MAP maps it to '_score'.
+$IS_SUPPORTED_FIELD{relevance} = 1;
+
+# Fields that are addressed through a "<name>.eq" key for sorting and for
+# exact/prefix/wildcard matching.
+# NOTE(review): presumably ".eq" is a sub-field defined in the index
+# mapping -- confirm against Bugzilla::Elastic::Indexer.
+my @NORMAL_FIELDS = qw(
+ priority
+ bug_severity
+ bug_status
+ resolution
+ product
+ component
+ classification
+ short_desc
+ assigned_to
+ reporter
+);
+
+# Bugzilla column name => key used in the Elasticsearch "sort" list.
+# Columns not listed here are passed through unchanged (see _build_es_query).
+my %SORT_MAP = (
+ bug_id => '_id',
+ relevance => '_score',
+ map { $_ => "$_.eq" } @NORMAL_FIELDS,
+);
+
+# Bugzilla field name => key used by the equals/anyexact/substring operators.
+my %EQUALS_MAP = (
+ map { $_ => "$_.eq" } @NORMAL_FIELDS,
+);
+
+# Moo "isa" check for the 'fields' attribute.
+#
+# Moo hands the candidate value to the check as its first argument (it is
+# not placed in $_) and treats a thrown exception as a validation failure.
+#
+# Throws Bugzilla::Elastic::Search::UnsupportedField for the first entry
+# that is not in %IS_SUPPORTED_FIELD, and dies with a plain message when
+# the value is not an array reference.
+sub _arrayref_of_fields {
+    my ($fields) = @_;
+
+    die "fields must be an array reference\n"
+        unless ref $fields eq 'ARRAY';
+
+    foreach my $field (@$fields) {
+        Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field)
+            unless $IS_SUPPORTED_FIELD{$field};
+    }
+
+    return;
+}
+
+# Run the Elasticsearch query and return the rows the current user may see.
+#
+# Returns an array reference with one entry per id returned by
+# Bugzilla->user->visible_bugs, in that order.  Each entry is an array
+# reference holding the hit's values for $self->fields.
+#
+# Side effect: sets 'query_time' to the response's "took" value / 1000.
+# Dies with $@ when the search call does not yield a true value.
+sub data {
+    my ($self) = @_;
+
+    my $query    = $self->es_query;
+    my $response = eval {
+        $self->client->search(
+            index => $self->index_name,
+            type  => 'bug',
+            body  => $query,
+        );
+    };
+    die $@ unless $response;
+
+    $self->_set_query_time($response->{took} / 1000);
+
+    my $columns = $self->fields;
+    my (@hit_ids, %row_for);
+    for my $hit (@{ $response->{hits}{hits} }) {
+        push @hit_ids, $hit->{_id};
+
+        my $doc = $hit->{_source};
+
+        # Expose the score as the pseudo-field 'relevance'.  When the hit
+        # has no _source this assignment autovivifies $doc.
+        $doc->{relevance} = $hit->{_score};
+
+        # Everything that came back from the server is untainted in place.
+        for my $value (values %$doc) {
+            trick_taint($value) if defined $value;
+        }
+        trick_taint($hit->{_id});
+
+        # NOTE(review): because of the autovivification above $doc is
+        # always true here, so the bare-id alternative is never taken.
+        $row_for{ $hit->{_id} } = $doc ? [ @$doc{@$columns} ] : $hit->{_id};
+    }
+
+    my $visible_ids = Bugzilla->user->visible_bugs(\@hit_ids);
+
+    return [ @row_for{@$visible_ids} ];
+}
+
+# Entries of _order (array refs of [column, direction?]) whose column is
+# listed in %IS_SUPPORTED_FIELD.  In scalar context this is their count.
+sub _valid_order {
+    my $self  = shift;
+    my $order = $self->_order;
+
+    return grep { $IS_SUPPORTED_FIELD{ $_->[0] } } @$order;
+}
+
+# Column names (without direction) of the supported order entries.
+sub order {
+    my $self  = shift;
+    my @valid = $self->_valid_order;
+
+    return map { $_->[0] } @valid;
+}
+
+# Translate a quicksearch string into a hash reference of search params.
+#
+# For the duration of the call Bugzilla::cgi is replaced by a sub that
+# returns a FakeCGI object and $Bugzilla::Search::Quicksearch::ELASTIC is
+# set to 1; quicksearch() is then run and the FakeCGI's params hash is
+# returned.
+# NOTE(review): presumably quicksearch() stores its parsed parameters
+# through Bugzilla->cgi->param -- confirm in Bugzilla::Search::Quicksearch.
+sub _quicksearch_to_params {
+    my ($quicksearch) = @_;
+
+    my $fake_cgi = Bugzilla::Elastic::Search::FakeCGI->new;
+
+    # Overriding Bugzilla::cgi below would otherwise warn.
+    no warnings 'redefine';
+    local $Bugzilla::Search::Quicksearch::ELASTIC = 1;
+    local *Bugzilla::cgi = sub { $fake_cgi };
+
+    quicksearch($quicksearch);
+
+    return $fake_cgi->params;
+}
+
+# Returns a reference to the full list of supported fields.
+# NOTE(review): the 'fields' attribute in this file is declared with a
+# 'default', not as lazy/builder, so nothing visible here calls this sub.
+sub _build_fields {
+    return \@SUPPORTED_FIELDS;
+}
+
+# Builder for '_order': parse each entry of the 'order' constructor argument.
+#
+# "column asc" / "column desc" (case-insensitive) becomes
+# [ column, lower-cased direction ]; anything else becomes [ entry ].
+sub _build__order {
+    my ($self) = @_;
+
+    my @parsed = map {
+        $_ =~ /^(.+)\s+(asc|desc)$/i ? [ $1, lc $2 ] : [ $_ ]
+    } @{ $self->_input_order };
+
+    return \@parsed;
+}
+
+# Builder for 'invalid_order_columns': names of the requested order columns
+# that are not listed in %IS_SUPPORTED_FIELD.
+sub _build_invalid_order_columns {
+    my ($self) = @_;
+
+    my @rejected = grep { !$IS_SUPPORTED_FIELD{ $_->[0] } } @{ $self->_order };
+
+    return [ map { $_->[0] } @rejected ];
+}
+
+# Builder for 'params': the quicksearch text converted to search params.
+sub _build_params {
+    my $self = shift;
+    my $text = $self->quicksearch;
+
+    return _quicksearch_to_params($text);
+}
+
+# Builder for 'clause': feed the params to Bugzilla::Search and return what
+# its _params_to_data_structure method produces.
+sub _build_clause {
+    my $self = shift;
+
+    return Bugzilla::Search->new(params => $self->params)
+                           ->_params_to_data_structure;
+}
+
+# Builder for 'search_description': array reference of the descriptions
+# that _describe returns for the root clause.
+sub _build_search_description {
+    my $self = shift;
+
+    my @descriptions = _describe($self->clause);
+
+    return \@descriptions;
+}
+
+# Dispatch on the class of $thing (a search Condition or Clause) and return
+# whatever the matching _describe_* helper returns.
+# Dies when $thing is of any other class.
+sub _describe {
+    my ($thing) = @_;
+
+    state $describer_for = {
+        'Bugzilla::Search::Condition' => \&_describe_condition,
+        'Bugzilla::Search::Clause'    => \&_describe_clause,
+    };
+
+    my $describer = $describer_for->{ ref $thing };
+    die "nothing for $thing\n" unless $describer;
+
+    return $describer->($thing);
+}
+
+# Flattened list of the descriptions of every child of $clause.
+sub _describe_clause {
+    my ($clause) = @_;
+
+    my @descriptions;
+    foreach my $child (@{ $clause->children }) {
+        push @descriptions, _describe($child);
+    }
+
+    return @descriptions;
+}
+
+# Describe one condition as { field, type, value }, where 'type' is the
+# condition's operator and 'value' is flattened by _describe_value.
+sub _describe_condition {
+    my ($cond) = @_;
+
+    return {
+        field => $cond->field,
+        type  => $cond->operator,
+        value => _describe_value($cond->value),
+    };
+}
+
+# A plain value is returned unchanged; a reference is treated as an array
+# reference and joined with ", ".
+sub _describe_value {
+    my ($val) = @_;
+
+    return $val unless ref $val;
+    return join(", ", @$val);
+}
+
+# Builder for 'es_query': the request body passed to the search call.
+#
+# Keys of the returned hash reference:
+#   _source - true when any 'fields' were requested, false otherwise
+#   query   - the clause translated by _query
+#   sort    - only when at least one supported order column was given;
+#             each entry is either a key name or { key => direction }
+#   from    - only when an offset was supplied
+#   size    - default_search_limit when no limit was supplied; otherwise
+#             the supplied limit capped at max_search_results, with a
+#             false limit (e.g. 0) meaning max_search_results
+sub _build_es_query {
+    my ($self) = @_;
+    my @extra;
+
+    my @valid_order = $self->_valid_order;
+    if (@valid_order) {
+        my @sort = map {
+            my $entry    = $_;
+            my $es_field = $SORT_MAP{ $entry->[0] } // $entry->[0];
+
+            # The direction lives in the order entry itself ($entry->[1]),
+            # not in this sub's argument list.
+            @$entry > 1 ? +{ $es_field => lc $entry->[1] } : $es_field;
+        } @valid_order;
+        push @extra, sort => \@sort;
+    }
+    if ($self->has_offset) {
+        push @extra, from => $self->offset;
+    }
+    my $max_limit = Bugzilla->params->{max_search_results};
+    my $limit     = Bugzilla->params->{default_search_limit};
+    if ($self->has_limit) {
+        if ($self->limit) {
+            my $requested = $self->limit;
+            $limit = $requested < $max_limit ? $requested : $max_limit;
+        }
+        else {
+            $limit = $max_limit;
+        }
+    }
+    push @extra, size => $limit;
+    return {
+        _source => @{ $self->fields } ? \1 : \0,
+        query   => _query($self->clause),
+        @extra,
+    };
+}
+
+# Dispatch on the class of $thing (a search Condition or Clause) and return
+# the query structure built by the matching _query_* helper.
+# Dies when $thing is of any other class.
+sub _query {
+    my ($thing) = @_;
+
+    state $builder_for = {
+        'Bugzilla::Search::Condition' => \&_query_condition,
+        'Bugzilla::Search::Clause'    => \&_query_clause,
+    };
+
+    my $builder = $builder_for->{ ref $thing };
+    die "nothing for $thing\n" unless $builder;
+
+    return $builder->($thing);
+}
+
+# Translate one search condition into a query structure.
+#
+# For the 'resolution' field the value is first normalised to an array
+# reference in which '---' is replaced by the empty string.
+#
+# Throws UnsupportedField when the field is not in %IS_SUPPORTED_FIELD and
+# UnsupportedOperator when the operator has no entry in the table below.
+sub _query_condition {
+    my ($cond) = @_;
+
+    state $handler_for = {
+        equals    => \&_operator_equals,
+        substring => \&_operator_substring,
+        anyexact  => \&_operator_anyexact,
+        anywords  => \&_operator_anywords,
+        allwords  => \&_operator_allwords,
+    };
+
+    my $field    = $cond->field;
+    my $operator = $cond->operator;
+    my $value    = $cond->value;
+
+    if ($field eq 'resolution') {
+        my @wanted = ref $value ? @$value : ($value);
+        $value = [ map { $_ eq '---' ? '' : $_ } @wanted ];
+    }
+
+    Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field)
+        if !$IS_SUPPORTED_FIELD{$field};
+
+    my $handler = $handler_for->{$operator};
+    Bugzilla::Elastic::Search::UnsupportedOperator->throw(operator => $operator)
+        unless $handler;
+
+    # A code reference builds the query itself; a plain string would be
+    # used directly as the query type.
+    my $es_query = ref $handler
+        ? $handler->($field, $value)
+        : +{ $handler => { $field => $value } };
+
+    return $es_query;
+}
+
+# "anyexact": the field is equal to any one of the given strings.
+#
+# $value is either an array reference or a comma-separated string.  A single
+# candidate is delegated to _operator_equals; several candidates become a
+# "terms" query on the field's %EQUALS_MAP key with lower-cased values.
+sub _operator_anyexact {
+    my ($field, $value) = @_;
+
+    my @candidates = ref $value ? @$value : split(/\s*,\s*/, $value);
+
+    return _operator_equals($field, $candidates[0]) if @candidates == 1;
+
+    my $es_field = $EQUALS_MAP{$field} // $field;
+    return {
+        terms => {
+            $es_field            => [ map { lc } @candidates ],
+            minimum_should_match => 1,
+        },
+    };
+}
+
+# "anywords": the field contains any of the words.
+# Builds a "match" query on $field with operator "or".
+sub _operator_anywords {
+    my ($field, $value) = @_;
+
+    my %match_spec = (query => $value, operator => "or");
+
+    return { match => { $field => \%match_spec } };
+}
+
+# "allwords": the field contains all of the words.
+# Builds a "match" query on $field with operator "and".
+sub _operator_allwords {
+    my ($field, $value) = @_;
+
+    my %match_spec = (query => $value, operator => "and");
+
+    return { match => { $field => \%match_spec } };
+}
+
+# "equals": a "match" query on the field's %EQUALS_MAP key, or on the field
+# itself when it has no entry there.
+sub _operator_equals {
+    my ($field, $value) = @_;
+
+    my $es_field = $EQUALS_MAP{$field} // $field;
+
+    return { match => { $es_field => $value } };
+}
+
+# "substring": the query built depends on the field.
+#
+#   longdesc              - has_child query on 'comment' documents whose
+#                           body matches all words; users who are not
+#                           insiders are additionally restricted to
+#                           comments with is_private false
+#   reporter, assigned_to - prefix query on the lower-cased value
+#   anything else         - wildcard query on the lower-cased "*value*"
+sub _operator_substring {
+    my ($field, $value) = @_;
+    my $is_insider = Bugzilla->user->is_insider;
+
+    if ($field eq 'longdesc') {
+        my @must = ({ match => { body => { query => $value, operator => "and" } } });
+        push @must, { term => { is_private => \0 } } unless $is_insider;
+
+        return {
+            has_child => {
+                type  => 'comment',
+                query => { bool => { must => \@must } },
+            },
+        };
+    }
+
+    my $es_field = $EQUALS_MAP{$field} // $field;
+
+    if ($field eq 'reporter' or $field eq 'assigned_to') {
+        return { prefix => { $es_field => lc $value } };
+    }
+
+    return { wildcard => { $es_field => lc "*$value*" } };
+}
+
+# Translate a clause into a query structure.
+#
+# Child clauses that have no children of their own are dropped.  A single
+# remaining child is translated directly; otherwise the translated children
+# are combined by the helper registered for the clause's joiner (AND / OR).
+sub _query_clause {
+    my ($clause) = @_;
+
+    state $join_with = {
+        AND => \&_join_and,
+        OR  => \&_join_or,
+    };
+
+    my @kept;
+    for my $child (@{ $clause->children }) {
+        next if $child->isa('Bugzilla::Search::Clause') && !@{ $child->children };
+        push @kept, $child;
+    }
+
+    return _query($kept[0]) if @kept == 1;
+
+    my @queries = map { _query($_) } @kept;
+
+    return $join_with->{ $clause->joiner }->(\@queries);
+}
+
+# Combine already-translated child queries so that all of them must match.
+# $children is an array reference; it is stored as-is under bool/must.
+sub _join_and {
+    my ($children) = @_;
+
+    return { bool => { must => $children } };
+}
+
+# Combine already-translated child queries under bool/should.
+# $children is an array reference and is stored as-is.
+sub _join_or {
+    my ($children) = @_;
+
+    my %bool = (should => $children);
+
+    return { bool => \%bool };
+}
+
+# Exceptions
+#
+# Three small Throwable classes, declared inside BEGIN so that their
+# 'with' / 'has' calls run as soon as this block has been compiled.
+BEGIN {
+    # Thrown by FakeCGI::redirect; carries the arguments of the redirect call.
+    package Bugzilla::Elastic::Search::Redirect;
+    use Moo;
+
+    with 'Throwable';
+
+    has 'redirect_args' => (is => 'ro', required => 1);
+
+    # Thrown when a field is not in the supported-field table.
+    # Stringifies to "Unsupported field: <field>"; the parts are joined with
+    # '.', since a comma here would yield only the field name.
+    package Bugzilla::Elastic::Search::UnsupportedField;
+    use Moo;
+    use overload q{""} => sub { "Unsupported field: " . $_[0]->field }, fallback => 1;
+
+    with 'Throwable';
+
+    has 'field' => (is => 'ro', required => 1);
+
+
+    # Thrown when a search operator has no Elasticsearch translation.
+    package Bugzilla::Elastic::Search::UnsupportedOperator;
+    use Moo;
+
+    with 'Throwable';
+
+    has 'operator' => (is => 'ro', required => 1);
+}
+
+1;
diff --git a/Bugzilla/Elastic/Search/FakeCGI.pm b/Bugzilla/Elastic/Search/FakeCGI.pm
new file mode 100644
index 000000000..827c96c52
--- /dev/null
+++ b/Bugzilla/Elastic/Search/FakeCGI.pm
@@ -0,0 +1,43 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This Source Code Form is "Incompatible With Secondary Licenses", as
+# defined by the Mozilla Public License, v. 2.0.
+package Bugzilla::Elastic::Search::FakeCGI;
+use 5.10.1;
+use Moo;
+use namespace::clean;
+
+# Hash reference holding the parameters set through param(); every object
+# starts with its own empty hash.
+has 'params' => (is => 'ro', default => sub { {} });
+
+# we pretend to be Bugzilla::CGI at times.
+# Forwards all arguments (including the invocant) unchanged to
+# Bugzilla::CGI::canonicalise_query, called as a plain function.
+# NOTE(review): this file does not load Bugzilla::CGI itself; presumably
+# it is already loaded by the time this is called -- confirm.
+sub canonicalise_query {
+ return Bugzilla::CGI::canonicalise_query(@_);
+}
+
+# Remove $key from the params hash and return the removed value.
+sub delete {
+    my $self = shift;
+    my $key  = shift;
+
+    return delete $self->params->{$key};
+}
+
+# A redirect cannot be performed on a fake CGI object; instead a
+# Bugzilla::Elastic::Search::Redirect exception carrying the call's
+# arguments (without the invocant) is thrown.
+sub redirect {
+    my $self = shift;
+    my @args = @_;
+
+    Bugzilla::Elastic::Search::Redirect->throw(redirect_args => \@args);
+}
+
+# Getter/setter over the params hash, selected by argument count:
+#   param()                  - returns the whole params hash reference
+#   param($key)              - returns the stored value for $key
+#   param($key, $val)        - stores $val
+#   param($key, $val, @more) - stores an array reference of all the values
+sub param {
+    my ($self, $key, $val, @rest) = @_;
+    my $argc   = @_;
+    my $params = $self->params;
+
+    return $params                if $argc < 2;
+    return $params->{$key}        if $argc == 2;
+    return $params->{$key} = $val if $argc == 3;
+
+    return $params->{$key} = [ $val, @rest ];
+}
+
+1;