summary | refs | log | tree | commit | diff | stats
path: root/Bugzilla/Elastic
diff options
context:
space:
mode:
author Dylan William Hardison <dylan@hardison.net> 2017-03-21 04:08:26 +0100
committer Dylan William Hardison <dylan@hardison.net> 2017-03-21 04:08:26 +0100
commit b921e3142e37977298571a4229faca4e1794017d (patch)
tree c965352b0db57f0d73f12ba2fd818845566dbe8b /Bugzilla/Elastic
parent e53de8c34e12e18ecea98ad97775893f083beff7 (diff)
download bugzilla-b921e3142e37977298571a4229faca4e1794017d.tar.gz
bugzilla-b921e3142e37977298571a4229faca4e1794017d.tar.xz
Bug 1307485 - Add code to run a subset of buglist.cgi search queries against the ES backend
Skipping this for this week's push.
Diffstat (limited to 'Bugzilla/Elastic')
-rw-r--r-- Bugzilla/Elastic/Indexer.pm 29
-rw-r--r-- Bugzilla/Elastic/Role/HasClient.pm 2
-rw-r--r-- Bugzilla/Elastic/Role/Search.pm 16
-rw-r--r-- Bugzilla/Elastic/Search.pm 425
-rw-r--r-- Bugzilla/Elastic/Search/FakeCGI.pm 43
5 files changed, 18 insertions, 497 deletions
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm
index dd71a7198..82f946af9 100644
--- a/Bugzilla/Elastic/Indexer.pm
+++ b/Bugzilla/Elastic/Indexer.pm
@@ -23,7 +23,7 @@ has 'mtime' => (
has 'shadow_dbh' => ( is => 'lazy' );
has 'debug_sql' => (
- is => 'ro',
+ is => 'ro',
default => 0,
);
@@ -40,24 +40,24 @@ sub create_index {
index => $self->index_name,
body => {
settings => {
- number_of_shards => 2,
+ number_of_shards => 1,
analysis => {
- filter => {
- asciifolding_original => {
- type => "asciifolding",
- preserve_original => \1,
- },
- },
analyzer => {
folding => {
+ type => 'standard',
tokenizer => 'standard',
- filter => ['standard', 'lowercase', 'asciifolding_original'],
+ filter => [ 'lowercase', 'asciifolding' ]
},
bz_text_analyzer => {
type => 'standard',
filter => ['lowercase', 'stop'],
max_token_length => '20'
},
+ bz_substring_analyzer => {
+ type => 'custom',
+ filter => ['lowercase'],
+ tokenizer => 'bz_ngram_tokenizer',
+ },
bz_equals_analyzer => {
type => 'custom',
filter => ['lowercase'],
@@ -71,20 +71,25 @@ sub create_index {
whiteboard_shingle_words => {
type => 'custom',
tokenizer => 'whiteboard_words_pattern',
- filter => ['stop', 'shingle', 'lowercase']
+ filter => ['stop', 'shingle']
},
whiteboard_tokens => {
type => 'custom',
tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop', 'lowercase']
+ filter => ['stop']
},
whiteboard_shingle_tokens => {
type => 'custom',
tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop', 'shingle', 'lowercase']
+ filter => ['stop', 'shingle']
}
},
tokenizer => {
+ bz_ngram_tokenizer => {
+ type => 'nGram',
+ min_ngram => 2,
+ max_ngram => 25,
+ },
whiteboard_tokens_pattern => {
type => 'pattern',
pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
diff --git a/Bugzilla/Elastic/Role/HasClient.pm b/Bugzilla/Elastic/Role/HasClient.pm
index 8e2687880..3d52d513a 100644
--- a/Bugzilla/Elastic/Role/HasClient.pm
+++ b/Bugzilla/Elastic/Role/HasClient.pm
@@ -17,7 +17,7 @@ sub _build_client {
my ($self) = @_;
return Search::Elasticsearch->new(
- nodes => [ split(/\s+/, Bugzilla->params->{elasticsearch_nodes}) ],
+ nodes => Bugzilla->params->{elasticsearch_nodes},
cxn_pool => 'Sniff',
);
}
diff --git a/Bugzilla/Elastic/Role/Search.pm b/Bugzilla/Elastic/Role/Search.pm
deleted file mode 100644
index 9446e0da8..000000000
--- a/Bugzilla/Elastic/Role/Search.pm
+++ /dev/null
@@ -1,16 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-package Bugzilla::Elastic::Role::Search;
-
-use 5.10.1;
-use strict;
-use warnings;
-use Role::Tiny;
-
-requires qw(data search_description invalid_order_columns order);
-
-1;
diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm
deleted file mode 100644
index 5c60f2353..000000000
--- a/Bugzilla/Elastic/Search.pm
+++ /dev/null
@@ -1,425 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-package Bugzilla::Elastic::Search;
-
-use 5.10.1;
-use Moo;
-use Bugzilla::Search;
-use Bugzilla::Search::Quicksearch;
-use Bugzilla::Util qw(trick_taint);
-use namespace::clean;
-
-use Bugzilla::Elastic::Search::FakeCGI;
-
-
-has 'quicksearch' => ( is => 'ro' );
-has 'limit' => ( is => 'ro', predicate => 'has_limit' );
-has 'offset' => ( is => 'ro', predicate => 'has_offset' );
-has 'fields' => ( is => 'ro', isa => \&_arrayref_of_fields, default => sub { [] } );
-has 'params' => ( is => 'lazy' );
-has 'clause' => ( is => 'lazy' );
-has 'es_query' => ( is => 'lazy' );
-has 'search_description' => (is => 'lazy');
-has 'query_time' => ( is => 'rwp' );
-
-has '_input_order' => ( is => 'ro', init_arg => 'order', required => 1);
-has '_order' => ( is => 'lazy', init_arg => undef );
-has 'invalid_order_columns' => ( is => 'lazy' );
-
-with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
-with 'Bugzilla::Elastic::Role::Search';
-
-my @SUPPORTED_FIELDS = qw(
- bug_id product component short_desc
- priority status_whiteboard bug_status resolution
- keywords alias assigned_to reporter delta_ts
- longdesc cf_crash_signature classification bug_severity
- commenter
-);
-my %IS_SUPPORTED_FIELD = map { $_ => 1 } @SUPPORTED_FIELDS;
-
-$IS_SUPPORTED_FIELD{relevance} = 1;
-
-my @NORMAL_FIELDS = qw(
- priority
- bug_severity
- bug_status
- resolution
- product
- component
- classification
- short_desc
- assigned_to
- reporter
-);
-
-my %SORT_MAP = (
- bug_id => '_id',
- relevance => '_score',
- map { $_ => "$_.eq" } @NORMAL_FIELDS,
-);
-
-my %EQUALS_MAP = (
- map { $_ => "$_.eq" } @NORMAL_FIELDS,
-);
-
-sub _arrayref_of_fields {
- my $f = $_;
- foreach my $field (@$f) {
- Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field)
- unless $IS_SUPPORTED_FIELD{$field};
- }
-}
-
-sub data {
- my ($self) = @_;
- my $body = $self->es_query;
- my $result = eval {
- $self->client->search(
- index => $self->index_name,
- type => 'bug',
- body => $body,
- );
- };
- if (!$result) {
- die $@;
- }
- $self->_set_query_time($result->{took} / 1000);
- my (@ids, %hits);
- my $fields = $self->fields;
- foreach my $hit (@{ $result->{hits}{hits} }) {
- push @ids, $hit->{_id};
- my $source = $hit->{_source};
- $source->{relevance} = $hit->{_score};
- foreach my $val (values %$source) {
- next unless defined $val;
- trick_taint($val);
- }
- trick_taint($hit->{_id});
- if ($source) {
- $hits{$hit->{_id}} = [ @$source{@$fields} ];
- }
- else {
- $hits{$hit->{_id}} = $hit->{_id};
- }
- }
- my $visible_ids = Bugzilla->user->visible_bugs(\@ids);
-
- return [ map { $hits{$_} } @$visible_ids ];
-}
-
-sub _valid_order {
- my ($self) = @_;
-
- return grep { $IS_SUPPORTED_FIELD{$_->[0]} } @{$self->_order};
-}
-
-sub order {
- my ($self) = @_;
-
- return map { $_->[0] } $self->_valid_order;
-}
-
-sub _quicksearch_to_params {
- my ($quicksearch) = @_;
- no warnings 'redefine';
- my $cgi = Bugzilla::Elastic::Search::FakeCGI->new;
- local *Bugzilla::cgi = sub { $cgi };
- local $Bugzilla::Search::Quicksearch::ELASTIC = 1;
- quicksearch($quicksearch);
-
- return $cgi->params;
-}
-
-sub _build_fields { return \@SUPPORTED_FIELDS }
-
-sub _build__order {
- my ($self) = @_;
-
- my @order;
- foreach my $order (@{$self->_input_order}) {
- if ($order =~ /^(.+)\s+(asc|desc)$/i) {
- push @order, [ $1, lc $2 ];
- }
- else {
- push @order, [ $order ];
- }
- }
- return \@order;
-}
-
-sub _build_invalid_order_columns {
- my ($self) = @_;
-
- return [ map { $_->[0] } grep { !$IS_SUPPORTED_FIELD{$_->[0]} } @{ $self->_order } ];
-}
-
-sub _build_params {
- my ($self) = @_;
-
- return _quicksearch_to_params($self->quicksearch);
-}
-
-sub _build_clause {
- my ($self) = @_;
- my $search = Bugzilla::Search->new(params => $self->params);
-
- return $search->_params_to_data_structure;
-}
-
-sub _build_search_description {
- my ($self) = @_;
-
- return [_describe($self->clause)];
-}
-
-sub _describe {
- my ($thing) = @_;
-
- state $class_to_func = {
- 'Bugzilla::Search::Condition' => \&_describe_condition,
- 'Bugzilla::Search::Clause' => \&_describe_clause
- };
-
- my $func = $class_to_func->{ref $thing} or die "nothing for $thing\n";
-
- return $func->($thing);
-}
-
-sub _describe_clause {
- my ($clause) = @_;
-
- return map { _describe($_) } @{$clause->children};
-}
-
-sub _describe_condition {
- my ($cond) = @_;
-
- return { field => $cond->field, type => $cond->operator, value => _describe_value($cond->value) };
-}
-
-sub _describe_value {
- my ($val) = @_;
-
- return ref($val) ? join(", ", @$val) : $val;
-}
-
-sub _build_es_query {
- my ($self) = @_;
- my @extra;
-
- if ($self->_valid_order) {
- my @sort = map {
- my $f = $SORT_MAP{$_->[0]} // $_->[0];
- @$_ > 1 ? { $f => lc $_[1] } : $f;
- } $self->_valid_order;
- push @extra, sort => \@sort;
- }
- if ($self->has_offset) {
- push @extra, from => $self->offset;
- }
- my $max_limit = Bugzilla->params->{max_search_results};
- my $limit = Bugzilla->params->{default_search_limit};
- if ($self->has_limit) {
- if ($self->limit) {
- my $l = $self->limit;
- $limit = $l < $max_limit ? $l : $max_limit;
- }
- else {
- $limit = $max_limit;
- }
- }
- push @extra, size => $limit;
- return {
- _source => @{$self->fields} ? \1 : \0,
- query => _query($self->clause),
- @extra,
- };
-}
-
-sub _query {
- my ($thing) = @_;
- state $class_to_func = {
- 'Bugzilla::Search::Condition' => \&_query_condition,
- 'Bugzilla::Search::Clause' => \&_query_clause,
- };
-
- my $func = $class_to_func->{ref $thing} or die "nothing for $thing\n";
-
- return $func->($thing);
-}
-
-sub _query_condition {
- my ($cond) = @_;
- state $operator_to_es = {
- equals => \&_operator_equals,
- substring => \&_operator_substring,
- anyexact => \&_operator_anyexact,
- anywords => \&_operator_anywords,
- allwords => \&_operator_allwords,
- };
-
- my $field = $cond->field;
- my $operator = $cond->operator;
- my $value = $cond->value;
-
- if ($field eq 'resolution') {
- $value = [ map { $_ eq '---' ? '' : $_ } ref $value ? @$value : $value ];
- }
-
- unless ($IS_SUPPORTED_FIELD{$field}) {
- Bugzilla::Elastic::Search::UnsupportedField->throw(field => $field);
- }
-
- my $op = $operator_to_es->{$operator}
- or Bugzilla::Elastic::Search::UnsupportedOperator->throw(operator => $operator);
-
- my $result;
- if (ref $op) {
- $result = $op->($field, $value);
- } else {
- $result = { $op => { $field => $value } };
- }
-
- return $result;
-}
-
-# is equal to any of the strings
-sub _operator_anyexact {
- my ($field, $value) = @_;
- my @values = ref $value ? @$value : split(/\s*,\s*/, $value);
- if (@values == 1) {
- return _operator_equals($field, $values[0]);
- }
- else {
- return {
- terms => {
- $EQUALS_MAP{$field} // $field => [map { lc } @values],
- minimum_should_match => 1,
- },
- };
- }
-}
-
-# contains any of the words
-sub _operator_anywords {
- my ($field, $value) = @_;
- return {
- match => {
- $field => { query => $value, operator => "or" }
- },
- };
-}
-
-# contains all of the words
-sub _operator_allwords {
- my ($field, $value) = @_;
- return {
- match => {
- $field => { query => $value, operator => "and" }
- },
- };
-}
-
-sub _operator_equals {
- my ($field, $value) = @_;
- return {
- match => {
- $EQUALS_MAP{$field} // $field => $value,
- },
- };
-}
-
-sub _operator_substring {
- my ($field, $value) = @_;
- my $is_insider = Bugzilla->user->is_insider;
-
- if ($field eq 'longdesc') {
- return {
- has_child => {
- type => 'comment',
- query => {
- bool => {
- must => [
- { match => { body => { query => $value, operator => "and" } } },
- $is_insider ? () : { term => { is_private => \0 } },
- ],
- },
- },
- },
- }
- }
- elsif ($field eq 'reporter' or $field eq 'assigned_to') {
- return {
- prefix => {
- $EQUALS_MAP{$field} // $field => lc $value,
- }
- }
- }
- else {
- return {
- wildcard => {
- $EQUALS_MAP{$field} // $field => lc "*$value*",
- }
- };
- }
-}
-
-sub _query_clause {
- my ($clause) = @_;
-
- state $joiner_to_func = {
- AND => \&_join_and,
- OR => \&_join_or,
- };
-
- my @children = grep { !$_->isa('Bugzilla::Search::Clause') || @{$_->children} } @{$clause->children};
- if (@children == 1) {
- return _query($children[0]);
- }
-
- return $joiner_to_func->{$clause->joiner}->([ map { _query($_) } @children ]);
-}
-
-sub _join_and {
- my ($children) = @_;
- return { bool => { must => $children } },
-}
-
-sub _join_or {
- my ($children) = @_;
- return { bool => { should => $children } };
-}
-
-# Exceptions
-BEGIN {
- package Bugzilla::Elastic::Search::Redirect;
- use Moo;
-
- with 'Throwable';
-
- has 'redirect_args' => (is => 'ro', required => 1);
-
- package Bugzilla::Elastic::Search::UnsupportedField;
- use Moo;
- use overload q{""} => sub { "Unsupported field: ", $_[0]->field }, fallback => 1;
-
- with 'Throwable';
-
- has 'field' => (is => 'ro', required => 1);
-
-
- package Bugzilla::Elastic::Search::UnsupportedOperator;
- use Moo;
-
- with 'Throwable';
-
- has 'operator' => (is => 'ro', required => 1);
-}
-
-1;
diff --git a/Bugzilla/Elastic/Search/FakeCGI.pm b/Bugzilla/Elastic/Search/FakeCGI.pm
deleted file mode 100644
index 827c96c52..000000000
--- a/Bugzilla/Elastic/Search/FakeCGI.pm
+++ /dev/null
@@ -1,43 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-package Bugzilla::Elastic::Search::FakeCGI;
-use 5.10.1;
-use Moo;
-use namespace::clean;
-
-has 'params' => (is => 'ro', default => sub { {} });
-
-# we pretend to be Bugzilla::CGI at times.
-sub canonicalise_query {
- return Bugzilla::CGI::canonicalise_query(@_);
-}
-
-sub delete {
- my ($self, $key) = @_;
- delete $self->params->{$key};
-}
-
-sub redirect {
- my ($self, @args) = @_;
-
- Bugzilla::Elastic::Search::Redirect->throw(redirect_args => \@args);
-}
-
-sub param {
- my ($self, $key, $val, @rest) = @_;
- if (@_ > 3) {
- $self->params->{$key} = [$val, @rest];
- } elsif (@_ == 3) {
- $self->params->{$key} = $val;
- } elsif (@_ == 2) {
- return $self->params->{$key};
- } else {
- return $self->params
- }
-}
-
-1;