diff options
author | Dylan William Hardison <dylan@hardison.net> | 2017-07-20 22:36:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-20 22:36:56 +0200 |
commit | da9c617eb5d8345409386b85b5d8c167fc0c55d7 (patch) | |
tree | 0938a7253553df05d30df952d6fc2b6e1393743b /Bugzilla | |
parent | 608ec172071f5adfe8eba9c83d42668520a2baea (diff) | |
download | bugzilla-da9c617eb5d8345409386b85b5d8c167fc0c55d7.tar.gz bugzilla-da9c617eb5d8345409386b85b5d8c167fc0c55d7.tar.xz |
Bug 1381869 - Use separate elasticsearch index for Bugzilla::User
This patch removes the concept of a single, bugzilla-wide index in favor of a
per-class index. Bugs and comments continue to use
Bugzilla->params->{elasticsearch_index}, but users use
Bugzilla->params->{elasticsearch_index} . "_user".
It is assured via the ChildObject trait (role) that comments will share the
index with bugs, and we have kept the index for bugs/comments the same to avoid
the multi-hour reindexing of production. Re-indexing users takes only five
minutes.
Subsequent work on this will allow us to version the index names and use
aliases, but I wanted to keep this patch small.
This patch also corrects some mistakes:
1. $indexer->put_mapping() should not have been a public method.
2. Time::HiRes should be imported at the top of the file, not in a sub.
Diffstat (limited to 'Bugzilla')
-rw-r--r-- | Bugzilla/Bug.pm | 67 | ||||
-rw-r--r-- | Bugzilla/Comment.pm | 4 | ||||
-rw-r--r-- | Bugzilla/Elastic.pm | 3 | ||||
-rw-r--r-- | Bugzilla/Elastic/Indexer.pm | 102 | ||||
-rw-r--r-- | Bugzilla/Elastic/Role/ChildObject.pm | 6 | ||||
-rw-r--r-- | Bugzilla/Elastic/Role/HasIndexName.pm | 16 | ||||
-rw-r--r-- | Bugzilla/Elastic/Role/Object.pm | 4 | ||||
-rw-r--r-- | Bugzilla/Elastic/Search.pm | 7 | ||||
-rw-r--r-- | Bugzilla/User.pm | 30 |
9 files changed, 133 insertions, 106 deletions
diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm index eb228d27c..ee48ed7a2 100644 --- a/Bugzilla/Bug.pm +++ b/Bugzilla/Bug.pm @@ -302,6 +302,73 @@ with 'Bugzilla::Elastic::Role::Object'; sub ES_TYPE {'bug'} +sub ES_INDEX { Bugzilla->params->{elasticsearch_index} } + +sub ES_SETTINGS { + return { + number_of_shards => 2, + analysis => { + filter => { + asciifolding_original => { + type => "asciifolding", + preserve_original => \1, + }, + }, + analyzer => { + autocomplete => { + type => 'custom', + tokenizer => 'keyword', + filter => [ 'lowercase', 'asciifolding_original' ], + }, + folding => { + tokenizer => 'standard', + filter => [ 'standard', 'lowercase', 'asciifolding_original' ], + }, + bz_text_analyzer => { + type => 'standard', + filter => [ 'lowercase', 'stop' ], + max_token_length => '20' + }, + bz_equals_analyzer => { + type => 'custom', + filter => ['lowercase'], + tokenizer => 'keyword', + }, + whiteboard_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => ['stop'] + }, + whiteboard_shingle_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => [ 'stop', 'shingle', 'lowercase' ] + }, + whiteboard_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => [ 'stop', 'lowercase' ] + }, + whiteboard_shingle_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => [ 'stop', 'shingle', 'lowercase' ] + } + }, + tokenizer => { + whiteboard_tokens_pattern => { + type => 'pattern', + pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' + }, + whiteboard_words_pattern => { + type => 'pattern', + pattern => '[\\[\\];,\\s]+' + }, + }, + }, + }; +} + sub _bz_field { my ($field, @fields) = @_; diff --git a/Bugzilla/Comment.pm b/Bugzilla/Comment.pm index 14f28cbe5..23c1d3f85 100644 --- a/Bugzilla/Comment.pm +++ b/Bugzilla/Comment.pm @@ -80,8 +80,8 @@ use constant VALIDATOR_DEPENDENCIES => { with 'Bugzilla::Elastic::Role::ChildObject'; -use constant 
ES_TYPE => 'comment'; -use constant ES_PARENT_TYPE => 'bug'; +use constant ES_TYPE => 'comment'; +use constant ES_PARENT_CLASS => 'Bugzilla::Bug'; sub ES_OBJECTS_AT_ONCE { 50 } diff --git a/Bugzilla/Elastic.pm b/Bugzilla/Elastic.pm index fa032d2a6..3a3829e3b 100644 --- a/Bugzilla/Elastic.pm +++ b/Bugzilla/Elastic.pm @@ -12,7 +12,6 @@ use Bugzilla::Elastic::Search; use Bugzilla::Util qw(trick_taint); with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; sub suggest_users { my ($self, $text) = @_; @@ -30,7 +29,7 @@ sub suggest_users { my $result = eval { $self->client->suggest( - index => $self->index_name, + index => Bugzilla::User->ES_INDEX, body => { $field => { text => $text, diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm index 46eb8f648..36dd1dcb4 100644 --- a/Bugzilla/Elastic/Indexer.pm +++ b/Bugzilla/Elastic/Indexer.pm @@ -11,10 +11,10 @@ use Moo; use List::MoreUtils qw(natatime); use Storable qw(dclone); use Scalar::Util qw(looks_like_number); +use Time::HiRes; use namespace::clean; with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; has 'shadow_dbh' => ( is => 'lazy' ); @@ -28,94 +28,34 @@ has 'progress_bar' => ( predicate => 'has_progress_bar', ); -sub create_index { - my ($self) = @_; - my $indices = $self->client->indices; - - $indices->create( - index => $self->index_name, - body => { - settings => { - number_of_shards => 2, - analysis => { - filter => { - asciifolding_original => { - type => "asciifolding", - preserve_original => \1, - }, - }, - analyzer => { - autocomplete => { - type => 'custom', - tokenizer => 'keyword', - filter => ['lowercase', 'asciifolding_original'], - }, - folding => { - tokenizer => 'standard', - filter => ['standard', 'lowercase', 'asciifolding_original'], - }, - bz_text_analyzer => { - type => 'standard', - filter => ['lowercase', 'stop'], - max_token_length => '20' - }, - bz_equals_analyzer => { - type => 'custom', - filter => 
['lowercase'], - tokenizer => 'keyword', - }, - whiteboard_words => { - type => 'custom', - tokenizer => 'whiteboard_words_pattern', - filter => ['stop'] - }, - whiteboard_shingle_words => { - type => 'custom', - tokenizer => 'whiteboard_words_pattern', - filter => ['stop', 'shingle', 'lowercase'] - }, - whiteboard_tokens => { - type => 'custom', - tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'lowercase'] - }, - whiteboard_shingle_tokens => { - type => 'custom', - tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'shingle', 'lowercase'] - } - }, - tokenizer => { - whiteboard_tokens_pattern => { - type => 'pattern', - pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' - }, - whiteboard_words_pattern => { - type => 'pattern', - pattern => '[\\[\\];,\\s]+' - }, - }, - }, - }, - } - ) unless $indices->exists(index => $self->index_name); + +sub _create_index { + my ($self, $class) = @_; + my $indices = $self->client->indices; + my $index_name = $class->ES_INDEX; + + unless ($indices->exists(index => $index_name)) { + $indices->create( + index => $index_name, + body => { settings => $class->ES_SETTINGS }, + ); + } } sub _bulk_helper { my ($self, $class) = @_; return $self->client->bulk_helper( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, ); } - sub _find_largest { my ($self, $class, $field) = @_; my $result = $self->client->search( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, body => { aggs => { $field => { extended_stats => { field => $field } } }, @@ -147,7 +87,7 @@ sub _find_largest_id { return $self->_find_largest($class, $class->ID_FIELD); } -sub put_mapping { +sub _put_mapping { my ($self, $class) = @_; my %body = ( properties => scalar $class->ES_PROPERTIES ); @@ -156,7 +96,7 @@ sub put_mapping { } $self->client->indices->put_mapping( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, body => \%body, ); @@ -178,13 +118,15 @@ 
sub _debug_sql { sub bulk_load { my ( $self, $class ) = @_; + $self->_create_index($class); + my $bulk = $self->_bulk_helper($class); my $last_mtime = $self->_find_largest_mtime($class); my $last_id = $self->_find_largest_id($class); my $new_ids = $self->_select_all_ids($class, $last_id); my $updated_ids = $self->_select_updated_ids($class, $last_mtime); - $self->put_mapping($class); + $self->_put_mapping($class); $self->_bulk_load_ids($bulk, $class, $new_ids) if @$new_ids; $self->_bulk_load_ids($bulk, $class, $updated_ids) if @$updated_ids; @@ -213,7 +155,8 @@ sub _select_updated_ids { sub bulk_load_ids { my ($self, $class, $ids) = @_; - $self->put_mapping($class); + $self->_create_index($class); + $self->_put_mapping($class); $self->_bulk_load_ids($self->_bulk_helper($class), $class, $ids); } @@ -238,7 +181,6 @@ sub _bulk_load_ids { } my $total = 0; - use Time::HiRes; my $start = time; while (my @ids = $iter->()) { if ($progress_bar) { diff --git a/Bugzilla/Elastic/Role/ChildObject.pm b/Bugzilla/Elastic/Role/ChildObject.pm index 1f7a7483a..9735cc1ed 100644 --- a/Bugzilla/Elastic/Role/ChildObject.pm +++ b/Bugzilla/Elastic/Role/ChildObject.pm @@ -11,6 +11,10 @@ use Role::Tiny; with 'Bugzilla::Elastic::Role::Object'; -requires qw(ES_PARENT_TYPE es_parent_id); +requires qw(ES_PARENT_CLASS es_parent_id); + +sub ES_PARENT_TYPE { $_[0]->ES_PARENT_CLASS->ES_TYPE } +sub ES_INDEX { $_[0]->ES_PARENT_CLASS->ES_INDEX } +sub ES_SETTINGS { $_[0]->ES_PARENT_CLASS->ES_SETTINGS } 1; diff --git a/Bugzilla/Elastic/Role/HasIndexName.pm b/Bugzilla/Elastic/Role/HasIndexName.pm deleted file mode 100644 index eaff339cd..000000000 --- a/Bugzilla/Elastic/Role/HasIndexName.pm +++ /dev/null @@ -1,16 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-# -# This Source Code Form is "Incompatible With Secondary Licenses", as -# defined by the Mozilla Public License, v. 2.0. -package Bugzilla::Elastic::Role::HasIndexName; - -use 5.10.1; -use Moo::Role; -use Search::Elasticsearch; - -has 'index_name' => ( is => 'ro', default => sub { Bugzilla->params->{elasticsearch_index} } ); - - -1; diff --git a/Bugzilla/Elastic/Role/Object.pm b/Bugzilla/Elastic/Role/Object.pm index ad5ab002b..c51948ee9 100644 --- a/Bugzilla/Elastic/Role/Object.pm +++ b/Bugzilla/Elastic/Role/Object.pm @@ -9,7 +9,7 @@ package Bugzilla::Elastic::Role::Object; use 5.10.1; use Role::Tiny; -requires qw(ES_TYPE ES_PROPERTIES es_document); +requires qw(ES_TYPE ES_INDEX ES_SETTINGS ES_PROPERTIES es_document); requires qw(ID_FIELD DB_TABLE); sub ES_OBJECTS_AT_ONCE { 100 } @@ -45,4 +45,6 @@ around 'es_document' => sub { return $doc; }; + + 1; diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm index e1af91032..26ab71bec 100644 --- a/Bugzilla/Elastic/Search.pm +++ b/Bugzilla/Elastic/Search.pm @@ -31,7 +31,6 @@ has '_order' => ( is => 'lazy', init_arg => undef ); has 'invalid_order_columns' => ( is => 'lazy' ); with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; with 'Bugzilla::Elastic::Role::Search'; my @SUPPORTED_FIELDS = qw( @@ -92,9 +91,9 @@ sub data { my $body = $self->es_query; my $result = eval { $self->client->search( - index => $self->index_name, - type => 'bug', - body => $body, + index => Bugzilla::Bug->ES_INDEX, + type => Bugzilla::Bug->ES_TYPE, + body => $body, ); }; die $@ unless $result; diff --git a/Bugzilla/User.pm b/Bugzilla/User.pm index 2d7f38640..525733069 100644 --- a/Bugzilla/User.pm +++ b/Bugzilla/User.pm @@ -126,6 +126,11 @@ use constant EXTRA_REQUIRED_FIELDS => qw(is_enabled); with 'Bugzilla::Elastic::Role::Object'; +sub ES_INDEX { + my ($class) = @_; + sprintf("%s_%s", Bugzilla->params->{elasticsearch_index}, $class->ES_TYPE); +} + sub ES_TYPE { 'user' } sub ES_OBJECTS_AT_ONCE 
{ 5000 } @@ -153,6 +158,31 @@ sub ES_SELECT_ALL_SQL { return ("SELECT $id FROM $table WHERE $id > ? AND is_enabled AND NOT disabledtext ORDER BY $id", [$last_id // 0]); } +sub ES_SETTINGS { + return { + number_of_shards => 2, + analysis => { + filter => { + asciifolding_original => { + type => "asciifolding", + preserve_original => \1, + }, + }, + analyzer => { + autocomplete => { + type => 'custom', + tokenizer => 'keyword', + filter => [ 'lowercase', 'asciifolding_original' ], + }, + folding => { + tokenizer => 'standard', + filter => [ 'standard', 'lowercase', 'asciifolding_original' ], + }, + } + } + }; +} + sub ES_PROPERTIES { return { suggest_user => { |