From da9c617eb5d8345409386b85b5d8c167fc0c55d7 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Thu, 20 Jul 2017 16:36:56 -0400 Subject: Bug 1381869 - Use separate elasticsearch index for Bugzilla::User This patch removes the concept of a single, bugzilla-wide index in favor of a per-class index. Bugs and comments continue to use Bugzilla->params->{elasticsearch_index} but users use Bugzilla->params->{elasticsearch_index} . "_user". It is assured via the ChildObject trait (role) that comments will share the index with bugs, and we have kept the index for bugs/comments the same to avoid the multi-hour reindexing of production. Re-indexing users takes only five minutes. Subsequent work on this will allow us to version the index names and use aliases, but I wanted to keep this patch small. This patch also corrects some mistakes: 1. $indexer->put_mapping() should not have been a public method. 2. Time::HiRes should be imported at the top of the file, not in a sub. --- Bugzilla/Elastic/Indexer.pm | 102 ++++++++-------------------------- Bugzilla/Elastic/Role/ChildObject.pm | 6 +- Bugzilla/Elastic/Role/HasIndexName.pm | 16 ------ Bugzilla/Elastic/Role/Object.pm | 4 +- Bugzilla/Elastic/Search.pm | 7 +-- 5 files changed, 33 insertions(+), 102 deletions(-) delete mode 100644 Bugzilla/Elastic/Role/HasIndexName.pm (limited to 'Bugzilla/Elastic') diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm index 46eb8f648..36dd1dcb4 100644 --- a/Bugzilla/Elastic/Indexer.pm +++ b/Bugzilla/Elastic/Indexer.pm @@ -11,10 +11,10 @@ use Moo; use List::MoreUtils qw(natatime); use Storable qw(dclone); use Scalar::Util qw(looks_like_number); +use Time::HiRes; use namespace::clean; with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; has 'shadow_dbh' => ( is => 'lazy' ); @@ -28,94 +28,34 @@ has 'progress_bar' => ( predicate => 'has_progress_bar', ); -sub create_index { - my ($self) = @_; - my $indices = $self->client->indices; - - 
$indices->create( - index => $self->index_name, - body => { - settings => { - number_of_shards => 2, - analysis => { - filter => { - asciifolding_original => { - type => "asciifolding", - preserve_original => \1, - }, - }, - analyzer => { - autocomplete => { - type => 'custom', - tokenizer => 'keyword', - filter => ['lowercase', 'asciifolding_original'], - }, - folding => { - tokenizer => 'standard', - filter => ['standard', 'lowercase', 'asciifolding_original'], - }, - bz_text_analyzer => { - type => 'standard', - filter => ['lowercase', 'stop'], - max_token_length => '20' - }, - bz_equals_analyzer => { - type => 'custom', - filter => ['lowercase'], - tokenizer => 'keyword', - }, - whiteboard_words => { - type => 'custom', - tokenizer => 'whiteboard_words_pattern', - filter => ['stop'] - }, - whiteboard_shingle_words => { - type => 'custom', - tokenizer => 'whiteboard_words_pattern', - filter => ['stop', 'shingle', 'lowercase'] - }, - whiteboard_tokens => { - type => 'custom', - tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'lowercase'] - }, - whiteboard_shingle_tokens => { - type => 'custom', - tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'shingle', 'lowercase'] - } - }, - tokenizer => { - whiteboard_tokens_pattern => { - type => 'pattern', - pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' - }, - whiteboard_words_pattern => { - type => 'pattern', - pattern => '[\\[\\];,\\s]+' - }, - }, - }, - }, - } - ) unless $indices->exists(index => $self->index_name); + +sub _create_index { + my ($self, $class) = @_; + my $indices = $self->client->indices; + my $index_name = $class->ES_INDEX; + + unless ($indices->exists(index => $index_name)) { + $indices->create( + index => $index_name, + body => { settings => $class->ES_SETTINGS }, + ); + } } sub _bulk_helper { my ($self, $class) = @_; return $self->client->bulk_helper( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, ); } - sub _find_largest { my 
($self, $class, $field) = @_; my $result = $self->client->search( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, body => { aggs => { $field => { extended_stats => { field => $field } } }, @@ -147,7 +87,7 @@ sub _find_largest_id { return $self->_find_largest($class, $class->ID_FIELD); } -sub put_mapping { +sub _put_mapping { my ($self, $class) = @_; my %body = ( properties => scalar $class->ES_PROPERTIES ); @@ -156,7 +96,7 @@ sub put_mapping { } $self->client->indices->put_mapping( - index => $self->index_name, + index => $class->ES_INDEX, type => $class->ES_TYPE, body => \%body, ); @@ -178,13 +118,15 @@ sub _debug_sql { sub bulk_load { my ( $self, $class ) = @_; + $self->_create_index($class); + my $bulk = $self->_bulk_helper($class); my $last_mtime = $self->_find_largest_mtime($class); my $last_id = $self->_find_largest_id($class); my $new_ids = $self->_select_all_ids($class, $last_id); my $updated_ids = $self->_select_updated_ids($class, $last_mtime); - $self->put_mapping($class); + $self->_put_mapping($class); $self->_bulk_load_ids($bulk, $class, $new_ids) if @$new_ids; $self->_bulk_load_ids($bulk, $class, $updated_ids) if @$updated_ids; @@ -213,7 +155,8 @@ sub _select_updated_ids { sub bulk_load_ids { my ($self, $class, $ids) = @_; - $self->put_mapping($class); + $self->_create_index($class); + $self->_put_mapping($class); $self->_bulk_load_ids($self->_bulk_helper($class), $class, $ids); } @@ -238,7 +181,6 @@ sub _bulk_load_ids { } my $total = 0; - use Time::HiRes; my $start = time; while (my @ids = $iter->()) { if ($progress_bar) { diff --git a/Bugzilla/Elastic/Role/ChildObject.pm b/Bugzilla/Elastic/Role/ChildObject.pm index 1f7a7483a..9735cc1ed 100644 --- a/Bugzilla/Elastic/Role/ChildObject.pm +++ b/Bugzilla/Elastic/Role/ChildObject.pm @@ -11,6 +11,10 @@ use Role::Tiny; with 'Bugzilla::Elastic::Role::Object'; -requires qw(ES_PARENT_TYPE es_parent_id); +requires qw(ES_PARENT_CLASS es_parent_id); + +sub ES_PARENT_TYPE { 
$_[0]->ES_PARENT_CLASS->ES_TYPE } +sub ES_INDEX { $_[0]->ES_PARENT_CLASS->ES_INDEX } +sub ES_SETTINGS { $_[0]->ES_PARENT_CLASS->ES_SETTINGS } 1; diff --git a/Bugzilla/Elastic/Role/HasIndexName.pm b/Bugzilla/Elastic/Role/HasIndexName.pm deleted file mode 100644 index eaff339cd..000000000 --- a/Bugzilla/Elastic/Role/HasIndexName.pm +++ /dev/null @@ -1,16 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# -# This Source Code Form is "Incompatible With Secondary Licenses", as -# defined by the Mozilla Public License, v. 2.0. -package Bugzilla::Elastic::Role::HasIndexName; - -use 5.10.1; -use Moo::Role; -use Search::Elasticsearch; - -has 'index_name' => ( is => 'ro', default => sub { Bugzilla->params->{elasticsearch_index} } ); - - -1; diff --git a/Bugzilla/Elastic/Role/Object.pm b/Bugzilla/Elastic/Role/Object.pm index ad5ab002b..c51948ee9 100644 --- a/Bugzilla/Elastic/Role/Object.pm +++ b/Bugzilla/Elastic/Role/Object.pm @@ -9,7 +9,7 @@ package Bugzilla::Elastic::Role::Object; use 5.10.1; use Role::Tiny; -requires qw(ES_TYPE ES_PROPERTIES es_document); +requires qw(ES_TYPE ES_INDEX ES_SETTINGS ES_PROPERTIES es_document); requires qw(ID_FIELD DB_TABLE); sub ES_OBJECTS_AT_ONCE { 100 } @@ -45,4 +45,6 @@ around 'es_document' => sub { return $doc; }; + + 1; diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm index e1af91032..26ab71bec 100644 --- a/Bugzilla/Elastic/Search.pm +++ b/Bugzilla/Elastic/Search.pm @@ -31,7 +31,6 @@ has '_order' => ( is => 'lazy', init_arg => undef ); has 'invalid_order_columns' => ( is => 'lazy' ); with 'Bugzilla::Elastic::Role::HasClient'; -with 'Bugzilla::Elastic::Role::HasIndexName'; with 'Bugzilla::Elastic::Role::Search'; my @SUPPORTED_FIELDS = qw( @@ -92,9 +91,9 @@ sub data { my $body = $self->es_query; my $result = eval { $self->client->search( - index => 
$self->index_name, - type => 'bug', - body => $body, + index => Bugzilla::Bug->ES_INDEX, + type => Bugzilla::Bug->ES_TYPE, + body => $body, ); }; die $@ unless $result; -- cgit v1.2.3-24-g4f1b