summaryrefslogtreecommitdiffstats
path: root/Bugzilla
diff options
context:
space:
mode:
authorDylan William Hardison <dylan@hardison.net>2017-07-20 22:36:56 +0200
committerGitHub <noreply@github.com>2017-07-20 22:36:56 +0200
commitda9c617eb5d8345409386b85b5d8c167fc0c55d7 (patch)
tree0938a7253553df05d30df952d6fc2b6e1393743b /Bugzilla
parent608ec172071f5adfe8eba9c83d42668520a2baea (diff)
downloadbugzilla-da9c617eb5d8345409386b85b5d8c167fc0c55d7.tar.gz
bugzilla-da9c617eb5d8345409386b85b5d8c167fc0c55d7.tar.xz
Bug 1381869 - Use separate elasticsearch index for Bugzilla::User
This patch removes the concept of a single, bugzilla-wide index in favor of a per-class index. Bugs and comments continue to use Bugzilla->params->{elasticsearch_index} but users use Bugzilla->params->{elasticsearch_index} . "_user". It is assured via the ChildObject trait (role) that comments will share the index with bugs, and we have kept the index for bugs/comments the same to avoid the multi-hour reindexing of production. Re-indexing users takes only five minutes. Subsequent work on this will allow us to version the index names and use aliases, but I wanted to keep this patch small. This patch also corrects some mistakes: 1. $indexer->put_mapping() should not have been a public method. 2. Time::HiRes should be imported at the top of the file, not in a sub.
Diffstat (limited to 'Bugzilla')
-rw-r--r--Bugzilla/Bug.pm67
-rw-r--r--Bugzilla/Comment.pm4
-rw-r--r--Bugzilla/Elastic.pm3
-rw-r--r--Bugzilla/Elastic/Indexer.pm102
-rw-r--r--Bugzilla/Elastic/Role/ChildObject.pm6
-rw-r--r--Bugzilla/Elastic/Role/HasIndexName.pm16
-rw-r--r--Bugzilla/Elastic/Role/Object.pm4
-rw-r--r--Bugzilla/Elastic/Search.pm7
-rw-r--r--Bugzilla/User.pm30
9 files changed, 133 insertions, 106 deletions
diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm
index eb228d27c..ee48ed7a2 100644
--- a/Bugzilla/Bug.pm
+++ b/Bugzilla/Bug.pm
@@ -302,6 +302,73 @@ with 'Bugzilla::Elastic::Role::Object';
sub ES_TYPE {'bug'}
+sub ES_INDEX { Bugzilla->params->{elasticsearch_index} }
+
+sub ES_SETTINGS {
+ return {
+ number_of_shards => 2,
+ analysis => {
+ filter => {
+ asciifolding_original => {
+ type => "asciifolding",
+ preserve_original => \1,
+ },
+ },
+ analyzer => {
+ autocomplete => {
+ type => 'custom',
+ tokenizer => 'keyword',
+ filter => [ 'lowercase', 'asciifolding_original' ],
+ },
+ folding => {
+ tokenizer => 'standard',
+ filter => [ 'standard', 'lowercase', 'asciifolding_original' ],
+ },
+ bz_text_analyzer => {
+ type => 'standard',
+ filter => [ 'lowercase', 'stop' ],
+ max_token_length => '20'
+ },
+ bz_equals_analyzer => {
+ type => 'custom',
+ filter => ['lowercase'],
+ tokenizer => 'keyword',
+ },
+ whiteboard_words => {
+ type => 'custom',
+ tokenizer => 'whiteboard_words_pattern',
+ filter => ['stop']
+ },
+ whiteboard_shingle_words => {
+ type => 'custom',
+ tokenizer => 'whiteboard_words_pattern',
+ filter => [ 'stop', 'shingle', 'lowercase' ]
+ },
+ whiteboard_tokens => {
+ type => 'custom',
+ tokenizer => 'whiteboard_tokens_pattern',
+ filter => [ 'stop', 'lowercase' ]
+ },
+ whiteboard_shingle_tokens => {
+ type => 'custom',
+ tokenizer => 'whiteboard_tokens_pattern',
+ filter => [ 'stop', 'shingle', 'lowercase' ]
+ }
+ },
+ tokenizer => {
+ whiteboard_tokens_pattern => {
+ type => 'pattern',
+ pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
+ },
+ whiteboard_words_pattern => {
+ type => 'pattern',
+ pattern => '[\\[\\];,\\s]+'
+ },
+ },
+ },
+ };
+}
+
sub _bz_field {
my ($field, @fields) = @_;
diff --git a/Bugzilla/Comment.pm b/Bugzilla/Comment.pm
index 14f28cbe5..23c1d3f85 100644
--- a/Bugzilla/Comment.pm
+++ b/Bugzilla/Comment.pm
@@ -80,8 +80,8 @@ use constant VALIDATOR_DEPENDENCIES => {
with 'Bugzilla::Elastic::Role::ChildObject';
-use constant ES_TYPE => 'comment';
-use constant ES_PARENT_TYPE => 'bug';
+use constant ES_TYPE => 'comment';
+use constant ES_PARENT_CLASS => 'Bugzilla::Bug';
sub ES_OBJECTS_AT_ONCE { 50 }
diff --git a/Bugzilla/Elastic.pm b/Bugzilla/Elastic.pm
index fa032d2a6..3a3829e3b 100644
--- a/Bugzilla/Elastic.pm
+++ b/Bugzilla/Elastic.pm
@@ -12,7 +12,6 @@ use Bugzilla::Elastic::Search;
use Bugzilla::Util qw(trick_taint);
with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
sub suggest_users {
my ($self, $text) = @_;
@@ -30,7 +29,7 @@ sub suggest_users {
my $result = eval {
$self->client->suggest(
- index => $self->index_name,
+ index => Bugzilla::User->ES_INDEX,
body => {
$field => {
text => $text,
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm
index 46eb8f648..36dd1dcb4 100644
--- a/Bugzilla/Elastic/Indexer.pm
+++ b/Bugzilla/Elastic/Indexer.pm
@@ -11,10 +11,10 @@ use Moo;
use List::MoreUtils qw(natatime);
use Storable qw(dclone);
use Scalar::Util qw(looks_like_number);
+use Time::HiRes;
use namespace::clean;
with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
has 'shadow_dbh' => ( is => 'lazy' );
@@ -28,94 +28,34 @@ has 'progress_bar' => (
predicate => 'has_progress_bar',
);
-sub create_index {
- my ($self) = @_;
- my $indices = $self->client->indices;
-
- $indices->create(
- index => $self->index_name,
- body => {
- settings => {
- number_of_shards => 2,
- analysis => {
- filter => {
- asciifolding_original => {
- type => "asciifolding",
- preserve_original => \1,
- },
- },
- analyzer => {
- autocomplete => {
- type => 'custom',
- tokenizer => 'keyword',
- filter => ['lowercase', 'asciifolding_original'],
- },
- folding => {
- tokenizer => 'standard',
- filter => ['standard', 'lowercase', 'asciifolding_original'],
- },
- bz_text_analyzer => {
- type => 'standard',
- filter => ['lowercase', 'stop'],
- max_token_length => '20'
- },
- bz_equals_analyzer => {
- type => 'custom',
- filter => ['lowercase'],
- tokenizer => 'keyword',
- },
- whiteboard_words => {
- type => 'custom',
- tokenizer => 'whiteboard_words_pattern',
- filter => ['stop']
- },
- whiteboard_shingle_words => {
- type => 'custom',
- tokenizer => 'whiteboard_words_pattern',
- filter => ['stop', 'shingle', 'lowercase']
- },
- whiteboard_tokens => {
- type => 'custom',
- tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop', 'lowercase']
- },
- whiteboard_shingle_tokens => {
- type => 'custom',
- tokenizer => 'whiteboard_tokens_pattern',
- filter => ['stop', 'shingle', 'lowercase']
- }
- },
- tokenizer => {
- whiteboard_tokens_pattern => {
- type => 'pattern',
- pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
- },
- whiteboard_words_pattern => {
- type => 'pattern',
- pattern => '[\\[\\];,\\s]+'
- },
- },
- },
- },
- }
- ) unless $indices->exists(index => $self->index_name);
+
+sub _create_index {
+ my ($self, $class) = @_;
+ my $indices = $self->client->indices;
+ my $index_name = $class->ES_INDEX;
+
+ unless ($indices->exists(index => $index_name)) {
+ $indices->create(
+ index => $index_name,
+ body => { settings => $class->ES_SETTINGS },
+ );
+ }
}
sub _bulk_helper {
my ($self, $class) = @_;
return $self->client->bulk_helper(
- index => $self->index_name,
+ index => $class->ES_INDEX,
type => $class->ES_TYPE,
);
}
-
sub _find_largest {
my ($self, $class, $field) = @_;
my $result = $self->client->search(
- index => $self->index_name,
+ index => $class->ES_INDEX,
type => $class->ES_TYPE,
body => {
aggs => { $field => { extended_stats => { field => $field } } },
@@ -147,7 +87,7 @@ sub _find_largest_id {
return $self->_find_largest($class, $class->ID_FIELD);
}
-sub put_mapping {
+sub _put_mapping {
my ($self, $class) = @_;
my %body = ( properties => scalar $class->ES_PROPERTIES );
@@ -156,7 +96,7 @@ sub put_mapping {
}
$self->client->indices->put_mapping(
- index => $self->index_name,
+ index => $class->ES_INDEX,
type => $class->ES_TYPE,
body => \%body,
);
@@ -178,13 +118,15 @@ sub _debug_sql {
sub bulk_load {
my ( $self, $class ) = @_;
+ $self->_create_index($class);
+
my $bulk = $self->_bulk_helper($class);
my $last_mtime = $self->_find_largest_mtime($class);
my $last_id = $self->_find_largest_id($class);
my $new_ids = $self->_select_all_ids($class, $last_id);
my $updated_ids = $self->_select_updated_ids($class, $last_mtime);
- $self->put_mapping($class);
+ $self->_put_mapping($class);
$self->_bulk_load_ids($bulk, $class, $new_ids) if @$new_ids;
$self->_bulk_load_ids($bulk, $class, $updated_ids) if @$updated_ids;
@@ -213,7 +155,8 @@ sub _select_updated_ids {
sub bulk_load_ids {
my ($self, $class, $ids) = @_;
- $self->put_mapping($class);
+ $self->_create_index($class);
+ $self->_put_mapping($class);
$self->_bulk_load_ids($self->_bulk_helper($class), $class, $ids);
}
@@ -238,7 +181,6 @@ sub _bulk_load_ids {
}
my $total = 0;
- use Time::HiRes;
my $start = time;
while (my @ids = $iter->()) {
if ($progress_bar) {
diff --git a/Bugzilla/Elastic/Role/ChildObject.pm b/Bugzilla/Elastic/Role/ChildObject.pm
index 1f7a7483a..9735cc1ed 100644
--- a/Bugzilla/Elastic/Role/ChildObject.pm
+++ b/Bugzilla/Elastic/Role/ChildObject.pm
@@ -11,6 +11,10 @@ use Role::Tiny;
with 'Bugzilla::Elastic::Role::Object';
-requires qw(ES_PARENT_TYPE es_parent_id);
+requires qw(ES_PARENT_CLASS es_parent_id);
+
+sub ES_PARENT_TYPE { $_[0]->ES_PARENT_CLASS->ES_TYPE }
+sub ES_INDEX { $_[0]->ES_PARENT_CLASS->ES_INDEX }
+sub ES_SETTINGS { $_[0]->ES_PARENT_CLASS->ES_SETTINGS }
1;
diff --git a/Bugzilla/Elastic/Role/HasIndexName.pm b/Bugzilla/Elastic/Role/HasIndexName.pm
deleted file mode 100644
index eaff339cd..000000000
--- a/Bugzilla/Elastic/Role/HasIndexName.pm
+++ /dev/null
@@ -1,16 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-package Bugzilla::Elastic::Role::HasIndexName;
-
-use 5.10.1;
-use Moo::Role;
-use Search::Elasticsearch;
-
-has 'index_name' => ( is => 'ro', default => sub { Bugzilla->params->{elasticsearch_index} } );
-
-
-1;
diff --git a/Bugzilla/Elastic/Role/Object.pm b/Bugzilla/Elastic/Role/Object.pm
index ad5ab002b..c51948ee9 100644
--- a/Bugzilla/Elastic/Role/Object.pm
+++ b/Bugzilla/Elastic/Role/Object.pm
@@ -9,7 +9,7 @@ package Bugzilla::Elastic::Role::Object;
use 5.10.1;
use Role::Tiny;
-requires qw(ES_TYPE ES_PROPERTIES es_document);
+requires qw(ES_TYPE ES_INDEX ES_SETTINGS ES_PROPERTIES es_document);
requires qw(ID_FIELD DB_TABLE);
sub ES_OBJECTS_AT_ONCE { 100 }
@@ -45,4 +45,6 @@ around 'es_document' => sub {
return $doc;
};
+
+
1;
diff --git a/Bugzilla/Elastic/Search.pm b/Bugzilla/Elastic/Search.pm
index e1af91032..26ab71bec 100644
--- a/Bugzilla/Elastic/Search.pm
+++ b/Bugzilla/Elastic/Search.pm
@@ -31,7 +31,6 @@ has '_order' => ( is => 'lazy', init_arg => undef );
has 'invalid_order_columns' => ( is => 'lazy' );
with 'Bugzilla::Elastic::Role::HasClient';
-with 'Bugzilla::Elastic::Role::HasIndexName';
with 'Bugzilla::Elastic::Role::Search';
my @SUPPORTED_FIELDS = qw(
@@ -92,9 +91,9 @@ sub data {
my $body = $self->es_query;
my $result = eval {
$self->client->search(
- index => $self->index_name,
- type => 'bug',
- body => $body,
+ index => Bugzilla::Bug->ES_INDEX,
+ type => Bugzilla::Bug->ES_TYPE,
+ body => $body,
);
};
die $@ unless $result;
diff --git a/Bugzilla/User.pm b/Bugzilla/User.pm
index 2d7f38640..525733069 100644
--- a/Bugzilla/User.pm
+++ b/Bugzilla/User.pm
@@ -126,6 +126,11 @@ use constant EXTRA_REQUIRED_FIELDS => qw(is_enabled);
with 'Bugzilla::Elastic::Role::Object';
+sub ES_INDEX {
+ my ($class) = @_;
+ sprintf("%s_%s", Bugzilla->params->{elasticsearch_index}, $class->ES_TYPE);
+}
+
sub ES_TYPE { 'user' }
sub ES_OBJECTS_AT_ONCE { 5000 }
@@ -153,6 +158,31 @@ sub ES_SELECT_ALL_SQL {
return ("SELECT $id FROM $table WHERE $id > ? AND is_enabled AND NOT disabledtext ORDER BY $id", [$last_id // 0]);
}
+sub ES_SETTINGS {
+ return {
+ number_of_shards => 2,
+ analysis => {
+ filter => {
+ asciifolding_original => {
+ type => "asciifolding",
+ preserve_original => \1,
+ },
+ },
+ analyzer => {
+ autocomplete => {
+ type => 'custom',
+ tokenizer => 'keyword',
+ filter => [ 'lowercase', 'asciifolding_original' ],
+ },
+ folding => {
+ tokenizer => 'standard',
+ filter => [ 'standard', 'lowercase', 'asciifolding_original' ],
+ },
+ }
+ }
+ };
+}
+
sub ES_PROPERTIES {
return {
suggest_user => {