From da9c617eb5d8345409386b85b5d8c167fc0c55d7 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Thu, 20 Jul 2017 16:36:56 -0400 Subject: Bug 1381869 - Use separate elasticsearch index for Bugzilla::User This patch removes the concept of a single, bugzilla-wide index in favor of a per-class index. Bugs and comments continue to use Bugzilla->params->{elasticsearch_index} but users use Bugzilla->params->{elasticsearch_index} . "_user". It is assured via the ChildObject trait (role) that comments will share the index with bugs, and we have kept the index for bugs/comments the same to avoid the multi-hour reindexing of production. Re-indexing users takes only five minutes. Subsequent work on this will allow us to version the index names and use aliases, but I wanted to keep this patch small. This patch also corrects some mistakes: 1. $indexer->put_mapping() should not have been a public method. 2. Time::HiRes should be imported at the top of the file, not in a sub. --- Bugzilla/Bug.pm | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'Bugzilla/Bug.pm') diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm index eb228d27c..ee48ed7a2 100644 --- a/Bugzilla/Bug.pm +++ b/Bugzilla/Bug.pm @@ -302,6 +302,73 @@ with 'Bugzilla::Elastic::Role::Object'; sub ES_TYPE {'bug'} +sub ES_INDEX { Bugzilla->params->{elasticsearch_index} } + +sub ES_SETTINGS { + return { + number_of_shards => 2, + analysis => { + filter => { + asciifolding_original => { + type => "asciifolding", + preserve_original => \1, + }, + }, + analyzer => { + autocomplete => { + type => 'custom', + tokenizer => 'keyword', + filter => [ 'lowercase', 'asciifolding_original' ], + }, + folding => { + tokenizer => 'standard', + filter => [ 'standard', 'lowercase', 'asciifolding_original' ], + }, + bz_text_analyzer => { + type => 'standard', + filter => [ 'lowercase', 'stop' ], + max_token_length => '20' + }, + bz_equals_analyzer => { + type => 'custom', + filter 
=> ['lowercase'], + tokenizer => 'keyword', + }, + whiteboard_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => ['stop'] + }, + whiteboard_shingle_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => [ 'stop', 'shingle', 'lowercase' ] + }, + whiteboard_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => [ 'stop', 'lowercase' ] + }, + whiteboard_shingle_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => [ 'stop', 'shingle', 'lowercase' ] + } + }, + tokenizer => { + whiteboard_tokens_pattern => { + type => 'pattern', + pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' + }, + whiteboard_words_pattern => { + type => 'pattern', + pattern => '[\\[\\];,\\s]+' + }, + }, + }, + }; +} + sub _bz_field { my ($field, @fields) = @_; -- cgit v1.2.3-24-g4f1b