From da9c617eb5d8345409386b85b5d8c167fc0c55d7 Mon Sep 17 00:00:00 2001
From: Dylan William Hardison <dylan@hardison.net>
Date: Thu, 20 Jul 2017 16:36:56 -0400
Subject: Bug 1381869 - Use separate elasticsearch index for Bugzilla::User

This patch removes the concept of a single, bugzilla-wide index in favor of a
per-class index. bugs and comments continue to use
Bugzilla->params->{elasticsearch_index} but users use
Bugzilla->params->{elasticsearch_index} . "_user".

It is assured via the ChildObject trait (role) that comments will share the
index with bugs, and we have kept the index for bugs/comments the same to avoid
the multi-hour reindexing of production. Re-indexing users takes only five
minutes.

Subsequent work on this will allow us to version the index names and use
aliases, but I wanted to keep this patch small.

This patch also corrects some mistakes:

1. $indexer->put_mapping() should not have been a public method.
2. Time::HiRes should be imported at the top of the file, not in a sub.
---
 Bugzilla/Bug.pm | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'Bugzilla/Bug.pm')

diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm
index eb228d27c..ee48ed7a2 100644
--- a/Bugzilla/Bug.pm
+++ b/Bugzilla/Bug.pm
@@ -302,6 +302,73 @@ with 'Bugzilla::Elastic::Role::Object';
 
 sub ES_TYPE {'bug'}
 
+sub ES_INDEX { return Bugzilla->params->{elasticsearch_index}; }    # index name comes from the elasticsearch_index param
+
+# Elasticsearch index settings for bugs: the shard count plus the custom
+# filters, tokenizers and analyzers that make up the analysis section.
+sub ES_SETTINGS {
+    # Custom "asciifolding" token filter with preserve_original switched on.
+    my %filter = (
+        asciifolding_original => { type => "asciifolding", preserve_original => \1 },
+    );
+
+    # Pattern tokenizers referenced by the whiteboard_* analyzers.
+    my %tokenizer = (
+        whiteboard_tokens_pattern => {
+            type    => 'pattern',
+            pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*',
+        },
+        whiteboard_words_pattern => { type => 'pattern', pattern => '[\\[\\];,\\s]+' },
+    );
+
+    my %analyzer = (
+        autocomplete => {
+            type      => 'custom',
+            tokenizer => 'keyword',
+            filter    => [qw(lowercase asciifolding_original)],
+        },
+        folding => {
+            tokenizer => 'standard',
+            filter    => [qw(standard lowercase asciifolding_original)],
+        },
+        bz_text_analyzer => {
+            type             => 'standard',
+            filter           => [qw(lowercase stop)],
+            max_token_length => '20',
+        },
+        bz_equals_analyzer => {
+            type      => 'custom',
+            tokenizer => 'keyword',
+            filter    => [qw(lowercase)],
+        },
+        whiteboard_words => {
+            type      => 'custom',
+            tokenizer => 'whiteboard_words_pattern',
+            filter    => [qw(stop)],
+        },
+        whiteboard_shingle_words => {
+            type      => 'custom',
+            tokenizer => 'whiteboard_words_pattern',
+            filter    => [qw(stop shingle lowercase)],
+        },
+        whiteboard_tokens => {
+            type      => 'custom',
+            tokenizer => 'whiteboard_tokens_pattern',
+            filter    => [qw(stop lowercase)],
+        },
+        whiteboard_shingle_tokens => {
+            type      => 'custom',
+            tokenizer => 'whiteboard_tokens_pattern',
+            filter    => [qw(stop shingle lowercase)],
+        },
+    );
+
+    return {
+        number_of_shards => 2,
+        analysis => { filter => \%filter, analyzer => \%analyzer, tokenizer => \%tokenizer },
+    };
+}
+
 sub _bz_field {
     my ($field, @fields) = @_;
 
-- 
cgit v1.2.3-24-g4f1b