Bug 1307485 - Add code to run a subset of buglist.cgi search queries against the ES backend

author: Dylan William Hardison <dylan@hardison.net> 2017-03-07 23:47:36 +0100
committer: Dylan William Hardison <dylan@hardison.net> 2017-05-24 20:24:54 +0200
commit: 50fc9d77128db4f8565265047166c6ef61bac5c5 (patch)
tree: eadc23eeaca476412e36c63161f5a2e4ab96aabd /Bugzilla/Elastic/Indexer.pm
parent: 0ea91b298e8fc1c439f42c9288247e9223ce552e (diff)
download: bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.gz bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.xz
1 file changed, 12 insertions, 17 deletions
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm
index 82f946af9..dd71a7198 100644
--- a/Bugzilla/Elastic/Indexer.pm
+++ b/Bugzilla/Elastic/Indexer.pm
@@ -23,7 +23,7 @@ has 'mtime' => (
 has 'shadow_dbh' => ( is => 'lazy' );
 
 has 'debug_sql' => (
-    is => 'ro',
+    is      => 'ro',
     default => 0,
 );
 
@@ -40,24 +40,24 @@ sub create_index {
         index => $self->index_name,
         body => {
             settings => {
-                number_of_shards => 1,
+                number_of_shards => 2,
                 analysis => {
+                    filter => {
+                        asciifolding_original => { 
+                            type              => "asciifolding",
+                            preserve_original => \1,
+                        },
+                    },
                     analyzer => {
                         folding => {
-                            type      => 'standard',
                             tokenizer => 'standard',
-                            filter    => [ 'lowercase', 'asciifolding' ]
+                            filter    => ['standard', 'lowercase', 'asciifolding_original'],
                         },
                         bz_text_analyzer => {
                             type             => 'standard',
                             filter           => ['lowercase', 'stop'],
                             max_token_length => '20'
                         },
-                        bz_substring_analyzer => {
-                            type      => 'custom',
-                            filter    => ['lowercase'],
-                            tokenizer => 'bz_ngram_tokenizer',
-                        },
                         bz_equals_analyzer => {
                             type   => 'custom',
                             filter => ['lowercase'],
@@ -71,25 +71,20 @@ sub create_index {
                         whiteboard_shingle_words => {
                             type => 'custom',
                             tokenizer => 'whiteboard_words_pattern',
-                            filter => ['stop', 'shingle']
+                            filter => ['stop', 'shingle', 'lowercase']
                         },
                         whiteboard_tokens => {
                             type => 'custom',
                             tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop']
+                            filter => ['stop', 'lowercase']
                         },
                         whiteboard_shingle_tokens => {
                             type => 'custom',
                             tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop', 'shingle']
+                            filter => ['stop', 'shingle', 'lowercase']
                         }
                     },
                     tokenizer => {
-                        bz_ngram_tokenizer => {
-                            type => 'nGram',
-                            min_ngram => 2,
-                            max_ngram => 25,
-                        },
                         whiteboard_tokens_pattern => {
                             type => 'pattern',
                             pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
author	Dylan William Hardison <dylan@hardison.net>	2017-03-07 23:47:36 +0100
committer	Dylan William Hardison <dylan@hardison.net>	2017-05-24 20:24:54 +0200
commit	50fc9d77128db4f8565265047166c6ef61bac5c5 (patch)
tree	eadc23eeaca476412e36c63161f5a2e4ab96aabd /Bugzilla/Elastic/Indexer.pm
parent	0ea91b298e8fc1c439f42c9288247e9223ce552e (diff)
download	bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.gz bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.xz