From 50fc9d77128db4f8565265047166c6ef61bac5c5 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Tue, 7 Mar 2017 17:47:36 -0500 Subject: Bug 1307485 - Add code to run a subset of buglist.cgi search queries against the ES backend --- Bugzilla/Elastic/Indexer.pm | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'Bugzilla/Elastic/Indexer.pm') diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm index 82f946af9..dd71a7198 100644 --- a/Bugzilla/Elastic/Indexer.pm +++ b/Bugzilla/Elastic/Indexer.pm @@ -23,7 +23,7 @@ has 'mtime' => ( has 'shadow_dbh' => ( is => 'lazy' ); has 'debug_sql' => ( - is => 'ro', + is => 'ro', default => 0, ); @@ -40,24 +40,24 @@ sub create_index { index => $self->index_name, body => { settings => { - number_of_shards => 1, + number_of_shards => 2, analysis => { + filter => { + asciifolding_original => { + type => "asciifolding", + preserve_original => \1, + }, + }, analyzer => { folding => { - type => 'standard', tokenizer => 'standard', - filter => [ 'lowercase', 'asciifolding' ] + filter => ['standard', 'lowercase', 'asciifolding_original'], }, bz_text_analyzer => { type => 'standard', filter => ['lowercase', 'stop'], max_token_length => '20' }, - bz_substring_analyzer => { - type => 'custom', - filter => ['lowercase'], - tokenizer => 'bz_ngram_tokenizer', - }, bz_equals_analyzer => { type => 'custom', filter => ['lowercase'], @@ -71,25 +71,20 @@ sub create_index { whiteboard_shingle_words => { type => 'custom', tokenizer => 'whiteboard_words_pattern', - filter => ['stop', 'shingle'] + filter => ['stop', 'shingle', 'lowercase'] }, whiteboard_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop'] + filter => ['stop', 'lowercase'] }, whiteboard_shingle_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'shingle'] + filter => ['stop', 'shingle', 'lowercase'] } }, tokenizer => { - bz_ngram_tokenizer => { - type => 'nGram', - min_ngram => 2, - max_ngram => 25, - }, whiteboard_tokens_pattern => { type => 'pattern', pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' -- cgit v1.2.3-24-g4f1b