diff options
Diffstat (limited to 'Bugzilla/Elastic/Indexer.pm')
-rw-r--r-- | Bugzilla/Elastic/Indexer.pm | 29 |
1 files changed, 12 insertions, 17 deletions
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm index 82f946af9..dd71a7198 100644 --- a/Bugzilla/Elastic/Indexer.pm +++ b/Bugzilla/Elastic/Indexer.pm @@ -23,7 +23,7 @@ has 'mtime' => ( has 'shadow_dbh' => ( is => 'lazy' ); has 'debug_sql' => ( - is => 'ro', + is => 'ro', default => 0, ); @@ -40,24 +40,24 @@ sub create_index { index => $self->index_name, body => { settings => { - number_of_shards => 1, + number_of_shards => 2, analysis => { + filter => { + asciifolding_original => { + type => "asciifolding", + preserve_original => \1, + }, + }, analyzer => { folding => { - type => 'standard', tokenizer => 'standard', - filter => [ 'lowercase', 'asciifolding' ] + filter => ['standard', 'lowercase', 'asciifolding_original'], }, bz_text_analyzer => { type => 'standard', filter => ['lowercase', 'stop'], max_token_length => '20' }, - bz_substring_analyzer => { - type => 'custom', - filter => ['lowercase'], - tokenizer => 'bz_ngram_tokenizer', - }, bz_equals_analyzer => { type => 'custom', filter => ['lowercase'], @@ -71,25 +71,20 @@ sub create_index { whiteboard_shingle_words => { type => 'custom', tokenizer => 'whiteboard_words_pattern', - filter => ['stop', 'shingle'] + filter => ['stop', 'shingle', 'lowercase'] }, whiteboard_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop'] + filter => ['stop', 'lowercase'] }, whiteboard_shingle_tokens => { type => 'custom', tokenizer => 'whiteboard_tokens_pattern', - filter => ['stop', 'shingle'] + filter => ['stop', 'shingle', 'lowercase'] } }, tokenizer => { - bz_ngram_tokenizer => { - type => 'nGram', - min_ngram => 2, - max_ngram => 25, - }, whiteboard_tokens_pattern => { type => 'pattern', pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' |