diff options
author | Dylan William Hardison <dylan@hardison.net> | 2017-03-07 23:47:36 +0100 |
---|---|---|
committer | Dylan William Hardison <dylan@hardison.net> | 2017-05-24 20:24:54 +0200 |
commit | 50fc9d77128db4f8565265047166c6ef61bac5c5 (patch) | |
tree | eadc23eeaca476412e36c63161f5a2e4ab96aabd /Bugzilla/Elastic/Indexer.pm | |
parent | 0ea91b298e8fc1c439f42c9288247e9223ce552e (diff) | |
download | bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.gz bugzilla-50fc9d77128db4f8565265047166c6ef61bac5c5.tar.xz |
Bug 1307485 - Add code to run a subset of buglist.cgi search queries against the ES backend
Diffstat (limited to 'Bugzilla/Elastic/Indexer.pm')
-rw-r--r-- | Bugzilla/Elastic/Indexer.pm | 29 |
1 files changed, 12 insertions, 17 deletions
```diff
diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm
index 82f946af9..dd71a7198 100644
--- a/Bugzilla/Elastic/Indexer.pm
+++ b/Bugzilla/Elastic/Indexer.pm
@@ -23,7 +23,7 @@ has 'mtime' => (
 has 'shadow_dbh' => ( is => 'lazy' );
 
 has 'debug_sql' => (
-    is => 'ro',
+    is => 'ro',
     default => 0,
 );
 
@@ -40,24 +40,24 @@ sub create_index {
         index => $self->index_name,
         body => {
             settings => {
-                number_of_shards => 1,
+                number_of_shards => 2,
                 analysis => {
+                    filter => {
+                        asciifolding_original => {
+                            type => "asciifolding",
+                            preserve_original => \1,
+                        },
+                    },
                     analyzer => {
                         folding => {
-                            type => 'standard',
                             tokenizer => 'standard',
-                            filter => [ 'lowercase', 'asciifolding' ]
+                            filter => ['standard', 'lowercase', 'asciifolding_original'],
                         },
                         bz_text_analyzer => {
                             type => 'standard',
                             filter => ['lowercase', 'stop'],
                             max_token_length => '20'
                         },
-                        bz_substring_analyzer => {
-                            type => 'custom',
-                            filter => ['lowercase'],
-                            tokenizer => 'bz_ngram_tokenizer',
-                        },
                         bz_equals_analyzer => {
                             type => 'custom',
                             filter => ['lowercase'],
@@ -71,25 +71,20 @@ sub create_index {
                         whiteboard_shingle_words => {
                             type => 'custom',
                             tokenizer => 'whiteboard_words_pattern',
-                            filter => ['stop', 'shingle']
+                            filter => ['stop', 'shingle', 'lowercase']
                         },
                         whiteboard_tokens => {
                             type => 'custom',
                             tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop']
+                            filter => ['stop', 'lowercase']
                         },
                         whiteboard_shingle_tokens => {
                             type => 'custom',
                             tokenizer => 'whiteboard_tokens_pattern',
-                            filter => ['stop', 'shingle']
+                            filter => ['stop', 'shingle', 'lowercase']
                         }
                     },
                     tokenizer => {
-                        bz_ngram_tokenizer => {
-                            type => 'nGram',
-                            min_ngram => 2,
-                            max_ngram => 25,
-                        },
                         whiteboard_tokens_pattern => {
                             type => 'pattern',
                             pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*'
```