From 419f3ae9fd57fc4e03146a830f7ed780ace83937 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Sat, 19 Nov 2016 18:12:39 -0500 Subject: Bug 1307478 - Elasticsearch Indexer / Bulk Indexer --- Bugzilla/Bug.pm | 144 ++++++++++- Bugzilla/Comment.pm | 54 ++++ Bugzilla/Config/Elastic.pm | 33 +++ Bugzilla/Elastic/Indexer.pm | 280 +++++++++++++++++++++ Bugzilla/Elastic/Role/ChildObject.pm | 16 ++ Bugzilla/Elastic/Role/HasClient.pm | 25 ++ Bugzilla/Elastic/Role/HasIndexName.pm | 16 ++ Bugzilla/Elastic/Role/Object.pm | 48 ++++ Bugzilla/User.pm | 76 ++++++ Bugzilla/Util.pm | 3 + META.json | 22 +- META.yml | 15 +- Makefile.PL | 15 ++ scripts/bulk_index.pl | 70 ++++++ t/002goodperl.t | 6 + template/en/default/admin/params/elastic.html.tmpl | 22 ++ 16 files changed, 840 insertions(+), 5 deletions(-) create mode 100644 Bugzilla/Config/Elastic.pm create mode 100644 Bugzilla/Elastic/Indexer.pm create mode 100644 Bugzilla/Elastic/Role/ChildObject.pm create mode 100644 Bugzilla/Elastic/Role/HasClient.pm create mode 100644 Bugzilla/Elastic/Role/HasIndexName.pm create mode 100644 Bugzilla/Elastic/Role/Object.pm create mode 100644 scripts/bulk_index.pl create mode 100644 template/en/default/admin/params/elastic.html.tmpl diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm index 637d94114..cba973863 100644 --- a/Bugzilla/Bug.pm +++ b/Bugzilla/Bug.pm @@ -37,6 +37,7 @@ use Storable qw(dclone); use URI; use URI::QueryParam; use Scalar::Util qw(blessed weaken); +use Role::Tiny::With; use base qw(Bugzilla::Object Exporter); @Bugzilla::Bug::EXPORT = qw( @@ -297,6 +298,148 @@ use constant REQUIRED_FIELD_MAP => { # mandatory groups get set on bugs. use constant EXTRA_REQUIRED_FIELDS => qw(creation_ts target_milestone cc qa_contact groups); +with 'Bugzilla::Elastic::Role::Object'; + +sub ES_TYPE {'bug'} + +sub _bz_field { + my ($field, $type, $analyzer, @fields) = @_; + + return ( + $field => { + type => $type, + analyzer => $analyzer, + fields => { + raw => { + type => 'string', + index => 'not_analyzed', + }, + eq => { + type => 'string', + analyzer => 'bz_equals_analyzer', + }, + @fields, + }, + }, + ); +} + +sub _bz_text_field { + my ($field) = @_; + + return _bz_field($field, 'string', 'bz_text_analyzer'); +} + +sub _bz_substring_field { + my ($field, @rest) = @_; + + return _bz_field($field, 'string', 'bz_substring_analyzer', @rest); +} + +sub ES_PROPERTIES { + return { + priority => { type => 'string', analyzer => 'keyword' }, + bug_severity => { type => 'string', analyzer => 'keyword' }, + bug_status => { type => 'string', analyzer => 'keyword' }, + resolution => { type => 'string', analyzer => 'keyword' }, + keywords => { type => 'string' }, + status_whiteboard => { type => 'string', analyzer => 'whiteboard_shingle_tokens' }, + delta_ts => { type => 'string', index => 'not_analyzed' }, + _bz_substring_field('product'), + _bz_substring_field('component'), + _bz_substring_field('classification'), + _bz_text_field('short_desc'), + _bz_substring_field('assigned_to'), + }; +} + +sub ES_OBJECTS_AT_ONCE { 4000 } + +sub ES_SELECT_UPDATED_SQL { + my ($class, $mtime) = @_; + + my @fields = ( + 'keywords', 'short_desc', 'product', 'component', + 'cf_crash_signature', 'alias', 'status_whiteboard', + 'bug_status', 'resolution', 'priority', 'assigned_to' + ); + my $fields = join(', ', ("?") x @fields); + + my $sql = qq{ + SELECT DISTINCT + bug_id + FROM + bugs_activity + JOIN + fielddefs ON fieldid = fielddefs.id + WHERE + bug_when > FROM_UNIXTIME(?) + AND fielddefs.name IN ($fields) + UNION SELECT DISTINCT + bug_id + FROM + audit_log + JOIN + bugs ON bugs.assigned_to = object_id + WHERE + class = 'Bugzilla::User' + AND at_time > FROM_UNIXTIME(?) + UNION SELECT DISTINCT + bug_id + FROM + audit_log + JOIN + bugs ON bugs.product_id = object_id + WHERE + class = 'Bugzilla::Product' + AND field = 'name' + AND at_time > FROM_UNIXTIME(?) + UNION SELECT DISTINCT + bug_id + FROM + audit_log + JOIN + bugs ON bugs.component_id = object_id + WHERE + class = 'Bugzilla::Component' + AND field = 'name' + AND at_time > FROM_UNIXTIME(?) + UNION SELECT DISTINCT + bug_id + FROM + audit_log + JOIN + products ON classification_id = object_id + JOIN + bugs ON product_id = products.id + WHERE + class = 'Bugzilla::Classification' + AND field = 'name' + AND at_time > FROM_UNIXTIME(?) + }; + return ($sql, [$mtime, @fields, $mtime, $mtime, $mtime, $mtime]); +} + +sub es_document { + my ($self) = @_; + return { + bug_id => $self->id, + product => $self->product_obj->name, + alias => $self->alias, + keywords => $self->keywords, + priority => $self->priority, + bug_status => $self->bug_status, + resolution => $self->resolution, + component => $self->component_obj->name, + classification => $self->product_obj->classification->name, + status_whiteboard => $self->status_whiteboard, + short_desc => $self->short_desc, + assigned_to => $self->assigned_to->login, + delta_ts => $self->delta_ts, + bug_severity => $self->bug_severity, + }; +} + ##################################################################### sub new { @@ -2385,7 +2528,6 @@ sub _set_global_validator { $self->_check_field_is_mandatory($value, $field); } - ################# # "Set" Methods # ################# diff --git a/Bugzilla/Comment.pm b/Bugzilla/Comment.pm index 7c2d5c4ea..911b26775 100644 --- a/Bugzilla/Comment.pm +++ b/Bugzilla/Comment.pm @@ -23,6 +23,7 @@ use Bugzilla::Util; use List::Util qw(first); use Scalar::Util qw(blessed weaken isweak); +use Role::Tiny::With; ############################### #### Initialization #### @@ -77,6 +78,59 @@ use constant VALIDATOR_DEPENDENCIES => { isprivate => ['who'], }; +with 'Bugzilla::Elastic::Role::ChildObject'; + +use constant ES_TYPE => 'comment'; +use constant ES_PARENT_TYPE => 'bug'; + +sub ES_OBJECTS_AT_ONCE { 50 } + +sub ES_PROPERTIES { + return { + body => { type => "string", analyzer => 'bz_text_analyzer' }, + is_private => { type => "boolean" }, + tags => { type => "string" }, + }; +} + +sub ES_SELECT_UPDATED_SQL { + my ($class, $mtime) = @_; + + my $sql = q{ + SELECT DISTINCT + comment_id + FROM + bugs_activity AS event + JOIN + fielddefs ON fieldid = fielddefs.id + WHERE + fielddefs.name = 'longdescs.isprivate' + AND bug_when > FROM_UNIXTIME(?) + UNION SELECT DISTINCT + comment_id + FROM + longdescs_activity + WHERE + change_when > FROM_UNIXTIME(?) + }; + return ($sql, [$mtime, $mtime]); +} + +sub es_parent_id { + my ($self) = @_; + + return $self->bug_id, +} + +sub es_document { + my ($self) = @_; + + return { + body => $self->body, + is_private => $self->is_private, + }; +} + ######################### # Database Manipulation # ######################### diff --git a/Bugzilla/Config/Elastic.pm b/Bugzilla/Config/Elastic.pm new file mode 100644 index 000000000..2b2513f1b --- /dev/null +++ b/Bugzilla/Config/Elastic.pm @@ -0,0 +1,33 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. + +package Bugzilla::Config::Elastic; + +use 5.10.1; +use strict; +use warnings; + +use Bugzilla::Config::Common; + +our $sortkey = 1550; + +sub get_param_list { + return ( + { + name => 'elasticsearch_nodes', + type => 't', + default => 'localhost:9200', + }, + { + name => 'elasticsearch_index', + type => 't', + default => 'bugzilla', + }, + ); +} + +1; diff --git a/Bugzilla/Elastic/Indexer.pm b/Bugzilla/Elastic/Indexer.pm new file mode 100644 index 000000000..82f946af9 --- /dev/null +++ b/Bugzilla/Elastic/Indexer.pm @@ -0,0 +1,280 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +package Bugzilla::Elastic::Indexer; + +use 5.10.1; +use Moo; +use List::MoreUtils qw(natatime); +use Storable qw(dclone); +use namespace::clean; + +with 'Bugzilla::Elastic::Role::HasClient'; +with 'Bugzilla::Elastic::Role::HasIndexName'; + +has 'mtime' => ( + is => 'lazy', + clearer => 'clear_mtime', +); + +has 'shadow_dbh' => ( is => 'lazy' ); + +has 'debug_sql' => ( + is => 'ro', + default => 0, +); + +has 'progress_bar' => ( + is => 'ro', + predicate => 'has_progress_bar', +); + +sub create_index { + my ($self) = @_; + my $indices = $self->client->indices; + + $indices->create( + index => $self->index_name, + body => { + settings => { + number_of_shards => 1, + analysis => { + analyzer => { + folding => { + type => 'standard', + tokenizer => 'standard', + filter => [ 'lowercase', 'asciifolding' ] + }, + bz_text_analyzer => { + type => 'standard', + filter => ['lowercase', 'stop'], + max_token_length => '20' + }, + bz_substring_analyzer => { + type => 'custom', + filter => ['lowercase'], + tokenizer => 'bz_ngram_tokenizer', + }, + bz_equals_analyzer => { + type => 'custom', + filter => ['lowercase'], + tokenizer => 'keyword', + }, + whiteboard_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => ['stop'] + }, + whiteboard_shingle_words => { + type => 'custom', + tokenizer => 'whiteboard_words_pattern', + filter => ['stop', 'shingle'] + }, + whiteboard_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => ['stop'] + }, + whiteboard_shingle_tokens => { + type => 'custom', + tokenizer => 'whiteboard_tokens_pattern', + filter => ['stop', 'shingle'] + } + }, + tokenizer => { + bz_ngram_tokenizer => { + type => 'nGram', + min_ngram => 2, + max_ngram => 25, + }, + whiteboard_tokens_pattern => { + type => 'pattern', + pattern => '\\s*([,;]*\\[|\\][\\s\\[]*|[;,])\\s*' + }, + whiteboard_words_pattern => { + type => 'pattern', + pattern => '[\\[\\];,\\s]+' + }, + }, + }, + }, + } + ) unless $indices->exists(index => $self->index_name); +} + +sub _bulk_helper { + my ($self, $class) = @_; + + return $self->client->bulk_helper( + index => $self->index_name, + type => $class->ES_TYPE, + ); +} + +sub find_largest_mtime { + my ($self, $class) = @_; + + my $result = $self->client->search( + index => $self->index_name, + type => $class->ES_TYPE, + body => { + aggs => { es_mtime => { extended_stats => { field => 'es_mtime' } } }, + size => 0 + } + ); + + return $result->{aggregations}{es_mtime}{max}; +} + +sub find_largest_id { + my ($self, $class) = @_; + + my $result = $self->client->search( + index => $self->index_name, + type => $class->ES_TYPE, + body => { + aggs => { $class->ID_FIELD => { extended_stats => { field => $class->ID_FIELD } } }, + size => 0 + } + ); + + return $result->{aggregations}{$class->ID_FIELD}{max}; +} + +sub put_mapping { + my ($self, $class) = @_; + + my %body = ( properties => scalar $class->ES_PROPERTIES ); + if ($class->does('Bugzilla::Elastic::Role::ChildObject')) { + $body{_parent} = { type => $class->ES_PARENT_TYPE }; + } + + $self->client->indices->put_mapping( + index => $self->index_name, + type => $class->ES_TYPE, + body => \%body, + ); +} + +sub _debug_sql { + my ($self, $sql, $params) = @_; + if ($self->debug_sql) { + my ($out, @args) = ($sql, $params ? (@$params) : ()); + $out =~ s/^\n//gs; + $out =~ s/^\s{8}//gm; + $out =~ s/\?/Bugzilla->dbh->quote(shift @args)/ge; + warn $out, "\n"; + } + + return ($sql, $params) +} + +sub bulk_load { + my ( $self, $class ) = @_; + + $self->put_mapping($class); + my $bulk = $self->_bulk_helper($class); + my $ids = $self->_select_all_ids($class); + $self->clear_mtime; + $self->_bulk_load_ids($bulk, $class, $ids) if @$ids; + undef $ids; # free up some memory + + my $updated_ids = $self->_select_updated_ids($class); + if ($updated_ids) { + $self->_bulk_load_ids($bulk, $class, $updated_ids) if @$updated_ids; + } +} + +sub _select_all_ids { + my ($self, $class) = @_; + + my $dbh = Bugzilla->dbh; + my $last_id = $self->find_largest_id($class); + my ($sql, $params) = $self->_debug_sql($class->ES_SELECT_ALL_SQL($last_id)); + return $dbh->selectcol_arrayref($sql, undef, @$params); +} + +sub _select_updated_ids { + my ($self, $class) = @_; + + my $dbh = Bugzilla->dbh; + my $mtime = $self->find_largest_mtime($class); + if ($mtime && $mtime != $self->mtime) { + my ($updated_sql, $updated_params) = $self->_debug_sql($class->ES_SELECT_UPDATED_SQL($mtime)); + return $dbh->selectcol_arrayref($updated_sql, undef, @$updated_params); + } else { + return undef; + } +} + +sub bulk_load_ids { + my ($self, $class, $ids) = @_; + + $self->put_mapping($class); + $self->clear_mtime; + $self->_bulk_load_ids($self->_bulk_helper($class), $class, $ids); +} + +sub _bulk_load_ids { + my ($self, $bulk, $class, $all_ids) = @_; + + my $iter = natatime $class->ES_OBJECTS_AT_ONCE, @$all_ids; + my $mtime = $self->mtime; + my $progress_bar; + my $next_update; + + if ($self->has_progress_bar) { + my $name = (split(/::/, $class))[-1]; + $progress_bar = $self->progress_bar->new({ + name => $name, + count => scalar @$all_ids, + ETA => 'linear' + }); + $progress_bar->message(sprintf "loading %d $class objects, %d at a time", scalar @$all_ids, $class->ES_OBJECTS_AT_ONCE); + $next_update = $progress_bar->update(0); + $progress_bar->max_update_rate(1); + } + + my $total = 0; + use Time::HiRes; + my $start = time; + while (my @ids = $iter->()) { + if ($progress_bar) { + $total += @ids; + if ($total >= $next_update) { + $next_update = $progress_bar->update($total); + my $duration = time - $start || 1; + } + } + + my $objects = $class->new_from_list(\@ids); + foreach my $object (@$objects) { + my %doc = ( + id => $object->id, + source => scalar $object->es_document($mtime), + ); + + if ($class->does('Bugzilla::Elastic::Role::ChildObject')) { + $doc{parent} = $object->es_parent_id; + } + + $bulk->index(\%doc); + } + Bugzilla->_cleanup(); + } + + $bulk->flush; +} + +sub _build_shadow_dbh { Bugzilla->switch_to_shadow_db } + +sub _build_mtime { + my ($self) = @_; + my ($mtime) = $self->shadow_dbh->selectrow_array("SELECT UNIX_TIMESTAMP(NOW())"); + return $mtime; +} + +1; diff --git a/Bugzilla/Elastic/Role/ChildObject.pm b/Bugzilla/Elastic/Role/ChildObject.pm new file mode 100644 index 000000000..1f7a7483a --- /dev/null +++ b/Bugzilla/Elastic/Role/ChildObject.pm @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +package Bugzilla::Elastic::Role::ChildObject; + +use 5.10.1; +use Role::Tiny; + +with 'Bugzilla::Elastic::Role::Object'; + +requires qw(ES_PARENT_TYPE es_parent_id); + +1; diff --git a/Bugzilla/Elastic/Role/HasClient.pm b/Bugzilla/Elastic/Role/HasClient.pm new file mode 100644 index 000000000..3d52d513a --- /dev/null +++ b/Bugzilla/Elastic/Role/HasClient.pm @@ -0,0 +1,25 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +package Bugzilla::Elastic::Role::HasClient; + +use 5.10.1; +use Moo::Role; +use Search::Elasticsearch; + + +has 'client' => (is => 'lazy'); + +sub _build_client { + my ($self) = @_; + + return Search::Elasticsearch->new( + nodes => Bugzilla->params->{elasticsearch_nodes}, + cxn_pool => 'Sniff', + ); +} + +1; diff --git a/Bugzilla/Elastic/Role/HasIndexName.pm b/Bugzilla/Elastic/Role/HasIndexName.pm new file mode 100644 index 000000000..eaff339cd --- /dev/null +++ b/Bugzilla/Elastic/Role/HasIndexName.pm @@ -0,0 +1,16 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +package Bugzilla::Elastic::Role::HasIndexName; + +use 5.10.1; +use Moo::Role; +use Search::Elasticsearch; + +has 'index_name' => ( is => 'ro', default => sub { Bugzilla->params->{elasticsearch_index} } ); + + +1; diff --git a/Bugzilla/Elastic/Role/Object.pm b/Bugzilla/Elastic/Role/Object.pm new file mode 100644 index 000000000..ad5ab002b --- /dev/null +++ b/Bugzilla/Elastic/Role/Object.pm @@ -0,0 +1,48 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +package Bugzilla::Elastic::Role::Object; + +use 5.10.1; +use Role::Tiny; + +requires qw(ES_TYPE ES_PROPERTIES es_document); +requires qw(ID_FIELD DB_TABLE); + +sub ES_OBJECTS_AT_ONCE { 100 } + +sub ES_SELECT_ALL_SQL { + my ($class, $last_id) = @_; + + my $id = $class->ID_FIELD; + my $table = $class->DB_TABLE; + + return ("SELECT $id FROM $table WHERE $id > ? ORDER BY $id", [$last_id // 0]); +} + +requires qw(ES_SELECT_UPDATED_SQL); + +around 'ES_PROPERTIES' => sub { + my $orig = shift; + my $self = shift; + my $properties = $orig->($self, @_); + $properties->{es_mtime} = { type => 'long' }; + $properties->{$self->ID_FIELD} = { type => 'long', analyzer => 'keyword' }; + + return $properties; +}; + +around 'es_document' => sub { + my ($orig, $self, $mtime) = @_; + my $doc = $orig->($self); + + $doc->{es_mtime} = $mtime; + $doc->{$self->ID_FIELD} = $self->id; + + return $doc; +}; + +1; diff --git a/Bugzilla/User.pm b/Bugzilla/User.pm index 0eb9587eb..69885f57c 100644 --- a/Bugzilla/User.pm +++ b/Bugzilla/User.pm @@ -30,6 +30,7 @@ use Scalar::Util qw(blessed); use Storable qw(dclone); use URI; use URI::QueryParam; +use Role::Tiny::With; use base qw(Bugzilla::Object Exporter); @Bugzilla::User::EXPORT = qw(is_available_username @@ -123,6 +124,81 @@ use constant VALIDATOR_DEPENDENCIES => { use constant EXTRA_REQUIRED_FIELDS => qw(is_enabled); +with 'Bugzilla::Elastic::Role::Object'; + +sub ES_TYPE { 'user' } + +sub ES_OBJECTS_AT_ONCE { 2000 } + +sub ES_SELECT_UPDATED_SQL { + my ($class, $mtime) = @_; + + my $sql = q{ + SELECT DISTINCT + object_id + FROM + audit_log + WHERE + class = 'Bugzilla::User' AND at_time > FROM_UNIXTIME(?) + }; + return ($sql, [$mtime]); +} + +sub ES_SELECT_ALL_SQL { + my ($class, $last_id) = @_; + + my $id = $class->ID_FIELD; + my $table = $class->DB_TABLE; + + return ("SELECT $id FROM $table WHERE $id > ? AND is_enabled ORDER BY $id", [$last_id // 0]); +} + +sub ES_PROPERTIES { + return { + suggest_user => { + type => 'completion', + analyzer => 'folding', + search_analyzer => 'folding', + payloads => \1, + }, + suggest_nick => { + type => 'completion', + analyzer => 'simple', + search_analyzer => 'simple', + payloads => \1, + }, + login => { type => 'string' }, + name => { type => 'string' }, + is_enabled => { type => 'boolean' }, + }; +} + +sub es_document { + my ( $self, $timestamp ) = @_; + my $weight = eval { $self->last_activity_ts ? datetime_from($self->last_activity_ts)->epoch : 0 } // 0; + my $doc = { + login => $self->login, + name => $self->name, + is_enabled => $self->is_enabled, + suggest_user => { + input => [ $self->login, $self->name ], + output => $self->identity, + payload => { name => $self->login, real_name => $self->name }, + weight => $weight, + }, + }; + if ($self->name && $self->name =~ /:(\w+)/) { + my $ircnick = $1; + $doc->{suggest_nick} = { + input => [ $ircnick ], + output => $self->login, + payload => { name => $self->login, real_name => $self->name, ircnick => $ircnick }, + weight => $weight, + }; + } + + return $doc; +} ################################################################################ # Functions ################################################################################ diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index 7937d6b80..fd0f8b92e 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -523,6 +523,9 @@ sub datetime_from { my ($date, $timezone) = @_; # In the database, this is the "0" date. + use Carp qw(cluck); + cluck("undefined date") unless defined $date; + return undef unless defined $date; return undef if $date =~ /^0000/; my @time; diff --git a/META.json b/META.json index 7c10d9a31..ca339fe95 100644 --- a/META.json +++ b/META.json @@ -4,7 +4,7 @@ "Bugzilla Developers " ], "dynamic_config" : 1, - "generated_by" : "ExtUtils::MakeMaker version 7.22, CPAN::Meta::Converter version 2.150005", + "generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150010", "license" : [ "unknown" ], @@ -74,6 +74,7 @@ "GD::Text" : "0", "HTML::Parser" : "3.67", "HTML::Scrubber" : "0", + "IO::Async" : "0", "IO::Scalar" : "0", "JSON::RPC" : "== 1.01", "LWP::UserAgent" : "0", @@ -82,7 +83,9 @@ "Mozilla::CA" : "0", "Net::SFTP" : "0", "PatchReader" : "v0.9.6", + "Role::Tiny" : "0", "SOAP::Lite" : "0.712", + "Search::Elasticsearch" : "0", "Template::Plugin::GD::Image" : "0", "Test::Taint" : "1.06", "TheSchwartz" : "1.10", @@ -128,6 +131,21 @@ } } }, + "elasticsearch" : { + "description" : "Elasticsearch-powered searches", + "prereqs" : { + "runtime" : { + "recommends" : { + "Term::ProgressBar" : "0" + }, + "requires" : { + "IO::Async" : "0", + "Role::Tiny" : "0", + "Search::Elasticsearch" : "0" + } + } + } + }, "extension_bitly_optional" : { "prereqs" : { "runtime" : { @@ -465,5 +483,5 @@ }, "release_status" : "stable", "version" : "42", - "x_serialization_backend" : "JSON::PP version 2.27300" + "x_serialization_backend" : "JSON::PP version 2.27400" } diff --git a/META.yml b/META.yml index 1ce538a3a..c429780e9 100644 --- a/META.yml +++ b/META.yml @@ -10,7 +10,7 @@ build_requires: configure_requires: ExtUtils::MakeMaker: 6.57_07 dynamic_config: 1 -generated_by: 'ExtUtils::MakeMaker version 7.22, CPAN::Meta::Converter version 2.150005' +generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150010' license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html @@ -55,6 +55,7 @@ optional_features: GD::Text: '0' HTML::Parser: '3.67' HTML::Scrubber: '0' + IO::Async: '0' IO::Scalar: '0' JSON::RPC: '== 1.01' LWP::UserAgent: '0' @@ -63,7 +64,9 @@ optional_features: Mozilla::CA: '0' Net::SFTP: '0' PatchReader: v0.9.6 + Role::Tiny: '0' SOAP::Lite: '0.712' + Search::Elasticsearch: '0' Template::Plugin::GD::Image: '0' Test::Taint: '1.06' TheSchwartz: '1.10' @@ -87,6 +90,14 @@ optional_features: requires: File::Copy::Recursive: '0' File::Which: '0' + elasticsearch: + description: 'Elasticsearch-powered searches' + recommends: + Term::ProgressBar: '0' + requires: + IO::Async: '0' + Role::Tiny: '0' + Search::Elasticsearch: '0' extension_bitly_optional: requires: Mozilla::CA: '0' @@ -249,4 +260,4 @@ requires: perl: '5.010001' version: '0.87' version: '42' -x_serialization_backend: 'CPAN::Meta::YAML version 0.018' +x_serialization_backend: 'CPAN::Meta::YAML version 0.016' diff --git a/Makefile.PL b/Makefile.PL index 704a896b2..17ed79583 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -313,6 +313,21 @@ my %optional_features = ( } } }, + elasticsearch => { + description => 'Elasticsearch-powered searches', + prereqs => { + runtime => { + recommends => { + 'Term::ProgressBar' => 0, + }, + requires => { + 'Search::Elasticsearch' => 0, + 'IO::Async' => 0, + 'Role::Tiny' => 0, + }, + } + }, + }, ); for my $file ( glob("extensions/*/Config.pm") ) { diff --git a/scripts/bulk_index.pl b/scripts/bulk_index.pl new file mode 100644 index 000000000..d501ded39 --- /dev/null +++ b/scripts/bulk_index.pl @@ -0,0 +1,70 @@ +#!/usr/bin/perl +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. +use strict; +use warnings; +use 5.10.1; +use Bugzilla; +BEGIN { Bugzilla->extensions } + +use Bugzilla::Elastic::Indexer; +use IO::Async::Timer::Periodic; +use IO::Async::Loop; +use Time::HiRes qw(time); + +use Getopt::Long qw(:config gnu_getopt); + +my ($debug_sql, $progress_bar, $once); +my $verbose = 0; + +GetOptions( + 'verbose|v+' => \$verbose, + 'debug-sql' => \$debug_sql, + 'progress-bar' => \$progress_bar, + 'once|n' => \$once, +); + +if ($progress_bar) { + $progress_bar = eval { require Term::ProgressBar; 1}; +} + +my $indexer = Bugzilla::Elastic::Indexer->new( + $debug_sql ? ( debug_sql => 1 ) : (), + $progress_bar ? ( progress_bar => 'Term::ProgressBar' ) : (), +); + +$indexer->create_index; + +my $loop = IO::Async::Loop->new; +my $timer = IO::Async::Timer::Periodic->new( + first_interval => 0, + interval => 15, + reschedule => 'skip', + + on_tick => sub { + my $start_users = time; + say "indexing users" if $verbose; + $indexer->bulk_load('Bugzilla::User'); + print " ", time - $start_users, " seconds\n" if $verbose > 1; + + say "indexing bugs" if $verbose; + my $start_bugs = time; + $indexer->bulk_load('Bugzilla::Bug'); + print " ", time - $start_bugs, " seconds\n" if $verbose > 1; + + say "indexing comments" if $verbose; + my $start_comments = time; + $indexer->bulk_load('Bugzilla::Comment'); + print " ", time - $start_comments, " seconds\n" if $verbose > 1; + + $loop->stop if $once; + }, +); + +$timer->start(); +$loop->add($timer); +$loop->run; diff --git a/t/002goodperl.t b/t/002goodperl.t index 7b2e74acc..d770b7b4f 100644 --- a/t/002goodperl.t +++ b/t/002goodperl.t @@ -80,6 +80,7 @@ foreach my $file (@testitems) { my $found_use_perl = 0; my $found_use_strict = 0; my $found_use_warnings = 0; + my $found_modern_perl = 0; $file =~ s/\s.*$//; # nuke everything after the first space (#comment) next if (!$file); # skip null entries @@ -88,9 +89,14 @@ foreach my $file (@testitems) { next; } while (my $file_line = ) { + $found_modern_perl = 1 if $file_line =~ m/^use\s*(?:Moo|Role::Tiny)/; $found_use_perl = 1 if $file_line =~ m/^\s*use 5.10.1/; $found_use_strict = 1 if $file_line =~ m/^\s*use strict/; $found_use_warnings = 1 if $file_line =~ m/^\s*use warnings/; + if ($found_modern_perl) { + $found_use_strict = 1; + $found_use_warnings = 1; + } last if ($found_use_perl && $found_use_strict && $found_use_warnings); } close (FILE); diff --git a/template/en/default/admin/params/elastic.html.tmpl b/template/en/default/admin/params/elastic.html.tmpl new file mode 100644 index 000000000..47ec088b5 --- /dev/null +++ b/template/en/default/admin/params/elastic.html.tmpl @@ -0,0 +1,22 @@ +[%# This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. + # + # This Source Code Form is "Incompatible With Secondary Licenses", as + # defined by the Mozilla Public License, v. 2.0. + #%] +[% + title = "Elasticsearch" + desc = "Set up Elasticsearch integration" +%] + +[% param_descs = { + elasticsearch_nodes => + "If this option is set, $terms.Bugzilla will integrate with Elasticsearch. " _ + "Specify one of more server, separated by spaces, using hostname[:port] " _ + "notation (for example: localhost).", + + elasticsearch_index => + "The name of the index to use for searching bugs, comments, etc", + } +%] -- cgit v1.2.3-24-g4f1b