summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormkanat%bugzilla.org <>2008-03-25 04:47:21 +0100
committermkanat%bugzilla.org <>2008-03-25 04:47:21 +0100
commit2429c5daba37968dacb9b84e6eb671b057765fda (patch)
tree5218a8c3da1a02d241d02c5b33572551cd98ce5e
parentd7f129889f6d9eb839c18c6bf820f86ec57a2cfe (diff)
downloadbugzilla-2429c5daba37968dacb9b84e6eb671b057765fda.tar.gz
bugzilla-2429c5daba37968dacb9b84e6eb671b057765fda.tar.xz
Bug 399370: Fulltext search with a LIKE on bugs.short_desc is too slow (make Bugzilla use a separate fulltext table)
Patch By Max Kanat-Alexander <mkanat@bugzilla.org> (module owner) a=mkanat
-rwxr-xr-xBugzilla/Bug.pm47
-rw-r--r--Bugzilla/DB/Mysql.pm20
-rw-r--r--Bugzilla/DB/Pg.pm4
-rw-r--r--Bugzilla/DB/Schema.pm25
-rw-r--r--Bugzilla/DB/Schema/Mysql.pm2
-rw-r--r--Bugzilla/Install/DB.pm42
-rw-r--r--Bugzilla/Search.pm66
-rwxr-xr-ximportxml.pl1
8 files changed, 133 insertions, 74 deletions
diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm
index 96f185c1f..6223320d6 100755
--- a/Bugzilla/Bug.pm
+++ b/Bugzilla/Bug.pm
@@ -408,12 +408,6 @@ sub create {
}
}
- $dbh->bz_commit_transaction();
-
- # Because MySQL doesn't support transactions on the longdescs table,
- # we do this after we've committed the transaction. That way we're
- # fairly sure we're inserting a good Bug ID.
-
# And insert the comment. We always insert a comment on bug creation,
# but sometimes it's blank.
my @columns = qw(bug_id who bug_when thetext);
@@ -429,6 +423,12 @@ sub create {
$dbh->do('INSERT INTO longdescs (' . join(',', @columns) . ")
VALUES ($qmarks)", undef, @values);
+ $dbh->bz_commit_transaction();
+
+ # Because MySQL doesn't support transactions on the fulltext table,
+ # we do this after we've committed the transaction. That way we're
+ # sure we're inserting a good Bug ID.
+ $bug->_sync_fulltext('new bug');
return $bug;
}
@@ -615,6 +615,13 @@ sub update {
$self->{delta_ts} = $delta_ts;
}
+ # The only problem with this here is that update() is often called
+ # in the middle of a transaction, and if that transaction is rolled
+ # back, this change will *not* be rolled back. As we expect rollbacks
+ # to be extremely rare, that is OK for us.
+ $self->_sync_fulltext()
+ if $self->{added_comments} || $changes->{short_desc};
+
return $changes;
}
@@ -766,6 +773,30 @@ sub update_keywords {
return [$removed_keywords, $added_keywords];
}
+# Should be called any time you update short_desc or change a comment.
+sub _sync_fulltext {
+ my ($self, $new_bug) = @_;
+ my $dbh = Bugzilla->dbh;
+ if ($new_bug) {
+ $dbh->do('INSERT INTO bugs_fulltext (bug_id, short_desc)
+ SELECT bug_id, short_desc FROM bugs WHERE bug_id = ?',
+ undef, $self->id);
+ }
+ else {
+ $dbh->do('UPDATE bugs_fulltext SET short_desc = ? WHERE bug_id = ?',
+ undef, $self->short_desc, $self->id);
+ }
+ my $comments = $dbh->selectall_arrayref(
+ 'SELECT thetext, isprivate FROM longdescs WHERE bug_id = ?',
+ undef, $self->id);
+ my $all = join("\n", map { $_->[0] } @$comments);
+ my @no_private = grep { !$_->[1] } @$comments;
+ my $nopriv_string = join("\n", map { $_->[0] } @no_private);
+ $dbh->do('UPDATE bugs_fulltext SET comments = ?, comments_noprivate = ?
+ WHERE bug_id = ?', undef, $all, $nopriv_string, $self->id);
+}
+
+
# This is the correct way to delete bugs from the DB.
# No bug should be deleted from anywhere else except from here.
#
@@ -821,11 +852,10 @@ sub remove_from_db {
# Several of the previous tables also depend on attach_id.
$dbh->do("DELETE FROM attachments WHERE bug_id = ?", undef, $bug_id);
$dbh->do("DELETE FROM bugs WHERE bug_id = ?", undef, $bug_id);
+ $dbh->do("DELETE FROM longdescs WHERE bug_id = ?", undef, $bug_id);
$dbh->bz_commit_transaction();
- $dbh->do("DELETE FROM longdescs WHERE bug_id = ?", undef, $bug_id);
-
# Now this bug no longer exists
$self->DESTROY;
return $self;
@@ -2696,6 +2726,7 @@ sub update_comment {
# We assume _check_comment() has already been called earlier.
Bugzilla->dbh->do('UPDATE longdescs SET thetext = ? WHERE comment_id = ?',
undef, ($new_comment, $comment_id));
+ $self->_sync_fulltext();
# Update the comment object with this new text.
$current_comment_obj[0]->{'body'} = $new_comment;
diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm
index 2da0c44ea..0bd7b7d37 100644
--- a/Bugzilla/DB/Mysql.pm
+++ b/Bugzilla/DB/Mysql.pm
@@ -255,11 +255,11 @@ EOT
print "\nISAM->MyISAM table conversion done.\n\n";
}
- my $sd_index_deleted = 0;
+ my ($sd_index_deleted, $longdescs_index_deleted);
my @tables = $self->bz_table_list_real();
- # We want to convert the bugs table to MyISAM, but it's possible that
- # it has a fulltext index on it and this will fail unless we remove
- # the index.
+ # We want to convert tables to InnoDB, but it's possible that they have
+ # fulltext indexes on them, and conversation will fail unless we remove
+ # the indexes.
if (grep($_ eq 'bugs', @tables)) {
if ($self->bz_index_info_real('bugs', 'short_desc')) {
$self->bz_drop_index_raw('bugs', 'short_desc');
@@ -268,6 +268,13 @@ EOT
$self->bz_drop_index_raw('bugs', 'bugs_short_desc_idx');
$sd_index_deleted = 1; # Used for later schema cleanup.
}
+ if ($self->bz_index_info_real('longdescs', 'thetext')) {
+ $self->bz_drop_index_raw('longdescs', 'thetext');
+ }
+ if ($self->bz_index_info_real('longdescs', 'longdescs_thetext_idx')) {
+ $self->bz_drop_index_raw('longdescs', 'longdescs_thetext_idx');
+ $longdescs_index_deleted = 1; # For later schema cleanup.
+ }
}
# Upgrade tables from MyISAM to InnoDB
@@ -480,6 +487,11 @@ EOT
$self->_bz_real_schema->delete_index('bugs', 'bugs_short_desc_idx');
$self->_bz_store_real_schema;
}
+ if ($longdescs_index_deleted) {
+ $self->_bz_real_schema->delete_index('longdescs',
+ 'longdescs_thetext_idx');
+ $self->_bz_store_real_schema;
+ }
# The old timestamp fields need to be adjusted here instead of in
# checksetup. Otherwise the UPDATE statements inside of bz_add_column
diff --git a/Bugzilla/DB/Pg.pm b/Bugzilla/DB/Pg.pm
index 9675e1f26..4777ba89a 100644
--- a/Bugzilla/DB/Pg.pm
+++ b/Bugzilla/DB/Pg.pm
@@ -179,6 +179,10 @@ sub bz_setup_database {
# field, because it can't have index data longer than 2770
# characters on that field.
$self->bz_drop_index('longdescs', 'longdescs_thetext_idx');
+ # Same for all the comments fields in the fulltext table.
+ $self->bz_drop_index('bugs_fulltext', 'bugs_fulltext_comments_idx');
+ $self->bz_drop_index('bugs_fulltext',
+ 'bugs_fulltext_comments_noprivate_idx');
# PostgreSQL also wants an index for calling LOWER on
# login_name, which we do with sql_istrcmp all over the place.
diff --git a/Bugzilla/DB/Schema.pm b/Bugzilla/DB/Schema.pm
index 23f6e0ca4..b8ac649b7 100644
--- a/Bugzilla/DB/Schema.pm
+++ b/Bugzilla/DB/Schema.pm
@@ -279,6 +279,29 @@ use constant ABSTRACT_SCHEMA => {
],
},
+ bugs_fulltext => {
+ FIELDS => [
+ bug_id => {TYPE => 'INT3', NOTNULL => 1, PRIMARYKEY => 1,
+ REFERENCES => {TABLE => 'bugs',
+ COLUMN => 'bug_id',
+ DELETE => 'CASCADE'}},
+ short_desc => {TYPE => 'varchar(255)', NOTNULL => 1},
+ # Comments are stored all together in one column for searching.
+ # This allows us to examine all comments together when deciding
+ # the relevance of a bug in fulltext search.
+ comments => {TYPE => 'LONGTEXT'},
+ comments_noprivate => {TYPE => 'LONGTEXT'},
+ ],
+ INDEXES => [
+ bugs_fulltext_short_desc_idx => {FIELDS => ['short_desc'],
+ TYPE => 'FULLTEXT'},
+ bugs_fulltext_comments_idx => {FIELDS => ['comments'],
+ TYPE => 'FULLTEXT'},
+ bugs_fulltext_comments_noprivate_idx => {
+ FIELDS => ['comments_noprivate'], TYPE => 'FULLTEXT'},
+ ],
+ },
+
bugs_activity => {
FIELDS => [
bug_id => {TYPE => 'INT3', NOTNULL => 1},
@@ -336,8 +359,6 @@ use constant ABSTRACT_SCHEMA => {
longdescs_bug_id_idx => ['bug_id'],
longdescs_who_idx => [qw(who bug_id)],
longdescs_bug_when_idx => ['bug_when'],
- longdescs_thetext_idx => {FIELDS => ['thetext'],
- TYPE => 'FULLTEXT'},
],
},
diff --git a/Bugzilla/DB/Schema/Mysql.pm b/Bugzilla/DB/Schema/Mysql.pm
index 2f4bc42b2..627716970 100644
--- a/Bugzilla/DB/Schema/Mysql.pm
+++ b/Bugzilla/DB/Schema/Mysql.pm
@@ -86,7 +86,7 @@ use constant REVERSE_MAPPING => {
# as in their db-specific version, so no reverse mapping is needed.
};
-use constant MYISAM_TABLES => qw(longdescs);
+use constant MYISAM_TABLES => qw(bugs_fulltext);
#------------------------------------------------------------------------------
sub _initialize {
diff --git a/Bugzilla/Install/DB.pm b/Bugzilla/Install/DB.pm
index c6668aec2..ca011ce6c 100644
--- a/Bugzilla/Install/DB.pm
+++ b/Bugzilla/Install/DB.pm
@@ -315,12 +315,6 @@ sub update_table_definitions {
$dbh->do('UPDATE quips SET userid = NULL WHERE userid = 0');
}
- # Right now, we only create the "thetext" index on MySQL.
- if ($dbh->isa('Bugzilla::DB::Mysql')) {
- $dbh->bz_add_index('longdescs', 'longdescs_thetext_idx',
- {TYPE => 'FULLTEXT', FIELDS => [qw(thetext)]});
- }
-
_convert_attachments_filename_from_mediumtext();
$dbh->bz_add_column('quips', 'approved',
@@ -525,6 +519,9 @@ sub update_table_definitions {
$dbh->bz_alter_column('namedqueries', 'query_type',
{TYPE => 'BOOLEAN', NOTNULL => 1, DEFAULT => 0});
+ $dbh->bz_drop_index('longdescs', 'longdescs_thetext_idx');
+ _populate_bugs_fulltext();
+
################################################################
# New --TABLE-- changes should go *** A B O V E *** this point #
################################################################
@@ -2992,6 +2989,39 @@ sub _check_content_length {
}
}
+sub _populate_bugs_fulltext {
+ my $dbh = Bugzilla->dbh;
+ my $fulltext = $dbh->selectrow_array('SELECT 1 FROM bugs_fulltext '
+ . $dbh->sql_limit(1));
+ # We only populate the table if it's empty...
+ if (!$fulltext) {
+ # ... and if there are bugs in the bugs table.
+ my $bug_ids = $dbh->selectcol_arrayref('SELECT bug_id FROM bugs');
+ return if !@$bug_ids;
+
+ print "Populating bugs_fulltext.short_desc...\n";
+ $dbh->do('INSERT INTO bugs_fulltext (bug_id, short_desc)
+ SELECT bug_id, short_desc FROM bugs');
+ print "Populating bugs_fulltext comments fields...\n";
+ my $count = 1;
+ my $sth_all = $dbh->prepare('SELECT thetext FROM longdescs
+ WHERE bug_id = ?');
+ my $sth_nopriv = $dbh->prepare('SELECT thetext FROM longdescs
+ WHERE bug_id = ? AND isprivate = 0');
+ my $sth_update = $dbh->prepare(
+ 'UPDATE bugs_fulltext SET comments = ?, comments_noprivate = ?
+ WHERE bug_id = ?');
+ foreach my $id (@$bug_ids) {
+ my $all = $dbh->selectcol_arrayref($sth_all, undef, $id);
+ my $nopriv = $dbh->selectcol_arrayref($sth_nopriv, undef, $id);
+ $sth_update->execute(join("\n", @$all), join("\n", @$nopriv), $id);
+ indicate_progress({ total => scalar @$bug_ids, every => 100,
+ current => $count++ });
+ }
+ print "\n";
+ }
+}
+
1;
__END__
diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm
index 252813a06..111875dac 100644
--- a/Bugzilla/Search.pm
+++ b/Bugzilla/Search.pm
@@ -47,11 +47,6 @@ use Bugzilla::Keyword;
use Date::Format;
use Date::Parse;
-# How much we should add to the relevance, for each word that matches
-# in bugs.short_desc, during fulltext search. This is high because
-# we want summary matches to be *very* relevant, by default.
-use constant SUMMARY_RELEVANCE_FACTOR => 7;
-
# Some fields are not sorted on themselves, but on other fields.
# We need to have a list of these fields and what they map to.
# Each field points to an array that contains the fields mapped
@@ -1021,21 +1016,6 @@ sub BuildOrderBy {
push(@$stringlist, trim($orderfield . ' ' . $orderdirection));
}
-# This is a helper for fulltext search
-sub _split_words_into_like {
- my ($field, $text) = @_;
- my $dbh = Bugzilla->dbh;
- # This code is very similar to Bugzilla::DB::sql_fulltext_search,
- # so you can look there if you'd like an explanation of what it's
- # doing.
- my $lower_text = lc($text);
- my @words = split(/\s+/, $lower_text);
- @words = map($dbh->quote("%$_%"), @words);
- map(trick_taint($_), @words);
- @words = map($dbh->sql_istrcmp($field, $_, 'LIKE'), @words);
- return @words;
-}
-
#####################################################################
# Search Functions
#####################################################################
@@ -1252,44 +1232,24 @@ sub _content_matches {
# accept the "matches" operator, which is specific to full-text
# index searches.
- # Add the longdescs table to the query so we can search comments.
- my $table = "longdescs_$$chartid";
- my $extra = "";
- if (Bugzilla->params->{"insidergroup"}
- && !Bugzilla->user->in_group(Bugzilla->params->{"insidergroup"}))
- {
- $extra = "AND $table.isprivate < 1";
- }
- push(@$supptables, "LEFT JOIN longdescs AS $table " .
- "ON bugs.bug_id = $table.bug_id $extra");
-
+ # Add the fulltext table to the query so we can search on it.
+ my $table = "bugs_fulltext_$$chartid";
+ my $comments_col = "comments";
+ $comments_col = "comments_noprivate" unless $self->{'user'}->is_insider;
+ push(@$supptables, "LEFT JOIN bugs_fulltext AS $table " .
+ "ON bugs.bug_id = $table.bug_id");
+
# Create search terms to add to the SELECT and WHERE clauses.
- # $term1 searches comments.
- my $term1 = $dbh->sql_fulltext_search("${table}.thetext", $$v);
-
- # short_desc searching for the WHERE clause
- my @words = _split_words_into_like('bugs.short_desc', $$v);
- my $term2_where = join(' OR ', @words);
-
- # short_desc relevance
- my $factor = SUMMARY_RELEVANCE_FACTOR;
- my @s_words = map("CASE WHEN $_ THEN $factor ELSE 0 END", @words);
- my $term2_select = join(' + ', @s_words);
-
+ my $term1 = $dbh->sql_fulltext_search("$table.$comments_col", $$v);
+ my $term2 = $dbh->sql_fulltext_search("$table.short_desc", $$v);
+
# The term to use in the WHERE clause.
- $$term = "$term1 > 0 OR ($term2_where)";
-
+ $$term = "$term1 > 0 OR $term2 > 0";
+
# In order to sort by relevance (in case the user requests it),
# we SELECT the relevance value and give it an alias so we can
# add it to the SORT BY clause when we build it in buglist.cgi.
- #
- # Note: We should be calculating the relevance based on all
- # comments for a bug, not just matching comments, but that's hard
- # (see http://bugzilla.mozilla.org/show_bug.cgi?id=145588#c35).
- my $select_term = "(SUM($term1) + $term2_select) AS relevance";
-
- # add the column not used in aggregate function explicitly
- push(@$groupby, 'bugs.short_desc');
+ my $select_term = "($term1 + $term2) AS relevance";
# Users can specify to display the relevance field, in which case
# it'll show up in the list of fields being selected, and we need
diff --git a/importxml.pl b/importxml.pl
index b7203a6e6..2644d3380 100755
--- a/importxml.pl
+++ b/importxml.pl
@@ -1253,6 +1253,7 @@ sub process_bug {
VALUES (?,?,?,?,?,?)", undef,
$id, $exporterid, $timestamp, $worktime, $private, $long_description
);
+ Bugzilla::Bug->new($id)->_sync_fulltext();
# Add this bug to each group of which its product is a member.
my $sth_group = $dbh->prepare("INSERT INTO bug_group_map (bug_id, group_id)