From 40a2b11f6279554debafdc415754a9d28d9fefaa Mon Sep 17 00:00:00 2001 From: "mkanat%bugzilla.org" <> Date: Sat, 26 Aug 2006 04:53:12 +0000 Subject: Bug 287170: Need to be able to do fulltext searches on bugs.short_desc even with most tables using InnoDB Patch By Max Kanat-Alexander r=myk, a=myk --- Bugzilla/DB/Schema.pm | 3 +-- Bugzilla/Install/DB.pm | 9 ++++++--- Bugzilla/Search.pm | 48 +++++++++++++++++++++++++++++++++--------------- 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/Bugzilla/DB/Schema.pm b/Bugzilla/DB/Schema.pm index a26eb311b..adac0c6d8 100644 --- a/Bugzilla/DB/Schema.pm +++ b/Bugzilla/DB/Schema.pm @@ -219,8 +219,7 @@ use constant ABSTRACT_SCHEMA => { bugs_target_milestone_idx => ['target_milestone'], bugs_qa_contact_idx => ['qa_contact'], bugs_votes_idx => ['votes'], - bugs_short_desc_idx => {FIELDS => ['short_desc'], - TYPE => 'FULLTEXT'}, + bugs_short_desc_idx => ['short_desc'], ], }, diff --git a/Bugzilla/Install/DB.pm b/Bugzilla/Install/DB.pm index a7f20625a..bf1fbcccf 100644 --- a/Bugzilla/Install/DB.pm +++ b/Bugzilla/Install/DB.pm @@ -328,9 +328,6 @@ sub update_table_definitions { $dbh->do('UPDATE quips SET userid = NULL WHERE userid = 0'); } - $dbh->bz_add_index('bugs', 'bugs_short_desc_idx', - {TYPE => 'FULLTEXT', FIELDS => [qw(short_desc)]}); - # Right now, we only create the "thetext" index on MySQL. if ($dbh->isa('Bugzilla::DB::Mysql')) { $dbh->bz_add_index('longdescs', 'longdescs_thetext_idx', @@ -482,6 +479,12 @@ sub update_table_definitions { # 2006-08-19 LpSolit@gmail.com - Bug 87795 $dbh->bz_alter_column('tokens', 'userid', {TYPE => 'INT3'}); + my $sd_index = $dbh->bz_index_info('bugs', 'bugs_short_desc_idx'); + if ($sd_index && $sd_index->{TYPE} eq 'FULLTEXT') { + $dbh->bz_drop_index('bugs', 'bugs_short_desc_idx'); + $dbh->bz_add_index('bugs', 'bugs_short_desc_idx', [qw(short_desc)]); + } + ################################################################ # New --TABLE-- changes should go *** A B O V E *** this point # ################################################################ diff --git a/Bugzilla/Search.pm b/Bugzilla/Search.pm index 38380c241..87c07129a 100644 --- a/Bugzilla/Search.pm +++ b/Bugzilla/Search.pm @@ -46,6 +46,11 @@ use Bugzilla::Keyword; use Date::Format; use Date::Parse; +# How much we should add to the relevance, for each word that matches +# in bugs.short_desc, during fulltext search. This is high because +# we want summary matches to be *very* relevant, by default. +use constant SUMMARY_RELEVANCE_FACTOR => 7; + # Some fields are not sorted on themselves, but on other fields. # We need to have a list of these fields and what they map to. # Each field points to an array that contains the fields mapped @@ -387,15 +392,7 @@ sub init { } if (defined $params->param('content')) { - # Append a new chart implementing content quicksearch - my $chart; - for ($chart = 0 ; $params->param("field$chart-0-0") ; $chart++) {}; - $params->param("field$chart-0-0", 'content'); - $params->param("type$chart-0-0", 'matches'); - $params->param("value$chart-0-0", $params->param('content')); - $params->param("field$chart-0-1", 'short_desc'); - $params->param("type$chart-0-1", 'allwords'); - $params->param("value$chart-0-1", $params->param('content')); + push(@specialchart, ['content', 'matches', $params->param('content')]); } my $chartid; @@ -590,13 +587,19 @@ sub init { # Create search terms to add to the SELECT and WHERE clauses. # $term1 searches comments. - # $term2 searches summaries, which contributes to the relevance - # ranking in SELECT but doesn't limit which bugs get retrieved. my $term1 = $dbh->sql_fulltext_search("${table}.thetext", $v); - my $term2 = $dbh->sql_fulltext_search("bugs.short_desc", $v); + + # short_desc searching for the WHERE clause + my @words = _split_words_into_like('bugs.short_desc', $v); + my $term2_where = join(' AND ', @words); + + # short_desc relevance + my $factor = SUMMARY_RELEVANCE_FACTOR; + my @s_words = map("CASE WHEN $_ THEN $factor ELSE 0 END", @words); + my $term2_select = join(' + ', @s_words); # The term to use in the WHERE clause. - $term = "$term1 > 0"; + $term = "$term1 > 0 OR ($term2_where)"; # In order to sort by relevance (in case the user requests it), # we SELECT the relevance value and give it an alias so we can @@ -612,8 +615,8 @@ sub init { # Note: We should be calculating the average relevance of all # comments for a bug, not just matching comments, but that's hard # (see http://bugzilla.mozilla.org/show_bug.cgi?id=145588#c35). - my $select_term = - "(SUM($term1)/COUNT($term1) + $term2) AS relevance"; + my $select_term = "(SUM($term1)/COUNT(${table}.comment_id)" + . " + $term2_select) AS relevance"; # add the column not used in aggregate function explicitly push(@groupby, 'bugs.short_desc'); @@ -1711,4 +1714,19 @@ sub BuildOrderBy { push(@$stringlist, trim($orderfield . ' ' . $orderdirection)); } + +# This is a helper for fulltext search +sub _split_words_into_like { + my ($field, $text) = @_; + my $dbh = Bugzilla->dbh; + # This code is very similar to Bugzilla::DB::sql_fulltext_search, + # so you can look there if you'd like an explanation of what it's + # doing. + my $lower_text = lc($text); + my @words = split(/\s+/, $lower_text); + @words = map($dbh->quote("%$_%"), @words); + map(trick_taint($_), @words); + @words = map($dbh->sql_istrcmp($field, $_, 'LIKE'), @words); + return @words; +} 1; -- cgit v1.2.3-24-g4f1b