summaryrefslogtreecommitdiffstats
path: root/Bugzilla
diff options
context:
space:
mode:
authorMax Kanat-Alexander <mkanat@bugzilla.org>2010-06-22 04:10:21 +0200
committerMax Kanat-Alexander <mkanat@bugzilla.org>2010-06-22 04:10:21 +0200
commit4acb2424e62cbd64bc92a5dec2cbe1e2b7096157 (patch)
tree9076b3809846e8fc58ce720e0cd32dc0d3c8ff77 /Bugzilla
parent601bda78fa436e7030ebcefc589d930c99f1373e (diff)
downloadbugzilla-4acb2424e62cbd64bc92a5dec2cbe1e2b7096157.tar.gz
bugzilla-4acb2424e62cbd64bc92a5dec2cbe1e2b7096157.tar.xz
Bug 22353: Automatic duplicate bug detection on enter_bug.cgi
r=glob, a=mkanat
Diffstat (limited to 'Bugzilla')
-rw-r--r--Bugzilla/Bug.pm133
-rw-r--r--Bugzilla/Constants.pm5
-rw-r--r--Bugzilla/DB.pm5
-rw-r--r--Bugzilla/DB/Mysql.pm5
-rw-r--r--Bugzilla/DB/Oracle.pm5
-rw-r--r--Bugzilla/User.pm2
-rw-r--r--Bugzilla/WebService/Bug.pm35
7 files changed, 161 insertions, 29 deletions
diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm
index 6df7363d5..80a4b5933 100644
--- a/Bugzilla/Bug.pm
+++ b/Bugzilla/Bug.pm
@@ -49,7 +49,7 @@ use Bugzilla::Group;
use Bugzilla::Status;
use Bugzilla::Comment;
-use List::MoreUtils qw(firstidx);
+use List::MoreUtils qw(firstidx uniq);
use List::Util qw(min first);
use Storable qw(dclone);
use URI;
@@ -446,6 +446,87 @@ sub match {
return $class->SUPER::match(@_);
}
+sub possible_duplicates {
+ my ($class, $params) = @_;
+ my $short_desc = $params->{summary};
+ my $products = $params->{products} || [];
+ my $limit = $params->{limit} || MAX_POSSIBLE_DUPLICATES;
+ $limit = MAX_POSSIBLE_DUPLICATES if $limit > MAX_POSSIBLE_DUPLICATES;
+ $products = [$products] if !ref($products) eq 'ARRAY';
+
+ my $orig_limit = $limit;
+ detaint_natural($limit)
+ || ThrowCodeError('param_must_be_numeric',
+ { function => 'possible_duplicates',
+ param => $orig_limit });
+
+ my $dbh = Bugzilla->dbh;
+ my $user = Bugzilla->user;
+ my @words = split(/[\b\s]+/, $short_desc || '');
+ # Exclude punctuation from the array.
+ @words = map { /(\w+)/; $1 } @words;
+ # And make sure that each word is longer than 2 characters.
+ @words = grep { defined $_ and length($_) > 2 } @words;
+
+ return [] if !@words;
+
+ my ($where_sql, $relevance_sql);
+ if ($dbh->FULLTEXT_OR) {
+ my $joined_terms = join($dbh->FULLTEXT_OR, @words);
+ ($where_sql, $relevance_sql) =
+ $dbh->sql_fulltext_search('bugs_fulltext.short_desc',
+ $joined_terms, 1);
+ $relevance_sql ||= $where_sql;
+ }
+ else {
+ my (@where, @relevance);
+ my $count = 0;
+ foreach my $word (@words) {
+ $count++;
+ my ($term, $rel_term) = $dbh->sql_fulltext_search(
+ 'bugs_fulltext.short_desc', $word, $count);
+ push(@where, $term);
+ push(@relevance, $rel_term || $term);
+ }
+
+ $where_sql = join(' OR ', @where);
+ $relevance_sql = join(' + ', @relevance);
+ }
+
+ my $product_ids = join(',', map { $_->id } @$products);
+ my $product_sql = $product_ids ? "AND product_id IN ($product_ids)" : "";
+
+ # Because we collapse duplicates, we want to get slightly more bugs
+ # than were actually asked for.
+ my $sql_limit = $limit + 5;
+
+ my $possible_dupes = $dbh->selectall_arrayref(
+ "SELECT bugs.bug_id AS bug_id, bugs.resolution AS resolution,
+ ($relevance_sql) AS relevance
+ FROM bugs
+ INNER JOIN bugs_fulltext ON bugs.bug_id = bugs_fulltext.bug_id
+ WHERE ($where_sql) $product_sql
+ ORDER BY relevance DESC, bug_id DESC
+ LIMIT $sql_limit", {Slice=>{}});
+
+ my @actual_dupe_ids;
+ # Resolve duplicates into their ultimate target duplicates.
+ foreach my $bug (@$possible_dupes) {
+ my $push_id = $bug->{bug_id};
+ if ($bug->{resolution} && $bug->{resolution} eq 'DUPLICATE') {
+ $push_id = _resolve_ultimate_dup_id($bug->{bug_id});
+ }
+ push(@actual_dupe_ids, $push_id);
+ }
+ @actual_dupe_ids = uniq @actual_dupe_ids;
+ if (scalar @actual_dupe_ids > $limit) {
+ @actual_dupe_ids = @actual_dupe_ids[0..($limit-1)];
+ }
+
+ my $visible = $user->visible_bugs(\@actual_dupe_ids);
+ return $class->new_from_list($visible);
+}
+
# Docs for create() (there's no POD in this file yet, but we very
# much need this documented right now):
#
@@ -1426,23 +1507,7 @@ sub _check_dup_id {
# Make sure a loop isn't created when marking this bug
# as duplicate.
- my %dupes;
- my $this_dup = $dupe_of;
- my $sth = $dbh->prepare('SELECT dupe_of FROM duplicates WHERE dupe = ?');
-
- while ($this_dup) {
- if ($this_dup == $self->id) {
- ThrowUserError('dupe_loop_detected', { bug_id => $self->id,
- dupe_of => $dupe_of });
- }
- # If $dupes{$this_dup} is already set to 1, then a loop
- # already exists which does not involve this bug.
- # As the user is not responsible for this loop, do not
- # prevent him from marking this bug as a duplicate.
- last if exists $dupes{$this_dup};
- $dupes{$this_dup} = 1;
- $this_dup = $dbh->selectrow_array($sth, undef, $this_dup);
- }
+ _resolve_ultimate_dup_id($self->id, $dupe_of, 1);
my $cur_dup = $self->dup_id || 0;
if ($cur_dup != $dupe_of && Bugzilla->params->{'commentonduplicate'}
@@ -2843,6 +2908,38 @@ sub dup_id {
return $self->{'dup_id'};
}
+sub _resolve_ultimate_dup_id {
+ my ($bug_id, $dupe_of, $loops_are_an_error) = @_;
+ my $dbh = Bugzilla->dbh;
+ my $sth = $dbh->prepare('SELECT dupe_of FROM duplicates WHERE dupe = ?');
+
+ my $this_dup = $dupe_of || $dbh->selectrow_array($sth, undef, $bug_id);
+ my $last_dup = $bug_id;
+
+ my %dupes;
+ while ($this_dup) {
+ if ($this_dup == $bug_id) {
+ if ($loops_are_an_error) {
+ ThrowUserError('dupe_loop_detected', { bug_id => $bug_id,
+ dupe_of => $dupe_of });
+ }
+ else {
+ return $last_dup;
+ }
+ }
+ # If $dupes{$this_dup} is already set to 1, then a loop
+ # already exists which does not involve this bug.
+ # As the user is not responsible for this loop, do not
+ # prevent him from marking this bug as a duplicate.
+ return $last_dup if exists $dupes{$this_dup};
+ $dupes{$this_dup} = 1;
+ $last_dup = $this_dup;
+ $this_dup = $dbh->selectrow_array($sth, undef, $this_dup);
+ }
+
+ return $last_dup;
+}
+
sub actual_time {
my ($self) = @_;
return $self->{'actual_time'} if exists $self->{'actual_time'};
diff --git a/Bugzilla/Constants.pm b/Bugzilla/Constants.pm
index 55ef4a0e3..9af9e7b72 100644
--- a/Bugzilla/Constants.pm
+++ b/Bugzilla/Constants.pm
@@ -175,6 +175,7 @@ use File::Basename;
MAX_FIELD_VALUE_SIZE
MAX_FREETEXT_LENGTH
MAX_BUG_URL_LENGTH
+ MAX_POSSIBLE_DUPLICATES
PASSWORD_DIGEST_ALGORITHM
PASSWORD_SALT_LENGTH
@@ -527,6 +528,10 @@ use constant MAX_FREETEXT_LENGTH => 255;
# The longest a bug URL in a BUG_URLS field can be.
use constant MAX_BUG_URL_LENGTH => 255;
+# The largest number of possible duplicates that Bug::possible_duplicates
+# will return.
+use constant MAX_POSSIBLE_DUPLICATES => 25;
+
# This is the name of the algorithm used to hash passwords before storing
# them in the database. This can be any string that is valid to pass to
# Perl's "Digest" module. Note that if you change this, it won't take
diff --git a/Bugzilla/DB.pm b/Bugzilla/DB.pm
index 8b8d74c90..8c1aba8dd 100644
--- a/Bugzilla/DB.pm
+++ b/Bugzilla/DB.pm
@@ -73,6 +73,11 @@ use constant ENUM_DEFAULTS => {
resolution => ["","FIXED","INVALID","WONTFIX", "DUPLICATE","WORKSFORME"],
};
+# The character that means "OR" in a boolean fulltext search. If empty,
+# the database doesn't support OR searches in fulltext searches.
+# Used by Bugzilla::Bug::possible_duplicates.
+use constant FULLTEXT_OR => '';
+
#####################################################################
# Connection Methods
#####################################################################
diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm
index 13069a78a..4b90a2a34 100644
--- a/Bugzilla/DB/Mysql.pm
+++ b/Bugzilla/DB/Mysql.pm
@@ -40,8 +40,8 @@ For interface details see L<Bugzilla::DB> and L<DBI>.
=cut
package Bugzilla::DB::Mysql;
-
use strict;
+use base qw(Bugzilla::DB);
use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
@@ -57,8 +57,7 @@ use Text::ParseWords;
# MAX_COMMENT_LENGTH is big.
use constant MAX_COMMENTS => 50;
-# This module extends the DB interface via inheritance
-use base qw(Bugzilla::DB);
+use constant FULLTEXT_OR => '|';
sub new {
my ($class, $params) = @_;
diff --git a/Bugzilla/DB/Oracle.pm b/Bugzilla/DB/Oracle.pm
index 6fa7a9869..a671a0e68 100644
--- a/Bugzilla/DB/Oracle.pm
+++ b/Bugzilla/DB/Oracle.pm
@@ -35,16 +35,14 @@ For interface details see L<Bugzilla::DB> and L<DBI>.
=cut
package Bugzilla::DB::Oracle;
-
use strict;
+use base qw(Bugzilla::DB);
use DBD::Oracle;
use DBD::Oracle qw(:ora_types);
use Bugzilla::Constants;
use Bugzilla::Error;
use Bugzilla::Util;
-# This module extends the DB interface via inheritance
-use base qw(Bugzilla::DB);
#####################################################################
# Constants
@@ -52,6 +50,7 @@ use base qw(Bugzilla::DB);
use constant EMPTY_STRING => '__BZ_EMPTY_STR__';
use constant ISOLATION_LEVEL => 'READ COMMITTED';
use constant BLOB_TYPE => { ora_type => ORA_BLOB };
+use constant FULLTEXT_OR => ' OR ';
sub new {
my ($class, $params) = @_;
diff --git a/Bugzilla/User.pm b/Bugzilla/User.pm
index aa01d93a5..6c7be2241 100644
--- a/Bugzilla/User.pm
+++ b/Bugzilla/User.pm
@@ -900,7 +900,7 @@ sub can_enter_product {
$product && grep($_->name eq $product->name,
@{ $self->get_enterable_products });
- return 1 if $can_enter;
+ return $product if $can_enter;
return 0 unless $warn == THROW_ERROR;
diff --git a/Bugzilla/WebService/Bug.pm b/Bugzilla/WebService/Bug.pm
index ee5591027..d5c1fab74 100644
--- a/Bugzilla/WebService/Bug.pm
+++ b/Bugzilla/WebService/Bug.pm
@@ -36,6 +36,7 @@ use Bugzilla::Util qw(trick_taint trim);
use Bugzilla::Version;
use Bugzilla::Milestone;
use Bugzilla::Status;
+use Bugzilla::Token qw(issue_hash_token);
#############
# Constants #
@@ -322,7 +323,7 @@ sub get {
else {
$bug = Bugzilla::Bug->check($bug_id);
}
- push(@bugs, $self->_bug_to_hash($bug));
+ push(@bugs, $self->_bug_to_hash($bug, $params));
}
return { bugs => \@bugs, faults => \@faults };
@@ -421,7 +422,28 @@ sub search {
my $bugs = Bugzilla::Bug->match($params);
my $visible = Bugzilla->user->visible_bugs($bugs);
- my @hashes = map { $self->_bug_to_hash($_) } @$visible;
+ my @hashes = map { $self->_bug_to_hash($_, $params) } @$visible;
+ return { bugs => \@hashes };
+}
+
+sub possible_duplicates {
+ my ($self, $params) = validate(@_, 'product');
+ my $user = Bugzilla->user;
+
+ # Undo the array-ification that validate() does, for "summary".
+ $params->{summary} || ThrowCodeError('param_required',
+ { function => 'Bug.possible_duplicates', param => 'summary' });
+
+ my @products;
+ foreach my $name (@{ $params->{'product'} || [] }) {
+ my $object = $user->can_enter_product($name, THROW_ERROR);
+ push(@products, $object);
+ }
+
+ my $possible_dupes = Bugzilla::Bug->possible_duplicates(
+ { summary => $params->{summary}, products => \@products,
+ limit => $params->{limit} });
+ my @hashes = map { $self->_bug_to_hash($_, $params) } @$possible_dupes;
return { bugs => \@hashes };
}
@@ -617,7 +639,7 @@ sub attachments {
# A helper for get() and search().
sub _bug_to_hash {
- my ($self, $bug) = @_;
+ my ($self, $bug, $filters) = @_;
# Timetracking fields are deleted if the user doesn't belong to
# the corresponding group.
@@ -646,6 +668,11 @@ sub _bug_to_hash {
$item{'component'} = $self->type('string', $bug->component);
$item{'dupe_of'} = $self->type('int', $bug->dup_id);
+ if (Bugzilla->user->id) {
+ my $token = issue_hash_token([$bug->id, $bug->delta_ts]);
+ $item{'update_token'} = $self->type('string', $token);
+ }
+
# if we do not delete this key, additional user info, including their
# real name, etc, will wind up in the 'internals' hashref
delete $item{internals}->{assigned_to_obj};
@@ -659,7 +686,7 @@ sub _bug_to_hash {
$item{'alias'} = undef;
}
- return \%item;
+ return filter $filters, \%item;
}
sub _attachment_to_hash {