From 4adf7f1b401955a1938cfc7a9decdc77af2fab20 Mon Sep 17 00:00:00 2001 From: "mkanat%bugzilla.org" <> Date: Sat, 24 Oct 2009 05:30:14 +0000 Subject: Bug 519584: Implement a framework for migrating from other bug-trackers, and start with a GNATS importer. Patch by Max Kanat-Alexander (module owner) a=mkanat --- Bugzilla.pm | 1 + Bugzilla/DB.pm | 10 + Bugzilla/DB/Schema/Mysql.pm | 5 + Bugzilla/DB/Schema/Oracle.pm | 9 + Bugzilla/DB/Schema/Pg.pm | 6 + Bugzilla/Install/Filesystem.pm | 1 + Bugzilla/Install/Requirements.pm | 4 +- Bugzilla/Migrate.pm | 1166 +++++++++++++++++++++++ Bugzilla/Migrate/Gnats.pm | 709 ++++++++++++++ contrib/bzdbcopy.pl | 3 +- migrate.pl | 110 +++ template/en/default/global/messages.html.tmpl | 38 + template/en/default/global/user-error.html.tmpl | 9 + template/en/default/setup/strings.txt.pl | 2 +- 14 files changed, 2069 insertions(+), 4 deletions(-) create mode 100644 Bugzilla/Migrate.pm create mode 100644 Bugzilla/Migrate/Gnats.pm create mode 100644 migrate.pl diff --git a/Bugzilla.pm b/Bugzilla.pm index 67ec611a9..a373aa801 100644 --- a/Bugzilla.pm +++ b/Bugzilla.pm @@ -67,6 +67,7 @@ our $_request_cache = {}; use constant SHUTDOWNHTML_EXEMPT => [ 'editparams.cgi', 'checksetup.pl', + 'migrate.pl', 'recode.pl', ]; diff --git a/Bugzilla/DB.pm b/Bugzilla/DB.pm index 24c1f24f9..a702a0f60 100644 --- a/Bugzilla/DB.pm +++ b/Bugzilla/DB.pm @@ -873,6 +873,16 @@ sub bz_rename_table { $self->_bz_store_real_schema; } +sub bz_set_next_serial_value { + my ($self, $table, $column, $value) = @_; + if (!$value) { + $value = $self->selectrow_array("SELECT MAX($column) FROM $table") || 0; + $value++; + } + my @sql = $self->_bz_real_schema->get_set_serial_sql($table, $column, $value); + $self->do($_) foreach @sql; +} + ##################################################################### # Schema Information Methods ##################################################################### diff --git a/Bugzilla/DB/Schema/Mysql.pm b/Bugzilla/DB/Schema/Mysql.pm index 
95ef3141e..a68c7c90d 100644 --- a/Bugzilla/DB/Schema/Mysql.pm +++ b/Bugzilla/DB/Schema/Mysql.pm @@ -263,6 +263,11 @@ sub get_rename_indexes_ddl { return ($sql); } +sub get_set_serial_sql { + my ($self, $table, $column, $value) = @_; + return ("ALTER TABLE $table AUTO_INCREMENT = $value"); +} + # Converts a DBI column_info output to an abstract column definition. # Expects to only be called by Bugzila::DB::Mysql::_bz_build_schema_from_disk, # although there's a chance that it will also work properly if called diff --git a/Bugzilla/DB/Schema/Oracle.pm b/Bugzilla/DB/Schema/Oracle.pm index 615987b06..814a842b3 100644 --- a/Bugzilla/DB/Schema/Oracle.pm +++ b/Bugzilla/DB/Schema/Oracle.pm @@ -403,4 +403,13 @@ sub _get_create_seq_ddl { return @ddl; } +sub get_set_serial_sql { + my ($self, $table, $column, $value) = @_; + my @sql; + my $seq_name = "${table}_${column}_SEQ"; + push(@sql, "DROP SEQUENCE ${seq_name}"); + push(@sql, $self->_get_create_seq_ddl($table, $column, $value)); + return @sql; +} + 1; diff --git a/Bugzilla/DB/Schema/Pg.pm b/Bugzilla/DB/Schema/Pg.pm index 070c0b03e..3559bae9c 100644 --- a/Bugzilla/DB/Schema/Pg.pm +++ b/Bugzilla/DB/Schema/Pg.pm @@ -119,6 +119,12 @@ sub get_rename_table_sql { return ("ALTER TABLE $old_name RENAME TO $new_name"); } +sub get_set_serial_sql { + my ($self, $table, $column, $value) = @_; + return ("SELECT setval('${table}_${column}_seq', $value, false) + FROM $table"); +} + sub _get_alter_type_sql { my ($self, $table, $column, $new_def, $old_def) = @_; my @statements; diff --git a/Bugzilla/Install/Filesystem.pm b/Bugzilla/Install/Filesystem.pm index 1bad3b85c..6c18d0213 100644 --- a/Bugzilla/Install/Filesystem.pm +++ b/Bugzilla/Install/Filesystem.pm @@ -118,6 +118,7 @@ sub FILESYSTEM { 'email_in.pl' => { perms => $ws_executable }, 'sanitycheck.pl' => { perms => $ws_executable }, 'jobqueue.pl' => { perms => $owner_executable }, + 'migrate.pl' => { perms => $owner_executable }, 'install-module.pl' => { perms => $owner_executable }, 
"$localconfig.old" => { perms => $owner_readable }, diff --git a/Bugzilla/Install/Requirements.pm b/Bugzilla/Install/Requirements.pm index 86b4813d1..2b545ebb8 100644 --- a/Bugzilla/Install/Requirements.pm +++ b/Bugzilla/Install/Requirements.pm @@ -462,7 +462,9 @@ sub print_module_instructions { } } - if ($output && $check_results->{any_missing} && !ON_WINDOWS) { + if ($output && $check_results->{any_missing} && !ON_WINDOWS + && !$check_results->{hide_all}) + { print install_string('install_all', { perl => $^X }); } if (!$check_results->{pass}) { diff --git a/Bugzilla/Migrate.pm b/Bugzilla/Migrate.pm new file mode 100644 index 000000000..c8f601521 --- /dev/null +++ b/Bugzilla/Migrate.pm @@ -0,0 +1,1166 @@ +# -*- Mode: perl; indent-tabs-mode: nil -*- +# +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. +# +# The Original Code is The Bugzilla Migration Tool. +# +# The Initial Developer of the Original Code is Lambda Research +# Corporation. Portions created by the Initial Developer are Copyright +# (C) 2009 the Initial Developer. All Rights Reserved. 
+#
+# Contributor(s):
+#   Max Kanat-Alexander <mkanat@bugzilla.org>
+
+package Bugzilla::Migrate;
+use strict;
+
+use Bugzilla::Attachment;
+use Bugzilla::Bug qw(LogActivityEntry);
+use Bugzilla::Component;
+use Bugzilla::Constants;
+use Bugzilla::Error;
+use Bugzilla::Install::Requirements ();
+use Bugzilla::Install::Util qw(indicate_progress);
+use Bugzilla::Product;
+use Bugzilla::Util qw(get_text trim generate_random_password);
+use Bugzilla::User ();
+use Bugzilla::Status ();
+use Bugzilla::Version;
+
+use Data::Dumper;
+use Date::Parse;
+use DateTime;
+use Fcntl qw(SEEK_SET);
+use File::Basename;
+use List::Util qw(first);
+use Safe;
+
+# Subclasses override these three constants to describe their tracker.
+use constant CUSTOM_FIELDS      => {};
+use constant REQUIRED_MODULES   => [];
+use constant NON_COMMENT_FIELDS => ();
+
+# Every variable that may appear in the migrator's configuration file.
+# "desc" is written verbatim above the variable by write_config(), and
+# "default" is the value that is dumped for it.
+use constant CONFIG_VARS => (
+    {
+        name    => 'translate_fields',
+        default => {},
+        desc    => <<'END',
+# This maps field names in your bug-tracker to Bugzilla field names. If a field
+# has the same name in your bug-tracker and Bugzilla (case-insensitively), it
+# doesn't need a mapping here. If a field isn't listed here and doesn't have
+# an equivalent field in Bugzilla, its data will be added to the initial
+# description of each bug migrated. If the right side is an empty string, it
+# means "just put the value of this field into the initial description of the
+# bug".
+#
+# Generally, you can keep the defaults, here.
+#
+# If you want to know the internal names of various Bugzilla fields
+# (as used on the right side here), see the fielddefs table in the Bugzilla
+# database.
+#
+# If you are mapping to any custom fields in Bugzilla, you have to create
+# the custom fields using Bugzilla Administration interface before you run
+# migrate.pl. However, if they are drop down or multi-select fields, you
+# don't have to populate the list of values--migrate.pl will do that for you.
+# Some migrators create certain custom fields by default. If you see a
+# field name starting with "cf_" on the right side of this configuration
+# variable by default, then that field will be automatically created by
+# the migrator and you don't have to worry about it.
+END
+    },
+    {
+        name    => 'translate_values',
+        default => {},
+        desc    => <<'END',
+# This configuration variable allows you to say that a particular field
+# value in your current bug-tracker should be translated to a different
+# value when it's imported into Bugzilla.
+#
+# The value of this variable should look something like this:
+#
+# {
+#     bug_status => {
+#         # Translate "Handled" into "RESOLVED".
+#         "Handled" => "RESOLVED",
+#         "In Progress" => "ASSIGNED",
+#     },
+#
+#     priority => {
+#         # Translate "Serious" into "Highest"
+#         "Serious" => "Highest",
+#     },
+# };
+#
+# Values are translated case-insensitively, so "foo" will match "Foo", "FOO",
+# and "foo".
+#
+# Note that the field names used are *Bugzilla* field names (from the fielddefs
+# table in the database), not the field names from your current bug-tracker.
+#
+# The special field name "user" will be used to translate any field that
+# can contain a user, including reporter, assigned_to, qa_contact, and cc.
+# You should use "user" instead of specifying reporter, assigned_to, etc.
+# manually.
+#
+# The special field "bug_status_resolution" can be used to give certain
+# statuses in your bug-tracker a resolution in Bugzilla. So, for example,
+# you could translate the "fixed" status in your Bugzilla to "RESOLVED"
+# in the "bug_status" field, and then put "fixed => 'FIXED'" in the
+# "bug_status_resolution" field to translated a "fixed" bug into
+# RESOLVED FIXED in Bugzilla.
+#
+# Values that don't get translated will be imported as-is.
+END
+    },
+    {
+        name    => 'starting_bug_id',
+        default => 0,
+        desc    => <<'END',
+# What bug ID do you want the first imported bug to get? If you set this to
+# 0, then the imported bug ids will just start right after the current
+# bug ids. If you use this configuration variable, you must make sure that
+# nobody else is using your Bugzilla while you run the migration, or a new
+# bug filed by a user might take this ID instead.
+END
+    },
+    {
+        name    => 'timezone',
+        default => 'local',
+        desc    => <<'END',
+# If migrate.pl comes across any dates without timezones, while doing the
+# migration, what timezone should we assume those dates are in?
+# The best format for this variable is something like "America/Los Angeles".
+# However, time zone abbreviations (like PST, PDT, etc.) are also acceptable,
+# but will result in a less-accurate conversion of times and dates.
+#
+# The special value "local" means "use the same timezone as the system I
+# am running this script on now".
+END
+    },
+);
+
+# Fields whose values are user names; translate_value() also consults the
+# special "user" value map for them.
+use constant USER_FIELDS => qw(user assigned_to qa_contact reporter cc);
+
+#########################
+# Main Migration Method #
+#########################
+
+# Runs the complete migration: reads users, products and bugs from the
+# other tracker, translates the bugs, and inserts everything into Bugzilla
+# inside one transaction. In dry-run mode the transaction is rolled back
+# and the serial values are reset afterwards.
+sub do_migration {
+    my $self = shift;
+    my $dbh = Bugzilla->dbh;
+    # On MySQL, setting serial values implicitly commits a transaction,
+    # so we want to do it up here, outside of any transaction. This also
+    # has the advantage of loading the config before anything else is done.
+    if ($self->config('starting_bug_id')) {
+        $dbh->bz_set_next_serial_value('bugs', 'bug_id',
+                                       $self->config('starting_bug_id'));
+    }
+    $dbh->bz_start_transaction();
+
+    # Read Other Database
+    my $users    = $self->users;
+    my $products = $self->products;
+    my $bugs     = $self->bugs;
+    $self->after_read();
+
+    $self->translate_all_bugs($bugs);
+
+    Bugzilla->set_user(Bugzilla::User->super_user);
+
+    # Insert into Bugzilla
+    $self->before_insert();
+    $self->insert_users($users);
+    $self->insert_products($products);
+    $self->create_custom_fields();
+    $self->create_legal_values($bugs);
+    $self->insert_bugs($bugs);
+    $self->after_insert();
+    if ($self->dry_run) {
+        $dbh->bz_rollback_transaction();
+        $self->reset_serial_values();
+    }
+    else {
+        $dbh->bz_commit_transaction();
+    }
+}
+
+################
+# Constructors #
+################
+
+# Creates an empty migrator object of the invoking class.
+sub new {
+    my ($class) = @_;
+    my $self = { };
+    bless $self, $class;
+    return $self;
+}
+
+# Finds the module under Bugzilla/Migrate/ whose file name matches $from
+# (case-insensitively), loads it, and returns a new object of that class.
+# Throws the "migrate_from_invalid" user error if no module matches.
+sub load {
+    my ($class, $from) = @_;
+    my $libdir = bz_locations()->{libpath};
+    my @migration_modules = glob("$libdir/Bugzilla/Migrate/*");
+    my ($module) = grep { basename($_) =~ /^\Q$from\E\.pm$/i }
+                        @migration_modules;
+    if (!$module) {
+        ThrowUserError('migrate_from_invalid', { from => $from });
+    }
+    require $module;
+    my $canonical_name = _canonical_name($module);
+    return "Bugzilla::Migrate::$canonical_name"->new;
+}
+
+#############
+# Accessors #
+#############
+
+# The last component of this object's class name (e.g. "Gnats").
+sub name {
+    my $self = shift;
+    return _canonical_name(ref $self);
+}
+
+# Getter/setter for the dry-run flag; returns 0 when it was never set.
+sub dry_run {
+    my ($self, $value) = @_;
+    if (scalar(@_) > 1) {
+        $self->{dry_run} = $value;
+    }
+    return $self->{dry_run} || 0;
+}
+
+# Getter/setter for the verbosity level; returns 0 when it was never set.
+sub verbose {
+    my ($self, $value) = @_;
+    if (scalar(@_) > 1) {
+        $self->{verbose} = $value;
+    }
+    return $self->{verbose} || 0;
+}
+
+# Prints $value to STDERR if the verbosity level is at least $level
+# (default 1). References are printed with Data::Dumper.
+sub debug {
+    my ($self, $value, $level) = @_;
+    $level ||= 1;
+    if ($self->verbose >= $level) {
+        $value = Dumper($value) if ref $value;
+        print STDERR $value, "\n";
+    }
+}
+
+# A cached hashref of field name => field object, built from
+# Bugzilla->get_fields.
+sub bug_fields {
+    my $self = shift;
+    $self->{bug_fields} ||= { map { $_->{name} => $_ } Bugzilla->get_fields };
+    return $self->{bug_fields};
+}
+
+# The users, products and bugs accessors each call the subclass's _read_*
+# method once and cache the result on the object.
+sub users {
+    my $self = shift;
+    if (!exists $self->{users}) {
+        print get_text('migrate_reading_users'), "\n";
+        $self->{users} = $self->_read_users();
+    }
+    return $self->{users};
+}
+
+sub products {
+    my $self = shift;
+    if (!exists $self->{products}) {
+        print get_text('migrate_reading_products'), "\n";
+        $self->{products} = $self->_read_products();
+    }
+    return $self->{products};
+}
+
+sub bugs {
+    my $self = shift;
+    if (!exists $self->{bugs}) {
+        print get_text('migrate_reading_bugs'), "\n";
+        $self->{bugs} = $self->_read_bugs();
+    }
+    return $self->{bugs};
+}
+
+###########
+# Methods #
+###########
+
+# Checks that every module in REQUIRED_MODULES is installed, prints
+# installation instructions for the missing ones, and exits with status 1
+# if any are missing.
+sub check_requirements {
+    my $self = shift;
+    my $missing = Bugzilla::Install::Requirements::_check_missing(
+        $self->REQUIRED_MODULES, 1);
+    my %results = (
+        pass        => @$missing ? 0 : 1,
+        missing     => $missing,
+        any_missing => @$missing ? 1 : 0,
+        hide_all    => 1,
+        # These are just for compatibility with print_module_instructions
+        one_dbd     => 1,
+        optional    => [],
+    );
+    Bugzilla::Install::Requirements::print_module_instructions(
+        \%results, 1);
+    exit(1) if @$missing;
+}
+
+# Resets the serial columns of the tables listed below, and of every
+# select field except "product", to the next free value. Does the work
+# at most once per object.
+sub reset_serial_values {
+    my $self = shift;
+    return if $self->{serial_values_reset};
+    my $dbh = Bugzilla->dbh;
+    my %reset = (
+        'bugs'        => 'bug_id',
+        'attachments' => 'attach_id',
+        'profiles'    => 'userid',
+        'longdescs'   => 'comment_id',
+        'products'    => 'id',
+        'components'  => 'id',
+        'versions'    => 'id',
+        'milestones'  => 'id',
+    );
+    my @select_fields = grep { $_->is_select } (values %{ $self->bug_fields });
+    foreach my $field (@select_fields) {
+        next if $field->name eq 'product';
+        $reset{$field->name} = 'id';
+    }
+
+    while (my ($table, $column) = each %reset) {
+        $dbh->bz_set_next_serial_value($table, $column);
+    }
+
+    $self->{serial_values_reset} = 1;
+}
+
+###################
+# Bug Translation #
+###################
+
+# Replaces every element of @$bugs with its translated form.
+sub translate_all_bugs {
+    my ($self, $bugs) = @_;
+    print get_text('migrate_translating_bugs'), "\n";
+    # We modify the array in place so that $self->bugs will return the
+    # modified bugs, in case $self->before_insert wants them.
+    # ($bug is an alias to the array element, so assigning replaces it.)
+    foreach my $bug (@$bugs) {
+        $bug = $self->translate_bug($bug);
+    }
+}
+
+# Converts one hashref of "other tracker" fields into Bugzilla fields.
+# The input hashref is emptied. Fields without a Bugzilla equivalent are
+# folded into the "comment" element by _generate_description(). In scalar
+# context returns the bug hashref; in list context also returns the
+# hashref of untranslated fields that were left out of the comment.
+sub translate_bug {
+    my ($self, $fields) = @_;
+    my (%bug, %other_fields);
+    my $original_status;
+    foreach my $field (keys %$fields) {
+        my $value = delete $fields->{$field};
+        my $bz_field = $self->translate_field($field);
+        if ($bz_field) {
+            $bug{$bz_field} = $self->translate_value($bz_field, $value);
+            if ($bz_field eq 'bug_status') {
+                $original_status = $value;
+            }
+        }
+        else {
+            $other_fields{$field} = $value;
+        }
+    }
+
+    # The untranslated status may imply a resolution.
+    if (defined $original_status and !defined $bug{resolution}
+        and $self->map_value('bug_status_resolution', $original_status))
+    {
+        $bug{resolution} = $self->map_value('bug_status_resolution',
+                                            $original_status);
+    }
+
+    $bug{comment} = $self->_generate_description(\%bug, \%other_fields);
+
+    return wantarray ? (\%bug, \%other_fields) : \%bug;
+}
+
+# Builds the initial comment: one "field: value" line for every field in
+# %$fields that is not listed in NON_COMMENT_FIELDS (those keys are deleted
+# from %$fields as they are used), followed by the bug's own comment.
+sub _generate_description {
+    my ($self, $bug, $fields) = @_;
+
+    my $description = "";
+    foreach my $field (sort keys %$fields) {
+        next if grep($_ eq $field, $self->NON_COMMENT_FIELDS);
+        my $value = delete $fields->{$field};
+        # Undefined values carry no information and would only warn.
+        next if !defined $value;
+        # Show the elements of a multi-valued field, not "ARRAY(0x...)".
+        if (ref($value) eq 'ARRAY') {
+            $value = join(', ', grep { defined $_ } @$value);
+        }
+        next if $value eq '';
+        $description .= "$field: $value\n";
+    }
+    $description .= "\n" if $description;
+
+    # A bug may have no comment of its own.
+    my $comment = defined $bug->{comment} ? $bug->{comment} : '';
+    return $description . $comment;
+}
+
+# Returns the Bugzilla field name for a field of the other tracker: the
+# "translate_fields" mapping if there is one (possibly an empty string),
+# otherwise the Bugzilla field with the same name, compared
+# case-insensitively, or undef if there is none.
+sub translate_field {
+    my ($self, $field) = @_;
+    my $mapped = $self->config('translate_fields')->{$field};
+    return $mapped if defined $mapped;
+    ($mapped) = grep { lc($_) eq lc($field) } (keys %{ $self->bug_fields });
+    return $mapped;
+}
+
+# Parses $date with Date::Parse and returns it as an ISO 8601 string in
+# Bugzilla's local timezone. Dates without a timezone are assumed to be in
+# the "timezone" configuration variable. Dies if the date cannot be parsed.
+sub parse_date {
+    my ($self, $date) = @_;
+    my $original_date = $date;
+    my @time = strptime($date);
+    # Handle times with timezones that strptime doesn't know about.
+    if (!scalar @time) {
+        $date =~ s/\s+\S+$//;
+        @time = strptime($date);
+    }
+    # Name the offending value instead of letting DateTime complain about
+    # undefined components.
+    if (!scalar @time) {
+        die "Could not parse the date '$original_date'\n";
+    }
+    my $tz;
+    # The zone is undefined when the date has no timezone. An offset of 0
+    # (UTC) is a real timezone, so test for definedness, not for truth.
+    if (defined $time[6]) {
+        $tz = Bugzilla->local_timezone->offset_as_string($time[6]);
+    }
+    else {
+        $tz = $self->config('timezone');
+        $tz =~ s/\s/_/g;
+        if ($tz eq 'local') {
+            $tz = Bugzilla->local_timezone;
+        }
+    }
+    # strptime leaves the time parts undefined for date-only strings;
+    # treat those as midnight.
+    my ($second, $minute, $hour) = map { defined $_ ? $_ : 0 } @time[0 .. 2];
+    my $dt = DateTime->new({
+        year      => $time[5] + 1900,
+        month     => $time[4] + 1,
+        day       => $time[3],
+        hour      => $hour,
+        minute    => $minute,
+        second    => int($second),
+        time_zone => $tz,
+    });
+    $dt->set_time_zone(Bugzilla->local_timezone);
+    return $dt->iso8601;
+}
+
+# Translates one value of a Bugzilla field using the "translate_values"
+# configuration. Arrayrefs are translated element by element. Values of
+# creation_ts, delta_ts and datetime fields are passed through parse_date()
+# (undef if blank). Anything that has no mapping is returned as-is.
+sub translate_value {
+    my ($self, $field, $value) = @_;
+
+    if (!defined $value) {
+        warn("Got undefined value for $field\n");
+        $value = '';
+    }
+
+    if (ref($value) eq 'ARRAY') {
+        return [ map($self->translate_value($field, $_), @$value) ];
+    }
+
+    if (defined $self->map_value($field, $value)) {
+        return $self->map_value($field, $value);
+    }
+
+    if (grep($_ eq $field, USER_FIELDS)) {
+        if (defined $self->map_value('user', $value)) {
+            return $self->map_value('user', $value);
+        }
+    }
+
+    my $field_obj = $self->bug_fields->{$field};
+    if ($field eq 'creation_ts' or $field eq 'delta_ts'
+        or ($field_obj and $field_obj->type == FIELD_TYPE_DATETIME))
+    {
+        $value = trim($value);
+        # An explicit undef, so that the list-context call in the map()
+        # above still yields one element per input value.
+        return undef if !$value;
+        return $self->parse_date($value);
+    }
+
+    return $value;
+}
+
+# Looks up $value (case-insensitively) in the value map of $field.
+sub map_value {
+    my ($self, $field, $value) = @_;
+    return $self->_value_map->{$field}->{lc($value)};
+}
+
+# The "translate_values" configuration with all source values lowercased,
+# built once and cached on the object.
+sub _value_map {
+    my $self = shift;
+    if (!defined $self->{_value_map}) {
+        # Lowercase all values to make them case-insensitive.
+        my %map;
+        my $translation = $self->config('translate_values');
+        foreach my $field (keys %$translation) {
+            my $value_mapping = $translation->{$field};
+            foreach my $value (keys %$value_mapping) {
+                $map{$field}->{lc($value)} = $value_mapping->{$value};
+            }
+        }
+        $self->{_value_map} = \%map;
+    }
+    return $self->{_value_map};
+}
+
+#################
+# Configuration #
+#################
+
+# Returns the value of one configuration variable, reading the
+# configuration file on first use.
+sub config {
+    my ($self, $var) = @_;
+    if (!exists $self->{config}) {
+        $self->{config} = $self->read_config;
+    }
+    return $self->{config}->{$var};
+}
+
+# The path of this migrator's configuration file in the data directory.
+sub config_file_name {
+    my $self = shift;
+    my $name = $self->name;
+    my $dir = bz_locations()->{datadir};
+    return "$dir/migrate-$name.cfg";
+}
+
+# Evaluates the configuration file inside a Safe compartment and returns a
+# hashref with the value of every variable named in CONFIG_VARS. If the
+# file does not exist yet, it is written with the defaults and the
+# "migrate_config_created" user error is thrown. Dies if the file cannot
+# be read or evaluated.
+sub read_config {
+    my ($self) = @_;
+    my $file = $self->config_file_name;
+    if (!-e $file) {
+        $self->write_config();
+        ThrowUserError('migrate_config_created', { file => $file });
+    }
+    # Check readability up front so that the error names the file, then
+    # release the handle again: rdo() opens the file by name itself.
+    open(my $fh, "<", $file) || die "$file: $!";
+    close($fh);
+    my $safe = Safe->new;
+    $safe->rdo($file);
+    # Report a broken configuration file here instead of continuing with
+    # an empty configuration.
+    die "$file: $@" if $@;
+    my @read_symbols = map($_->{name}, $self->CONFIG_VARS);
+    my %config;
+    foreach my $var (@read_symbols) {
+        my $glob = $safe->varglob($var);
+        $config{$var} = $$glob;
+    }
+    return \%config;
+}
+
+# Writes a configuration file containing the description and the default
+# value of every variable in CONFIG_VARS.
+sub write_config {
+    my ($self) = @_;
+    my $file = $self->config_file_name;
+    open(my $fh, ">", $file) || die "$file: $!";
+    # Fixed indentation
+    local $Data::Dumper::Indent = 1;
+    local $Data::Dumper::Quotekeys = 0;
+    local $Data::Dumper::Sortkeys = 1;
+    foreach my $var ($self->CONFIG_VARS) {
+        print $fh "\n", $var->{desc},
+              Data::Dumper->Dump([$var->{default}], [$var->{name}]);
+    }
+    # Buffered write errors only show up when the handle is closed.
+    close($fh) || die "$file: $!";
+}
+
+####################################
+# Default Implementations of Hooks #
+####################################
+
+sub after_insert {}
+sub before_insert {}
+sub after_read {}
+
+#############
+# Inserters #
+#############
+
+# Creates every user that doesn't exist yet. Users without a
+# "cryptpassword" get a generated, lowercased random password, which is
+# printed along with the created user.
+sub insert_users {
+    my ($self, $users) = @_;
+    foreach my $user (@$users) {
+        next if Bugzilla::User->new({ name => $user->{login_name} });
+        my $generated_password;
+        if (!defined $user->{cryptpassword}) {
+            $generated_password = lc(generate_random_password());
+            $user->{cryptpassword} = $generated_password;
+        }
+        my $created = Bugzilla::User->create($user);
+        print get_text('migrate_user_created',
+                       { created  => $created,
+                         password => $generated_password }), "\n";
+    }
+}
+
+# Creates every product, and every component of each product, that doesn't
+# exist yet. The "components" key is removed from each product hash.
+sub insert_products {
+    my ($self, $products) = @_;
+    foreach my $product (@$products) {
+        my $components = delete $product->{components};
+
+        my $created_prod = Bugzilla::Product->new({ name => $product->{name} });
+        if (!$created_prod) {
+            $created_prod = Bugzilla::Product->create($product);
+            print get_text('migrate_product_created',
+                           { created => $created_prod }), "\n";
+        }
+
+        foreach my $component (@$components) {
+            next if Bugzilla::Component->new({ product => $created_prod,
+                                               name    => $component->{name} });
+            my $created_comp = Bugzilla::Component->create(
+                { %$component, product => $created_prod });
+            print ' ', get_text('migrate_component_created',
+                                { comp    => $created_comp,
+                                  product => $created_prod }), "\n";
+        }
+    }
+}
+
+# Creates the fields listed in CUSTOM_FIELDS that don't exist yet. In
+# dry-run mode nothing is created; the would-be field is only reported.
+# The bug_fields cache is dropped afterwards so that it gets rebuilt.
+sub create_custom_fields {
+    my $self = shift;
+    foreach my $field (keys %{ $self->CUSTOM_FIELDS }) {
+        next if Bugzilla::Field->new({ name => $field });
+        my %values = %{ $self->CUSTOM_FIELDS->{$field} };
+        # We set these all here for the dry-run case.
+        my $created = { %values, name => $field, custom => 1 };
+        if (!$self->dry_run) {
+            $created = Bugzilla::Field->create($created);
+        }
+        print get_text('migrate_field_created', { field => $created }), "\n";
+    }
+    delete $self->{bug_fields};
+}
+
+# Creates every select-field value, product version and target milestone
+# that is used by the bugs being imported and doesn't exist yet. Bugs with
+# a blank version or target_milestone get the first one of their product.
+sub create_legal_values {
+    my ($self, $bugs) = @_;
+    my @select_fields = grep($_->is_select, values %{ $self->bug_fields });
+
+    # Get all the values in use on all the bugs we're importing.
+    my (%values, %product_values);
+    foreach my $bug (@$bugs) {
+        foreach my $field (@select_fields) {
+            my $name = $field->name;
+            next if !defined $bug->{$name};
+            # translate_value() returns an arrayref for multi-valued
+            # fields; every element is a value of its own.
+            my @used = ref($bug->{$name}) eq 'ARRAY' ? @{ $bug->{$name} }
+                                                     : ($bug->{$name});
+            foreach my $used_value (@used) {
+                next if !defined $used_value;
+                $values{$name}->{$used_value} = 1;
+            }
+        }
+        foreach my $field (qw(version target_milestone)) {
+            # Fix per-product bug values here, because it's easier than
+            # doing it during _insert_bugs.
+            if (!defined $bug->{$field} or trim($bug->{$field}) eq '') {
+                # version => versions, target_milestone => milestones
+                my $accessor = $field;
+                $accessor =~ s/^target_//;
+                $accessor .= "s";
+                my $product = Bugzilla::Product->check($bug->{product});
+                $bug->{$field} = $product->$accessor->[0]->name;
+                next;
+            }
+            $product_values{$bug->{product}}->{$field}->{$bug->{$field}} = 1;
+        }
+    }
+
+    foreach my $field (@select_fields) {
+        my $name = $field->name;
+        foreach my $value (keys %{ $values{$name} }) {
+            next if Bugzilla::Field::Choice->type($field)->new({ name => $value });
+            Bugzilla::Field::Choice->type($field)->create({ value => $value });
+            print get_text('migrate_value_created',
+                           { field => $field, value => $value }), "\n";
+        }
+    }
+
+    foreach my $product (keys %product_values) {
+        my $prod_obj = Bugzilla::Product->check($product);
+        foreach my $version (keys %{ $product_values{$product}->{version} }) {
+            next if Bugzilla::Version->new({ product => $prod_obj,
+                                             name    => $version });
+            my $created = Bugzilla::Version->create({ product => $prod_obj,
+                                                      name    => $version });
+            my $field = $self->bug_fields->{version};
+            print get_text('migrate_value_created',
+                           { product => $prod_obj,
+                             field   => $field,
+                             value   => $created->name }), "\n";
+        }
+        foreach my $milestone
+            (keys %{ $product_values{$product}->{target_milestone} })
+        {
+            next if Bugzilla::Milestone->new({ product => $prod_obj,
+                                               name    => $milestone });
+            my $created = Bugzilla::Milestone->create({ product => $prod_obj,
+                                                        name    => $milestone });
+            my $field = $self->bug_fields->{target_milestone};
+            print get_text('migrate_value_created',
+                           { product => $prod_obj,
+                             field   => $field,
+                             value   => $created->name }), "\n";
+        }
+    }
+}
+
+# Creates every bug with Bugzilla::Bug->create, acting as the bug's
+# reporter, and then inserts its comments, history and attachments.
+# Statuses that aren't valid for a new bug, and resolutions, are written
+# directly to the bugs table after the bug has been created.
+sub insert_bugs {
+    my ($self, $bugs) = @_;
+    my $dbh = Bugzilla->dbh;
+    print get_text('migrate_creating_bugs'), "\n";
+
+    my $init_statuses = Bugzilla::Status->can_change_to();
+    my %allowed_statuses = map { lc($_->name) => 1 } @$init_statuses;
+    # Bypass the question of whether or not we can file UNCONFIRMED
+    # in any product by simply picking a non-UNCONFIRMED status as our
+    # default for bugs that don't have a status specified.
+    # We keep the *name*, not the object: $bug->{bug_status} is passed
+    # through lc() and compared with status names further down.
+    my $default_status_obj =
+        first { $_->name ne 'UNCONFIRMED' } @$init_statuses;
+    my $default_status =
+        $default_status_obj ? $default_status_obj->name : undef;
+    # Use the first resolution that's not blank. Again the name, because
+    # $bug->{resolution} is looked up by name below.
+    my $default_resolution_obj =
+        first { $_->name ne '' }
+              @{ $self->bug_fields->{resolution}->legal_values };
+    my $default_resolution =
+        $default_resolution_obj ? $default_resolution_obj->name : undef;
+
+    # Set the values of any required drop-down fields that aren't set.
+    my @standard_drop_downs = grep { !$_->custom and $_->is_select }
+                                   (values %{ $self->bug_fields });
+    # Make bug_status get set before resolution.
+    @standard_drop_downs = sort { $a->name cmp $b->name } @standard_drop_downs;
+    # Cache all statuses for setting the resolution.
+    my %statuses = map { lc($_->name) => $_ } Bugzilla::Status->get_all;
+
+    my $total = scalar @$bugs;
+    my $count = 1;
+    foreach my $bug (@$bugs) {
+        my $comments    = delete $bug->{comments};
+        my $history     = delete $bug->{history};
+        my $attachments = delete $bug->{attachments};
+
+        $self->debug($bug, 3);
+
+        foreach my $field (@standard_drop_downs) {
+            my $field_name = $field->name;
+            next if $field_name eq 'product';
+            if (!defined $bug->{$field_name}) {
+                # If there's a default value for this, then just let create()
+                # pick it.
+                next if grep($_->is_default, @{ $field->legal_values });
+                # Otherwise, pick the first valid value if this is a required
+                # field.
+                if ($field_name eq 'bug_status') {
+                    $bug->{bug_status} = $default_status;
+                }
+                elsif ($field_name eq 'resolution') {
+                    my $status = $statuses{lc($bug->{bug_status})};
+                    # Only closed statuses need a resolution.
+                    if ($status and !$status->is_open) {
+                        $bug->{resolution} = $default_resolution;
+                    }
+                }
+                else {
+                    $bug->{$field_name} = $field->legal_values->[0]->name;
+                }
+            }
+        }
+
+        my $product = Bugzilla::Product->check($bug->{product});
+
+        # If this isn't a legal starting status, or if the bug has a
+        # resolution, then those will have to be set after creating the bug.
+        # We make them into objects so that we can normalize their names.
+        my ($set_status, $set_resolution);
+        if (defined $bug->{resolution}) {
+            $set_resolution = Bugzilla::Field::Choice->type('resolution')
+                              ->new({ name => $bug->{resolution} });
+        }
+        if (!$allowed_statuses{lc($bug->{bug_status})}) {
+            $set_status = Bugzilla::Status->new({ name => $bug->{bug_status} });
+            # Set the starting status to some status that Bugzilla will
+            # accept. We're going to overwrite it immediately afterward.
+            $bug->{bug_status} = $default_status;
+        }
+
+        # If we're in dry-run mode, our custom fields haven't been created
+        # yet, so we shouldn't try to set them on creation.
+        if ($self->dry_run) {
+            foreach my $field (keys %{ $self->CUSTOM_FIELDS }) {
+                delete $bug->{$field};
+            }
+        }
+
+        # File the bug as the reporter.
+        my $super_user = Bugzilla->user;
+        my $reporter = Bugzilla::User->check($bug->{reporter});
+        # Allow the user to file a bug in any product, no matter his current
+        # permissions.
+        $reporter->{groups} = $super_user->groups;
+        Bugzilla->set_user($reporter);
+        my $created = Bugzilla::Bug->create($bug);
+        $self->debug('Created bug ' . $created->id);
+        Bugzilla->set_user($super_user);
+
+        if (defined $bug->{delta_ts}) {
+            $dbh->do('UPDATE bugs SET delta_ts = ? WHERE bug_id = ?',
+                     undef, $bug->{delta_ts}, $created->id);
+        }
+        # We don't need to send email for imported bugs.
+        $dbh->do('UPDATE bugs SET lastdiffed = delta_ts WHERE bug_id = ?',
+                 undef, $created->id);
+
+        # We don't use set_ and update() because that would create
+        # a bugs_activity entry that we don't want.
+        if ($set_status) {
+            $dbh->do('UPDATE bugs SET bug_status = ? WHERE bug_id = ?',
+                     undef, $set_status->name, $created->id);
+        }
+        if ($set_resolution) {
+            $dbh->do('UPDATE bugs SET resolution = ? WHERE bug_id = ?',
+                     undef, $set_resolution->name, $created->id);
+        }
+
+        $self->_insert_comments($created, $comments);
+        $self->_insert_history($created, $history);
+        $self->_insert_attachments($created, $attachments);
+
+        # bugs_fulltext isn't transactional, so if we're in a dry-run we
+        # need to delete anything that we put in there.
+        if ($self->dry_run) {
+            $dbh->do('DELETE FROM bugs_fulltext WHERE bug_id = ?',
+                     undef, $created->id);
+        }
+
+        if (!$self->verbose) {
+            indicate_progress({ current => $count++, every => 5,
+                                total   => $total });
+        }
+    }
+}
+
+# Inserts each comment hash as a row of the longdescs table, replacing
+# "who" (a user name) with the user's id and adding the bug id, and then
+# re-syncs the bug's fulltext data.
+sub _insert_comments {
+    my ($self, $bug, $comments) = @_;
+    return if !$comments;
+    $self->debug(' Inserting comments:', 2);
+    foreach my $comment (@$comments) {
+        $self->debug($comment, 3);
+        my %copy = %$comment;
+        # XXX In the future, if we have a Bugzilla::Comment->create, this
+        # should use it.
+        my $who = Bugzilla::User->check(delete $copy{who});
+        $copy{who} = $who->id;
+        $copy{bug_id} = $bug->id;
+        $self->_do_table_insert('longdescs', \%copy);
+        $self->debug(" Inserted comment from " . $who->login, 2);
+    }
+    $bug->_sync_fulltext();
+}
+
+# Records each history item with LogActivityEntry, using the id of the
+# user named in "who".
+sub _insert_history {
+    my ($self, $bug, $history) = @_;
+    return if !$history;
+    $self->debug(' Inserting history:', 2);
+    foreach my $item (@$history) {
+        $self->debug($item, 3);
+        my $who = Bugzilla::User->check($item->{who});
+        LogActivityEntry($bug->id, $item->{field}, $item->{removed},
+                         $item->{added}, $who->id, $item->{bug_when});
+        $self->debug(" $item->{field} change from " . $who->login, 2);
+    }
+}
+
+# Creates each attachment with Bugzilla::Attachment->create, acting as the
+# user named in "submitter" (which is removed from the attachment hash).
+sub _insert_attachments {
+    my ($self, $bug, $attachments) = @_;
+    return if !$attachments;
+    $self->debug(' Inserting attachments:', 2);
+    foreach my $attachment (@$attachments) {
+        $self->debug($attachment, 3);
+        # Make sure that our pointer is at the beginning of the file,
+        # because usually it will be at the end, having just been fully
+        # written to.
+        if (ref $attachment->{data}) {
+            $attachment->{data}->seek(0, SEEK_SET);
+        }
+
+        my $submitter = Bugzilla::User->check(delete $attachment->{submitter});
+        my $super_user = Bugzilla->user;
+        # Make sure the submitter can attach this attachment no matter what.
+        $submitter->{groups} = $super_user->groups;
+        Bugzilla->set_user($submitter);
+        my $created =
+            Bugzilla::Attachment->create({ %$attachment, bug => $bug });
+        $self->debug(' Attachment ' . $created->description . ' from '
+                     . $submitter->login, 2);
+        Bugzilla->set_user($super_user);
+    }
+}
+
+# Inserts one row into $table; the keys of %$hash are the column names and
+# the values are bound as placeholders.
+sub _do_table_insert {
+    my ($self, $table, $hash) = @_;
+    my @fields    = keys %$hash;
+    my @questions = ('?') x @fields;
+    my @values    = map { $hash->{$_} } @fields;
+    my $field_sql    = join(',', @fields);
+    my $question_sql = join(',', @questions);
+    Bugzilla->dbh->do("INSERT INTO $table ($field_sql) VALUES ($question_sql)",
+                      undef, @values);
+}
+
+######################
+# Helper Subroutines #
+######################
+
+# Reduces a module file path or a package name to its last component,
+# without the ".pm" suffix.
+sub _canonical_name {
+    my ($module) = @_;
+    $module =~ s{::}{/}g;
+    $module = basename($module);
+    $module =~ s/\.pm$//g;
+    return $module;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+Bugzilla::Migrate - Functions to migrate from other databases
+
+=head1 DESCRIPTION
+
+This module acts as a base class for the various modules that migrate
+from other bug-trackers.
+
+The documentation for this module exists mostly to assist people in
+creating new migrators for other bug-trackers than the ones currently
+supported.
+
+=head1 HOW MIGRATION WORKS
+
+Before writing anything to the Bugzilla database, the migrator will read
+everything from the other bug-tracker's database. Here's the exact order
+of what happens:
+
+=over
+
+=item 1
+
+Users are read from the other bug-tracker.
+
+=item 2
+
+Products are read from the other bug-tracker.
+
+=item 3
+
+Bugs are read from the other bug-tracker.
+
+=item 4
+
+The L</after_read> method is called.
+
+=item 5
+
+All bugs are translated from the other bug-tracker's fields/values
+into Bugzilla's fields/values using L</translate_bug>.
+
+=item 6
+
+Users are inserted into Bugzilla.
+
+=item 7
+
+Products are inserted into Bugzilla.
+
+=item 8
+
+Some migrators need to create custom fields before migrating, and
+so that happens here.
+
+=item 9
+
+Any legal values that need to be created for any drop-down or
+multi-select fields are created. This is done by reading all the
+values on every bug that was read in and creating any values that
+don't already exist in Bugzilla for every drop-down or multi-select
+field on each bug. This includes creating any product versions and
+milestones that need to be created.
+
+=item 10
+
+Bugs are inserted into Bugzilla.
+
+=item 11
+
+The L</after_insert> method is called.
+
+=back
+
+Everything happens in one big transaction, so in general, if there are
+any errors during the process, nothing will be changed.
+
+The migrator never creates anything that already exists. So users, products,
+components, etc. that already exist will just be re-used by this script,
+not re-created.
+
+=head1 CONSTRUCTOR
+
+=head2 load
+
+Called like C<< Bugzilla::Migrate->load('Module') >>. Returns a new
+C<Bugzilla::Migrate> object that can be used to migrate from the
+requested bug-tracker.
+
+=head1 METHODS YOUR SUBCLASS CAN USE
+
+=head2 config
+
+Takes a single parameter, a string, and returns the value of the
+configuration variable with that name (always a scalar). The first time
+you call C<config>, if the configuration file hasn't been read, it will
+be read in.
+ +=head2 debug + +If the user hasn't specified C<--verbose> on the command line, this +does nothing. + +Takes two arguments: + +The first argument is a string or reference to print to C<STDERR>. +If it's a reference, L<Data::Dumper> will be used to print the +data structure. + +The second argument is a number--the string will only be printed if the +user specified C<--verbose> at least that many times on the command line. + +=head2 parse_date + +Parses a date string and returns a formatted date string that can be inserted +into the database. If the input date is missing a timezone, the "timezone" +configuration parameter will be used as the timezone of the date. + +=head2 translate_bug + +Uses the C<$translate_fields> and C<$translate_values> configuration variables +to convert a hashref of "other bug-tracker" fields into Bugzilla fields. +It takes one argument, the hashref to convert. Any unrecognized fields will +have their value prepended to the C<comment> element in the returned +hashref, unless they are listed in L</NON_COMMENT_FIELDS>. + +In scalar context, returns the translated bug. In array context, +returns both the translated bug and a second hashref containing the values +of any untranslated fields that were listed in C<NON_COMMENT_FIELDS>. + +B<Note:> To save memory, the hashref that you pass in will be destroyed +(all keys will be deleted). + +=head2 translate_value + +(Note: Normally you will want to use L</translate_bug> instead of this.) + +Uses the C<$translate_values> configuration variable to convert +field values from your bug-tracker to Bugzilla. Takes two arguments, +the first being a field name and the second being a value. If the value +is an arrayref, C<translate_value> will be called recursively on all +the array elements. + +Also, any date field will be converted into ISO 8601 format, for +inserting into the database. + +You must use this to translate any bug field values that you return +during L</_read_bugs>, so that they are valid values for +L<Bugzilla::Bug/create>. + +=head2 translate_field + +(Note: Normally you will want to use L</translate_bug> instead of this.)
+ +Translates a field name in your bug-tracker to a field name in Bugzilla, +using the rules described in the description of the C<$translate_fields> +configuration variable. + +Takes a single argument--the name of a field to translate. + +Returns C<undef> if the field could not be translated. + +=head1 METHODS YOU MUST IMPLEMENT + +These are methods that subclasses must implement: + +=head2 _read_bugs + +Should return an arrayref of hashes. The hashes will be passed to +L<Bugzilla::Bug/create> to create bugs in Bugzilla. In addition to +the normal C<create> fields, the hashes can contain three additional +items: + +=over + +=item comments + +An arrayref of hashes, representing comments to be added to the +database. The keys should be the names of columns in the longdescs +table that you want to set for each comment. C<who> must be a +username instead of a user id, though. + +You don't need to specify a value for the C<bug_id> column. + +=item history + +An arrayref of hashes, representing the history of changes made +to this bug. The keys should be the names of columns in the +bugs_activity table to set for each change. C<who> must be a username +instead of a user id, though, and C<field> (containing the name of some field) +is taken instead of C<fieldid>. + +You don't need to specify a value for the C<bug_id> column. + +=item attachments + +An arrayref of hashes, representing values to pass to +L<Bugzilla::Attachment/create>. (Remember that the C<data> argument +must be a file handle--we recommend using L<IO::File/new_tmpfile> to create +anonymous temporary files for this purpose.) You should specify a +C<submitter> argument containing the username of the attachment's submitter. + +You don't need to specify a value for the C<bug> argument. + +=back + +=head2 _read_products + +Should return an arrayref of hashes to pass to L<Bugzilla::Product/create>. +In addition to the normal C<create> fields, this also accepts an additional +argument, C<components>, which is an arrayref of hashes to pass to +L<Bugzilla::Component/create> (though you do not need to specify the +C<product> argument for L<Bugzilla::Component/create>). + +=head2 _read_users + +Should return an arrayref of hashes to be passed to +L<Bugzilla::User/create>.
+ +=head1 METHODS YOU MIGHT WANT TO IMPLEMENT + +These are methods that you may want to override in your migrator. +All of these methods are called on an instantiated L<Bugzilla::Migrate> +object of your subclass by L<Bugzilla::Migrate> itself. + +=head2 REQUIRED_MODULES + +Returns an arrayref of Perl modules that must be installed in order +for your migrator to run, in the same format as +L<Bugzilla::Install::Requirements/REQUIRED_MODULES>. + +=head2 CUSTOM_FIELDS + +Returns a hashref, where the keys are the names of custom fields +to create in the database before inserting bugs. The values of the +hashref are the arguments (other than "name") that should be passed +to Bugzilla::Field->create() when creating the field. (C<< custom => 1 >> +will be specified automatically for you, so you don't need to specify it.) + +=head2 CONFIG_VARS + +This should return an array (not an arrayref) in the same format as +L<Bugzilla::Install::Localconfig/LOCALCONFIG_VARS>, describing +configuration variables for migrating from your bug-tracker. You should +always include the default C<CONFIG_VARS> (by calling +$self->SUPER::CONFIG_VARS) as part of your return value, if you +override this method. + +In addition to the normal fields from C<LOCALCONFIG_VARS>, you can also +specify a C<check> key for each item, which should be a subroutine +reference. When the configuration file is read, this subroutine will be +called (as a method) to make sure that the value is valid. + +=head2 NON_COMMENT_FIELDS + +An array (not an arrayref). If there are fields that are not translated +and yet shouldn't be added to the initial description of the bug when +translating bugs, then they should be listed here. See L</translate_bug> for +more detail. + +=head2 after_read + +This is run after all data is read from the other bug-tracker, but +before the bug fields/values have been translated, and before any data +is inserted into Bugzilla. The default implementation does nothing. + +=head2 before_insert + +This is called after all bugs are translated from their "other bug-tracker" +values to Bugzilla values, but before any data is inserted into the database +or any custom fields are created.
The default implementation does nothing. + +=head2 after_insert + +This is run after all data is inserted into Bugzilla. The default +implementation does nothing. diff --git a/Bugzilla/Migrate/Gnats.pm b/Bugzilla/Migrate/Gnats.pm new file mode 100644 index 000000000..232100f2d --- /dev/null +++ b/Bugzilla/Migrate/Gnats.pm @@ -0,0 +1,709 @@ +# -*- Mode: perl; indent-tabs-mode: nil -*- +# +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. +# +# The Original Code is The Bugzilla Migration Tool. +# +# The Initial Developer of the Original Code is Lambda Research +# Corporation. Portions created by the Initial Developer are Copyright +# (C) 2009 the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Max Kanat-Alexander + +package Bugzilla::Migrate::Gnats; +use strict; +use base qw(Bugzilla::Migrate); + +use Bugzilla::Constants; +use Bugzilla::Install::Util qw(indicate_progress); +use Bugzilla::Util qw(format_time trim generate_random_password lsearch); + +use Email::Address; +use Email::MIME; +use File::Basename; +use IO::File; +use List::Util qw(first); + +use constant REQUIRED_MODULES => [ + { + package => 'Email-Simple-FromHandle', + module => 'Email::Simple::FromHandle', + # This version added seekable handles. 
+ version => 0.050, + }, +]; + +use constant FIELD_MAP => { + 'Number' => 'bug_id', + 'Category' => 'product', + 'Synopsis' => 'short_desc', + 'Responsible' => 'assigned_to', + 'State' => 'bug_status', + 'Class' => 'cf_type', + 'Classification' => '', + 'Originator' => 'reporter', + 'Arrival-Date' => 'creation_ts', + 'Last-Modified' => 'delta_ts', + 'Release' => 'version', + 'Severity' => 'bug_severity', + 'Description' => 'comment', +}; + +use constant VALUE_MAP => { + bug_severity => { + 'serious' => 'major', + 'cosmetic' => 'trivial', + 'new-feature' => 'enhancement', + 'non-critical' => 'normal', + }, + bug_status => { + 'open' => 'NEW', + 'analyzed' => 'ASSIGNED', + 'suspended' => 'RESOLVED', + 'feedback' => 'RESOLVED', + 'released' => 'VERIFIED', + }, + bug_status_resolution => { + 'feedback' => 'FIXED', + 'released' => 'FIXED', + 'closed' => 'FIXED', + 'suspended' => 'LATER', + }, + priority => { + 'medium' => 'Normal', + }, +}; + +use constant GNATS_CONFIG_VARS => ( + { + name => 'gnats_path', + default => '/var/lib/gnats', + desc => < 'default_email_domain', + default => 'example.com', + desc => <<'END', +# Some GNATS users do not have full email addresses, but Bugzilla requires +# every user to have an email address. What domain should be appended to +# usernames that don't have emails, to make them into email addresses? +# (For example, if you leave this at the default, "unknown" would become +# "unknown@example.com".) +END + }, + { + name => 'component_name', + default => 'General', + desc => <<'END', +# GNATS has only "Category" to classify bugs. However, Bugzilla has a +# multi-level system of Products that contain Components. When importing +# GNATS categories, they become a Product with one Component. What should +# the name of that Component be? +END + }, + { + name => 'version_regex', + default => '', + desc => <<'END', +# In GNATS, the "version" field can contain almost anything. 
However, in +# Bugzilla, it's a drop-down, so you don't want too many choices in there. +# If you specify a regular expression here, versions will be tested against +# this regular expression, and if they match, the first match (the first set +# of parentheses in the regular expression, also called "$1") will be used +# as the version value for the bug instead of the full version value specified +# in GNATS. +END + }, + { + name => 'default_originator', + default => 'gnats-admin', + desc => <<'END', +# Sometimes, a PR has no valid Originator, so we fall back to the From +# header of the email. If the From header also isn't a valid username +# (is just a name with spaces in it--we can't convert that to an email +# address) then this username (which can either be a GNATS username or an +# email address) will be considered to be the Originator of the PR. +END + } +); + +sub CONFIG_VARS { + my $self = shift; + my @vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS); + my $field_map = first { $_->{name} eq 'translate_fields' } @vars; + $field_map->{default} = FIELD_MAP; + my $value_map = first { $_->{name} eq 'translate_values' } @vars; + $value_map->{default} = VALUE_MAP; + return @vars; +} + +# Directories that aren't projects, or that we shouldn't be parsing +use constant SKIP_DIRECTORIES => qw( + gnats-adm + gnats-queue + pending +); + +use constant NON_COMMENT_FIELDS => qw( + Audit-Trail + Closed-Date + Confidential + Unformatted + attachments +); + +# Certain fields can contain things that look like fields in them, +# because they might contain quoted emails. To avoid mis-parsing, +# we list out here the exact order of fields at the end of a PR +# and wait for the next field to consider that we actually have +# a field to parse. 
+use constant END_FIELD_ORDER => [qw( + Description + How-To-Repeat + Fix + Release-Note + Audit-Trail + Unformatted +)]; + +use constant CUSTOM_FIELDS => { + cf_type => { + type => FIELD_TYPE_SINGLE_SELECT, + description => 'Type', + }, +}; + +use constant FIELD_REGEX => qr/^>(\S+):\s*(.*)$/; + +# Used for bugs that have no Synopsis. +use constant NO_SUBJECT => "(no subject)"; + +# This is the divider that GNATS uses between attachments in its database +# files. It's missing two hyphens at the beginning because MIME Emails use +# -- to start boundaries. +use constant GNATS_BOUNDARY => '----gnatsweb-attachment----'; + +use constant LONG_VERSION_LENGTH => 32; + +######### +# Hooks # +######### + +sub before_insert { + my $self = shift; + + # gnats_id isn't a valid User::create field, and we don't need it + # anymore now. + delete $_->{gnats_id} foreach @{ $self->users }; + + # Grab a version out of a bug for each product, so that there is a + # valid "version" argument for Bugzilla::Product->create. + foreach my $product (@{ $self->products }) { + my $bug = first { $_->{product} eq $product->{name} and $_->{version} } + @{ $self->bugs }; + if (defined $bug) { + $product->{version} = $bug->{version}; + } + else { + $product->{version} = 'unspecified'; + } + } +} + +######### +# Users # +######### + +sub _read_users { + my $self = shift; + my $path = $self->config('gnats_path'); + my $file = "$path/gnats-adm/responsible"; + $self->debug("Reading users from $file"); + my $default_domain = $self->config('default_email_domain'); + open(my $users_fh, '<', $file) || die "$file: $!"; + my @users; + foreach my $line (<$users_fh>) { + $line = trim($line); + next if $line =~ /^#/; + my ($id, $name, $email) = split(':', $line, 3); + $email ||= "$id\@$default_domain"; + # We can't call our own translate_value, because that depends on + # the existence of user_map, which doesn't exist until after + # this method. However, we still want to translate any users found.
+ $email = $self->SUPER::translate_value('user', $email); + push(@users, { realname => $name, login_name => $email, + gnats_id => $id }); + } + close($users_fh); + return \@users; +} + +sub user_map { + my $self = shift; + $self->{user_map} ||= { map { $_->{gnats_id} => $_->{login_name} } + @{ $self->users } }; + return $self->{user_map}; +} + +sub add_user { + my ($self, $id, $email) = @_; + return if defined $self->user_map->{$id}; + $self->user_map->{$id} = $email; + push(@{ $self->users }, { login_name => $email, gnats_id => $id }); +} + +sub user_to_email { + my ($self, $value) = @_; + if (defined $self->user_map->{$value}) { + $value = $self->user_map->{$value}; + } + elsif ($value !~ /@/) { + my $domain = $self->config('default_email_domain'); + $value = "$value\@$domain"; + } + return $value; +} + +############ +# Products # +############ + +sub _read_products { + my $self = shift; + my $path = $self->config('gnats_path'); + my $file = "$path/gnats-adm/categories"; + $self->debug("Reading categories from $file"); + + open(my $categories_fh, '<', $file) || die "$file: $!"; + my @products; + foreach my $line (<$categories_fh>) { + $line = trim($line); + next if $line =~ /^#/; + my ($name, $description, $assigned_to, $cc) = split(':', $line, 4); + my %product = ( name => $name, description => $description ); + + my @initial_cc = split(',', $cc); + @initial_cc = @{ $self->translate_value('user', \@initial_cc) }; + $assigned_to = $self->translate_value('user', $assigned_to); + my %component = ( name => $self->config('component_name'), + description => $description, + initialowner => $assigned_to, + initial_cc => \@initial_cc ); + $product{components} = [\%component]; + push(@products, \%product); + } + close($categories_fh); + return \@products; +} + +################ +# Reading Bugs # +################ + +sub _read_bugs { + my $self = shift; + my $path = $self->config('gnats_path'); + my @directories = glob("$path/*"); + my @bugs; + foreach my $directory 
(@directories) { + next if !-d $directory; + my $name = basename($directory); + next if grep($_ eq $name, SKIP_DIRECTORIES); + push(@bugs, @{ $self->_parse_project($directory) }); + } + @bugs = sort { $a->{Number} <=> $b->{Number} } @bugs; + return \@bugs; +} + +sub _parse_project { + my ($self, $directory) = @_; + my @files = glob("$directory/*"); + + $self->debug("Reading Project: $directory"); + # Sometimes other files get into gnats directories. + @files = grep { basename($_) =~ /^\d+$/ } @files; + my @bugs; + my $count = 1; + my $total = scalar @files; + print basename($directory) . ":\n"; + foreach my $file (@files) { + push(@bugs, $self->_parse_bug_file($file)); + if (!$self->verbose) { + indicate_progress({ current => $count++, every => 5, + total => $total }); + } + } + return \@bugs; +} + +sub _parse_bug_file { + my ($self, $file) = @_; + $self->debug("Reading $file"); + open(my $fh, "<", $file) || die "$file: $!"; + my $email = Email::Simple::FromHandle->new($fh); + my $fields = $self->_get_gnats_field_data($email); + # We parse attachments here instead of during translate_bug, + # because otherwise we'd be taking up huge amounts of memory storing + # all the raw attachment data in memory. + $fields->{attachments} = $self->_parse_attachments($fields); + close($fh); + return $fields; +} + +sub _get_gnats_field_data { + my ($self, $email) = @_; + my ($current_field, @value_lines, %fields); + $email->reset_handle(); + my $handle = $email->handle; + foreach my $line (<$handle>) { + # If this line starts a field name + if ($line =~ FIELD_REGEX) { + my ($new_field, $rest_of_line) = ($1, $2); + + # If this is one of the last few PR fields, then make sure + # that we're getting our fields in the right order. + my $new_field_valid = 1; + my $current_field_pos = + lsearch(END_FIELD_ORDER, $current_field || ''); + if ($current_field_pos > -1) { + my $new_field_pos = lsearch(END_FIELD_ORDER, $new_field); + # We accept any field, as long as it's later than this one. 
+ $new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0; + } + + if ($new_field_valid) { + if ($current_field) { + $fields{$current_field} = _handle_lines(\@value_lines); + @value_lines = (); + } + $current_field = $new_field; + $line = $rest_of_line; + } + } + push(@value_lines, $line) if defined $line; + } + $fields{$current_field} = _handle_lines(\@value_lines); + $fields{cc} = [$email->header('Cc')] if $email->header('Cc'); + + # If the Originator is invalid and we don't have a translation for it, + # use the From header instead. + my $originator = $self->translate_value('reporter', $fields{Originator}, + { check_only => 1 }); + if ($originator !~ Bugzilla->params->{emailregexp}) { + # We use the raw header sometimes, because it looks like "From: user" + # which Email::Address won't parse but we can still use. + my $address = $email->header('From'); + my ($parsed) = Email::Address->parse($address); + if ($parsed) { + $address = $parsed->address; + } + if ($address) { + $self->debug( + "PR $fields{Number} had an Originator that was not a valid" + . " user ($fields{Originator}). Using From ($address)" + . 
" instead.\n"); + my $address_email = $self->translate_value('reporter', $address, + { check_only => 1 }); + if ($address_email !~ Bugzilla->params->{emailregexp}) { + $self->debug(" From was also invalid, using default_originator.\n"); + $address = $self->config('default_originator'); + } + $fields{Originator} = $address; + } + } + + $self->debug(\%fields, 3); + return \%fields; +} + +sub _handle_lines { + my ($lines) = @_; + my $value = join('', @$lines); + $value =~ s/\s+$//; + return $value; +} + +#################### +# Translating Bugs # +#################### + +sub translate_bug { + my ($self, $fields) = @_; + + my ($bug, $other_fields) = $self->SUPER::translate_bug($fields); + + $bug->{attachments} = delete $other_fields->{attachments}; + + if (defined $other_fields->{_add_to_comment}) { + $bug->{comment} .= delete $other_fields->{_add_to_comment}; + } + + my ($changes, $extra_comment) = + $self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'}); + + my @comments; + foreach my $change (@$changes) { + if (exists $change->{comment}) { + push(@comments, { + thetext => $change->{comment}, + who => $change->{who}, + bug_when => $change->{bug_when} }); + delete $change->{comment}; + } + } + $bug->{history} = $changes; + + if (trim($extra_comment)) { + push(@comments, { thetext => $extra_comment, who => $bug->{reporter}, + bug_when => $bug->{delta_ts} || $bug->{creation_ts} }); + } + $bug->{comments} = \@comments; + + $bug->{component} = $self->config('component_name'); + if (!$bug->{short_desc}) { + $bug->{short_desc} = NO_SUBJECT; + } + + foreach my $attachment (@{ $bug->{attachments} || [] }) { + $attachment->{submitter} = $bug->{reporter}; + $attachment->{creation_ts} = $bug->{creation_ts}; + } + + $self->debug($bug, 3); + return $bug; +} + +sub _parse_audit_trail { + my ($self, $bug, $audit_trail) = @_; + return [] if !trim($audit_trail); + $self->debug(" Parsing audit trail...", 2); + + if ($audit_trail !~ /^\S+-Changed-\S+:/ms) { + # This is just a 
comment from the bug's creator. + $self->debug(" Audit trail is just a comment.", 2); + return ([], $audit_trail); + } + + my (@changes, %current_data, $current_column, $on_why); + my $extra_comment = ''; + my $current_field; + my @all_lines = split("\n", $audit_trail); + foreach my $line (@all_lines) { + # GNATS history looks like: + # Status-Changed-From-To: open->closed + # Status-Changed-By: jack + # Status-Changed-When: Mon May 12 14:46:59 2003 + # Status-Changed-Why: + # This is some comment here about the change. + if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) { + my ($field, $column, $value) = ($1, $2, $3); + my $bz_field = $self->translate_field($field); + # If it's not a field we're importing, we don't care about + # its history. + next if !$bz_field; + # GNATS doesn't track values for description changes, + # unfortunately, and that's the only information we'd be able to + # use in Bugzilla for the audit trail on that field. + next if $bz_field eq 'comment'; + $current_field = $bz_field if !$current_field; + if ($bz_field ne $current_field) { + $self->_store_audit_change( + \@changes, $current_field, \%current_data); + %current_data = (); + $current_field = $bz_field; + } + $value = trim($value); + $self->debug(" $bz_field $column: $value", 3); + if ($column eq 'From-To') { + my ($from, $to) = split('->', $value, 2); + # Sometimes there's just a - instead of a -> between the values. + if (!defined($to)) { + ($from, $to) = split('-', $value, 2); + } + $current_data{added} = $to; + $current_data{removed} = $from; + } + elsif ($column eq 'By') { + my $email = $self->translate_value('user', $value); + # Sometimes we hit users in the audit trail that we haven't + # seen anywhere else. 
+ $current_data{who} = $email; + } + elsif ($column eq 'When') { + $current_data{bug_when} = $self->parse_date($value); + } + if ($column eq 'Why') { + $value = '' if !defined $value; + $current_data{comment} = $value; + $on_why = 1; + } + else { + $on_why = 0; + } + } + elsif ($on_why) { + # "Why" lines are indented four characters. + $line =~ s/^\s{4}//; + $current_data{comment} .= "$line\n"; + } + else { + $self->debug( + "Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:" + . " $line\n", 2); + $extra_comment .= "$line\n"; + } + } + $self->_store_audit_change(\@changes, $current_field, \%current_data); + return (\@changes, $extra_comment); +} + +sub _store_audit_change { + my ($self, $changes, $old_field, $current_data) = @_; + + $current_data->{field} = $old_field; + $current_data->{removed} = + $self->translate_value($old_field, $current_data->{removed}); + $current_data->{added} = + $self->translate_value($old_field, $current_data->{added}); + push(@$changes, { %$current_data }); +} + +sub _parse_attachments { + my ($self, $fields) = @_; + my $unformatted = delete $fields->{'Unformatted'}; + my $gnats_boundary = GNATS_BOUNDARY; + # A sanity checker to make sure that we're parsing attachments right. + my $num_attachments = 0; + $num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g); + # Sometimes there's a GNATS_BOUNDARY that is on the same line as other data. + $unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg; + # Often the "Unformatted" section starts with stuff before + # ----gnatsweb-attachment---- that isn't necessary. + $unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s; + $unformatted = trim($unformatted); + return [] if !$unformatted; + $self->debug('Reading attachments...', 2); + my $boundary = generate_random_password(48); + $unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g; + # Sometimes the whole Unformatted section is indented by exactly + # one space, and needs to be fixed. 
+ if ($unformatted =~ /--\Q$boundary\E\n /) { + $unformatted =~ s/^ //mg; + } + $unformatted = <parts; + # Remove the fake body. + my $part1 = shift @parts; + if ($part1->body) { + $self->debug(" Additional Unformatted data found on " + . $fields->{Category} . " bug " . $fields->{Number}); + $self->debug($part1->body, 3); + $fields->{_add_comment} .= "\n\nUnformatted:\n" . $part1->body; + } + + my @attachments; + foreach my $part (@parts) { + $self->debug(' Parsing attachment: ' . $part->filename); + my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!"); + $temp_fh->binmode; + print $temp_fh $part->body; + my $content_type = $part->content_type; + $content_type =~ s/; name=.+$//; + my $attachment = { filename => $part->filename, + description => $part->filename, + mimetype => $content_type, + data => $temp_fh }; + $self->debug($attachment, 3); + push(@attachments, $attachment); + } + + if (scalar(@attachments) ne $num_attachments) { + warn "WARNING: Expected $num_attachments attachments but got " + . scalar(@attachments) . "\n" ; + $self->debug($unformatted, 3); + } + return \@attachments; +} + +sub translate_value { + my $self = shift; + my ($field, $value, $options) = @_; + my $original_value = $value; + $options ||= {}; + + if (!ref($value) and grep($_ eq $field, $self->USER_FIELDS)) { + if ($value =~ /(\S+\@\S+)/) { + $value = $1; + $value =~ s/^$//; + } + else { + # Sometimes names have extra stuff on the end like "(Somebody's Name)" + $value =~ s/\s+\(.+\)$//; + # Sometimes user fields look like "(user)" instead of just "user". + $value =~ s/^\((.+)\)$/$1/; + $value = trim($value); + } + } + + if ($field eq 'version' and $value ne '') { + my $version_re = $self->config('version_regex'); + if ($version_re and $value =~ $version_re) { + $value = $1; + } + # In the GNATS that I tested this with, there were many extremely long + # values for "version" that caused some import problems (they were + # longer than the max allowed version value). 
So if the version value + # is longer than 32 characters, pull out the first thing that looks + # like a version number. + elsif (length($value) > LONG_VERSION_LENGTH) { + $value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/; + } + } + + my @args = @_; + $args[1] = $value; + + $value = $self->SUPER::translate_value(@args); + return $value if ref $value; + + if (grep($_ eq $field, $self->USER_FIELDS)) { + my $from_value = $value; + $value = $self->user_to_email($value); + $args[1] = $value; + # If we got something new from user_to_email, do any necessary + # translation of it. + $value = $self->SUPER::translate_value(@args); + if (!$options->{check_only}) { + $self->add_user($from_value, $value); + } + } + + return $value; +} + +1; diff --git a/contrib/bzdbcopy.pl b/contrib/bzdbcopy.pl index b4f1fffd2..a5e81d7f8 100755 --- a/contrib/bzdbcopy.pl +++ b/contrib/bzdbcopy.pl @@ -193,8 +193,7 @@ foreach my $table (@table_list) { # PostgreSQL doesn't like it when you insert values into # a serial field; it doesn't increment the counter # automatically. - $target_db->do("SELECT pg_catalog.setval - ('${table}_${column}_seq', $max_val, false)"); + $target_db->bz_set_next_serial_value($table, $column); } elsif ($target_db->isa('Bugzilla::DB::Oracle')) { # Oracle increments the counter on every insert, and *always* diff --git a/migrate.pl b/migrate.pl new file mode 100644 index 000000000..df6b833a0 --- /dev/null +++ b/migrate.pl @@ -0,0 +1,110 @@ +#!/usr/bin/perl -w +# -*- Mode: perl; indent-tabs-mode: nil -*- +# +# The contents of this file are subject to the Mozilla Public +# License Version 1.1 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +# implied. See the License for the specific language governing +# rights and limitations under the License. 
+# +# The Original Code is The Bugzilla Migration Tool. +# +# The Initial Developer of the Original Code is Lambda Research +# Corporation. Portions created by the Initial Developer are Copyright +# (C) 2009 the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Max Kanat-Alexander + +use strict; +use File::Basename; +BEGIN { chdir dirname($0); } +use lib qw(. lib); +use Bugzilla; +use Bugzilla::Migrate; + +use Getopt::Long; +use Pod::Usage; + +my %switch; +GetOptions(\%switch, 'help|h|?', 'from=s', 'verbose|v+', 'dry-run'); + +# Print the help message if that switch was selected or if --from +# wasn't specified. +if (!$switch{'from'} or $switch{'help'}) { + pod2usage({-exitval => 1}); +} + +my $migrator = Bugzilla::Migrate->load($switch{'from'}); +$migrator->verbose($switch{'verbose'}); +$migrator->dry_run($switch{'dry-run'}); +$migrator->check_requirements(); +$migrator->do_migration(); + +# Even if there's an error, we want to be sure that the serial values +# get reset properly. +END { + if ($migrator and $migrator->dry_run) { + my $dbh = Bugzilla->dbh; + if ($dbh->bz_in_transaction) { + $dbh->bz_rollback_transaction(); + } + $migrator->reset_serial_values(); + } +} + +__END__ + +=head1 NAME + +migrate.pl - A script to migrate from other bug-trackers to Bugzilla. + +=head1 SYNOPSIS + + ./migrate.pl --from= [--verbose] [--dry-run] + + Migrates from another bug-tracker to Bugzilla. If you want + to upgrade Bugzilla, use checksetup.pl instead. + + Always test this on a backup copy of your database before + running it on your live Bugzilla. + +=head1 OPTIONS + +=over + +=item B<--from=tracker> + +Specifies what bug-tracker you're migrating from. To see what values +are valid, see the contents of the F directory. + +=item B<--dry-run> + +Don't modify the Bugzilla database at all, just test the import. +Note that this could cause significant slowdown and other strange effects +on a live Bugzilla, so only use it on a test instance. 
+ +=item B<--verbose> + +If specified, this script will output extra debugging information +to STDERR. Specify multiple times (up to three) for more information. + +=back + +=head1 DESCRIPTION + +This script copies data from another bug-tracker into Bugzilla. It migrates +users, products, and bugs from the other bug-tracker into this Bugzilla, +without removing any of the data currently in this Bugzilla. + +Note that you will need enough space in your temporary directory to hold +the size of all attachments in your current bug-tracker. + +You may also need to increase the number of file handles a process is allowed +to hold open (as the migrator will create a file handle for each attachment +in your database). On Linux and similar systems, you can do this as root +by typing C<ulimit -n 65535> before running your script. \ No newline at end of file diff --git a/template/en/default/global/messages.html.tmpl b/template/en/default/global/messages.html.tmpl index edbf080de..bc6ca5601 100644 --- a/template/en/default/global/messages.html.tmpl +++ b/template/en/default/global/messages.html.tmpl @@ -491,6 +491,44 @@ [% title = "$terms.Bugzilla Login Changed" %] Your [% terms.Bugzilla %] login has been changed. + [% ELSIF message_tag == "migrate_component_created" %] + Component created: [% comp.name FILTER html %] + (in [% product.name FILTER html %]) + + [% ELSIF message_tag == "migrate_creating_bugs" %] + Creating [% terms.bugs %]... + + [% ELSIF message_tag == "migrate_field_created" %] + New custom field: [% field.description FILTER html %] + ([% field.name FILTER html %]) + + [% ELSIF message_tag == "migrate_product_created" %] + Product created: [% created.name FILTER html %] + + [% ELSIF message_tag == "migrate_reading_bugs" %] + Reading [% terms.bugs %]... + + [% ELSIF message_tag == "migrate_reading_products" %] + Reading products... + + [% ELSIF message_tag == "migrate_reading_users" %] + Reading users...
+ + [% ELSIF message_tag == "migrate_translating_bugs" %] + Converting [% terms.bug %] values to be appropriate for + [%+ terms.Bugzilla %]... + + [% ELSIF message_tag == "migrate_user_created" %] + User created: [% created.email FILTER html %] + [% IF password %] Password: [% password FILTER html %][% END %] + + [% ELSIF message_tag == "migrate_value_created" %] + [% IF product.defined %] + [% product.name FILTER html %] + [% END %] + [%+ field_descs.${field.name} FILTER html %] value + created: [% value FILTER html %] + [% ELSIF message_tag == "milestone_created" %] [% title = "Milestone Created" %] The milestone [% milestone.name FILTER html %] has been created. diff --git a/template/en/default/global/user-error.html.tmpl b/template/en/default/global/user-error.html.tmpl index 3783e523b..230f029b5 100644 --- a/template/en/default/global/user-error.html.tmpl +++ b/template/en/default/global/user-error.html.tmpl @@ -989,6 +989,15 @@ You can't use %user% without being logged in, because %user% refers to your login name, which we don't know. + [% ELSIF error == "migrate_config_created" %] + The file [% file FILTER html %] contains configuration + variables that must be set before continuing with the migration. + + [% ELSIF error == "migrate_from_invalid" %] + '[% from FILTER html %]' is not a valid type of [% terms.bug %]-tracker + to migrate from. See the contents of the B[% %]ugzilla/Migrate/ + directory for a list of valid [% terms.bug %]-trackers. + [% ELSIF error == "milestone_already_exists" %] [% title = "Milestone Already Exists" %] [% admindocslinks = {'products.html' => 'Administering products', diff --git a/template/en/default/setup/strings.txt.pl b/template/en/default/setup/strings.txt.pl index bbccf6339..2a8e993e7 100644 --- a/template/en/default/setup/strings.txt.pl +++ b/template/en/default/setup/strings.txt.pl @@ -42,7 +42,7 @@ EOT commands_optional => 'COMMANDS TO INSTALL OPTIONAL MODULES:', commands_required => < 'done.', -- cgit v1.2.3-24-g4f1b