1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This Source Code Form is "Incompatible With Secondary Licenses", as
# defined by the Mozilla Public License, v. 2.0.
package Bugzilla::Migrate::Gnats;
use 5.14.0;
use strict;
use warnings;
use parent qw(Bugzilla::Migrate);
use Bugzilla::Constants;
use Bugzilla::Install::Util qw(indicate_progress);
use Bugzilla::Util qw(format_time trim generate_random_password);
use Email::Address;
use Email::MIME;
use File::Basename;
use IO::File;
use List::MoreUtils qw(firstidx);
use List::Util qw(first);
# Extra Perl modules this migrator needs. Each entry names the
# distribution ("package"), the module to load ("module") and the minimum
# acceptable version. Email::Simple::FromHandle is what _parse_bug_file
# uses to read a PR file through a filehandle.
use constant REQUIRED_MODULES => [
{
package => 'Email-Simple-FromHandle',
module => 'Email::Simple::FromHandle',
# This version added seekable handles.
version => 0.050,
},
];
# Default mapping from GNATS field names (keys) to Bugzilla field names
# (values). CONFIG_VARS installs this as the default of the
# "translate_fields" configuration variable.
# NOTE(review): 'Classification' maps to the empty string, which presumably
# means "do not import this field" -- confirm against the parent class.
use constant FIELD_MAP => {
'Number' => 'bug_id',
'Category' => 'product',
'Synopsis' => 'short_desc',
'Responsible' => 'assigned_to',
'State' => 'bug_status',
'Class' => 'cf_type',
'Classification' => '',
'Originator' => 'reporter',
'Arrival-Date' => 'creation_ts',
'Last-Modified' => 'delta_ts',
'Release' => 'version',
'Severity' => 'bug_severity',
'Description' => 'comment',
};
# Default mapping of GNATS field values to Bugzilla field values, keyed by
# Bugzilla field name. CONFIG_VARS installs this as the default of the
# "translate_values" configuration variable.
# NOTE(review): "bug_status_resolution" is not read anywhere in this file;
# presumably the parent class uses it to pick a resolution for a GNATS
# state -- confirm.
use constant VALUE_MAP => {
bug_severity => {
'serious' => 'major',
'cosmetic' => 'trivial',
'new-feature' => 'enhancement',
'non-critical' => 'normal',
},
bug_status => {
'open' => 'CONFIRMED',
'analyzed' => 'IN_PROGRESS',
'suspended' => 'RESOLVED',
'feedback' => 'RESOLVED',
'released' => 'VERIFIED',
},
bug_status_resolution => {
'feedback' => 'FIXED',
'released' => 'FIXED',
'closed' => 'FIXED',
'suspended' => 'LATER',
},
priority => {
'medium' => 'Normal',
},
};
# GNATS-specific configuration variables. Each entry has a "name", a
# "default" value and a "desc" (the explanatory text for that variable).
# CONFIG_VARS prepends this list to the variables defined by the parent
# class. The heredoc bodies below are data, not code comments.
use constant GNATS_CONFIG_VARS => (
{
name => 'gnats_path',
default => '/var/lib/gnats',
desc => <<END,
# The path to the directory that contains the GNATS database.
END
},
{
name => 'default_email_domain',
default => 'example.com',
desc => <<'END',
# Some GNATS users do not have full email addresses, but Bugzilla requires
# every user to have an email address. What domain should be appended to
# usernames that don't have emails, to make them into email addresses?
# (For example, if you leave this at the default, "unknown" would become
# "unknown@example.com".)
END
},
{
name => 'component_name',
default => 'General',
desc => <<'END',
# GNATS has only "Category" to classify bugs. However, Bugzilla has a
# multi-level system of Products that contain Components. When importing
# GNATS categories, they become a Product with one Component. What should
# the name of that Component be?
END
},
{
name => 'version_regex',
default => '',
desc => <<'END',
# In GNATS, the "version" field can contain almost anything. However, in
# Bugzilla, it's a drop-down, so you don't want too many choices in there.
# If you specify a regular expression here, versions will be tested against
# this regular expression, and if they match, the first match (the first set
# of parentheses in the regular expression, also called "$1") will be used
# as the version value for the bug instead of the full version value specified
# in GNATS.
END
},
{
name => 'default_originator',
default => 'gnats-admin',
desc => <<'END',
# Sometimes, a PR has no valid Originator, so we fall back to the From
# header of the email. If the From header also isn't a valid username
# (is just a name with spaces in it--we can't convert that to an email
# address) then this username (which can either be a GNATS username or an
# email address) will be considered to be the Originator of the PR.
END
}
);
# Returns the full list of configuration variables: the GNATS-specific ones
# followed by the parent class's, with the defaults of "translate_fields"
# and "translate_values" replaced by FIELD_MAP and VALUE_MAP.
sub CONFIG_VARS {
    my ($self) = @_;
    my @vars = (GNATS_CONFIG_VARS, $self->SUPER::CONFIG_VARS);

    my %default_for = (
        translate_fields => FIELD_MAP,
        translate_values => VALUE_MAP,
    );

    # Only the first variable with each name gets its default overridden.
    foreach my $var_name (qw(translate_fields translate_values)) {
        my $var = first { $_->{name} eq $var_name } @vars;
        $var->{default} = $default_for{$var_name};
    }

    return @vars;
}
# Directories that aren't projects, or that we shouldn't be parsing
use constant SKIP_DIRECTORIES => qw(
gnats-adm
gnats-queue
pending
);
# NOTE(review): not referenced in this file; presumably read by the parent
# class to decide which leftover fields are not appended to the bug's
# comment -- confirm.
use constant NON_COMMENT_FIELDS => qw(
Audit-Trail
Closed-Date
Confidential
Unformatted
attachments
);
# Certain fields can contain things that look like fields in them,
# because they might contain quoted emails. To avoid mis-parsing,
# we list out here the exact order of fields at the end of a PR
# and wait for the next field to consider that we actually have
# a field to parse.
use constant END_FIELD_ORDER => qw(
Description
How-To-Repeat
Fix
Release-Note
Audit-Trail
Unformatted
);
# Definition of the "cf_type" custom field, which FIELD_MAP uses as the
# target of the GNATS "Class" field.
use constant CUSTOM_FIELDS => {
cf_type => {
type => FIELD_TYPE_SINGLE_SELECT,
description => 'Type',
},
};
# Matches a GNATS field header line such as ">Synopsis: some text".
# $1 is the field name and $2 is the rest of the line.
use constant FIELD_REGEX => qr/^>(\S+):\s*(.*)$/;
# Used for bugs that have no Synopsis.
use constant NO_SUBJECT => "(no subject)";
# This is the divider that GNATS uses between attachments in its database
# files. It's missing two hyphens at the beginning because MIME Emails use
# -- to start boundaries.
use constant GNATS_BOUNDARY => '----gnatsweb-attachment----';
# Version values longer than this many characters are shortened by
# translate_value.
use constant LONG_VERSION_LENGTH => 32;
#########
# Hooks #
#########
# Hook that tidies the collected data before it is inserted: strips the
# migration-only "gnats_id" key from every user and gives every product a
# "version" value.
sub before_insert {
    my ($self) = @_;

    # gnats_id isn't a valid User::create field, and it is no longer needed.
    for my $user (@{ $self->users }) {
        delete $user->{gnats_id};
    }

    # Bugzilla::Product->create needs a valid "version" argument, so borrow
    # one from the first bug of the product that has a version set.
    for my $product (@{ $self->products }) {
        my $versioned_bug = first {
            $_->{product} eq $product->{name} and $_->{version}
        } @{ $self->bugs };

        $product->{version}
            = defined $versioned_bug
            ? $versioned_bug->{version}
            : 'unspecified';
    }
}
#########
# Users #
#########
# Reads the GNATS "gnats-adm/responsible" file and returns an arrayref of
# user hashes with the keys realname, login_name and gnats_id.
#
# Each non-comment line has the form "id:name:email". When the email part
# is missing or empty, "id@<default_email_domain>" is used instead.
# Dies if the file cannot be opened.
sub _read_users {
    my $self = shift;
    my $path = $self->config('gnats_path');
    my $file = "$path/gnats-adm/responsible";
    $self->debug("Reading users from $file");
    my $default_domain = $self->config('default_email_domain');
    open(my $users_fh, '<', $file) or die "$file: $!";
    my @users;
    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <$users_fh>) {
        $line = trim($line);
        # Blank lines carry no user; without this check they would produce
        # a user with an undefined id and a bogus "@domain" login.
        next if $line eq '' or $line =~ /^#/;
        my ($id, $name, $email) = split(':', $line, 3);
        $email ||= "$id\@$default_domain";
        # We can't call our own translate_value, because that depends on
        # the existence of user_map, which doesn't exist until after
        # this method. However, we still want to translate any users found.
        $email = $self->SUPER::translate_value('user', $email);
        push(@users, { realname => $name, login_name => $email,
                       gnats_id => $id });
    }
    close($users_fh);
    return \@users;
}
# Returns a hashref mapping GNATS user ids to Bugzilla login names. It is
# built from $self->users on first use and cached in $self->{user_map}.
sub user_map {
    my ($self) = @_;

    if (!$self->{user_map}) {
        my %login_for;
        for my $user (@{ $self->users }) {
            $login_for{ $user->{gnats_id} } = $user->{login_name};
        }
        $self->{user_map} = \%login_for;
    }

    return $self->{user_map};
}
# Registers a user seen during parsing. Does nothing when $id already has
# a defined entry in the user map; otherwise records $id => $email in the
# map and appends a new user hash to $self->users.
sub add_user {
    my ($self, $id, $email) = @_;
    my $known_users = $self->user_map;
    return if defined $known_users->{$id};

    $known_users->{$id} = $email;
    push @{ $self->users }, { login_name => $email, gnats_id => $id };
}
# Converts a GNATS user value into an email address. A value found in the
# user map is replaced by its mapped login; any other value without an "@"
# gets "@<default_email_domain>" appended; everything else is returned
# unchanged.
sub user_to_email {
    my ($self, $value) = @_;

    my $mapped = $self->user_map->{$value};
    return $mapped if defined $mapped;

    return $value if $value =~ /@/;

    my $domain = $self->config('default_email_domain');
    return "$value\@$domain";
}
############
# Products #
############
# Reads the GNATS "gnats-adm/categories" file and returns an arrayref of
# product hashes. Every GNATS category becomes one product (keys name,
# description, components) holding a single component whose name comes
# from the "component_name" configuration variable.
#
# Each non-comment line has the form "name:description:responsible:cc",
# where cc is a comma-separated list of users.
# Dies if the file cannot be opened.
sub _read_products {
    my $self = shift;
    my $path = $self->config('gnats_path');
    my $file = "$path/gnats-adm/categories";
    $self->debug("Reading categories from $file");
    open(my $categories_fh, '<', $file) or die "$file: $!";
    my @products;
    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <$categories_fh>) {
        $line = trim($line);
        # Blank lines would otherwise become a product without a name.
        next if $line eq '' or $line =~ /^#/;
        my ($name, $description, $assigned_to, $cc) = split(':', $line, 4);
        my %product = ( name => $name, description => $description );
        # The cc column is optional; treat a missing one as an empty list.
        my @initial_cc = split(',', $cc // '');
        @initial_cc = @{ $self->translate_value('user', \@initial_cc) };
        $assigned_to = $self->translate_value('user', $assigned_to);
        my %component = ( name         => $self->config('component_name'),
                          description  => $description,
                          initialowner => $assigned_to,
                          initial_cc   => \@initial_cc );
        $product{components} = [\%component];
        push(@products, \%product);
    }
    close($categories_fh);
    return \@products;
}
################
# Reading Bugs #
################
# Parses every project directory under "gnats_path" (skipping non-directories
# and the names in SKIP_DIRECTORIES) and returns an arrayref of all raw PR
# field hashes, sorted numerically by their GNATS "Number".
sub _read_bugs {
    my ($self) = @_;
    my $gnats_path = $self->config('gnats_path');
    my %is_skipped = map { $_ => 1 } SKIP_DIRECTORIES;

    my @project_dirs
        = grep { (-d $_) && !$is_skipped{ basename($_) } }
          glob("$gnats_path/*");

    my @unsorted = map { @{ $self->_parse_project($_) } } @project_dirs;
    my @by_number = sort { $a->{Number} <=> $b->{Number} } @unsorted;
    return \@by_number;
}
# Parses all PR files of one GNATS project directory and returns an
# arrayref of their raw field hashes. Only files whose name consists
# solely of digits are treated as PRs. Prints the project name and, unless
# verbose output is enabled, a progress indicator.
sub _parse_project {
    my ($self, $directory) = @_;
    my @entries = glob("$directory/*");
    $self->debug("Reading Project: $directory");

    # Sometimes other files get into gnats directories.
    my @pr_files = grep { basename($_) =~ /^\d+$/ } @entries;
    my $total = scalar @pr_files;
    my $parsed = 0;

    print basename($directory) . ":\n";

    my @bugs;
    foreach my $pr_file (@pr_files) {
        push @bugs, $self->_parse_bug_file($pr_file);
        next if $self->verbose;
        indicate_progress({ current => ++$parsed, every => 5,
                            total   => $total });
    }
    return \@bugs;
}
# Parses a single GNATS PR file and returns a hashref of its raw fields,
# with the parsed attachments stored under the "attachments" key.
# Dies if the file cannot be opened.
sub _parse_bug_file {
    my ($self, $file) = @_;
    $self->debug("Reading $file");

    open(my $pr_fh, "<", $file) or die "$file: $!";
    my $pr_email = Email::Simple::FromHandle->new($pr_fh);
    my $pr_fields = $self->_get_gnats_field_data($pr_email);

    # Attachments are parsed here rather than during translate_bug so that
    # the raw attachment text of every PR is not all held in memory at once.
    $pr_fields->{attachments} = $self->_parse_attachments($pr_fields);

    close($pr_fh);
    return $pr_fields;
}
# Extracts the GNATS fields from the body of a PR.
#
# $email is the Email::Simple::FromHandle object for the PR file. The body
# is read line by line; a line matching FIELD_REGEX (">Name: rest") starts
# a new field and all following lines belong to it.
#
# Returns a hashref keyed by GNATS field name, plus "cc" (an arrayref of
# the Cc header values) when the email has a Cc header. When the Originator
# does not translate to something matching the "emailregexp" parameter, it
# is replaced by the From header or, failing that, by the
# "default_originator" configuration value.
sub _get_gnats_field_data {
    my ($self, $email) = @_;
    my ($current_field, @value_lines, %fields);
    $email->reset_handle();
    my $handle = $email->handle;
    # Read line by line instead of slurping the whole body into a list.
    while (my $line = <$handle>) {
        # If this line starts a field name
        if ($line =~ FIELD_REGEX) {
            my ($new_field, $rest_of_line) = ($1, $2);
            # If this is one of the last few PR fields, then make sure
            # that we're getting our fields in the right order.
            my $new_field_valid = 1;
            my $search_for = $current_field || '';
            my $current_field_pos = firstidx { $_ eq $search_for }
                                             END_FIELD_ORDER;
            if ($current_field_pos > -1) {
                my $new_field_pos = firstidx { $_ eq $new_field }
                                             END_FIELD_ORDER;
                # We accept any field, as long as it's later than this one.
                $new_field_valid = $new_field_pos > $current_field_pos ? 1 : 0;
            }
            if ($new_field_valid) {
                if ($current_field) {
                    $fields{$current_field} = _handle_lines(\@value_lines);
                    @value_lines = ();
                }
                $current_field = $new_field;
                # The regex drops the line terminator. Put it back when the
                # header line carries text, so that text is not glued to the
                # next line of a multi-line value. _handle_lines strips
                # trailing whitespace, so single-line values are unaffected.
                $line = $rest_of_line ne '' ? "$rest_of_line\n" : $rest_of_line;
            }
        }
        push(@value_lines, $line) if defined $line;
    }
    # A body without any field header has no last field to store.
    if (defined $current_field) {
        $fields{$current_field} = _handle_lines(\@value_lines);
    }
    $fields{cc} = [$email->header('Cc')] if $email->header('Cc');
    # If the Originator is invalid and we don't have a translation for it,
    # use the From header instead.
    my $originator = $self->translate_value('reporter', $fields{Originator},
                                            { check_only => 1 });
    if ($originator !~ Bugzilla->params->{emailregexp}) {
        # We use the raw header sometimes, because it looks like "From: user"
        # which Email::Address won't parse but we can still use.
        my $address = $email->header('From');
        my ($parsed) = Email::Address->parse($address);
        if ($parsed) {
            $address = $parsed->address;
        }
        if ($address) {
            $self->debug(
                "PR $fields{Number} had an Originator that was not a valid"
                . " user ($fields{Originator}). Using From ($address)"
                . " instead.\n");
            my $address_email = $self->translate_value('reporter', $address,
                                                       { check_only => 1 });
            if ($address_email !~ Bugzilla->params->{emailregexp}) {
                $self->debug(" From was also invalid, using default_originator.\n");
                $address = $self->config('default_originator');
            }
            $fields{Originator} = $address;
        }
    }
    $self->debug(\%fields, 3);
    return \%fields;
}
# Joins the collected lines of a field into one string and removes all
# trailing whitespace. $lines is an arrayref of strings.
sub _handle_lines {
    my ($lines) = @_;
    my $joined = join('', @{$lines});
    return $joined =~ s/\s+$//r;
}
####################
# Translating Bugs #
####################
# Turns the raw GNATS fields of one PR into a Bugzilla bug hash. On top of
# the parent class's translation this attaches the parsed attachments,
# converts the Audit-Trail into history entries and comments, sets the
# component, and supplies a summary for PRs that have none.
sub translate_bug {
    my ($self, $fields) = @_;
    my ($bug, $other_fields) = $self->SUPER::translate_bug($fields);

    $bug->{attachments} = delete $other_fields->{attachments};
    if (defined $other_fields->{_add_to_comment}) {
        $bug->{comment} .= delete $other_fields->{_add_to_comment};
    }

    my ($changes, $extra_comment)
        = $self->_parse_audit_trail($bug, $other_fields->{'Audit-Trail'});

    # Every audit-trail change that carries a "Why" text becomes a comment;
    # the text is then removed from the history entry itself.
    my @comments = map {
        +{ thetext  => $_->{comment},
           who      => $_->{who},
           bug_when => $_->{bug_when} }
    } grep { exists $_->{comment} } @{$changes};
    delete $_->{comment} foreach @{$changes};
    $bug->{history} = $changes;

    # Audit-trail text that was not part of any change is kept as a comment
    # from the reporter.
    if (trim($extra_comment)) {
        my $comment_time = $bug->{delta_ts} || $bug->{creation_ts};
        push @comments, { thetext  => $extra_comment,
                          who      => $bug->{reporter},
                          bug_when => $comment_time };
    }
    $bug->{comments} = \@comments;

    $bug->{component} = $self->config('component_name');
    $bug->{short_desc} ||= NO_SUBJECT;

    for my $attachment (@{ $bug->{attachments} || [] }) {
        @{$attachment}{qw(submitter creation_ts)}
            = ($bug->{reporter}, $bug->{creation_ts});
    }

    $self->debug($bug, 3);
    return $bug;
}
# Parses the GNATS "Audit-Trail" text of a PR.
#
# $bug is the translated bug (used only for debug output) and $audit_trail
# is the raw Audit-Trail text.
#
# Returns a list of (\@changes, $extra_comment). Each change is a hashref
# with "field", "removed" and "added", plus whichever of "who", "bug_when"
# and "comment" the trail supplied. $extra_comment collects lines that
# belong to no change. An empty trail returns just an empty arrayref, and a
# trail without any "-Changed-" line is returned whole as the extra comment.
sub _parse_audit_trail {
    my ($self, $bug, $audit_trail) = @_;
    return [] if !trim($audit_trail);
    $self->debug(" Parsing audit trail...", 2);
    if ($audit_trail !~ /^\S+-Changed-\S+:/ms) {
        # This is just a comment from the bug's creator.
        $self->debug(" Audit trail is just a comment.", 2);
        return ([], $audit_trail);
    }
    my (@changes, %current_data, $on_why);
    my $extra_comment = '';
    my $current_field;
    my @all_lines = split("\n", $audit_trail);
    foreach my $line (@all_lines) {
        # GNATS history looks like:
        # Status-Changed-From-To: open->closed
        # Status-Changed-By: jack
        # Status-Changed-When: Mon May 12 14:46:59 2003
        # Status-Changed-Why:
        # This is some comment here about the change.
        if ($line =~ /^(\S+)-Changed-(\S+):(.*)/) {
            my ($field, $column, $value) = ($1, $2, $3);
            my $bz_field = $self->translate_field($field);
            # If it's not a field we're importing, we don't care about
            # its history.
            next if !$bz_field;
            # GNATS doesn't track values for description changes,
            # unfortunately, and that's the only information we'd be able to
            # use in Bugzilla for the audit trail on that field.
            next if $bz_field eq 'comment';
            $current_field = $bz_field if !$current_field;
            # A different field name means the previous change is complete.
            if ($bz_field ne $current_field) {
                $self->_store_audit_change(
                    \@changes, $current_field, \%current_data);
                %current_data = ();
                $current_field = $bz_field;
            }
            $value = trim($value);
            $self->debug(" $bz_field $column: $value", 3);
            if ($column eq 'From-To') {
                my ($from, $to) = split('->', $value, 2);
                # Sometimes there's just a - instead of a -> between the values.
                if (!defined($to)) {
                    ($from, $to) = split('-', $value, 2);
                }
                $current_data{added} = $to;
                $current_data{removed} = $from;
            }
            elsif ($column eq 'By') {
                my $email = $self->translate_value('user', $value);
                # Sometimes we hit users in the audit trail that we haven't
                # seen anywhere else.
                $current_data{who} = $email;
            }
            elsif ($column eq 'When') {
                $current_data{bug_when} = $self->parse_date($value);
            }
            if ($column eq 'Why') {
                $value = '' if !defined $value;
                $current_data{comment} = $value;
                $on_why = 1;
            }
            else {
                $on_why = 0;
            }
        }
        elsif ($on_why) {
            # "Why" lines are indented four characters.
            $line =~ s/^\s{4}//;
            $current_data{comment} .= "$line\n";
        }
        else {
            $self->debug(
                "Extra Audit-Trail line on $bug->{product} $bug->{bug_id}:"
                . " $line\n", 2);
            $extra_comment .= "$line\n";
        }
    }
    # Store the last change, but only if there was one. When every
    # "-Changed-" line was skipped above, $current_field is still undefined
    # and storing would add a history entry without a field.
    if (defined $current_field) {
        $self->_store_audit_change(\@changes, $current_field, \%current_data);
    }
    return (\@changes, $extra_comment);
}
# Finishes one audit-trail change and appends a copy of it to @$changes.
# Sets "field" to $old_field and runs the "removed" and "added" values
# through translate_value for that field. $current_data is modified in
# place; the pushed entry is a shallow copy.
sub _store_audit_change {
    my ($self, $changes, $old_field, $current_data) = @_;
    $current_data->{field} = $old_field;

    for my $side (qw(removed added)) {
        $current_data->{$side}
            = $self->translate_value($old_field, $current_data->{$side});
    }

    push @{$changes}, { %{$current_data} };
}
# Extracts attachments from the "Unformatted" field of a PR.
#
# The field is removed from $fields, its GNATS attachment dividers are
# rewritten into MIME boundaries, and the result is wrapped in a synthetic
# multipart message that Email::MIME can split into parts.
#
# Returns an arrayref of attachment hashes with the keys filename,
# description, mimetype and data (a temporary filehandle holding the body).
# Text found before the first attachment is stored in
# $fields->{_add_to_comment}, the key that translate_bug reads.
# Warns when the number of parsed attachments differs from the number of
# GNATS dividers seen. Dies if a temporary file cannot be created.
sub _parse_attachments {
    my ($self, $fields) = @_;
    # A PR may have no "Unformatted" field at all; treat that as empty so
    # the regexes below never see undef.
    my $unformatted = delete($fields->{'Unformatted'}) // '';
    my $gnats_boundary = GNATS_BOUNDARY;
    # A sanity checker to make sure that we're parsing attachments right.
    my $num_attachments = 0;
    $num_attachments++ while ($unformatted =~ /\Q$gnats_boundary\E/g);
    # Sometimes there's a GNATS_BOUNDARY that is on the same line as other data.
    $unformatted =~ s/(\S\s*)\Q$gnats_boundary\E$/$1\n$gnats_boundary/mg;
    # Often the "Unformatted" section starts with stuff before
    # ----gnatsweb-attachment---- that isn't necessary.
    $unformatted =~ s/^\s*From:.+?Reply-to:[^\n]+//s;
    $unformatted = trim($unformatted);
    return [] if !$unformatted;
    $self->debug('Reading attachments...', 2);
    my $boundary = generate_random_password(48);
    $unformatted =~ s/\Q$gnats_boundary\E/--$boundary/g;
    # Sometimes the whole Unformatted section is indented by exactly
    # one space, and needs to be fixed.
    if ($unformatted =~ /--\Q$boundary\E\n /) {
        $unformatted =~ s/^ //mg;
    }
    # Headers must be separated from the body by an empty line, both for
    # the message itself and for the fake first part.
    $unformatted = <<END;
From: nobody
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="$boundary"

This is a multi-part message in MIME format.
--$boundary
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit

$unformatted
--$boundary--
END
    my $email = Email::MIME->new(\$unformatted);
    my @parts = $email->parts;
    # Remove the fake body.
    my $part1 = shift @parts;
    if ($part1->body) {
        $self->debug(" Additional Unformatted data found on "
                     . $fields->{Category} . " bug " . $fields->{Number});
        $self->debug($part1->body, 3);
        # translate_bug looks for "_add_to_comment"; any other key would
        # silently drop this text.
        $fields->{_add_to_comment} .= "\n\nUnformatted:\n" . $part1->body;
    }
    my @attachments;
    foreach my $part (@parts) {
        $self->debug(' Parsing attachment: ' . $part->filename);
        my $temp_fh = IO::File->new_tmpfile or die ("Can't create tempfile: $!");
        $temp_fh->binmode;
        print $temp_fh $part->body;
        my $content_type = $part->content_type;
        $content_type =~ s/; name=.+$//;
        my $attachment = { filename    => $part->filename,
                           description => $part->filename,
                           mimetype    => $content_type,
                           data        => $temp_fh };
        $self->debug($attachment, 3);
        push(@attachments, $attachment);
    }
    # Both sides are counts, so compare them numerically.
    if (scalar(@attachments) != $num_attachments) {
        warn "WARNING: Expected $num_attachments attachments but got "
             . scalar(@attachments) . "\n" ;
        $self->debug($unformatted, 3);
    }
    return \@attachments;
}
# Translates one GNATS value into its Bugzilla equivalent.
#
# Arguments are ($field, $value, $options). On top of the parent class's
# translation this:
#   * cleans up plain values of user fields (pulls out an email address,
#     or strips "(Real Name)" suffixes and surrounding parentheses);
#   * shortens "version" values using the "version_regex" configuration
#     variable or, for values longer than LONG_VERSION_LENGTH, the first
#     thing that looks like a version number;
#   * turns user values into email addresses and, unless
#     $options->{check_only} is set, registers them through add_user.
# Reference results from the parent class are returned as they are.
sub translate_value {
    my ($self, @args) = @_;
    my ($field, $value, $options) = @args;
    $options ||= {};
    my $is_user_field = scalar grep { $_ eq $field } $self->USER_FIELDS;

    if ($is_user_field and !ref($value)) {
        if ($value =~ /(\S+\@\S+)/) {
            $value = $1;
            $value =~ s/^<//;
            $value =~ s/>$//;
        }
        else {
            # Sometimes names have extra stuff on the end like "(Somebody's Name)"
            $value =~ s/\s+\(.+\)$//;
            # Sometimes user fields look like "(user)" instead of just "user".
            $value =~ s/^\((.+)\)$/$1/;
            $value = trim($value);
        }
    }

    if ($field eq 'version' and $value ne '') {
        my $version_re = $self->config('version_regex');
        if ($version_re and $value =~ $version_re) {
            $value = $1;
        }
        # Some GNATS databases hold version values longer than Bugzilla
        # allows, so for long values keep only the first thing that looks
        # like a version number.
        elsif (length($value) > LONG_VERSION_LENGTH) {
            $value =~ s/^.+?\b(\d[\w\.]+)\b.+$/$1/;
        }
    }

    $args[1] = $value;
    my $translated = $self->SUPER::translate_value(@args);
    return $translated if ref $translated;
    return $translated if !$is_user_field;

    # If user_to_email produced something new, it may itself need
    # translating, so run the result through the parent once more.
    my $gnats_user = $translated;
    $args[1] = $self->user_to_email($translated);
    my $email = $self->SUPER::translate_value(@args);
    $self->add_user($gnats_user, $email) if !$options->{check_only};
    return $email;
}
1;
=head1 B<Methods in need of POD>

=over

=item user_map

=item user_to_email

=item add_user

=item translate_value

=item before_insert

=item translate_bug

=item CONFIG_VARS

=back
|