From 35f1ce88e12f8eee0d47fdc69d38cd7a4b12732d Mon Sep 17 00:00:00 2001 From: "lpsolit%gmail.com" <> Date: Thu, 4 Aug 2005 18:51:22 +0000 Subject: Bug 126266: Use UTF-8 (Unicode) charset encoding for pages and email for NEW installations - Patch by byron jones (glob) r=wurblzap a=justdave --- Bugzilla/BugMail.pm | 99 ++++++++++++++++++++++++- Bugzilla/CGI.pm | 4 +- Bugzilla/Util.pm | 11 ++- checksetup.pl | 30 +++++++- defparams.pl | 11 +++ template/en/default/bug/show.xml.tmpl | 2 +- template/en/default/config.rdf.tmpl | 2 +- template/en/default/list/list.rdf.tmpl | 2 +- template/en/default/list/list.rss.tmpl | 2 +- template/en/default/reports/duplicates.rdf.tmpl | 2 +- 10 files changed, 152 insertions(+), 13 deletions(-) diff --git a/Bugzilla/BugMail.pm b/Bugzilla/BugMail.pm index 487979c24..6bbebfa7c 100644 --- a/Bugzilla/BugMail.pm +++ b/Bugzilla/BugMail.pm @@ -26,6 +26,7 @@ # Bradley Baetz # J. Paul Reed # Gervase Markham +# Byron Jones use strict; @@ -47,6 +48,10 @@ use Date::Parse; use Date::Format; use Mail::Mailer; use Mail::Header; +use MIME::Base64; +use MIME::QuotedPrint; +use MIME::Parser; +use Mail::Address; # We need these strings for the X-Bugzilla-Reasons header # Note: this hash uses "," rather than "=>" to avoid auto-quoting of the LHS. @@ -619,16 +624,102 @@ sub MessageToMTA ($) { $Mail::Mailer::testfile::config{outfile} = "$datadir/mailer.testfile"; } - $msg =~ /(.*?)\n\n(.*)/ms; - my @header_lines = split(/\n/, $1); - my $body = $2; + my ($header, $body) = $msg =~ /(.*?\n)\n(.*)/s ? 
($1, $2) : ('', $msg); + my $headers; + + if (Param('utf8') and (!is_7bit_clean($header) or !is_7bit_clean($body))) { + ($headers, $body) = encode_message($header, $body); + } else { + my @header_lines = split(/\n/, $header); + $headers = new Mail::Header \@header_lines, Modify => 0; + } - my $headers = new Mail::Header \@header_lines, Modify => 0; $mailer->open($headers->header_hashref); print $mailer $body; $mailer->close; } +sub encode_qp_words($) { + my ($line) = (@_); + my @encoded; + foreach my $word (split / /, $line) { + if (!is_7bit_clean($word)) { + push @encoded, '=?UTF-8?Q?_' . encode_qp($word, '') . '?='; + } else { + push @encoded, $word; + } + } + return join(' ', @encoded); +} + +sub encode_message($$) { + my ($header, $body) = @_; + + # read header into MIME::Entity + + my $parser = MIME::Parser->new; + $parser->output_to_core(1); + $parser->tmp_to_core(1); + my $entity = $parser->parse_data($header); + my $head = $entity->head; + + # set charset to UTF-8 + + $head->mime_attr('Content-Type' => 'text/plain') + unless defined $head->mime_attr('content-type'); + $head->mime_attr('Content-Type.charset' => 'UTF-8'); + + # encode the subject + + my $subject = $head->get('subject'); + if (defined $subject && !is_7bit_clean($subject)) { + $subject =~ s/[\r\n]+$//; + $head->replace('subject', encode_qp_words($subject)); + } + + # encode addresses + + foreach my $field (qw(from to cc reply-to sender errors-to)) { + my $high = $head->count($field) - 1; + foreach my $index (0..$high) { + my $value = $head->get($field, $index); + my @addresses; + my $changed = 0; + foreach my $addr (Mail::Address->parse($value)) { + my $phrase = $addr->phrase; + if (is_7bit_clean($phrase)) { + push @addresses, $addr->format; + } else { + push @addresses, encode_qp_phrase($phrase) . + ' <' . $addr->address . 
'>'; + $changed = 1; + } + } + $changed && $head->replace($field, join(', ', @addresses), $index); + } + } + + # process the body + + if (!is_7bit_clean($body)) { + # count number of 7-bit chars, and use quoted-printable if more + # than half the message is 7-bit clean + my $count = ($body =~ tr/\x20-\x7E\x0A\x0D//); + if ($count > length($body) / 2) { + $head->replace('Content-Transfer-Encoding', 'quoted-printable'); + $body = encode_qp($body); + } else { + $head->replace('Content-Transfer-Encoding', 'base64'); + $body = encode_base64($body); + } + } + + # done + + $head->fold(75); + return ($head, $body); +} + # Performs substitutions for sending out email with variables in it, # or for inserting a parameter into some other string. # diff --git a/Bugzilla/CGI.pm b/Bugzilla/CGI.pm index c2d61780f..f516dd5c6 100644 --- a/Bugzilla/CGI.pm +++ b/Bugzilla/CGI.pm @@ -60,8 +60,8 @@ sub new { # Make sure our outgoing cookie list is empty on each invocation $self->{Bugzilla_cookie_list} = []; - # Make sure that we don't send any charset headers - $self->charset(''); + # Send appropriate charset + $self->charset(Param('utf8') ? 'UTF-8' : ''); # Redirect to SSL if required if (Param('sslbase') ne '' and Param('ssl') eq 'always') { diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index b694de752..43100b160 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -38,7 +38,7 @@ use base qw(Exporter); diff_arrays diff_strings trim wrap_comment find_wrap_point format_time format_time_decimal - file_mod_time + file_mod_time is_7bit_clean bz_crypt check_email_syntax); use Bugzilla::Config; @@ -374,6 +374,10 @@ sub ValidateDate { } } +sub is_7bit_clean { + return $_[0] !~ /[^\x20-\x7E\x0A\x0D]/; +} + 1; __END__ @@ -597,6 +601,11 @@ Search for a comma, a whitespace or a hyphen to split $string, within the first $maxpos characters. If none of them is found, just split $string at $maxpos. The search starts at $maxpos and goes back to the beginning of the string. 
+=item C<is_7bit_clean($str)> + +Returns true if the string contains only 7-bit characters (ASCII 32 through 126, +ASCII 10 (LineFeed) and ASCII 13 (Carriage Return)). + =back =head2 Formatting Time diff --git a/checksetup.pl b/checksetup.pl index f3a332870..311f8e3f5 100755 --- a/checksetup.pl +++ b/checksetup.pl @@ -317,7 +317,15 @@ my $modules = [ }, { name => 'Mail::Mailer', - version => '1.65' + version => '1.67' + }, + { + name => 'MIME::Base64', + version => $^O =~ /MSWin32/i ? '3.01' : '3.03' + }, + { + name => 'MIME::Tools', + version => '5.417' }, { name => 'Storable', @@ -339,6 +347,7 @@ my %ppm_modules = ( 'GD::Graph' => 'GDGraph', 'GD::Text::Align' => 'GDTextUtil', 'Mail::Mailer' => 'MailTools', + 'MIME::Tools' => 'MIME-Tools', ); sub install_command { @@ -1142,6 +1151,10 @@ END # Just to be sure ... unlink "$datadir/versioncache"; +# Check for a new install + +my $newinstall = !-e "$datadir/params"; + # Remove parameters from the params file that no longer exist in Bugzilla, # and set the defaults for new ones @@ -1185,6 +1198,11 @@ if ($^O =~ /MSWin32/i && Param('mail_delivery_method') eq 'sendmail') { SetParam('smtpserver', $smtp); } +# Enable UTF-8 on new installs +if ($newinstall) { + SetParam('utf8', 1); +} + # WriteParams will only write out still-valid entries WriteParams(); @@ -4211,6 +4229,9 @@ if ($sth->rows == 0) { if ($admin_create) { + require Bugzilla::Util; + import Bugzilla::Util 'is_7bit_clean'; + while( $realname eq "" ) { print "Enter the real name of the administrator: "; $realname = $answer{'ADMIN_REALNAME'} @@ -4220,6 +4241,13 @@ if ($sth->rows == 0) { if(! $realname ) { print "\nReally. We need a full name.\n"; } + if(! is_7bit_clean($realname)) { + print "\nSorry, but at this stage the real name can only " . + "contain standard English\ncharacters. Once Bugzilla " . + "has been installed, you can use the 'Prefs' page\nto " . + "update the real name.\n"; + $realname = ''; + } } # trap a few interrupts so we can fix the echo if we get aborted. 
diff --git a/defparams.pl b/defparams.pl index 49448b9d9..d8a7b7e38 100644 --- a/defparams.pl +++ b/defparams.pl @@ -366,6 +366,17 @@ sub find_languages { checker => \&check_languages }, + { + name => 'utf8', + desc => 'Use UTF-8 (Unicode) encoding for all text in Bugzilla. New ' . + 'installations should set this to true to avoid character encoding ' . + 'problems. Existing databases should set this to true only after ' . + 'the data has been converted from existing legacy character ' . + 'encodings to UTF-8.', + type => 'b', + default => '0', + }, + { name => 'cookiedomain', desc => 'The domain for Bugzilla cookies. Normally blank. ' . diff --git a/template/en/default/bug/show.xml.tmpl b/template/en/default/bug/show.xml.tmpl index 6c950a2bf..222204936 100644 --- a/template/en/default/bug/show.xml.tmpl +++ b/template/en/default/bug/show.xml.tmpl @@ -19,7 +19,7 @@ # Contributor(s): Bradley Baetz # #%] - + #%] - + diff --git a/template/en/default/list/list.rdf.tmpl b/template/en/default/list/list.rdf.tmpl index 39a2350f1..06376aca9 100644 --- a/template/en/default/list/list.rdf.tmpl +++ b/template/en/default/list/list.rdf.tmpl @@ -19,7 +19,7 @@ # Contributor(s): Myk Melez #%] - + + #%] - +