From 35f1ce88e12f8eee0d47fdc69d38cd7a4b12732d Mon Sep 17 00:00:00 2001
From: "lpsolit%gmail.com" <>
Date: Thu, 4 Aug 2005 18:51:22 +0000
Subject: Bug 126266: Use UTF-8 (Unicode) charset encoding for pages and email for NEW installations - Patch by byron jones (glob) r=wurblzap a=justdave

---
 Bugzilla/BugMail.pm | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 Bugzilla/CGI.pm     |   4 +--
 Bugzilla/Util.pm    |  11 +++++-
 3 files changed, 125 insertions(+), 7 deletions(-)

(limited to 'Bugzilla')

diff --git a/Bugzilla/BugMail.pm b/Bugzilla/BugMail.pm
index 487979c24..6bbebfa7c 100644
--- a/Bugzilla/BugMail.pm
+++ b/Bugzilla/BugMail.pm
@@ -26,6 +26,7 @@
 # Bradley Baetz
 # J. Paul Reed
 # Gervase Markham
+# Byron Jones
 
 use strict;
 
@@ -47,6 +48,10 @@ use Date::Parse;
 use Date::Format;
 use Mail::Mailer;
 use Mail::Header;
+use MIME::Base64;
+use MIME::QuotedPrint;
+use MIME::Parser;
+use Mail::Address;
 
 # We need these strings for the X-Bugzilla-Reasons header
 # Note: this hash uses "," rather than "=>" to avoid auto-quoting of the LHS.
@@ -619,16 +624,120 @@ sub MessageToMTA ($) {
         $Mail::Mailer::testfile::config{outfile} = "$datadir/mailer.testfile";
     }
 
-    $msg =~ /(.*?)\n\n(.*)/ms;
-    my @header_lines = split(/\n/, $1);
-    my $body = $2;
+    my ($header, $body) = $msg =~ /(.*?\n)\n(.*)/s ? ($1, $2) : ('', $msg);
+    my $headers;
+
+    if (Param('utf8') and (!is_7bit_clean($header) or !is_7bit_clean($body))) {
+        ($headers, $body) = encode_message($header, $body);
+    } else {
+        my @header_lines = split(/\n/, $header);
+        $headers = new Mail::Header \@header_lines, Modify => 0;
+    }
 
-    my $headers = new Mail::Header \@header_lines, Modify => 0;
     $mailer->open($headers->header_hashref);
     print $mailer $body;
     $mailer->close;
 }
 
+# Encodes each word of a header value that is not 7-bit clean as an RFC 2047
+# "Q" encoded-word in UTF-8; clean words are passed through unchanged.
+sub encode_qp_words($) {
+    my ($line) = (@_);
+    my @encoded;
+    my $prev_encoded = 0;
+    foreach my $word (split / /, $line) {
+        if (!is_7bit_clean($word)) {
+            my $quoted = encode_qp($word, '');
+            # '?', '_' and TAB are not allowed unescaped inside an
+            # encoded-word, and encode_qp() may leave them as they are.
+            $quoted =~ s/([?_\t])/sprintf('=%02X', ord($1))/ge;
+            # Decoders drop the whitespace between two adjacent
+            # encoded-words, so the second one carries the space as '_'.
+            my $space = $prev_encoded ? '_' : '';
+            push @encoded, '=?UTF-8?Q?' . $space . $quoted . '?=';
+            $prev_encoded = 1;
+        } else {
+            push @encoded, $word;
+            # An empty word only marks a doubled space; keep the state.
+            $prev_encoded = 0 if $word ne '';
+        }
+    }
+    return join(' ', @encoded);
+}
+
+# Prepares a message that is not 7-bit clean for sending as UTF-8 mail.
+# Takes the raw header text and the raw body; returns the header object of
+# the parsed message (charset set to UTF-8, non-ASCII subject and address
+# phrases encoded) and the body, which is transfer-encoded as
+# quoted-printable or base64 when it is not 7-bit clean.
+sub encode_message($$) {
+    my ($header, $body) = @_;
+
+    # read header into MIME::Entity
+
+    my $parser = MIME::Parser->new;
+    $parser->output_to_core(1);
+    $parser->tmp_to_core(1);
+    my $entity = $parser->parse_data($header);
+    my $head = $entity->head;
+
+    # set charset to UTF-8
+
+    $head->mime_attr('Content-Type' => 'text/plain')
+        unless defined $head->mime_attr('content-type');
+    $head->mime_attr('Content-Type.charset' => 'UTF-8');
+
+    # encode the subject
+
+    my $subject = $head->get('subject');
+    if (defined $subject && !is_7bit_clean($subject)) {
+        $subject =~ s/[\r\n]+$//;
+        $head->replace('subject', encode_qp_words($subject));
+    }
+
+    # encode addresses
+
+    foreach my $field (qw(from to cc reply-to sender errors-to)) {
+        my $high = $head->count($field) - 1;
+        foreach my $index (0..$high) {
+            my $value = $head->get($field, $index);
+            my @addresses;
+            my $changed = 0;
+            foreach my $addr (Mail::Address->parse($value)) {
+                my $phrase = $addr->phrase;
+                if (is_7bit_clean($phrase)) {
+                    push @addresses, $addr->format;
+                } else {
+                    push @addresses, encode_qp_words($phrase) .
+                        ' <' . $addr->address . '>';
+                    $changed = 1;
+                }
+            }
+            $changed && $head->replace($field, join(', ', @addresses), $index);
+        }
+    }
+
+    # process the body
+
+    if (!is_7bit_clean($body)) {
+        # count number of 7-bit chars, and use quoted-printable if more
+        # than half the message is 7-bit clean
+        my $count = ($body =~ tr/\x20-\x7E\x0A\x0D//);
+        if ($count > length($body) / 2) {
+            $head->replace('Content-Transfer-Encoding', 'quoted-printable');
+            $body = encode_qp($body);
+        } else {
+            $head->replace('Content-Transfer-Encoding', 'base64');
+            $body = encode_base64($body);
+        }
+    }
+
+    # done
+
+    $head->fold(75);
+    return ($head, $body);
+}
+
 # Performs substitutions for sending out email with variables in it,
 # or for inserting a parameter into some other string.
 #
diff --git a/Bugzilla/CGI.pm b/Bugzilla/CGI.pm
index c2d61780f..f516dd5c6 100644
--- a/Bugzilla/CGI.pm
+++ b/Bugzilla/CGI.pm
@@ -60,8 +60,8 @@ sub new {
     # Make sure our outgoing cookie list is empty on each invocation
     $self->{Bugzilla_cookie_list} = [];
 
-    # Make sure that we don't send any charset headers
-    $self->charset('');
+    # Send appropriate charset
+    $self->charset(Param('utf8') ? 'UTF-8' : '');
 
     # Redirect to SSL if required
     if (Param('sslbase') ne '' and Param('ssl') eq 'always') {
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index b694de752..43100b160 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -38,7 +38,7 @@ use base qw(Exporter);
                              diff_arrays diff_strings
                              trim wrap_comment find_wrap_point
                              format_time format_time_decimal
-                             file_mod_time
+                             file_mod_time is_7bit_clean
                              bz_crypt check_email_syntax);
 
 use Bugzilla::Config;
@@ -374,6 +374,10 @@ sub ValidateDate {
     }
 }
 
+sub is_7bit_clean {
+    return !defined($_[0]) || $_[0] !~ /[^\x20-\x7E\x0A\x0D]/;
+}
+
 1;
 
 __END__
@@ -597,6 +601,11 @@ Search for a comma, a whitespace or a hyphen to split $string, within the first
 $maxpos characters. If none of them is found, just split $string at $maxpos.
 The search starts at $maxpos and goes back to the beginning of the string.
 
+=item C<is_7bit_clean($str)>
+
+Returns true if the string contains only 7-bit characters (ASCII 32 through 126,
+ASCII 10 (LineFeed) and ASCII 13 (Carriage Return)).
+
 =back
 
 =head2 Formatting Time
-- 
cgit v1.2.3-24-g4f1b