summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlpsolit%gmail.com <>2005-08-04 20:51:22 +0200
committerlpsolit%gmail.com <>2005-08-04 20:51:22 +0200
commit35f1ce88e12f8eee0d47fdc69d38cd7a4b12732d (patch)
treefa6c7f8dd0e3cabefd256606fd988d6b88906058
parent1f3e3f23e9f52cc74b19fe184004b67441b10ee1 (diff)
downloadbugzilla-35f1ce88e12f8eee0d47fdc69d38cd7a4b12732d.tar.gz
bugzilla-35f1ce88e12f8eee0d47fdc69d38cd7a4b12732d.tar.xz
Bug 126266: Use UTF-8 (Unicode) charset encoding for pages and email for NEW installations - Patch by byron jones (glob) <bugzilla@glob.com.au> r=wurblzap a=justdave
-rw-r--r--Bugzilla/BugMail.pm99
-rw-r--r--Bugzilla/CGI.pm4
-rw-r--r--Bugzilla/Util.pm11
-rwxr-xr-xchecksetup.pl30
-rw-r--r--defparams.pl11
-rw-r--r--template/en/default/bug/show.xml.tmpl2
-rw-r--r--template/en/default/config.rdf.tmpl2
-rw-r--r--template/en/default/list/list.rdf.tmpl2
-rw-r--r--template/en/default/list/list.rss.tmpl2
-rw-r--r--template/en/default/reports/duplicates.rdf.tmpl2
10 files changed, 152 insertions, 13 deletions
diff --git a/Bugzilla/BugMail.pm b/Bugzilla/BugMail.pm
index 487979c24..6bbebfa7c 100644
--- a/Bugzilla/BugMail.pm
+++ b/Bugzilla/BugMail.pm
@@ -26,6 +26,7 @@
# Bradley Baetz <bbaetz@student.usyd.edu.au>
# J. Paul Reed <preed@sigkill.com>
# Gervase Markham <gerv@gerv.net>
+# Byron Jones <bugzilla@glob.com.au>
use strict;
@@ -47,6 +48,10 @@ use Date::Parse;
use Date::Format;
use Mail::Mailer;
use Mail::Header;
+use MIME::Base64;
+use MIME::QuotedPrint;
+use MIME::Parser;
+use Mail::Address;
# We need these strings for the X-Bugzilla-Reasons header
# Note: this hash uses "," rather than "=>" to avoid auto-quoting of the LHS.
@@ -619,16 +624,102 @@ sub MessageToMTA ($) {
$Mail::Mailer::testfile::config{outfile} = "$datadir/mailer.testfile";
}
- $msg =~ /(.*?)\n\n(.*)/ms;
- my @header_lines = split(/\n/, $1);
- my $body = $2;
+ my ($header, $body) = $msg =~ /(.*?\n)\n(.*)/s ? ($1, $2) : ('', $msg);
+ my $headers;
+
+ if (Param('utf8') and (!is_7bit_clean($header) or !is_7bit_clean($body))) {
+ ($headers, $body) = encode_message($header, $body);
+ } else {
+ my @header_lines = split(/\n/, $header);
+ $headers = new Mail::Header \@header_lines, Modify => 0;
+ }
- my $headers = new Mail::Header \@header_lines, Modify => 0;
$mailer->open($headers->header_hashref);
print $mailer $body;
$mailer->close;
}
+# Encodes the non-ASCII words of a header line as RFC 2047 "Q" encoded-words.
+#
+# The line is split on single spaces. Words that are 7-bit clean pass through
+# unchanged; every other word is quoted-printable encoded and wrapped as a
+# UTF-8 encoded-word. The words are then re-joined with single spaces.
+#
+# Params:  $line - the header text to encode
+# Returns: the line with each non-7-bit-clean word replaced by an encoded-word
+sub encode_qp_words($) {
+    my ($line) = @_;
+    my @encoded;
+    foreach my $word (split / /, $line) {
+        if (is_7bit_clean($word)) {
+            push @encoded, $word;
+            next;
+        }
+
+        # An empty end-of-line string stops encode_qp() from inserting
+        # soft line breaks into the word.
+        my $qp = encode_qp($word, '');
+
+        # Inside an encoded-word "?" delimits the word and "_" stands for a
+        # space, and a raw tab is not permitted. encode_qp() can leave all
+        # three as-is, so hex-escape them here.
+        $qp =~ s/([?_\t])/sprintf('=%02X', ord($1))/ge;
+
+        # The leading "_" decodes to a space in front of the word
+        # (presumably so that words stay separated when a decoder drops the
+        # whitespace between adjacent encoded-words -- TODO confirm).
+        push @encoded, '=?UTF-8?Q?_' . $qp . '?=';
+    }
+    return join(' ', @encoded);
+}
+
+# Prepares a message containing non-ASCII text for sending as UTF-8 MIME mail.
+#
+# Params:  $header - the raw header block of the message, as text
+#          $body   - the message body, as text
+# Returns: a two-element list ($head, $body):
+#            $head - the head object obtained by parsing $header, with the
+#                    charset set to UTF-8, the subject and address phrases
+#                    encoded where needed, and the fields folded
+#            $body - the body, quoted-printable or base64 encoded if it was
+#                    not 7-bit clean, otherwise unchanged
+sub encode_message($$) {
+    my ($header, $body) = @_;
+
+    # Parse the header block entirely in memory (no temporary files).
+    my $parser = MIME::Parser->new;
+    $parser->output_to_core(1);
+    $parser->tmp_to_core(1);
+    my $entity = $parser->parse_data($header);
+    my $head = $entity->head;
+
+    # Default to text/plain when no content type was given, then force the
+    # charset to UTF-8.
+    $head->mime_attr('Content-Type' => 'text/plain')
+        unless defined $head->mime_attr('content-type');
+    $head->mime_attr('Content-Type.charset' => 'UTF-8');
+
+    # Encode the subject, dropping its trailing line ending first.
+    my $subject = $head->get('subject');
+    if (defined $subject && !is_7bit_clean($subject)) {
+        $subject =~ s/[\r\n]+$//;
+        $head->replace('subject', encode_qp_words($subject));
+    }
+
+    # Encode the display-name part of each address. A field occurrence is
+    # only rewritten when at least one of its phrases needed encoding.
+    foreach my $field (qw(from to cc reply-to sender errors-to)) {
+        my $high = $head->count($field) - 1;
+        foreach my $index (0..$high) {
+            my $value = $head->get($field, $index);
+            my @addresses;
+            my $changed = 0;
+            foreach my $addr (Mail::Address->parse($value)) {
+                my $phrase = $addr->phrase;
+                # An address may have no phrase at all; treat that as clean
+                # rather than handing undef to is_7bit_clean().
+                if (!defined $phrase || is_7bit_clean($phrase)) {
+                    push @addresses, $addr->format;
+                } else {
+                    # The helper defined in this file is encode_qp_words();
+                    # there is no encode_qp_phrase().
+                    push @addresses, encode_qp_words($phrase) .
+                        ' <' . $addr->address . '>';
+                    $changed = 1;
+                }
+            }
+            $head->replace($field, join(', ', @addresses), $index)
+                if $changed;
+        }
+    }
+
+    # Encode the body only if it contains non-7-bit-clean characters.
+    if (!is_7bit_clean($body)) {
+        # Count the 7-bit clean characters: use quoted-printable when they
+        # make up more than half of the body, base64 otherwise.
+        my $count = ($body =~ tr/\x20-\x7E\x0A\x0D//);
+        if ($count > length($body) / 2) {
+            $head->replace('Content-Transfer-Encoding', 'quoted-printable');
+            $body = encode_qp($body);
+        } else {
+            $head->replace('Content-Transfer-Encoding', 'base64');
+            $body = encode_base64($body);
+        }
+    }
+
+    # Fold long header fields before handing the head back.
+    $head->fold(75);
+    return ($head, $body);
+}
+
# Performs substitutions for sending out email with variables in it,
# or for inserting a parameter into some other string.
#
diff --git a/Bugzilla/CGI.pm b/Bugzilla/CGI.pm
index c2d61780f..f516dd5c6 100644
--- a/Bugzilla/CGI.pm
+++ b/Bugzilla/CGI.pm
@@ -60,8 +60,8 @@ sub new {
# Make sure our outgoing cookie list is empty on each invocation
$self->{Bugzilla_cookie_list} = [];
- # Make sure that we don't send any charset headers
- $self->charset('');
+ # Send appropriate charset
+ $self->charset(Param('utf8') ? 'UTF-8' : '');
# Redirect to SSL if required
if (Param('sslbase') ne '' and Param('ssl') eq 'always') {
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index b694de752..43100b160 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -38,7 +38,7 @@ use base qw(Exporter);
diff_arrays diff_strings
trim wrap_comment find_wrap_point
format_time format_time_decimal
- file_mod_time
+ file_mod_time is_7bit_clean
bz_crypt check_email_syntax);
use Bugzilla::Config;
@@ -374,6 +374,10 @@ sub ValidateDate {
}
}
+# Returns true when the string holds nothing but printable ASCII
+# (0x20-0x7E), line feeds and carriage returns; false otherwise.
+sub is_7bit_clean {
+    my ($str) = @_;
+    # tr///c with an empty replacement only counts the characters that fall
+    # outside the allowed set; $str itself is left untouched.
+    my $unclean = ($str =~ tr/\x20-\x7E\x0A\x0D//c);
+    return $unclean == 0;
+}
+
1;
__END__
@@ -597,6 +601,11 @@ Search for a comma, a whitespace or a hyphen to split $string, within the first
$maxpos characters. If none of them is found, just split $string at $maxpos.
The search starts at $maxpos and goes back to the beginning of the string.
+=item C<is_7bit_clean($str)>
+
+Returns true if the string contains only 7-bit characters (ASCII 32 through 126,
+ASCII 10 (LineFeed) and ASCII 13 (Carriage Return)).
+
=back
=head2 Formatting Time
diff --git a/checksetup.pl b/checksetup.pl
index f3a332870..311f8e3f5 100755
--- a/checksetup.pl
+++ b/checksetup.pl
@@ -317,7 +317,15 @@ my $modules = [
},
{
name => 'Mail::Mailer',
- version => '1.65'
+ version => '1.67'
+ },
+ {
+ name => 'MIME::Base64',
+ version => $^O =~ /MSWin32/i ? '3.01' : '3.03'
+ },
+ {
+ name => 'MIME::Tools',
+ version => '5.417'
},
{
name => 'Storable',
@@ -339,6 +347,7 @@ my %ppm_modules = (
'GD::Graph' => 'GDGraph',
'GD::Text::Align' => 'GDTextUtil',
'Mail::Mailer' => 'MailTools',
+ 'MIME::Tools' => 'MIME-Tools',
);
sub install_command {
@@ -1142,6 +1151,10 @@ END
# Just to be sure ...
unlink "$datadir/versioncache";
+# Check for a new install
+
+my $newinstall = !-e "$datadir/params";
+
# Remove parameters from the params file that no longer exist in Bugzilla,
# and set the defaults for new ones
@@ -1185,6 +1198,11 @@ if ($^O =~ /MSWin32/i && Param('mail_delivery_method') eq 'sendmail') {
SetParam('smtpserver', $smtp);
}
+# Enable UTF-8 on new installs
+if ($newinstall) {
+ SetParam('utf8', 1);
+}
+
# WriteParams will only write out still-valid entries
WriteParams();
@@ -4211,6 +4229,9 @@ if ($sth->rows == 0) {
if ($admin_create) {
+ require Bugzilla::Util;
+ import Bugzilla::Util 'is_7bit_clean';
+
while( $realname eq "" ) {
print "Enter the real name of the administrator: ";
$realname = $answer{'ADMIN_REALNAME'}
@@ -4220,6 +4241,13 @@ if ($sth->rows == 0) {
if(! $realname ) {
print "\nReally. We need a full name.\n";
}
+ if(! is_7bit_clean($realname)) {
+ print "\nSorry, but at this stage the real name can only " .
+ "contain standard English\ncharacters. Once Bugzilla " .
+ "has been installed, you can use the 'Prefs' page\nto " .
+ "update the real name.\n";
+ $realname = '';
+ }
}
# trap a few interrupts so we can fix the echo if we get aborted.
diff --git a/defparams.pl b/defparams.pl
index 49448b9d9..d8a7b7e38 100644
--- a/defparams.pl
+++ b/defparams.pl
@@ -367,6 +367,17 @@ sub find_languages {
},
{
+ name => 'utf8',
+ desc => 'Use UTF-8 (Unicode) encoding for all text in Bugzilla. New ' .
+ 'installations should set this to true to avoid character encoding ' .
+ 'problems. Existing databases should set this to true only after ' .
+ 'the data has been converted from existing legacy character ' .
+ 'encodings to UTF-8.',
+ type => 'b',
+ default => '0',
+ },
+
+ {
name => 'cookiedomain',
desc => 'The domain for Bugzilla cookies. Normally blank. ' .
'If your website is at "www.foo.com", setting this to ' .
diff --git a/template/en/default/bug/show.xml.tmpl b/template/en/default/bug/show.xml.tmpl
index 6c950a2bf..222204936 100644
--- a/template/en/default/bug/show.xml.tmpl
+++ b/template/en/default/bug/show.xml.tmpl
@@ -19,7 +19,7 @@
# Contributor(s): Bradley Baetz <bbaetz@student.usyd.edu.au>
#
#%]
-<?xml version="1.0" standalone="yes"?>
+<?xml version="1.0" [% IF Param('utf8') %]encoding="UTF-8" [% END %]standalone="yes" ?>
<!DOCTYPE bugzilla SYSTEM "[% Param('urlbase') %]bugzilla.dtd">
<bugzilla version="[% VERSION %]"
diff --git a/template/en/default/config.rdf.tmpl b/template/en/default/config.rdf.tmpl
index 884c26a0c..27a7ba3a0 100644
--- a/template/en/default/config.rdf.tmpl
+++ b/template/en/default/config.rdf.tmpl
@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org>
#%]
-<?xml version="1.0"?>
+<?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- Note: this interface is experimental and under development.
- We may and probably will make breaking changes to it in the future. -->
diff --git a/template/en/default/list/list.rdf.tmpl b/template/en/default/list/list.rdf.tmpl
index 39a2350f1..06376aca9 100644
--- a/template/en/default/list/list.rdf.tmpl
+++ b/template/en/default/list/list.rdf.tmpl
@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org>
#%]
-<?xml version="1.0"?>
+<?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- [% template_version %] -->
<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
diff --git a/template/en/default/list/list.rss.tmpl b/template/en/default/list/list.rss.tmpl
index 96d7a903d..f1d6c4745 100644
--- a/template/en/default/list/list.rss.tmpl
+++ b/template/en/default/list/list.rss.tmpl
@@ -28,7 +28,7 @@
[% DEFAULT title = "$terms.Bugzilla $terms.Bugs" %]
-<?xml version="1.0"?>
+<?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
diff --git a/template/en/default/reports/duplicates.rdf.tmpl b/template/en/default/reports/duplicates.rdf.tmpl
index 941f9f70f..15594c7ca 100644
--- a/template/en/default/reports/duplicates.rdf.tmpl
+++ b/template/en/default/reports/duplicates.rdf.tmpl
@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org>
#%]
-<?xml version="1.0"?>
+<?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- [% template_version %] -->
<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"