summary refs log tree commit diff stats
path: root/Bugzilla
diff options
context:
space:
mode:
author mkanat%bugzilla.org <> 2007-11-23 13:58:33 +0100
committer mkanat%bugzilla.org <> 2007-11-23 13:58:33 +0100
commit 9f0310bf8c0821347699b434f659eb52decabf87 (patch)
tree 31153ecb72f2b57a9bb3daf638cc4e1152f8a0b0 /Bugzilla
parent 8ab75a83c21606ad77a38c05057f886011fa0451 (diff)
download bugzilla-9f0310bf8c0821347699b434f659eb52decabf87.tar.gz
bugzilla-9f0310bf8c0821347699b434f659eb52decabf87.tar.xz
Bug 363153: Turn on the utf8 bit on all strings in Bugzilla that contain
non-ASCII data, if the utf8 parameter is on. This means that string functions like substr() work properly on multi-byte languages, now. Patch By Max Kanat-Alexander <mkanat@bugzilla.org> r=wurblzap, a=mkanat
Diffstat (limited to 'Bugzilla')
-rw-r--r-- Bugzilla/CGI.pm 21
-rw-r--r-- Bugzilla/Constants.pm 8
-rw-r--r-- Bugzilla/DB/Mysql.pm 4
-rw-r--r-- Bugzilla/DB/Pg.pm 4
-rw-r--r-- Bugzilla/Mailer.pm 4
-rw-r--r-- Bugzilla/Util.pm 6
6 files changed, 40 insertions, 7 deletions
diff --git a/Bugzilla/CGI.pm b/Bugzilla/CGI.pm
index ef2cb70f5..3498b3c70 100644
--- a/Bugzilla/CGI.pm
+++ b/Bugzilla/CGI.pm
@@ -233,6 +233,27 @@ sub header {
return $self->SUPER::header(@_) || "";
}
+# CGI.pm is not utf8-aware and hands parameter values back as bytes rather
+# than decoded UTF-8 strings. When the utf8 parameter is on, a lookup with
+# exactly one argument has its result(s) decoded here; every other call
+# form is passed straight through to the parent class.
+sub param {
+    my $self = shift;
+
+    # utf8 is off, or this is not a plain one-argument fetch: defer
+    # completely to the parent implementation.
+    return $self->SUPER::param(@_)
+        unless Bugzilla->params->{'utf8'} && scalar(@_) == 1;
+
+    # Honour the caller's context: every value decoded in list context,
+    # a single decoded value otherwise.
+    return map { _fix_utf8($_) } $self->SUPER::param(@_) if wantarray;
+    return _fix_utf8(scalar $self->SUPER::param(@_));
+}
+
+# Turns a byte string into a Perl character string by decoding it as UTF-8.
+# The argument is returned untouched when it is undefined, when it is a
+# reference (for example an upload handle object that CGI.pm may return for
+# a file field; decoding would stringify it and lose the handle), or when
+# it already carries the utf8 flag.
+sub _fix_utf8 {
+    my $input = shift;
+    # The is_utf8 is here in case CGI gets smart about utf8 someday.
+    utf8::decode($input)
+        if defined $input && !ref $input && !utf8::is_utf8($input);
+    return $input;
+}
+
# The various parts of Bugzilla which create cookies don't want to have to
# pass them around to all of the callers. Instead, store them locally here,
# and then output as required from |header|.
diff --git a/Bugzilla/Constants.pm b/Bugzilla/Constants.pm
index 12d54abee..e34fc0bb7 100644
--- a/Bugzilla/Constants.pm
+++ b/Bugzilla/Constants.pm
@@ -382,10 +382,10 @@ use constant DB_MODULE => {
dbd => {
package => 'DBD-mysql',
module => 'DBD::mysql',
- version => '2.9003',
- # Certain versions are broken, development versions are
- # always disallowed.
- blacklist => ['^3\.000[3-6]', '_'],
+ # Disallow development versions
+ blacklist => ['_'],
+ # For UTF-8 support
+ version => '4.00',
},
name => 'MySQL'},
'pg' => {db => 'Bugzilla::DB::Pg', db_version => '8.00.0000',
diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm
index 25ee32b64..9e0d25277 100644
--- a/Bugzilla/DB/Mysql.pm
+++ b/Bugzilla/DB/Mysql.pm
@@ -58,8 +58,10 @@ sub new {
my $dsn = "DBI:mysql:host=$host;database=$dbname";
$dsn .= ";port=$port" if $port;
$dsn .= ";mysql_socket=$sock" if $sock;
+
+ my $attrs = { mysql_enable_utf8 => Bugzilla->params->{'utf8'} };
- my $self = $class->db_new($dsn, $user, $pass);
+ my $self = $class->db_new($dsn, $user, $pass, $attrs);
# This makes sure that if the tables are encoded as UTF-8, we
# return their data correctly.
diff --git a/Bugzilla/DB/Pg.pm b/Bugzilla/DB/Pg.pm
index 9f5b67757..bd16b654c 100644
--- a/Bugzilla/DB/Pg.pm
+++ b/Bugzilla/DB/Pg.pm
@@ -68,7 +68,9 @@ sub new {
# creating tables.
$dsn .= ";options='-c client_min_messages=warning'";
- my $self = $class->db_new($dsn, $user, $pass);
+ my $attrs = { pg_enable_utf8 => Bugzilla->params->{'utf8'} };
+
+ my $self = $class->db_new($dsn, $user, $pass, $attrs);
# all class local variables stored in DBI derived class needs to have
# a prefix 'private_'. See DBI documentation.
diff --git a/Bugzilla/Mailer.pm b/Bugzilla/Mailer.pm
index 03f370a4e..48f40d8b7 100644
--- a/Bugzilla/Mailer.pm
+++ b/Bugzilla/Mailer.pm
@@ -67,7 +67,9 @@ sub MessageToMTA {
# Encode the headers correctly in quoted-printable
foreach my $header qw(From To Cc Reply-To Sender Errors-To Subject) {
if (my $value = $email->header($header)) {
- $value = Encode::decode("UTF-8", $value) if Bugzilla->params->{'utf8'};
+ if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($value)) {
+     # utf8::decode() converts its argument in place and returns only a
+     # success flag, so its result must not be assigned back to $value
+     # (that would replace the header text with a boolean).
+     utf8::decode($value);
+ }
my $encoded = encode('MIME-Q', $value);
$email->header_set($header, $encoded);
}
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index ffc2af95a..144fb87c1 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -185,6 +185,8 @@ sub html_light_quote {
# Percent-encodes every character of the argument that is not in the set
# [a-zA-Z0-9_\-.] and returns the encoded copy.
# This originally came from CGI.pm, by Lincoln D. Stein
sub url_quote {
     my ($toencode) = (@_);
+    # The substitution below works only on bytes, so a character string
+    # is first turned back into its UTF-8 byte sequence.
+    if (Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode)) {
+        utf8::encode($toencode);
+    }
     $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
     return $toencode;
}
@@ -206,6 +208,10 @@ sub xml_quote {
return $var;
}
+# This function must not be relied upon to return a valid string to pass to
+# the DB or the user in UTF-8 situations. The only thing you can rely upon
+# it for is that if you url_decode a string, it will url_encode back to the
+# exact same thing.
sub url_decode {
my ($todecode) = (@_);
$todecode =~ tr/+/ /; # pluses become spaces