From 2ccf81dec1fbe4e215ea47700a4e006420318621 Mon Sep 17 00:00:00 2001 From: Frédéric Buclin Date: Sun, 15 Mar 2015 13:50:28 +0100 Subject: Bug 902395: Enforce utf8 = true for all installations and remove the utf8 parameter r=dkl a=sgreen --- Bugzilla/Attachment/PatchReader.pm | 19 +++++----- Bugzilla/BugMail.pm | 2 +- Bugzilla/CGI.pm | 6 ++-- Bugzilla/Config.pm | 2 -- Bugzilla/Config/Common.pm | 16 +-------- Bugzilla/Config/General.pm | 7 ---- Bugzilla/DB/Mysql.pm | 32 +++++++---------- Bugzilla/DB/Oracle.pm | 5 ++- Bugzilla/DB/Pg.pm | 2 +- Bugzilla/DB/Schema/Mysql.pm | 7 +--- Bugzilla/DB/Schema/Pg.pm | 7 +--- Bugzilla/DB/Sqlite.pm | 3 +- Bugzilla/Mailer.pm | 20 +++++------ Bugzilla/Template.pm | 11 ++---- Bugzilla/Token.pm | 4 +-- Bugzilla/Util.pm | 68 ++++++++++++++++------------------- Bugzilla/WebService/Server/JSONRPC.pm | 2 +- 17 files changed, 74 insertions(+), 139 deletions(-) (limited to 'Bugzilla') diff --git a/Bugzilla/Attachment/PatchReader.pm b/Bugzilla/Attachment/PatchReader.pm index d0e221220..202f4b4fd 100644 --- a/Bugzilla/Attachment/PatchReader.pm +++ b/Bugzilla/Attachment/PatchReader.pm @@ -73,12 +73,10 @@ sub process_diff { $vars->{'other_patches'} = \@other_patches; setup_template_patch_reader($reader, $vars); - # The patch is going to be displayed in a HTML page and if the utf8 - # param is enabled, we have to encode attachment data as utf8. - if (Bugzilla->params->{'utf8'}) { - $attachment->data; # Populate ->{data} - utf8::decode($attachment->{data}); - } + # The patch is going to be displayed in a HTML page, so we have + # to encode attachment data as utf8. + $attachment->data; # Populate ->{data} + utf8::decode($attachment->{data}); $reader->iterate_string('Attachment ' . $attachment->id, $attachment->data); } } @@ -91,9 +89,8 @@ sub process_interdiff { require PatchReader::Raw; - # Encode attachment data as utf8 if it's going to be displayed in a HTML - # page using the UTF-8 encoding. - if ($format ne 'raw' && Bugzilla->params->{'utf8'}) { + # Encode attachment data as utf8 if it's going to be displayed in a HTML page. + if ($format ne 'raw') { $old_attachment->data; # Populate ->{data} utf8::decode($old_attachment->{data}); $new_attachment->data; # Populate ->{data} @@ -130,7 +127,7 @@ sub process_interdiff { $use_select = 1; } - if ($format ne 'raw' && Bugzilla->params->{'utf8'}) { + if ($format ne 'raw') { binmode $interdiff_stdout, ':utf8'; binmode $interdiff_stderr, ':utf8'; } else { @@ -230,7 +227,7 @@ sub get_unified_diff { # Prints out to temporary file. my ($fh, $filename) = File::Temp::tempfile(); - if ($format ne 'raw' && Bugzilla->params->{'utf8'}) { + if ($format ne 'raw') { # The HTML page will be displayed with the UTF-8 encoding. binmode $fh, ':utf8'; } diff --git a/Bugzilla/BugMail.pm b/Bugzilla/BugMail.pm index 7dfb48238..df39bd108 100644 --- a/Bugzilla/BugMail.pm +++ b/Bugzilla/BugMail.pm @@ -472,7 +472,7 @@ sub _generate_bugmail { } else { $email->content_type_set('multipart/alternative'); # Some mail clients need same encoding for each part, even empty ones. - $email->charset_set('UTF-8') if Bugzilla->params->{'utf8'}; + $email->charset_set('UTF-8'); } $email->parts_set(\@parts); return $email; diff --git a/Bugzilla/CGI.pm b/Bugzilla/CGI.pm index fb9719547..0b1712d2f 100644 --- a/Bugzilla/CGI.pm +++ b/Bugzilla/CGI.pm @@ -73,7 +73,7 @@ sub new { } # Send appropriate charset - $self->charset(Bugzilla->params->{'utf8'} ? 'UTF-8' : ''); + $self->charset('UTF-8'); # Redirect to urlbase/sslbase if we are not viewing an attachment. if ($self->url_is_attachment_base and $script ne 'attachment.cgi') { @@ -378,9 +378,7 @@ sub param { } # Fix UTF-8-ness of input parameters. - if (Bugzilla->params->{'utf8'}) { - @result = map { _fix_utf8($_) } @result; - } + @result = map { _fix_utf8($_) } @result; return wantarray ? @result : $result[0]; } diff --git a/Bugzilla/Config.pm b/Bugzilla/Config.pm index 1e22b5239..5a8117c29 100644 --- a/Bugzilla/Config.pm +++ b/Bugzilla/Config.pm @@ -231,8 +231,6 @@ sub update_params { } } - $param->{'utf8'} = 1 if $new_install; - # Bug 452525: OR based groups are on by default for new installations $param->{'or_groups'} = 1 if $new_install; diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index b6e58b318..52a2f8f11 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -25,7 +25,7 @@ use parent qw(Exporter); qw(check_multi check_numeric check_regexp check_group check_sslbase check_priority check_severity check_platform check_opsys check_shadowdb check_urlbase check_user_verify_class - check_ip check_mail_delivery_method check_notification check_utf8 + check_ip check_mail_delivery_method check_notification check_bug_status check_smtp_auth check_theschwartz_available check_maxattachmentsize check_email check_smtp_ssl check_comment_taggers_group check_smtp_server @@ -115,18 +115,6 @@ sub check_ip { return ""; } -sub check_utf8 { - my $utf8 = shift; - # You cannot turn off the UTF-8 parameter if you've already converted - # your tables to utf-8. - my $dbh = Bugzilla->dbh; - if ($dbh->isa('Bugzilla::DB::Mysql') && $dbh->bz_db_is_utf8 && !$utf8) { - return "You cannot disable UTF-8 support, because your MySQL database" - . " is encoded in UTF-8"; - } - return ""; -} - sub check_priority { my ($value) = (@_); my $legal_priorities = get_legal_field_values('priority'); @@ -490,8 +478,6 @@ valid group is provided. =item check_maxattachmentsize -=item check_utf8 - =item check_group =item check_opsys diff --git a/Bugzilla/Config/General.pm b/Bugzilla/Config/General.pm index 380680590..8513a1f02 100644 --- a/Bugzilla/Config/General.pm +++ b/Bugzilla/Config/General.pm @@ -24,13 +24,6 @@ use constant get_param_list => ( checker => \&check_email }, - { - name => 'utf8', - type => 'b', - default => '0', - checker => \&check_utf8 - }, - { name => 'shutdownhtml', type => 'l', diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 389cc6baa..402e9c58b 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -54,7 +54,7 @@ sub new { $dsn .= ";mysql_socket=$sock" if $sock; my %attrs = ( - mysql_enable_utf8 => Bugzilla->params->{'utf8'}, + mysql_enable_utf8 => 1, # Needs to be explicitly specified for command-line processes. mysql_auto_reconnect => 1, ); @@ -74,9 +74,8 @@ sub new { my $self = $class->db_new({ dsn => $dsn, user => $user, pass => $pass, attrs => \%attrs }); - # This makes sure that if the tables are encoded as UTF-8, we - # return their data correctly. - $self->do("SET NAMES utf8") if Bugzilla->params->{'utf8'}; + # This makes sure that we return table names correctly. + $self->do("SET NAMES utf8"); # all class local variables stored in DBI derived class needs to have # a prefix 'private_'. See DBI documentation. @@ -543,13 +542,11 @@ sub bz_setup_database { # If there are no tables, but the DB isn't utf8 and it should be, # then we should alter the database to be utf8. We know it should be - # if the utf8 parameter is true or there are no params at all. + # if there are no params at all. # This kind of situation happens when people create the database # themselves, and if we don't do this they will get the big # scary WARNING statement about conversion to UTF8. - if ( !$self->bz_db_is_utf8 && !@tables - && (Bugzilla->params->{'utf8'} || !scalar keys %{Bugzilla->params}) ) - { + if (!$self->bz_db_is_utf8 && !@tables) { $self->_alter_db_charset_to_utf8(); } @@ -653,7 +650,7 @@ sub bz_setup_database { MAX_ROWS=100000"); } - # Convert the database to UTF-8 if the utf8 parameter is on. + # Convert the database to UTF-8. # We check if any table isn't utf8, because lots of crazy # partial-conversion situations can happen, and this handles anything # that could come up (including having the DB charset be utf8 but not @@ -665,8 +662,8 @@ sub bz_setup_database { WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL AND TABLE_COLLATION NOT LIKE 'utf8%' LIMIT 1", undef, $db_name); - - if (Bugzilla->params->{'utf8'} && $non_utf8_tables) { + + if ($non_utf8_tables) { print "\n", install_string('mysql_utf8_conversion'); if (!Bugzilla->installation_answers->{NO_PAUSE}) { @@ -681,8 +678,7 @@ sub bz_setup_database { } } - print "Converting table storage format to UTF-8. This may take a", - " while.\n"; + say 'Converting table storage format to UTF-8. This may take a while.'; foreach my $table ($self->bz_table_list_real) { my $info_sth = $self->prepare("SHOW FULL COLUMNS FROM $table"); $info_sth->execute(); @@ -699,7 +695,7 @@ sub bz_setup_database { { my $name = $column->{Field}; - print "$table.$name needs to be converted to UTF-8...\n"; + say "$table.$name needs to be converted to UTF-8..."; # These will be automatically re-created at the end # of checksetup. @@ -737,7 +733,7 @@ sub bz_setup_database { } } - print "Converting the $table table to UTF-8...\n"; + say "Converting the $table table to UTF-8..."; my $bin = "ALTER TABLE $table " . join(', ', @binary_sql); my $utf = "ALTER TABLE $table " . join(', ', @utf8_sql, 'DEFAULT CHARACTER SET utf8'); @@ -761,11 +757,9 @@ sub bz_setup_database { # a mysqldump.) So we have this change outside of the above block, # so that it just happens silently if no actual *table* conversion # needs to happen. - if (Bugzilla->params->{'utf8'} && !$self->bz_db_is_utf8) { - $self->_alter_db_charset_to_utf8(); - } + $self->_alter_db_charset_to_utf8() unless $self->bz_db_is_utf8; - $self->_fix_defaults(); + $self->_fix_defaults(); # Bug 451735 highlighted a bug in bz_drop_index() which didn't # check for FKs before trying to delete an index. Consequently, diff --git a/Bugzilla/DB/Oracle.pm b/Bugzilla/DB/Oracle.pm index 7424019ac..51678dec9 100644 --- a/Bugzilla/DB/Oracle.pm +++ b/Bugzilla/DB/Oracle.pm @@ -55,7 +55,7 @@ sub new { $dbname ||= Bugzilla->localconfig->{db_name}; # Set the language enviroment - $ENV{'NLS_LANG'} = '.AL32UTF8' if Bugzilla->params->{'utf8'}; + $ENV{'NLS_LANG'} = '.AL32UTF8'; # construct the DSN from the parameters we got my $dsn = "dbi:Oracle:host=$host;sid=$dbname"; @@ -74,8 +74,7 @@ sub new { # Set the session's default date format to match MySQL $self->do("ALTER SESSION SET NLS_DATE_FORMAT='YYYY-MM-DD HH24:MI:SS'"); $self->do("ALTER SESSION SET NLS_TIMESTAMP_FORMAT='YYYY-MM-DD HH24:MI:SS'"); - $self->do("ALTER SESSION SET NLS_LENGTH_SEMANTICS='CHAR'") - if Bugzilla->params->{'utf8'}; + $self->do("ALTER SESSION SET NLS_LENGTH_SEMANTICS='CHAR'"); # To allow case insensitive query. $self->do("ALTER SESSION SET NLS_COMP='ANSI'"); $self->do("ALTER SESSION SET NLS_SORT='BINARY_AI'"); diff --git a/Bugzilla/DB/Pg.pm b/Bugzilla/DB/Pg.pm index 59231e2a6..37f06c354 100644 --- a/Bugzilla/DB/Pg.pm +++ b/Bugzilla/DB/Pg.pm @@ -53,7 +53,7 @@ sub new { # creating tables. $dsn .= ";options='-c client_min_messages=warning'"; - my $attrs = { pg_enable_utf8 => Bugzilla->params->{'utf8'} }; + my $attrs = { pg_enable_utf8 => 1 }; my $self = $class->db_new({ dsn => $dsn, user => $user, pass => $pass, attrs => $attrs }); diff --git a/Bugzilla/DB/Schema/Mysql.pm b/Bugzilla/DB/Schema/Mysql.pm index 1dc408654..db01e971d 100644 --- a/Bugzilla/DB/Schema/Mysql.pm +++ b/Bugzilla/DB/Schema/Mysql.pm @@ -148,12 +148,7 @@ sub _get_create_index_ddl { sub get_create_database_sql { my ($self, $name) = @_; - # We only create as utf8 if we have no params (meaning we're doing - # a new installation) or if the utf8 param is on. - my $create_utf8 = Bugzilla->params->{'utf8'} - || !defined Bugzilla->params->{'utf8'}; - my $charset = $create_utf8 ? "CHARACTER SET utf8" : ''; - return ("CREATE DATABASE `$name` $charset"); + return ("CREATE DATABASE `$name` CHARACTER SET utf8"); } # MySQL has a simpler ALTER TABLE syntax than ANSI. diff --git a/Bugzilla/DB/Schema/Pg.pm b/Bugzilla/DB/Schema/Pg.pm index 03a3b0a48..dd8531927 100644 --- a/Bugzilla/DB/Schema/Pg.pm +++ b/Bugzilla/DB/Schema/Pg.pm @@ -79,12 +79,7 @@ sub _initialize { sub get_create_database_sql { my ($self, $name) = @_; - # We only create as utf8 if we have no params (meaning we're doing - # a new installation) or if the utf8 param is on. - my $create_utf8 = Bugzilla->params->{'utf8'} - || !defined Bugzilla->params->{'utf8'}; - my $charset = $create_utf8 ? "ENCODING 'UTF8' TEMPLATE template0" : ''; - return ("CREATE DATABASE \"$name\" $charset"); + return ("CREATE DATABASE \"$name\" ENCODING 'UTF8' TEMPLATE template0"); } sub get_rename_column_ddl { diff --git a/Bugzilla/DB/Sqlite.pm b/Bugzilla/DB/Sqlite.pm index ddafc1696..0cdfa3477 100644 --- a/Bugzilla/DB/Sqlite.pm +++ b/Bugzilla/DB/Sqlite.pm @@ -93,8 +93,7 @@ sub new { my $dsn = "dbi:SQLite:dbname=$db_name"; my $attrs = { - # XXX Should we just enforce this to be always on? - sqlite_unicode => Bugzilla->params->{'utf8'}, + sqlite_unicode => 1, }; my $self = $class->db_new({ dsn => $dsn, user => '', diff --git a/Bugzilla/Mailer.pm b/Bugzilla/Mailer.pm index bcf17458f..ef75d0cf8 100644 --- a/Bugzilla/Mailer.pm +++ b/Bugzilla/Mailer.pm @@ -125,9 +125,7 @@ sub MessageToMTA { # We don't recode headers that happen multiple times. next if scalar(@values) > 1; if (my $value = $values[0]) { - if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($value)) { - utf8::decode($value); - } + utf8::decode($value) unless utf8::is_utf8($value); # avoid excessive line wrapping done by Encode. local $Encode::Encoding{'MIME-Q'}->{'bpl'} = 998; @@ -187,18 +185,16 @@ sub MessageToMTA { my $content_type = $part->content_type || ''; $content_type =~ /charset=['"](.+)['"]/; # If no charset is defined or is the default us-ascii, - # then we encode the email to UTF-8 if Bugzilla has utf8 enabled. + # then we encode the email to UTF-8. # XXX - This is a hack to workaround bug 723944. if (!$1 || $1 eq 'us-ascii') { my $body = $part->body; - if (Bugzilla->params->{'utf8'}) { - $part->charset_set('UTF-8'); - # encoding_set works only with bytes, not with utf8 strings. - my $raw = $part->body_raw; - if (utf8::is_utf8($raw)) { - utf8::encode($raw); - $part->body_set($raw); - } + $part->charset_set('UTF-8'); + # encoding_set works only with bytes, not with utf8 strings. + my $raw = $part->body_raw; + if (utf8::is_utf8($raw)) { + utf8::encode($raw); + $part->body_set($raw); } $part->encoding_set('quoted-printable') if !is_7bit_clean($body); } diff --git a/Bugzilla/Template.pm b/Bugzilla/Template.pm index 1b6b48205..6de927b9a 100644 --- a/Bugzilla/Template.pm +++ b/Bugzilla/Template.pm @@ -752,7 +752,7 @@ sub create { # Initialize templates (f.e. by loading plugins like Hook). PRE_PROCESS => ["global/variables.none.tmpl"], - ENCODING => Bugzilla->params->{'utf8'} ? 'UTF-8' : undef, + ENCODING => 'UTF-8', # Functions for processing text within templates in various ways. # IMPORTANT! When adding a filter here that does not override a @@ -783,15 +783,10 @@ sub create { # Strips out control characters excepting whitespace strip_control_chars => sub { my ($data) = @_; - state $use_utf8 = Bugzilla->params->{'utf8'}; - # Only run for utf8 to avoid issues with other multibyte encodings - # that may be reassigning meaning to ascii characters. - if ($use_utf8) { - $data =~ s/(?![\t\r\n])[[:cntrl:]]//g; - } + $data =~ s/(?![\t\r\n])[[:cntrl:]]//g; return $data; }, - + # HTML collapses newlines in element attributes to a single space, # so form elements which may have whitespace (ie comments) need # to be encoded using diff --git a/Bugzilla/Token.pm b/Bugzilla/Token.pm index 84d86b8c6..a8358d4a7 100644 --- a/Bugzilla/Token.pm +++ b/Bugzilla/Token.pm @@ -185,9 +185,7 @@ sub issue_hash_token { my $token = join('*', @args); # Wide characters cause Digest::SHA to die. - if (Bugzilla->params->{'utf8'}) { - utf8::encode($token) if utf8::is_utf8($token); - } + utf8::encode($token) if utf8::is_utf8($token); $token = hmac_sha256_base64($token, Bugzilla->localconfig->{'site_wide_secret'}); $token =~ s/\+/-/g; $token =~ s/\//_/g; diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm index 670f5f8f2..f0b38b0b0 100644 --- a/Bugzilla/Util.pm +++ b/Bugzilla/Util.pm @@ -73,36 +73,31 @@ sub html_quote { # Obscure '@'. $var =~ s/\@/\@/g; - state $use_utf8 = Bugzilla->params->{'utf8'}; - - if ($use_utf8) { - # Remove control characters if the encoding is utf8. - # Other multibyte encodings may be using this range; so ignore if not utf8. - $var =~ s/(?![\t\r\n])[[:cntrl:]]//g; - - # Remove the following characters because they're - # influencing BiDi: - # -------------------------------------------------------- - # |Code |Name |UTF-8 representation| - # |------|--------------------------|--------------------| - # |U+202a|Left-To-Right Embedding |0xe2 0x80 0xaa | - # |U+202b|Right-To-Left Embedding |0xe2 0x80 0xab | - # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac | - # |U+202d|Left-To-Right Override |0xe2 0x80 0xad | - # |U+202e|Right-To-Left Override |0xe2 0x80 0xae | - # -------------------------------------------------------- - # - # The following are characters influencing BiDi, too, but - # they can be spared from filtering because they don't - # influence more than one character right or left: - # -------------------------------------------------------- - # |Code |Name |UTF-8 representation| - # |------|--------------------------|--------------------| - # |U+200e|Left-To-Right Mark |0xe2 0x80 0x8e | - # |U+200f|Right-To-Left Mark |0xe2 0x80 0x8f | - # -------------------------------------------------------- - $var =~ tr/\x{202a}-\x{202e}//d; - } + # Remove control characters. + $var =~ s/(?![\t\r\n])[[:cntrl:]]//g; + + # Remove the following characters because they're + # influencing BiDi: + # -------------------------------------------------------- + # |Code |Name |UTF-8 representation| + # |------|--------------------------|--------------------| + # |U+202a|Left-To-Right Embedding |0xe2 0x80 0xaa | + # |U+202b|Right-To-Left Embedding |0xe2 0x80 0xab | + # |U+202c|Pop Directional Formatting|0xe2 0x80 0xac | + # |U+202d|Left-To-Right Override |0xe2 0x80 0xad | + # |U+202e|Right-To-Left Override |0xe2 0x80 0xae | + # -------------------------------------------------------- + # + # The following are characters influencing BiDi, too, but + # they can be spared from filtering because they don't + # influence more than one character right or left: + # -------------------------------------------------------- + # |Code |Name |UTF-8 representation| + # |------|--------------------------|--------------------| + # |U+200e|Left-To-Right Mark |0xe2 0x80 0x8e | + # |U+200f|Right-To-Left Mark |0xe2 0x80 0x8f | + # -------------------------------------------------------- + $var =~ tr/\x{202a}-\x{202e}//d; return $var; } @@ -203,8 +198,8 @@ sub email_filter { # This originally came from CGI.pm, by Lincoln D. Stein sub url_quote { my ($toencode) = (@_); - utf8::encode($toencode) # The below regex works only on bytes - if Bugzilla->params->{'utf8'} && utf8::is_utf8($toencode); + # The below regex works only on bytes + utf8::encode($toencode) if utf8::is_utf8($toencode); $toencode =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg; return $toencode; } @@ -635,9 +630,7 @@ sub bz_crypt { } # Wide characters cause crypt and Digest to die. - if (Bugzilla->params->{'utf8'}) { - utf8::encode($password) if utf8::is_utf8($password); - } + utf8::encode($password) if utf8::is_utf8($password); my $crypted_password; if (!$algorithm) { @@ -785,9 +778,8 @@ sub display_value { } sub disable_utf8 { - if (Bugzilla->params->{'utf8'}) { - binmode STDOUT, ':bytes'; # Turn off UTF8 encoding. - } + # Turn off UTF8 encoding. + binmode STDOUT, ':bytes'; } use constant UTF8_ACCIDENTAL => qw(shiftjis big5-eten euc-kr euc-jp); diff --git a/Bugzilla/WebService/Server/JSONRPC.pm b/Bugzilla/WebService/Server/JSONRPC.pm index 2b5c7ec9b..3fa0b65a9 100644 --- a/Bugzilla/WebService/Server/JSONRPC.pm +++ b/Bugzilla/WebService/Server/JSONRPC.pm @@ -54,7 +54,7 @@ sub create_json_coder { # This may seem a little backwards, but what this really means is # "don't convert our utf8 into byte strings, just leave it as a # utf8 string." - $json->utf8(0) if Bugzilla->params->{'utf8'}; + $json->utf8(0); return $json; } -- cgit v1.2.3-24-g4f1b