From 496a78f616b10c1c5c4bd0398021232e9bffa77e Mon Sep 17 00:00:00 2001 From: "mkanat%bugzilla.org" <> Date: Sat, 18 Nov 2006 23:10:11 +0000 Subject: Bug 304550: Bugzilla should always store data in MySQL as UTF-8 Patch By Max Kanat-Alexander r=LpSolit, a=justdave --- Bugzilla/Config/Common.pm | 14 +++- Bugzilla/Config/Core.pm | 1 + Bugzilla/Constants.pm | 2 +- Bugzilla/DB.pm | 16 ++++- Bugzilla/DB/Mysql.pm | 86 +++++++++++++++++++++++++ Bugzilla/DB/Schema.pm | 23 ++++++- Bugzilla/DB/Schema/Mysql.pm | 14 +++- template/en/default/admin/params/core.html.tmpl | 10 ++- template/en/default/global/messages.html.tmpl | 3 + 9 files changed, 157 insertions(+), 12 deletions(-) diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index 8b94220f8..a609936c0 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -48,7 +48,7 @@ use base qw(Exporter); check_opsys check_shadowdb check_urlbase check_webdotbase check_netmask check_user_verify_class check_image_converter check_languages check_mail_delivery_method check_notification - check_timezone + check_timezone check_utf8 ); # Checking functions for the various values @@ -114,6 +114,18 @@ sub check_sslbase { return ""; } +sub check_utf8 { + my $utf8 = shift; + # You cannot turn off the UTF-8 parameter if you've already converted + # your tables to utf-8. + my $dbh = Bugzilla->dbh; + if ($dbh->isa('Bugzilla::DB::Mysql') && $dbh->bz_db_is_utf8 && !$utf8) { + return "You cannot disable UTF-8 support, because your MySQL database" + . " is encoded in UTF-8"; + } + return ""; +} + sub check_priority { my ($value) = (@_); my $legal_priorities = get_legal_field_values('priority'); diff --git a/Bugzilla/Config/Core.pm b/Bugzilla/Config/Core.pm index 5688e5c8c..33eb46ac5 100644 --- a/Bugzilla/Config/Core.pm +++ b/Bugzilla/Config/Core.pm @@ -98,6 +98,7 @@ sub get_param_list { name => 'utf8', type => 'b', default => '0', + checker => \&check_utf8 }, { diff --git a/Bugzilla/Constants.pm b/Bugzilla/Constants.pm index ec1467136..5186fcbce 100644 --- a/Bugzilla/Constants.pm +++ b/Bugzilla/Constants.pm @@ -338,7 +338,7 @@ use constant ERROR_MODE_DIE_SOAP_FAULT => 2; # Data about what we require for different databases. use constant DB_MODULE => { - 'mysql' => {db => 'Bugzilla::DB::Mysql', db_version => '4.0.14', + 'mysql' => {db => 'Bugzilla::DB::Mysql', db_version => '4.1.2', dbd => 'DBD::mysql', dbd_version => '2.9003', name => 'MySQL'}, 'pg' => {db => 'Bugzilla::DB::Pg', db_version => '8.00.0000', diff --git a/Bugzilla/DB.pm b/Bugzilla/DB.pm index cefd361ae..33fdda0d8 100644 --- a/Bugzilla/DB.pm +++ b/Bugzilla/DB.pm @@ -130,6 +130,10 @@ sub bz_check_requirements { . bz_locations()->{'localconfig'}; } + die("It is not safe to run Bugzilla inside the 'mysql' database.\n" + . "Please pick a different value for \$db_name in localconfig.") + if $lc->{db_name} eq 'mysql'; + # Check the existence and version of the DBD that we need. my $dbd = $db->{dbd}; my $dbd_ver = $db->{dbd_version}; @@ -196,8 +200,14 @@ sub bz_create_database { print "Creating database $db_name...\n"; # Try to create the DB, and if we fail print a friendly error. - if (!eval { $dbh->do("CREATE DATABASE $db_name") }) { - my $error = $dbh->errstr; + my $success = eval { + my @sql = $dbh->_bz_schema->get_create_database_sql($db_name); + # This ends with 1 because this particular do doesn't always + # return something. + $dbh->do($_) foreach @sql; 1; + }; + if (!$success) { + my $error = $dbh->errstr || $@; chomp($error); print STDERR "The '$db_name' database could not be created.", " The error returned was:\n\n $error\n\n", @@ -221,7 +231,7 @@ sub _get_no_db_connection { if (!$conn_success) { my $sql_server = DB_MODULE->{lc($lc->{db_driver})}->{name}; # Can't use $dbh->errstr because $dbh is undef. - my $error = $DBI::errstr; + my $error = $DBI::errstr || $@; chomp($error); print STDERR "There was an error connecting to $sql_server:\n\n", " $error\n\n", _bz_connect_error_reasons(); diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 3bcd74389..5dd598dff 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -59,6 +59,10 @@ sub new { my $self = $class->db_new($dsn, $user, $pass); + # This makes sure that if the tables are encoded as UTF-8, we + # return their data correctly. + $self->do("SET NAMES utf8") if Bugzilla->params->{'utf8'}; + # all class local variables stored in DBI derived class needs to have # a prefix 'private_'. See DBI documentation. $self->{private_bz_tables_locked} = ""; @@ -545,6 +549,88 @@ sub bz_setup_database { MAX_ROWS=100000"); } + # Convert the database to UTF-8 if the utf8 parameter is on. + if (Bugzilla->params->{'utf8'} && !$self->bz_db_is_utf8) { + print <bz_table_list_real) { + my $info_sth = $self->prepare("SHOW FULL COLUMNS FROM $table"); + $info_sth->execute(); + while (my $column = $info_sth->fetchrow_hashref) { + # If this particular column isn't stored in utf-8 + if ($column->{Collation} ne 'NULL' + && $column->{Collation} !~ /utf8/) + { + my $name = $column->{Field}; + + # The code below doesn't work on a field with a FULLTEXT + # index. So we drop it. The upgrade code will re-create + # it later. + if ($table eq 'longdescs' && $name eq 'thetext') { + $self->bz_drop_index('longdescs', + 'longdescs_thetext_idx'); + } + if ($table eq 'bugs' && $name eq 'short_desc') { + $self->bz_drop_index('bugs', 'bugs_short_desc_idx'); + } + + print "Converting $table.$name to be stored as UTF-8...\n"; + my $col_info = + $self->bz_column_info_real($table, $name); + + # CHANGE COLUMN doesn't take PRIMARY KEY + delete $col_info->{PRIMARYKEY}; + + my $sql_def = $self->_bz_schema->get_type_ddl($col_info); + # We don't want MySQL to actually try to *convert* + # from our current charset to UTF-8, we just want to + # transfer the bytes directly. This is how we do that. + + # The CHARACTER SET part of the definition has to come + # right after the type, which will always come first. + my ($binary, $utf8) = ($sql_def, $sql_def); + my $type = $self->_bz_schema->convert_type($col_info->{TYPE}); + $binary =~ s/(\Q$type\E)/$1 CHARACTER SET binary/; + $utf8 =~ s/(\Q$type\E)/$1 CHARACTER SET utf8/; + $self->do("ALTER TABLE $table CHANGE COLUMN $name $name + $binary"); + $self->do("ALTER TABLE $table CHANGE COLUMN $name $name + $utf8"); + } + $self->do("ALTER TABLE $table DEFAULT CHARACTER SET utf8"); + } + } # foreach my $table (@tables) + + my $db_name = Bugzilla->localconfig->{db_name}; + $self->do("ALTER DATABASE $db_name CHARACTER SET utf8"); + } +} + +sub bz_db_is_utf8 { + my $self = shift; + my $db_collation = $self->selectrow_arrayref( + "SHOW VARIABLES LIKE 'character_set_database'"); + # First column holds the variable name, second column holds the value. + return $db_collation->[1] =~ /utf8/ ? 1 : 0; } diff --git a/Bugzilla/DB/Schema.pm b/Bugzilla/DB/Schema.pm index 6846691e2..c2c884e6d 100644 --- a/Bugzilla/DB/Schema.pm +++ b/Bugzilla/DB/Schema.pm @@ -1302,7 +1302,7 @@ sub get_type_ddl { } my $fkref = $self->{enable_references} ? $finfo->{REFERENCES} : undef; - my $type_ddl = $self->{db_specific}{$type} || $type; + my $type_ddl = $self->convert_type($type); # DEFAULT attribute must appear before any column constraints # (e.g., NOT NULL), for Oracle $type_ddl .= " DEFAULT $default" if (defined($default)); @@ -1313,7 +1313,19 @@ sub get_type_ddl { return($type_ddl); } #eosub--get_type_ddl -#-------------------------------------------------------------------------- + +sub convert_type { + +=item C + +Converts a TYPE from the L format into the real SQL type. + +=cut + + my ($self, $type) = @_; + return $self->{db_specific}->{$type} || $type; +} + sub get_column { =item C @@ -1383,7 +1395,12 @@ sub get_table_columns { return @columns; } #eosub--get_table_columns -#-------------------------------------------------------------------------- + +sub get_create_database_sql { + my ($self, $name) = @_; + return ("CREATE DATABASE $name"); +} + sub get_table_ddl { =item C diff --git a/Bugzilla/DB/Schema/Mysql.pm b/Bugzilla/DB/Schema/Mysql.pm index 0069dcc7f..d7cd708a2 100644 --- a/Bugzilla/DB/Schema/Mysql.pm +++ b/Bugzilla/DB/Schema/Mysql.pm @@ -129,7 +129,9 @@ sub _get_create_table_ddl { my($self, $table) = @_; - return($self->SUPER::_get_create_table_ddl($table) . ' TYPE = MYISAM'); + my $charset = Bugzilla->dbh->bz_db_is_utf8 ? "CHARACTER SET utf8" : ''; + return($self->SUPER::_get_create_table_ddl($table) + . " ENGINE = MYISAM $charset"); } #eosub--_get_create_table_ddl #------------------------------------------------------------------------------ @@ -150,6 +152,16 @@ sub _get_create_index_ddl { } #eosub--_get_create_index_ddl #-------------------------------------------------------------------- +sub get_create_database_sql { + my ($self, $name) = @_; + # We only create as utf8 if we have no params (meaning we're doing + # a new installation) or if the utf8 param is on. + my $create_utf8 = Bugzilla->params->{'utf8'} + || !defined Bugzilla->params->{'utf8'}; + my $charset = $create_utf8 ? "CHARACTER SET utf8" : ''; + return ("CREATE DATABASE $name $charset"); +} + # MySQL has a simpler ALTER TABLE syntax than ANSI. sub get_alter_column_ddl { my ($self, $table, $column, $new_def, $set_nulls_to) = @_; diff --git a/template/en/default/admin/params/core.html.tmpl b/template/en/default/admin/params/core.html.tmpl index 3e1010a7d..2ab6856a8 100644 --- a/template/en/default/admin/params/core.html.tmpl +++ b/template/en/default/admin/params/core.html.tmpl @@ -64,9 +64,13 @@ utf8 => "Use UTF-8 (Unicode) encoding for all text in ${terms.Bugzilla}. New " _ "installations should set this to true to avoid character encoding " _ - "problems. Existing databases should set this to true only after " _ - "the data has been converted from existing legacy character " _ - "encodings to UTF-8.", + "problems. Existing databases should set this to true " _ + " only after the data has been converted from existing legacy " _ + " character encodings to UTF-8, using the " _ + " contrib/recode.pl script.

Note " _ + " that if you turn this parameter from "off" to " _ + " "on", you must re-run checksetup.pl immediately " _ + " afterward.", shutdownhtml => "If this field is non-empty, then $terms.Bugzilla will be completely " _ "disabled and this text will be displayed instead of all the " _ diff --git a/template/en/default/global/messages.html.tmpl b/template/en/default/global/messages.html.tmpl index a3b29bee7..b5f736120 100644 --- a/template/en/default/global/messages.html.tmpl +++ b/template/en/default/global/messages.html.tmpl @@ -216,6 +216,9 @@ [% IF param_changed.size > 0 %] [% FOREACH param = param_changed %] Changed [% param FILTER html %]
+ [% IF param == 'utf8' && Param('utf8') %] + You must now re-run checksetup.pl.
+ [% END %] [% END %] [% ELSE %] No changes made. -- cgit v1.2.3-24-g4f1b