From be1496d1a8618ef186047468009c7e3e0640183b Mon Sep 17 00:00:00 2001 From: Andrey Andreev Date: Tue, 11 Feb 2014 22:48:45 +0200 Subject: Utf8/iconv/mbstring-related changes --- system/core/Utf8.php | 33 +++++++++++++++++++----- system/libraries/Email.php | 6 ++--- system/libraries/Trackback.php | 9 ++++++- tests/mocks/core/utf8.php | 11 ++++++++ user_guide_src/source/changelog.rst | 7 +++++ user_guide_src/source/general/reserved_names.rst | 3 +++ 6 files changed, 59 insertions(+), 10 deletions(-) diff --git a/system/core/Utf8.php b/system/core/Utf8.php index ff3e49139..b58c611e1 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -59,17 +59,31 @@ class CI_Utf8 { { define('MB_ENABLED', TRUE); mb_internal_encoding($charset); + // This is required for mb_convert_encoding() to strip invalid characters + ini_set('mbstring.substitute_character', 'none'); } else { define('MB_ENABLED', FALSE); } + // Do the same for iconv, which actually has more easy to remember + // predefined constants (such as ICONV_IMPL), but the iconv PHP + // manual page says that using them is "strongly discouraged". + if (extension_loaded('iconv')) + { + define('ICONV_ENABLED', TRUE); + iconv_set_encoding('internal_encoding', $charset); + } + else + { + define('ICONV_ENABLED', FALSE); + } + if ( - defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 - && function_exists('iconv') // iconv must be installed - && MB_ENABLED === TRUE // mbstring must be enabled - && $charset === 'UTF-8' // Application charset must be UTF-8 + defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 + && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) // iconv or mbstring must be installed + && $charset === 'UTF-8' // Application charset must be UTF-8 ) { define('UTF8_ENABLED', TRUE); @@ -98,7 +112,14 @@ class CI_Utf8 { { if ($this->_is_ascii($str) === FALSE) { - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + if (ICONV_ENABLED) + { + $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + } + elseif (MB_ENABLED) + { + $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8'); + } } return $str; @@ -134,7 +155,7 @@ class CI_Utf8 { */ public function convert_to_utf8($str, $encoding) { - if (function_exists('iconv')) + if (ICONV_ENABLED) { return @iconv($encoding, 'UTF-8', $str); } diff --git a/system/libraries/Email.php b/system/libraries/Email.php index 7d13a4645..93c19de5e 100644 --- a/system/libraries/Email.php +++ b/system/libraries/Email.php @@ -1544,7 +1544,7 @@ class CI_Email { { return mb_encode_mimeheader($str, $this->charset, 'Q', $this->crlf); } - elseif (extension_loaded('iconv')) + elseif (ICONV_ENABLED === TRUE) { $output = @iconv_mime_encode('', $str, array( @@ -1573,9 +1573,9 @@ class CI_Email { isset($chars) OR $chars = strlen($str); $output = '=?'.$this->charset.'?Q?'; - for ($i = 0, $length = strlen($output), $iconv = extension_loaded('iconv'); $i < $chars; $i++) + for ($i = 0, $length = strlen($output); $i < $chars; $i++) { - $chr = ($this->charset === 'UTF-8' && $iconv === TRUE) + $chr = ($this->charset === 'UTF-8' && ICONV_ENABLED === TRUE) ? '='.implode('=', str_split(strtoupper(bin2hex(iconv_substr($str, $i, 1, $this->charset))), 2)) : '='.strtoupper(bin2hex($str[$i])); diff --git a/system/libraries/Trackback.php b/system/libraries/Trackback.php index 7bcb2aa21..9fa4a8edb 100644 --- a/system/libraries/Trackback.php +++ b/system/libraries/Trackback.php @@ -181,7 +181,14 @@ class CI_Trackback { if ($val !== 'url' && MB_ENABLED === TRUE) { - $_POST[$val] = mb_convert_encoding($_POST[$val], $this->charset, $this->data['charset']); + if (MB_ENABLED === TRUE) + { + $_POST[$val] = mb_convert_encoding($_POST[$val], $this->charset, $this->data['charset']); + } + elseif (ICONV_ENABLED === TRUE) + { + $_POST[$val] = @iconv($this->data['charset'], $this->charset.'//IGNORE', $_POST[$val]); + } } $_POST[$val] = ($val !== 'url') ? $this->convert_xml(strip_tags($_POST[$val])) : strip_tags($_POST[$val]); diff --git a/tests/mocks/core/utf8.php b/tests/mocks/core/utf8.php index a43138fbc..9dda43aec 100644 --- a/tests/mocks/core/utf8.php +++ b/tests/mocks/core/utf8.php @@ -16,11 +16,22 @@ class Mock_Core_Utf8 extends CI_Utf8 { { defined('MB_ENABLED') OR define('MB_ENABLED', TRUE); mb_internal_encoding('UTF-8'); + ini_set('mbstring.substitute_character', 'none'); } else { defined('MB_ENABLED') OR define('MB_ENABLED', FALSE); } + + if (extension_loaded('iconv')) + { + defined('ICONV_ENABLED') OR define('ICONV_ENABLED', TRUE); + iconv_set_encoding('internal_encoding', 'UTF-8'); + } + else + { + defined('ICONV_ENABLED') OR define('ICONV_ENABLED', FALSE); + } } public function is_ascii_test($str) diff --git a/user_guide_src/source/changelog.rst b/user_guide_src/source/changelog.rst index 9e63a6885..b5b31dcc2 100644 --- a/user_guide_src/source/changelog.rst +++ b/user_guide_src/source/changelog.rst @@ -396,6 +396,7 @@ Release Date: Not Released - Added support for setting table class defaults in a config file. - :doc:`Zip Library ` method ``read_file()`` can now also alter the original file path/name while adding files to an archive. + - :doc:`Trackback Library ` method ``receive()`` will now utilize ``iconv()`` if it is available but ``mb_convert_encoding()`` is not. - Core @@ -489,9 +490,15 @@ Release Date: Not Released - Language files are now loaded in a cascading style with the one in **system/** always loaded and overriden afterwards, if another one is found. - :doc:`Hooks Library ` changes include: + - Renamed method ``_call_hook()`` to ``call_hook()``. - Class instances are now stored in order to maintain their state. + - UTF-8 Library changes include: + + - ``UTF8_ENABLED`` now requires only one of `Multibyte String `_ or `iconv `_ to be available instead of both. + - Changed method ``clean_string()`` to utilize ``mb_convert_encoding()`` if it is available but ``iconv()`` is not. + - Removed ``CI_CORE`` boolean constant from *CodeIgniter.php* (no longer Reactor and Core versions). - Log Library will now try to create the **log_path** directory if it doesn't exist. - Added support for HTTP-Only cookies with new config option *cookie_httponly* (default FALSE). diff --git a/user_guide_src/source/general/reserved_names.rst b/user_guide_src/source/general/reserved_names.rst index a767651fb..81a05ace6 100644 --- a/user_guide_src/source/general/reserved_names.rst +++ b/user_guide_src/source/general/reserved_names.rst @@ -61,6 +61,9 @@ Constants - APPPATH - VIEWPATH - CI_VERSION +- MB_ENABLED +- ICONV_ENABLED +- UTF8_ENABLED - FILE_READ_MODE - FILE_WRITE_MODE - DIR_READ_MODE -- cgit v1.2.3-24-g4f1b