From bb3edf12d0ca82c62f7b7d570108375ec4379749 Mon Sep 17 00:00:00 2001 From: Andrey Andreev Date: Thu, 20 Feb 2014 17:51:41 +0200 Subject: CI_Utf8-related changes - Give priority to mb_convert_encoding() over iconv() in clean_string() (partially fixes #261) - Add more proper unit tests --- system/core/Utf8.php | 16 ++++---- tests/codeigniter/core/Input_test.php | 2 + tests/codeigniter/core/Utf8_test.php | 70 ++++++++++++++++++++++++++++++++++- tests/mocks/core/utf8.php | 9 +++-- user_guide_src/source/changelog.rst | 3 +- 5 files changed, 86 insertions(+), 14 deletions(-) diff --git a/system/core/Utf8.php b/system/core/Utf8.php index c789a7ba4..9a92efe68 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -80,13 +80,13 @@ class CI_Utf8 { { if ($this->is_ascii($str) === FALSE) { - if (ICONV_ENABLED) + if (MB_ENABLED) { - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8'); } - elseif (MB_ENABLED) + elseif (ICONV_ENABLED) { - $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8'); + $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); } } @@ -123,13 +123,13 @@ class CI_Utf8 { */ public function convert_to_utf8($str, $encoding) { - if (ICONV_ENABLED) + if (MB_ENABLED) { - return @iconv($encoding, 'UTF-8', $str); + return mb_convert_encoding($str, 'UTF-8', $encoding); } - elseif (MB_ENABLED === TRUE) + elseif (ICONV_ENABLED) { - return @mb_convert_encoding($str, 'UTF-8', $encoding); + return @iconv($encoding, 'UTF-8', $str); } return FALSE; diff --git a/tests/codeigniter/core/Input_test.php b/tests/codeigniter/core/Input_test.php index 95833fc91..21ff6d81f 100644 --- a/tests/codeigniter/core/Input_test.php +++ b/tests/codeigniter/core/Input_test.php @@ -13,6 +13,8 @@ class Input_test extends CI_TestCase { $this->ci_set_config('csrf_protection', FALSE); $security = new Mock_Core_Security(); + + $this->ci_set_config('charset', 'UTF-8'); $utf8 = new Mock_Core_Utf8(); $this->input = new Mock_Core_Input($security, $utf8); diff --git a/tests/codeigniter/core/Utf8_test.php b/tests/codeigniter/core/Utf8_test.php index 2cf404841..7e6ffd930 100644 --- a/tests/codeigniter/core/Utf8_test.php +++ b/tests/codeigniter/core/Utf8_test.php @@ -4,22 +4,88 @@ class Utf8_test extends CI_TestCase { public function set_up() { + $this->ci_set_config('charset', 'UTF-8'); $this->utf8 = new Mock_Core_Utf8(); + $this->ci_instance_var('utf8', $this->utf8); } // -------------------------------------------------------------------- - public function test_convert_to_utf8() + /** + * __construct() test + * + * @covers CI_Utf8::__construct + */ + public function test___construct() { - $this->assertEquals('ั‚ะตัั‚', $this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251')); + if (defined('PREG_BAD_UTF8_ERROR') && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) && strtoupper(config_item('charset')) === 'UTF-8') + { + $this->assertTrue(UTF8_ENABLED); + } + else + { + $this->assertFalse(UTF8_ENABLED); + } } // -------------------------------------------------------------------- + /** + * is_ascii() test + * + * Note: DO NOT move this below test_clean_string() + */ public function test_is_ascii() { $this->assertTrue($this->utf8->is_ascii('foo bar')); $this->assertFalse($this->utf8->is_ascii('ั‚ะตัั‚')); } + // -------------------------------------------------------------------- + + /** + * clean_string() test + * + * @depends test_is_ascii + * @covers CI_Utf8::clean_string + */ + public function test_clean_string() + { + $this->assertEquals('foo bar', $this->utf8->clean_string('foo bar')); + + $illegal_utf8 = "\xc0ั‚ะตัั‚"; + if (MB_ENABLED) + { + $this->assertEquals('ั‚ะตัั‚', $this->utf8->clean_string($illegal_utf8)); + } + elseif (ICONV_ENABLED) + { + // This is a known issue, iconv doesn't always work with //IGNORE + $this->assertTrue(in_array($this->utf8->clean_string($illegal_utf8), array('ั‚ะตัั‚', ''), TRUE)); + } + else + { + $this->assertEquals($illegal_utf8, $this->utf8->clean_string($illegal_utf8)); + } + } + + // -------------------------------------------------------------------- + + /** + * convert_to_utf8() test + * + * @covers CI_Utf8::convert_to_utf8 + */ + public function test_convert_to_utf8() + { + if (MB_ENABLED OR ICONV_ENABLED) + { + $this->assertEquals('ั‚ะตัั‚', $this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251')); + } + else + { + $this->assertFalse($this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251')); + } + } + } \ No newline at end of file diff --git a/tests/mocks/core/utf8.php b/tests/mocks/core/utf8.php index c8214a62a..3a6282e1d 100644 --- a/tests/mocks/core/utf8.php +++ b/tests/mocks/core/utf8.php @@ -5,12 +5,15 @@ class Mock_Core_Utf8 extends CI_Utf8 { /** * We need to define UTF8_ENABLED the same way that * CI_Utf8 constructor does. - * - * @covers CI_Utf8::__construct() */ public function __construct() { - defined('UTF8_ENABLED') OR define('UTF8_ENABLED', TRUE); + if (defined('UTF8_ENABLED')) + { + return; + } + + parent::__construct(); } } \ No newline at end of file diff --git a/user_guide_src/source/changelog.rst b/user_guide_src/source/changelog.rst index a3e79805f..0a6bd765a 100644 --- a/user_guide_src/source/changelog.rst +++ b/user_guide_src/source/changelog.rst @@ -507,7 +507,7 @@ Release Date: Not Released - UTF-8 Library changes include: - ``UTF8_ENABLED`` now requires only one of `Multibyte String `_ or `iconv `_ to be available instead of both. - - Changed method ``clean_string()`` to utilize ``mb_convert_encoding()`` if it is available but ``iconv()`` is not. + - Changed method ``clean_string()`` to utilize ``mb_convert_encoding()`` if it is available. - Renamed method ``_is_ascii()`` to ``is_ascii()`` and made it public. - Added `compatibility layers ` for: @@ -721,6 +721,7 @@ Bug fixes for 3.0 - Fixed a bug (#43) :doc:`Image Manipulation Library ` method ``text_watermark()`` didn't properly determine watermark placement. - Fixed a bug where :doc:`HTML Table Library ` ignored its *auto_heading* setting if headings were not already set. - Fixed a bug (#2364) - :doc:`Pagination Library ` appended the query string (if used) multiple times when there are successive calls to ``create_links()`` with no ``initialize()`` in between them. +- Partially fixed a bug (#261) - UTF-8 class method ``clean_string()`` generating log messages and/or not producing the desired result due to an upstream bug in iconv. Version 2.1.4 ============= -- cgit v1.2.3-24-g4f1b