summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrey Andreev <narf@devilix.net>2014-02-20 16:51:41 +0100
committerAndrey Andreev <narf@devilix.net>2014-02-20 16:51:41 +0100
commitbb3edf12d0ca82c62f7b7d570108375ec4379749 (patch)
tree8be304b5b58b42698bfaf37e6dd19a451a19fa96
parent10e7a32257b26c5609f4a745e3f80bde00da3a05 (diff)
CI_Utf8-related changes
- Give priority to mb_convert_encoding() over iconv() in clean_string() (partially fixes #261) - Add more proper unit tests
-rw-r--r--system/core/Utf8.php16
-rw-r--r--tests/codeigniter/core/Input_test.php2
-rw-r--r--tests/codeigniter/core/Utf8_test.php70
-rw-r--r--tests/mocks/core/utf8.php9
-rw-r--r--user_guide_src/source/changelog.rst3
5 files changed, 86 insertions, 14 deletions
diff --git a/system/core/Utf8.php b/system/core/Utf8.php
index c789a7ba4..9a92efe68 100644
--- a/system/core/Utf8.php
+++ b/system/core/Utf8.php
@@ -80,13 +80,13 @@ class CI_Utf8 {
{
if ($this->is_ascii($str) === FALSE)
{
- if (ICONV_ENABLED)
+ if (MB_ENABLED)
{
- $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
+ $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
}
- elseif (MB_ENABLED)
+ elseif (ICONV_ENABLED)
{
- $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
+ $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
}
}
@@ -123,13 +123,13 @@ class CI_Utf8 {
*/
public function convert_to_utf8($str, $encoding)
{
- if (ICONV_ENABLED)
+ if (MB_ENABLED)
{
- return @iconv($encoding, 'UTF-8', $str);
+ return mb_convert_encoding($str, 'UTF-8', $encoding);
}
- elseif (MB_ENABLED === TRUE)
+ elseif (ICONV_ENABLED)
{
- return @mb_convert_encoding($str, 'UTF-8', $encoding);
+ return @iconv($encoding, 'UTF-8', $str);
}
return FALSE;
diff --git a/tests/codeigniter/core/Input_test.php b/tests/codeigniter/core/Input_test.php
index 95833fc91..21ff6d81f 100644
--- a/tests/codeigniter/core/Input_test.php
+++ b/tests/codeigniter/core/Input_test.php
@@ -13,6 +13,8 @@ class Input_test extends CI_TestCase {
$this->ci_set_config('csrf_protection', FALSE);
$security = new Mock_Core_Security();
+
+ $this->ci_set_config('charset', 'UTF-8');
$utf8 = new Mock_Core_Utf8();
$this->input = new Mock_Core_Input($security, $utf8);
diff --git a/tests/codeigniter/core/Utf8_test.php b/tests/codeigniter/core/Utf8_test.php
index 2cf404841..7e6ffd930 100644
--- a/tests/codeigniter/core/Utf8_test.php
+++ b/tests/codeigniter/core/Utf8_test.php
@@ -4,22 +4,88 @@ class Utf8_test extends CI_TestCase {
public function set_up()
{
+ $this->ci_set_config('charset', 'UTF-8');
$this->utf8 = new Mock_Core_Utf8();
+ $this->ci_instance_var('utf8', $this->utf8);
}
// --------------------------------------------------------------------
- public function test_convert_to_utf8()
+ /**
+ * __construct() test
+ *
+ * @covers CI_Utf8::__construct
+ */
+ public function test___construct()
{
- $this->assertEquals('ั‚ะตัั‚', $this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251'));
+ if (defined('PREG_BAD_UTF8_ERROR') && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) && strtoupper(config_item('charset')) === 'UTF-8')
+ {
+ $this->assertTrue(UTF8_ENABLED);
+ }
+ else
+ {
+ $this->assertFalse(UTF8_ENABLED);
+ }
}
// --------------------------------------------------------------------
+ /**
+ * is_ascii() test
+ *
+ * Note: DO NOT move this below test_clean_string()
+ */
public function test_is_ascii()
{
$this->assertTrue($this->utf8->is_ascii('foo bar'));
$this->assertFalse($this->utf8->is_ascii('ั‚ะตัั‚'));
}
+ // --------------------------------------------------------------------
+
+ /**
+ * clean_string() test
+ *
+ * @depends test_is_ascii
+ * @covers CI_Utf8::clean_string
+ */
+ public function test_clean_string()
+ {
+ $this->assertEquals('foo bar', $this->utf8->clean_string('foo bar'));
+
+ $illegal_utf8 = "\xc0ั‚ะตัั‚";
+ if (MB_ENABLED)
+ {
+ $this->assertEquals('ั‚ะตัั‚', $this->utf8->clean_string($illegal_utf8));
+ }
+ elseif (ICONV_ENABLED)
+ {
+ // This is a known issue, iconv doesn't always work with //IGNORE
+ $this->assertTrue(in_array($this->utf8->clean_string($illegal_utf8), array('ั‚ะตัั‚', ''), TRUE));
+ }
+ else
+ {
+ $this->assertEquals($illegal_utf8, $this->utf8->clean_string($illegal_utf8));
+ }
+ }
+
+ // --------------------------------------------------------------------
+
+ /**
+ * convert_to_utf8() test
+ *
+ * @covers CI_Utf8::convert_to_utf8
+ */
+ public function test_convert_to_utf8()
+ {
+ if (MB_ENABLED OR ICONV_ENABLED)
+ {
+ $this->assertEquals('ั‚ะตัั‚', $this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251'));
+ }
+ else
+ {
+ $this->assertFalse($this->utf8->convert_to_utf8('๒ๅ๑๒', 'WINDOWS-1251'));
+ }
+ }
+
} \ No newline at end of file
diff --git a/tests/mocks/core/utf8.php b/tests/mocks/core/utf8.php
index c8214a62a..3a6282e1d 100644
--- a/tests/mocks/core/utf8.php
+++ b/tests/mocks/core/utf8.php
@@ -5,12 +5,15 @@ class Mock_Core_Utf8 extends CI_Utf8 {
/**
* We need to define UTF8_ENABLED the same way that
* CI_Utf8 constructor does.
- *
- * @covers CI_Utf8::__construct()
*/
public function __construct()
{
- defined('UTF8_ENABLED') OR define('UTF8_ENABLED', TRUE);
+ if (defined('UTF8_ENABLED'))
+ {
+ return;
+ }
+
+ parent::__construct();
}
} \ No newline at end of file
diff --git a/user_guide_src/source/changelog.rst b/user_guide_src/source/changelog.rst
index a3e79805f..0a6bd765a 100644
--- a/user_guide_src/source/changelog.rst
+++ b/user_guide_src/source/changelog.rst
@@ -507,7 +507,7 @@ Release Date: Not Released
- UTF-8 Library changes include:
- ``UTF8_ENABLED`` now requires only one of `Multibyte String <http://php.net/mbstring>`_ or `iconv <http://php.net/iconv>`_ to be available instead of both.
- - Changed method ``clean_string()`` to utilize ``mb_convert_encoding()`` if it is available but ``iconv()`` is not.
+ - Changed method ``clean_string()`` to utilize ``mb_convert_encoding()`` if it is available.
- Renamed method ``_is_ascii()`` to ``is_ascii()`` and made it public.
- Added `compatibility layers <general/compatibility_functions>` for:
@@ -721,6 +721,7 @@ Bug fixes for 3.0
- Fixed a bug (#43) :doc:`Image Manipulation Library <libraries/image_lib>` method ``text_watermark()`` didn't properly determine watermark placement.
- Fixed a bug where :doc:`HTML Table Library <libraries/table>` ignored its *auto_heading* setting if headings were not already set.
- Fixed a bug (#2364) - :doc:`Pagination Library <libraries/pagination>` appended the query string (if used) multiple times when there are successive calls to ``create_links()`` with no ``initialize()`` in between them.
+- Partially fixed a bug (#261) - UTF-8 class method ``clean_string()`` generating log messages and/or not producing the desired result due to an upstream bug in iconv.
Version 2.1.4
=============