From 08fef7de41cb06785cdb1024769892d9510e6e6b Mon Sep 17 00:00:00 2001 From: Andrey Andreev Date: Wed, 15 Jan 2014 18:37:01 +0200 Subject: Fix #2799 by adding conditional PCRE UTF-8 support to CI_URI::filter_uri() Also did a tiny micro-optimization in the Utf8 class. --- system/core/Utf8.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'system/core/Utf8.php') diff --git a/system/core/Utf8.php b/system/core/Utf8.php index a78616d40..828a8aeba 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -66,7 +66,7 @@ class CI_Utf8 { } if ( - @preg_match('/./u', 'é') === 1 // PCRE must support UTF-8 + defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 && function_exists('iconv') // iconv must be installed && MB_ENABLED === TRUE // mbstring must be enabled && $charset === 'UTF-8' // Application charset must be UTF-8 -- cgit v1.2.3-24-g4f1b From 871754af60251993d640981e107d2def5f2db396 Mon Sep 17 00:00:00 2001 From: darwinel Date: Tue, 11 Feb 2014 17:34:57 +0100 Subject: 2013 > 2014 Update copyright notices from 2013 to 2014. And update one calendar example in user_guide from year 2013/2014 to 2014/2015. --- system/core/Utf8.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'system/core/Utf8.php') diff --git a/system/core/Utf8.php b/system/core/Utf8.php index 828a8aeba..ff3e49139 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -18,7 +18,7 @@ * * @package CodeIgniter * @author EllisLab Dev Team - * @copyright Copyright (c) 2008 - 2013, EllisLab, Inc. (http://ellislab.com/) + * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (http://ellislab.com/) * @license http://opensource.org/licenses/OSL-3.0 Open Software License (OSL 3.0) * @link http://codeigniter.com * @since Version 2.0 -- cgit v1.2.3-24-g4f1b From be1496d1a8618ef186047468009c7e3e0640183b Mon Sep 17 00:00:00 2001 From: Andrey Andreev Date: Tue, 11 Feb 2014 22:48:45 +0200 Subject: Utf8/iconv/mbstring-related changes --- system/core/Utf8.php | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'system/core/Utf8.php') diff --git a/system/core/Utf8.php b/system/core/Utf8.php index ff3e49139..b58c611e1 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -59,17 +59,31 @@ class CI_Utf8 { { define('MB_ENABLED', TRUE); mb_internal_encoding($charset); + // This is required for mb_convert_encoding() to strip invalid characters + ini_set('mbstring.substitute_character', 'none'); } else { define('MB_ENABLED', FALSE); } + // Do the same for iconv, which actually has more easy to remember + // predefined constants (such as ICONV_IMPL), but the iconv PHP + // manual page says that using them is "strongly discouraged". + if (extension_loaded('iconv')) + { + define('ICONV_ENABLED', TRUE); + iconv_set_encoding('internal_encoding', $charset); + } + else + { + define('ICONV_ENABLED', FALSE); + } + if ( - defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 - && function_exists('iconv') // iconv must be installed - && MB_ENABLED === TRUE // mbstring must be enabled - && $charset === 'UTF-8' // Application charset must be UTF-8 + defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 + && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) // iconv or mbstring must be installed + && $charset === 'UTF-8' // Application charset must be UTF-8 ) { define('UTF8_ENABLED', TRUE); @@ -98,7 +112,14 @@ class CI_Utf8 { { if ($this->_is_ascii($str) === FALSE) { - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + if (ICONV_ENABLED) + { + $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + } + elseif (MB_ENABLED) + { + $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8'); + } } return $str; @@ -134,7 +155,7 @@ class CI_Utf8 { */ public function convert_to_utf8($str, $encoding) { - if (function_exists('iconv')) + if (ICONV_ENABLED) { return @iconv($encoding, 'UTF-8', $str); } -- cgit v1.2.3-24-g4f1b From eb555ed7a1673dab9f51df0d1365d19c4429a900 Mon Sep 17 00:00:00 2001 From: Andrey Andreev Date: Wed, 12 Feb 2014 19:25:01 +0200 Subject: Move mbstring/iconv configuration and MB_ENABLED, ICONV_ENABLED out of CI_Utf8::__construct() Also, use mb_substitute_character() instead of ini_set() --- system/core/Utf8.php | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) (limited to 'system/core/Utf8.php') diff --git a/system/core/Utf8.php b/system/core/Utf8.php index b58c611e1..6ca1a02ca 100644 --- a/system/core/Utf8.php +++ b/system/core/Utf8.php @@ -48,42 +48,10 @@ class CI_Utf8 { */ public function __construct() { - log_message('debug', 'Utf8 Class Initialized'); - - $charset = strtoupper(config_item('charset')); - - // set internal encoding for multibyte string functions if necessary - // and set a flag so we don't have to repeatedly use extension_loaded() - // or function_exists() - if (extension_loaded('mbstring')) - { - define('MB_ENABLED', TRUE); - mb_internal_encoding($charset); - // This is required for mb_convert_encoding() to strip invalid characters - ini_set('mbstring.substitute_character', 'none'); - } - else - { - define('MB_ENABLED', FALSE); - } - - // Do the same for iconv, which actually has more easy to remember - // predefined constants (such as ICONV_IMPL), but the iconv PHP - // manual page says that using them is "strongly discouraged". - if (extension_loaded('iconv')) - { - define('ICONV_ENABLED', TRUE); - iconv_set_encoding('internal_encoding', $charset); - } - else - { - define('ICONV_ENABLED', FALSE); - } - if ( defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8 && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) // iconv or mbstring must be installed - && $charset === 'UTF-8' // Application charset must be UTF-8 + && strnatcasecmp(config_item('charset'), 'UTF-8') === 0 // Application charset must be UTF-8 ) { define('UTF8_ENABLED', TRUE); @@ -94,6 +62,8 @@ class CI_Utf8 { define('UTF8_ENABLED', FALSE); log_message('debug', 'UTF-8 Support Disabled'); } + + log_message('debug', 'Utf8 Class Initialized'); } // -------------------------------------------------------------------- -- cgit v1.2.3-24-g4f1b