diff options
author | Phil Sturgeon <email@philsturgeon.co.uk> | 2011-02-02 22:19:25 +0100 |
---|---|---|
committer | Phil Sturgeon <email@philsturgeon.co.uk> | 2011-02-02 22:19:25 +0100 |
commit | d88b31550ae2aeb0e3bcc11ba82d4838f8a5fd31 (patch) | |
tree | 792bff97d41430b5c36a08c8ec3ac54db97375b6 /system/core/Utf8.php | |
parent | 5c59c7dc3254616b18057922ce012f22c18b147b (diff) | |
parent | 75f5ff5d99533a423e68686d89889d172c37d98e (diff) |
Merged recent changes and tweaked multi-env changes.
Diffstat (limited to 'system/core/Utf8.php')
-rw-r--r-- | system/core/Utf8.php | 165 |
1 files changed, 165 insertions, 0 deletions
<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
/**
 * CodeIgniter
 *
 * An open source application development framework for PHP 5.1.6 or newer
 *
 * @package		CodeIgniter
 * @author		ExpressionEngine Dev Team
 * @copyright	Copyright (c) 2008 - 2011, EllisLab, Inc.
 * @license		http://codeigniter.com/user_guide/license.html
 * @link		http://codeigniter.com
 * @since		Version 2.0
 * @filesource
 */

// ------------------------------------------------------------------------

/**
 * Utf8 Class
 *
 * Provides support for UTF-8 environments: decides at construction time
 * whether UTF-8 handling can be enabled (publishing the decision through the
 * UTF8_ENABLED and MB_ENABLED constants) and offers helpers to clean and
 * convert strings.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	UTF-8
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/utf8.html
 */
class CI_Utf8 {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and defines the
	 * UTF8_ENABLED constant accordingly. When support is enabled it also
	 * defines MB_ENABLED and, if mbstring is loaded, switches the mbstring
	 * internal encoding to UTF-8.
	 *
	 * Reads the application charset from the global $CFG object.
	 */
	public function __construct()
	{
		log_message('debug', "Utf8 Class Initialized");

		global $CFG;

		if (
			// PCRE must support UTF-8. The probe character is spelled as explicit
			// bytes so the result does not depend on the encoding this file is
			// saved in, and the warning raised by a PCRE build without UTF-8
			// support is suppressed.
			@preg_match('/./u', "\xC3\xA9") === 1
			// iconv must be installed
			AND function_exists('iconv')
			// Multibyte string function overloading cannot be enabled. The
			// directive is a bitmask, so every non-zero value means overloading.
			AND (int) ini_get('mbstring.func_overload') === 0
			// Application charset must be UTF-8 (compared case-insensitively)
			AND strtoupper((string) $CFG->item('charset')) === 'UTF-8'
			)
		{
			log_message('debug', "UTF-8 Support Enabled");

			define('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				define('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				define('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "UTF-8 Support Disabled");
			define('UTF8_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8 by dropping byte sequences that are not
	 * valid UTF-8. Pure ASCII input is returned untouched.
	 *
	 * iconv is tried first. If it is missing or reports failure (it returns
	 * FALSE), mbstring is used with the substitute character disabled so that
	 * invalid bytes are removed rather than replaced. With neither extension
	 * usable, a string that PCRE accepts as valid UTF-8 is returned as is and
	 * anything else is reduced to its ASCII bytes.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	public function clean_string($str)
	{
		if ($this->_is_ascii($str))
		{
			return $str;
		}

		if (function_exists('iconv'))
		{
			$clean = @iconv('UTF-8', 'UTF-8//IGNORE', $str);

			// FALSE signals a conversion failure; never hand that back in
			// place of the caller's string.
			if ($clean !== FALSE)
			{
				return $clean;
			}
		}

		if (function_exists('mb_convert_encoding'))
		{
			// Temporarily drop invalid sequences instead of substituting "?",
			// then restore whatever the application had configured.
			$previous = mb_substitute_character();
			mb_substitute_character('none');
			$clean = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
			mb_substitute_character($previous);

			return $clean;
		}

		// Last resort: keep the string only if PCRE confirms it is valid UTF-8.
		if (@preg_match('//u', $str) === 1)
		{
			return $str;
		}

		return preg_replace('/[^\x00-\x7F]+/S', '', $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	public function safe_ascii_for_xml($str)
	{
		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, using iconv when it is
	 * available and mbstring otherwise.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	- input encoding
	 * @return	mixed	the converted string, or FALSE when no converter is
	 *					available or the conversion function reports failure
	 */
	public function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		if (function_exists('mb_convert_encoding'))
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @access	public
	 * @param	string
	 * @return	bool
	 */
	public function _is_ascii($str)
	{
		// Any byte outside 0x00-0x7F makes the string non-ASCII.
		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
	}

	// --------------------------------------------------------------------

}
// End Utf8 Class

/* End of file Utf8.php */
/* Location: ./system/core/Utf8.php */
\ No newline at end of file |