diff options
author | Derek Jones <derek.jones@ellislab.com> | 2010-03-02 20:08:02 +0100 |
---|---|---|
committer | Derek Jones <derek.jones@ellislab.com> | 2010-03-02 20:08:02 +0100 |
commit | 98badc154c59a1018ada449ad7100bc97c4fbd52 (patch) | |
tree | 4b76f4b64848cf2582fde8970f5f9ebf37128d5b | |
parent | c68dfbf9df1bd76f608307185ec16f5be4b550f1 (diff) |
adding Unicode class to core
-rw-r--r-- | system/core/Unicode.php | 165 |
1 files changed, 165 insertions, 0 deletions
/**
 * Unicode Class
 *
 * Decides whether UTF-8 support can be enabled for the application and
 * offers helpers for cleaning strings and converting them to UTF-8.
 *
 * @package		CodeIgniter
 * @subpackage	Libraries
 * @category	Unicode
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/libraries/unicode.html
 */
class CI_Unicode {

	/**
	 * Constructor
	 *
	 * Determines if UTF-8 support is to be enabled and publishes the
	 * outcome through the UTF8_ENABLED and MB_ENABLED constants. Both
	 * constants are defined on every code path.
	 *
	 * Declared as __construct() so that it also runs on PHP versions
	 * that no longer treat a method named after its class as the
	 * constructor; CI_Unicode() below keeps the old entry point alive.
	 */
	function __construct()
	{
		log_message('debug', "Unicode Class Initialized");

		global $CFG;

		if (
			@preg_match('/./u', "\xC3\xA9") === 1							// PCRE must support UTF-8 (bytes spelled out so the probe does not depend on this file's encoding)
			AND function_exists('iconv')									// iconv must be installed
			AND (int) ini_get('mbstring.func_overload') === 0				// func_overload is a bitmask: any non-zero value means some overloading is active
			AND strtoupper((string) $CFG->item('charset')) == 'UTF-8'		// Application charset must be UTF-8 (compared case-insensitively)
			)
		{
			log_message('debug', "Unicode Class - UTF-8 Support Enabled");

			$this->_define_once('UTF8_ENABLED', TRUE);

			// set internal encoding for multibyte string functions if necessary
			// and set a flag so we don't have to repeatedly use extension_loaded()
			// or function_exists()
			if (extension_loaded('mbstring'))
			{
				$this->_define_once('MB_ENABLED', TRUE);
				mb_internal_encoding('UTF-8');
			}
			else
			{
				$this->_define_once('MB_ENABLED', FALSE);
			}
		}
		else
		{
			log_message('debug', "Unicode Class - UTF-8 Support Disabled");

			$this->_define_once('UTF8_ENABLED', FALSE);

			// Define the flag here as well so that reading MB_ENABLED never
			// hits an undefined constant when UTF-8 support is off.
			$this->_define_once('MB_ENABLED', FALSE);
		}
	}

	// --------------------------------------------------------------------

	/**
	 * PHP 4 style constructor
	 *
	 * Delegates to __construct() so that the class initialises the same
	 * way regardless of which constructor convention the runtime uses,
	 * and so that explicit calls to CI_Unicode() keep working.
	 *
	 * @access	public
	 * @return	void
	 */
	function CI_Unicode()
	{
		$this->__construct();
	}

	// --------------------------------------------------------------------

	/**
	 * Clean UTF-8 strings
	 *
	 * Ensures strings are UTF-8. Pure 7-bit ASCII input is returned
	 * untouched. Anything else is passed through iconv with //IGNORE;
	 * if iconv is missing or reports failure, mbstring is tried instead.
	 *
	 * @access	public
	 * @param	string
	 * @return	string	the cleaned string, or FALSE when no converter succeeded
	 */
	function clean_string($str)
	{
		if ($this->_is_ascii($str) !== FALSE)
		{
			return $str;
		}

		$cleaned = FALSE;

		if (function_exists('iconv'))
		{
			$cleaned = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
		}

		// iconv can be absent, or can return FALSE on malformed input
		if ($cleaned === FALSE AND function_exists('mb_convert_encoding'))
		{
			$cleaned = @mb_convert_encoding($str, 'UTF-8', 'UTF-8');
		}

		return $cleaned;
	}

	// --------------------------------------------------------------------

	/**
	 * Remove ASCII control characters
	 *
	 * Removes all ASCII control characters except horizontal tabs,
	 * line feeds, and carriage returns, as all others can cause
	 * problems in XML
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function safe_ascii_for_xml($str)
	{
		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
	}

	// --------------------------------------------------------------------

	/**
	 * Convert to UTF-8
	 *
	 * Attempts to convert a string to UTF-8, preferring iconv and using
	 * mb_convert_encoding() only when iconv is not installed.
	 *
	 * @access	public
	 * @param	string
	 * @param	string	- input encoding
	 * @return	string	the converted string, or FALSE when neither extension
	 *					is available or iconv reports failure
	 */
	function convert_to_utf8($str, $encoding)
	{
		if (function_exists('iconv'))
		{
			return @iconv($encoding, 'UTF-8', $str);
		}

		if (function_exists('mb_convert_encoding'))
		{
			return @mb_convert_encoding($str, 'UTF-8', $encoding);
		}

		return FALSE;
	}

	// --------------------------------------------------------------------

	/**
	 * Is ASCII?
	 *
	 * Tests if a string is standard 7-bit ASCII or not
	 *
	 * @access	public
	 * @param	string
	 * @return	bool
	 */
	function _is_ascii($str)
	{
		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
	}

	// --------------------------------------------------------------------

	/**
	 * Define a constant unless it already exists
	 *
	 * Keeps a second instantiation of this class from raising a
	 * "constant already defined" error.
	 *
	 * @access	private
	 * @param	string	constant name
	 * @param	mixed	constant value
	 * @return	void
	 */
	function _define_once($name, $value)
	{
		if ( ! defined($name))
		{
			define($name, $value);
		}
	}

	// --------------------------------------------------------------------

}
// End Unicode Class
\ No newline at end of file |