From 98badc154c59a1018ada449ad7100bc97c4fbd52 Mon Sep 17 00:00:00 2001 From: Derek Jones Date: Tue, 2 Mar 2010 13:08:02 -0600 Subject: adding Unicode class to core --- system/core/Unicode.php | 165 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 system/core/Unicode.php (limited to 'system/core') diff --git a/system/core/Unicode.php b/system/core/Unicode.php new file mode 100644 index 000000000..c8f1203f7 --- /dev/null +++ b/system/core/Unicode.php @@ -0,0 +1,165 @@ +item('charset') == 'UTF-8' // Application charset must be UTF-8 + ) + { + log_message('debug', "Unicode Class - UTF-8 Support Enabled"); + + define('UTF8_ENABLED', TRUE); + + // set internal encoding for multibyte string functions if necessary + // and set a flag so we don't have to repeatedly use extension_loaded() + // or function_exists() + if (extension_loaded('mbstring')) + { + define('MB_ENABLED', TRUE); + mb_internal_encoding('UTF-8'); + } + else + { + define('MB_ENABLED', FALSE); + } + } + else + { + log_message('debug', "Unicode Class - UTF-8 Support Disabled"); + define('UTF8_ENABLED', FALSE); + } + } + + // -------------------------------------------------------------------- + + /** + * Clean UTF-8 strings + * + * Ensures strings are UTF-8 + * + * @access public + * @param string + * @return string + */ + function clean_string($str) + { + if ($this->_is_ascii($str) === FALSE) + { + $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + } + + return $str; + } + + // -------------------------------------------------------------------- + + /** + * Remove ASCII control characters + * + * Removes all ASCII control characters except horizontal tabs, + * line feeds, and carriage returns, as all others can cause + * problems in XML + * + * @access public + * @param string + * @return string + */ + function safe_ascii_for_xml($str) + { + return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str); + } + + // -------------------------------------------------------------------- + + /** + * Convert to UTF-8 + * + * Attempts to convert a string to UTF-8 + * + * @access public + * @param string + * @param string - input encoding + * @return string + */ + function convert_to_utf8($str, $encoding) + { + if (function_exists('iconv')) + { + $str = @iconv($encoding, 'UTF-8', $str); + } + elseif (function_exists('mb_convert_encoding')) + { + $str = @mb_convert_encoding($str, 'UTF-8', $encoding); + } + else + { + return FALSE; + } + + return $str; + } + + // -------------------------------------------------------------------- + + /** + * Is ASCII? + * + * Tests if a string is standard 7-bit ASCII or not + * + * @access public + * @param string + * @return bool + */ + function _is_ascii($str) + { + return (preg_match('/[^\x00-\x7F]/S', $str) == 0); + } + + // -------------------------------------------------------------------- + +} +// End Unicode Class + +/* End of file Unicode.php */ +/* Location: ./system/core/Unicode.php */ \ No newline at end of file -- cgit v1.2.3-24-g4f1b