summaryrefslogtreecommitdiffstats
path: root/system
diff options
context:
space:
mode:
author Derek Jones <derek.jones@ellislab.com> 2010-03-02 20:08:02 +0100
committer Derek Jones <derek.jones@ellislab.com> 2010-03-02 20:08:02 +0100
commit 98badc154c59a1018ada449ad7100bc97c4fbd52 (patch)
tree 4b76f4b64848cf2582fde8970f5f9ebf37128d5b /system
parent c68dfbf9df1bd76f608307185ec16f5be4b550f1 (diff)
adding Unicode class to core
Diffstat (limited to 'system')
-rw-r--r-- system/core/Unicode.php 165
1 files changed, 165 insertions, 0 deletions
diff --git a/system/core/Unicode.php b/system/core/Unicode.php
new file mode 100644
index 000000000..c8f1203f7
--- /dev/null
+++ b/system/core/Unicode.php
@@ -0,0 +1,165 @@
+<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
+/**
+ * CodeIgniter
+ *
+ * An open source application development framework for PHP 4.3.2 or newer
+ *
+ * @package CodeIgniter
+ * @author ExpressionEngine Dev Team
+ * @copyright Copyright (c) 2008 - 2010, EllisLab, Inc.
+ * @license http://codeigniter.com/user_guide/license.html
+ * @link http://codeigniter.com
+ * @since Version 1.0
+ * @filesource
+ */
+
+// ------------------------------------------------------------------------
+
+/**
+ * Unicode Class
+ *
+ * Detects whether UTF-8 support can be enabled and provides helpers for cleaning and converting strings to UTF-8
+ *
+ * @package CodeIgniter
+ * @subpackage Libraries
+ * @category Unicode
+ * @author ExpressionEngine Dev Team
+ * @link http://codeigniter.com/user_guide/libraries/unicode.html
+ */
+class CI_Unicode {
+
+	/**
+	 * Constructor
+	 *
+	 * Determines if UTF-8 support is to be enabled and publishes the
+	 * outcome through two constants:
+	 *
+	 *   UTF8_ENABLED - TRUE only when PCRE matches in UTF-8 mode, iconv()
+	 *                  exists, mbstring.func_overload is not 1 and the
+	 *                  configured charset is UTF-8 (compared case-insensitively)
+	 *   MB_ENABLED   - TRUE only when UTF-8 support is enabled AND the
+	 *                  mbstring extension is loaded; FALSE otherwise
+	 *
+	 * When both are TRUE, mbstring's internal encoding is set to UTF-8.
+	 *
+	 */
+	function CI_Unicode()
+	{
+		log_message('debug', "Unicode Class Initialized");
+
+		global $CFG;
+
+		if (
+			preg_match('/./u', 'é') === 1					// PCRE must support UTF-8
+			AND function_exists('iconv')					// iconv must be installed
+			// NOTE(review): mbstring.func_overload is a bitmask, and this test only
+			// rejects the exact value 1 - confirm whether 2 (string functions) and
+			// combined values should disable UTF-8 support as well
+			AND ini_get('mbstring.func_overload') != 1		// Multibyte string function overloading cannot be enabled
+			// Compared in upper case so that 'utf-8' in the config is accepted too
+			AND strtoupper((string) $CFG->item('charset')) == 'UTF-8'	// Application charset must be UTF-8
+			)
+		{
+			log_message('debug', "Unicode Class - UTF-8 Support Enabled");
+
+			define('UTF8_ENABLED', TRUE);
+
+			// set internal encoding for multibyte string functions if necessary
+			// and set a flag so we don't have to repeatedly use extension_loaded()
+			// or function_exists()
+			if (extension_loaded('mbstring'))
+			{
+				define('MB_ENABLED', TRUE);
+				mb_internal_encoding('UTF-8');
+			}
+			else
+			{
+				define('MB_ENABLED', FALSE);
+			}
+		}
+		else
+		{
+			log_message('debug', "Unicode Class - UTF-8 Support Disabled");
+			define('UTF8_ENABLED', FALSE);
+
+			// The flag must exist on this path as well, otherwise code that reads
+			// MB_ENABLED would be reading an undefined constant. It is FALSE here
+			// because the mbstring internal encoding was not switched to UTF-8.
+			if ( ! defined('MB_ENABLED'))
+			{
+				define('MB_ENABLED', FALSE);
+			}
+		}
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Clean UTF-8 strings
+	 *
+	 * Ensures strings are UTF-8. Strings that are plain 7-bit ASCII are
+	 * returned untouched. Anything else is passed through iconv() with
+	 * //IGNORE so that byte sequences which are not valid UTF-8 are dropped.
+	 * If iconv() is missing or reports failure, mb_convert_encoding() is
+	 * tried instead; note that mbstring handles invalid sequences according
+	 * to its substitute character setting rather than always dropping them.
+	 *
+	 * @access	public
+	 * @param	string
+	 * @return	string	the cleaned string, or FALSE if no conversion succeeded
+	 */
+	function clean_string($str)
+	{
+		if ($this->_is_ascii($str) === TRUE)
+		{
+			return $str;
+		}
+
+		// Guarded so that a missing iconv extension is not a fatal error
+		$clean = function_exists('iconv') ? @iconv('UTF-8', 'UTF-8//IGNORE', $str) : FALSE;
+
+		// iconv() can return FALSE on input containing illegal sequences even
+		// with //IGNORE, so give mbstring a chance before reporting failure
+		if ($clean === FALSE AND function_exists('mb_convert_encoding'))
+		{
+			$clean = @mb_convert_encoding($str, 'UTF-8', 'UTF-8');
+		}
+
+		return $clean;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Remove ASCII control characters
+	 *
+	 * Removes all ASCII control characters (0x00-0x1F and 0x7F) except
+	 * horizontal tabs (0x09), line feeds (0x0A), and carriage returns (0x0D),
+	 * as all others can cause problems in XML
+	 *
+	 * @access	public
+	 * @param	string
+	 * @return	string
+	 */
+	function safe_ascii_for_xml($str)
+	{
+		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Convert to UTF-8
+	 *
+	 * Attempts to convert a string to UTF-8, using iconv() when it exists
+	 * and mb_convert_encoding() otherwise
+	 *
+	 * @access	public
+	 * @param	string
+	 * @param	string	- input encoding
+	 * @return	string	the converted string, or FALSE when neither function
+	 *					is available or the conversion itself fails
+	 */
+	function convert_to_utf8($str, $encoding)
+	{
+		if (function_exists('iconv'))
+		{
+			$str = @iconv($encoding, 'UTF-8', $str);
+		}
+		elseif (function_exists('mb_convert_encoding'))
+		{
+			$str = @mb_convert_encoding($str, 'UTF-8', $encoding);
+		}
+		else
+		{
+			return FALSE;
+		}
+
+		return $str;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Is ASCII?
+	 *
+	 * Tests if a string is standard 7-bit ASCII or not, i.e. whether it
+	 * contains no byte outside the range 0x00-0x7F
+	 *
+	 * @access	public
+	 * @param	string
+	 * @return	bool
+	 */
+	function _is_ascii($str)
+	{
+		return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
+	}
+
+	// --------------------------------------------------------------------
+
+}
+// End Unicode Class
+
+/* End of file Unicode.php */
+/* Location: ./system/core/Unicode.php */
\ No newline at end of file