summaryrefslogtreecommitdiffstats
path: root/system/libraries
diff options
context:
space:
mode:
authorDerek Jones <derek.jones@ellislab.com>2010-03-30 17:33:09 +0200
committerDerek Jones <derek.jones@ellislab.com>2010-03-30 17:33:09 +0200
commita091147feb0331e758b74e7ea13f6ebcb645cd9b (patch)
tree8d79afcabccddd0d292959e4da5db31b45908f5a /system/libraries
parent4bde110ec6769fe0fd0194be602fd3ed85c52bc7 (diff)
moved entity_decode() to the Security library to handle an issue with HTML in input when the global XSS filter is enabled
Diffstat (limited to 'system/libraries')
-rw-r--r--system/libraries/Security.php55
1 files changed, 52 insertions, 3 deletions
diff --git a/system/libraries/Security.php b/system/libraries/Security.php
index 93da59204..60adf0a27 100644
--- a/system/libraries/Security.php
+++ b/system/libraries/Security.php
@@ -648,14 +648,63 @@ class CI_Security {
*/
function _decode_entity($match)
{
- $CI =& get_instance();
- $CI->load->helper('typography');
- return entity_decode($match[0], strtoupper($CI->config->item('charset')));
+ return $this->entity_decode($match[0], strtoupper(config_item('charset')));
}
// --------------------------------------------------------------------
/**
+ * HTML Entities Decode
+ *
+ * This function is a replacement for html_entity_decode()
+ *
+ * In some versions of PHP the native function does not work
+ * when UTF-8 is the specified character set, so this gives us
+ * a work-around. More info here:
+ * http://bugs.php.net/bug.php?id=25670
+ *
+ * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
+ * character set, and the PHP developers said they were not back porting the
+ * fix to versions other than PHP 5.x.
+ *
+ * @access public
+ * @param string
+ * @param string
+ * @return string
+ */
+ function entity_decode($str, $charset='UTF-8')
+ {
+ if (stristr($str, '&') === FALSE) return $str;
+
+ // The reason we are not using html_entity_decode() by itself is because
+ // while it is not technically correct to leave out the semicolon
+ // at the end of an entity most browsers will still interpret the entity
+ // correctly. html_entity_decode() does not convert entities without
+ // semicolons, so we are left with our own little solution here. Bummer.
+
+ if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
+ {
+ $str = html_entity_decode($str, ENT_COMPAT, $charset);
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
+ return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
+ }
+
+ // Numeric Entities
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
+ $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
+
+ // Literal Entities - Slightly slow so we do another check
+ if (stristr($str, '&') === FALSE)
+ {
+ $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
+ }
+
+ return $str;
+ }
+
+ // --------------------------------------------------------------------
+
+ /**
* Filename Security
*
* @access public