From a091147feb0331e758b74e7ea13f6ebcb645cd9b Mon Sep 17 00:00:00 2001 From: Derek Jones Date: Tue, 30 Mar 2010 10:33:09 -0500 Subject: moved entity_decode() to the Security library to handle an issue with HTML in input when the global XSS filter is enabled --- system/helpers/typography_helper.php | 38 ++----------------------- system/libraries/Security.php | 55 ++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 38 deletions(-) (limited to 'system') diff --git a/system/helpers/typography_helper.php b/system/helpers/typography_helper.php index 712b57509..e45480e1a 100644 --- a/system/helpers/typography_helper.php +++ b/system/helpers/typography_helper.php @@ -76,15 +76,6 @@ if ( ! function_exists('auto_typography')) * * This function is a replacement for html_entity_decode() * - * In some versions of PHP the native function does not work - * when UTF-8 is the specified character set, so this gives us - * a work-around. More info here: - * http://bugs.php.net/bug.php?id=25670 - * - * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the - * character set, and the PHP developers said they were not back porting the - * fix to versions other than PHP 5.x. - * * @access public * @param string * @return string @@ -93,32 +84,9 @@ if ( ! function_exists('entity_decode')) { function entity_decode($str, $charset='UTF-8') { - if (stristr($str, '&') === FALSE) return $str; - - // The reason we are not using html_entity_decode() by itself is because - // while it is not technically correct to leave out the semicolon - // at the end of an entity most browsers will still interpret the entity - // correctly. html_entity_decode() does not convert entities without - // semicolons, so we are left with our own little solution here. Bummer. - - if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>='))) - { - $str = html_entity_decode($str, ENT_COMPAT, $charset); - $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str); - return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str); - } - - // Numeric Entities - $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str); - $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str); - - // Literal Entities - Slightly slow so we do another check - if (stristr($str, '&') === FALSE) - { - $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES))); - } - - return $str; + $CI =& get_instance(); + $CI->load->library('security'); + return $CI->security->entity_decode($str, $charset); } } diff --git a/system/libraries/Security.php b/system/libraries/Security.php index 93da59204..60adf0a27 100644 --- a/system/libraries/Security.php +++ b/system/libraries/Security.php @@ -648,13 +648,62 @@ class CI_Security { */ function _decode_entity($match) { - $CI =& get_instance(); - $CI->load->helper('typography'); - return entity_decode($match[0], strtoupper($CI->config->item('charset'))); + return $this->entity_decode($match[0], strtoupper(config_item('charset'))); } // -------------------------------------------------------------------- + /** + * HTML Entities Decode + * + * This function is a replacement for html_entity_decode() + * + * In some versions of PHP the native function does not work + * when UTF-8 is the specified character set, so this gives us + * a work-around. More info here: + * http://bugs.php.net/bug.php?id=25670 + * + * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the + * character set, and the PHP developers said they were not back porting the + * fix to versions other than PHP 5.x. + * + * @access public + * @param string + * @param string + * @return string + */ + function entity_decode($str, $charset='UTF-8') + { + if (stristr($str, '&') === FALSE) return $str; + + // The reason we are not using html_entity_decode() by itself is because + // while it is not technically correct to leave out the semicolon + // at the end of an entity most browsers will still interpret the entity + // correctly. html_entity_decode() does not convert entities without + // semicolons, so we are left with our own little solution here. Bummer. + + if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0'))) + { + $str = html_entity_decode($str, ENT_COMPAT, $charset); + $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str); + return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str); + } + + // Numeric Entities + $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str); + $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str); + + // Literal Entities - Slightly slow so we do another check + if (stristr($str, '&') === FALSE) + { + $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES))); + } + + return $str; + } + + // -------------------------------------------------------------------- + /** * Filename Security * -- cgit v1.2.3-24-g4f1b