summaryrefslogtreecommitdiffstats
path: root/system
diff options
context:
space:
mode:
authorDerek Jones <derek.jones@ellislab.com>2010-03-30 17:33:09 +0200
committerDerek Jones <derek.jones@ellislab.com>2010-03-30 17:33:09 +0200
commita091147feb0331e758b74e7ea13f6ebcb645cd9b (patch)
tree8d79afcabccddd0d292959e4da5db31b45908f5a /system
parent4bde110ec6769fe0fd0194be602fd3ed85c52bc7 (diff)
moved entity_decode() to the Security library to handle an issue with HTML in input when the global XSS filter is enabled
Diffstat (limited to 'system')
-rw-r--r--system/helpers/typography_helper.php38
-rw-r--r--system/libraries/Security.php55
2 files changed, 55 insertions, 38 deletions
diff --git a/system/helpers/typography_helper.php b/system/helpers/typography_helper.php
index 712b57509..e45480e1a 100644
--- a/system/helpers/typography_helper.php
+++ b/system/helpers/typography_helper.php
@@ -76,15 +76,6 @@ if ( ! function_exists('auto_typography'))
*
* This function is a replacement for html_entity_decode()
*
- * In some versions of PHP the native function does not work
- * when UTF-8 is the specified character set, so this gives us
- * a work-around. More info here:
- * http://bugs.php.net/bug.php?id=25670
- *
- * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
- * character set, and the PHP developers said they were not back porting the
- * fix to versions other than PHP 5.x.
- *
* @access public
* @param string
* @return string
@@ -93,32 +84,9 @@ if ( ! function_exists('entity_decode'))
{
function entity_decode($str, $charset='UTF-8')
{
- if (stristr($str, '&') === FALSE) return $str;
-
- // The reason we are not using html_entity_decode() by itself is because
- // while it is not technically correct to leave out the semicolon
- // at the end of an entity most browsers will still interpret the entity
- // correctly. html_entity_decode() does not convert entities without
- // semicolons, so we are left with our own little solution here. Bummer.
-
- if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
- {
- $str = html_entity_decode($str, ENT_COMPAT, $charset);
- $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
- return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
- }
-
- // Numeric Entities
- $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
- $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
-
- // Literal Entities - Slightly slow so we do another check
- if (stristr($str, '&') === FALSE)
- {
- $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
- }
-
- return $str;
+ $CI =& get_instance();
+ $CI->load->library('security');
+ return $CI->security->entity_decode($str, $charset);
}
}
diff --git a/system/libraries/Security.php b/system/libraries/Security.php
index 93da59204..60adf0a27 100644
--- a/system/libraries/Security.php
+++ b/system/libraries/Security.php
@@ -648,14 +648,63 @@ class CI_Security {
*/
function _decode_entity($match)
{
- $CI =& get_instance();
- $CI->load->helper('typography');
- return entity_decode($match[0], strtoupper($CI->config->item('charset')));
+ return $this->entity_decode($match[0], strtoupper(config_item('charset')));
}
// --------------------------------------------------------------------
/**
+ * HTML Entities Decode
+ *
+ * This function is a replacement for html_entity_decode()
+ *
+ * In some versions of PHP the native function does not work
+ * when UTF-8 is the specified character set, so this gives us
+ * a work-around. More info here:
+ * http://bugs.php.net/bug.php?id=25670
+ *
+ * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
+ * character set, and the PHP developers said they were not back porting the
+ * fix to versions other than PHP 5.x.
+ *
+ * @access public
+ * @param string
+ * @param string
+ * @return string
+ */
+ function entity_decode($str, $charset='UTF-8')
+ {
+ if (stristr($str, '&') === FALSE) return $str;
+
+ // The reason we are not using html_entity_decode() by itself is because
+ // while it is not technically correct to leave out the semicolon
+ // at the end of an entity most browsers will still interpret the entity
+ // correctly. html_entity_decode() does not convert entities without
+ // semicolons, so we are left with our own little solution here. Bummer.
+
+ if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
+ {
+ $str = html_entity_decode($str, ENT_COMPAT, $charset);
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
+ return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
+ }
+
+ // Numeric Entities
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
+ $str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
+
+ // Literal Entities - Slightly slow so we do another check
+ if (stristr($str, '&') === FALSE)
+ {
+ $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
+ }
+
+ return $str;
+ }
+
+ // --------------------------------------------------------------------
+
+ /**
* Filename Security
*
* @access public