moved entity_decode() to the Security library to handle an issue with HTML in input when the global XSS filter is enabled

author: Derek Jones <derek.jones@ellislab.com> 2010-03-30 17:33:09 +0200
committer: Derek Jones <derek.jones@ellislab.com> 2010-03-30 17:33:09 +0200
commit: a091147feb0331e758b74e7ea13f6ebcb645cd9b (patch)
tree: 8d79afcabccddd0d292959e4da5db31b45908f5a /system/libraries
parent: 4bde110ec6769fe0fd0194be602fd3ed85c52bc7 (diff)
1 files changed, 52 insertions, 3 deletions
diff --git a/system/libraries/Security.php b/system/libraries/Security.php
index 93da59204..60adf0a27 100644
--- a/system/libraries/Security.php
+++ b/system/libraries/Security.php
@@ -648,14 +648,63 @@ class CI_Security {
 	 */
 	function _decode_entity($match)
 	{
-		$CI =& get_instance();
-		$CI->load->helper('typography');
-		return entity_decode($match[0], strtoupper($CI->config->item('charset')));
+		return $this->entity_decode($match[0], strtoupper(config_item('charset')));
 	}
 
 	// --------------------------------------------------------------------
 
 	/**
+	 * HTML Entities Decode
+	 *
+	 * This function is a replacement for html_entity_decode()
+	 *
+	 * In some versions of PHP the native function does not work
+	 * when UTF-8 is the specified character set, so this gives us
+	 * a work-around.  More info here:
+	 * http://bugs.php.net/bug.php?id=25670
+	 *
+	 * NOTE: the PHP developers said they were not back porting the fix to
+	 * versions other than PHP 5.x, hence the manual fallback used below.
+	 * Numeric entities lacking the trailing semicolon are decoded as well.
+	 *
+	 * @access	public
+	 * @param	string	the string to decode
+	 * @param	string	the character set (defaults to UTF-8)
+	 * @return	string	the string with its entities decoded
+	 */
+	function entity_decode($str, $charset='UTF-8')
+	{
+		if (stristr($str, '&') === FALSE) return $str;
+	
+		// html_entity_decode() skips entities that lack the trailing semicolon,
+		// yet most browsers still interpret them, so the leftover numeric
+		// entities are converted by hand afterwards.
+		// NOTE(review): the "e" regex modifier used below is deprecated as of
+		// PHP 5.5 and removed in 7.0; move to preg_replace_callback() then.
+	
+		if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR is_php('5.0.0')))
+		{
+			$str = html_entity_decode($str, ENT_COMPAT, $charset);
+			$str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
+			return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
+		}
+	
+		// Numeric Entities (manual fallback when the native function is unusable)
+		$str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
+		$str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
+	
+		// Literal Entities - Slightly slow, so only do it if an ampersand remains
+		if (stristr($str, '&') !== FALSE)
+		{
+			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
+		}
+	
+		return $str;
+	}
+	
+	// --------------------------------------------------------------------
+	
+	/**
 	 * Filename Security
 	 *
 	 * @access	public
author	Derek Jones <derek.jones@ellislab.com>	2010-03-30 17:33:09 +0200
committer	Derek Jones <derek.jones@ellislab.com>	2010-03-30 17:33:09 +0200
commit	a091147feb0331e758b74e7ea13f6ebcb645cd9b (patch)
tree	8d79afcabccddd0d292959e4da5db31b45908f5a /system/libraries
parent	4bde110ec6769fe0fd0194be602fd3ed85c52bc7 (diff)