summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDerek Jones <derek.jones@ellislab.com>2007-07-12 21:12:37 +0200
committerDerek Jones <derek.jones@ellislab.com>2007-07-12 21:12:37 +0200
commit303c9cb958a52ef2d13e985da397cb49590113b0 (patch)
treef27e2fee5fa17bc62eeb03bd39639707af985df4
parent33d1056c454eeb5e4484ff4454f78da9dc2a3ec5 (diff)
added attribute and html entity decode callbacks to xss_clean()
-rw-r--r--system/libraries/Input.php80
1 files changed, 69 insertions, 11 deletions
diff --git a/system/libraries/Input.php b/system/libraries/Input.php
index fcca722b7..ba94d854f 100644
--- a/system/libraries/Input.php
+++ b/system/libraries/Input.php
@@ -519,7 +519,7 @@ class CI_Input {
* @param string
* @return string
*/
- function xss_clean($str, $charset = 'ISO-8859-1')
+ function xss_clean($str)
{
/*
* Remove Null Characters
@@ -564,23 +564,46 @@ class CI_Input {
$str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str);
/*
- * Convert character entities to ASCII
+ * Convert character entities to ASCII
*
* This permits our tests below to work reliably.
* We only convert entities that are within tags since
* these are the ones that will pose security problems.
*
*/
- if (preg_match_all("/<(.+?)>/si", $str, $matches))
- {
- for ($i = 0; $i < count($matches['0']); $i++)
+
+ $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);
+
+ $str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);
+
+ /*
+
+ Old code, kept for reference. Rewriting it with the preg_replace_callback() calls above made it more memory-efficient.
+
+ if (preg_match_all("/[a-z]+=([\'\"]).*?\\1/si", $str, $matches))
+ {
+ for ($i = 0; $i < count($matches[0]); $i++)
+ {
+ if (stristr($matches[0][$i], '>'))
+ {
+ $str = str_replace( $matches['0'][$i],
+ str_replace('>', '&lt;', $matches[0][$i]),
+ $str);
+ }
+ }
+ }
+
+ if (preg_match_all("/<([\w]+)[^>]*>/si", $str, $matches))
+ {
+ for ($i = 0; $i < count($matches[0]); $i++)
{
- $str = str_replace($matches['1'][$i],
- $this->_html_entity_decode($matches['1'][$i], $charset),
+ $str = str_replace($matches[0][$i],
+ $this->_html_entity_decode($matches[0][$i], $charset),
$str);
}
}
-
+ */
+
/*
* Convert all tabs to spaces
*
@@ -801,7 +824,42 @@ class CI_Input {
}
// --------------------------------------------------------------------
-
+
+ /**
+ * Attribute Conversion
+ *
+ * Used as a callback for XSS Clean
+ *
+ * @access public
+ * @param array
+ * @return string
+ */
+ function _attribute_conversion($match)
+ {
+ 	// $match[0] is the complete attribute="value" text matched by the
+ 	// attribute pattern in xss_clean(). A literal '>' inside the quoted
+ 	// value would end the tag early for the tag pattern that runs next
+ 	// (it stops at the first '>'), so it is encoded as its own entity,
+ 	// '&gt;'. Using '&lt;' here would turn the character into '<' once
+ 	// the entities are decoded again.
+ 	return str_replace('>', '&gt;', $match[0]);
+ }
+
+ // --------------------------------------------------------------------
+
+ /**
+ * HTML Entity Decode Callback
+ *
+ * Used as a callback for XSS Clean
+ *
+ * @access public
+ * @param array
+ * @return string
+ */
+ function _html_entity_decode_callback($match)
+ {
+ 	// Read the 'charset' config item through the object returned by
+ 	// get_instance() and upper-case it before handing it on.
+ 	$CI =& get_instance();
+ 	$configured_charset = strtoupper($CI->config->item('charset'));
+
+ 	// $match[0] is the entire text matched by the tag pattern; decode
+ 	// the entities it contains using that character set.
+ 	return $this->_html_entity_decode($match[0], $configured_charset);
+ }
+
+ // --------------------------------------------------------------------
+
/**
* HTML Entities Decode
*
@@ -826,10 +884,10 @@ class CI_Input {
character set, and the PHP developers said they were not back porting the
fix to versions other than PHP 5.x.
*/
- function _html_entity_decode($str, $charset='ISO-8859-1')
+ function _html_entity_decode($str, $charset='UTF-8')
{
if (stristr($str, '&') === FALSE) return $str;
-
+
// The reason we are not using html_entity_decode() by itself is because
// while it is not technically correct to leave out the semicolon
// at the end of an entity most browsers will still interpret the entity