summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDerek Jones <derek.jones@ellislab.com>2008-05-15 22:13:14 +0200
committerDerek Jones <derek.jones@ellislab.com>2008-05-15 22:13:14 +0200
commit63fc5fe5c6d8c9c8a2d693b0f65c3c8af8f2a74f (patch)
tree49410adf5480f72cf7df6599538b833abc5e31d0
parent66c474d848907bb91c316e9acbbcaba6a2591032 (diff)
added ability to use xss_clean() to test images, and improved security for vectors particular to the Opera family of browsers
-rw-r--r--system/libraries/Input.php86
-rw-r--r--user_guide/changelog.html6
-rw-r--r--user_guide/libraries/input.html5
3 files changed, 60 insertions, 37 deletions
diff --git a/system/libraries/Input.php b/system/libraries/Input.php
index ec06101e6..e6ac460b0 100644
--- a/system/libraries/Input.php
+++ b/system/libraries/Input.php
@@ -554,7 +554,7 @@ class CI_Input {
* @param string
* @return string
*/
- function xss_clean($str)
+ function xss_clean($str, $is_image = FALSE, $loops = 0)
{
/*
* Is the string an array?
@@ -569,6 +569,15 @@ class CI_Input {
return $str;
}
+
+ /*
+ * Runaway loop prevention. If the text has had to be examined this many times
+ * I think it's safe to say that it is best to simply ignore it.
+ */
+ if ($loops > 9)
+ {
+ return '';
+ }
/*
* Remove Null Characters
@@ -608,7 +617,6 @@ class CI_Input {
/*
* Un-Protect GET variables in URLs
*/
-
$str = str_replace($this->xss_hash(), '&', $str);
/*
@@ -622,7 +630,7 @@ class CI_Input {
*
*/
$str = rawurldecode($str);
-
+
/*
* Convert character entities to ASCII
*
@@ -634,35 +642,7 @@ class CI_Input {
$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_attribute_conversion'), $str);
- $str = preg_replace_callback("/<([\w]+)[^>]*>/si", array($this, '_html_entity_decode_callback'), $str);
-
- /*
-
- Old Code that when modified to use preg_replace()'s above became more efficient memory-wise
-
- if (preg_match_all("/[a-z]+=([\'\"]).*?\\1/si", $str, $matches))
- {
- for ($i = 0; $i < count($matches[0]); $i++)
- {
- if (stristr($matches[0][$i], '>'))
- {
- $str = str_replace( $matches['0'][$i],
- str_replace('>', '&lt;', $matches[0][$i]),
- $str);
- }
- }
- }
-
- if (preg_match_all("/<([\w]+)[^>]*>/si", $str, $matches))
- {
- for ($i = 0; $i < count($matches[0]); $i++)
- {
- $str = str_replace($matches[0][$i],
- $this->_html_entity_decode($matches[0][$i], $charset),
- $str);
- }
- }
- */
+ $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_html_entity_decode_callback'), $str);
/*
* Convert all tabs to spaces
@@ -679,6 +659,9 @@ class CI_Input {
$str = str_replace("\t", ' ', $str);
}
+ // capture for comparison at the end
+ $converted_string = $str;
+
/*
* Not Allowed Under Any Conditions
*/
@@ -808,10 +791,39 @@ class CI_Input {
foreach ($this->never_allowed_regex as $key => $val)
{
- $str = preg_replace("#".$key."#i", $val, $str);
+ $str = preg_replace("#".$key."#i", $val, $str);
}
+ /*
+ * Images are Handled in a Special Way
+ * - Essentially, we want to know that after all of the character conversion is done whether
+ * any unwanted, likely XSS, code was found. If not, we return TRUE, as the image is clean.
+ * However, if the string post-conversion does not matched the string post-removal of XSS,
+ * then it fails, as there was unwanted XSS code found and removed/changed during processing.
+ */
+ if ($is_image === TRUE)
+ {
+ if ($str == $converted_string)
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+ }
+
+ /*
+ * If something changed after character conversion, we can be fairly confident that something
+ * malicious was removed, so let's take no chances that the attacker is counting on specific
+ * mutations taking place to allow a new attack to reveal itself. So say we all.
+ */
+ if ($converted_string != $str)
+ {
+ $str = $this->xss_clean($str, $is_image, ++$loops);
+ }
+
log_message('debug', "XSS Filtering completed");
return $str;
}
@@ -884,7 +896,7 @@ class CI_Input {
*/
function _js_link_removal($match)
{
- return preg_replace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>.*?</a>#si", "", $match[0]);
+ return preg_replace("#<a.+?href=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss|base64\s*,).*?\>.*?</a>#si", "", $match[0]);
}
/**
@@ -901,7 +913,7 @@ class CI_Input {
*/
function _js_img_removal($match)
{
- return preg_replace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss).*?\>#si", "", $match[0]);
+ return preg_replace("#<img.+?src=.*?(alert\(|alert&\#40;|javascript\:|window\.|document\.|\.cookie|<script|<xss|base64\s*,).*?\>#si", "", $match[0]);
}
// --------------------------------------------------------------------
@@ -978,12 +990,12 @@ class CI_Input {
if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
{
$str = html_entity_decode($str, ENT_COMPAT, $charset);
- $str = preg_replace('~&#x([0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
}
// Numeric Entities
- $str = preg_replace('~&#x([0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
+ $str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
$str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
// Literal Entities - Slightly slow so we do another check
diff --git a/user_guide/changelog.html b/user_guide/changelog.html
index 28eb39d3b..e15cfc4f2 100644
--- a/user_guide/changelog.html
+++ b/user_guide/changelog.html
@@ -71,6 +71,12 @@ SVN Revision: not currently released</p>
<li>Set the mime type check in the <a href="libraries/file_uploading.html">Upload class</a> to reference the global mimes variable.</li>
</ul>
</li>
+ <li>Other changes
+ <ul>
+ <li>Added ability to <a href="libraries/input.html">use xss_clean() to test images</a> for XSS, useful for upload security.</li>
+ <li>Improved security in xss_clean() for the Opera family of browsers.</li>
+ </ul>
+ </li>
</ul>
diff --git a/user_guide/libraries/input.html b/user_guide/libraries/input.html
index 17ed7f0ff..c95ebbd98 100644
--- a/user_guide/libraries/input.html
+++ b/user_guide/libraries/input.html
@@ -109,7 +109,12 @@ Note: This function should only be used to deal with data upon submission. It's
<p>Note: If you use the form validation class, it gives you the option of XSS filtering as well.</p>
+<p>An optional second parameter, <dfn>is_image</dfn>, allows this function to be used to test images for potential XSS attacks, useful for file upload security. When this second parameter is set to <dfn>TRUE</dfn>, instead of returning an altered string, the function returns TRUE if the image is safe, and FALSE if it contained potentially malicious information that a browser may attempt to execute.</p>
+<code>if ($this->input->xss_clean($file, TRUE) === FALSE)<br />
+{<br />
+&nbsp;&nbsp;&nbsp;&nbsp;// file failed the XSS test<br />
+}</code>
<h2>Using POST, COOKIE, or SERVER Data</h2>