From 48bb32aece18e9dce381602e242609adfc71b0d0 Mon Sep 17 00:00:00 2001 From: Derek Jones Date: Thu, 12 Jul 2007 13:10:42 +0000 Subject: further xss_clean() enhancements --- system/libraries/Input.php | 97 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 30 deletions(-) (limited to 'system') diff --git a/system/libraries/Input.php b/system/libraries/Input.php index 33f288688..fcca722b7 100644 --- a/system/libraries/Input.php +++ b/system/libraries/Input.php @@ -538,15 +538,15 @@ class CI_Input { * the conversion of entities to ASCII later. * */ - $str = preg_replace('#(&\#*\w+)[\x00-\x20]+;#u',"\\1;",$str); + $str = preg_replace('#(&\#?[0-9a-z]+)[\x00-\x20]*;?#i', "\\1;", $str); /* - * Validate UTF16 two byte encoding (x00) + * Validate UTF16 two byte encoding (x00) * * Just as above, adds a semicolon if missing. * */ - $str = preg_replace('#(&\#x*)([0-9A-F]+);*#iu',"\\1\\2;",$str); + $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str); /* * URL Decode @@ -580,37 +580,50 @@ class CI_Input { $str); } } - + + /* + * Convert all tabs to spaces + * + * This prevents strings like this: ja vascript + * NOTE: we deal with spaces between characters later. + * NOTE: preg_replace was found to be amazingly slow here on large blocks of data, + * so we use str_replace. + * + */ + + $str = str_replace("\t", " ", $str); + /* * Not Allowed Under Any Conditions */ $bad = array( 'document.cookie' => '[removed]', + 'document.write' => '[removed]', '.parentNode' => '[removed]', '.innerHTML' => '[removed]', - 'document.write' => '[removed]', 'window.location' => '[removed]', + '-moz-binding' => '[removed]', + '' => '-->', + ' '<![CDATA[' + ); + + foreach ($bad as $key => $val) + { + $str = str_replace($key, $val, $str); + } + + $bad = array( "javascript\s*:" => '[removed]', "expression\s*\(" => '[removed]', // CSS and IE - "Redirect\s+302" => '[removed]', - '' => '-->' + "Redirect\s+302" => '[removed]' ); - + foreach ($bad as $key => $val) { $str = preg_replace("#".$key."#i", $val, $str); } - /* - * Convert all tabs to spaces - * - * This prevents strings like this: ja vascript - * Note: we deal with spaces between characters later. - * - */ - $str = preg_replace("#\t+#", " ", $str); - /* * Makes PHP tags safe * @@ -621,7 +634,7 @@ class CI_Input { * But it doesn't seem to pose a problem. * */ - $str = str_replace(array(''), array('<?php', '<?PHP', '<?', '?>'), $str); + $str = str_replace(array(''), array('<?php', '<?PHP', '<?', '?>'), $str); /* * Compact any exploded words @@ -650,10 +663,24 @@ class CI_Input { do { $original = $str; - - $str = preg_replace_callback("##si", array($this, '_js_link_removal'), $str); - $str = preg_replace_callback("##si", array($this, '_js_img_removal'), $str); - $str = preg_replace("##si", "", $str); + + if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '') !== FALSE) OR + preg_match("/<\/a>/i", $str)) + { + $str = preg_replace_callback("##si", array($this, '_js_link_removal'), $str); + } + + if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && stripos($str, '#si", array($this, '_js_img_removal'), $str); + } + + if ((version_compare(PHP_VERSION, '5.0', '>=') === TRUE && (stripos($str, 'script') !== FALSE OR stripos($str, 'xss') !== FALSE)) OR + preg_match("/(script|xss)/i", $str)) + { + $str = preg_replace("##si", "", $str); + } } while($original != $str); @@ -706,20 +733,30 @@ class CI_Input { */ $bad = array( 'document.cookie' => '[removed]', + 'document.write' => '[removed]', '.parentNode' => '[removed]', '.innerHTML' => '[removed]', - 'document.write' => '[removed]', 'window.location' => '[removed]', + '-moz-binding' => '[removed]', + '' => '-->', + ' '<![CDATA[' + ); + + foreach ($bad as $key => $val) + { + $str = str_replace($key, $val, $str); + } + + $bad = array( "javascript\s*:" => '[removed]', "expression\s*\(" => '[removed]', // CSS and IE - "Redirect\s+302" => '[removed]', - '' => '-->' + "Redirect\s+302" => '[removed]' ); - + foreach ($bad as $key => $val) { - $str = preg_replace("#".$key."#i", $val, $str); + $str = preg_replace("#".$key."#i", $val, $str); } @@ -764,7 +801,7 @@ class CI_Input { } // -------------------------------------------------------------------- - + /** * HTML Entities Decode * -- cgit v1.2.3-24-g4f1b