From 01f72ca6c27d99938cd1f814f812c5b844d51b83 Mon Sep 17 00:00:00 2001 From: Derek Jones Date: Fri, 4 May 2007 18:19:17 +0000 Subject: Modified XSS Cleaning routine to be more performance friendly and compatible with PHP 5.2's new PCRE backtrack and recursion limits. - replaced link and image tag javascript sanitization preg_replace()'s with callback functions to avoid excessive backtracks on strings with many links / image tags. --- system/libraries/Input.php | 55 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) (limited to 'system/libraries/Input.php') diff --git a/system/libraries/Input.php b/system/libraries/Input.php index 64c0ed418..63a6833d6 100644 --- a/system/libraries/Input.php +++ b/system/libraries/Input.php @@ -499,8 +499,10 @@ class CI_Input { * Note: Normally urldecode() would be easier but it removes plus signs * */ + $str = preg_replace("/(%20)+/", '9u3iovBnRThju941s89rKozm', $str); $str = preg_replace("/%u0([a-z0-9]{3})/i", "&#x\\1;", $str); - $str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str); + $str = preg_replace("/%([a-z0-9]{2})/i", "&#x\\1;", $str); + $str = str_replace('9u3iovBnRThju941s89rKozm', "%20", $str); /* * Convert character entities to ASCII @@ -575,17 +577,17 @@ class CI_Input { $temp .= substr($word, $i, 1)."\s*"; } - $temp = substr($temp, 0, -3); - $str = preg_replace('#'.$temp.'#s', $word, $str); - $str = preg_replace('#'.ucfirst($temp).'#s', ucfirst($word), $str); + // We only want to do this when it is followed by a non-word character + // That way valid stuff like "dealer to" does not become "dealerto" + $str = preg_replace('#('.substr($temp, 0, -3).')(\W)#ise', "preg_replace('/\s+/s', '', '\\1').'\\2'", $str); } /* * Remove disallowed Javascript in links or img tags */ - $str = preg_replace("#.*?#si", "", $str); - $str = preg_replace("##si", "", $str); - $str = preg_replace("#<(script|xss).*?\>#si", "", $str); + $str = preg_replace_callback("##si", array($this, '_js_link_removal'), $str); + $str = preg_replace_callback("##si", array($this, '_js_img_removal'), $str); + $str = preg_replace("#<(script|xss).*?\>#si", "", $str); /* * Remove JavaScript Event Handlers @@ -595,7 +597,8 @@ class CI_Input { * but it's unlikely to be a problem. * */ - $str = preg_replace('#(<[^>]+.*?)(onblur|onchange|onclick|onfocus|onload|onmouseover|onmouseup|onmousedown|onselect|onsubmit|onunload|onkeypress|onkeydown|onkeyup|onresize)[^>]*>#iU',"\\1>",$str); + $event_handlers = array('onblur','onchange','onclick','onfocus','onload','onmouseover','onmouseup','onmousedown','onselect','onsubmit','onunload','onkeypress','onkeydown','onkeyup','onresize', 'xmlns'); + $str = preg_replace("#<([^>]+)(".implode('|', $event_handlers).")([^>]*)>#iU", "<\\1\\2\\3>", $str); /* * Sanitize naughty HTML elements @@ -652,7 +655,43 @@ class CI_Input { } // -------------------------------------------------------------------- + + /** + * JS Link Removal + * + * Callback function for xss_clean() to sanitize links + * This limits the PCRE backtracks, making it more performance friendly + * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in + * PHP 5.2+ on link-heavy strings + * + * @access private + * @param array + * @return string + */ + function _js_link_removal($match) + { + return preg_replace("#.*?#si", "", $match[0]); + } + + /** + * JS Image Removal + * + * Callback function for xss_clean() to sanitize image tags + * This limits the PCRE backtracks, making it more performance friendly + * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in + * PHP 5.2+ on image tag heavy strings + * + * @access private + * @param array + * @return string + */ + function _js_img_removal($match) + { + return preg_replace("##si", "", $match[0]); + } + // -------------------------------------------------------------------- + /** * HTML Entities Decode * -- cgit v1.2.3-24-g4f1b