diff options
Diffstat (limited to 'system/core/Security.php')
-rw-r--r-- | system/core/Security.php | 188 |
1 files changed, 111 insertions, 77 deletions
diff --git a/system/core/Security.php b/system/core/Security.php index 7c5199255..ab85e2239 100644 --- a/system/core/Security.php +++ b/system/core/Security.php @@ -436,7 +436,7 @@ class CI_Security { $words = array( 'javascript', 'expression', 'vbscript', 'jscript', 'wscript', 'vbs', 'script', 'base64', 'applet', 'alert', 'document', - 'write', 'cookie', 'window', 'confirm', 'prompt' + 'write', 'cookie', 'window', 'confirm', 'prompt', 'eval' ); foreach ($words as $word) @@ -480,12 +480,8 @@ class CI_Security { } } while ($original !== $str); - unset($original); - // Remove evil attributes such as style, onclick and xmlns - $str = $this->_remove_evil_attributes($str, $is_image); - /* * Sanitize naughty HTML elements * @@ -495,8 +491,29 @@ class CI_Security { * So this: <blink> * Becomes: <blink> */ - $naughty = 'alert|prompt|confirm|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|button|select|isindex|layer|link|meta|keygen|object|plaintext|style|script|textarea|title|math|video|svg|xml|xss'; - $str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str); + $pattern = '#' + .'<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)' // tag start and name, followed by a non-tag character + .'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator + // optional attributes + .'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons + .'[^\s\042\047>/=]+' // attribute characters + // optional attribute-value + .'(?:\s*=' // attribute-value separator + .'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value + .')?' // end optional attribute-value group + .')*)' // end optional attributes group + .'[^>]*)(?<closeTag>\>)?#isS'; + + // Note: It would be nice to optimize this for speed, BUT + // only matching the naughty elements here results in + // false positives and in turn - vulnerabilities! + do + { + $old_str = $str; + $str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str); + } + while ($old_str !== $str); + unset($old_str); /* * Sanitize naughty scripting elements @@ -510,9 +527,11 @@ class CI_Security { * For example: eval('some code') * Becomes: eval('some code') */ - $str = preg_replace('#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', - '\\1\\2(\\3)', - $str); + $str = preg_replace( + '#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', + '\\1\\2(\\3)', + $str + ); // Final clean up // This adds a bit of extra precaution in case @@ -750,58 +769,6 @@ class CI_Security { // -------------------------------------------------------------------- /** - * Remove Evil HTML Attributes (like event handlers and style) - * - * It removes the evil attribute and either: - * - * - Everything up until a space. For example, everything between the pipes: - * - * <code> - * <a |style=document.write('hello');alert('world');| class=link> - * </code> - * - * - Everything inside the quotes. For example, everything between the pipes: - * - * <code> - * <a |style="document.write('hello'); alert('world');"| class="link"> - * </code> - * - * @param string $str The string to check - * @param bool $is_image Whether the input is an image - * @return string The string with the evil attributes removed - */ - protected function _remove_evil_attributes($str, $is_image) - { - $evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'); - - if ($is_image === TRUE) - { - /* - * Adobe Photoshop puts XML metadata into JFIF images, - * including namespacing, so we have to allow this for images. - */ - unset($evil_attributes[array_search('xmlns', $evil_attributes)]); - } - - do { - $count = $temp_count = 0; - - // replace occurrences of illegal attribute strings with quotes (042 and 047 are octal quotes) - $str = preg_replace('/(<[^>]+)(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is', '$1[removed]', $str, -1, $temp_count); - $count += $temp_count; - - // find occurrences of illegal attribute strings without quotes - $str = preg_replace('/(<[^>]+)(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*([^\s>]*)/is', '$1[removed]', $str, -1, $temp_count); - $count += $temp_count; - } - while ($count); - - return $str; - } - - // -------------------------------------------------------------------- - - /** * Sanitize Naughty HTML * * Callback method for xss_clean() to remove naughty HTML elements. @@ -812,9 +779,70 @@ class CI_Security { */ protected function _sanitize_naughty_html($matches) { - return '<'.$matches[1].$matches[2].$matches[3] // encode opening brace - // encode captured opening or closing brace to prevent recursive vectors: - .str_replace(array('>', '<'), array('>', '<'), $matches[4]); + static $naughty_tags = array( + 'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound', + 'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer', + 'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object', + 'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss' + ); + + static $evil_attributes = array( + 'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime' + ); + + // First, escape unclosed tags + if (empty($matches['closeTag'])) + { + return '<'.$matches[1]; + } + // Is the element that we caught naughty? If so, escape it + elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE)) + { + return '<'.$matches[1].'>'; + } + // For other tags, see if their attributes are "evil" and strip those + elseif (isset($matches['attributes'])) + { + // We'll need to catch all attributes separately first + $pattern = '#' + .'([\s\042\047/=]*)' // non-attribute characters, excluding > (tag close) for obvious reasons + .'(?<name>[^\s\042\047>/=]+)' // attribute characters + // optional attribute-value + .'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator + .'#i'; + + if ($count = preg_match_all($pattern, $matches['attributes'], $attributes, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) + { + // Since we'll be using substr_replace() below, we + // need to handle the attributes in reverse order, + // so we don't damage the string. + for ($i = $count - 1; $i > -1; $i--) + { + if ( + // Is it indeed an "evil" attribute? + preg_match('#^('.implode('|', $evil_attributes).')$#i', $attributes[$i]['name'][0]) + // Or an attribute not starting with a letter? Some parsers get confused by that + OR ! ctype_alpha($attributes[$i]['name'][0][0]) + // Does it have an equals sign, but no value and not quoted? Strip that too! + OR (trim($attributes[$i]['value'][0]) === '') + ) + { + $matches['attributes'] = substr_replace( + $matches['attributes'], + ' [removed]', + $attributes[$i][0][1], + strlen($attributes[$i][0][0]) + ); + } + } + + // Note: This will strip some non-space characters and/or + // reduce multiple spaces between attributes. + return '<'.$matches['slash'].$matches['tagName'].' '.trim($matches['attributes']).'>'; + } + } + + return $matches[0]; } // -------------------------------------------------------------------- @@ -834,12 +862,15 @@ class CI_Security { */ protected function _js_link_removal($match) { - return str_replace($match[1], - preg_replace('#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si', - '', - $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1])) - ), - $match[0]); + return str_replace( + $match[1], + preg_replace( + '#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si', + '', + $this->_filter_attributes($match[1]) + ), + $match[0] + ); } // -------------------------------------------------------------------- @@ -859,12 +890,15 @@ class CI_Security { */ protected function _js_img_removal($match) { - return str_replace($match[1], - preg_replace('#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si', - '', - $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1])) - ), - $match[0]); + return str_replace( + $match[1], + preg_replace( + '#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si', + '', + $this->_filter_attributes($match[1]) + ), + $match[0] + ); } // -------------------------------------------------------------------- |