Made Environment Support optional. Comment out or delete the constant to stop environment checks.

author: Phil Sturgeon <email@philsturgeon.co.uk> 2011-04-06 23:59:37 +0200
committer: Phil Sturgeon <email@philsturgeon.co.uk> 2011-04-06 23:59:37 +0200
commit: eda33d5ece128fcc11f6b46df38b3bfd76eb74ad (patch)
tree: 0b5e72e236bde4ebd59c246e3cfd458baccb529e /system/core
parent: 05fa61144667c85b0463f7e8baa6af00aa195dc6 (diff)
parent: a6a7ef10a755a718008fbee4b01b80a682b89c39 (diff)
6 files changed, 882 insertions, 48 deletions
diff --git a/system/core/CodeIgniter.php b/system/core/CodeIgniter.php
index 143faec15..f3e1439ff 100644
--- a/system/core/CodeIgniter.php
+++ b/system/core/CodeIgniter.php
@@ -197,6 +197,13 @@
 	}
 
 /*
+ * -----------------------------------------------------
+ * Load the security class for xss and csrf support
+ * -----------------------------------------------------
+ */
+	$SEC =& load_class('Security', 'core');
+
+/*
  * ------------------------------------------------------
  *  Load the Input class and sanitize globals
  * ------------------------------------------------------
diff --git a/system/core/Common.php b/system/core/Common.php
index d7054ebe6..1aca809ab 100644
--- a/system/core/Common.php
+++ b/system/core/Common.php
@@ -476,28 +476,26 @@
 	 * @param	string
 	 * @return	string
 	 */
-	function remove_invisible_characters($str)
+	function remove_invisible_characters($str, $url_encoded = TRUE)
 	{
-		static $non_displayables;
-
-		if ( ! isset($non_displayables))
+		$non_displayables = array();
+		
+		// every control character except newline (dec 10)
+		// carriage return (dec 13), and horizontal tab (dec 09)
+		
+		if ($url_encoded)
 		{
-			// every control character except newline (dec 10), carriage return (dec 13), and horizontal tab (dec 09),
-			$non_displayables = array(
-										'/%0[0-8bcef]/',			// url encoded 00-08, 11, 12, 14, 15
-										'/%1[0-9a-f]/',				// url encoded 16-31
-										'/[\x00-\x08]/',			// 00-08
-										'/\x0b/', '/\x0c/',			// 11, 12
-										'/[\x0e-\x1f]/'				// 14-31
-									);
+			$non_displayables[] = '/%0[0-8bcef]/';	// url encoded 00-08, 11, 12, 14, 15
+			$non_displayables[] = '/%1[0-9a-f]/';	// url encoded 16-31
 		}
+		
+		$non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';	// 00-08, 11, 12, 14-31, 127
 
 		do
 		{
-			$cleaned = $str;
-			$str = preg_replace($non_displayables, '', $str);
+			$str = preg_replace($non_displayables, '', $str, -1, $count);
 		}
-		while ($cleaned != $str);
+		while ($count);
 
 		return $str;
 	}
diff --git a/system/core/Input.php b/system/core/Input.php
index 18131350f..dc7612e64 100644
--- a/system/core/Input.php
+++ b/system/core/Input.php
@@ -53,11 +53,8 @@ class CI_Input {
 		$this->_enable_xss		= (config_item('global_xss_filtering') === TRUE);
 		$this->_enable_csrf		= (config_item('csrf_protection') === TRUE);
 
-		// Do we need to load the security class?
-		if ($this->_enable_xss == TRUE OR $this->_enable_csrf == TRUE)
-		{
-			$this->security =& load_class('Security');
-		}
+		global $SEC;
+		$this->security =& $SEC;
 
 		// Do we need the UTF-8 class?
 		if (UTF8_ENABLED === TRUE)
@@ -92,8 +89,7 @@ class CI_Input {
 
 		if ($xss_clean === TRUE)
 		{
-			$_security =& load_class('Security');
-			return $_security->xss_clean($array[$index]);
+			return $this->security->xss_clean($array[$index]);
 		}
 
 		return $array[$index];
@@ -527,6 +523,9 @@ class CI_Input {
 		{
 			$str = $this->uni->clean_string($str);
 		}
+		
+		// Remove control characters
+		$str = remove_invisible_characters($str);
 
 		// Should we filter the input data?
 		if ($this->_enable_xss === TRUE)
@@ -642,8 +641,7 @@ class CI_Input {
 
 		if ($xss_clean === TRUE)
 		{
-			$_security =& load_class('Security');
-			return $_security->xss_clean($this->headers[$index]);
+			return $this->security->xss_clean($this->headers[$index]);
 		}
 
 		return $this->headers[$index];		
diff --git a/system/core/Security.php b/system/core/Security.php
new file mode 100644
index 000000000..ceef9779c
--- /dev/null
+++ b/system/core/Security.php
@@ -0,0 +1,820 @@
+<?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');
+/**
+ * CodeIgniter
+ *
+ * An open source application development framework for PHP 5.1.6 or newer
+ *
+ * @package		CodeIgniter
+ * @author		ExpressionEngine Dev Team
+ * @copyright	Copyright (c) 2008 - 2011, EllisLab, Inc.
+ * @license		http://codeigniter.com/user_guide/license.html
+ * @link		http://codeigniter.com
+ * @since		Version 1.0
+ * @filesource
+ */
+
+// ------------------------------------------------------------------------
+
+/**
+ * Security Class
+ *
+ * @package		CodeIgniter
+ * @subpackage	Libraries
+ * @category	Security
+ * @author		ExpressionEngine Dev Team
+ * @link		http://codeigniter.com/user_guide/libraries/security.html
+ */
+class CI_Security {
+	
+	protected $_xss_hash			= '';
+	protected $_csrf_hash			= '';
+	protected $_csrf_expire			= 7200;  // Two hours (in seconds)
+	protected $_csrf_token_name		= 'ci_csrf_token';
+	protected $_csrf_cookie_name	= 'ci_csrf_token';
+
+	/* never allowed, string replacement */
+	protected $_never_allowed_str = array(
+					'document.cookie'	=> '[removed]',
+					'document.write'	=> '[removed]',
+					'.parentNode'		=> '[removed]',
+					'.innerHTML'		=> '[removed]',
+					'window.location'	=> '[removed]',
+					'-moz-binding'		=> '[removed]',
+					'<!--'				=> '&lt;!--',
+					'-->'				=> '--&gt;',
+					'<![CDATA['			=> '&lt;![CDATA['
+	);
+
+	/* never allowed, regex replacement */
+	protected $_never_allowed_regex = array(
+					"javascript\s*:"			=> '[removed]',
+					"expression\s*(\(|&\#40;)"	=> '[removed]', // CSS and IE
+					"vbscript\s*:"				=> '[removed]', // IE, surprise!
+					"Redirect\s+302"			=> '[removed]'
+	);
+	
+	/**
+	 * Constructor
+	 */
+	public function __construct()
+	{
+		// Append application specific cookie prefix to token name
+		$this->_csrf_cookie_name = (config_item('cookie_prefix')) ? config_item('cookie_prefix').$this->_csrf_token_name : $this->_csrf_token_name;
+
+		// Set the CSRF hash
+		$this->_csrf_set_hash();
+
+		log_message('debug', "Security Class Initialized");
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Verify Cross Site Request Forgery Protection
+	 *
+	 * @return	object
+	 */
+	public function csrf_verify()
+	{
+		// If no POST data exists we will set the CSRF cookie
+		if (count($_POST) == 0)
+		{
+			return $this->csrf_set_cookie();
+		}
+
+		// Do the tokens exist in both the _POST and _COOKIE arrays?
+		if ( ! isset($_POST[$this->_csrf_token_name]) OR 
+			 ! isset($_COOKIE[$this->_csrf_cookie_name]))
+		{
+			$this->csrf_show_error();
+		}
+
+		// Do the tokens match?
+		if ($_POST[$this->_csrf_token_name] != $_COOKIE[$this->_csrf_cookie_name])
+		{
+			$this->csrf_show_error();
+		}
+
+		// We kill this since we're done and we don't want to 
+		// polute the _POST array
+		unset($_POST[$this->_csrf_token_name]);
+
+		// Nothing should last forever
+		unset($_COOKIE[$this->_csrf_cookie_name]);
+		$this->_csrf_set_hash();
+		$this->csrf_set_cookie();
+
+		log_message('debug', "CSRF token verified ");
+		
+		return $this;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Set Cross Site Request Forgery Protection Cookie
+	 *
+	 * @return	object
+	 */
+	public function csrf_set_cookie()
+	{
+		$expire = time() + $this->_csrf_expire;
+		$secure_cookie = (config_item('cookie_secure') === TRUE) ? 1 : 0;
+
+		if ($secure_cookie)
+		{
+			$req = isset($_SERVER['HTTPS']) ? $_SERVER['HTTPS'] : FALSE;
+
+			if ( ! $req OR $req == 'off')
+			{
+				return FALSE;
+			}
+		}
+
+		setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expire, config_item('cookie_path'), config_item('cookie_domain'), $secure_cookie);
+
+		log_message('debug', "CRSF cookie Set");
+		
+		return $this;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Show CSRF Error
+	 *
+	 * @return	void
+	 */
+	public function csrf_show_error()
+	{
+		show_error('The action you have requested is not allowed.');
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Get CSRF Hash 
+	 *
+	 * Getter Method 
+	 *
+	 * @return 	string 	self::_csrf_hash
+	 */
+	public function get_csrf_hash()
+	{
+		return $this->_csrf_hash;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Get CSRF Token Name
+	 *
+	 * Getter Method
+	 *
+	 * @return 	string 	self::csrf_token_name
+	 */
+	public function get_csrf_token_name()
+	{
+		return $this->_csrf_token_name;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * XSS Clean
+	 *
+	 * Sanitizes data so that Cross Site Scripting Hacks can be
+	 * prevented.  This function does a fair amount of work but
+	 * it is extremely thorough, designed to prevent even the
+	 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
+	 * of course, but I haven't been able to get anything passed
+	 * the filter.
+	 *
+	 * Note: This function should only be used to deal with data
+	 * upon submission.  It's not something that should
+	 * be used for general runtime processing.
+	 *
+	 * This function was based in part on some code and ideas I
+	 * got from Bitflux: http://channel.bitflux.ch/wiki/XSS_Prevention
+	 *
+	 * To help develop this script I used this great list of
+	 * vulnerabilities along with a few other hacks I've
+	 * harvested from examining vulnerabilities in other programs:
+	 * http://ha.ckers.org/xss.html
+	 *
+	 * @param	mixed	string or array
+	 * @return	string
+	 */
+	public function xss_clean($str, $is_image = FALSE)
+	{
+		/*
+		 * Is the string an array?
+		 *
+		 */
+		if (is_array($str))
+		{
+			while (list($key) = each($str))
+			{
+				$str[$key] = $this->xss_clean($str[$key]);
+			}
+
+			return $str;
+		}
+
+		/*
+		 * Remove Invisible Characters
+		 */
+		$str = remove_invisible_characters($str);
+
+		// Validate Entities in URLs
+		$str = $this->_validate_entities($str);
+
+		/*
+		 * URL Decode
+		 *
+		 * Just in case stuff like this is submitted:
+		 *
+		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
+		 *
+		 * Note: Use rawurldecode() so it does not remove plus signs
+		 *
+		 */
+		$str = rawurldecode($str);
+
+		/*
+		 * Convert character entities to ASCII
+		 *
+		 * This permits our tests below to work reliably.
+		 * We only convert entities that are within tags since
+		 * these are the ones that will pose security problems.
+		 *
+		 */
+
+		$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
+	
+		$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_decode_entity'), $str);
+
+		/*
+		 * Remove Invisible Characters Again!
+		 */
+		$str = remove_invisible_characters($str);
+
+		/*
+		 * Convert all tabs to spaces
+		 *
+		 * This prevents strings like this: ja	vascript
+		 * NOTE: we deal with spaces between characters later.
+		 * NOTE: preg_replace was found to be amazingly slow here on 
+		 * large blocks of data, so we use str_replace.
+		 */
+
+		if (strpos($str, "\t") !== FALSE)
+		{
+			$str = str_replace("\t", ' ', $str);
+		}
+
+		/*
+		 * Capture converted string for later comparison
+		 */
+		$converted_string = $str;
+
+		// Remove Strings that are never allowed
+		$str = $this->_do_never_allowed($str);
+
+		/*
+		 * Makes PHP tags safe
+		 *
+		 * Note: XML tags are inadvertently replaced too:
+		 *
+		 * <?xml
+		 *
+		 * But it doesn't seem to pose a problem.
+		 */
+		if ($is_image === TRUE)
+		{
+			// Images have a tendency to have the PHP short opening and 
+			// closing tags every so often so we skip those and only 
+			// do the long opening tags.
+			$str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
+		}
+		else
+		{
+			$str = str_replace(array('<?', '?'.'>'),  array('&lt;?', '?&gt;'), $str);
+		}
+
+		/*
+		 * Compact any exploded words
+		 *
+		 * This corrects words like:  j a v a s c r i p t
+		 * These words are compacted back to their correct state.
+		 */
+		$words = array(
+				'javascript', 'expression', 'vbscript', 'script', 
+				'applet', 'alert', 'document', 'write', 'cookie', 'window'
+			);
+			
+		foreach ($words as $word)
+		{
+			$temp = '';
+
+			for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
+			{
+				$temp .= substr($word, $i, 1)."\s*";
+			}
+
+			// We only want to do this when it is followed by a non-word character
+			// That way valid stuff like "dealer to" does not become "dealerto"
+			$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
+		}
+
+		/*
+		 * Remove disallowed Javascript in links or img tags
+		 * We used to do some version comparisons and use of stripos for PHP5, 
+		 * but it is dog slow compared to these simplified non-capturing 
+		 * preg_match(), especially if the pattern exists in the string
+		 */
+		do
+		{
+			$original = $str;
+
+			if (preg_match("/<a/i", $str))
+			{
+				$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
+			}
+
+			if (preg_match("/<img/i", $str))
+			{
+				$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
+			}
+
+			if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
+			{
+				$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
+			}
+		}
+		while($original != $str);
+
+		unset($original);
+
+		// Remove evil attributes such as style, onclick and xmlns
+		$str = $this->_remove_evil_attributes($str, $is_image);
+
+		/*
+		 * Sanitize naughty HTML elements
+		 *
+		 * If a tag containing any of the words in the list
+		 * below is found, the tag gets converted to entities.
+		 *
+		 * So this: <blink>
+		 * Becomes: &lt;blink&gt;
+		 */
+		$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
+		$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);
+
+		/*
+		 * Sanitize naughty scripting elements
+		 *
+		 * Similar to above, only instead of looking for
+		 * tags it looks for PHP and JavaScript commands
+		 * that are disallowed.  Rather than removing the
+		 * code, it simply converts the parenthesis to entities
+		 * rendering the code un-executable.
+		 *
+		 * For example:	eval('some code')
+		 * Becomes:		eval&#40;'some code'&#41;
+		 */
+		$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);
+
+
+		// Final clean up
+		// This adds a bit of extra precaution in case
+		// something got through the above filters
+		$str = $this->_do_never_allowed($str);
+
+		/*
+		 * Images are Handled in a Special Way
+		 * - Essentially, we want to know that after all of the character 
+		 * conversion is done whether any unwanted, likely XSS, code was found.  
+		 * If not, we return TRUE, as the image is clean.
+		 * However, if the string post-conversion does not matched the 
+		 * string post-removal of XSS, then it fails, as there was unwanted XSS 
+		 * code found and removed/changed during processing.
+		 */
+
+		if ($is_image === TRUE)
+		{
+			return ($str == $converted_string) ? TRUE: FALSE;
+		}
+
+		log_message('debug', "XSS Filtering completed");
+		return $str;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Random Hash for protecting URLs
+	 *
+	 * @return	string
+	 */
+	public function xss_hash()
+	{
+		if ($this->_xss_hash == '')
+		{
+			if (phpversion() >= 4.2)
+			{
+				mt_srand();
+			}
+			else
+			{
+				mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
+			}
+
+			$this->_xss_hash = md5(time() + mt_rand(0, 1999999999));
+		}
+
+		return $this->_xss_hash;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * HTML Entities Decode
+	 *
+	 * This function is a replacement for html_entity_decode()
+	 *
+	 * In some versions of PHP the native function does not work
+	 * when UTF-8 is the specified character set, so this gives us
+	 * a work-around.  More info here:
+	 * http://bugs.php.net/bug.php?id=25670
+	 *
+	 * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
+	 * character set, and the PHP developers said they were not back porting the
+	 * fix to versions other than PHP 5.x.
+	 *
+	 * @param	string
+	 * @param	string
+	 * @return	string
+	 */
+	public function entity_decode($str, $charset='UTF-8')
+	{
+		if (stristr($str, '&') === FALSE) return $str;
+
+		// The reason we are not using html_entity_decode() by itself is because
+		// while it is not technically correct to leave out the semicolon
+		// at the end of an entity most browsers will still interpret the entity
+		// correctly.  html_entity_decode() does not convert entities without
+		// semicolons, so we are left with our own little solution here. Bummer.
+
+		if (function_exists('html_entity_decode') && 
+			(strtolower($charset) != 'utf-8'))
+		{
+			$str = html_entity_decode($str, ENT_COMPAT, $charset);
+			$str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
+			return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
+		}
+
+		// Numeric Entities
+		$str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
+		$str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
+
+		// Literal Entities - Slightly slow so we do another check
+		if (stristr($str, '&') === FALSE)
+		{
+			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
+		}
+
+		return $str;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Filename Security
+	 *
+	 * @param	string
+	 * @return	string
+	 */
+	public function sanitize_filename($str, $relative_path = FALSE)
+	{
+		$bad = array(
+						"../",
+						"<!--",
+						"-->",
+						"<",
+						">",
+						"'",
+						'"',
+						'&',
+						'$',
+						'#',
+						'{',
+						'}',
+						'[',
+						']',
+						'=',
+						';',
+						'?',
+						"%20",
+						"%22",
+						"%3c",		// <
+						"%253c",	// <
+						"%3e",		// >
+						"%0e",		// >
+						"%28",		// (
+						"%29",		// )
+						"%2528",	// (
+						"%26",		// &
+						"%24",		// $
+						"%3f",		// ?
+						"%3b",		// ;
+						"%3d"		// =
+					);
+		
+		if ( ! $relative_path)
+		{
+			$bad[] = './';
+			$bad[] = '/';
+		}
+
+		$str = remove_invisible_characters($str, FALSE);
+		return stripslashes(str_replace($bad, '', $str));
+	}
+
+	// ----------------------------------------------------------------
+
+	/**
+	 * Compact Exploded Words
+	 *
+	 * Callback function for xss_clean() to remove whitespace from
+	 * things like j a v a s c r i p t
+	 *
+	 * @param	type
+	 * @return	type
+	 */
+	protected function _compact_exploded_words($matches)
+	{
+		return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
+	}
+
+	// --------------------------------------------------------------------
+	
+	/*
+	 * Remove Evil HTML Attributes (like evenhandlers and style)
+	 *
+	 * It removes the evil attribute and either:
+	 * 	- Everything up until a space
+	 *		For example, everything between the pipes:
+	 *		<a |style=document.write('hello');alert('world');| class=link>
+	 * 	- Everything inside the quotes 
+	 *		For example, everything between the pipes:
+	 *		<a |style="document.write('hello'); alert('world');"| class="link">
+	 *
+	 * @param string $str The string to check
+	 * @param boolean $is_image TRUE if this is an image
+	 * @return string The string with the evil attributes removed
+	 */
+	protected function _remove_evil_attributes($str, $is_image)
+	{
+		// All javascript event handlers (e.g. onload, onclick, onmouseover), style, and xmlns
+		$evil_attributes = array('on\w*', 'style', 'xmlns');
+
+		if ($is_image === TRUE)
+		{
+			/*
+			 * Adobe Photoshop puts XML metadata into JFIF images, 
+			 * including namespacing, so we have to allow this for images.
+			 */
+			unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
+		}
+		
+		do {
+			$str = preg_replace(
+				"#<(/?[^><]+?)([^A-Za-z\-])(".implode('|', $evil_attributes).")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i",
+				"<$1$6",
+				$str, -1, $count
+			);
+		} while ($count);
+		
+		return $str;
+	}
+	
+	// --------------------------------------------------------------------
+
+	/**
+	 * Sanitize Naughty HTML
+	 *
+	 * Callback function for xss_clean() to remove naughty HTML elements
+	 *
+	 * @param	array
+	 * @return	string
+	 */
+	protected function _sanitize_naughty_html($matches)
+	{
+		// encode opening brace
+		$str = '&lt;'.$matches[1].$matches[2].$matches[3];
+
+		// encode captured opening or closing brace to prevent recursive vectors
+		$str .= str_replace(array('>', '<'), array('&gt;', '&lt;'), 
+							$matches[4]);
+
+		return $str;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * JS Link Removal
+	 *
+	 * Callback function for xss_clean() to sanitize links
+	 * This limits the PCRE backtracks, making it more performance friendly
+	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
+	 * PHP 5.2+ on link-heavy strings
+	 *
+	 * @param	array
+	 * @return	string
+	 */
+	protected function _js_link_removal($match)
+	{
+		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
+		
+		return str_replace($match[1], preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * JS Image Removal
+	 *
+	 * Callback function for xss_clean() to sanitize image tags
+	 * This limits the PCRE backtracks, making it more performance friendly
+	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
+	 * PHP 5.2+ on image tag heavy strings
+	 *
+	 * @param	array
+	 * @return	string
+	 */
+	protected function _js_img_removal($match)
+	{
+		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]));
+		
+		return str_replace($match[1], preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|livescript\:|mocha\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes), $match[0]);
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Attribute Conversion
+	 *
+	 * Used as a callback for XSS Clean
+	 *
+	 * @param	array
+	 * @return	string
+	 */
+	protected function _convert_attribute($match)
+	{
+		return str_replace(array('>', '<', '\\'), array('&gt;', '&lt;', '\\\\'), $match[0]);
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Filter Attributes
+	 *
+	 * Filters tag attributes for consistency and safety
+	 *
+	 * @param	string
+	 * @return	string
+	 */
+	protected function _filter_attributes($str)
+	{
+		$out = '';
+
+		if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
+		{
+			foreach ($matches[0] as $match)
+			{
+				$out .= preg_replace("#/\*.*?\*/#s", '', $match);
+			}
+		}
+
+		return $out;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * HTML Entity Decode Callback
+	 *
+	 * Used as a callback for XSS Clean
+	 *
+	 * @param	array
+	 * @return	string
+	 */
+	protected function _decode_entity($match)
+	{
+		return $this->entity_decode($match[0], strtoupper(config_item('charset')));
+	}
+
+	// --------------------------------------------------------------------
+	
+	/**
+	 * Validate URL entities
+	 *
+	 * Called by xss_clean()
+	 *
+	 * @param 	string	
+	 * @return 	string
+	 */
+	protected function _validate_entities($str)
+	{
+		/*
+		 * Protect GET variables in URLs
+		 */
+		
+		 // 901119URL5918AMP18930PROTECT8198
+		
+		$str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_hash()."\\1=\\2", $str);
+
+		/*
+		 * Validate standard character entities
+		 *
+		 * Add a semicolon if missing.  We do this to enable
+		 * the conversion of entities to ASCII later.
+		 *
+		 */
+		$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);
+
+		/*
+		 * Validate UTF16 two byte encoding (x00)
+		 *
+		 * Just as above, adds a semicolon if missing.
+		 *
+		 */
+		$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);
+
+		/*
+		 * Un-Protect GET variables in URLs
+		 */
+		$str = str_replace($this->xss_hash(), '&', $str);
+		
+		return $str;
+	}
+
+	// ----------------------------------------------------------------------
+
+	/**
+	 * Do Never Allowed
+	 *
+	 * A utility function for xss_clean()
+	 *
+	 * @param 	string
+	 * @return 	string
+	 */
+	protected function _do_never_allowed($str)
+	{
+		foreach ($this->_never_allowed_str as $key => $val)
+		{
+			$str = str_replace($key, $val, $str);
+		}
+
+		foreach ($this->_never_allowed_regex as $key => $val)
+		{
+			$str = preg_replace("#".$key."#i", $val, $str);
+		}
+		
+		return $str;
+	}
+
+	// --------------------------------------------------------------------
+
+	/**
+	 * Set Cross Site Request Forgery Protection Cookie
+	 *
+	 * @return	string
+	 */
+	protected function _csrf_set_hash()
+	{
+		if ($this->_csrf_hash == '')
+		{
+			// If the cookie exists we will use it's value.  
+			// We don't necessarily want to regenerate it with
+			// each page load since a page could contain embedded 
+			// sub-pages causing this feature to fail
+			if (isset($_COOKIE[$this->_csrf_cookie_name]) && 
+				$_COOKIE[$this->_csrf_cookie_name] != '')
+			{
+				return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
+			}
+			
+			return $this->_csrf_hash = md5(uniqid(rand(), TRUE));
+		}
+
+		return $this->_csrf_hash;
+	}
+
+}
+// END Security Class
+
+/* End of file Security.php */
+/* Location: ./system/libraries/Security.php */
+\ No newline at end of file
diff --git a/system/core/URI.php b/system/core/URI.php
index c43cde005..80dc62e58 100644
--- a/system/core/URI.php
+++ b/system/core/URI.php
@@ -64,14 +64,14 @@ class CI_URI {
 			// Is the request coming from the command line?
 			if (defined('STDIN'))
 			{
-				$this->uri_string = $this->_parse_cli_args();
+				$this->_set_uri_string($this->_parse_cli_args());
 				return;
 			}
 
 			// Let's try the REQUEST_URI first, this will work in most situations
 			if ($uri = $this->_detect_uri())
 			{
-				$this->uri_string = $uri;
+				$this->_set_uri_string($uri);
 				return;
 			}
 
@@ -80,7 +80,7 @@ class CI_URI {
 			$path = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : @getenv('PATH_INFO');
 			if (trim($path, '/') != '' && $path != "/".SELF)
 			{
-				$this->uri_string = $path;
+				$this->_set_uri_string($path);
 				return;
 			}
 
@@ -88,43 +88,54 @@ class CI_URI {
 			$path =  (isset($_SERVER['QUERY_STRING'])) ? $_SERVER['QUERY_STRING'] : @getenv('QUERY_STRING');
 			if (trim($path, '/') != '')
 			{
-				$this->uri_string = $path;
+				$this->_set_uri_string($path);
 				return;
 			}
 
 			// As a last ditch effort lets try using the $_GET array
 			if (is_array($_GET) && count($_GET) == 1 && trim(key($_GET), '/') != '')
 			{
-				$this->uri_string = key($_GET);
+				$this->_set_uri_string(key($_GET));
 				return;
 			}
 
 			// We've exhausted all our options...
 			$this->uri_string = '';
+			return;
 		}
-		else
-		{
-			$uri = strtoupper($this->config->item('uri_protocol'));
 
-			if ($uri == 'REQUEST_URI')
-			{
-				$this->uri_string = $this->_detect_uri();
-				return;
-			}
-			elseif ($uri == 'CLI')
-			{
-				$this->uri_string = $this->_parse_cli_args();
-				return;
-			}
+		$uri = strtoupper($this->config->item('uri_protocol'));
 
-			$this->uri_string = (isset($_SERVER[$uri])) ? $_SERVER[$uri] : @getenv($uri);
+		if ($uri == 'REQUEST_URI')
+		{
+			$this->_set_uri_string($this->_detect_uri());
+			return;
 		}
-
-		// If the URI contains only a slash we'll kill it
-		if ($this->uri_string == '/')
+		elseif ($uri == 'CLI')
 		{
-			$this->uri_string = '';
+			$this->_set_uri_string($this->_parse_cli_args());
+			return;
 		}
+
+		$path = (isset($_SERVER[$uri])) ? $_SERVER[$uri] : @getenv($uri);
+		$this->_set_uri_string($path);
+	}
+	
+	// --------------------------------------------------------------------
+
+	/**
+	 * Set the URI String
+	 *
+	 * @access	public
+	 * @return	string
+	 */
+	function _set_uri_string($str)
+	{
+		// Filter out control characters
+		$str = remove_invisible_characters($str, FALSE);
+		
+		// If the URI contains only a slash we'll kill it
+		$this->uri_string = ($str == '/') ? '' : $str;
 	}
 
 	// --------------------------------------------------------------------
diff --git a/system/core/Utf8.php b/system/core/Utf8.php
index 5d5a7ef72..2a27d1f35 100644
--- a/system/core/Utf8.php
+++ b/system/core/Utf8.php
@@ -107,7 +107,7 @@ class CI_Utf8 {
 	 */
 	function safe_ascii_for_xml($str)
 	{
-		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
+		return remove_invisible_characters($str, FALSE);
 	}
 
 	// --------------------------------------------------------------------
author	Phil Sturgeon <email@philsturgeon.co.uk>	2011-04-06 23:59:37 +0200
committer	Phil Sturgeon <email@philsturgeon.co.uk>	2011-04-06 23:59:37 +0200
commit	eda33d5ece128fcc11f6b46df38b3bfd76eb74ad (patch)
tree	0b5e72e236bde4ebd59c246e3cfd458baccb529e /system/core
parent	05fa61144667c85b0463f7e8baa6af00aa195dc6 (diff)
parent	a6a7ef10a755a718008fbee4b01b80a682b89c39 (diff)