From 2b7de06bd0e2184c9bffc971490f7f640c736a6a Mon Sep 17 00:00:00 2001
From: Derek Jones <derek.jones@ellislab.com>
Date: Fri, 5 Mar 2010 10:31:08 -0600
Subject: added entity_decode() to Typography helper, updated auto_typography()

---
 system/helpers/typography_helper.php | 59 ++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/system/helpers/typography_helper.php b/system/helpers/typography_helper.php
index 5621e6dd0..712b57509 100644
--- a/system/helpers/typography_helper.php
+++ b/system/helpers/typography_helper.php
@@ -54,16 +54,71 @@ if ( ! function_exists('nl2br_except_pre'))
  *
  * @access	public
  * @param	string
+ * @param	bool	whether to strip javascript event handlers
  * @param	bool	whether to reduce multiple instances of double newlines to two
  * @return	string
  */
 if ( ! function_exists('auto_typography'))
 {
-	function auto_typography($str, $reduce_linebreaks = FALSE)
+	function auto_typography($str, $strip_js_event_handlers = TRUE, $reduce_linebreaks = FALSE) // NOTE: $strip_js_event_handlers is now 2nd; $reduce_linebreaks moved to 3rd
 	{
 		$CI =& get_instance();	
 		$CI->load->library('typography');
-		return $CI->typography->auto_typography($str, $reduce_linebreaks);
+		return $CI->typography->auto_typography($str, $strip_js_event_handlers, $reduce_linebreaks);
+	}
+}
+
+
+// --------------------------------------------------------------------
+
+/**
+ * HTML Entities Decode
+ *
+ * This function is a replacement for html_entity_decode() that also
+ * decodes numeric entities written without the trailing semicolon.
+ *
+ * In some versions of PHP the native function does not work
+ * when UTF-8 is the specified character set, so this gives us
+ * a work-around (the PHP developers said they were not back porting
+ * the fix to versions other than PHP 5.x).  More info here:
+ * http://bugs.php.net/bug.php?id=25670
+ *
+ * @access	public
+ * @param	string	the string to decode
+ * @param	string	the character set of the string
+ * @return	string
+ */
+if ( ! function_exists('entity_decode'))
+{
+	// Callbacks for preg_replace_callback(); the /e modifier used previously
+	// is gone in PHP 7+, and eval'ing "chr(065)" read the digits as octal.
+	function _entity_decode_hex($match) { return chr(hexdec($match[1]) & 0xFF); }
+	function _entity_decode_dec($match) { return chr((int) $match[1] & 0xFF); }
+
+	function entity_decode($str, $charset='UTF-8')
+	{
+		if (stristr($str, '&') === FALSE) return $str;
+
+		// html_entity_decode() skips entities that lack the closing semicolon,
+		// which most browsers still interpret, so numeric ones are handled here.
+		if (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')))
+		{
+			$str = html_entity_decode($str, ENT_COMPAT, $charset);
+			$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', '_entity_decode_hex', $str);
+			return preg_replace_callback('~&#([0-9]{2,4})~', '_entity_decode_dec', $str);
+		}
+
+		// Numeric Entities
+		$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5});{0,1}~i', '_entity_decode_hex', $str);
+		$str = preg_replace_callback('~&#([0-9]{2,4});{0,1}~', '_entity_decode_dec', $str);
+
+		// Literal Entities - Slightly slow so only done when an ampersand remains
+		if (stristr($str, '&') !== FALSE)
+		{
+			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
+		}
+
+		return $str;
 	}
 }
 
-- 
cgit v1.2.3-24-g4f1b