summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDerek Jones <derek.jones@ellislab.com>2008-12-11 15:31:33 +0100
committerDerek Jones <derek.jones@ellislab.com>2008-12-11 15:31:33 +0100
commita633ec293e9647aaca615a829171d641fbac9035 (patch)
treefdf7dee6038edffee197a04a5e9151085e327cc7
parent40a2fc8ab781130761237a29455718d24cb23821 (diff)
various typography bugs, HTML comment parsing, <pre> parsing, more accurate block matching, etc.
-rw-r--r--system/libraries/Typography.php41
-rw-r--r--user_guide/changelog.html1
2 files changed, 30 insertions, 12 deletions
diff --git a/system/libraries/Typography.php b/system/libraries/Typography.php
index 27fa42192..fa1ec1999 100644
--- a/system/libraries/Typography.php
+++ b/system/libraries/Typography.php
@@ -88,11 +88,25 @@ class CI_Typography {
$str = preg_replace("/\n\n+/", "\n\n", $str);
}
+ // HTML comment tags don't conform to patterns of normal tags, so pull them out separately, only if needed
+ $html_comments = array();
+ if (strpos($str, '<!--') !== FALSE)
+ {
+ if (preg_match_all("#(<!\-\-.*?\-\->)#s", $str, $matches))
+ {
+ for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
+ {
+ $html_comments[] = $matches[0][$i];
+ $str = str_replace($matches[0][$i], '{@HC'.$i.'}', $str);
+ }
+ }
+ }
+
// Convert quotes within tags to temporary markers. We don't want quotes converted
// within tags so we'll temporarily convert them to {@DQ} and {@SQ}
// and we don't want double dashes converted to emdash entities, so they are marked with {@DD}
// likewise double spaces are converted to {@NBS} to prevent entity conversion
- if (preg_match_all("#\<.+?>#si", $str, $matches))
+ if (preg_match_all("#<.+?>#si", $str, $matches))
{
for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
{
@@ -104,7 +118,7 @@ class CI_Typography {
if ($this->protect_braced_quotes === TRUE)
{
- if (preg_match_all("#\{.+?}#si", $str, $matches))
+ if (preg_match_all("#\{.+?\}#si", $str, $matches))
{
for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
{
@@ -114,7 +128,7 @@ class CI_Typography {
}
}
}
-
+
// Convert "ignore" tags to temporary marker. The parser splits out the string at every tag
// it encounters. Certain inline tags, like image tags, links, span tags, etc. will be
// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
@@ -139,7 +153,7 @@ class CI_Typography {
{
// Are we dealing with a tag? If so, we'll skip the processing for this cycle.
// Well also set the "process" flag which allows us to skip <pre> tags and a few other things.
- if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
+ if (preg_match("#<(/*)(".$this->block_elements.")[\s.]*?>#", $chunk, $match))
{
if (preg_match("#".$this->skip_elements."#", $match[2]))
{
@@ -157,23 +171,26 @@ class CI_Typography {
if ($process == FALSE)
{
- $str .= $chunk;
+ $str .= ($this->last_block_element == 'pre') ? $chunk : $this->format_characters($chunk);
continue;
}
// Convert Newlines into <p> and <br /> tags
- $str .= $this->_format_newlines($chunk);
+ $str .= $this->format_characters($this->_format_newlines($chunk));
}
// is the whole of the content inside a block level element?
- if ( ! preg_match("/^<(?:".$this->block_elements.")/i", $str, $match))
+ if ( ! preg_match("/^\s*<(?:".$this->block_elements.")/i", $str, $match))
{
$str = "<p>{$str}</p>";
}
-
- // Convert quotes, elipsis, and em-dashes
- $str = $this->format_characters($str);
-
+
+ // restore HTML comments
+ for ($i = 0, $total = count($html_comments); $i < $total; $i++)
+ {
+ $str = preg_replace('#(?:<p>)?{@HC'.$i.'}(?:\s*</p>)?#s', $html_comments[$i], $str);
+ }
+
// Final clean up
$table = array(
@@ -199,7 +216,7 @@ class CI_Typography {
'/\{@NBS\}/' => ' '
);
-
+
// Do we need to reduce empty lines?
if ($reduce_linebreaks === TRUE)
{
diff --git a/user_guide/changelog.html b/user_guide/changelog.html
index fe7b36d76..1ddec5456 100644
--- a/user_guide/changelog.html
+++ b/user_guide/changelog.html
@@ -97,6 +97,7 @@ SVN Revision: </p>
<li>Changed a few docblock comments to reflect actual return values.</li>
<li>Fixed a bug with high ascii characters in subject and from email headers.</li>
<li>Fixed a bug in xss_clean() where whitespace following a validated character entity would not be preserved.</li>
+ <li>Fixed a bug where HTML comments and &lt;pre&gt; tags were being parsed in Typography::auto_typography()</li>
</ul>