From 55d92323fddce45ee917751c41459052fb882cc1 Mon Sep 17 00:00:00 2001 From: Albert Ting Date: Fri, 14 Aug 2015 02:04:34 +0000 Subject: Bug 1154116 - Some markdown structures are not ignored in fenced code blocks r=dkl,a=sgreen --- Bugzilla/Markdown.pm | 50 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 13 deletions(-) (limited to 'Bugzilla') diff --git a/Bugzilla/Markdown.pm b/Bugzilla/Markdown.pm index 5ee476876..9c675099b 100644 --- a/Bugzilla/Markdown.pm +++ b/Bugzilla/Markdown.pm @@ -44,6 +44,8 @@ $g_nested_parens = qr{ }x; our %g_escape_table; +my @code_blocks; + foreach my $char (split //, '\\`*_{}[]()>#+-.!~') { $g_escape_table{$char} = md5_hex($char); } @@ -104,13 +106,37 @@ sub _RunSpanGamut { return $text; } +# We first replace all fenced code blocks with just their +# surrounding backticks and an empty body to know where +# they are exactly for later processing. The bodies of +# blocks will be in an array. This measure is taken to not +# interpret fenced code blocks contents as possible markdown +# structures. The contents of the body will be processed after +# processing markdown structures. +sub _removeFencedCodeBlocks { + my ($self, $text) = @_; + $text =~ s{ + ^ `{3,} [\s\t]* \n + ( # $1 = the entire code block + (?: .* \n+)+? + ) + `{3,} [\s\t]* $ + }{ + push @code_blocks, $1; + "%%%FENCED_BLOCK%%%"; + }egmx; + return $text; +} + # Override to check for HTML-escaped <>" chars. sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# my ($self, $text) = @_; + + $text = $self->_removeFencedCodeBlocks($text); + # + # Strips link definitions from text, stores the URLs and titles in + # hash references. 
+ # my $less_than_tab = $self->{tab_width} - 1; # Link defs are in the form: ^[id]: url "optional title" @@ -377,14 +403,14 @@ sub _DoCodeSpans { sub _DoCodeBlocks { my ($self, $text) = @_; + # First, do the standard code blocks to avoid generating nested code blocks + # if the block is both indented and is surrounded by backticks. + $text = $self->SUPER::_DoCodeBlocks($text); + $text =~ s{ - ^ `{3,} [\s\t]* \n - ( # $1 = the entire code block - (?: .* \n+)+? - ) - `{3,} [\s\t]* $ + ^ %%%FENCED_BLOCK%%% }{ - my $codeblock = $1; + my $codeblock = shift @code_blocks; my $result; $codeblock = $self->_EncodeCode($codeblock); @@ -395,9 +421,6 @@ sub _DoCodeBlocks { $result; }egmx; - # And now do the standard code blocks - $text = $self->SUPER::_DoCodeBlocks($text); - return $text; } @@ -417,6 +440,7 @@ sub _DoBlockQuotes { my $bq = $1; $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = $self->_removeFencedCodeBlocks($bq); $bq = $self->_RunBlockGamut($bq, {wrap_in_p_tags => 1}); # recurse $bq =~ s/^/ /mg; # These leading spaces screw with
<pre> content, so we need to fix that:
-- 
cgit v1.2.3-24-g4f1b