From ec5caa57cc14a328b8b994d49cb8def8eb95aea7 Mon Sep 17 00:00:00 2001 From: Koosha KM Date: Thu, 28 Aug 2014 17:17:54 +0000 Subject: Bug 330707: Add optional support for MarkDown r=dkl,a=sgreen --- Bugzilla/Bug.pm | 7 +- Bugzilla/Comment.pm | 18 ++ Bugzilla/Constants.pm | 6 + Bugzilla/DB/Schema.pm | 3 +- Bugzilla/Install.pm | 2 + Bugzilla/Install/DB.pm | 4 + Bugzilla/Install/Requirements.pm | 9 + Bugzilla/Markdown.pm | 493 +++++++++++++++++++++++++++++++++++++++ Bugzilla/Template.pm | 17 ++ Bugzilla/WebService/Bug.pm | 4 +- 10 files changed, 560 insertions(+), 3 deletions(-) create mode 100644 Bugzilla/Markdown.pm (limited to 'Bugzilla') diff --git a/Bugzilla/Bug.pm b/Bugzilla/Bug.pm index 07266da9c..d03c63768 100644 --- a/Bugzilla/Bug.pm +++ b/Bugzilla/Bug.pm @@ -691,6 +691,8 @@ sub create { unless defined $params->{rep_platform}; # Make sure a comment is always defined. $params->{comment} = '' unless defined $params->{comment}; + $params->{is_markdown} = 0 + unless defined $params->{is_markdown} && $params->{is_markdown} eq '1'; $class->check_required_create_fields($params); $params = $class->run_create_validators($params); @@ -704,6 +706,7 @@ sub create { my $blocked = delete $params->{blocked}; my $keywords = delete $params->{keywords}; my $creation_comment = delete $params->{comment}; + my $is_markdown = delete $params->{is_markdown}; my $see_also = delete $params->{see_also}; # We don't want the bug to appear in the system until it's correctly @@ -791,6 +794,7 @@ sub create { # We now have a bug id so we can fill this out $creation_comment->{'bug_id'} = $bug->id; + $creation_comment->{'is_markdown'} = $is_markdown; # Insert the comment. We always insert a comment on bug creation, # but sometimes it's blank. @@ -2413,7 +2417,8 @@ sub set_all { # there are lots of things that want to check if we added a comment. $self->add_comment($params->{'comment'}->{'body'}, { isprivate => $params->{'comment'}->{'is_private'}, - work_time => $params->{'work_time'} }); + work_time => $params->{'work_time'}, + is_markdown => $params->{'comment'}->{'is_markdown'} }); } if (exists $params->{alias} && $params->{alias}{set}) { diff --git a/Bugzilla/Comment.pm b/Bugzilla/Comment.pm index c235b8d30..3dabe6702 100644 --- a/Bugzilla/Comment.pm +++ b/Bugzilla/Comment.pm @@ -43,6 +43,7 @@ use constant DB_COLUMNS => qw( already_wrapped type extra_data + is_markdown ); use constant UPDATE_COLUMNS => qw( @@ -65,6 +66,7 @@ use constant VALIDATORS => { work_time => \&_check_work_time, thetext => \&_check_thetext, isprivate => \&_check_isprivate, + is_markdown => \&Bugzilla::Object::check_boolean, extra_data => \&_check_extra_data, type => \&_check_type, }; @@ -177,6 +179,7 @@ sub body { return $_[0]->{'thetext'}; } sub bug_id { return $_[0]->{'bug_id'}; } sub creation_ts { return $_[0]->{'bug_when'}; } sub is_private { return $_[0]->{'isprivate'}; } +sub is_markdown { return $_[0]->{'is_markdown'}; } sub work_time { # Work time is returned as a string (see bug 607909) return 0 if $_[0]->{'work_time'} + 0 == 0; @@ -274,6 +277,7 @@ sub body_full { sub set_is_private { $_[0]->set('isprivate', $_[1]); } sub set_type { $_[0]->set('type', $_[1]); } sub set_extra_data { $_[0]->set('extra_data', $_[1]); } +sub set_is_markdown { $_[0]->set('is_markdown', $_[1]); } sub add_tag { my ($self, $tag) = @_; @@ -522,6 +526,10 @@ C Time spent as related to this comment. C Comment is marked as private. +=item C + +C Whether this comment needs L rendering to be applied. + =item C If this comment is stored in the database word-wrapped, this will be C<1>. @@ -617,6 +625,16 @@ A string, the full text of the comment as it would be displayed to an end-user. =cut +=head2 Modifiers + +=over + +=item C + +Sets whether this comment needs L rendering to be applied. + +=back + =head1 B =over diff --git a/Bugzilla/Constants.pm b/Bugzilla/Constants.pm index 59333795f..397a8e65f 100644 --- a/Bugzilla/Constants.pm +++ b/Bugzilla/Constants.pm @@ -191,6 +191,8 @@ use Memoize; AUDIT_REMOVE MOST_FREQUENT_THRESHOLD + + MARKDOWN_TAB_WIDTH ); @Bugzilla::Constants::EXPORT_OK = qw(contenttypes); @@ -628,6 +630,10 @@ use constant AUDIT_REMOVE => '__remove__'; # on the "Most frequently reported bugs" page. use constant MOST_FREQUENT_THRESHOLD => 2; +# The number of spaces used to represent each tab character +# by Markdown engine +use constant MARKDOWN_TAB_WIDTH => 2; + sub bz_locations { # Force memoize() to re-compute data per project, to avoid # sharing the same data across different installations. diff --git a/Bugzilla/DB/Schema.pm b/Bugzilla/DB/Schema.pm index d1c1dc7e9..ebe2cb426 100644 --- a/Bugzilla/DB/Schema.pm +++ b/Bugzilla/DB/Schema.pm @@ -410,7 +410,8 @@ use constant ABSTRACT_SCHEMA => { DEFAULT => 'FALSE'}, type => {TYPE => 'INT2', NOTNULL => 1, DEFAULT => '0'}, - extra_data => {TYPE => 'varchar(255)'} + extra_data => {TYPE => 'varchar(255)'}, + is_markdown => {TYPE => 'BOOLEAN', NOTNULL => 1, DEFAULT => 'FALSE'} ], INDEXES => [ longdescs_bug_id_idx => [qw(bug_id work_time)], diff --git a/Bugzilla/Install.pm b/Bugzilla/Install.pm index 07bc9d6c3..5a2266e36 100644 --- a/Bugzilla/Install.pm +++ b/Bugzilla/Install.pm @@ -90,6 +90,8 @@ sub SETTINGS { bugmail_new_prefix => { options => ['on', 'off'], default => 'on' }, # 2013-07-26 joshi_sunil@in.com -- Bug 669535 possible_duplicates => { options => ['on', 'off'], default => 'on' }, + # 2014-05-24 koosha.khajeh@gmail.com -- Bug 1014164 + use_markdown => { options => ['on', 'off'], default => 'on' }, } }; diff --git a/Bugzilla/Install/DB.pm b/Bugzilla/Install/DB.pm index 282aa7f10..0b0603970 100644 --- a/Bugzilla/Install/DB.pm +++ b/Bugzilla/Install/DB.pm @@ -726,6 +726,10 @@ sub update_table_definitions { # 2014-08-11 sgreen@redhat.com - Bug 1012506 _update_alias(); + # 2014-08-14 koosha.khajeh@gmail.com - Bug 330707 + $dbh->bz_add_column('longdescs', 'is_markdown', + {TYPE => 'BOOLEAN', NOTNULL => 1, DEFAULT => 'FALSE'}); + ################################################################ # New --TABLE-- changes should go *** A B O V E *** this point # ################################################################ diff --git a/Bugzilla/Install/Requirements.pm b/Bugzilla/Install/Requirements.pm index 06b061f57..2ceb01cfd 100644 --- a/Bugzilla/Install/Requirements.pm +++ b/Bugzilla/Install/Requirements.pm @@ -405,6 +405,14 @@ sub OPTIONAL_MODULES { version => '0', feature => ['memcached'], }, + + # Markdown + { + package => 'Text-Markdown', + module => 'Text::Markdown', + version => '1.0.26', + feature => ['markdown'], + } ); my $extra_modules = _get_extension_requirements('OPTIONAL_MODULES'); @@ -428,6 +436,7 @@ use constant FEATURE_FILES => ( 'Bugzilla/JobQueue/*', 'jobqueue.pl'], patch_viewer => ['Bugzilla/Attachment/PatchReader.pm'], updates => ['Bugzilla/Update.pm'], + markdown => ['Bugzilla/Markdown.pm'], memcached => ['Bugzilla/Memcache.pm'], ); diff --git a/Bugzilla/Markdown.pm b/Bugzilla/Markdown.pm new file mode 100644 index 000000000..c5a34fb6e --- /dev/null +++ b/Bugzilla/Markdown.pm @@ -0,0 +1,493 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. + +package Bugzilla::Markdown; + +use 5.10.1; +use strict; +use warnings; + +use Bugzilla::Constants; +use Bugzilla::Template; + +use Digest::MD5 qw(md5_hex); + +use parent qw(Text::Markdown); + +@Bugzilla::Markdown::EXPORT = qw(new); + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +our ($g_nested_brackets, $g_nested_parens); +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; +# Doesn't allow for whitespace, because we're using it to match URLs: +$g_nested_parens = qr{ + (?> # Atomic matching + [^()\s]+ # Anything other than parens or whitespace + | + \( + (??{ $g_nested_parens }) # Recursive set of nested brackets + \) + )* +}x; + +our %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!~') { + $g_escape_table{$char} = md5_hex($char); +} + +sub new { + my $invocant = shift; + my $class = ref $invocant || $invocant; + return $class->SUPER::new(tab_width => MARKDOWN_TAB_WIDTH, + # Bugzilla uses HTML not XHTML + empty_element_suffix => '>'); +} + +sub markdown { + my $self = shift; + my $text = shift; + my $user = Bugzilla->user; + + if (Bugzilla->feature('markdown') + && $user->settings->{use_markdown}->{is_enabled} + && $user->setting('use_markdown') eq 'on') + { + return $self->SUPER::markdown($text, @_); + } + + return Bugzilla::Template::quoteUrls($text); +} + +sub _Markdown { + my $self = shift; + my $text = shift; + + $text = Bugzilla::Template::quoteUrls($text); + + return $self->SUPER::_Markdown($text, @_); +} + +sub _RunSpanGamut { + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + my ($self, $text) = @_; + + $text = $self->_DoCodeSpans($text); + $text = $self->_EscapeSpecialCharsWithinTagAttributes($text); + $text = $self->_EscapeSpecialChars($text); + + $text = $self->_DoAnchors($text); + + # Strikethroughs is Bugzilla's extension + $text = $self->_DoStrikethroughs($text); + + $text = $self->_DoAutoLinks($text); + $text = $self->_EncodeAmpsAndAngles($text); + $text = $self->_DoItalicsAndBold($text); + + $text =~ s/ {2,}\n/ {empty_element_suffix}\n/g; + + return $text; +} + +# Override to check for HTML-escaped <>" chars. +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = \$1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + (?:<)?\2(?:>)? # url = \$2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + (?:"|\() + (.+?) # title = \$3 + (?:"|\)) + [ \t]* + )? # title is optional + (?:\n+|\Z) + }{}omx) { + $self->{_urls}{lc $1} = $self->_EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $self->{_titles}{lc $1} = $3; + $self->{_titles}{lc $1} =~ s/"/"/g; + } + + } + + return $text; +} + +# We need to look for HTML-escaped '<' and '>' (i.e. < and >). +# We also remove Email linkification from the original implementation +# as it is already done in Bugzilla's quoteUrls(). +sub _DoAutoLinks { + my ($self, $text) = @_; + + $text =~ s{(?:<|<)((?:https?|ftp):[^'">\s]+)(?:>|>)}{$1}gi; + return $text; +} + +# The main reasons for overriding this method are +# resolving URL conflicts with Bugzilla's quoteUrls() +# and also changing '"' to '"' in regular expressions wherever needed. +sub _DoAnchors { +# +# Turn Markdown link shortcuts into tags. +# + my ($self, $text) = @_; + + # We revert linkifications of non-email links and only + # those links whose URL and title are the same because + # this way we can be sure that link is generated by quoteUrls() + $text =~ s@\1@$1@xmg; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + $link_id =~ s{[ ]*\n}{ }g; # turn embedded newlines into spaces + + $self->_GenerateAnchor($whole_match, $link_text, $link_id); + }xsge; + + # + # Next, inline-style links: [link text](url "optional title") + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + \( # literal paren + [ \t]* + ($g_nested_parens) # href = $3 + [ \t]* + ( # $4 + ("|') # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + [ \t]* # ignore any spaces/tabs between closing quote and ) + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + # Remove Bugzilla quoteUrls() linkification + if ($url =~ /^a href="/ && $url =~ m|]+>//; + $url =~ s@_GenerateAnchor($whole_match, $link_text, undef, $url, $title); + }xsge; + + # + # Last, handle reference-style shortcuts: [link text] + # These must come last in case you've also got [link test][1] + # or [link test](/foo) + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ([^\[\]]+) # link text = $2; can't contain '[' or ']' + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + (my $link_id = lc $2) =~ s{[ ]*\n}{ }g; # lower-case and turn embedded newlines into spaces + + $self->_GenerateAnchor($whole_match, $link_text, $link_id); + }xsge; + + return $text; +} + +# The purpose of overriding this function is to add support +# for a Github Flavored Markdown (GFM) feature called 'Multiple +# underscores in words'. The standard markdown specification +# specifies the underscore for making the text emphasized/bold. +# However, some variable names in programming languages contain underscores +# and we do not want a part of those variables to look emphasized/bold. +# Instead, we render them as the way they originally are. +sub _DoItalicsAndBold { + my ($self, $text) = @_; + + # Handle at beginning of lines: + $text =~ s{ (^__ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S)) } + { + my $result = _has_multiple_underscores($2) ? $1 : "$2"; + $result; + }gsxe; + + $text =~ s{ ^\*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{$1}gsx; + + $text =~ s{ (^_ (?=\S) (.+?) (?<=\S) _ (?!\S)) } + { + my $result = _has_multiple_underscores($2) ? $1 : "$2"; + $result; + }gsxe; + + $text =~ s{ ^\* (?=\S) (.+?) (?<=\S) \* }{$1}gsx; + + # must go first: + $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S) ) } + { + my $result = _has_multiple_underscores($2) ? $1 : "$2"; + $result; + }gsxe; + + + $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{$1}gsx; + + $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (?!\S) ) } + { + my $result = _has_multiple_underscores($2) ? $1 : "$2"; + $result; + }gsxe; + + $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{$1}gsx; + + # And now, a second pass to catch nested strong and emphasis special cases + $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (\S*) ) } + { + my $result = _has_multiple_underscores($3) ? $1 : "$2$3"; + $result; + }gsxe; + + $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{$1}gsx; + $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (\S*) ) } + { + my $result = _has_multiple_underscores($3) ? $1 : "$2$3"; + $result; + }gsxe; + + $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{$1}gsx; + + return $text; +} + +# Override this function to ignore 'wrap_in_p_tags' from +# the caller and to not generate

tags around the output. +sub _FormParagraphs { + my ($self, $text) = @_; + return $self->SUPER::_FormParagraphs($text, { wrap_in_p_tags => 0 }); +} + +sub _DoStrikethroughs { + my ($self, $text) = @_; + + $text =~ s{ ^ ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{$1}gsx; + $text =~ s{ (?<=_|[^~\w]) ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{$1}gsx; + + return $text; +} + +# The original _DoCodeSpans() uses the 's' modifier in its regex +# which prevents _DoCodeBlocks() to match GFM fenced code blocks. +# We copy the code from the original implementation and remove the +# 's' modifier from it. +sub _DoCodeSpans { + my ($self, $text) = @_; + + $text =~ s@ + (?_EncodeCode($c); + "$c"; + @egx; + + return $text; +} + +# Override to add GFM Fenced Code Blocks +sub _DoCodeBlocks { + my ($self, $text) = @_; + + $text =~ s{ + ^ `{3,} [\s\t]* \n + ( # $1 = the entire code block + (?: .* \n+)+? + ) + `{3,} [\s\t]* $ + }{ + my $codeblock = $1; + my $result; + + $codeblock = $self->_EncodeCode($codeblock); + $codeblock = $self->_Detab($codeblock); + $codeblock =~ s/\n\z//; # remove the trailing newline + + $result = "\n\n

" . $codeblock . "
\n\n"; + $result; + }egmx; + + # And now do the standard code blocks + $text = $self->SUPER::_DoCodeBlocks($text); + + return $text; +} + +sub _EncodeCode { + my ($self, $text) = @_; + + # We need to unescape the escaped HTML characters in code blocks. + # These are the reverse of the escapings done in Bugzilla::Util::html_quote() + $text =~ s/<//g; + $text =~ s/"/"/g; + $text =~ s/@/@/g; + # '&' substitution must be the last one, otherwise a literal like '>' + # will turn to '>' because '&' is already changed to '&' in Bugzilla::Util::html_quote(). + # In other words, html_quote() will change '>' to '&gt;' and then we will + # change '&gt' -> '>' -> '>' if we write this substitution as the first one. + $text =~ s/&/&/g; + $text = $self->SUPER::_EncodeCode($text); + $text =~ s/~/$g_escape_table{'~'}/go; + + return $text; +} + +sub _EncodeBackslashEscapes { + my ($self, $text) = @_; + + $text = $self->SUPER::_EncodeBackslashEscapes($text); + $text =~ s/\\~/$g_escape_table{'~'}/go; + + return $text; +} + +sub _UnescapeSpecialChars { + my ($self, $text) = @_; + + $text = $self->SUPER::_UnescapeSpecialChars($text); + $text =~ s/$g_escape_table{'~'}/~/go; + + return $text; +} + +# Check if the passed string is of the form multiple_underscores_in_a_word. +# To check that, we first need to make sure that the string does not contain +# any white-space. Then, if the string is composed of non-space chunks which +# are bound together with underscores, the string has the desired form. +sub _has_multiple_underscores { + my $string = shift; + return 0 unless defined($string) && length($string); + return 0 if $string =~ /[\t\s]+/; + return 1 if scalar (split /_/, $string) > 1; + return 0; +} + +1; + +__END__ + +=head1 NAME + +Bugzilla::Markdown - Generates HTML output from structured plain-text input. + +=head1 SYNOPSIS + + use Bugzilla::Markdown; + + my $markdown = Bugzilla::Markdown->new(); + print $markdown->markdown($text); + +=head1 DESCRIPTION + +Bugzilla::Markdown implements a Markdown engine that produces +an HTML-based output from a given plain-text input. + +The majority of the implementation is done by C +CPAN module. It also applies the linkifications done in L +to the input resulting in an output which is a combination of both Markdown +structures and those defined by Bugzilla itself. + +=head2 Accessors + +=over + +=item C + +C Produces an HTML-based output string based on the structures +and format defined in the given plain-text input. + +=over + +=item B + +=over + +=item C + +C A plain-text string which includes Markdown structures. + +=back + +=back + +=back diff --git a/Bugzilla/Template.pm b/Bugzilla/Template.pm index 7e3527857..78a3e4120 100644 --- a/Bugzilla/Template.pm +++ b/Bugzilla/Template.pm @@ -807,6 +807,23 @@ sub create { 1 ], + markdown => [ sub { + my ($context, $bug, $comment, $user) = @_; + return sub { + my $text = shift; + return unless $text; + + if ((ref($comment) eq 'HASH' && $comment->{is_markdown}) + || (ref($comment) eq 'Bugzilla::Comment' && $comment->is_markdown)) + { + return Bugzilla->markdown->markdown($text); + } + return quoteUrls($text, $bug, $comment, $user); + }; + }, + 1 + ], + bug_link => [ sub { my ($context, $bug, $options) = @_; return sub { diff --git a/Bugzilla/WebService/Bug.pm b/Bugzilla/WebService/Bug.pm index 09a14ebf3..0062ecc14 100644 --- a/Bugzilla/WebService/Bug.pm +++ b/Bugzilla/WebService/Bug.pm @@ -331,7 +331,9 @@ sub render_comment { Bugzilla->switch_to_shadow_db(); my $bug = $params->{id} ? Bugzilla::Bug->check($params->{id}) : undef; - my $tmpl = '[% text FILTER quoteUrls(bug) %]'; + my $markdown = $params->{markdown} ? 1 : 0; + my $tmpl = $markdown ? '[% text FILTER markdown(bug, { is_markdown => 1 }) %]' : '[% text FILTER markdown(bug) %]'; + my $html; my $template = Bugzilla->template; $template->process( -- cgit v1.2.3-24-g4f1b