diff options
author | bbaetz%student.usyd.edu.au <> | 2002-07-03 11:45:12 +0200 |
---|---|---|
committer | bbaetz%student.usyd.edu.au <> | 2002-07-03 11:45:12 +0200 |
commit | 690839509f1ce985205824e8092d5893a2130ba6 (patch) | |
tree | 685d29eadb93a74bf85cb81e48c37cd116a438d8 | |
parent | 8f0251144e71826cd4f22999c448102848fb95f6 (diff) | |
download | bugzilla-690839509f1ce985205824e8092d5893a2130ba6.tar.gz bugzilla-690839509f1ce985205824e8092d5893a2130ba6.tar.xz |
Bug 151871 - rewrite quoteUrls to fix major performance problems, and a
few other misc bugs too.
r=myk x2
-rw-r--r-- | CGI.pl | 4 | ||||
-rw-r--r-- | globals.pl | 176 |
2 files changed, 92 insertions, 88 deletions
@@ -792,8 +792,8 @@ Content-type: text/html if (!defined $nexturl || $nexturl eq "") { # Sets nexturl to be argv0, stripping everything up to and # including the last slash (or backslash on Windows). - $0 =~ m:[^/\\]*$:; - $nexturl = $&; + $0 =~ m:([^/\\]*)$:; + $nexturl = $1; } my $method = "POST"; # We always want to use POST here, because we're submitting a password and don't diff --git a/globals.pl b/globals.pl index fdb189498..a2b4e68be 100644 --- a/globals.pl +++ b/globals.pl @@ -974,104 +974,104 @@ sub detaint_natural { # module by Gareth Rees <garethr@cre.canon.co.uk>. It has been heavily hacked, # all that is really recognizable from the original is bits of the regular # expressions. +# This has been rewritten to be faster, mainly by substituting 'as we go'. +# If you want to modify this routine, read the comments carefully sub quoteUrls { my ($text) = (@_); return $text unless $text; - - my $base = Param('urlbase'); - - my $protocol = join '|', - qw(afs cid ftp gopher http https mid news nntp prospero telnet wais); - - my $count = 0; - - # Now, quote any "#" characters so they won't confuse stuff later - $text =~ s/#/%#/g; - # Next, find anything that looks like a URL or an email address and - # pull them out the the text, replacing them with a "##<digits>## - # marker, and writing them into an array. All this confusion is - # necessary so that we don't match on something we've already replaced, - # which can happen if you do multiple s///g operations. + # We use /g for speed, but uris can have other things inside them + # (http://foo/bug#3 for example). Filtering that out filters valid + # bug refs out, so we have to do replacements. + # mailto can't contain space or #, so we don't have to bother for that + # Do this by escaping \0 to \1\0, and replacing matches with \0\0$count\0\0 + # \0 is used because its unliklely to occur in the text, so the cost of + # doing this should be very small + # Also, \0 won't appear in the value_quote'd bug title, so we don't have + # to worry about bogus substitutions from there + + # escape the 2nd escape char we're using + my $chr1 = chr(1); + $text =~ s/\0/$chr1\0/g; + + # However, note that adding the title (for buglinks) can affect things + # In particular, attachment matches go before bug titles, so that titles + # with 'attachment 1' don't double match. + # Dupe checks go afterwards, because that uses ^ and \Z, which won't occur + # if it was subsituted as a bug title (since that always involve leading + # and trailing text) + + # Because of entities, its easier (and quicker) to do this before escaping my @things; - while ($text =~ s%((mailto:)?([\w\.\-\+\=]+\@[\w\-]+(?:\.[\w\-]+)+)\b| - (\b((?:$protocol):[^ \t\n<>"]+[\w/])))%"##$count##"%exo) { - my $item = $&; - - $item = value_quote($item); - - if ($item !~ m/^$protocol:/o && $item !~ /^mailto:/) { - # We must have grabbed this one because it looks like an email - # address. - $item = qq{<A HREF="mailto:$item">$item</A>}; - } else { - $item = qq{<A HREF="$item">$item</A>}; - } - - $things[$count++] = $item; - } - # Either a comment string or no comma and a compulsory #. - while ($text =~ s/\bbug(\s|%\#)*(\d+),?\s*comment\s*(\s|%\#)(\d+)/"##$count##"/ei) { - my $item = $&; - my $bugnum = $2; - my $comnum = $4; - $item = GetBugLink($bugnum, $item); - $item =~ s/(id=\d+)/$1#c$comnum/; - $things[$count++] = $item; - } - while ($text =~ s/\bcomment(\s|%\#)*(\d+)/"##$count##"/ei) { - my $item = $&; - my $num = $2; - $item = value_quote($item); - $item = qq{<A HREF="#c$num">$item</A>}; - $things[$count++] = $item; - } - while ($text =~ s/\bbug(\s|%\#)*(\d+)/"##$count##"/ei) { - my $item = $&; - my $num = $2; - $item = GetBugLink($num, $item); - $things[$count++] = $item; - } - while ($text =~ s/\b(Created an )?attachment(\s|%\#)*(\(id=)?(\d+)\)?/"##$count##"/ei) { - my $item = $&; - my $num = $4; - $item = value_quote($item); # Not really necessary, since we know - # there's no special chars in it. - $item = qq{<a href="attachment.cgi?id=$num&action=view">$item</a>}; - $things[$count++] = $item; - } - while ($text =~ s/\*\*\* This bug has been marked as a duplicate of (\d+) \*\*\*/"##$count##"/ei) { - my $item = $&; - my $num = $1; - my $bug_link; - $bug_link = GetBugLink($num, $num); - $item =~ s@\d+@$bug_link@; - $things[$count++] = $item; - } - - $text = value_quote($text); - $text =~ s/\
/\n/g; - - # Stuff everything back from the array. - for (my $i=0 ; $i<$count ; $i++) { - $text =~ s/##$i##/$things[$i]/e; - } - - # And undo the quoting of "#" characters. - $text =~ s/%#/#/g; + my $count = 0; + my $tmp; + + # non-mailto protocols + my $protocol_re = qr/(afs|cid|ftp|gopher|http|https|mid|news|nntp|prospero|telnet|wais)/i; + + $text =~ s~\b(${protocol_re}: # The protocol: + [^\s<>\"]+ # Any non-whitespace + [\w\/]) # so that we end in \w or / + ~($tmp = html_quote($1)) && + ($things[$count++] = "<a href=\"$tmp\">$tmp</a>") && + ("\0\0" . ($count-1) . "\0\0") + ~egox; + + # We have to quote now, otherwise our html is itsself escaped + # THIS MEANS THAT A LITERAL ", <, >, ' MUST BE ESCAPED FOR A MATCH + + $text = html_quote($text); + + # mailto: + # Use |<nothing> so that $1 is defined regardless + $text =~ s~\b(mailto:|)?([\w\.\-\+\=]+\@[\w\-]+(?:\.[\w\-]+)+)\b + ~<a href=\"mailto:$2\">$1$2</a>~igx; + + # attachment links - handle both cases separatly for simplicity + $text =~ s~((?:^Created\ an\ |\b)attachment\s*\(id=(\d+)\)) + ~<a href=\"attachment.cgi?id=$2&action=view\">$1</a>~igx; + + $text =~ s~\b(attachment\s*\#?\s*(\d+)) + ~<a href=\"attachment.cgi?id=$2&action=view\">$1</a>~igx; + + # This handles bug a, comment b type stuff. Because we're using /g + # we have to do this in one pattern, and so this is semi-messy. + # Also, we can't use $bug_re?$comment_re? because that will match the + # empty string + my $bug_re = qr/bug\s*\#?\s*(\d+)/i; + my $comment_re = qr/comment\s*\#?\s*(\d+)/i; + $text =~ s~\b($bug_re(?:\s*,?\s*$comment_re)?|$comment_re) + ~ # We have several choices. $1 here is the link, and $2-4 are set + # depending on which part matched + (defined($2) ? GetBugLink($2,$1,$3) : + "<a href=\"#c$4\">$1</a>") + ~egox; + + # Duplicate markers + $text =~ s~(?<=^\*\*\*\ This\ bug\ has\ been\ marked\ as\ a\ duplicate\ of\ ) + (\d+) + (?=\ \*\*\*\Z) + ~GetBugLink($1, $1) + ~egmx; + + # Now remove the encoding hacks + $text =~ s/\0\0(\d+)\0\0/$things[$1]/eg; + $text =~ s/$chr1\0/\0/g; return $text; } -# This is a new subroutine written 12/20/00 for the purpose of processing a -# link to a bug. It can be called using "GetBugLink (<BugNumber>, <LinkText>);" -# Where <BugNumber> is the number of the bug and <LinkText> is what apprears -# between '<a>' and '</a>'. +# GetBugLink creates a link to a bug, including its title. +# It takes either two or three paramaters: +# - The bug number +# - The link text, to place between the <a>..</a> +# - An optional comment number, for linking to a particular +# comment in the bug sub GetBugLink { - my ($bug_num, $link_text) = (@_); + my ($bug_num, $link_text, $comment_num) = @_; detaint_natural($bug_num) || die "GetBugLink() called with non-integer bug number"; # If we've run GetBugLink() for this bug number before, %::buglink @@ -1122,7 +1122,11 @@ sub GetBugLink { my ($pre, $title, $post) = @{$::buglink{$bug_num}}; # $title will be undefined if the bug didn't exist in the database. if (defined $title) { - return qq{$pre<a href="show_bug.cgi?id=$bug_num" title="$title">$link_text</a>$post}; + my $linkval = "show_bug.cgi?id=$bug_num"; + if (defined $comment_num) { + $linkval .= "#c$comment_num"; + } + return qq{$pre<a href="$linkval" title="$title">$link_text</a>$post}; } else { return qq{$link_text}; |