From b1ef63e5bfc0d3995245b42154686db1400b2c22 Mon Sep 17 00:00:00 2001
From: "lpsolit%gmail.com" <>
Date: Sun, 15 Oct 2006 03:26:50 +0000
Subject: Bug 206037: [SECURITY] Fix escaping/quoting in edit*.cgi scripts -
 Patch by Frédéric Buclin <LpSolit@gmail.com> r=justdave a=justdave
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Bugzilla/Constants.pm            |  7 +++
 Bugzilla/Install/Requirements.pm | 23 ++++++++++
 Bugzilla/Template.pm             |  7 ++-
 Bugzilla/Util.pm                 | 95 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 129 insertions(+), 3 deletions(-)

(limited to 'Bugzilla')

diff --git a/Bugzilla/Constants.pm b/Bugzilla/Constants.pm
index 4ce2cbc09..337405a61 100644
--- a/Bugzilla/Constants.pm
+++ b/Bugzilla/Constants.pm
@@ -123,6 +123,8 @@ use File::Basename;
     ON_WINDOWS
 
     MAX_TOKEN_AGE
+
+    SAFE_PROTOCOLS
 );
 
 @Bugzilla::Constants::EXPORT_OK = qw(contenttypes);
@@ -302,6 +304,11 @@ use constant FIELD_TYPE_SINGLE_SELECT => 2;
 # The maximum number of days a token will remain valid.
 use constant MAX_TOKEN_AGE => 3;
 
+# URL schemes considered safe, e.g. when linkifying text or filtering href/cite values.
+use constant SAFE_PROTOCOLS => ('afs', 'cid', 'ftp', 'gopher', 'http', 'https',
+                                'irc', 'mid', 'news', 'nntp', 'prospero', 'telnet',
+                                'view-source', 'wais');
+
 # States that are considered to be "open" for bugs.
 use constant BUG_STATE_OPEN => ('NEW', 'REOPENED', 'ASSIGNED', 
                                 'UNCONFIRMED');
diff --git a/Bugzilla/Install/Requirements.pm b/Bugzilla/Install/Requirements.pm
index 06c8b557b..7dddefd75 100644
--- a/Bugzilla/Install/Requirements.pm
+++ b/Bugzilla/Install/Requirements.pm
@@ -125,6 +125,18 @@ use constant OPTIONAL_MODULES => [
         name => 'SOAP::Lite',
         version => 0
     },
+    {
+        # Since Perl 5.8, we need the 'utf8_mode' method of HTML::Parser
+        # which has been introduced in version 3.39_92 and fixed in 3.40
+        # to not complain when running Perl 5.6.
+        # This module is required by HTML::Scrubber.
+        name => 'HTML::Parser',
+        version => ($] >= 5.008) ? '3.40' : 0
+    },
+    {
+        name => 'HTML::Scrubber',
+        version => 0
+    },
 ];
 
 # These are only required if you want to use Bugzilla with
@@ -305,6 +317,17 @@ sub check_requirements {
                   "    " . install_command('Net::LDAP') . "\n\n";
         }
 
+        # HTML filtering
+        if (!$have_mod{'HTML::Parser'} || !$have_mod{'HTML::Scrubber'}) {
+            print "If you want additional HTML tags within product and group",
+                  " descriptions,\nyou should install:\n\n";
+            print "    HTML::Scrubber: " . install_command('HTML::Scrubber') . "\n"
+                if !$have_mod{'HTML::Scrubber'};
+            print "    HTML::Parser: " . install_command('HTML::Parser') . "\n"
+                if !$have_mod{'HTML::Parser'};
+            print "\n";
+        }
+
         # mod_perl
         if (!$have_mod{'mod_perl2'}) {
             print "If you would like mod_perl support, you must install at",
diff --git a/Bugzilla/Template.pm b/Bugzilla/Template.pm
index 7149828ef..915e3cdc6 100644
--- a/Bugzilla/Template.pm
+++ b/Bugzilla/Template.pm
@@ -289,7 +289,8 @@ sub quoteUrls {
               ~egox;
 
     # non-mailto protocols
-    my $protocol_re = qr/(afs|cid|ftp|gopher|http|https|irc|mid|news|nntp|prospero|telnet|view-source|wais)/i;
+    my $safe_protocols = join('|', SAFE_PROTOCOLS);
+    my $protocol_re = qr/($safe_protocols)/i;
 
     $text =~ s~\b(${protocol_re}:  # The protocol:
                   [^\s<>\"]+       # Any non-whitespace
@@ -734,7 +735,9 @@ sub create {
                 }
                 return $var;
             },
-            
+
+            html_light => \&Bugzilla::Util::html_light_quote,
+
             # iCalendar contentline filter
             ics => [ sub {
                          my ($context, @args) = @_;
diff --git a/Bugzilla/Util.pm b/Bugzilla/Util.pm
index 8457c8df8..d346d2547 100644
--- a/Bugzilla/Util.pm
+++ b/Bugzilla/Util.pm
@@ -34,7 +34,7 @@ use base qw(Exporter);
 @Bugzilla::Util::EXPORT = qw(is_tainted trick_taint detaint_natural
                              detaint_signed
                              html_quote url_quote value_quote xml_quote
-                             css_class_quote
+                             css_class_quote html_light_quote
                              i_am_cgi get_netaddr correct_urlbase
                              lsearch
                              diff_arrays diff_strings
@@ -95,6 +95,93 @@ sub html_quote {
     return $var;
 }
 
+sub html_light_quote { # Keep whitelisted HTML in a string; escape or strip the rest.
+    my ($text) = @_; # The string to filter; it is copied, and the result is returned.
+
+    # Elements allowed as bare tags only, i.e. "<b>" or "</b>" with no attributes.
+    my @allow = qw(b strong em i u p br abbr acronym ins del cite code var
+                   dfn samp kbd big small sub sup tt dd dt dl ul li ol);
+
+    # Try to load both modules; if either fails, $@ is set and the fallback is used.
+    eval { require HTML::Scrubber;
+           require HTML::Parser;
+    };
+
+    # Regex fallback: a module failed to load, or Perl >= 5.8 with HTML::Parser < 3.40.
+    if ($@ || ($] >= 5.008 && $HTML::Parser::VERSION < 3.40)) { # 3.40 for utf8_mode().
+        my $safe = join('|', @allow); # Alternation of the allowed element names.
+        my $chr = chr(1); # Placeholder marking where the < and > of a safe tag were.
+
+        # First, hide safe tags: "<b>" becomes chr(1) . "b" . chr(1).
+        $text =~ s#<($safe)>#$chr$1$chr#go;
+        $text =~ s#</($safe)>#$chr/$1$chr#go;
+        # Now every remaining < and > is turned into an HTML entity.
+        $text =~ s#<#&lt;#g;
+        $text =~ s#>#&gt;#g;
+        # Finally, turn the hidden safe tags back into real tags.
+        $text =~ s#$chr/($safe)$chr#</$1>#go;
+        $text =~ s#$chr($safe)$chr#<$1>#go;
+        return $text;
+    }
+    else { # Both modules are usable.
+        # We can be less restrictive here and accept some elements with attributes.
+        push(@allow, qw(a blockquote q span));
+
+        # href/cite values must start with a safe protocol or contain no colon at all.
+        my $safe_protocols = join('|', SAFE_PROTOCOLS);
+        my $protocol_regexp = qr{(^(?:$safe_protocols):|^[^:]+$)}i;
+
+        # Deny all elements and attributes unless explicitly authorized.
+        my @default = (0 => {
+                             id    => 1,
+                             name  => 1,
+                             class => 1,
+                             '*'   => 0, # Reject all other attributes.
+                            }
+                       );
+
+        # Specific rules for allowed elements. If no specific rule is set
+        # for a given element, then the default is used.
+        my @rules = (a => {
+                           href  => $protocol_regexp,
+                           title => 1,
+                           id    => 1,
+                           name  => 1,
+                           class => 1,
+                           '*'   => 0, # Reject all other attributes.
+                          },
+                     blockquote => {
+                                    cite => $protocol_regexp,
+                                    id    => 1,
+                                    name  => 1,
+                                    class => 1,
+                                    '*'  => 0, # Reject all other attributes.
+                                   },
+                     'q' => {
+                             cite => $protocol_regexp,
+                             id    => 1,
+                             name  => 1,
+                             class => 1,
+                             '*'  => 0, # Reject all other attributes.
+                          },
+                    );
+
+        my $scrubber = HTML::Scrubber->new(default => \@default,
+                                           allow   => \@allow,
+                                           rules   => \@rules,
+                                           comment => 0,
+                                           process => 0);
+
+        # With Perl >= 5.8, turn on utf8_mode() to avoid filling the web server
+        # error log. HTML::Scrubber 0.08 keeps its HTML::Parser object in the
+        # "_p" key; this is checked with ref() as it may change in future versions.
+        if ($] >= 5.008 && ref($scrubber->{_p}) eq 'HTML::Parser') {
+            $scrubber->{_p}->utf8_mode(1);
+        }
+        return $scrubber->scrub($text);
+    }
+}
+
 # This originally came from CGI.pm, by Lincoln D. Stein
 sub url_quote {
     my ($toencode) = (@_);
@@ -553,6 +640,12 @@ be done in the template where possible.
 Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, and E<34> being
 replaced with their appropriate HTML entities.
 
+=item C<html_light_quote($val)>
+
+Returns a string in which only explicitly allowed HTML elements and attributes
+are kept. Anything not in the whitelist is escaped if HTML::Scrubber or a
+recent enough HTML::Parser is not installed, and removed otherwise.
+
 =item C<url_quote($val)>
 
 Quotes characters so that they may be included as part of a url.
-- 
cgit v1.2.3-24-g4f1b