summary refs log tree commit diff stats
path: root/extensions/Profanivore
diff options
context:
space:
mode:
author Dave Lawrence <dlawrence@mozilla.com> 2012-08-28 17:00:18 +0200
committer Dave Lawrence <dlawrence@mozilla.com> 2012-08-28 17:00:18 +0200
commit cca07e40f68720087992ec8b5337bca91a942d91 (patch)
tree c06120a410eb111e9856e111d0c0ad5dfec0dc0c /extensions/Profanivore
parent 2def70a01bd470f46fa8b83a71e1ec59b7089093 (diff)
download bugzilla-cca07e40f68720087992ec8b5337bca91a942d91.tar.gz
bugzilla-cca07e40f68720087992ec8b5337bca91a942d91.tar.xz
Bug 785309 - Profanivore is throwing "ascii "\xB4" does not map to Unicode" errors
r=glob
Diffstat (limited to 'extensions/Profanivore')
-rw-r--r-- extensions/Profanivore/Config.pm 7
-rw-r--r-- extensions/Profanivore/Extension.pm 84
2 files changed, 83 insertions, 8 deletions
diff --git a/extensions/Profanivore/Config.pm b/extensions/Profanivore/Config.pm
index 778301fbb..354325c58 100644
--- a/extensions/Profanivore/Config.pm
+++ b/extensions/Profanivore/Config.pm
@@ -29,7 +29,12 @@ use constant REQUIRED_MODULES => [
package => 'Regexp-Common',
module => 'Regexp::Common',
version => 0
+ },
+ {
+ package => 'HTML-Tree',
+ module => 'HTML::Tree',
+ version => 0,
}
];
-__PACKAGE__->NAME;
\ No newline at end of file
+__PACKAGE__->NAME;
diff --git a/extensions/Profanivore/Extension.pm b/extensions/Profanivore/Extension.pm
index b77c09ce3..9889cc043 100644
--- a/extensions/Profanivore/Extension.pm
+++ b/extensions/Profanivore/Extension.pm
@@ -25,13 +25,15 @@ use base qw(Bugzilla::Extension);
use Regexp::Common 'RE_ALL';
+use Bugzilla::Util qw(is_7bit_clean);
+
our $VERSION = '0.01';
sub bug_format_comment {
my ($self, $args) = @_;
my $regexes = $args->{'regexes'};
my $comment = $args->{'comment'};
-
+
# Censor profanities if the comment author is not reasonably trusted.
# However, allow people to see their own profanities, which might stop
# them immediately noticing and trying to go around the filter. (I.e.
@@ -55,10 +57,10 @@ sub _replace_profanity {
sub mailer_before_send {
my ($self, $args) = @_;
my $email = $args->{'email'};
-
+
my $author = $email->header("X-Bugzilla-Who");
my $recipient = $email->header("To");
-
+
if ($author && $recipient && lc($author) ne lc($recipient)) {
my $email_suffix = Bugzilla->params->{'emailsuffix'};
if ($email_suffix ne '') {
@@ -72,14 +74,82 @@ sub mailer_before_send {
$author->id &&
!$author->in_group('editbugs'))
{
- my $body = $email->body_str();
+ # Multipart emails
+ if (scalar $email->parts > 1) {
+ $email->walk_parts(sub {
+ my ($part) = @_;
+ return if $part->parts > 1; # Top-level
+ # do not filter attachments such as patches, etc.
+ if ($part->header('Content-Disposition')
+ && $part->header('Content-Disposition') =~ /attachment/)
+ {
+ return;
+ }
+ _fix_encoding($part);
+ my $body = $part->body_str;
+ if ($part->content_type =~ /^text\/html/) {
+ $body = _filter_html($body);
+ }
+ elsif ($part->content_type =~ /^text\/plain/) {
+ $body = _filter_text($body);
+ }
+ $part->body_str_set($body);
+ });
+ }
+ # Single part email
+ else {
+ _fix_encoding($email);
+ $email->body_str_set(_filter_text($email->body_str));
+ }
+ }
+ }
+}
- my $offensive = RE_profanity();
- $body =~ s/$offensive/****/g;
+# Adjust the charset and transfer encoding of one mail part in place.
+#
+# When the 'utf8' parameter is enabled the part's charset is set to UTF-8
+# and a raw body held as a character string is re-encoded to bytes. The
+# transfer encoding is switched to quoted-printable when the body, as it
+# was on entry, is not 7-bit clean.
+#
+# Params:  the mail part (or whole single-part mail) to adjust.
+# Returns: nothing the callers in this file make use of.
+sub _fix_encoding {
+    my ($mime_part) = @_;
+
+    # Snapshot the body first: the 7-bit test at the end is made against
+    # this value, not against whatever body_set() may store below.
+    my $original_body = $mime_part->body;
+
+    if (Bugzilla->params->{'utf8'}) {
+        $mime_part->charset_set('UTF-8');
+
+        # encoding_set works only with bytes, not with utf8 strings.
+        my $raw_body = $mime_part->body_raw;
+        my $is_character_string = utf8::is_utf8($raw_body);
+        if ($is_character_string) {
+            utf8::encode($raw_body);
+            $mime_part->body_set($raw_body);
+        }
+    }
+
+    unless (is_7bit_clean($original_body)) {
+        $mime_part->encoding_set('quoted-printable');
+    }
+}
+
+# Replace every match of the RE_profanity() pattern in a string with "****".
+#
+# Params:  the text to censor; the caller's variable is not modified.
+# Returns: the censored copy of the text.
+sub _filter_text {
+    my ($input) = @_;
+    my $profanity_re = RE_profanity();
+    (my $censored = $input) =~ s/$profanity_re/****/g;
+    return $censored;
+}
+
+# Censor profanity inside the comment section of an HTML mail body.
+#
+# Only text found in <pre> elements below <div id="comments"> is passed
+# through the profanity filter; no other text is filtered.
+#
+# Params:  $html - the HTML source of the mail part.
+# Returns: the filtered document as re-serialised by as_HTML, or $html
+#          unchanged when it contains no <div id="comments">.
+sub _filter_html {
+    my $html = shift;
+
+    # Load the parser on demand: nothing visible in this file loads
+    # HTML::Tree before the class method call below.
+    require HTML::Tree;
+
+    my $tree = HTML::Tree->new->parse_content($html);
+    my $comments_div = $tree->look_down( _tag => 'div', id => 'comments' );
+
+    my $filtered = $html;
+    if ($comments_div) {
+        foreach my $comment ($comments_div->look_down( _tag => 'pre' )) {
+            _filter_html_node($comment);
+        }
+        $filtered = $tree->as_HTML;
+    }
+
+    # Element trees contain parent <-> child reference cycles. Unless weak
+    # references are enabled they are never freed without an explicit
+    # delete, so release the tree on every path, including the
+    # "no comments div" one.
+    $tree->delete;
+    return $filtered;
+}
- $email->body_str_set($body);
+# Recursively censor every text segment beneath an HTML element.
+#
+# Params:  $node - the element whose content is rewritten in place.
+# Returns: the same $node.
+sub _filter_html_node {
+    my $node = shift;
+    # Each item is a reference to one child slot, so a text child can be
+    # replaced in place by assigning through the reference.
+    foreach my $item_r ($node->content_refs_list) {
+        if (ref $$item_r) {
+            # Child element: descend into it.
+            _filter_html_node($$item_r);
+        } else {
+            # Plain text segment: censor it.
+            $$item_r = _filter_text($$item_r);
         }
     }
+    return $node;
 }
__PACKAGE__->NAME;