From 8ba7aa6be8fb5689634c453fd505822d02f586a6 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Tue, 10 Apr 2018 17:35:23 -0400 Subject: Bug 1453126 - Bugzilla::Bloomfilter should encourage preserving the input files for its filters --- .circleci/config.yml | 2 +- Bugzilla/Bloomfilter.pm | 34 ++++++++++++++-------------------- Dockerfile | 2 +- Makefile.PL | 1 + scripts/bloomfilter-populate.pl | 6 ++---- 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1283d573b..64e6c5831 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,7 +7,7 @@ version: 2 defaults: bmo_slim_image: &bmo_slim_image - image: mozillabteam/bmo-slim:20180330.1 + image: mozillabteam/bmo-slim:20180410.1 user: app mysql_image: &mysql_image diff --git a/Bugzilla/Bloomfilter.pm b/Bugzilla/Bloomfilter.pm index 0d329b2ea..ba1d6d6c3 100644 --- a/Bugzilla/Bloomfilter.pm +++ b/Bugzilla/Bloomfilter.pm @@ -13,7 +13,8 @@ use warnings; use Bugzilla::Constants; use Algorithm::BloomFilter; -use File::Temp qw(tempfile); +use File::Slurper qw(write_binary read_binary read_lines); +use File::Spec::Functions qw(catfile); sub _new_bloom_filter { my ($n) = @_; @@ -24,44 +25,37 @@ sub _new_bloom_filter { } sub _filename { - my ($name) = @_; + my ($name, $type) = @_; my $datadir = bz_locations->{datadir}; - return sprintf("%s/%s.bloom", $datadir, $name); + + return catfile($datadir, "$name.$type"); } sub populate { - my ($class, $name, $items) = @_; + my ($class, $name) = @_; my $memcached = Bugzilla->memcached; + my @items = read_lines(_filename($name, 'list')); + my $filter = _new_bloom_filter(@items + 0); - my $filter = _new_bloom_filter(@$items + 0); - foreach my $item (@$items) { - $filter->add($item); - } - - my ($fh, $filename) = tempfile( "${name}XXXXXX", DIR => bz_locations->{datadir}, UNLINK => 0); - binmode $fh, ':bytes'; - print $fh $filter->serialize; - close $fh; - rename($filename, _filename($name)) or die "failed to 
rename $filename: $!"; + $filter->add($_) foreach @items; + write_binary(_filename($name, 'bloom'), $filter->serialize); $memcached->clear_bloomfilter({name => $name}); } sub lookup { my ($class, $name) = @_; my $memcached = Bugzilla->memcached; - my $filename = _filename($name); + my $filename = _filename($name, 'bloom'); my $filter_data = $memcached->get_bloomfilter( { name => $name } ); if (!$filter_data && -f $filename) { - open my $fh, '<:bytes', $filename; - local $/ = undef; - $filter_data = <$fh>; - close $fh; + $filter_data = read_binary($filename); $memcached->set_bloomfilter({ name => $name, filter => $filter_data }); } - return Algorithm::BloomFilter->deserialize($filter_data); + return Algorithm::BloomFilter->deserialize($filter_data) if $filter_data; + return undef; } 1; diff --git a/Dockerfile b/Dockerfile index 6aa2f2646..0e7bb7acd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM mozillabteam/bmo-slim:20180330.1 +FROM mozillabteam/bmo-slim:20180410.1 ARG CI ARG CIRCLE_SHA1 diff --git a/Makefile.PL b/Makefile.PL index 9f56cd487..4cea5b066 100755 --- a/Makefile.PL +++ b/Makefile.PL @@ -48,6 +48,7 @@ my %requires = ( 'Email::MIME' => '1.904', 'Email::Send' => '1.911', 'File::Slurp' => '9999.13', + 'File::Slurper' => '0.012', 'Future' => '0.34', 'HTML::Escape' => '1.10', 'IPC::System::Simple' => 0, diff --git a/scripts/bloomfilter-populate.pl b/scripts/bloomfilter-populate.pl index c591a61b3..780e98bd0 100755 --- a/scripts/bloomfilter-populate.pl +++ b/scripts/bloomfilter-populate.pl @@ -14,8 +14,6 @@ use Bugzilla::Bloomfilter; # set Bugzilla usage mode to USAGE_MODE_CMDLINE Bugzilla->usage_mode(USAGE_MODE_CMDLINE); -my $name = shift @ARGV or die "usage: $0 \$name < list\n"; -my @lines = <STDIN>; -chomp @lines; -Bugzilla::Bloomfilter->populate($name, \@lines); +my $name = shift @ARGV or die "usage: $0 \$name\n"; +Bugzilla::Bloomfilter->populate($name); -- cgit v1.2.3-24-g4f1b