From 48fe3a1e0a4a833aa14e7f95136d0e4542959eb8 Mon Sep 17 00:00:00 2001 From: Dylan William Hardison Date: Wed, 13 Dec 2017 12:46:26 -0500 Subject: Bug 1424155 - Write scripts to import/export attachments to disk --- .perlcriticrc | 12 +++- Bugzilla/Attachment/Archive.pm | 123 +++++++++++++++++++++++++++++++++++++ scripts/attachment-data.pl | 133 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 Bugzilla/Attachment/Archive.pm create mode 100644 scripts/attachment-data.pl diff --git a/.perlcriticrc b/.perlcriticrc index 84eb0f8a5..14b2d7cab 100644 --- a/.perlcriticrc +++ b/.perlcriticrc @@ -5,7 +5,7 @@ severity = 1 #perltidyrc = .perltidyrc [InputOutput::RequireCheckedSyscalls] functions = :builtins -exclude_functions = print +exclude_functions = print say binmode [-BuiltinFunctions::ProhibitUselessTopic] [-ControlStructures::ProhibitCascadingIfElse] @@ -35,6 +35,16 @@ exclude_functions = print # think of javascript arrow functions. [-Subroutines::RequireFinalReturn] +# This test is technically correct but I do not care. +[-InputOutput::ProhibitInteractiveTest] + +# I believe in the opposite of this test. +[-InputOutput::ProhibitExplicitStdin] + +# _build_* are allowed +[Subroutines::ProhibitUnusedPrivateSubroutines] + private_name_regex = _(?!_|build_)\w+ + # I don't agree with this policy because # a bare return can actually cause more problems. [-Subroutines::ProhibitExplicitReturnUndef] diff --git a/Bugzilla/Attachment/Archive.pm b/Bugzilla/Attachment/Archive.pm new file mode 100644 index 000000000..ccedf1da4 --- /dev/null +++ b/Bugzilla/Attachment/Archive.pm @@ -0,0 +1,123 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This Source Code Form is "Incompatible With Secondary Licenses", as +# defined by the Mozilla Public License, v. 2.0. 
+
+package Bugzilla::Attachment::Archive;
+
+use 5.10.1;
+use Moo;
+use Digest::SHA qw(sha256_hex);
+use Carp;
+use IO::File;
+
+# An archive is a sequence of members. Each member starts with a fixed-size
+# binary header laid out by HEADER_FORMAT:
+#   A    1 byte    member type: 'D' (attachment data) or 'C' (checksum)
+#   N    4 bytes   bug id         (32-bit big-endian unsigned)
+#   N    4 bytes   attachment id  (32-bit big-endian unsigned)
+#   N    4 bytes   data length    (32-bit big-endian unsigned)
+#   H64  32 bytes  SHA-256 digest, packed from 64 hex digits
+# which adds up to the 45 bytes of HEADER_SIZE. A 'D' header is followed by
+# the raw attachment data; a 'C' header is written without any payload.
+use constant HEADER_SIZE   => 45;
+use constant HEADER_FORMAT => 'ANNNH64';
+
+# file      - path of the archive on disk (required).
+# input_fh  - lazily opened read handle; see _build_input_fh.
+# output_fh - lazily opened write handle; see _build_output_fh.
+# checksum  - running SHA-256 digest object fed with every 'D' header.
+has 'file'      => ( is => 'ro',   required  => 1 );
+has 'input_fh'  => ( is => 'lazy', predicate => 'has_input_fh' );
+has 'output_fh' => ( is => 'lazy', predicate => 'has_output_fh' );
+has 'checksum'  => ( is => 'lazy', clearer   => 'reset_checksum' );
+
+# Read the next member from the archive.
+#
+# Returns a hashref with the keys bug_id, attach_id, data_len, hash and data
+# for a data ('D') member. Returns undef after a checksum ('C') member has
+# been read and matched against the running digest, which is then reset.
+# Dies if the header or data cannot be read, if a digest does not match, or
+# if the member type is unknown.
+sub read_member {
+    my ($self) = @_;
+    my $header = $self->_read_header();
+    my ( $type, $bug_id, $attach_id, $data_len, $hash ) = unpack HEADER_FORMAT, $header;
+    if ( $type eq 'D' ) {
+        # Only the header goes into the overall digest; the data itself is
+        # protected by the per-member digest stored in that header.
+        $self->checksum->add($header);
+        my $data = $self->_read_data( $data_len, $hash );
+        return {
+            bug_id    => $bug_id,
+            attach_id => $attach_id,
+            data_len  => $data_len,
+            hash      => $hash,
+            data      => $data,
+        };
+    }
+    elsif ( $type eq 'C' ) {
+        die "bad overall checksum\n" unless $hash eq $self->checksum->hexdigest;
+        $self->reset_checksum;
+        return undef;
+    }
+    else {
+        die "unknown member type: $type\n";
+    }
+}
+
+# Append one attachment to the archive as a 'D' member.
+#
+# $attachment must provide the data, bug_id, id and datasize methods.
+# Attachments whose data is undefined, or whose length differs from
+# datasize, are not written; a warning is emitted so the skip is visible.
+sub write_attachment {
+    my ( $self, $attachment ) = @_;
+    my $data      = $attachment->data;
+    my $bug_id    = $attachment->bug_id;
+    my $attach_id = $attachment->id;
+
+    unless ( defined $data && length($data) == $attachment->datasize ) {
+        carp "skipping attachment $attach_id: data is missing or does not match its recorded size";
+        return;
+    }
+
+    my $header = pack HEADER_FORMAT, 'D', $bug_id, $attach_id, length($data), sha256_hex($data);
+    $self->checksum->add($header);
+    $self->output_fh->print( $header, $data );
+}
+
+# Write a 'C' member carrying the digest of all 'D' headers written since
+# the last reset, then reset the running digest and flush the output handle.
+sub write_checksum {
+    my ($self) = @_;
+    my $header = pack HEADER_FORMAT, 'C', 0, 0, 0, $self->checksum->hexdigest;
+    $self->output_fh->print($header);
+    $self->reset_checksum;
+    $self->output_fh->flush;
+}
+
+# Builder for the checksum attribute: a fresh SHA-256 digest object.
+sub _build_checksum {
+    my ($self) = @_;
+    return Digest::SHA->new(256);
+}
+
+# Builder for input_fh: open the archive for reading.
+# Croaks if the output handle has already been opened on this object and
+# dies if the file cannot be opened.
+sub _build_input_fh {
+    my ($self) = @_;
+    if ( $self->has_output_fh ) {
+        croak "I will not read and write a file at the same time";
+    }
+    my $file = $self->file;
+
+    # "return EXPR or die" never reaches the die because "or" binds more
+    # loosely than return, so check the handle before returning it.
+    my $fh = IO::File->new( $file, '<:bytes' ) or die "cannot read $file: $!";
+    return $fh;
+}
+
+# Builder for output_fh: create the archive for writing.
+# Croaks if the input handle has already been opened on this object or if
+# the file already exists; dies if the file cannot be opened.
+sub _build_output_fh {
+    my ($self) = @_;
+    if ( $self->has_input_fh ) {
+        croak "I will not read and write a file at the same time";
+    }
+    my $file = $self->file;
+    if ( -e $file ) {
+        croak "I will not overwrite a file (file $file already exists)";
+    }
+
+    # "return EXPR or die" never reaches the die because "or" binds more
+    # loosely than return, so check the handle before returning it.
+    my $fh = IO::File->new( $file, '>:bytes' ) or die "cannot write $file: $!";
+    return $fh;
+}
+
+# Read exactly HEADER_SIZE bytes from the input handle and return them.
+# Dies with "bad header" on a read error, end of file or a short read.
+sub _read_header {
+    my ($self) = @_;
+    my $header     = '';
+    my $header_len = $self->input_fh->read( $header, HEADER_SIZE );
+    if ( !$header_len || $header_len != HEADER_SIZE ) {
+        die "bad header\n";
+    }
+    return $header;
+}
+
+# Read $data_len bytes of member data and verify them against $hash, the
+# hex SHA-256 digest taken from the member header. Returns the data.
+# Dies with "bad data" on a read error or short read, and with
+# "bad checksum" (expected digest, then actual digest) on a mismatch.
+sub _read_data {
+    my ( $self, $data_len, $hash ) = @_;
+
+    my $data          = '';
+    my $read_data_len = $self->input_fh->read( $data, $data_len );
+
+    # read() yields undef on error, so test definedness before comparing.
+    unless ( defined $read_data_len && $read_data_len == $data_len ) {
+        die "bad data\n";
+    }
+
+    my $actual_hash = sha256_hex($data);
+    unless ( $hash eq $actual_hash ) {
+        die "bad checksum:\n\t$hash\n\t$actual_hash\n";
+    }
+
+    return $data;
+}
+
+1;
\ No newline at end of file
diff --git a/scripts/attachment-data.pl b/scripts/attachment-data.pl
new file mode 100644
index 000000000..4a3a1b414
--- /dev/null
+++ b/scripts/attachment-data.pl
@@ -0,0 +1,133 @@
+#!/usr/bin/perl
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This Source Code Form is "Incompatible With Secondary Licenses", as
+# defined by the Mozilla Public License, v. 2.0.
+use strict;
+use warnings;
+use 5.10.1;
+
+use File::Basename;
+use File::Spec;
+
+# Add the checkout root (the parent of this script's directory), its lib/
+# directory and its local/lib/perl5 directory to @INC before loading Bugzilla.
+BEGIN {
+    require lib;
+    my $dir = File::Spec->rel2abs( File::Spec->catdir( dirname(__FILE__), '..' ) );
+    lib->import( $dir, File::Spec->catdir( $dir, 'lib' ), File::Spec->catdir( $dir, qw(local lib perl5) ) );
+}
+
+use Bugzilla;
+use Bugzilla::Constants;
+use Bugzilla::Attachment;
+use Bugzilla::Attachment::Archive;
+use Getopt::Long;
+use Pod::Usage;
+
+BEGIN { Bugzilla->extensions }
+
+# set Bugzilla usage mode to USAGE_MODE_CMDLINE
+Bugzilla->usage_mode(USAGE_MODE_CMDLINE);
+
+my ( $help, $file );
+GetOptions(
+    'help|h'   => \$help,
+    'file|f=s' => \$file,
+);
+pod2usage(1) if $help || !$file;
+
+my $archive = Bugzilla::Attachment::Archive->new( file => $file );
+my $cmd     = shift @ARGV;
+
+# Without a command there is nothing to do; show the usage instead of
+# comparing an undefined value below.
+pod2usage(1) unless defined $cmd;
+
+if ( $cmd eq 'export' ) {
+    # Attachment ids are read from standard input, one per line.
+    while ( my $attach_id = <STDIN> ) {
+        chomp $attach_id;
+        my $attachment = Bugzilla::Attachment->new($attach_id);
+        unless ($attachment) {
+            warn "No attachment: $attach_id\n";
+            next;
+        }
+        warn "writing $attach_id\n";
+        $archive->write_attachment($attachment);
+    }
+    $archive->write_checksum;
+}
+elsif ( $cmd eq 'import' ) {
+    # Store every archived member whose bug id and size still match the
+    # attachment record; members that do not match are skipped.
+    while ( my $mem = $archive->read_member ) {
+        warn "read $mem->{attach_id}\n";
+
+        my $attachment = Bugzilla::Attachment->new( $mem->{attach_id} );
+        next unless $mem->{data_len};
+        next unless check_attachment( $attachment, $mem->{bug_id}, $mem->{data_len} );
+
+        Bugzilla::Attachment::current_storage()->store( $attachment->id, $mem->{data} );
+    }
+}
+elsif ( $cmd eq 'check' ) {
+    # Read the whole archive and die on the first member that does not
+    # match its attachment record.
+    while ( my $mem = $archive->read_member() ) {
+        warn "checking $mem->{attach_id}\n";
+        my $attachment = Bugzilla::Attachment->new( $mem->{attach_id} );
+        next unless $mem->{data_len};
+        die "bad attachment\n" unless check_attachment( $attachment, $mem->{bug_id}, $mem->{data_len} );
+    }
+}
+elsif ( $cmd eq 'remove' ) {
+    # First pass: verify the entire archive and remember which attachment
+    # ids it contains. Nothing is removed unless this pass completes.
+    my %remove_ok;
+    while ( my $mem = $archive->read_member ) {
+        warn "checking $mem->{attach_id}\n";
+
+        my $attachment = Bugzilla::Attachment->new( $mem->{attach_id} );
+        die "bad attachment\n" unless check_attachment( $attachment, $mem->{bug_id}, $mem->{data_len} );
+        $remove_ok{ $mem->{attach_id} } = 1;
+    }
+
+    # Second pass: remove only the ids from standard input that were seen
+    # in the archive.
+    while ( my $attach_id = <STDIN> ) {
+        chomp $attach_id;
+        if ( $remove_ok{$attach_id} ) {
+            warn "removing $attach_id\n";
+            Bugzilla::Attachment::current_storage()->remove($attach_id);
+        }
+        else {
+            warn "Unable to remove $attach_id, as it did not occur in the archive.\n";
+        }
+    }
+}
+else {
+    pod2usage( -message => "Unknown command: $cmd", -exitval => 1 );
+}
+
+# Compare an attachment record with the values stored in an archive member.
+#
+# $attachment - attachment object, or a false value if none was found
+# $bug_id     - bug id recorded in the archive member
+# $data_len   - data length recorded in the archive member
+#
+# Returns 1 when the attachment exists and both values match; otherwise
+# warns about the first problem found and returns 0.
+sub check_attachment {
+    my ( $attachment, $bug_id, $data_len ) = @_;
+
+    unless ($attachment) {
+        warn "No attachment found. Skipping record.\n";
+        return 0;
+    }
+    unless ( $attachment->bug_id == $bug_id ) {
+        warn 'Wrong bug id (should be ' . $attachment->bug_id . ")\n";
+        return 0;
+    }
+    unless ( $attachment->datasize == $data_len ) {
+        warn 'Wrong size (should be ' . $attachment->datasize . ")\n";
+        return 0;
+    }
+
+    return 1;
+}
+
+
+__DATA__
+
+=head1 NAME
+
+attachment-data.pl - import, export, and purge attachment data
+
+=head1 SYNOPSIS
+
+    ./scripts/attachment-data.pl export -f attachments.dat < attachment-ids.txt
+    ./scripts/attachment-data.pl check  -f attachments.dat
+    ./scripts/attachment-data.pl remove -f attachments.dat < attachment-ids.txt
+    ./scripts/attachment-data.pl import -f attachments.dat
+
+
+=head1 SEE ALSO
+
+L<./scripts/attachment-export.pl>
+
+
-- 
cgit v1.2.3-24-g4f1b