summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Florian Pritz <bluewind@xinu.at> 2016-07-18 15:12:55 +0200
committer Allan McRae <allan@archlinux.org> 2016-08-30 12:08:33 +0200
commit 6d8e3d2a9171df8cbe479d0a582c468970802aed (patch)
tree 716545688fe0e862dd2293dd9ba363ce37e22ec1
parent 1ec7fa89ad6ad650cedeafd5c7cbc36dcf29239f (diff)
download pacman-6d8e3d2a9171df8cbe479d0a582c468970802aed.tar.gz
pacman-6d8e3d2a9171df8cbe479d0a582c468970802aed.tar.xz
contrib: Add verify-pacman-repo-db.pl
From the documentation:

verify-pacman-repo-db looks at a pacman repo database and verifies its content against the actual package files. The database is expected to be in the same directory as the packages (or symlinks to the packages).

The following properties are verified for each package in the database:
 - existence of the package file
 - file size
 - MD5 and SHA256 checksum (--checksum)

Signed-off-by: Florian Pritz <bluewind@xinu.at>
-rwxr-xr-x contrib/verify-pacman-repo-db.pl 259
1 files changed, 259 insertions, 0 deletions
diff --git a/contrib/verify-pacman-repo-db.pl b/contrib/verify-pacman-repo-db.pl
new file mode 100755
index 00000000..e0a54106
--- /dev/null
+++ b/contrib/verify-pacman-repo-db.pl
@@ -0,0 +1,259 @@
+#!/usr/bin/perl -T
+use warnings;
+use strict;
+
+
+# This is used for the usage output
+=pod
+
+=head1 SYNOPSIS
+
+verify-pacman-repo-db.pl [options] <database file> ...
+
+ Options:
+ --help, -h Show short help message
+ --debug Enable debug output
+ --checksum, -c Verify checksums of packages
+ --threads, -t <num> Use num threads to verify packages. Default: 1
+ NOTE: Each thread uses up to approx. 128MiB of memory
+
+=cut
+
+# Entry-point package: command line handling lives here, the verification
+# logic lives in package Verifier further down.
+package main;
+use Getopt::Long;
+use Pod::Usage;
+
+# Run main() and use its return value as the process exit status. Named subs
+# are defined at compile time, so main() can be called before its definition
+# appears in the file.
+exit main();
+
+# Program entry point.
+#
+# Parses the command line, queues the packages of every database file named
+# in @ARGV on a Verifier, waits for the worker threads to finish and reports
+# the overall result.
+#
+# Returns the value of Verifier::get_error_status(), which the caller hands
+# to exit(): true (exit status 1) if any check failed, false (exit status 0)
+# otherwise. Usage errors leave through pod2usage() with exit status 2.
+sub main {
+    my %opts = (
+        threads => 1,
+    );
+
+    Getopt::Long::Configure ("bundling");
+    pod2usage(-verbose => 0) if (@ARGV == 0);
+    GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or pod2usage(2);
+    pod2usage(0) if $opts{help};
+
+    # Without at least one worker nothing ever consumes the package queue, so
+    # the run would finish "successfully" without verifying a single package.
+    if ($opts{threads} < 1) {
+        pod2usage(-message => "--threads must be at least 1", -exitval => 2);
+    }
+
+    # GetOptions() removes the options it recognised from @ARGV; whatever is
+    # left are the database files. Options alone give us nothing to verify.
+    pod2usage(-verbose => 0) if (@ARGV == 0);
+
+    my $verifier = Verifier->new(\%opts);
+
+    for my $repodb (@ARGV) {
+        $verifier->check_repodb($repodb);
+    }
+
+    # Wait for all queued packages to be processed before reading the result.
+    $verifier->finalize();
+    return $verifier->get_error_status();
+}
+
+package Verifier;
+use Archive::Tar;
+use Digest::MD5;
+use Digest::SHA;
+use File::Basename;
+use threads;
+use threads::shared;
+use Thread::Queue;
+
+# Constructor.
+#
+# $opts is a hash reference of options; this package reads the keys
+# "threads", "debug" and "checksum" from it. The object state is a
+# thread-shared clone, so every worker thread sees the same queues and the
+# same error counter. The worker threads are started before the object is
+# returned.
+sub new {
+    my ($class, $opts) = @_;
+
+    my %state = (
+        opts => \%{$opts},
+        package_queue => Thread::Queue->new(),
+        output_queue => Thread::Queue->new(),
+        workers => [],
+        errors => 0,
+    );
+
+    my $self :shared = shared_clone(\%state);
+    bless $self, $class;
+
+    $self->start_workers();
+    return $self;
+}
+
+# Starts the background threads: one thread that prints queued messages and
+# $self->{opts}->{threads} threads that verify packages. Only the package
+# workers are recorded in $self->{workers}.
+sub start_workers {
+    my ($self) = @_;
+
+    # Output thread; it is not stored and is picked up later via threads->list.
+    threads->new(\&_worker_output_queue, $self);
+
+    my $wanted = $self->{opts}->{threads};
+    foreach my $n (1 .. $wanted) {
+        my $worker :shared = shared_clone(threads->new(\&_worker_package_queue, $self));
+        push @{$self->{workers}}, $worker;
+    }
+}
+
+# Worker thread body: verifies queued packages until the package queue has
+# been ended and drained.
+#
+# Each queue item is a hash carrying the raw content of a package's "desc"
+# file (db_desc_content), the directory of the database (dirname) and the
+# name of the database entry (package). The number of failed checks is added
+# to the shared error counter $self->{errors}.
+sub _worker_package_queue {
+    my $self = shift;
+    while (my $workpack = $self->{package_queue}->dequeue()) {
+        # A malformed entry makes _parse_db_entry() die. Catch that here so a
+        # single bad entry is reported and counted instead of killing the
+        # worker thread and leaving the rest of the queue unverified.
+        my $failed = eval {
+            my $dbdata = $self->_parse_db_entry($workpack->{db_desc_content});
+            $self->_verify_db_entry($workpack->{dirname}, $dbdata);
+        };
+        unless (defined $failed) {
+            $self->_output(sprintf("Failed to verify database entry %s: %s", $workpack->{package}, $@));
+            $failed = 1;
+        }
+
+        # "+=" on a shared value is a read-modify-write and not atomic: a
+        # worker adding 0 could overwrite another worker's increment. Only
+        # touch the counter for real failures, and do so under a lock.
+        if ($failed) {
+            lock(%{$self});
+            $self->{errors} += $failed;
+        }
+    }
+}
+
+# Output thread body: prints every message taken from the output queue to
+# STDERR and stops as soon as dequeue() hands back a false value.
+sub _worker_output_queue {
+    my ($self) = @_;
+
+    while (1) {
+        my $message = $self->{output_queue}->dequeue();
+        last unless $message;
+        print STDERR $message;
+    }
+}
+
+# Shuts the verifier down in two steps: first end the package queue and wait
+# for the package workers, then end the output queue and wait for whatever
+# threads threads->list still reports.
+sub finalize {
+    my ($self) = @_;
+
+    # No more packages will be queued; workers exit once the queue is empty.
+    $self->{package_queue}->end();
+    $self->_join_threads($self->{workers});
+
+    # The workers are done, so no further messages can arrive.
+    $self->{output_queue}->end();
+    my @remaining = threads->list;
+    $self->_join_threads(\@remaining);
+}
+
+# Joins every thread in the array reference $threads, skipping threads whose
+# tid is 0 and the calling thread itself. With the debug option set, a
+# notice is printed to STDOUT before each join.
+sub _join_threads {
+    my ($self, $threads) = @_;
+
+    foreach my $thread (@{$threads}) {
+        next unless $thread->tid;
+        next if threads::equal($thread, threads->self);
+
+        if ($self->{opts}->{debug}) {
+            print "waiting for thread ".$thread->tid()." to finish\n";
+        }
+        $thread->join;
+    }
+}
+
+# Returns true if at least one error has been counted in $self->{errors},
+# false otherwise.
+sub get_error_status {
+    my ($self) = @_;
+
+    my $failures = $self->{errors};
+    return $failures > 0;
+}
+
+# Reads the repository database $repodb (a tar archive) and queues one work
+# item per "<entry>/desc" member for the worker threads.
+#
+# Package files are looked up relative to the directory containing $repodb.
+# A database that cannot be read is reported and counted as an error, so it
+# can no longer pass as a successful run with zero packages.
+sub check_repodb {
+    my $self = shift;
+    my $repodb = shift;
+
+    my $db = Archive::Tar->new();
+    # List context: an archive without members yields an empty list too, so
+    # an empty result only counts as a failure if an error was recorded.
+    my @members = $db->read($repodb);
+    if (!@members && $db->error()) {
+        $self->_output(sprintf("Failed to read database '%s': %s\n", $repodb, $db->error()));
+        # The workers update the same counter concurrently.
+        lock(%{$self});
+        $self->{errors}++;
+        return;
+    }
+
+    my $dirname = dirname($repodb);
+    my $pkgcount = 0;
+
+    for my $file_object ($db->get_files()) {
+        # Only the top-level "<entry>/desc" members describe a package.
+        if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
+            my $package = $1;
+            $self->{package_queue}->enqueue({
+                package => $package,
+                db_desc_content => $file_object->get_content(),
+                dirname => $dirname,
+            });
+            $pkgcount++;
+        }
+    }
+
+    $self->_debug(sprintf("Queued %d package(s) from database '%s'\n", $pkgcount, $repodb));
+}
+
+# Parses the text of a database "desc" file.
+#
+# The format handled here is a sequence of sections: a "%KEY%" header line,
+# one or more value lines, and an empty line that closes the section.
+#
+# Returns a hash reference mapping each KEY to an array reference of its
+# value lines. Dies if a non-empty line shows up outside a section and is not
+# a "%KEY%" header.
+sub _parse_db_entry {
+    my ($self, $content) = @_;
+    my %fields;
+    my $current;
+
+    foreach my $row (split /\n/, $content) {
+        # An empty line ends the current section.
+        if ($row eq '') {
+            $current = undef;
+            next;
+        }
+
+        # Inside a section every line is a value, even one shaped like %X%.
+        if ($current) {
+            push @{$fields{$current}}, $row;
+            next;
+        }
+
+        if ($row =~ m/^%(.+)%$/) {
+            $current = $1;
+            next;
+        }
+
+        die "\$key not set. Is the db formatted incorrectly?";
+    }
+
+    return \%fields;
+}
+
+# Queues $output for printing by the output thread, prefixed with the id of
+# the calling thread. An empty string is ignored.
+sub _output {
+    my ($self, $output) = @_;
+
+    return if $output eq "";
+
+    my $tid = threads->self->tid();
+    $self->{output_queue}->enqueue(sprintf("Thread %s: %s", $tid, $output));
+}
+
+# Forwards $output to _output(), but only when the debug option is set.
+sub _debug {
+    my ($self, $output) = @_;
+
+    return unless $self->{opts}->{debug};
+    $self->_output($output);
+}
+
+# Verifies one parsed database entry against the package file on disk.
+#
+# $basedir is the directory the package file is expected in, $dbdata the hash
+# reference produced by _parse_db_entry(). The file size is always checked,
+# the checksums only when the checksum option is set.
+#
+# Returns the number of failed checks (0 if everything matched).
+sub _verify_db_entry {
+    my $self = shift;
+    my $basedir = shift;
+    my $dbdata = shift;
+    my $ret = 0;
+
+    # Without a file name the path below would collapse to "$basedir/", and
+    # the existence and size checks would then run against the directory.
+    my $filename = $dbdata->{FILENAME}[0];
+    unless (defined $filename && $filename ne '') {
+        # NOTE(review): assumes the entry carries its package name in %NAME%;
+        # falls back to a placeholder if it does not.
+        my $name = $dbdata->{NAME}[0];
+        $name = '(unknown)' unless defined $name;
+        $self->_output(sprintf("Database entry has no FILENAME: %s\n", $name));
+        return 1;
+    }
+
+    # verify package exists
+    my $pkgfile = $basedir.'/'.$filename;
+    $self->_debug(sprintf("Checking package %s\n", $filename));
+    unless (-e $pkgfile) {
+        $self->_output(sprintf("Package file missing: %s\n", $pkgfile));
+        return 1;
+    }
+
+    $ret += $self->_verify_package_size($dbdata, $pkgfile);
+    $ret += $self->_verify_package_checksum($dbdata, $pkgfile) if $self->{opts}->{checksum};
+
+    return $ret;
+}
+
+# Compares the size recorded in the database entry (CSIZE) with the size of
+# $pkgfile as reported by stat().
+#
+# Returns 0 if both are numerically equal; otherwise reports the mismatch
+# through _output() and returns 1.
+sub _verify_package_size {
+    my ($self, $dbdata, $pkgfile) = @_;
+
+    my $expected = $dbdata->{CSIZE}[0];
+    my $actual = (stat($pkgfile))[7];
+
+    return 0 if $expected == $actual;
+
+    $self->_output(sprintf("Package file has incorrect size: %d vs %d: %s\n", $expected, $actual, $pkgfile));
+    return 1;
+}
+
+# Computes the MD5 and SHA256 digests of $pkgfile and compares them with the
+# MD5SUM and SHA256SUM values of the database entry $dbdata.
+#
+# Returns 0 if both digests match. Returns 1 after reporting through
+# _output() if the file cannot be opened or read, or if either digest
+# differs (a checksum missing from the entry counts as a difference).
+sub _verify_package_checksum {
+    my $self = shift;
+    my $dbdata = shift;
+    my $pkgfile = shift;
+
+    my $md5 = Digest::MD5->new;
+    my $sha = Digest::SHA->new(256);
+
+    # Report an unreadable file as such instead of hashing nothing and then
+    # printing a misleading checksum mismatch.
+    my $fh;
+    unless (open $fh, "<", $pkgfile) {
+        $self->_output(sprintf("Failed to open package file: %s: %s\n", $pkgfile, $!));
+        return 1;
+    }
+    # Packages are binary data; never let an I/O layer translate bytes.
+    binmode $fh;
+
+    my $content;
+    # 128MiB to keep random IO low when using multiple threads (only works for large packages though)
+    my $chunksize = 1024*1024*128;
+    my $bytes;
+    while ($bytes = read($fh, $content, $chunksize)) {
+        $md5->add($content);
+        $sha->add($content);
+    }
+    # read() returns 0 at end of file but undef on error.
+    unless (defined $bytes) {
+        $self->_output(sprintf("Failed to read package file: %s: %s\n", $pkgfile, $!));
+        close $fh;
+        return 1;
+    }
+    close $fh;
+
+    my $expected_sha = $dbdata->{SHA256SUM}[0];
+    my $expected_md5 = $dbdata->{MD5SUM}[0];
+    # A placeholder never equals a hex digest, so a missing checksum still
+    # fails the comparison, but without "uninitialized value" warnings.
+    $expected_sha = "(missing from database)" unless defined $expected_sha;
+    $expected_md5 = "(missing from database)" unless defined $expected_md5;
+    my $got_md5 = $md5->hexdigest;
+    my $got_sha = $sha->hexdigest;
+
+    unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
+        my $output = "";
+        $output .= sprintf "Package file has incorrect checksum: %s\n", $pkgfile;
+        $output .= sprintf "expected: SHA %s\n", $expected_sha;
+        $output .= sprintf "got: SHA %s\n", $got_sha;
+        $output .= sprintf "expected: MD5 %s\n", $expected_md5;
+        $output .= sprintf "got: MD5 %s\n", $got_md5;
+        $self->_output($output);
+        return 1;
+    }
+    return 0;
+}
+