From 9e52991889112d67c6eaea81a2211ea1427639da Mon Sep 17 00:00:00 2001
From: Florian Pritz
Date: Thu, 11 May 2017 14:23:50 +0200
Subject: add verify-pacman-repo-db.pl from pacman

Signed-off-by: Florian Pritz
---
Changes made in review (the index hashes below are still those of the
original submission):
 - reject --threads < 1 and option-only command lines instead of exiting 0
 - count unreadable databases and malformed db entries as errors
 - update the shared error counter under a lock
 - report open/read/stat failures and missing db fields explicitly
 - usage text: --threads (was --thread), restore argument placeholders

 verify-pacman-repo-db.1.txt |  59 +++++++
 verify-pacman-repo-db.pl    | 375 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 434 insertions(+)
 create mode 100644 verify-pacman-repo-db.1.txt
 create mode 100755 verify-pacman-repo-db.pl

diff --git a/verify-pacman-repo-db.1.txt b/verify-pacman-repo-db.1.txt
new file mode 100644
index 0000000..de13975
--- /dev/null
+++ b/verify-pacman-repo-db.1.txt
@@ -0,0 +1,59 @@
+/////
+vim:set ts=4 sw=4 syntax=asciidoc noet spell spelllang=en_us:
+/////
+verify-pacman-repo-db(1)
+========================
+
+Name
+----
+verify-pacman-repo-db - package repository verification utility
+
+
+Synopsis
+--------
+'verify-pacman-repo-db' [options] <repodb> ...
+
+
+Description
+-----------
+'verify-pacman-repo-db' looks at a pacman repo database and verifies its
+content with the actual package files. The database is expected to be in
+the same directory as the packages (or symlinks to the packages).
+
+The following properties are verified for each package in the database:
+
+ - existence of the package file
+ - file size
+ - MD5 and SHA256 checksum (--checksum)
+
+Options
+-------
+*-h, \--help*::
+	Output a short help message.
+
+*\--debug*::
+	Enable debug output.
+
+*-c, \--checksum*::
+	Verify checksums of packages. Note that this means all package files will
+	be read from disk. Otherwise only metadata is compared which does not
+	require reading package file contents.
+
+*-t, \--threads* <number>::
+	Use 'number' threads to verify packages. Note that each thread may use up
+	to approximately 128MiB of memory. Default: 1
+
+Notes
+-----
+
+This script intentionally does not use any ALPM libraries. The format is simple
+enough to be parsed and this way we might just detect more problems because the
+libalpm parsing code might also have bugs. We also stay much more portable
+which might be good for people that want to check a db, but don't actually have
+pacman installed.
+
+
+See Also
+--------
+linkman:repo-add[8]
+
diff --git a/verify-pacman-repo-db.pl b/verify-pacman-repo-db.pl
new file mode 100755
index 0000000..e0a5410
--- /dev/null
+++ b/verify-pacman-repo-db.pl
@@ -0,0 +1,375 @@
+#!/usr/bin/perl -T
+use warnings;
+use strict;
+
+
+# This is used for the usage output
+=pod
+
+=head1 SYNOPSIS
+
+verify-pacman-repo-db.pl [options] <repodb> ...
+
+  Options:
+    --help, -h            Show short help message
+    --debug               Enable debug output
+    --checksum, -c        Verify checksums of packages
+    --threads, -t <num>   Use num threads to verify packages. Default: 1
+                          NOTE: Each thread uses up to approx. 128MiB of memory
+
+=cut
+
+package main;
+use Getopt::Long;
+use Pod::Usage;
+
+exit main();
+
+# Entry point: parse the command line, queue every repo database named on it
+# and wait for the verification threads to finish.
+#
+# Returns the process exit status: 1 if any problem was found, 0 otherwise.
+sub main {
+    my %opts = (
+        threads => 1,
+    );
+
+    Getopt::Long::Configure("bundling");
+    GetOptions(\%opts, "help|h", "debug", "threads|t=i", "checksum|c") or pod2usage(2);
+    pod2usage(0) if $opts{help};
+
+    # Look for databases only after option parsing: an options-only command
+    # line would otherwise verify nothing and still exit successfully.
+    pod2usage(-verbose => 0) unless @ARGV;
+
+    # With no worker thread nothing drains the package queue, so every
+    # database would appear to pass without being looked at.
+    if ($opts{threads} < 1) {
+        pod2usage(-message => "--threads must be at least 1", -exitval => 2, -verbose => 0);
+    }
+
+    my $verifier = Verifier->new(\%opts);
+
+    for my $repodb (@ARGV) {
+        $verifier->check_repodb($repodb);
+    }
+
+    $verifier->finalize();
+    return $verifier->get_error_status() ? 1 : 0;
+}
+
+package Verifier;
+use Archive::Tar;
+use Digest::MD5;
+use Digest::SHA;
+use File::Basename;
+use threads;
+use threads::shared;
+use Thread::Queue;
+
+# Read size used while checksumming. 128MiB to keep random IO low when using
+# multiple threads (only works for large packages though).
+use constant CHUNK_SIZE => 1024 * 1024 * 128;
+
+# Create a verifier and start its threads.
+#
+# $opts is a hash reference holding the parsed command line options
+# (threads, debug, checksum). The object is a shared structure so that the
+# threads can reach the queues, the options and the error counter.
+sub new {
+    my $class = shift;
+    my $opts = shift;
+
+    my $self :shared = shared_clone({
+        opts => \%{$opts},
+        package_queue => Thread::Queue->new(),
+        output_queue => Thread::Queue->new(),
+        workers => [],
+        errors => 0,
+    });
+
+    bless $self, $class;
+    $self->start_workers();
+    return $self;
+}
+
+# Start the output thread and one package thread per requested --threads.
+sub start_workers {
+    my $self = shift;
+
+    # Not recorded in {workers}: finalize() finds this thread through
+    # threads->list once the package threads have been joined.
+    threads->new(\&_worker_output_queue, $self);
+
+    for (1 .. $self->{opts}->{threads}) {
+        my $thr :shared = shared_clone(threads->new(\&_worker_package_queue, $self));
+        push @{$self->{workers}}, $thr;
+    }
+}
+
+# Thread body: verify queued packages until the package queue is ended.
+sub _worker_package_queue {
+    my $self = shift;
+    while (defined(my $workpack = $self->{package_queue}->dequeue())) {
+        # _parse_db_entry() dies on a malformed entry. Catch that here so the
+        # failure is counted and this thread keeps draining the queue.
+        my $errors = eval {
+            my $dbdata = $self->_parse_db_entry($workpack->{db_desc_content});
+            $self->_verify_db_entry($workpack->{dirname}, $dbdata);
+        };
+        unless (defined $errors) {
+            $self->_output(sprintf("Failed to verify db entry %s: %s", $workpack->{package}, $@));
+            $errors = 1;
+        }
+        $self->_add_errors($errors);
+    }
+}
+
+# Thread body: print queued messages to STDERR until the output queue is
+# ended.
+sub _worker_output_queue {
+    my $self = shift;
+    while (defined(my $output = $self->{output_queue}->dequeue())) {
+        print STDERR $output;
+    }
+}
+
+# Wait until all queued packages are verified and all output is printed.
+sub finalize {
+    my $self = shift;
+
+    $self->{package_queue}->end();
+    $self->_join_threads($self->{workers});
+
+    # The package threads feed the output queue, so it is ended only after
+    # they are joined. threads->list then yields the output thread.
+    $self->{output_queue}->end();
+    $self->_join_threads([threads->list]);
+}
+
+# Join every thread in the array reference $threads, skipping the main
+# thread (tid 0) and the calling thread.
+sub _join_threads {
+    my $self = shift;
+    my $threads = shift;
+
+    for my $thr (@{$threads}) {
+        if ($thr->tid && !threads::equal($thr, threads->self)) {
+            print "waiting for thread ".$thr->tid()." to finish\n" if $self->{opts}->{debug};
+            $thr->join;
+        }
+    }
+}
+
+# Add $count to the shared error counter.
+sub _add_errors {
+    my $self = shift;
+    my $count = shift;
+
+    return unless $count;
+
+    # "+=" on a shared value is a separate read and write. Without the lock
+    # two threads could read the same total and one update would be lost.
+    lock(%{$self});
+    $self->{errors} += $count;
+    return;
+}
+
+# Returns true if any error has been recorded.
+sub get_error_status {
+    my $self = shift;
+
+    return $self->{errors} > 0;
+}
+
+# Read the repo database $repodb and queue each "<package>/desc" entry in it
+# for verification. Package files are looked up in the directory that
+# contains the database.
+sub check_repodb {
+    my $self = shift;
+    my $repodb = shift;
+
+    my $db = Archive::Tar->new();
+
+    # In scalar context read() returns the number of entries, which may be 0
+    # for an empty database, and undef if the file could not be read.
+    my $entries = $db->read($repodb);
+    unless (defined $entries) {
+        my $reason = $db->error() || "unknown error";
+        $self->_output(sprintf("Failed to read database '%s': %s\n", $repodb, $reason));
+        $self->_add_errors(1);
+        return;
+    }
+
+    my $dirname = dirname($repodb);
+    my $pkgcount = 0;
+
+    for my $file_object ($db->get_files()) {
+        if ($file_object->name =~ m/^([^\/]+)\/desc$/) {
+            my $package = $1;
+            $self->{package_queue}->enqueue({
+                package => $package,
+                db_desc_content => $file_object->get_content(),
+                dirname => $dirname,
+            });
+            $pkgcount++;
+        }
+    }
+
+    $self->_debug(sprintf("Queued %d package(s) from database '%s'\n", $pkgcount, $repodb));
+}
+
+# Parse the content of a desc file. Returns a hash reference that maps each
+# %KEY% header to an array reference of the lines following it; an empty
+# line ends the section. Dies on a line that belongs to no section.
+sub _parse_db_entry {
+    my $self = shift;
+    my $content = shift;
+    my %db;
+    my $key;
+
+    for my $line (split /\n/, $content) {
+        if ($line eq '') {
+            $key = undef;
+        } elsif (defined $key) {
+            push @{$db{$key}}, $line;
+        } elsif ($line =~ m/^%(.+)%$/) {
+            $key = $1;
+        } else {
+            die "\$key not set. Is the db formatted incorrectly?";
+        }
+    }
+    return \%db;
+}
+
+# Queue $output for printing, prefixed with the id of the calling thread.
+sub _output {
+    my $self = shift;
+    my $output = shift;
+
+    return if $output eq "";
+
+    $output = sprintf("Thread %s: %s", threads->self->tid(), $output);
+    $self->{output_queue}->enqueue($output);
+}
+
+# Like _output(), but only when debug output is enabled.
+sub _debug {
+    my $self = shift;
+    my $output = shift;
+    $self->_output($output) if $self->{opts}->{debug};
+}
+
+# Verify one parsed db entry against its package file in $basedir.
+# Returns the number of problems found.
+sub _verify_db_entry {
+    my $self = shift;
+    my $basedir = shift;
+    my $dbdata = shift;
+    my $ret = 0;
+
+    # Without a file name the path built below would be $basedir itself,
+    # which exists and would be checked in place of a package.
+    my $filename = $dbdata->{FILENAME}[0];
+    unless (defined $filename) {
+        my $name = $dbdata->{NAME}[0];
+        $self->_output(sprintf("Database entry has no FILENAME: %s\n", defined $name ? $name : "(unknown)"));
+        return 1;
+    }
+
+    # verify package exists
+    my $pkgfile = $basedir.'/'.$filename;
+    $self->_debug(sprintf("Checking package %s\n", $filename));
+    unless (-e $pkgfile) {
+        $self->_output(sprintf("Package file missing: %s\n", $pkgfile));
+        return 1;
+    }
+
+    $ret += $self->_verify_package_size($dbdata, $pkgfile);
+    $ret += $self->_verify_package_checksum($dbdata, $pkgfile) if $self->{opts}->{checksum};
+
+    return $ret;
+}
+
+# Compare the size of $pkgfile with the CSIZE recorded in the db entry.
+# Returns 1 on a mismatch or if either size is unavailable, 0 otherwise.
+sub _verify_package_size {
+    my $self = shift;
+    my $dbdata = shift;
+    my $pkgfile = shift;
+
+    my $csize = $dbdata->{CSIZE}[0];
+    unless (defined $csize and $csize =~ m/\A[0-9]+\z/) {
+        $self->_output(sprintf("Database entry has no valid CSIZE: %s\n", $pkgfile));
+        return 1;
+    }
+
+    # stat() returns an empty list on failure, leaving $filesize undefined.
+    my $filesize = (stat($pkgfile))[7];
+    unless (defined $filesize) {
+        $self->_output(sprintf("Failed to stat package file: %s: %s\n", $pkgfile, $!));
+        return 1;
+    }
+
+    unless ($csize == $filesize) {
+        $self->_output(sprintf("Package file has incorrect size: %d vs %d: %s\n", $csize, $filesize, $pkgfile));
+        return 1;
+    }
+    return 0;
+}
+
+# Compare the MD5 and SHA256 checksums of $pkgfile with the MD5SUM and
+# SHA256SUM recorded in the db entry. Returns 1 if either differs or the
+# file cannot be read, 0 otherwise.
+sub _verify_package_checksum {
+    my $self = shift;
+    my $dbdata = shift;
+    my $pkgfile = shift;
+
+    my $md5 = Digest::MD5->new;
+    my $sha = Digest::SHA->new(256);
+
+    my $fh;
+    unless (open $fh, "<", $pkgfile) {
+        $self->_output(sprintf("Failed to open package file: %s: %s\n", $pkgfile, $!));
+        return 1;
+    }
+    binmode $fh;
+
+    my $content;
+    my $bytes;
+    while ($bytes = read($fh, $content, CHUNK_SIZE)) {
+        $md5->add($content);
+        $sha->add($content);
+    }
+    # read() returns 0 at end of file but undef on error; only the former
+    # means the digests cover the whole file.
+    my $read_error = defined $bytes ? undef : "$!";
+    close $fh;
+    if (defined $read_error) {
+        $self->_output(sprintf("Failed to read package file: %s: %s\n", $pkgfile, $read_error));
+        return 1;
+    }
+
+    # A checksum that is absent from the db can never match. Substitute a
+    # marker so that the comparison and the report do not work on undef.
+    my $expected_sha = $dbdata->{SHA256SUM}[0];
+    my $expected_md5 = $dbdata->{MD5SUM}[0];
+    $expected_sha = "(missing)" unless defined $expected_sha;
+    $expected_md5 = "(missing)" unless defined $expected_md5;
+    my $got_md5 = $md5->hexdigest;
+    my $got_sha = $sha->hexdigest;
+
+    unless ($expected_sha eq $got_sha and $expected_md5 eq $got_md5) {
+        my $output;
+        $output .= sprintf "Package file has incorrect checksum: %s\n", $pkgfile;
+        $output .= sprintf "expected: SHA %s\n", $expected_sha;
+        $output .= sprintf "got: SHA %s\n", $got_sha;
+        $output .= sprintf "expected: MD5 %s\n", $expected_md5;
+        $output .= sprintf "got: MD5 %s\n", $got_md5;
+        $self->_output($output);
+        return 1;
+    }
+    return 0;
+}
+
-- 
cgit v1.2.3-24-g4f1b