From 8b6bbd57742623dbf1900e3212ca719de465a97f Mon Sep 17 00:00:00 2001
From: Pierre Schmitz
Date: Mon, 12 Apr 2010 01:01:26 +0200
Subject: Optimize md5sums.txt creation

Thanks to ZaB|SHC| for the awk script. In combination with parallel
processing we are about 60 times faster than before.
---
Note (ignored by git-am): line structure was restored from a copy whose
newlines had been collapsed. Hunk line counts match the hunk header and the
diffstat below; indentation characters (tabs) are a best-effort
reconstruction and may need `git apply --ignore-whitespace`.

 checkrepo | 50 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/checkrepo b/checkrepo
index ec8a5e7..a62ff03 100755
--- a/checkrepo
+++ b/checkrepo
@@ -5,29 +5,49 @@ target="${home}/repo"
 lock='/tmp/mirrorsync.lck'
 tmp="$(mktemp -d)"
 
-[ -f "${lock}" ] && exit 1
+[[ -f "${lock}" ]] && exit 1
 touch "${lock}"
 
-grep_dbinfo() {
-	local _ret
+echo '
+	BEGIN {
+		arg_filename = ARGV[1]
+	}
 
-	_ret="$(grep -m 1 -A 1 "%$2%" "$1" | tail -1)"
-	echo -n "$_ret"
-}
+	arg_filename != FILENAME {
+		arg_filename = FILENAME
+		printf "%s %s%s\n", md5sum, path, filename
+	}
 
+	$0 == "%MD5SUM%" {
+		getline md5sum
+	}
+
+	$0 == "%FILENAME%" {
+		getline filename
+	}
+
+	END {
+		arg_filename = FILENAME
+		printf "%s %s%s\n", md5sum, path, filename
+	}
+	' > ${tmp}/dbinfo.awk
+
+echo 'Checking repository...'
 for repo in {core,extra,community,testing,community-testing}; do
 	for arch in {i686,x86_64}; do
-		echo "Reading $arch/$repo.db.tar.gz..."
-		mkdir -p $tmp/$repo/$arch
-		bsdtar xf $target/$repo/os/$arch/$repo.db.tar.gz -C $tmp/$repo/$arch
-		for p in $tmp/$repo/$arch/*/desc; do
-			echo "$(grep_dbinfo $p MD5SUM) $target/$repo/os/$arch/$(grep_dbinfo $p FILENAME)" >> $tmp/md5sums.txt
-		done
+		(
+		mkdir -p ${tmp}/${repo}/${arch}
+		bsdtar xf ${target}/${repo}/os/${arch}/${repo}.db.tar.gz -C ${tmp}/${repo}/${arch}
+		find ${tmp}/${repo}/${arch} -name desc -print0 \
+			| xargs -0 awk -f ${tmp}/dbinfo.awk -vpath=${target}/${repo}/os/${arch}/ \
+			>> ${tmp}/md5sums-${arch}.txt
+		)&
 	done
+	wait
 done
 
-echo 'Checking repository...'
-md5sum -c --quiet $tmp/md5sums.txt
+cat ${tmp}/md5sums-{i686,x86_64}.txt > ${tmp}/md5sums.txt
+md5sum -c --quiet ${tmp}/md5sums.txt
 
-rm -rf $tmp
+rm -rf ${tmp}
 rm -f "${lock}"
-- 
cgit v1.2.3-24-g4f1b