From ff1530def072daf95f077ec0f8a4d984da4304d6 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Sat, 27 Feb 2010 11:27:13 -0600
Subject: create-filelists: rework the package loop completely

Instead of wasting time extracting .PKGINFO twice from every single
package in the repos, use the package DB to eliminate most of the heavy
lifting. This way we only need to worry about looking at the packages
that actually have changed since the last time we built the package
database.

This should give a noticeable performance increase to this job in
addition to reducing IO load and unnecessary reading of every package
file.

Signed-off-by: Dan McGee
---
 cron-jobs/create-filelists | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions, 13 deletions

diff --git a/cron-jobs/create-filelists b/cron-jobs/create-filelists
index a0b6a57..6091bf4 100755
--- a/cron-jobs/create-filelists
+++ b/cron-jobs/create-filelists
@@ -14,8 +14,12 @@ if [ -f "$lock" ]; then
 fi
 touch "$lock" || exit 1
 
-TMPDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1
-CACHEDIR="$(mktemp -d /tmp/create-filelists.XXXXXX)" || exit 1
+# location where the package DB is extracted so we know what to include
+DBDIR="$(mktemp -d /tmp/create-filelists.dbdir.XXXXXX)" || exit 1
+# location where the old files DB is extracted to save us some work
+CACHEDIR="$(mktemp -d /tmp/create-filelists.cachedir.XXXXXX)" || exit 1
+# location where the new files DB is built up and eventually zipped
+TMPDIR="$(mktemp -d /tmp/create-filelists.tmpdir.XXXXXX)" || exit 1
 
 #adjust the nice level to run at a lower priority
 /usr/bin/renice +10 -p $$ > /dev/null
@@ -30,33 +34,45 @@ esac
 FILESEXT="${DBEXT//db/files}"
 
 for repo in $repos; do
+	REPO_DB_FILE="${repo}$DBEXT"
 	FILES_DB_FILE="${repo}$FILESEXT"
 	for arch in ${ARCHES[@]}; do
+#		echo "Running for architecture $arch, repo $repo"
 		cd "$reposdir"
 
 		repodir="${repo}/os/${arch}"
 		cached="no"
 
+		# extract package db archive
+		if [ -f "${targetdir}/${repodir}/${REPO_DB_FILE}" ]; then
+			mkdir -p "${DBDIR}/${repodir}"
+#			echo "extracting $REPO_DB_FILE"
+			bsdtar -xf "${targetdir}/${repodir}/${REPO_DB_FILE}" -C "${DBDIR}/${repodir}"
+		else
+			echo "Fail! Does the repo $repo with arch $arch even exist?"
+			continue
+		fi
+
 		# extract old file archive
 		if [ -f "${targetdir}/${repodir}/${FILES_DB_FILE}" ]; then
 			mkdir -p "${CACHEDIR}/${repodir}"
+#			echo "extracting $FILES_DB_FILE"
 			bsdtar -xf "${targetdir}/${repodir}/${FILES_DB_FILE}" -C "${CACHEDIR}/${repodir}"
 			cached="yes"
 		fi
 
 		# create file lists
-		for pkg in $repodir/*${PKGEXT}; do
-			pkgname="$(getpkgname "$pkg")"
-			pkgver="$(getpkgver "$pkg")"
-			tmppkgdir="${TMPDIR}/${repodir}/${pkgname}-${pkgver}"
+		for pkg in $(ls ${DBDIR}/${repodir}); do
+			tmppkgdir="${TMPDIR}/${repodir}/${pkg}"
 			mkdir -p "$tmppkgdir"
-			if [ -f "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" ]; then
-#				echo "cache: $pkgname"
-				mv "${CACHEDIR}/${repodir}/${pkgname}-${pkgver}/files" "${tmppkgdir}/files"
+			if [ -f "${CACHEDIR}/${repodir}/${pkg}/files" ]; then
+#				echo "cache: $pkg"
+				mv "${CACHEDIR}/${repodir}/${pkg}/files" "${tmppkgdir}/files"
 			else
-#				echo "$repo/$arch: $pkgname"
+#				echo "not cache: $repo/$arch: $pkg"
+				filename=$(grep -A1 '^%FILENAME%$' "${DBDIR}/${repodir}/${pkg}/desc" | tail -n1)
 				echo '%FILES%' > "${tmppkgdir}/files"
-				bsdtar --exclude=.* -tf "$pkg" >> "${tmppkgdir}/files"
+				bsdtar --exclude=.* -tf "$repodir/$filename" >> "${tmppkgdir}/files"
 				cached="no"
 			fi
 		done
@@ -76,8 +92,7 @@ for repo in $repos; do
 done
 
 cd - >/dev/null
-rm -rf "$TMPDIR" || exit 1
-rm -rf "$CACHEDIR" || exit 1
+rm -rf "$TMPDIR" "$CACHEDIR" "$DBDIR"
 rm -f "$lock" || exit 1
 # echo 'done'
 
-- 
cgit v1.2.3-24-g4f1b