From 683db123710ff9092ae99929fce232620932fdb0 Mon Sep 17 00:00:00 2001
From: Pierre Schmitz
Date: Wed, 18 Aug 2010 16:20:27 +0200
Subject: Rewrite ftpdir-cleanup

* runtime reduced to < 1 second
* uses file lists and the comm command
* searches for missing packages
* removes empty legacy directories like extra/os/any
---
 cron-jobs/ftpdir-cleanup         | 116 +++++++++++++++++++-----
 misc-scripts/ftpdir-cleanup-repo | 187 ---------------------------------------
 2 files changed, 92 insertions(+), 211 deletions(-)
 delete mode 100755 misc-scripts/ftpdir-cleanup-repo

diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup
index 3456e9c..0a43bd1 100755
--- a/cron-jobs/ftpdir-cleanup
+++ b/cron-jobs/ftpdir-cleanup
@@ -3,37 +3,105 @@
 . "$(dirname $0)/../db-functions"
 . "$(dirname $0)/../config"
 
-repos="$(get_repos_for_host)"
+clean_pkg() { # delete symlinks, move other files to $CLEANUP_DESTDIR; no-op in dry run mode
+	local pkg
+	local target
+
+	if ! ${CLEANUP_DRYRUN}; then
+		for pkg in "$@"; do
+			if [ -h "$pkg" ]; then
+				rm -f "$pkg"
+			else
+				mv -f "$pkg" "$CLEANUP_DESTDIR"
+			fi
+		done
+	fi
+}
+
+repos=($(get_repos_for_host))
 
 script_lock
 
-#adjust the nice level to run at a lower priority
-/usr/bin/renice +10 -p $$ > /dev/null
+for repo in ${repos[@]}; do
+	for arch in ${ARCHES[@]}; do
+		repo_lock ${repo} ${arch} || exit 1
+	done
+done
+
+${CLEANUP_DRYRUN} && warning 'dry run mode is active'
 
-repopaths=''
-for repo in $repos; do
-	$(dirname $0)/../misc-scripts/ftpdir-cleanup-repo $repo
-	repopaths="${repopaths} ${FTP_BASE}/${repo}/os/"
+for repo in ${repos[@]}; do
+	for arch in ${ARCHES[@]}; do
+		if [ ! -f "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT}" ]; then
+			warning "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT} not found, skipping"
+			continue
+		fi
+		# get a list of actual available package files
+		find "${FTP_BASE}/${repo}/os/${arch}" -xtype f -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/repo-${repo}-${arch}"
+		# get a list of package files defined in the repo db
+		bsdtar -xOf "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT}" | awk '/^%FILENAME%/{getline;print}' | sort > "${WORKDIR}/db-${repo}-${arch}"
+
+		missing_pkgs=($(comm -13 "${WORKDIR}/repo-${repo}-${arch}" "${WORKDIR}/db-${repo}-${arch}")) # listed in the db, but no file in the repo dir
+		if [ ${#missing_pkgs[@]} -ge 1 ]; then
+			error "Missing packages in [${repo}] (${arch})..."
+			for missing_pkg in ${missing_pkgs[@]}; do
+				msg2 "${missing_pkg}"
+			done
+		fi
+
+		old_pkgs=($(comm -23 "${WORKDIR}/repo-${repo}-${arch}" "${WORKDIR}/db-${repo}-${arch}")) # file in the repo dir, but not listed in the db
+		if [ ${#old_pkgs[@]} -ge 1 ]; then
+			msg "Removing old packages from [${repo}] (${arch})..."
+			for old_pkg in ${old_pkgs[@]}; do
+				msg2 "${old_pkg}"
+				clean_pkg "${FTP_BASE}/${repo}/os/${arch}/${old_pkg}"
+			done
+		fi
+	done
 done
 
-to_cleanup=""
-poolpath="$FTP_BASE/$(get_pkgpool_for_host)/"
-pushd $poolpath >/dev/null
-for pkg in *$PKGEXT; do
-[ -f "$pkg" ] || continue # in case we get a file named "*.pkg.tar.gz"
-LINKS="$(/usr/bin/find $repopaths -type l -name "$pkg" 2>/dev/null)"
-if [ -z "$LINKS" ]; then
-	to_cleanup="$to_cleanup $poolpath/$pkg"
+# get a list of all available packages in the package pool
+find "$FTP_BASE/$(get_pkgpool_for_host)" -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/pool"
+# create a list of packages in our db
+cat "${WORKDIR}/db-"* | sort -u > "${WORKDIR}/db"
+
+old_pkgs=($(comm -23 "${WORKDIR}/pool" "${WORKDIR}/db"))
+if [ ${#old_pkgs[@]} -ge 1 ]; then
+	msg "Removing old packages from package pool..."
+	for old_pkg in ${old_pkgs[@]}; do
+		msg2 "${old_pkg}"
+		clean_pkg "$FTP_BASE/$(get_pkgpool_for_host)/${old_pkg}"
+	done
 fi
+
+# cleanup of legacy $repo/os/any directories
+for repo in ${repos[@]}; do
+	if [ ! -d "${FTP_BASE}/${repo}/os/any" ]; then
+		continue
+	fi
+	if [ -n "$(find "${FTP_BASE}/${repo}/os/any" -type d -empty)" ]; then
+		msg "Removing empty legacy directory ${repo}/os/any"
+		${CLEANUP_DRYRUN} || rmdir "${FTP_BASE}/${repo}/os/any" # dry run only reports, never deletes
+		continue
+	fi
+	find "${FTP_BASE}/${repo}/os/any" -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/any-${repo}"
+	cat "${WORKDIR}/db-${repo}-"* | sort -u > "${WORKDIR}/all-${repo}"
+
+	old_pkgs=($(comm -23 "${WORKDIR}/any-${repo}" "${WORKDIR}/all-${repo}"))
+	if [ ${#old_pkgs[@]} -ge 1 ]; then
+		msg "Removing old packages from [${repo}] (any)..."
+		for old_pkg in ${old_pkgs[@]}; do
+			msg2 "${old_pkg}"
+			clean_pkg "${FTP_BASE}/${repo}/os/any/${old_pkg}"
+		done
+	fi
+done
+
+
+for repo in ${repos[@]}; do
+	for arch in ${ARCHES[@]}; do
+		repo_unlock ${repo} ${arch}
+	done
 done
-popd >/dev/null
-
-if [ -n "$to_cleanup" ]; then
-	msg "The following packages are no longer in any repo"
-	for f in $to_cleanup; do
-		msg2 "$(basename "$f")"
-	done
-	${CLEANUP_DRYRUN} || mv $to_cleanup "$CLEANUP_DESTDIR"
-fi
 
 script_unlock
diff --git a/misc-scripts/ftpdir-cleanup-repo b/misc-scripts/ftpdir-cleanup-repo
deleted file mode 100755
index bfc971e..0000000
--- a/misc-scripts/ftpdir-cleanup-repo
+++ /dev/null
@@ -1,187 +0,0 @@
-#!/bin/bash
-
-if [ $# -ne 1 ]; then
-	msg "usage: $(basename $0) "
-	exit 1
-fi
-
-reponame=$1
-
-############################################################
-
-. "$(dirname $0)/../db-functions"
-. "$(dirname $0)/../config"
-
-clean_pkgs () {
-	if ! ${CLEANUP_DRYRUN}; then
-		for pkg in "$@"; do
-			if [ -h "$pkg" ]; then
-				rm -f "$pkg"
-			else
-				mv "$pkg" "$CLEANUP_DESTDIR"
-			fi
-		done
-	fi
-}
-
-${CLEANUP_DRYRUN} && warning 'dry run mode is active'
-
-ftppath_base="$FTP_BASE/$reponame/os"
-
-for arch in ${ARCHES[@]}; do
-
-	repo_lock $reponame $arch $LOCK_TIMEOUT || continue
-
-	CLEANUP_TMPDIR=$(mktemp -d ${WORKDIR}/cleanup-XXXXXX) || exit 1
-	ftppath="$ftppath_base/$arch"
-	MISSINGFILES=""
-	DELETEFILES=""
-	DELETESYMLINKS=""
-	EXTRAFILES=""
-
-	if [ ! -d "$ftppath" ]; then
-		error "FTP path '$ftppath' does not exist"
-		exit 1
-	fi
-
-	if ! cd "${CLEANUP_TMPDIR}" ; then
-		error "Failed to cd to ${CLEANUP_TMPDIR}"
-		exit 1
-	fi
-
-	if [ ! -f "$ftppath/$reponame$DBEXT" ]; then
-		msg "The file \"$ftppath/$reponame$DBEXT\" could not be found, skipping."
-		repo_unlock $reponame $arch
-		continue
-	fi
-
-	if ! bsdtar xf "$ftppath/$reponame$DBEXT"; then
-		error "Command failed: bsdtar xf \"$ftppath/$reponame$DBEXT\""
-		exit 1
-	fi
-
-	cd "$ftppath"
-
-	for pkg in $CLEANUP_TMPDIR/*; do
-		[ ! -d "${pkg}" ] && continue
-		filename=$(grep -A1 '^%FILENAME%$' "${pkg}/desc" | tail -n1)
-
-		if [ ! -e "${filename}" ]; then
-			MISSINGFILES="${MISSINGFILES} ${filename}"
-		else
-			pkgname="$(getpkgname ${filename})"
-			for otherfile in ${pkgname}-*; do
-				if [ "${otherfile}" != "${filename}" -a "${pkgname}" = "$(getpkgname ${otherfile})" ]; then
-					if [ -h "${otherfile}" ]; then
-						DELETESYMLINKS="${DELETESYMLINKS} ${otherfile}"
-					else
-						DELETEFILES="${DELETEFILES} ${otherfile}"
-					fi
-				fi
-			done
-		fi
-	done
-
-	for pkg in *$PKGEXT; do
-		if [ ! -e "$pkg" ]; then
-			continue
-		fi
-		pkgname="$(getpkgname $pkg)"
-		for p in ${CLEANUP_TMPDIR}/${pkgname}-*; do
-			[ ! -d "${p}" ] || continue 2
-			dbpkgname=$(grep -A1 '^%FILENAME%$' "${p}/desc" 2>/dev/null| tail -n1)
-			if [ "${dbpkgname}" = "${pkgname}" ]; then
-				continue 2
-			fi
-		done
-		EXTRAFILES="$EXTRAFILES $pkg"
-	done
-
-	rm -rf ${CLEANUP_TMPDIR}
-
-	# Do a quick check to see if a missing ARCHINDEPFILE is in the any dir
-	# If it is, and the file is MISSING, restore it
-	missfiles="$MISSINGFILES"
-	MISSINGFILES=""
-	for mf in $missfiles; do
-		if [ -e "${ftppath_base}/any/${mf}" ]; then
-			msg "Restoring missing 'any' symlink: ${mf}"
-			${CLEANUP_DRYRUN} || ln -s "../any/${mf}" "${ftppath}"
-		else
-			MISSINGFILES="${MISSINGFILES} ${mf}"
-		fi
-	done
-
-	repo_unlock $reponame $arch
-
-	#Make sure we've done *something* before outputting anything
-	if [ -z "$DELETEFILES$DELETESYMLINKS$MISSINGFILES$EXTRAFILES" ]; then
-		continue
-	fi
-
-	msg "Scan complete for $reponame ($arch) at ${ftppath}"
-
-	if [ -n "$MISSINGFILES" ]; then
-		for f in $MISSINGFILES; do
-			error "$f is missing"
-		done
-	fi
-
-	if [ -n "${DELETEFILES}" ]; then
-		msg "The following files are out of date"
-		for f in $DELETEFILES; do
-			msg2 "$f"
-		done
-		clean_pkgs ${DELETEFILES}
-	fi
-
-	if [ -n "${DELETESYMLINKS}" ]; then
-		msg "The following symlinks are out of date"
-		for f in $DELETESYMLINKS; do
-			msg2 "$f"
-		done
-		clean_pkgs ${DELETESYMLINKS}
-	fi
-
-	if [ -n "${EXTRAFILES}" ]; then
-		msg "The following files are in the repo but not the db"
-		for f in $EXTRAFILES; do
-			msg2 "$f"
-		done
-		clean_pkgs ${EXTRAFILES}
-	fi
-
-done
-
-ARCHINDEPFILES=""
-
-if [ -d "$ftppath_base/any" ]; then
-	cd "$ftppath_base/any"
-	for pkg in *$PKGEXT; do
-		[ -f "$pkg" ] || continue # in case we get a file named "*.pkg.tar.gz"
-		found=0
-		#check for any existing symlinks
-		for arch in ${ARCHES[@]}; do
-			if [ -h "$ftppath_base/$arch/$pkg" ]; then
-				found=1
-				break
-			fi
-		done
-		if [ $found -eq 0 ]; then
-			# We found no symlinks to this, delete it
-			ARCHINDEPFILES="$ARCHINDEPFILES $pkg"
-		fi
-	done
-fi
-
-if [ -n "$ARCHINDEPFILES" ]; then
-	msg "The following architecture independent packages are not symlinked in the architecture repositories."
-	for f in $ARCHINDEPFILES; do
-		msg2 "$f"
-	done
-fi
-
-if [ -d "$ftppath_base/any" -a -n "${ARCHINDEPFILES}" ]; then
-	cd "$ftppath_base/any"
-	clean_pkgs ${ARCHINDEPFILES}
-fi
-- 
cgit v1.2.3-24-g4f1b