#!/bin/bash
#
# This is a simple mirroring script. To save bandwidth it first checks a
# timestamp via HTTP and only runs rsync when the timestamp differs from the
# local copy. As of 2016, a single rsync run without changes transfers roughly
# 6MiB of data which adds up to roughly 250GiB of traffic per month when rsync
# is run every minute. Performing a simple check via HTTP first can thus save a
# lot of traffic.
#
# Flow:
#   1. make sure the target and temp directories exist
#   2. take a non-blocking exclusive lock so only one instance runs at a time
#   3. without a terminal (cronjob): if the remote "lastupdate" timestamp
#      matches the local one, only refresh "lastsync" and exit 0
#   4. otherwise run the full rsync
#
# Exit status: 0 when nothing changed; otherwise the status of the last
# command run (mkdir/flock on early abort, rsync for the full sync).

#### CONFIG

home="/srv"
target="${home}/repo"
tmp="${home}/tmp"
lock='/var/lock/syncrepo.lck'

# NOTE: You'll probably want to change this or remove the --bwlimit setting in
# the rsync call below
bwlimit=4096

# NOTE: most people reading this very likely need to change this since
# rsync.archlinux.org requires you to be a tier 1 mirror
source='rsync://rsync.archlinux.org/ftp_tier1'
lastupdate_url="http://rsync.archlinux.org/lastupdate"

#### END CONFIG

# Create the working directories on first run. Abort (with mkdir's exit
# status) if one cannot be created instead of letting rsync fail later.
for dir in "${target}" "${tmp}"; do
  [[ -d "${dir}" ]] || mkdir -p -- "${dir}" || exit
done

# Hold an exclusive lock on fd 9 for the lifetime of the script; if another
# instance already holds it, exit immediately rather than wait.
exec 9>"${lock}"
flock -n 9 || exit

# Extra rsync options, kept as an array so every option stays one word.
# With a terminal attached we want human-readable progress output. Without
# one, a VERBOSE value inherited from the environment is passed through
# (split on whitespace), as in earlier versions of this script.
verbose_opts=()
if stty &>/dev/null; then
  verbose_opts=(-h -v --progress)
elif [[ -n "${VERBOSE:-}" ]]; then
  read -r -a verbose_opts <<<"${VERBOSE}"
fi

rsync_cmd=(
  rsync -rtlH --safe-links --delete-after
  "${verbose_opts[@]}"
  "--timeout=600" "--contimeout=60" -p
  --delay-updates --no-motd
  "--bwlimit=${bwlimit}" "--temp-dir=${tmp}"
)

# if we are called without a tty (cronjob) only run when there are changes
# (curl -L follows redirects so a redirecting server still yields the
# timestamp; any fetch failure makes the diff differ and triggers a full sync)
if ! tty -s \
  && [[ -f "${target}/lastupdate" ]] \
  && diff -b <(curl -Ls "${lastupdate_url}") "${target}/lastupdate" >/dev/null; then
  # keep lastsync file in sync for statistics generated by the Arch Linux website
  "${rsync_cmd[@]}" "${source}/lastsync" "${target}/lastsync"
  exit 0
fi

"${rsync_cmd[@]}" \
  --exclude='*.links.tar.gz*' \
  --exclude='/other' \
  --exclude='/sources' \
  --exclude='/iso' \
  "${source}" \
  "${target}"

#echo "Last sync was $(date -d @$(cat ${target}/lastsync))"