From 2f7d770b261b3428bcff366ba6ff4fa631dd980a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 13 May 2012 20:15:00 -0500 Subject: Add rsync support to mirrorcheck and other small improvements The main changes in this patch implement rsync:// protocol checking support by calling the rsync binary, requested in FS#29878. We track and log much of the same things as we already do for FTP and HTTP URLs- check time, last sync, total check duration, etc. Also added in this patch is a configurable timeout value which defaults to the previous hardcoded value of 10 seconds; this can be passed as an option to the mirrorcheck command. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 130 ++++++++++++++++++++--------- templates/mirrors/status.html | 4 +- 2 files changed, 91 insertions(+), 43 deletions(-) diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index c126922..ae89d5e 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -9,22 +9,26 @@ we encounter errors, record those as well. Usage: ./manage.py mirrorcheck """ -from django.core.management.base import NoArgsCommand -from django.db import transaction - from collections import deque from datetime import datetime import logging +import os +from optparse import make_option +from pytz import utc import re import socket +import subprocess import sys import time +import tempfile from threading import Thread import types -from pytz import utc from Queue import Queue, Empty import urllib2 +from django.core.management.base import NoArgsCommand +from django.db import transaction + from main.utils import utc_now from mirrors.models import MirrorUrl, MirrorLog @@ -37,10 +41,15 @@ logger = logging.getLogger() class Command(NoArgsCommand): + option_list = NoArgsCommand.option_list + ( + make_option('-t', '--timeout', dest='timeout', default='10', + help='Timeout value for connecting to URL'), + ) help = "Runs a check on all known mirror URLs to determine their up-to-date status." def handle_noargs(self, **options): v = int(options.get('verbosity', 0)) + timeout = int(options.get('timeout', 10)) if v == 0: logger.level = logging.ERROR elif v == 1: @@ -48,10 +57,29 @@ class Command(NoArgsCommand): elif v == 2: logger.level = logging.DEBUG - return check_current_mirrors() + urls = MirrorUrl.objects.select_related('protocol').filter( + mirror__active=True, mirror__public=True) + + pool = MirrorCheckPool(urls, timeout) + pool.run() + return 0 -def check_mirror_url(mirror_url): +def parse_lastsync(log, data): + '''lastsync file should be an epoch value created by us.''' + try: + parsed_time = datetime.utcfromtimestamp(int(data)) + log.last_sync = parsed_time.replace(tzinfo=utc) + except ValueError: + # it is bad news to try logging the lastsync value; + # sometimes we get a crazy-encoded web page. + # if we couldn't parse a time, this is a failure. + log.last_sync = None + log.error = "Could not parse time from lastsync" + log.is_success = False + + +def check_mirror_url(mirror_url, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) log = MirrorLog(url=mirror_url, check_time=utc_now()) @@ -59,28 +87,14 @@ def check_mirror_url(mirror_url): req = urllib2.Request(url, None, headers) try: start = time.time() - result = urllib2.urlopen(req, timeout=10) + result = urllib2.urlopen(req, timeout=timeout) data = result.read() result.close() end = time.time() - # lastsync should be an epoch value created by us - parsed_time = None - try: - parsed_time = datetime.utcfromtimestamp(int(data)) - parsed_time = parsed_time.replace(tzinfo=utc) - except ValueError: - # it is bad news to try logging the lastsync value; - # sometimes we get a crazy-encoded web page. - pass - - log.last_sync = parsed_time - # if we couldn't parse a time, this is a failure - if parsed_time is None: - log.error = "Could not parse time from lastsync" - log.is_success = False + parse_lastsync(log, data) log.duration = end - start logger.debug("success: %s, %.2f", url, log.duration) - except urllib2.HTTPError, e: + except urllib2.HTTPError as e: if e.code == 404: # we have a duration, just not a success end = time.time() @@ -88,7 +102,7 @@ def check_mirror_url(mirror_url): log.is_success = False log.error = str(e) logger.debug("failed: %s, %s", url, log.error) - except urllib2.URLError, e: + except urllib2.URLError as e: log.is_success = False log.error = e.reason if isinstance(e.reason, types.StringTypes) and \ @@ -101,20 +115,64 @@ def check_mirror_url(mirror_url): elif isinstance(e.reason, socket.error): log.error = e.reason.args[1] logger.debug("failed: %s, %s", url, log.error) - except socket.timeout, e: + except socket.timeout as e: log.is_success = False log.error = "Connection timed out." logger.debug("failed: %s, %s", url, log.error) + except socket.error as e: + log.is_success = False + log.error = str(e) + logger.debug("failed: %s, %s", url, log.error) + + return log + + +def check_rsync_url(mirror_url, timeout): + url = mirror_url.url + 'lastsync' + logger.info("checking URL %s", url) + log = MirrorLog(url=mirror_url, check_time=utc_now()) + + tempdir = tempfile.mkdtemp() + lastsync_path = os.path.join(tempdir, 'lastsync') + rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout, + "--timeout=%d" % timeout, url, lastsync_path] + try: + with open(os.devnull, 'w') as devnull: + proc = subprocess.Popen(rsync_cmd, stdout=devnull, + stderr=subprocess.PIPE) + start = time.time() + _, errdata = proc.communicate() + end = time.time() + log.duration = end - start + if proc.returncode != 0: + logger.debug("error: %s, %s", url, errdata) + log.is_success = False + log.error = errdata.strip() + # look at rsync error code- if we had a command error or timed out, + # don't record a duration as it is misleading + if proc.returncode in (1, 30, 35): + log.duration = None + else: + logger.debug("success: %s, %.2f", url, log.duration) + with open(lastsync_path, 'r') as lastsync: + parse_lastsync(log, lastsync.read()) + finally: + if os.path.exists(lastsync_path): + os.unlink(lastsync_path) + os.rmdir(tempdir) return log -def mirror_url_worker(work, output): +def mirror_url_worker(work, output, timeout): while True: try: - item = work.get(block=False) + url = work.get(block=False) try: - log = check_mirror_url(item) + if url.protocol.protocol == 'rsync': + log = check_rsync_url(url, timeout) + else: + log = check_mirror_url(url, timeout) output.append(log) finally: work.task_done() @@ -123,7 +181,7 @@ def mirror_url_worker(work, output): class MirrorCheckPool(object): - def __init__(self, urls, num_threads=10): + def __init__(self, urls, timeout=10, num_threads=10): self.tasks = Queue() self.logs = deque() for i in list(urls): @@ -131,7 +189,7 @@ class MirrorCheckPool(object): self.threads = [] for i in range(num_threads): thread = Thread(target=mirror_url_worker, - args=(self.tasks, self.logs)) + args=(self.tasks, self.logs, timeout)) thread.daemon = True self.threads.append(thread) @@ -142,18 +200,8 @@ class MirrorCheckPool(object): thread.start() logger.debug("joining on all threads") self.tasks.join() - logger.debug("processing log entries") + logger.debug("processing %d log entries", len(self.logs)) MirrorLog.objects.bulk_create(self.logs) logger.debug("log entries saved") - -def check_current_mirrors(): - urls = MirrorUrl.objects.filter( - protocol__is_download=True, - mirror__active=True, mirror__public=True) - - pool = MirrorCheckPool(urls) - pool.run() - return 0 - # vim: set ts=4 sw=4 et: diff --git a/templates/mirrors/status.html b/templates/mirrors/status.html index 34896c0..2c350f5 100644 --- a/templates/mirrors/status.html +++ b/templates/mirrors/status.html @@ -16,8 +16,8 @@ has synced recently. This page contains several pieces of information about each mirror.