-rw-r--r--  mirrors/management/__init__.py               0
-rw-r--r--  mirrors/management/commands/__init__.py      0
-rw-r--r--  mirrors/management/commands/mirrorcheck.py   149
3 files changed, 149 insertions, 0 deletions
diff --git a/mirrors/management/__init__.py b/mirrors/management/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mirrors/management/__init__.py
diff --git a/mirrors/management/commands/__init__.py b/mirrors/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mirrors/management/commands/__init__.py
diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py
new file mode 100644
index 0000000..4a933ca
--- /dev/null
+++ b/mirrors/management/commands/mirrorcheck.py
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+"""
+mirrorcheck command
+
+Poll every active mirror URL we have in the database, grab the 'lastsync' file,
+and record details about how long it took and how up to date the mirror is. If
+we encounter errors, record those as well.
+
+Usage: ./manage.py mirrorcheck
+"""
+
+from django.core.management.base import NoArgsCommand
+from django.db.models import Q
+
+from datetime import datetime, timedelta
+import logging
+import re
+import socket
+import sys
+import time
+import thread
+from threading import Thread
+from Queue import Queue, Empty
+import urllib2
+
+from logging import ERROR, WARNING, INFO, DEBUG
+
+from mirrors.models import Mirror, MirrorUrl, MirrorLog
+
+logging.basicConfig(
+    level=WARNING,
+    format='%(asctime)s -> %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(NoArgsCommand):
+    help = "Runs a check on all known mirror URLs to determine their up-to-date status."
+
+    def handle_noargs(self, **options):
+        v = int(options.get('verbosity', 0))
+        if v == 0:
+            logger.level = ERROR
+        elif v == 1:
+            logger.level = INFO
+        elif v == 2:
+            logger.level = DEBUG
+
+        import signal, traceback
+        handler = lambda sig, stack: traceback.print_stack(stack)
+        signal.signal(signal.SIGQUIT, handler)
+        signal.signal(signal.SIGUSR1, handler)
+
+        return check_current_mirrors()
+
+def parse_rfc3339_datetime(time):
+    # '2010-09-02 11:05:06+02:00'
+    m = re.match(r'^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time)
+    if m:
+        vals = m.groups()
+        parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]),
+                int(vals[3]), int(vals[4]), int(vals[5]))
+        # now account for time zone offset
+        sign = vals[6]
+        offset = timedelta(hours=int(sign + vals[7]),
+                minutes=int(sign + vals[8]))
+        # subtract the offset, e.g. '-04:00' should be moved up 4 hours
+        return parsed - offset
+    return None
+
+def check_mirror_url(mirror_url):
+    url = mirror_url.url + 'lastsync'
+    logger.info("checking URL %s" % url)
+    log = MirrorLog(url=mirror_url, check_time=datetime.utcnow())
+    try:
+        start = time.time()
+        result = urllib2.urlopen(url, timeout=10)
+        data = result.read()
+        result.close()
+        end = time.time()
+        # lastsync should be an epoch value, but some mirrors
+        # are creating their own in RFC-3339 format:
+        # '2010-09-02 11:05:06+02:00'
+        try:
+            parsed_time = datetime.utcfromtimestamp(int(data))
+        except ValueError:
+            parsed_time = parse_rfc3339_datetime(data)
+
+        log.last_sync = parsed_time
+        log.duration = end - start
+        logger.debug("success: %s, %.2f" % (url, log.duration))
+    except urllib2.HTTPError, e:
+        log.is_success = False
+        log.error = str(e)
+        logger.debug("failed: %s, %s" % (url, log.error))
+    except urllib2.URLError, e:
+        log.is_success = False
+        log.error = e.reason
+        if isinstance(e.reason, socket.timeout):
+            log.error = "Connection timed out."
+        elif isinstance(e.reason, socket.error):
+            log.error = e.reason.args[1]
+        logger.debug("failed: %s, %s" % (url, log.error))
+
+    log.save()
+    return log
+
+def mirror_url_worker(queue):
+    while True:
+        try:
+            item = queue.get(block=False)
+            check_mirror_url(item)
+            queue.task_done()
+        except Empty:
+            return 0
+
+class MirrorCheckPool(object):
+    def __init__(self, work, num_threads=10):
+        self.tasks = Queue()
+        for i in work:
+            self.tasks.put(i)
+        self.threads = []
+        for i in range(num_threads):
+            thread = Thread(target=mirror_url_worker, args=(self.tasks,))
+            thread.daemon = True
+            self.threads.append(thread)
+
+    def run_and_join(self):
+        logger.debug("starting threads")
+        for t in self.threads:
+            t.start()
+        logger.debug("joining on all threads")
+        self.tasks.join()
+
+def check_current_mirrors():
+    urls = MirrorUrl.objects.filter(
+        Q(protocol__protocol__iexact='HTTP') |
+        Q(protocol__protocol__iexact='FTP'),
+        mirror__active=True, mirror__public=True)
+
+    pool = MirrorCheckPool(urls)
+    pool.run_and_join()
+    return 0
+
+# For lack of a better place to put it, here is a query to get latest check
+# result joined with mirror details:
+# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1;
+
+# vim: set ts=4 sw=4 et:
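
The lastsync handling above accepts either an epoch value or an RFC-3339 timestamp, and the time-zone arithmetic in parse_rfc3339_datetime() is easy to misread at a glance. The sketch below is not part of the commit; it is a minimal standalone check of that arithmetic, assuming only the standard-library datetime module and using the example timestamp from the comments.

    # Minimal sketch (not part of the commit): the same offset handling as
    # parse_rfc3339_datetime(), applied to '2010-09-02 11:05:06+02:00'.
    from datetime import datetime, timedelta

    # Epoch form of the same instant: 1283418306 is 2010-09-02 09:05:06 UTC.
    print(datetime.utcfromtimestamp(1283418306))   # 2010-09-02 09:05:06

    # RFC-3339 form: prepend the sign to both offset fields, build a timedelta,
    # then subtract it from the naive local time to get UTC.
    parsed = datetime(2010, 9, 2, 11, 5, 6)
    sign, hours, minutes = '+', '02', '00'
    offset = timedelta(hours=int(sign + hours), minutes=int(sign + minutes))
    print(parsed - offset)                         # 2010-09-02 09:05:06

    # A negative offset such as '-04:00' yields a negative timedelta, so
    # subtracting it moves the time forward by four hours, matching the
    # "should be moved up 4 hours" comment in the command.
    sign, hours, minutes = '-', '04', '00'
    offset = timedelta(hours=int(sign + hours), minutes=int(sign + minutes))
    print(parsed - offset)                         # 2010-09-02 15:05:06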