summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dan McGee <dan@archlinux.org> 2012-04-30 04:26:23 +0200
committer: Dan McGee <dan@archlinux.org> 2012-04-30 04:26:23 +0200
commit: 44eb2d5ee0fa9e1b495027cec3e663ff85c0ed1d (patch)
tree: c469201a930f56baa87687e820f00efe9669778c
parent: 25a2fbc7c1cb50fa80ed4de50830721507765a91 (diff)
download: archweb-44eb2d5ee0fa9e1b495027cec3e663ff85c0ed1d.tar.gz
archweb-44eb2d5ee0fa9e1b495027cec3e663ff85c0ed1d.tar.xz
Use a custom User-Agent when checking mirror URLs
Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- mirrors/management/commands/mirrorcheck.py 17
1 files changed, 10 insertions, 7 deletions
diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py
index 7ffb777..c126922 100644
--- a/mirrors/management/commands/mirrorcheck.py
+++ b/mirrors/management/commands/mirrorcheck.py
@@ -35,6 +35,7 @@ logging.basicConfig(
stream=sys.stderr)
logger = logging.getLogger()
+
class Command(NoArgsCommand):
help = "Runs a check on all known mirror URLs to determine their up-to-date status."
@@ -49,13 +50,16 @@ class Command(NoArgsCommand):
return check_current_mirrors()
+
def check_mirror_url(mirror_url):
url = mirror_url.url + 'lastsync'
logger.info("checking URL %s", url)
log = MirrorLog(url=mirror_url, check_time=utc_now())
+ headers = {'User-Agent': 'archweb/1.0'}
+ req = urllib2.Request(url, None, headers)
try:
start = time.time()
- result = urllib2.urlopen(url, timeout=10)
+ result = urllib2.urlopen(req, timeout=10)
data = result.read()
result.close()
end = time.time()
@@ -104,6 +108,7 @@ def check_mirror_url(mirror_url):
return log
+
def mirror_url_worker(work, output):
while True:
try:
@@ -116,11 +121,12 @@ def mirror_url_worker(work, output):
except Empty:
return 0
+
class MirrorCheckPool(object):
- def __init__(self, work, num_threads=10):
+ def __init__(self, urls, num_threads=10):
self.tasks = Queue()
self.logs = deque()
- for i in list(work):
+ for i in list(urls):
self.tasks.put(i)
self.threads = []
for i in range(num_threads):
@@ -140,6 +146,7 @@ class MirrorCheckPool(object):
MirrorLog.objects.bulk_create(self.logs)
logger.debug("log entries saved")
+
def check_current_mirrors():
urls = MirrorUrl.objects.filter(
protocol__is_download=True,
@@ -149,8 +156,4 @@ def check_current_mirrors():
pool.run()
return 0
-# For lack of a better place to put it, here is a query to get latest check
-# result joined with mirror details:
-# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1;
-
# vim: set ts=4 sw=4 et: