summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2014-01-11 20:07:40 +0100
committerDan McGee <dan@archlinux.org>2014-01-11 20:07:40 +0100
commit3827215fa3335f8da3c82d4d098eb402b6d29dbc (patch)
treea057c6f3d5ca70da72b01b6a416a7ab667bc9e56
parentf4d49590153a5c39d4b60ba0a9c2901c344ff45a (diff)
downloadarchweb-3827215fa3335f8da3c82d4d098eb402b6d29dbc.tar.gz
archweb-3827215fa3335f8da3c82d4d098eb402b6d29dbc.tar.xz
Speed up feeds generation by batching writes
The XML generation underlying our package feeds was doing 1600+ calls to the write() method on the outfile. For some reason, the Python standard library insists on calling flush() after every write, which really makes performance take a nosedive. Wrap the write calls and do them in batches to remove some of the overhead and make feed generation a bit snappier.

Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r--feeds.py35
1 files changed, 32 insertions, 3 deletions
diff --git a/feeds.py b/feeds.py
index ecdb7d9..feb8a84 100644
--- a/feeds.py
+++ b/feeds.py
@@ -14,6 +14,22 @@ from news.models import News
from releng.models import Release
class BatchWritesWrapper(object):
    '''
    File-like wrapper that collects small write() calls and forwards them
    to the wrapped outfile in batches.

    Each write() appends its argument to an in-memory list. Once `chunks`
    strings are pending they are joined and handed to the outfile in a
    single write() call. flush() sends whatever is still pending and then
    flushes the outfile.
    '''
    def __init__(self, outfile, chunks=20):
        '''
        outfile: object providing write() and flush() methods.
        chunks: number of pending strings that triggers a batched write.
        '''
        self.outfile = outfile
        self.chunks = chunks
        self.buf = []

    def _drain(self):
        '''Send any pending strings to the outfile and empty the buffer.'''
        if self.buf:
            self.outfile.write(''.join(self.buf))
            # Reset the buffer so already-written data is never emitted
            # again by a later write() batch or a repeated flush().
            self.buf = []

    def write(self, s):
        '''Queue s; emit the whole batch once `chunks` strings are pending.'''
        self.buf.append(s)
        if len(self.buf) >= self.chunks:
            self._drain()

    def flush(self):
        '''Write out anything still buffered, then flush the outfile.'''
        self._drain()
        self.outfile.flush()
+
+
class GuidNotPermalinkFeed(Rss201rev2Feed):
@staticmethod
def check_for_unique_id(f):
@@ -26,13 +42,26 @@ class GuidNotPermalinkFeed(Rss201rev2Feed):
return wrapper
def write_items(self, handler):
- # Totally disgusting. Monkey-patch the hander so if it sees a
- # 'unique-id' field come through, add an isPermalink="false" attribute.
- # Workaround for http://code.djangoproject.com/ticket/9800
+ '''
+ Totally disgusting. Monkey-patch the handler so if it sees a
+ 'unique-id' field come through, add an isPermalink="false" attribute.
+ Workaround for http://code.djangoproject.com/ticket/9800
+ '''
handler.addQuickElement = self.check_for_unique_id(
handler.addQuickElement)
super(GuidNotPermalinkFeed, self).write_items(handler)
def write(self, outfile, encoding):
    '''
    Generate the feed through a BatchWritesWrapper instead of writing to
    outfile directly.

    The default saxutils XmlGenerator issues unbuffered write/flush calls,
    some as small as a single '>' byte; the package feed was seen making
    over 1600 of them. Grouping those writes cuts that overhead. The
    wrapper is flushed at the end so any output still buffered reaches
    outfile.
    '''
    batched = BatchWritesWrapper(outfile)
    parent = super(GuidNotPermalinkFeed, self)
    parent.write(batched, encoding)
    batched.flush()
+
def package_etag(request, *args, **kwargs):
latest = retrieve_latest(Package)