diff options
author | Dan McGee <dan@archlinux.org> | 2011-12-12 16:58:53 +0100 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2011-12-12 16:59:42 +0100 |
commit | 56af1e4f50587333da4e8c38800be8f720af98b7 (patch) | |
tree | bf3a9ea9110b8941ddb86d1d886e2a5f471fb705 | |
parent | 99eada17224b54799e7c2331a2b88dee76420107 (diff) | |
download | archweb-56af1e4f50587333da4e8c38800be8f720af98b7.tar.gz archweb-56af1e4f50587333da4e8c38800be8f720af98b7.tar.xz |
reporead: more efficient deletion of files
Rather than delegating to Django, which performs batched deletion by ID,
force the issuing of a single delete query to clear out all existing file
objects when necessary. This should speed up the deletion and update of
packages with a lot of files by a non-trivial amount.
Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- | devel/management/commands/reporead.py | 10 |
1 files changed, 9 insertions, 1 deletions
```diff
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 7e058af..aa04a31 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -239,6 +239,12 @@ pkg_same_version = lambda pkg, dbpkg: pkg.ver == dbpkg.pkgver \
         and pkg.rel == dbpkg.pkgrel and pkg.epoch == dbpkg.epoch
 
 
+def delete_pkg_files(dbpkg):
+    database = router.db_for_write(Package, instance=dbpkg)
+    cursor = connections[database].cursor()
+    cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id])
+
+
 def populate_files(dbpkg, repopkg, force=False):
     if not force:
         if not pkg_same_version(repopkg, dbpkg):
@@ -253,7 +259,7 @@ def populate_files(dbpkg, repopkg, force=False):
 
     # only delete files if we are reading a DB that contains them
     if repopkg.has_files:
-        dbpkg.packagefile_set.all().delete()
+        delete_pkg_files(dbpkg)
         logger.info("adding %d files for package %s",
                 len(repopkg.files), dbpkg.pkgname)
         for f in repopkg.files:
@@ -262,6 +268,7 @@ def populate_files(dbpkg, repopkg, force=False):
                 filename = None
             # this is basically like calling dbpkg.packagefile_set.create(),
             # but much faster as we can skip a lot of the repeated code paths
+            # TODO use Django 1.4 bulk_create
             pkgfile = PackageFile(pkg=dbpkg,
                     is_directory=(filename is None),
                     directory=dirname + '/',
@@ -361,6 +368,7 @@ def db_update(archname, reponame, pkgs, force=False):
         with transaction.commit_on_success():
             # no race condition here as long as simultaneous threads both
             # issue deletes; second delete will be a no-op
+            delete_pkg_files(dbpkg)
             dbpkg.delete()
 
     # packages in both database and in syncdb (update in database)
```