summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jakub Klinkovský <j.l.k@gmx.com> 2020-09-04 23:53:07 +0200
committer: Lukas Fleischer <lfleischer@archlinux.org> 2021-02-20 17:25:21 +0100
commit: 3062a78a92d32144f423fdb460e96a309732d9d0 (patch)
tree: 88f1e6e579a53cf637b8624a5db68f461a15ce07
parent: 51a353582010a45b2121c25a5ad995111f1842a8 (diff)
download: aur-3062a78a92d32144f423fdb460e96a309732d9d0.tar.gz
aur-3062a78a92d32144f423fdb460e96a309732d9d0.tar.xz
gendummydata.py: optimize iteration for big numbers of pkgs
Signed-off-by: Lukas Fleischer <lfleischer@archlinux.org>
-rwxr-xr-x schema/gendummydata.py 11
1 files changed, 7 insertions, 4 deletions
diff --git a/schema/gendummydata.py b/schema/gendummydata.py
index 91a580c2..c7b3a06d 100755
--- a/schema/gendummydata.py
+++ b/schema/gendummydata.py
@@ -259,20 +259,23 @@ for p in list(track_votes.keys()):
# Create package dependencies and sources
#
log.debug("Creating statements for package depends/sources.")
-for p in list(seen_pkgs.keys()):
+# the keys of seen_pkgs are accessed many times by random.choice,
+# so the list has to be created outside the loops to keep it efficient
+seen_pkgs_keys = list(seen_pkgs.keys())
+for p in seen_pkgs_keys:
num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
for i in range(0, num_deps):
- dep = random.choice([k for k in seen_pkgs])
+ dep = random.choice(seen_pkgs_keys)
deptype = random.randrange(1, 5)
if deptype == 4:
- dep += ": for " + random.choice([k for k in seen_pkgs])
+ dep += ": for " + random.choice(seen_pkgs_keys)
s = "INSERT INTO PackageDepends(PackageID, DepTypeID, DepName) VALUES (%d, %d, '%s');\n"
s = s % (seen_pkgs[p], deptype, dep)
out.write(s)
num_rels = random.randrange(PKG_RELS[0], PKG_RELS[1])
for i in range(0, num_deps):
- rel = random.choice([k for k in seen_pkgs])
+ rel = random.choice(seen_pkgs_keys)
reltype = random.randrange(1, 4)
s = "INSERT INTO PackageRelations(PackageID, RelTypeID, RelName) VALUES (%d, %d, '%s');\n"
s = s % (seen_pkgs[p], reltype, rel)