diff options
author | Dan McGee <dan@archlinux.org> | 2011-01-25 00:48:54 +0100 |
---|---|---|
committer | Allan McRae <allan@archlinux.org> | 2011-02-04 00:55:45 +0100 |
commit | 5dae577a87795e7666f05613cf9aa7207fd17346 (patch) | |
tree | 5e898c5c7290e83bf6da41c9bfd6293a24269d56 | |
parent | e17b0446bd815f7b25f5bf0b838ef3d02d4eb64e (diff) | |
download | pacman-5dae577a87795e7666f05613cf9aa7207fd17346.tar.gz pacman-5dae577a87795e7666f05613cf9aa7207fd17346.tar.xz |
Get estimated package count when populating databases
This works for both local and sync databases in slightly different ways. For
the local database, we can use the directory hard link count on the local/
folder. For sync databases, we use the archive size coupled with some
computed average per-package sizes to determine an estimate.
This is currently a dead assignment once calculated, but could be used to
set the initial size of a hash table.
Signed-off-by: Dan McGee <dan@archlinux.org>
Signed-off-by: Allan McRae <allan@archlinux.org>
-rw-r--r-- | lib/libalpm/be_local.c | 8 | ||||
-rw-r--r-- | lib/libalpm/be_sync.c | 64 |
2 files changed, 70 insertions, 2 deletions
diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index c7110faf..b97fca71 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -367,7 +367,8 @@ static int is_dir(const char *path, struct dirent *entry) static int local_db_populate(pmdb_t *db) { - int count = 0; + int est_count, count = 0; + struct stat buf; struct dirent *ent = NULL; const char *dbpath; DIR *dbdir; @@ -384,6 +385,11 @@ static int local_db_populate(pmdb_t *db) if(dbdir == NULL) { return(0); } + if(fstat(dirfd(dbdir), &buf) != 0) { + return(0); + } + /* subtract the two always-there pointers to get # of children */ + est_count = (int)buf.st_nlink - 2; while((ent = readdir(dbdir)) != NULL) { const char *name = ent->d_name; diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index f7101f54..0c968b45 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -145,9 +145,67 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db) static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry, pmpkg_t *likely_pkg); +/* + * This is the data table used to generate the estimating function below. + * "Weighted Avg" means averaging the bottom table values; thus each repo, big + * or small, will have equal influence. "Unweighted Avg" means averaging the + * sums of the top table columns, thus each package has equal influence. The + * final values are calculated by (surprise) averaging the averages, because + * why the hell not. + * + * Database Pkgs tar bz2 gz xz + * community 2096 5294080 256391 421227 301296 + * core 180 460800 25257 36850 29356 + * extra 2606 6635520 294647 470818 339392 + * multilib 126 327680 16120 23261 18732 + * testing 76 204800 10902 14348 12100 + * + * Bytes Per Package + * community 2096 2525.80 122.32 200.97 143.75 + * core 180 2560.00 140.32 204.72 163.09 + * extra 2606 2546.25 113.06 180.67 130.23 + * multilib 126 2600.63 127.94 184.61 148.67 + * testing 76 2694.74 143.45 188.79 159.21 + + * Weighted Avg 2585.48 129.42 191.95 148.99 + * Unweighted Avg 2543.39 118.74 190.16 137.93 + * Average of Avgs 2564.44 124.08 191.06 143.46 + */ +static int estimate_package_count(struct stat *st, struct archive *archive) +{ + unsigned int per_package; + + switch(archive_compression(archive)) { + case ARCHIVE_COMPRESSION_NONE: + per_package = 2564; + break; + case ARCHIVE_COMPRESSION_GZIP: + per_package = 191; + break; + case ARCHIVE_COMPRESSION_BZIP2: + per_package = 124; + break; + case ARCHIVE_COMPRESSION_COMPRESS: + per_package = 193; + break; + case ARCHIVE_COMPRESSION_LZMA: + case ARCHIVE_COMPRESSION_XZ: + per_package = 143; + break; + case ARCHIVE_COMPRESSION_UU: + per_package = 3543; + break; + default: + /* assume it is at least somewhat compressed */ + per_package = 200; + } + return((int)(st->st_size / per_package) + 1); +} + static int sync_db_populate(pmdb_t *db) { - int count = 0; + int est_count, count = 0; + struct stat buf; struct archive *archive; struct archive_entry *entry; pmpkg_t *pkg = NULL; @@ -169,6 +227,10 @@ static int sync_db_populate(pmdb_t *db) archive_read_finish(archive); RET_ERR(PM_ERR_DB_OPEN, 1); } + if(lstat(_alpm_db_path(db), &buf) != 0) { + RET_ERR(PM_ERR_DB_OPEN, 1); + } + est_count = estimate_package_count(&buf, archive); while(archive_read_next_header(archive, &entry) == ARCHIVE_OK) { const struct stat *st; |