From e17b0446bd815f7b25f5bf0b838ef3d02d4eb64e Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Fri, 7 Jan 2011 00:32:17 +1000 Subject: Add a hash table for holding packages Signed-off-by: Allan McRae --- lib/libalpm/Makefile.am | 1 + lib/libalpm/alpm.h | 1 + lib/libalpm/pkghash.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/libalpm/pkghash.h | 58 ++++++++++ 4 files changed, 352 insertions(+) create mode 100644 lib/libalpm/pkghash.c create mode 100644 lib/libalpm/pkghash.h (limited to 'lib/libalpm') diff --git a/lib/libalpm/Makefile.am b/lib/libalpm/Makefile.am index da663cb5..1bda5714 100644 --- a/lib/libalpm/Makefile.am +++ b/lib/libalpm/Makefile.am @@ -40,6 +40,7 @@ libalpm_la_SOURCES = \ handle.h handle.c \ log.h log.c \ package.h package.c \ + pkghash.h pkghash.c \ remove.h remove.c \ sync.h sync.c \ trans.h trans.c \ diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 19ea4ffd..f33abf7a 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -52,6 +52,7 @@ typedef struct __pmdepend_t pmdepend_t; typedef struct __pmdepmissing_t pmdepmissing_t; typedef struct __pmconflict_t pmconflict_t; typedef struct __pmfileconflict_t pmfileconflict_t; +typedef struct __pmpkghash_t pmpkghash_t; /* * Library diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c new file mode 100644 index 00000000..3ed6453f --- /dev/null +++ b/lib/libalpm/pkghash.c @@ -0,0 +1,292 @@ +/* + * pkghash.c + * + * Copyright (c) 2011 Pacman Development Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pkghash.h" +#include "util.h" +#include "log.h" + +/* List of primes for possible sizes of hash tables. + * + * The maximum table size is the last prime under 1,000,000. That is + * more than an order of magnitude greater than the number of packages + * in any Linux distribution. + */ +static const size_t prime_list[] = +{ + 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul, 37ul, 41ul, 43ul, 47ul, + 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul, 83ul, 89ul, 97ul, 103ul, + 109ul, 113ul, 127ul, 137ul, 139ul, 149ul, 157ul, 167ul, 179ul, 193ul, + 199ul, 211ul, 227ul, 241ul, 257ul, 277ul, 293ul, 313ul, 337ul, 359ul, + 383ul, 409ul, 439ul, 467ul, 503ul, 541ul, 577ul, 619ul, 661ul, 709ul, + 761ul, 823ul, 887ul, 953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul, + 1493ul, 1613ul, 1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul, + 2753ul, 2971ul, 3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul, + 5087ul, 5503ul, 5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul, + 9497ul, 10273ul, 11113ul, 12011ul, 12983ul, 14033ul, 15173ul, + 16411ul, 17749ul, 19183ul, 20753ul, 22447ul, 24281ul, 26267ul, + 28411ul, 30727ul, 33223ul, 35933ul, 38873ul, 42043ul, 45481ul, + 49201ul, 53201ul, 57557ul, 62233ul, 67307ul, 72817ul, 78779ul, + 85229ul, 92203ul, 99733ul, 107897ul, 116731ul, 126271ul, 136607ul, + 147793ul, 159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul, + 256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul, 410857ul, + 444487ul, 480881ul, 520241ul, 562841ul, 608903ul, 658753ul, 712697ul, + 771049ul, 834181ul, 902483ul, 976369ul +}; + +/* Allocate a hash table with at least "size" buckets */ +pmpkghash_t *_alpm_pkghash_create(size_t size) +{ + pmpkghash_t *hash = NULL; + size_t i, loopsize; + + MALLOC(hash, sizeof(pmpkghash_t), RET_ERR(PM_ERR_MEMORY, NULL)); + + hash->list = NULL; + hash->entries = 0; + + loopsize = sizeof(prime_list) / sizeof(*prime_list); + for(i = 0; i < loopsize; i++) { + if(prime_list[i] > size) { + hash->buckets = prime_list[i]; + break; + } + } + + CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \ + free(hash); RET_ERR(PM_ERR_MEMORY, NULL)); + + return(hash); +} + +/* Expand the hash table size to the next increment and rebin the entries */ +static pmpkghash_t *rehash(pmpkghash_t *oldhash) +{ + //pmpkghash_t *newhash; + + /* TODO - calculate size of newhash */ + /* Hash tables will need resized in two cases: + * - adding packages to the local database + * - poor estimation of the number of packages in sync database + * + * For small hash tables sizes (<500) the increase in size is by a + * minimum of a factor of 2 for optimal rehash efficiency. For + * larger database sizes, this increase is reduced to avoid excess + * memory allocation as both scenarios requiring a rehash should not + * require a table size increase that large. */ + + //newhash = _alpm_pkghash_create(oldhash->buckets); + + /* TODO - rebin entries */ + + //_alpm_pkghash_free(oldhash); + + //return newhash; + + printf("rehash needed!!!\n"); + return(oldhash); +} + +pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) +{ + alpm_list_t *ptr; + size_t position; + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = pkg->name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_join(hash->list, ptr); + hash->entries += 1; + + return(hash); +} + +pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) +{ + if(!hash) { + return(_alpm_pkghash_add(hash, pkg)); + } + + alpm_list_t *ptr; + size_t position; + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = pkg->name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp); + hash->entries += 1; + + return(hash); +} + +/** + * @brief Remove a package from a pkghash. + * + * @param hash the hash to remove the package from + * @param pkg the package we are removing + * @param data output parameter containing the removed item + * + * @return the resultant hash + */ +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data) +{ + alpm_list_t *i; + size_t position; + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + position = pkg->name_hash % hash->buckets; + while((i = hash->hash_table[position]) != NULL) { + pmpkg_t *info = i->data; + + if(info->name_hash == pkg->name_hash && + strcmp(info->name, pkg->name) == 0) { + +#if 0 /* Actually removing items is not a good idea with linear probing... */ + /* remove from list */ + /* TODO - refactor with alpm_list_remove */ + if(i == hash->list) { + /* Special case: removing the head node which has a back reference to + * the tail node */ + hash->list = i->next; + if(hash->list) { + hash->list->prev = i->prev; + } + i->prev = NULL; + } else if(i == hash->list->prev) { + /* Special case: removing the tail node, so we need to fix the back + * reference on the head node. We also know tail != head. */ + if(i->prev) { + /* i->next should always be null */ + i->prev->next = i->next; + hash->list->prev = i->prev; + i->prev = NULL; + } + } else { + /* Normal case, non-head and non-tail node */ + if(i->next) { + i->next->prev = i->prev; + } + if(i->prev) { + i->prev->next = i->next; + } + } + + /* remove from hash */ + data = info; + info = NULL; + free(i); + i = NULL; + + hash->entries -= 1; +#endif + + /* fake removal - pkgname can not be blank so 0 hash is impossible */ + info->name_hash = 0; + + return(hash); + } + + position = (position + 1) % hash->buckets; + } + + return(hash); +} + +void _alpm_pkghash_free(pmpkghash_t *hash) +{ + size_t i; + if(hash != NULL) { + for(i = 0; i < hash->buckets; i++) { + free(hash->hash_table[i]); + } + free(hash->hash_table); + } + free(hash); +} + +pmpkg_t *_alpm_pkghash_find(pmpkghash_t *hash, const char *name) +{ + alpm_list_t *lp; + unsigned long name_hash; + size_t position; + + ALPM_LOG_FUNC; + + if(name == NULL || hash == NULL) { + return(NULL); + } + + name_hash = _alpm_hash_sdbm(name); + + position = name_hash % hash->buckets; + + while((lp = hash->hash_table[position]) != NULL) { + pmpkg_t *info = lp->data; + + if(info->name_hash == name_hash && strcmp(info->name, name) == 0) { + return(info); + } + + position = (position + 1) % hash->buckets; + } + + return(NULL); +} diff --git a/lib/libalpm/pkghash.h b/lib/libalpm/pkghash.h new file mode 100644 index 00000000..7f90cb17 --- /dev/null +++ b/lib/libalpm/pkghash.h @@ -0,0 +1,58 @@ +/* + * pkghash.h + * + * Copyright (c) 2011 Pacman Development Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _ALPM_PKGHASH_H +#define _ALPM_PKGHASH_H + +#include + +#include "alpm.h" +#include "alpm_list.h" + + +/** + * @brief A hash table for holding pmpkg_t objects. + * + * A combination of a hash table and a list, allowing for fast look-up + * by package name but also iteration over the packages. + */ +struct __pmpkghash_t { + /** data held by the hash table */ + alpm_list_t **hash_table; + /** number of buckets in hash table */ + size_t buckets; + /** number of entries in hash table */ + size_t entries; + /** head node of the hash table data in normal list format */ + alpm_list_t *list; +}; + +pmpkghash_t *_alpm_pkghash_create(size_t size); + +pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg); +pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg); +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data); + +void _alpm_pkghash_free(pmpkghash_t *hash); + +pmpkg_t *_alpm_pkghash_find(pmpkghash_t *hash, const char *name); + +#define MAX_HASH_LOAD 0.7 + +#endif /* _ALPM_PKGHASH_H */ -- cgit v1.2.3-24-g4f1b From 5dae577a87795e7666f05613cf9aa7207fd17346 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 24 Jan 2011 17:48:54 -0600 Subject: Get estimated package count when populating databases This works for both local and sync databases in slightly different ways. For the local database, we can use the directory hard link count on the local/ folder. For sync databases, we use the archive size coupled with some computed average per-package sizes to determine an estimate. This is currently a dead assignment once calculated, but could be used to set the initial size of a hash table. Signed-off-by: Dan McGee Signed-off-by: Allan McRae --- lib/libalpm/be_local.c | 8 ++++++- lib/libalpm/be_sync.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index c7110faf..b97fca71 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -367,7 +367,8 @@ static int is_dir(const char *path, struct dirent *entry) static int local_db_populate(pmdb_t *db) { - int count = 0; + int est_count, count = 0; + struct stat buf; struct dirent *ent = NULL; const char *dbpath; DIR *dbdir; @@ -384,6 +385,11 @@ static int local_db_populate(pmdb_t *db) if(dbdir == NULL) { return(0); } + if(fstat(dirfd(dbdir), &buf) != 0) { + return(0); + } + /* subtract the two always-there pointers to get # of children */ + est_count = (int)buf.st_nlink - 2; while((ent = readdir(dbdir)) != NULL) { const char *name = ent->d_name; diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index f7101f54..0c968b45 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -145,9 +145,67 @@ int SYMEXPORT alpm_db_update(int force, pmdb_t *db) static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry, pmpkg_t *likely_pkg); +/* + * This is the data table used to generate the estimating function below. + * "Weighted Avg" means averaging the bottom table values; thus each repo, big + * or small, will have equal influence. "Unweighted Avg" means averaging the + * sums of the top table columns, thus each package has equal influence. The + * final values are calculated by (surprise) averaging the averages, because + * why the hell not. + * + * Database Pkgs tar bz2 gz xz + * community 2096 5294080 256391 421227 301296 + * core 180 460800 25257 36850 29356 + * extra 2606 6635520 294647 470818 339392 + * multilib 126 327680 16120 23261 18732 + * testing 76 204800 10902 14348 12100 + * + * Bytes Per Package + * community 2096 2525.80 122.32 200.97 143.75 + * core 180 2560.00 140.32 204.72 163.09 + * extra 2606 2546.25 113.06 180.67 130.23 + * multilib 126 2600.63 127.94 184.61 148.67 + * testing 76 2694.74 143.45 188.79 159.21 + + * Weighted Avg 2585.48 129.42 191.95 148.99 + * Unweighted Avg 2543.39 118.74 190.16 137.93 + * Average of Avgs 2564.44 124.08 191.06 143.46 + */ +static int estimate_package_count(struct stat *st, struct archive *archive) +{ + unsigned int per_package; + + switch(archive_compression(archive)) { + case ARCHIVE_COMPRESSION_NONE: + per_package = 2564; + break; + case ARCHIVE_COMPRESSION_GZIP: + per_package = 191; + break; + case ARCHIVE_COMPRESSION_BZIP2: + per_package = 124; + break; + case ARCHIVE_COMPRESSION_COMPRESS: + per_package = 193; + break; + case ARCHIVE_COMPRESSION_LZMA: + case ARCHIVE_COMPRESSION_XZ: + per_package = 143; + break; + case ARCHIVE_COMPRESSION_UU: + per_package = 3543; + break; + default: + /* assume it is at least somewhat compressed */ + per_package = 200; + } + return((int)(st->st_size / per_package) + 1); +} + static int sync_db_populate(pmdb_t *db) { - int count = 0; + int est_count, count = 0; + struct stat buf; struct archive *archive; struct archive_entry *entry; pmpkg_t *pkg = NULL; @@ -169,6 +227,10 @@ static int sync_db_populate(pmdb_t *db) archive_read_finish(archive); RET_ERR(PM_ERR_DB_OPEN, 1); } + if(lstat(_alpm_db_path(db), &buf) != 0) { + RET_ERR(PM_ERR_DB_OPEN, 1); + } + est_count = estimate_package_count(&buf, archive); while(archive_read_next_header(archive, &entry) == ARCHIVE_OK) { const struct stat *st; -- cgit v1.2.3-24-g4f1b From f8fdce6cb0da4d832ffa730e0dacb5544c1f8154 Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Tue, 25 Jan 2011 11:49:34 +1000 Subject: Read pkgcache into hash Read the package information for sync/local databases into a pmpkghash_t structure. Provide a alpm_db_get_pkgcache_list() method that returns the list from the hash object. Most usages of alpm_db_get_pkgcache are converted to this at this stage for ease of implementation. Review whether these are better accessing the hash table directly at a later stage. Signed-off-by: Allan McRae --- lib/libalpm/alpm.h | 3 ++- lib/libalpm/be_local.c | 12 ++++++++--- lib/libalpm/be_sync.c | 13 ++++++++---- lib/libalpm/conflict.c | 4 ++-- lib/libalpm/db.c | 55 +++++++++++++++++++++++++++++++++++++------------- lib/libalpm/db.h | 7 +++++-- lib/libalpm/deps.c | 8 ++++---- lib/libalpm/package.c | 2 +- lib/libalpm/remove.c | 6 +++--- lib/libalpm/sync.c | 8 ++++---- 10 files changed, 80 insertions(+), 38 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index f33abf7a..7fec293d 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -187,7 +187,8 @@ int alpm_db_setserver(pmdb_t *db, const char *url); int alpm_db_update(int level, pmdb_t *db); pmpkg_t *alpm_db_get_pkg(pmdb_t *db, const char *name); -alpm_list_t *alpm_db_get_pkgcache(pmdb_t *db); +pmpkghash_t *alpm_db_get_pkgcache(pmdb_t *db); +alpm_list_t *alpm_db_get_pkgcache_list(pmdb_t *db); pmgrp_t *alpm_db_readgrp(pmdb_t *db, const char *name); alpm_list_t *alpm_db_get_grpcache(pmdb_t *db); diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c index b97fca71..12021ac2 100644 --- a/lib/libalpm/be_local.c +++ b/lib/libalpm/be_local.c @@ -390,6 +390,10 @@ static int local_db_populate(pmdb_t *db) } /* subtract the two always-there pointers to get # of children */ est_count = (int)buf.st_nlink - 2; + + /* initialize hash at 50% full */ + db->pkgcache = _alpm_pkghash_create(est_count * 2); + while((ent = readdir(dbdir)) != NULL) { const char *name = ent->d_name; @@ -416,7 +420,7 @@ static int local_db_populate(pmdb_t *db) } /* duplicated database entries are not allowed */ - if(_alpm_pkg_find(db->pkgcache, pkg->name)) { + if(_alpm_pkghash_find(db->pkgcache, pkg->name)) { _alpm_log(PM_LOG_ERROR, _("duplicated database entry '%s'\n"), pkg->name); _alpm_pkg_free(pkg); continue; @@ -436,12 +440,14 @@ static int local_db_populate(pmdb_t *db) /* add to the collection */ _alpm_log(PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", pkg->name, db->treename); - db->pkgcache = alpm_list_add(db->pkgcache, pkg); + db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg); count++; } closedir(dbdir); - db->pkgcache = alpm_list_msort(db->pkgcache, (size_t)count, _alpm_pkg_cmp); + if(count > 0) { + db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp); + } return(count); } diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 0c968b45..2c70e2c5 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -232,6 +232,9 @@ static int sync_db_populate(pmdb_t *db) } est_count = estimate_package_count(&buf, archive); + /* initialize hash at 50% full */ + db->pkgcache = _alpm_pkghash_create(est_count * 2); + while(archive_read_next_header(archive, &entry) == ARCHIVE_OK) { const struct stat *st; @@ -256,7 +259,7 @@ static int sync_db_populate(pmdb_t *db) } /* duplicated database entries are not allowed */ - if(_alpm_pkg_find(db->pkgcache, pkg->name)) { + if(_alpm_pkghash_find(db->pkgcache, pkg->name)) { _alpm_log(PM_LOG_ERROR, _("duplicated database entry '%s'\n"), pkg->name); _alpm_pkg_free(pkg); continue; @@ -269,7 +272,7 @@ static int sync_db_populate(pmdb_t *db) /* add to the collection */ _alpm_log(PM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n", pkg->name, db->treename); - db->pkgcache = alpm_list_add(db->pkgcache, pkg); + db->pkgcache = _alpm_pkghash_add(db->pkgcache, pkg); count++; } else { /* we have desc, depends or deltas - parse it */ @@ -277,7 +280,9 @@ static int sync_db_populate(pmdb_t *db) } } - db->pkgcache = alpm_list_msort(db->pkgcache, (size_t)count, _alpm_pkg_cmp); + if(count > 0) { + db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp); + } archive_read_finish(archive); return(count); @@ -343,7 +348,7 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, if(likely_pkg && strcmp(likely_pkg->name, pkgname) == 0) { pkg = likely_pkg; } else { - pkg = _alpm_pkg_find(db->pkgcache, pkgname); + pkg = _alpm_pkghash_find(db->pkgcache, pkgname); } if(pkg == NULL) { _alpm_log(PM_LOG_DEBUG, "package %s not found in %s sync database", diff --git a/lib/libalpm/conflict.c b/lib/libalpm/conflict.c index fc25e7d3..17e728a5 100644 --- a/lib/libalpm/conflict.c +++ b/lib/libalpm/conflict.c @@ -207,8 +207,8 @@ alpm_list_t *_alpm_outerconflicts(pmdb_t *db, alpm_list_t *packages) return(NULL); } - alpm_list_t *dblist = alpm_list_diff(_alpm_db_get_pkgcache(db), packages, - _alpm_pkg_cmp); + alpm_list_t *dblist = alpm_list_diff(_alpm_db_get_pkgcache_list(db), + packages, _alpm_pkg_cmp); /* two checks to be done here for conflicts */ _alpm_log(PM_LOG_DEBUG, "check targets vs db\n"); diff --git a/lib/libalpm/db.c b/lib/libalpm/db.c index c80dcbb8..c0c2f0ca 100644 --- a/lib/libalpm/db.c +++ b/lib/libalpm/db.c @@ -249,9 +249,9 @@ pmpkg_t SYMEXPORT *alpm_db_get_pkg(pmdb_t *db, const char *name) /** Get the package cache of a package database * @param db pointer to the package database to get the package from - * @return the list of packages on success, NULL on error + * @return the hash of packages on success, NULL on error */ -alpm_list_t SYMEXPORT *alpm_db_get_pkgcache(pmdb_t *db) +pmpkghash_t SYMEXPORT *alpm_db_get_pkgcache(pmdb_t *db) { ALPM_LOG_FUNC; @@ -262,6 +262,21 @@ alpm_list_t SYMEXPORT *alpm_db_get_pkgcache(pmdb_t *db) return(_alpm_db_get_pkgcache(db)); } +/** Get the package cache of a package database + * @param db pointer to the package database to get the package from + * @return the list of packages on success, NULL on error + */ +alpm_list_t SYMEXPORT *alpm_db_get_pkgcache_list(pmdb_t *db) +{ + ALPM_LOG_FUNC; + + /* Sanity checks */ + ASSERT(handle != NULL, return(NULL)); + ASSERT(db != NULL, return(NULL)); + + return(_alpm_db_get_pkgcache_list(db)); +} + /** Get a group entry from a package database * @param db pointer to the package database to get the group from * @param name of the group @@ -417,7 +432,7 @@ alpm_list_t *_alpm_db_search(pmdb_t *db, const alpm_list_t *needles) const alpm_list_t *i, *j, *k; alpm_list_t *ret = NULL; /* copy the pkgcache- we will free the list var after each needle */ - alpm_list_t *list = alpm_list_copy(_alpm_db_get_pkgcache(db)); + alpm_list_t *list = alpm_list_copy(_alpm_db_get_pkgcache_list(db)); ALPM_LOG_FUNC; @@ -523,14 +538,15 @@ void _alpm_db_free_pkgcache(pmdb_t *db) _alpm_log(PM_LOG_DEBUG, "freeing package cache for repository '%s'\n", db->treename); - alpm_list_free_inner(db->pkgcache, (alpm_list_fn_free)_alpm_pkg_free); - alpm_list_free(db->pkgcache); + alpm_list_free_inner(_alpm_db_get_pkgcache_list(db), + (alpm_list_fn_free)_alpm_pkg_free); + _alpm_pkghash_free(db->pkgcache); db->pkgcache_loaded = 0; _alpm_db_free_grpcache(db); } -alpm_list_t *_alpm_db_get_pkgcache(pmdb_t *db) +pmpkghash_t *_alpm_db_get_pkgcache(pmdb_t *db) { ALPM_LOG_FUNC; @@ -550,6 +566,19 @@ alpm_list_t *_alpm_db_get_pkgcache(pmdb_t *db) return(db->pkgcache); } +alpm_list_t *_alpm_db_get_pkgcache_list(pmdb_t *db) +{ + ALPM_LOG_FUNC; + + pmpkghash_t *hash = _alpm_db_get_pkgcache(db); + + if(hash == NULL) { + return(NULL); + } + + return(hash->list); +} + /* "duplicate" pkg then add it to pkgcache */ int _alpm_db_add_pkgincache(pmdb_t *db, pmpkg_t *pkg) { @@ -568,7 +597,7 @@ int _alpm_db_add_pkgincache(pmdb_t *db, pmpkg_t *pkg) _alpm_log(PM_LOG_DEBUG, "adding entry '%s' in '%s' cache\n", alpm_pkg_get_name(newpkg), db->treename); - db->pkgcache = alpm_list_add_sorted(db->pkgcache, newpkg, _alpm_pkg_cmp); + db->pkgcache = _alpm_pkghash_add_sorted(db->pkgcache, newpkg); _alpm_db_free_grpcache(db); @@ -577,8 +606,7 @@ int _alpm_db_add_pkgincache(pmdb_t *db, pmpkg_t *pkg) int _alpm_db_remove_pkgfromcache(pmdb_t *db, pmpkg_t *pkg) { - void *vdata; - pmpkg_t *data; + pmpkg_t *data = NULL; ALPM_LOG_FUNC; @@ -589,8 +617,7 @@ int _alpm_db_remove_pkgfromcache(pmdb_t *db, pmpkg_t *pkg) _alpm_log(PM_LOG_DEBUG, "removing entry '%s' from '%s' cache\n", alpm_pkg_get_name(pkg), db->treename); - db->pkgcache = alpm_list_remove(db->pkgcache, pkg, _alpm_pkg_cmp, &vdata); - data = vdata; + db->pkgcache = _alpm_pkghash_remove(db->pkgcache, pkg, data); if(data == NULL) { /* package not found */ _alpm_log(PM_LOG_DEBUG, "cannot remove entry '%s' from '%s' cache: not found\n", @@ -613,14 +640,14 @@ pmpkg_t *_alpm_db_get_pkgfromcache(pmdb_t *db, const char *target) return(NULL); } - alpm_list_t *pkgcache = _alpm_db_get_pkgcache(db); + pmpkghash_t *pkgcache = _alpm_db_get_pkgcache(db); if(!pkgcache) { _alpm_log(PM_LOG_DEBUG, "warning: failed to get '%s' from NULL pkgcache\n", target); return(NULL); } - return(_alpm_pkg_find(pkgcache, target)); + return(_alpm_pkghash_find(pkgcache, target)); } /* Returns a new group cache from db. @@ -638,7 +665,7 @@ int _alpm_db_load_grpcache(pmdb_t *db) _alpm_log(PM_LOG_DEBUG, "loading group cache for repository '%s'\n", db->treename); - for(lp = _alpm_db_get_pkgcache(db); lp; lp = lp->next) { + for(lp = _alpm_db_get_pkgcache_list(db); lp; lp = lp->next) { const alpm_list_t *i; pmpkg_t *pkg = lp->data; diff --git a/lib/libalpm/db.h b/lib/libalpm/db.h index b7fa7ca6..c5b3db69 100644 --- a/lib/libalpm/db.h +++ b/lib/libalpm/db.h @@ -23,6 +23,8 @@ #define _ALPM_DB_H #include "alpm.h" +#include "pkghash.h" + #include /* libarchive */ @@ -54,7 +56,7 @@ struct __pmdb_t { int grpcache_loaded; /* also indicates whether we are RO or RW */ int is_local; - alpm_list_t *pkgcache; + pmpkghash_t *pkgcache; alpm_list_t *grpcache; alpm_list_t *servers; @@ -84,7 +86,8 @@ int _alpm_db_load_pkgcache(pmdb_t *db); void _alpm_db_free_pkgcache(pmdb_t *db); int _alpm_db_add_pkgincache(pmdb_t *db, pmpkg_t *pkg); int _alpm_db_remove_pkgfromcache(pmdb_t *db, pmpkg_t *pkg); -alpm_list_t *_alpm_db_get_pkgcache(pmdb_t *db); +pmpkghash_t *_alpm_db_get_pkgcache(pmdb_t *db); +alpm_list_t *_alpm_db_get_pkgcache_list(pmdb_t *db); int _alpm_db_ensure_pkgcache(pmdb_t *db, pmdbinfrq_t infolevel); pmpkg_t *_alpm_db_get_pkgfromcache(pmdb_t *db, const char *target); /* groups */ diff --git a/lib/libalpm/deps.c b/lib/libalpm/deps.c index b667b0e8..dca8877e 100644 --- a/lib/libalpm/deps.c +++ b/lib/libalpm/deps.c @@ -475,7 +475,7 @@ static int can_remove_package(pmdb_t *db, pmpkg_t *pkg, alpm_list_t *targets, * if checkdeps detected it would break something */ /* see if other packages need it */ - for(i = _alpm_db_get_pkgcache(db); i; i = i->next) { + for(i = _alpm_db_get_pkgcache_list(db); i; i = i->next) { pmpkg_t *lpkg = i->data; if(_alpm_dep_edge(lpkg, pkg) && !_alpm_pkg_find(targets, lpkg->name)) { return(0); @@ -508,7 +508,7 @@ void _alpm_recursedeps(pmdb_t *db, alpm_list_t *targs, int include_explicit) for(i = targs; i; i = i->next) { pmpkg_t *pkg = i->data; - for(j = _alpm_db_get_pkgcache(db); j; j = j->next) { + for(j = _alpm_db_get_pkgcache_list(db); j; j = j->next) { pmpkg_t *deppkg = j->data; if(_alpm_dep_edge(pkg, deppkg) && can_remove_package(db, deppkg, targs, include_explicit)) { @@ -586,7 +586,7 @@ pmpkg_t *_alpm_resolvedep(pmdepend_t *dep, alpm_list_t *dbs, } /* 2. satisfiers (skip literals here) */ for(i = dbs; i; i = i->next) { - for(j = _alpm_db_get_pkgcache(i->data); j; j = j->next) { + for(j = _alpm_db_get_pkgcache_list(i->data); j; j = j->next) { pmpkg_t *pkg = j->data; if(_alpm_depcmp_tolerant(pkg, dep) && strcmp(pkg->name, dep->name) != 0 && !_alpm_pkg_find(excluding, pkg->name)) { @@ -614,7 +614,7 @@ pmpkg_t *_alpm_resolvedep(pmdepend_t *dep, alpm_list_t *dbs, /* first check if one provider is already installed locally */ for(i = providers; i; i = i->next) { pmpkg_t *pkg = i->data; - if (_alpm_pkg_find(_alpm_db_get_pkgcache(handle->db_local), pkg->name)) { + if (_alpm_pkghash_find(_alpm_db_get_pkgcache(handle->db_local), pkg->name)) { alpm_list_free(providers); return(pkg); } diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c index d4b3b9c0..0a1102c8 100644 --- a/lib/libalpm/package.c +++ b/lib/libalpm/package.c @@ -338,7 +338,7 @@ int SYMEXPORT alpm_pkg_has_scriptlet(pmpkg_t *pkg) static void find_requiredby(pmpkg_t *pkg, pmdb_t *db, alpm_list_t **reqs) { const alpm_list_t *i; - for(i = _alpm_db_get_pkgcache(db); i; i = i->next) { + for(i = _alpm_db_get_pkgcache_list(db); i; i = i->next) { if(!i->data) { continue; } diff --git a/lib/libalpm/remove.c b/lib/libalpm/remove.c index 5def92a6..823795be 100644 --- a/lib/libalpm/remove.c +++ b/lib/libalpm/remove.c @@ -95,7 +95,7 @@ static void remove_prepare_cascade(pmtrans_t *trans, pmdb_t *db, } alpm_list_free_inner(lp, (alpm_list_fn_free)_alpm_depmiss_free); alpm_list_free(lp); - lp = alpm_checkdeps(_alpm_db_get_pkgcache(db), 1, trans->remove, NULL); + lp = alpm_checkdeps(_alpm_db_get_pkgcache_list(db), 1, trans->remove, NULL); } } @@ -125,7 +125,7 @@ static void remove_prepare_keep_needed(pmtrans_t *trans, pmdb_t *db, } alpm_list_free_inner(lp, (alpm_list_fn_free)_alpm_depmiss_free); alpm_list_free(lp); - lp = alpm_checkdeps(_alpm_db_get_pkgcache(db), 1, trans->remove, NULL); + lp = alpm_checkdeps(_alpm_db_get_pkgcache_list(db), 1, trans->remove, NULL); } } @@ -147,7 +147,7 @@ int _alpm_remove_prepare(pmtrans_t *trans, pmdb_t *db, alpm_list_t **data) EVENT(trans, PM_TRANS_EVT_CHECKDEPS_START, NULL, NULL); _alpm_log(PM_LOG_DEBUG, "looking for unsatisfied dependencies\n"); - lp = alpm_checkdeps(_alpm_db_get_pkgcache(db), 1, trans->remove, NULL); + lp = alpm_checkdeps(_alpm_db_get_pkgcache_list(db), 1, trans->remove, NULL); if(lp != NULL) { if(trans->flags & PM_TRANS_FLAG_CASCADE) { diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c index 9f5bec3b..859b8c94 100644 --- a/lib/libalpm/sync.c +++ b/lib/libalpm/sync.c @@ -102,7 +102,7 @@ int SYMEXPORT alpm_sync_sysupgrade(int enable_downgrade) ASSERT(trans->state == STATE_INITIALIZED, RET_ERR(PM_ERR_TRANS_NOT_INITIALIZED, -1)); _alpm_log(PM_LOG_DEBUG, "checking for package upgrades\n"); - for(i = _alpm_db_get_pkgcache(db_local); i; i = i->next) { + for(i = _alpm_db_get_pkgcache_list(db_local); i; i = i->next) { pmpkg_t *lpkg = i->data; if(_alpm_pkg_find(trans->add, lpkg->name)) { @@ -149,7 +149,7 @@ int SYMEXPORT alpm_sync_sysupgrade(int enable_downgrade) break; /* jump to next local package */ } else { /* 2. search for replacers in sdb */ int found = 0; - for(k = _alpm_db_get_pkgcache(sdb); k; k = k->next) { + for(k = _alpm_db_get_pkgcache_list(sdb); k; k = k->next) { spkg = k->data; if(alpm_list_find_str(alpm_pkg_get_replaces(spkg), lpkg->name)) { found = 1; @@ -331,7 +331,7 @@ int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync } /* Compute the fake local database for resolvedeps (partial fix for the phonon/qt issue) */ - alpm_list_t *localpkgs = alpm_list_diff(_alpm_db_get_pkgcache(db_local), trans->add, _alpm_pkg_cmp); + alpm_list_t *localpkgs = alpm_list_diff(_alpm_db_get_pkgcache_list(db_local), trans->add, _alpm_pkg_cmp); /* Resolve packages in the transaction one at a time, in addtion building up a list of packages which could not be resolved. */ @@ -518,7 +518,7 @@ int _alpm_sync_prepare(pmtrans_t *trans, pmdb_t *db_local, alpm_list_t *dbs_sync if(!(trans->flags & PM_TRANS_FLAG_NODEPS)) { _alpm_log(PM_LOG_DEBUG, "checking dependencies\n"); - deps = alpm_checkdeps(_alpm_db_get_pkgcache(db_local), 1, trans->remove, trans->add); + deps = alpm_checkdeps(_alpm_db_get_pkgcache_list(db_local), 1, trans->remove, trans->add); if(deps) { pm_errno = PM_ERR_UNSATISFIED_DEPS; ret = -1; -- cgit v1.2.3-24-g4f1b From ce5471511257ada996bfcb6ea4236cd89c0d6890 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 28 Jan 2011 12:03:36 -0600 Subject: Implement a quick and dirty rehash function This allows us to get through the rehash required by smoke001 and pass all pactests. It is by no means the best or most efficient implementation but it does do the job. Signed-off-by: Dan McGee --- lib/libalpm/pkghash.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 3ed6453f..f2497070 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -77,9 +77,9 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) /* Expand the hash table size to the next increment and rebin the entries */ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { - //pmpkghash_t *newhash; - - /* TODO - calculate size of newhash */ + pmpkghash_t *newhash; + alpm_list_t *ptr; + size_t newsize; /* Hash tables will need resized in two cases: * - adding packages to the local database * - poor estimation of the number of packages in sync database @@ -89,17 +89,22 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) * larger database sizes, this increase is reduced to avoid excess * memory allocation as both scenarios requiring a rehash should not * require a table size increase that large. */ + if(oldhash->buckets < 500) { + newsize = oldhash->buckets * 2; + } else if(oldhash->buckets < 3500) { + newsize = oldhash->buckets * 3 / 2; + } else { + newsize = oldhash->buckets * 4 / 3; + } - //newhash = _alpm_pkghash_create(oldhash->buckets); - - /* TODO - rebin entries */ - - //_alpm_pkghash_free(oldhash); + newhash = _alpm_pkghash_create(newsize); + for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { + newhash = _alpm_pkghash_add(newhash, ptr->data); + } - //return newhash; + _alpm_pkghash_free(oldhash); - printf("rehash needed!!!\n"); - return(oldhash); + return(newhash); } pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) -- cgit v1.2.3-24-g4f1b From 021085624ea94a7116c60552e5c153de6cb5f057 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 28 Jan 2011 12:04:27 -0600 Subject: Change default sync hash table sizing to 66% full Since the sync database never changes size once we initialize it, we allow it to be filled a bit more. This reduces the overall memory footprint needed by the hash table. Signed-off-by: Dan McGee --- lib/libalpm/be_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 2c70e2c5..4ad045c2 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -232,8 +232,8 @@ static int sync_db_populate(pmdb_t *db) } est_count = estimate_package_count(&buf, archive); - /* initialize hash at 50% full */ - db->pkgcache = _alpm_pkghash_create(est_count * 2); + /* initialize hash at 66% full */ + db->pkgcache = _alpm_pkghash_create(est_count * 3 / 2); while(archive_read_next_header(archive, &entry) == ARCHIVE_OK) { const struct stat *st; -- cgit v1.2.3-24-g4f1b From d843c86b7b7cbf376716817e7c2c55b1f9360a72 Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 11:39:25 +1000 Subject: Error handling for maximum database size Check that the requested size of a pkghash is not beyond the maximum prime. Also check for successful creation of a new hash before rehashing. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index f2497070..0324465f 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -59,6 +59,7 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) hash->list = NULL; hash->entries = 0; + hash->buckets = 0; loopsize = sizeof(prime_list) / sizeof(*prime_list); for(i = 0; i < loopsize; i++) { @@ -68,6 +69,12 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) } } + if(hash->buckets < size) { + _alpm_log(PM_LOG_ERROR, _("database larger than maximum size")); + free(hash); + return(NULL); + } + CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \ free(hash); RET_ERR(PM_ERR_MEMORY, NULL)); @@ -98,6 +105,11 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) } newhash = _alpm_pkghash_create(newsize); + if(newhash == NULL) { + /* creation of newhash failed, stick with old one... */ + return(oldhash); + } + for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { newhash = _alpm_pkghash_add(newhash, ptr->data); } -- cgit v1.2.3-24-g4f1b From c9820ec97b417d33dc37eb589f069766f1068ea1 Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 11:47:31 +1000 Subject: Slightly more efficient rehash size selection While probably still not optimal in terms of everyday usage in pacman, this reduces the absolute size increase to "more reasonable" levels. For databases greater than 5000 in size, the minimum size increase is used which is still on the order of a 10% increase. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 0324465f..8f7168d2 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -98,10 +98,12 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) * require a table size increase that large. */ if(oldhash->buckets < 500) { newsize = oldhash->buckets * 2; - } else if(oldhash->buckets < 3500) { + } else if(oldhash->buckets < 2000) { newsize = oldhash->buckets * 3 / 2; - } else { + } else if(oldhash->buckets < 5000) { newsize = oldhash->buckets * 4 / 3; + } else { + newsize = oldhash->buckets + 1; } newhash = _alpm_pkghash_create(newsize); -- cgit v1.2.3-24-g4f1b From 93207863493b54e794098b4ee5987d59c8c4b14b Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 22:50:55 +1000 Subject: Rehash efficiently Rehash without recreating the hash table list or reallocating the memory for holding the list nodes. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 8f7168d2..44a03ab5 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -86,7 +86,8 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { pmpkghash_t *newhash; alpm_list_t *ptr; - size_t newsize; + size_t newsize, position, i; + /* Hash tables will need resized in two cases: * - adding packages to the local database * - poor estimation of the number of packages in sync database @@ -112,10 +113,27 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) return(oldhash); } - for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { - newhash = _alpm_pkghash_add(newhash, ptr->data); + newhash->list = oldhash->list; + oldhash->list = NULL; + + for(i = 0; i < oldhash->buckets; i++) { + if(oldhash->hash_table[i] != NULL) { + pmpkg_t *package = oldhash->hash_table[i]->data; + + position = package->name_hash % newhash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = newhash->hash_table[position]) != NULL) { + position = (position + 1) % newhash->buckets; + } + + newhash->hash_table[position] = oldhash->hash_table[i]; + oldhash->hash_table[i] = NULL; + } } + newhash->entries = oldhash->entries; + _alpm_pkghash_free(oldhash); return(newhash); -- cgit v1.2.3-24-g4f1b From 11e5e86151f83219b1cedbca31be7530911e25eb Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 23:24:13 +1000 Subject: Refactor finding position for new hash entry Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 44a03ab5..bdff78ea 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -81,11 +81,25 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) return(hash); } +static size_t get_hash_position(unsigned long name_hash, pmpkghash_t *hash) +{ + size_t position; + alpm_list_t *ptr; + + position = name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + return(position); +} + /* Expand the hash table size to the next increment and rebin the entries */ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { pmpkghash_t *newhash; - alpm_list_t *ptr; size_t newsize, position, i; /* Hash tables will need resized in two cases: @@ -120,12 +134,7 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) if(oldhash->hash_table[i] != NULL) { pmpkg_t *package = oldhash->hash_table[i]->data; - position = package->name_hash % newhash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = newhash->hash_table[position]) != NULL) { - position = (position + 1) % newhash->buckets; - } + position = get_hash_position(package->name_hash, newhash); newhash->hash_table[position] = oldhash->hash_table[i]; oldhash->hash_table[i] = NULL; @@ -152,12 +161,7 @@ pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) hash = rehash(hash); } - position = pkg->name_hash % hash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = hash->hash_table[position]) != NULL) { - position = (position + 1) % hash->buckets; - } + position = get_hash_position(pkg->name_hash, hash); ptr = calloc(1, sizeof(alpm_list_t)); if(ptr == NULL) { @@ -188,12 +192,7 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) hash = rehash(hash); } - position = pkg->name_hash % hash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = hash->hash_table[position]) != NULL) { - position = (position + 1) % hash->buckets; - } + position = get_hash_position(pkg->name_hash, hash); ptr = calloc(1, sizeof(alpm_list_t)); if(ptr == NULL) { -- cgit v1.2.3-24-g4f1b From 01c3c7e4f28d837f0b8a6aaaf27d16894d4b762d Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sun, 30 Jan 2011 22:42:45 +1000 Subject: Actually remove packages from pkghash on removal Fully removes a package from the hash. Also unify prototype with removal from an alpm_list_t, fixing issues when removing a package from the pkgcache. Signed-off-by: Allan McRae --- lib/libalpm/db.c | 2 +- lib/libalpm/pkghash.c | 37 +++++++++++++++++++++++++++++-------- lib/libalpm/pkghash.h | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/db.c b/lib/libalpm/db.c index c0c2f0ca..02f82823 100644 --- a/lib/libalpm/db.c +++ b/lib/libalpm/db.c @@ -617,7 +617,7 @@ int _alpm_db_remove_pkgfromcache(pmdb_t *db, pmpkg_t *pkg) _alpm_log(PM_LOG_DEBUG, "removing entry '%s' from '%s' cache\n", alpm_pkg_get_name(pkg), db->treename); - db->pkgcache = _alpm_pkghash_remove(db->pkgcache, pkg, data); + db->pkgcache = _alpm_pkghash_remove(db->pkgcache, pkg, &data); if(data == NULL) { /* package not found */ _alpm_log(PM_LOG_DEBUG, "cannot remove entry '%s' from '%s' cache: not found\n", diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index bdff78ea..14056e37 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -219,11 +219,15 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) * * @return the resultant hash */ -pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data) +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **data) { alpm_list_t *i; size_t position; + if(data) { + *data = NULL; + } + if(pkg == NULL || hash == NULL) { return(hash); } @@ -235,7 +239,6 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { -#if 0 /* Actually removing items is not a good idea with linear probing... */ /* remove from list */ /* TODO - refactor with alpm_list_remove */ if(i == hash->list) { @@ -266,16 +269,34 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data } /* remove from hash */ - data = info; - info = NULL; + if(data) { + *data = info; + } + hash->hash_table[position] = NULL; free(i); - i = NULL; hash->entries -= 1; -#endif - /* fake removal - pkgname can not be blank so 0 hash is impossible */ - info->name_hash = 0; + /* potentially move entries following removed entry to keep + * open addressing collision resolution working */ + size_t next_null = (position + 1) % hash->buckets; + while(hash->hash_table[next_null] != NULL) { + next_null = (next_null + 1) % hash->buckets; + } + + position = (position + 1) % hash->buckets; + + while((i = hash->hash_table[position]) != NULL) { + info = i->data; + size_t new_position = get_hash_position(info->name_hash, hash); + + if(new_position != next_null) { + hash->hash_table[new_position] = i; + hash->hash_table[position] = NULL; + } + + position = (position + 1) % hash->buckets; + } return(hash); } diff --git a/lib/libalpm/pkghash.h b/lib/libalpm/pkghash.h index 7f90cb17..a6c1db71 100644 --- a/lib/libalpm/pkghash.h +++ b/lib/libalpm/pkghash.h @@ -47,7 +47,7 @@ pmpkghash_t *_alpm_pkghash_create(size_t size); pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg); pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg); -pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data); +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **data); void _alpm_pkghash_free(pmpkghash_t *hash); -- cgit v1.2.3-24-g4f1b From 14fd1e63a2ce420c555c6de34a815a8b62a9859b Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 2 Feb 2011 20:46:28 -0600 Subject: Add new alpm_list_remove_item() function This takes in the list and a list item, and does the pointer dance necessary to remove it from the list regardless of whether it is first, last, or somewhere in the middle. It is useful for callers that already know what item needs to be removed and have a pointer to it rather than doing a search by data that the plain alpm_list_remove() does. Refactor alpm_list_remove() to use this function as well. Signed-off-by: Dan McGee --- lib/libalpm/alpm_list.c | 86 ++++++++++++++++++++++++++++++------------------- lib/libalpm/alpm_list.h | 1 + 2 files changed, 54 insertions(+), 33 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/alpm_list.c b/lib/libalpm/alpm_list.c index 3f9525e8..4cab665f 100644 --- a/lib/libalpm/alpm_list.c +++ b/lib/libalpm/alpm_list.c @@ -285,6 +285,53 @@ alpm_list_t SYMEXPORT *alpm_list_msort(alpm_list_t *list, size_t n, alpm_list_fn return(list); } +/** + * @brief Remove an item from the list. + * item is not freed; this is the respnsiblity of the caller. + * + * @param haystack the list to remove the item from + * @param item the item to remove from the list + * + * @return the resultant list + */ +alpm_list_t SYMEXPORT *alpm_list_remove_item(alpm_list_t *haystack, + alpm_list_t *item) +{ + if(haystack == NULL || item == NULL) { + return(haystack); + } + + if(item == haystack) { + /* Special case: removing the head node which has a back reference to + * the tail node */ + haystack = item->next; + if(haystack) { + haystack->prev = item->prev; + } + item->prev = NULL; + } else if(item == haystack->prev) { + /* Special case: removing the tail node, so we need to fix the back + * reference on the head node. We also know tail != head. */ + if(item->prev) { + /* i->next should always be null */ + item->prev->next = item->next; + haystack->prev = item->prev; + item->prev = NULL; + } + } else { + /* Normal case, non-head and non-tail node */ + if(item->next) { + item->next->prev = item->prev; + } + if(item->prev) { + item->prev->next = item->next; + } + } + + return(haystack); +} + + /** * @brief Remove an item from the list. * @@ -295,9 +342,10 @@ alpm_list_t SYMEXPORT *alpm_list_msort(alpm_list_t *list, size_t n, alpm_list_fn * * @return the resultant list */ -alpm_list_t SYMEXPORT *alpm_list_remove(alpm_list_t *haystack, const void *needle, alpm_list_fn_cmp fn, void **data) +alpm_list_t SYMEXPORT *alpm_list_remove(alpm_list_t *haystack, + const void *needle, alpm_list_fn_cmp fn, void **data) { - alpm_list_t *i = haystack, *tmp = NULL; + alpm_list_t *i = haystack; if(data) { *data = NULL; @@ -312,44 +360,16 @@ alpm_list_t SYMEXPORT *alpm_list_remove(alpm_list_t *haystack, const void *needl i = i->next; continue; } - tmp = i->next; if(fn(i->data, needle) == 0) { - /* we found a matching item */ - if(i == haystack) { - /* Special case: removing the head node which has a back reference to - * the tail node */ - haystack = i->next; - if(haystack) { - haystack->prev = i->prev; - } - i->prev = NULL; - } else if(i == haystack->prev) { - /* Special case: removing the tail node, so we need to fix the back - * reference on the head node. We also know tail != head. */ - if(i->prev) { - /* i->next should always be null */ - i->prev->next = i->next; - haystack->prev = i->prev; - i->prev = NULL; - } - } else { - /* Normal case, non-head and non-tail node */ - if(i->next) { - i->next->prev = i->prev; - } - if(i->prev) { - i->prev->next = i->next; - } - } + haystack = alpm_list_remove_item(haystack, i); if(data) { *data = i->data; } - i->data = NULL; free(i); - i = NULL; + break; } else { - i = tmp; + i = i->next; } } diff --git a/lib/libalpm/alpm_list.h b/lib/libalpm/alpm_list.h index ee85a5dd..1f6393a6 100644 --- a/lib/libalpm/alpm_list.h +++ b/lib/libalpm/alpm_list.h @@ -57,6 +57,7 @@ alpm_list_t *alpm_list_add_sorted(alpm_list_t *list, void *data, alpm_list_fn_cm alpm_list_t *alpm_list_join(alpm_list_t *first, alpm_list_t *second); alpm_list_t *alpm_list_mmerge(alpm_list_t *left, alpm_list_t *right, alpm_list_fn_cmp fn); alpm_list_t *alpm_list_msort(alpm_list_t *list, size_t n, alpm_list_fn_cmp fn); +alpm_list_t *alpm_list_remove_item(alpm_list_t *haystack, alpm_list_t *item); alpm_list_t *alpm_list_remove(alpm_list_t *haystack, const void *needle, alpm_list_fn_cmp fn, void **data); alpm_list_t *alpm_list_remove_str(alpm_list_t *haystack, const char *needle, char **data); alpm_list_t *alpm_list_remove_dupes(const alpm_list_t *list); -- cgit v1.2.3-24-g4f1b From 1f145bcd1a4fb22d4cb548a3be64898f60820695 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 2 Feb 2011 20:46:29 -0600 Subject: Use alpm_list_remove_item in pkghash_remove Removes the code that was duplicated and has now been refactored into a separate method. Signed-off-by: Dan McGee --- lib/libalpm/pkghash.c | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 14056e37..3d420e1f 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -219,7 +219,8 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) * * @return the resultant hash */ -pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **data) +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, + pmpkg_t **data) { alpm_list_t *i; size_t position; @@ -239,36 +240,8 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **dat if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { - /* remove from list */ - /* TODO - refactor with alpm_list_remove */ - if(i == hash->list) { - /* Special case: removing the head node which has a back reference to - * the tail node */ - hash->list = i->next; - if(hash->list) { - hash->list->prev = i->prev; - } - i->prev = NULL; - } else if(i == hash->list->prev) { - /* Special case: removing the tail node, so we need to fix the back - * reference on the head node. We also know tail != head. */ - if(i->prev) { - /* i->next should always be null */ - i->prev->next = i->next; - hash->list->prev = i->prev; - i->prev = NULL; - } - } else { - /* Normal case, non-head and non-tail node */ - if(i->next) { - i->next->prev = i->prev; - } - if(i->prev) { - i->prev->next = i->next; - } - } - - /* remove from hash */ + /* remove from list and hash */ + hash->list = alpm_list_remove_item(hash->list, i); if(data) { *data = info; } -- cgit v1.2.3-24-g4f1b From 6b0d4674bb132b2583920211cc798f3db77ec392 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 3 Feb 2011 10:55:26 -0600 Subject: Improve pkghash_remove algorithm Rather than potentially move every item to the next NULL, attempt to move at most one item at a time by iterating backwards from the NULL location in the hash array. If we move an item, we repeat the process on the now shorter "chain" until no more items need moving. Signed-off-by: Dan McGee Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 62 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'lib/libalpm') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 3d420e1f..54805275 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -210,6 +210,34 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) return(hash); } +static size_t move_one_entry(pmpkghash_t *hash, size_t start, size_t end) +{ + /* Iterate backwards from 'end' to 'start', seeing if any of the items + * would hash to 'start'. If we find one, we move it there and break. If + * we get all the way back to position and find none that hash to it, we + * also end iteration. Iterating backwards helps prevent needless shuffles; + * we will never need to move more than one item per function call. The + * return value is our current iteration location; if this is equal to + * 'start' we can stop this madness. */ + while(end != start) { + alpm_list_t *i = hash->hash_table[end]; + pmpkg_t *info = i->data; + size_t new_position = get_hash_position(info->name_hash, hash); + + if(new_position == start) { + hash->hash_table[start] = i; + hash->hash_table[end] = NULL; + break; + } + + /* the odd math ensures we are always positive, e.g. + * e.g. (0 - 1) % 47 == -1 + * e.g. (47 + 0 - 1) % 47 == 46 */ + end = (hash->buckets + end - 1) % hash->buckets; + } + return(end); +} + /** * @brief Remove a package from a pkghash. * @@ -239,6 +267,7 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { + size_t stop, prev; /* remove from list and hash */ hash->list = alpm_list_remove_item(hash->list, i); @@ -247,28 +276,23 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, } hash->hash_table[position] = NULL; free(i); - hash->entries -= 1; - /* potentially move entries following removed entry to keep - * open addressing collision resolution working */ - size_t next_null = (position + 1) % hash->buckets; - while(hash->hash_table[next_null] != NULL) { - next_null = (next_null + 1) % hash->buckets; + /* Potentially move entries following removed entry to keep open + * addressing collision resolution working. We start by finding the + * next null bucket to know how far we have to look. */ + stop = (position + 1) % hash->buckets; + while(hash->hash_table[stop] != NULL && stop != position) { + stop = (stop + 1) % hash->buckets; } - - position = (position + 1) % hash->buckets; - - while((i = hash->hash_table[position]) != NULL) { - info = i->data; - size_t new_position = get_hash_position(info->name_hash, hash); - - if(new_position != next_null) { - hash->hash_table[new_position] = i; - hash->hash_table[position] = NULL; - } - - position = (position + 1) % hash->buckets; + stop = (hash->buckets + stop - 1) % hash->buckets; + + /* We now search backwards from stop to position. If we find an + * item that now hashes to position, we will move it, and then try + * to plug the new hole we just opened up, until we finally don't + * move anything. */ + while((prev = move_one_entry(hash, position, stop)) != position) { + position = prev; } return(hash); -- cgit v1.2.3-24-g4f1b