diff options
author | Dan McGee <dan@archlinux.org> | 2011-02-04 16:10:25 +0100 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2011-02-04 16:10:25 +0100 |
commit | e34fc4eddf73f2453b42235f5ae7d65f75db66fc (patch) | |
tree | 93b3a31a7db207f70b0a024f4de83ff8e0e3158f /lib/libalpm/pkghash.c | |
parent | c12ccbfb2c7aa907ba01339a1a29089c65ea9911 (diff) | |
parent | 6b0d4674bb132b2583920211cc798f3db77ec392 (diff) | |
download | pacman-e34fc4eddf73f2453b42235f5ae7d65f75db66fc.tar.gz pacman-e34fc4eddf73f2453b42235f5ae7d65f75db66fc.tar.xz |
Merge remote-tracking branch 'allan/hash'
Diffstat (limited to 'lib/libalpm/pkghash.c')
-rw-r--r-- | lib/libalpm/pkghash.c | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c new file mode 100644 index 00000000..54805275 --- /dev/null +++ b/lib/libalpm/pkghash.c @@ -0,0 +1,346 @@ +/* + * pkghash.c + * + * Copyright (c) 2011 Pacman Development Team <pacman-dev@archlinux.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "pkghash.h" +#include "util.h" +#include "log.h" + +/* List of primes for possible sizes of hash tables. + * + * The maximum table size is the last prime under 1,000,000. That is + * more than an order of magnitude greater than the number of packages + * in any Linux distribution. + */ +static const size_t prime_list[] = +{ + 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul, 37ul, 41ul, 43ul, 47ul, + 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul, 83ul, 89ul, 97ul, 103ul, + 109ul, 113ul, 127ul, 137ul, 139ul, 149ul, 157ul, 167ul, 179ul, 193ul, + 199ul, 211ul, 227ul, 241ul, 257ul, 277ul, 293ul, 313ul, 337ul, 359ul, + 383ul, 409ul, 439ul, 467ul, 503ul, 541ul, 577ul, 619ul, 661ul, 709ul, + 761ul, 823ul, 887ul, 953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul, + 1493ul, 1613ul, 1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul, + 2753ul, 2971ul, 3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul, + 5087ul, 5503ul, 5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul, + 9497ul, 10273ul, 11113ul, 12011ul, 12983ul, 14033ul, 15173ul, + 16411ul, 17749ul, 19183ul, 20753ul, 22447ul, 24281ul, 26267ul, + 28411ul, 30727ul, 33223ul, 35933ul, 38873ul, 42043ul, 45481ul, + 49201ul, 53201ul, 57557ul, 62233ul, 67307ul, 72817ul, 78779ul, + 85229ul, 92203ul, 99733ul, 107897ul, 116731ul, 126271ul, 136607ul, + 147793ul, 159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul, + 256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul, 410857ul, + 444487ul, 480881ul, 520241ul, 562841ul, 608903ul, 658753ul, 712697ul, + 771049ul, 834181ul, 902483ul, 976369ul +}; + +/* Allocate a hash table with at least "size" buckets */ +pmpkghash_t *_alpm_pkghash_create(size_t size) +{ + pmpkghash_t *hash = NULL; + size_t i, loopsize; + + MALLOC(hash, sizeof(pmpkghash_t), RET_ERR(PM_ERR_MEMORY, NULL)); + + hash->list = NULL; + hash->entries = 0; + hash->buckets = 0; + + loopsize = sizeof(prime_list) / sizeof(*prime_list); + for(i = 0; i < loopsize; i++) { + if(prime_list[i] > size) { + hash->buckets = prime_list[i]; + break; + } + } + + if(hash->buckets < size) { + _alpm_log(PM_LOG_ERROR, _("database larger than maximum size")); + free(hash); + return(NULL); + } + + CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \ + free(hash); RET_ERR(PM_ERR_MEMORY, NULL)); + + return(hash); +} + +static size_t get_hash_position(unsigned long name_hash, pmpkghash_t *hash) +{ + size_t position; + alpm_list_t *ptr; + + position = name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + return(position); +} + +/* Expand the hash table size to the next increment and rebin the entries */ +static pmpkghash_t *rehash(pmpkghash_t *oldhash) +{ + pmpkghash_t *newhash; + size_t newsize, position, i; + + /* Hash tables will need resized in two cases: + * - adding packages to the local database + * - poor estimation of the number of packages in sync database + * + * For small hash tables sizes (<500) the increase in size is by a + * minimum of a factor of 2 for optimal rehash efficiency. For + * larger database sizes, this increase is reduced to avoid excess + * memory allocation as both scenarios requiring a rehash should not + * require a table size increase that large. */ + if(oldhash->buckets < 500) { + newsize = oldhash->buckets * 2; + } else if(oldhash->buckets < 2000) { + newsize = oldhash->buckets * 3 / 2; + } else if(oldhash->buckets < 5000) { + newsize = oldhash->buckets * 4 / 3; + } else { + newsize = oldhash->buckets + 1; + } + + newhash = _alpm_pkghash_create(newsize); + if(newhash == NULL) { + /* creation of newhash failed, stick with old one... */ + return(oldhash); + } + + newhash->list = oldhash->list; + oldhash->list = NULL; + + for(i = 0; i < oldhash->buckets; i++) { + if(oldhash->hash_table[i] != NULL) { + pmpkg_t *package = oldhash->hash_table[i]->data; + + position = get_hash_position(package->name_hash, newhash); + + newhash->hash_table[position] = oldhash->hash_table[i]; + oldhash->hash_table[i] = NULL; + } + } + + newhash->entries = oldhash->entries; + + _alpm_pkghash_free(oldhash); + + return(newhash); +} + +pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) +{ + alpm_list_t *ptr; + size_t position; + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = get_hash_position(pkg->name_hash, hash); + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_join(hash->list, ptr); + hash->entries += 1; + + return(hash); +} + +pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) +{ + if(!hash) { + return(_alpm_pkghash_add(hash, pkg)); + } + + alpm_list_t *ptr; + size_t position; + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = get_hash_position(pkg->name_hash, hash); + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp); + hash->entries += 1; + + return(hash); +} + +static size_t move_one_entry(pmpkghash_t *hash, size_t start, size_t end) +{ + /* Iterate backwards from 'end' to 'start', seeing if any of the items + * would hash to 'start'. If we find one, we move it there and break. If + * we get all the way back to position and find none that hash to it, we + * also end iteration. Iterating backwards helps prevent needless shuffles; + * we will never need to move more than one item per function call. The + * return value is our current iteration location; if this is equal to + * 'start' we can stop this madness. */ + while(end != start) { + alpm_list_t *i = hash->hash_table[end]; + pmpkg_t *info = i->data; + size_t new_position = get_hash_position(info->name_hash, hash); + + if(new_position == start) { + hash->hash_table[start] = i; + hash->hash_table[end] = NULL; + break; + } + + /* the odd math ensures we are always positive, e.g. + * e.g. (0 - 1) % 47 == -1 + * e.g. (47 + 0 - 1) % 47 == 46 */ + end = (hash->buckets + end - 1) % hash->buckets; + } + return(end); +} + +/** + * @brief Remove a package from a pkghash. + * + * @param hash the hash to remove the package from + * @param pkg the package we are removing + * @param data output parameter containing the removed item + * + * @return the resultant hash + */ +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, + pmpkg_t **data) +{ + alpm_list_t *i; + size_t position; + + if(data) { + *data = NULL; + } + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + position = pkg->name_hash % hash->buckets; + while((i = hash->hash_table[position]) != NULL) { + pmpkg_t *info = i->data; + + if(info->name_hash == pkg->name_hash && + strcmp(info->name, pkg->name) == 0) { + size_t stop, prev; + + /* remove from list and hash */ + hash->list = alpm_list_remove_item(hash->list, i); + if(data) { + *data = info; + } + hash->hash_table[position] = NULL; + free(i); + hash->entries -= 1; + + /* Potentially move entries following removed entry to keep open + * addressing collision resolution working. We start by finding the + * next null bucket to know how far we have to look. */ + stop = (position + 1) % hash->buckets; + while(hash->hash_table[stop] != NULL && stop != position) { + stop = (stop + 1) % hash->buckets; + } + stop = (hash->buckets + stop - 1) % hash->buckets; + + /* We now search backwards from stop to position. If we find an + * item that now hashes to position, we will move it, and then try + * to plug the new hole we just opened up, until we finally don't + * move anything. */ + while((prev = move_one_entry(hash, position, stop)) != position) { + position = prev; + } + + return(hash); + } + + position = (position + 1) % hash->buckets; + } + + return(hash); +} + +void _alpm_pkghash_free(pmpkghash_t *hash) +{ + size_t i; + if(hash != NULL) { + for(i = 0; i < hash->buckets; i++) { + free(hash->hash_table[i]); + } + free(hash->hash_table); + } + free(hash); +} + +pmpkg_t *_alpm_pkghash_find(pmpkghash_t *hash, const char *name) +{ + alpm_list_t *lp; + unsigned long name_hash; + size_t position; + + ALPM_LOG_FUNC; + + if(name == NULL || hash == NULL) { + return(NULL); + } + + name_hash = _alpm_hash_sdbm(name); + + position = name_hash % hash->buckets; + + while((lp = hash->hash_table[position]) != NULL) { + pmpkg_t *info = lp->data; + + if(info->name_hash == name_hash && strcmp(info->name, name) == 0) { + return(info); + } + + position = (position + 1) % hash->buckets; + } + + return(NULL); +} |