From e17b0446bd815f7b25f5bf0b838ef3d02d4eb64e Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Fri, 7 Jan 2011 00:32:17 +1000 Subject: Add a hash table for holding packages Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 lib/libalpm/pkghash.c (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c new file mode 100644 index 00000000..3ed6453f --- /dev/null +++ b/lib/libalpm/pkghash.c @@ -0,0 +1,292 @@ +/* + * pkghash.c + * + * Copyright (c) 2011 Pacman Development Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "pkghash.h" +#include "util.h" +#include "log.h" + +/* List of primes for possible sizes of hash tables. + * + * The maximum table size is the last prime under 1,000,000. That is + * more than an order of magnitude greater than the number of packages + * in any Linux distribution. + */ +static const size_t prime_list[] = +{ + 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul, 37ul, 41ul, 43ul, 47ul, + 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul, 83ul, 89ul, 97ul, 103ul, + 109ul, 113ul, 127ul, 137ul, 139ul, 149ul, 157ul, 167ul, 179ul, 193ul, + 199ul, 211ul, 227ul, 241ul, 257ul, 277ul, 293ul, 313ul, 337ul, 359ul, + 383ul, 409ul, 439ul, 467ul, 503ul, 541ul, 577ul, 619ul, 661ul, 709ul, + 761ul, 823ul, 887ul, 953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul, + 1493ul, 1613ul, 1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul, + 2753ul, 2971ul, 3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul, + 5087ul, 5503ul, 5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul, + 9497ul, 10273ul, 11113ul, 12011ul, 12983ul, 14033ul, 15173ul, + 16411ul, 17749ul, 19183ul, 20753ul, 22447ul, 24281ul, 26267ul, + 28411ul, 30727ul, 33223ul, 35933ul, 38873ul, 42043ul, 45481ul, + 49201ul, 53201ul, 57557ul, 62233ul, 67307ul, 72817ul, 78779ul, + 85229ul, 92203ul, 99733ul, 107897ul, 116731ul, 126271ul, 136607ul, + 147793ul, 159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul, + 256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul, 410857ul, + 444487ul, 480881ul, 520241ul, 562841ul, 608903ul, 658753ul, 712697ul, + 771049ul, 834181ul, 902483ul, 976369ul +}; + +/* Allocate a hash table with at least "size" buckets */ +pmpkghash_t *_alpm_pkghash_create(size_t size) +{ + pmpkghash_t *hash = NULL; + size_t i, loopsize; + + MALLOC(hash, sizeof(pmpkghash_t), RET_ERR(PM_ERR_MEMORY, NULL)); + + hash->list = NULL; + hash->entries = 0; + + loopsize = sizeof(prime_list) / sizeof(*prime_list); + for(i = 0; i < loopsize; i++) { + if(prime_list[i] > size) { + hash->buckets = prime_list[i]; + break; + } + } + + CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \ + free(hash); RET_ERR(PM_ERR_MEMORY, NULL)); + + return(hash); +} + +/* Expand the hash table size to the next increment and rebin the entries */ +static pmpkghash_t *rehash(pmpkghash_t *oldhash) +{ + //pmpkghash_t *newhash; + + /* TODO - calculate size of newhash */ + /* Hash tables will need resized in two cases: + * - adding packages to the local database + * - poor estimation of the number of packages in sync database + * + * For small hash tables sizes (<500) the increase in size is by a + * minimum of a factor of 2 for optimal rehash efficiency. For + * larger database sizes, this increase is reduced to avoid excess + * memory allocation as both scenarios requiring a rehash should not + * require a table size increase that large. */ + + //newhash = _alpm_pkghash_create(oldhash->buckets); + + /* TODO - rebin entries */ + + //_alpm_pkghash_free(oldhash); + + //return newhash; + + printf("rehash needed!!!\n"); + return(oldhash); +} + +pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) +{ + alpm_list_t *ptr; + size_t position; + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = pkg->name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_join(hash->list, ptr); + hash->entries += 1; + + return(hash); +} + +pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) +{ + if(!hash) { + return(_alpm_pkghash_add(hash, pkg)); + } + + alpm_list_t *ptr; + size_t position; + + if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) { + hash = rehash(hash); + } + + position = pkg->name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + ptr = calloc(1, sizeof(alpm_list_t)); + if(ptr == NULL) { + return(hash); + } + + ptr->data = pkg; + ptr->next = NULL; + ptr->prev = ptr; + + hash->hash_table[position] = ptr; + hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp); + hash->entries += 1; + + return(hash); +} + +/** + * @brief Remove a package from a pkghash. + * + * @param hash the hash to remove the package from + * @param pkg the package we are removing + * @param data output parameter containing the removed item + * + * @return the resultant hash + */ +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data) +{ + alpm_list_t *i; + size_t position; + + if(pkg == NULL || hash == NULL) { + return(hash); + } + + position = pkg->name_hash % hash->buckets; + while((i = hash->hash_table[position]) != NULL) { + pmpkg_t *info = i->data; + + if(info->name_hash == pkg->name_hash && + strcmp(info->name, pkg->name) == 0) { + +#if 0 /* Actually removing items is not a good idea with linear probing... */ + /* remove from list */ + /* TODO - refactor with alpm_list_remove */ + if(i == hash->list) { + /* Special case: removing the head node which has a back reference to + * the tail node */ + hash->list = i->next; + if(hash->list) { + hash->list->prev = i->prev; + } + i->prev = NULL; + } else if(i == hash->list->prev) { + /* Special case: removing the tail node, so we need to fix the back + * reference on the head node. We also know tail != head. */ + if(i->prev) { + /* i->next should always be null */ + i->prev->next = i->next; + hash->list->prev = i->prev; + i->prev = NULL; + } + } else { + /* Normal case, non-head and non-tail node */ + if(i->next) { + i->next->prev = i->prev; + } + if(i->prev) { + i->prev->next = i->next; + } + } + + /* remove from hash */ + data = info; + info = NULL; + free(i); + i = NULL; + + hash->entries -= 1; +#endif + + /* fake removal - pkgname can not be blank so 0 hash is impossible */ + info->name_hash = 0; + + return(hash); + } + + position = (position + 1) % hash->buckets; + } + + return(hash); +} + +void _alpm_pkghash_free(pmpkghash_t *hash) +{ + size_t i; + if(hash != NULL) { + for(i = 0; i < hash->buckets; i++) { + free(hash->hash_table[i]); + } + free(hash->hash_table); + } + free(hash); +} + +pmpkg_t *_alpm_pkghash_find(pmpkghash_t *hash, const char *name) +{ + alpm_list_t *lp; + unsigned long name_hash; + size_t position; + + ALPM_LOG_FUNC; + + if(name == NULL || hash == NULL) { + return(NULL); + } + + name_hash = _alpm_hash_sdbm(name); + + position = name_hash % hash->buckets; + + while((lp = hash->hash_table[position]) != NULL) { + pmpkg_t *info = lp->data; + + if(info->name_hash == name_hash && strcmp(info->name, name) == 0) { + return(info); + } + + position = (position + 1) % hash->buckets; + } + + return(NULL); +} -- cgit v1.2.3-24-g4f1b From ce5471511257ada996bfcb6ea4236cd89c0d6890 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 28 Jan 2011 12:03:36 -0600 Subject: Implement a quick and dirty rehash function This allows us to get through the rehash required by smoke001 and pass all pactests. It is by no means the best or most efficient implementation but it does do the job. Signed-off-by: Dan McGee --- lib/libalpm/pkghash.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 3ed6453f..f2497070 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -77,9 +77,9 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) /* Expand the hash table size to the next increment and rebin the entries */ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { - //pmpkghash_t *newhash; - - /* TODO - calculate size of newhash */ + pmpkghash_t *newhash; + alpm_list_t *ptr; + size_t newsize; /* Hash tables will need resized in two cases: * - adding packages to the local database * - poor estimation of the number of packages in sync database @@ -89,17 +89,22 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) * larger database sizes, this increase is reduced to avoid excess * memory allocation as both scenarios requiring a rehash should not * require a table size increase that large. */ + if(oldhash->buckets < 500) { + newsize = oldhash->buckets * 2; + } else if(oldhash->buckets < 3500) { + newsize = oldhash->buckets * 3 / 2; + } else { + newsize = oldhash->buckets * 4 / 3; + } - //newhash = _alpm_pkghash_create(oldhash->buckets); - - /* TODO - rebin entries */ - - //_alpm_pkghash_free(oldhash); + newhash = _alpm_pkghash_create(newsize); + for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { + newhash = _alpm_pkghash_add(newhash, ptr->data); + } - //return newhash; + _alpm_pkghash_free(oldhash); - printf("rehash needed!!!\n"); - return(oldhash); + return(newhash); } pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) -- cgit v1.2.3-24-g4f1b From d843c86b7b7cbf376716817e7c2c55b1f9360a72 Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 11:39:25 +1000 Subject: Error handling for maximum database size Check that the requested size of a pkghash is not beyond the maximum prime. Also check for successful creation of a new hash before rehashing. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index f2497070..0324465f 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -59,6 +59,7 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) hash->list = NULL; hash->entries = 0; + hash->buckets = 0; loopsize = sizeof(prime_list) / sizeof(*prime_list); for(i = 0; i < loopsize; i++) { @@ -68,6 +69,12 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) } } + if(hash->buckets < size) { + _alpm_log(PM_LOG_ERROR, _("database larger than maximum size")); + free(hash); + return(NULL); + } + CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \ free(hash); RET_ERR(PM_ERR_MEMORY, NULL)); @@ -98,6 +105,11 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) } newhash = _alpm_pkghash_create(newsize); + if(newhash == NULL) { + /* creation of newhash failed, stick with old one... */ + return(oldhash); + } + for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { newhash = _alpm_pkghash_add(newhash, ptr->data); } -- cgit v1.2.3-24-g4f1b From c9820ec97b417d33dc37eb589f069766f1068ea1 Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 11:47:31 +1000 Subject: Slightly more efficient rehash size selection While probably still not optimal in terms of everyday usage in pacman, this reduces the absolute size increase to "more reasonable" levels. For databases greater than 5000 in size, the minimum size increase is used which is still on the order of a 10% increase. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 0324465f..8f7168d2 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -98,10 +98,12 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) * require a table size increase that large. */ if(oldhash->buckets < 500) { newsize = oldhash->buckets * 2; - } else if(oldhash->buckets < 3500) { + } else if(oldhash->buckets < 2000) { newsize = oldhash->buckets * 3 / 2; - } else { + } else if(oldhash->buckets < 5000) { newsize = oldhash->buckets * 4 / 3; + } else { + newsize = oldhash->buckets + 1; } newhash = _alpm_pkghash_create(newsize); -- cgit v1.2.3-24-g4f1b From 93207863493b54e794098b4ee5987d59c8c4b14b Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 22:50:55 +1000 Subject: Rehash efficiently Rehash without recreating the hash table list or reallocating the memory for holding the list nodes. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 8f7168d2..44a03ab5 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -86,7 +86,8 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { pmpkghash_t *newhash; alpm_list_t *ptr; - size_t newsize; + size_t newsize, position, i; + /* Hash tables will need resized in two cases: * - adding packages to the local database * - poor estimation of the number of packages in sync database @@ -112,10 +113,27 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) return(oldhash); } - for(ptr = oldhash->list; ptr != NULL; ptr = ptr->next) { - newhash = _alpm_pkghash_add(newhash, ptr->data); + newhash->list = oldhash->list; + oldhash->list = NULL; + + for(i = 0; i < oldhash->buckets; i++) { + if(oldhash->hash_table[i] != NULL) { + pmpkg_t *package = oldhash->hash_table[i]->data; + + position = package->name_hash % newhash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = newhash->hash_table[position]) != NULL) { + position = (position + 1) % newhash->buckets; + } + + newhash->hash_table[position] = oldhash->hash_table[i]; + oldhash->hash_table[i] = NULL; + } } + newhash->entries = oldhash->entries; + _alpm_pkghash_free(oldhash); return(newhash); -- cgit v1.2.3-24-g4f1b From 11e5e86151f83219b1cedbca31be7530911e25eb Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sat, 29 Jan 2011 23:24:13 +1000 Subject: Refactor finding position for new hash entry Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 44a03ab5..bdff78ea 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -81,11 +81,25 @@ pmpkghash_t *_alpm_pkghash_create(size_t size) return(hash); } +static size_t get_hash_position(unsigned long name_hash, pmpkghash_t *hash) +{ + size_t position; + alpm_list_t *ptr; + + position = name_hash % hash->buckets; + + /* collision resolution using open addressing with linear probing */ + while((ptr = hash->hash_table[position]) != NULL) { + position = (position + 1) % hash->buckets; + } + + return(position); +} + /* Expand the hash table size to the next increment and rebin the entries */ static pmpkghash_t *rehash(pmpkghash_t *oldhash) { pmpkghash_t *newhash; - alpm_list_t *ptr; size_t newsize, position, i; /* Hash tables will need resized in two cases: @@ -120,12 +134,7 @@ static pmpkghash_t *rehash(pmpkghash_t *oldhash) if(oldhash->hash_table[i] != NULL) { pmpkg_t *package = oldhash->hash_table[i]->data; - position = package->name_hash % newhash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = newhash->hash_table[position]) != NULL) { - position = (position + 1) % newhash->buckets; - } + position = get_hash_position(package->name_hash, newhash); newhash->hash_table[position] = oldhash->hash_table[i]; oldhash->hash_table[i] = NULL; @@ -152,12 +161,7 @@ pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg) hash = rehash(hash); } - position = pkg->name_hash % hash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = hash->hash_table[position]) != NULL) { - position = (position + 1) % hash->buckets; - } + position = get_hash_position(pkg->name_hash, hash); ptr = calloc(1, sizeof(alpm_list_t)); if(ptr == NULL) { @@ -188,12 +192,7 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) hash = rehash(hash); } - position = pkg->name_hash % hash->buckets; - - /* collision resolution using open addressing with linear probing */ - while((ptr = hash->hash_table[position]) != NULL) { - position = (position + 1) % hash->buckets; - } + position = get_hash_position(pkg->name_hash, hash); ptr = calloc(1, sizeof(alpm_list_t)); if(ptr == NULL) { -- cgit v1.2.3-24-g4f1b From 01c3c7e4f28d837f0b8a6aaaf27d16894d4b762d Mon Sep 17 00:00:00 2001 From: Allan McRae Date: Sun, 30 Jan 2011 22:42:45 +1000 Subject: Actually remove packages from pkghash on removal Fully removes a package from the hash. Also unify prototype with removal from an alpm_list_t, fixing issues when removing a package from the pkgcache. Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index bdff78ea..14056e37 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -219,11 +219,15 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) * * @return the resultant hash */ -pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data) +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **data) { alpm_list_t *i; size_t position; + if(data) { + *data = NULL; + } + if(pkg == NULL || hash == NULL) { return(hash); } @@ -235,7 +239,6 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { -#if 0 /* Actually removing items is not a good idea with linear probing... */ /* remove from list */ /* TODO - refactor with alpm_list_remove */ if(i == hash->list) { @@ -266,16 +269,34 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t *data } /* remove from hash */ - data = info; - info = NULL; + if(data) { + *data = info; + } + hash->hash_table[position] = NULL; free(i); - i = NULL; hash->entries -= 1; -#endif - /* fake removal - pkgname can not be blank so 0 hash is impossible */ - info->name_hash = 0; + /* potentially move entries following removed entry to keep + * open addressing collision resolution working */ + size_t next_null = (position + 1) % hash->buckets; + while(hash->hash_table[next_null] != NULL) { + next_null = (next_null + 1) % hash->buckets; + } + + position = (position + 1) % hash->buckets; + + while((i = hash->hash_table[position]) != NULL) { + info = i->data; + size_t new_position = get_hash_position(info->name_hash, hash); + + if(new_position != next_null) { + hash->hash_table[new_position] = i; + hash->hash_table[position] = NULL; + } + + position = (position + 1) % hash->buckets; + } return(hash); } -- cgit v1.2.3-24-g4f1b From 1f145bcd1a4fb22d4cb548a3be64898f60820695 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 2 Feb 2011 20:46:29 -0600 Subject: Use alpm_list_remove_item in pkghash_remove Removes the code that was duplicated and has now been refactored into a separate method. Signed-off-by: Dan McGee --- lib/libalpm/pkghash.c | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 14056e37..3d420e1f 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -219,7 +219,8 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) * * @return the resultant hash */ -pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **data) +pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, + pmpkg_t **data) { alpm_list_t *i; size_t position; @@ -239,36 +240,8 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, pmpkg_t **dat if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { - /* remove from list */ - /* TODO - refactor with alpm_list_remove */ - if(i == hash->list) { - /* Special case: removing the head node which has a back reference to - * the tail node */ - hash->list = i->next; - if(hash->list) { - hash->list->prev = i->prev; - } - i->prev = NULL; - } else if(i == hash->list->prev) { - /* Special case: removing the tail node, so we need to fix the back - * reference on the head node. We also know tail != head. */ - if(i->prev) { - /* i->next should always be null */ - i->prev->next = i->next; - hash->list->prev = i->prev; - i->prev = NULL; - } - } else { - /* Normal case, non-head and non-tail node */ - if(i->next) { - i->next->prev = i->prev; - } - if(i->prev) { - i->prev->next = i->next; - } - } - - /* remove from hash */ + /* remove from list and hash */ + hash->list = alpm_list_remove_item(hash->list, i); if(data) { *data = info; } -- cgit v1.2.3-24-g4f1b From 6b0d4674bb132b2583920211cc798f3db77ec392 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 3 Feb 2011 10:55:26 -0600 Subject: Improve pkghash_remove algorithm Rather than potentially move every item to the next NULL, attempt to move at most one item at a time by iterating backwards from the NULL location in the hash array. If we move an item, we repeat the process on the now shorter "chain" until no more items need moving. Signed-off-by: Dan McGee Signed-off-by: Allan McRae --- lib/libalpm/pkghash.c | 62 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'lib/libalpm/pkghash.c') diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c index 3d420e1f..54805275 100644 --- a/lib/libalpm/pkghash.c +++ b/lib/libalpm/pkghash.c @@ -210,6 +210,34 @@ pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg) return(hash); } +static size_t move_one_entry(pmpkghash_t *hash, size_t start, size_t end) +{ + /* Iterate backwards from 'end' to 'start', seeing if any of the items + * would hash to 'start'. If we find one, we move it there and break. If + * we get all the way back to position and find none that hash to it, we + * also end iteration. Iterating backwards helps prevent needless shuffles; + * we will never need to move more than one item per function call. The + * return value is our current iteration location; if this is equal to + * 'start' we can stop this madness. */ + while(end != start) { + alpm_list_t *i = hash->hash_table[end]; + pmpkg_t *info = i->data; + size_t new_position = get_hash_position(info->name_hash, hash); + + if(new_position == start) { + hash->hash_table[start] = i; + hash->hash_table[end] = NULL; + break; + } + + /* the odd math ensures we are always positive, e.g. + * e.g. (0 - 1) % 47 == -1 + * e.g. (47 + 0 - 1) % 47 == 46 */ + end = (hash->buckets + end - 1) % hash->buckets; + } + return(end); +} + /** * @brief Remove a package from a pkghash. * @@ -239,6 +267,7 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, if(info->name_hash == pkg->name_hash && strcmp(info->name, pkg->name) == 0) { + size_t stop, prev; /* remove from list and hash */ hash->list = alpm_list_remove_item(hash->list, i); @@ -247,28 +276,23 @@ pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg, } hash->hash_table[position] = NULL; free(i); - hash->entries -= 1; - /* potentially move entries following removed entry to keep - * open addressing collision resolution working */ - size_t next_null = (position + 1) % hash->buckets; - while(hash->hash_table[next_null] != NULL) { - next_null = (next_null + 1) % hash->buckets; + /* Potentially move entries following removed entry to keep open + * addressing collision resolution working. We start by finding the + * next null bucket to know how far we have to look. */ + stop = (position + 1) % hash->buckets; + while(hash->hash_table[stop] != NULL && stop != position) { + stop = (stop + 1) % hash->buckets; } - - position = (position + 1) % hash->buckets; - - while((i = hash->hash_table[position]) != NULL) { - info = i->data; - size_t new_position = get_hash_position(info->name_hash, hash); - - if(new_position != next_null) { - hash->hash_table[new_position] = i; - hash->hash_table[position] = NULL; - } - - position = (position + 1) % hash->buckets; + stop = (hash->buckets + stop - 1) % hash->buckets; + + /* We now search backwards from stop to position. If we find an + * item that now hashes to position, we will move it, and then try + * to plug the new hole we just opened up, until we finally don't + * move anything. */ + while((prev = move_one_entry(hash, position, stop)) != position) { + position = prev; } return(hash); -- cgit v1.2.3-24-g4f1b