summaryrefslogtreecommitdiffstats
path: root/lib/libalpm/pkghash.c
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2011-02-04 16:10:25 +0100
committerDan McGee <dan@archlinux.org>2011-02-04 16:10:25 +0100
commite34fc4eddf73f2453b42235f5ae7d65f75db66fc (patch)
tree93b3a31a7db207f70b0a024f4de83ff8e0e3158f /lib/libalpm/pkghash.c
parentc12ccbfb2c7aa907ba01339a1a29089c65ea9911 (diff)
parent6b0d4674bb132b2583920211cc798f3db77ec392 (diff)
downloadpacman-e34fc4eddf73f2453b42235f5ae7d65f75db66fc.tar.gz
pacman-e34fc4eddf73f2453b42235f5ae7d65f75db66fc.tar.xz
Merge remote-tracking branch 'allan/hash'
Diffstat (limited to 'lib/libalpm/pkghash.c')
-rw-r--r--lib/libalpm/pkghash.c346
1 files changed, 346 insertions, 0 deletions
diff --git a/lib/libalpm/pkghash.c b/lib/libalpm/pkghash.c
new file mode 100644
index 00000000..54805275
--- /dev/null
+++ b/lib/libalpm/pkghash.c
@@ -0,0 +1,346 @@
+/*
+ * pkghash.c
+ *
+ * Copyright (c) 2011 Pacman Development Team <pacman-dev@archlinux.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pkghash.h"
+#include "util.h"
+#include "log.h"
+
+/* List of primes for possible sizes of hash tables.
+ *
+ * The maximum table size is the last prime under 1,000,000. That is
+ * more than an order of magnitude greater than the number of packages
+ * in any Linux distribution.
+ */
+static const size_t prime_list[] =
+{
+ 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul, 37ul, 41ul, 43ul, 47ul,
+ 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul, 83ul, 89ul, 97ul, 103ul,
+ 109ul, 113ul, 127ul, 137ul, 139ul, 149ul, 157ul, 167ul, 179ul, 193ul,
+ 199ul, 211ul, 227ul, 241ul, 257ul, 277ul, 293ul, 313ul, 337ul, 359ul,
+ 383ul, 409ul, 439ul, 467ul, 503ul, 541ul, 577ul, 619ul, 661ul, 709ul,
+ 761ul, 823ul, 887ul, 953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul,
+ 1493ul, 1613ul, 1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul,
+ 2753ul, 2971ul, 3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul,
+ 5087ul, 5503ul, 5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul,
+ 9497ul, 10273ul, 11113ul, 12011ul, 12983ul, 14033ul, 15173ul,
+ 16411ul, 17749ul, 19183ul, 20753ul, 22447ul, 24281ul, 26267ul,
+ 28411ul, 30727ul, 33223ul, 35933ul, 38873ul, 42043ul, 45481ul,
+ 49201ul, 53201ul, 57557ul, 62233ul, 67307ul, 72817ul, 78779ul,
+ 85229ul, 92203ul, 99733ul, 107897ul, 116731ul, 126271ul, 136607ul,
+ 147793ul, 159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul,
+ 256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul, 410857ul,
+ 444487ul, 480881ul, 520241ul, 562841ul, 608903ul, 658753ul, 712697ul,
+ 771049ul, 834181ul, 902483ul, 976369ul
+};
+
+/* Allocate a hash table with at least "size" buckets */
+pmpkghash_t *_alpm_pkghash_create(size_t size)
+{
+ pmpkghash_t *hash = NULL;
+ size_t i, loopsize;
+
+ MALLOC(hash, sizeof(pmpkghash_t), RET_ERR(PM_ERR_MEMORY, NULL));
+
+ hash->list = NULL;
+ hash->entries = 0;
+ hash->buckets = 0;
+
+ loopsize = sizeof(prime_list) / sizeof(*prime_list);
+ for(i = 0; i < loopsize; i++) {
+ if(prime_list[i] > size) {
+ hash->buckets = prime_list[i];
+ break;
+ }
+ }
+
+ if(hash->buckets < size) {
+ _alpm_log(PM_LOG_ERROR, _("database larger than maximum size"));
+ free(hash);
+ return(NULL);
+ }
+
+ CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t*), \
+ free(hash); RET_ERR(PM_ERR_MEMORY, NULL));
+
+ return(hash);
+}
+
+static size_t get_hash_position(unsigned long name_hash, pmpkghash_t *hash)
+{
+ size_t position;
+ alpm_list_t *ptr;
+
+ position = name_hash % hash->buckets;
+
+ /* collision resolution using open addressing with linear probing */
+ while((ptr = hash->hash_table[position]) != NULL) {
+ position = (position + 1) % hash->buckets;
+ }
+
+ return(position);
+}
+
+/* Expand the hash table size to the next increment and rebin the entries */
+static pmpkghash_t *rehash(pmpkghash_t *oldhash)
+{
+ pmpkghash_t *newhash;
+ size_t newsize, position, i;
+
+ /* Hash tables will need resized in two cases:
+ * - adding packages to the local database
+ * - poor estimation of the number of packages in sync database
+ *
+ * For small hash tables sizes (<500) the increase in size is by a
+ * minimum of a factor of 2 for optimal rehash efficiency. For
+ * larger database sizes, this increase is reduced to avoid excess
+ * memory allocation as both scenarios requiring a rehash should not
+ * require a table size increase that large. */
+ if(oldhash->buckets < 500) {
+ newsize = oldhash->buckets * 2;
+ } else if(oldhash->buckets < 2000) {
+ newsize = oldhash->buckets * 3 / 2;
+ } else if(oldhash->buckets < 5000) {
+ newsize = oldhash->buckets * 4 / 3;
+ } else {
+ newsize = oldhash->buckets + 1;
+ }
+
+ newhash = _alpm_pkghash_create(newsize);
+ if(newhash == NULL) {
+ /* creation of newhash failed, stick with old one... */
+ return(oldhash);
+ }
+
+ newhash->list = oldhash->list;
+ oldhash->list = NULL;
+
+ for(i = 0; i < oldhash->buckets; i++) {
+ if(oldhash->hash_table[i] != NULL) {
+ pmpkg_t *package = oldhash->hash_table[i]->data;
+
+ position = get_hash_position(package->name_hash, newhash);
+
+ newhash->hash_table[position] = oldhash->hash_table[i];
+ oldhash->hash_table[i] = NULL;
+ }
+ }
+
+ newhash->entries = oldhash->entries;
+
+ _alpm_pkghash_free(oldhash);
+
+ return(newhash);
+}
+
+pmpkghash_t *_alpm_pkghash_add(pmpkghash_t *hash, pmpkg_t *pkg)
+{
+ alpm_list_t *ptr;
+ size_t position;
+
+ if(pkg == NULL || hash == NULL) {
+ return(hash);
+ }
+
+ if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) {
+ hash = rehash(hash);
+ }
+
+ position = get_hash_position(pkg->name_hash, hash);
+
+ ptr = calloc(1, sizeof(alpm_list_t));
+ if(ptr == NULL) {
+ return(hash);
+ }
+
+ ptr->data = pkg;
+ ptr->next = NULL;
+ ptr->prev = ptr;
+
+ hash->hash_table[position] = ptr;
+ hash->list = alpm_list_join(hash->list, ptr);
+ hash->entries += 1;
+
+ return(hash);
+}
+
+pmpkghash_t *_alpm_pkghash_add_sorted(pmpkghash_t *hash, pmpkg_t *pkg)
+{
+ if(!hash) {
+ return(_alpm_pkghash_add(hash, pkg));
+ }
+
+ alpm_list_t *ptr;
+ size_t position;
+
+ if((hash->entries + 1) / MAX_HASH_LOAD > hash->buckets) {
+ hash = rehash(hash);
+ }
+
+ position = get_hash_position(pkg->name_hash, hash);
+
+ ptr = calloc(1, sizeof(alpm_list_t));
+ if(ptr == NULL) {
+ return(hash);
+ }
+
+ ptr->data = pkg;
+ ptr->next = NULL;
+ ptr->prev = ptr;
+
+ hash->hash_table[position] = ptr;
+ hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
+ hash->entries += 1;
+
+ return(hash);
+}
+
+static size_t move_one_entry(pmpkghash_t *hash, size_t start, size_t end)
+{
+ /* Iterate backwards from 'end' to 'start', seeing if any of the items
+ * would hash to 'start'. If we find one, we move it there and break. If
+ * we get all the way back to position and find none that hash to it, we
+ * also end iteration. Iterating backwards helps prevent needless shuffles;
+ * we will never need to move more than one item per function call. The
+ * return value is our current iteration location; if this is equal to
+ * 'start' we can stop this madness. */
+ while(end != start) {
+ alpm_list_t *i = hash->hash_table[end];
+ pmpkg_t *info = i->data;
+ size_t new_position = get_hash_position(info->name_hash, hash);
+
+ if(new_position == start) {
+ hash->hash_table[start] = i;
+ hash->hash_table[end] = NULL;
+ break;
+ }
+
+ /* the odd math ensures we are always positive, e.g.
+ * e.g. (0 - 1) % 47 == -1
+ * e.g. (47 + 0 - 1) % 47 == 46 */
+ end = (hash->buckets + end - 1) % hash->buckets;
+ }
+ return(end);
+}
+
+/**
+ * @brief Remove a package from a pkghash.
+ *
+ * @param hash the hash to remove the package from
+ * @param pkg the package we are removing
+ * @param data output parameter containing the removed item
+ *
+ * @return the resultant hash
+ */
+pmpkghash_t *_alpm_pkghash_remove(pmpkghash_t *hash, pmpkg_t *pkg,
+ pmpkg_t **data)
+{
+ alpm_list_t *i;
+ size_t position;
+
+ if(data) {
+ *data = NULL;
+ }
+
+ if(pkg == NULL || hash == NULL) {
+ return(hash);
+ }
+
+ position = pkg->name_hash % hash->buckets;
+ while((i = hash->hash_table[position]) != NULL) {
+ pmpkg_t *info = i->data;
+
+ if(info->name_hash == pkg->name_hash &&
+ strcmp(info->name, pkg->name) == 0) {
+ size_t stop, prev;
+
+ /* remove from list and hash */
+ hash->list = alpm_list_remove_item(hash->list, i);
+ if(data) {
+ *data = info;
+ }
+ hash->hash_table[position] = NULL;
+ free(i);
+ hash->entries -= 1;
+
+ /* Potentially move entries following removed entry to keep open
+ * addressing collision resolution working. We start by finding the
+ * next null bucket to know how far we have to look. */
+ stop = (position + 1) % hash->buckets;
+ while(hash->hash_table[stop] != NULL && stop != position) {
+ stop = (stop + 1) % hash->buckets;
+ }
+ stop = (hash->buckets + stop - 1) % hash->buckets;
+
+ /* We now search backwards from stop to position. If we find an
+ * item that now hashes to position, we will move it, and then try
+ * to plug the new hole we just opened up, until we finally don't
+ * move anything. */
+ while((prev = move_one_entry(hash, position, stop)) != position) {
+ position = prev;
+ }
+
+ return(hash);
+ }
+
+ position = (position + 1) % hash->buckets;
+ }
+
+ return(hash);
+}
+
+void _alpm_pkghash_free(pmpkghash_t *hash)
+{
+ size_t i;
+ if(hash != NULL) {
+ for(i = 0; i < hash->buckets; i++) {
+ free(hash->hash_table[i]);
+ }
+ free(hash->hash_table);
+ }
+ free(hash);
+}
+
+pmpkg_t *_alpm_pkghash_find(pmpkghash_t *hash, const char *name)
+{
+ alpm_list_t *lp;
+ unsigned long name_hash;
+ size_t position;
+
+ ALPM_LOG_FUNC;
+
+ if(name == NULL || hash == NULL) {
+ return(NULL);
+ }
+
+ name_hash = _alpm_hash_sdbm(name);
+
+ position = name_hash % hash->buckets;
+
+ while((lp = hash->hash_table[position]) != NULL) {
+ pmpkg_t *info = lp->data;
+
+ if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
+ return(info);
+ }
+
+ position = (position + 1) % hash->buckets;
+ }
+
+ return(NULL);
+}