diff options
author | Dan McGee <dan@archlinux.org> | 2010-12-30 01:43:44 +0100 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2010-12-30 01:43:44 +0100 |
commit | a58083459b096e935693d94b9cb51a447b3a1abd (patch) | |
tree | e21857a8cc0b3dbe562cfe015571280c41023729 | |
parent | 126f50ab0b5ee3ed46c5a6ecae241e8af49b0fe2 (diff) | |
parent | e3c19569cfe7cd77674490b30624e71512417e0b (diff) | |
download | pacman-a58083459b096e935693d94b9cb51a447b3a1abd.tar.gz pacman-a58083459b096e935693d94b9cb51a447b3a1abd.tar.xz |
Merge branch 'fgets-perf'
-rw-r--r-- | lib/libalpm/be_package.c | 13 | ||||
-rw-r--r-- | lib/libalpm/be_sync.c | 15 | ||||
-rw-r--r-- | lib/libalpm/util.c | 100 | ||||
-rw-r--r-- | lib/libalpm/util.h | 18 | ||||
-rwxr-xr-x | test/pacman/pmrule.py | 10 | ||||
-rwxr-xr-x | test/pacman/pmtest.py | 7 | ||||
-rw-r--r-- | test/pacman/tests/smoke002.py | 19 |
7 files changed, 140 insertions, 42 deletions
diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c index f9f18d38..df5b28d1 100644 --- a/lib/libalpm/be_package.c +++ b/lib/libalpm/be_package.c @@ -155,17 +155,22 @@ static struct pkg_operations *get_file_pkg_ops(void) */ static int parse_descfile(struct archive *a, pmpkg_t *newpkg) { - char line[PATH_MAX]; char *ptr = NULL; char *key = NULL; int linenum = 0; + struct archive_read_buffer buf; ALPM_LOG_FUNC; - /* loop until we reach EOF (where archive_fgets will return NULL) */ - while(_alpm_archive_fgets(line, PATH_MAX, a) != NULL) { + memset(&buf, 0, sizeof(buf)); + /* 512K for a line length seems reasonable */ + buf.max_line_size = 512 * 1024; + + /* loop until we reach EOF or other error */ + while(_alpm_archive_fgets(a, &buf) == ARCHIVE_OK) { + char *line = _alpm_strtrim(buf.line); + linenum++; - _alpm_strtrim(line); if(strlen(line) == 0 || line[0] == '#') { continue; } diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 137fc1b2..d0f98c20 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -219,8 +219,8 @@ static int sync_db_populate(pmdb_t *db) } #define READ_NEXT(s) do { \ - if(_alpm_archive_fgets(s, sizeof(s), archive) == NULL) goto error; \ - _alpm_strtrim(s); \ + if(_alpm_archive_fgets(archive, &buf) != ARCHIVE_OK) goto error; \ + s = _alpm_strtrim(buf.line); \ } while(0) #define READ_AND_STORE(f) do { \ @@ -238,10 +238,10 @@ static int sync_db_populate(pmdb_t *db) static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry) { - char line[1024]; const char *entryname = NULL; char *filename, *pkgname, *p, *q; pmpkg_t *pkg; + struct archive_read_buffer buf; ALPM_LOG_FUNC; @@ -260,6 +260,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr _alpm_log(PM_LOG_FUNCTION, "loading package data from archive entry %s\n", entryname); + memset(&buf, 0, sizeof(buf)); + /* 512K for a line length seems reasonable */ + buf.max_line_size = 512 * 1024; + /* get package and db file names */ STRDUP(pkgname, entryname, RET_ERR(PM_ERR_MEMORY, -1)); p = pkgname + strlen(pkgname); @@ -279,8 +283,9 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr if(strcmp(filename, "desc") == 0 || strcmp(filename, "depends") == 0 || strcmp(filename, "deltas") == 0) { - while(_alpm_archive_fgets(line, sizeof(line), archive) != NULL) { - _alpm_strtrim(line); + while(_alpm_archive_fgets(archive, &buf) == ARCHIVE_OK) { + char *line = _alpm_strtrim(buf.line); + if(strcmp(line, "%NAME%") == 0) { READ_NEXT(line); if(strcmp(line, pkg->name) != 0) { diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 1291ea0f..d34eab5e 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -771,33 +771,89 @@ int _alpm_test_md5sum(const char *filepath, const char *md5sum) return(ret); } -char *_alpm_archive_fgets(char *line, size_t size, struct archive *a) +/* Note: does NOT handle sparse files on purpose for speed. */ +int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b) { - /* for now, just read one char at a time until we get to a - * '\n' char. we can optimize this later with an internal - * buffer. */ - /* leave room for zero terminator */ - char *last = line + size - 1; - char *i; - - for(i = line; i < last; i++) { - int ret = archive_read_data(a, i, 1); - /* special check for first read- if null, return null, - * this indicates EOF */ - if(i == line && (ret <= 0 || *i == '\0')) { - return(NULL); + char *i = NULL; + int64_t offset; + int done = 0; + + while(1) { + /* have we processed this entire block? */ + if(b->block + b->block_size == b->block_offset) { + if(b->ret == ARCHIVE_EOF) { + /* reached end of archive on the last read, now we are out of data */ + goto cleanup; + } + + /* zero-copy - this is the entire next block of data. */ + b->ret = archive_read_data_block(a, (void*)&b->block, + &b->block_size, &offset); + b->block_offset = b->block; + + /* error or end of archive with no data read, cleanup */ + if(b->ret < ARCHIVE_OK || + (b->block_size == 0 && b->ret == ARCHIVE_EOF)) { + goto cleanup; + } } - /* check if read value was null or newline */ - if(ret <= 0 || *i == '\0' || *i == '\n') { - last = i + 1; - break; + + /* loop through the block looking for EOL characters */ + for(i = b->block_offset; i < (b->block + b->block_size); i++) { + /* check if read value was null or newline */ + if(*i == '\0' || *i == '\n') { + done = 1; + break; + } } - } - /* always null terminate the buffer */ - *last = '\0'; + /* allocate our buffer, or ensure our existing one is big enough */ + if(!b->line) { + /* set the initial buffer to the read block_size */ + CALLOC(b->line, b->block_size + 1, sizeof(char), + RET_ERR(PM_ERR_MEMORY, -1)); + b->line_size = b->block_size + 1; + b->line_offset = b->line; + } else { + size_t needed = (b->line_offset - b->line) + (i - b->block_offset) + 1; + if(needed > b->max_line_size) { + RET_ERR(PM_ERR_MEMORY, -1); + } + if(needed > b->line_size) { + /* need to realloc + copy data to fit total length */ + char *new; + CALLOC(new, needed, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1)); + memcpy(new, b->line, b->line_size); + b->line_size = needed; + b->line_offset = new + (b->line_offset - b->line); + free(b->line); + b->line = new; + } + } + + if(done) { + size_t len = i - b->block_offset; + memcpy(b->line_offset, b->block_offset, len); + b->line_offset[len] = '\0'; + b->block_offset = ++i; + /* this is the main return point; from here you can read b->line */ + return(ARCHIVE_OK); + } else { + /* we've looked through the whole block but no newline, copy it */ + size_t len = b->block + b->block_size - b->block_offset; + memcpy(b->line_offset, b->block_offset, len); + b->line_offset += len; + b->block_offset = i; + } + } - return(line); +cleanup: + { + int ret = b->ret; + FREE(b->line); + memset(b, 0, sizeof(b)); + return(ret); + } } int _alpm_splitname(const char *target, pmpkg_t *pkg) diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h index 5464b239..543643b1 100644 --- a/lib/libalpm/util.h +++ b/lib/libalpm/util.h @@ -59,6 +59,22 @@ _alpm_log(PM_LOG_DEBUG, "returning error %d from %s : %s\n", err, __func__, alpm_strerrorlast()); \ return(ret); } while(0) +/** + * Used as a buffer/state holder for _alpm_archive_fgets(). + */ +struct archive_read_buffer { + char *line; + char *line_offset; + size_t line_size; + size_t max_line_size; + + char *block; + char *block_offset; + size_t block_size; + + int ret; +}; + int _alpm_makepath(const char *path); int _alpm_makepath_mode(const char *path, mode_t mode); int _alpm_copyfile(const char *src, const char *dest); @@ -76,7 +92,7 @@ char *_alpm_filecache_find(const char *filename); const char *_alpm_filecache_setup(void); int _alpm_lstat(const char *path, struct stat *buf); int _alpm_test_md5sum(const char *filepath, const char *md5sum); -char *_alpm_archive_fgets(char *line, size_t size, struct archive *a); +int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b); int _alpm_splitname(const char *target, pmpkg_t *pkg); unsigned long _alpm_hash_sdbm(const char *str); diff --git a/test/pacman/pmrule.py b/test/pacman/pmrule.py index e7c9c44f..89ae3f49 100755 --- a/test/pacman/pmrule.py +++ b/test/pacman/pmrule.py @@ -29,7 +29,9 @@ class pmrule: self.result = 0 def __str__(self): - return "rule = %s" % self.rule + if len(self.rule) <= 40: + return self.rule + return self.rule[:37] + '...' def check(self, root, retcode, localdb, files): """ @@ -76,6 +78,9 @@ class pmrule: elif case == "VERSION": if value != newpkg.version: success = 0 + elif case == "DESC": + if value != newpkg.desc: + success = 0 elif case == "GROUPS": if not value in newpkg.groups: success = 0 @@ -153,7 +158,4 @@ class pmrule: self.result = success return success - -if __name__ != "__main__": - rule = pmrule("PKG_EXIST=dummy") # vim: set ts=4 sw=4 et: diff --git a/test/pacman/pmtest.py b/test/pacman/pmtest.py index b7af5809..c70e41ae 100755 --- a/test/pacman/pmtest.py +++ b/test/pacman/pmtest.py @@ -260,11 +260,6 @@ class pmtest: self.result["fail"] += 1 else: msg = "SKIP" - print "\t[%s] %s" % (msg, i.rule) - i.result = success - - -if __name__ == "__main__": - pass + print "\t[%s] %s" % (msg, i) # vim: set ts=4 sw=4 et: diff --git a/test/pacman/tests/smoke002.py b/test/pacman/tests/smoke002.py new file mode 100644 index 00000000..44f2d0ec --- /dev/null +++ b/test/pacman/tests/smoke002.py @@ -0,0 +1,19 @@ +self.description = "Install packages with huge descriptions" + +p1 = pmpkg("pkg1") +p1.desc = 'A' * 500 * 1024 +self.addpkg(p1) + +p2 = pmpkg("pkg2") +p2.desc = 'A' * 600 * 1024 +self.addpkg(p2) + +self.args = "-U %s %s" % (p1.filename(), p2.filename()) + +# Note that the current cutoff on line length is 512K, so the first package +# will succeed while the second one will fail to record the description. +self.addrule("PACMAN_RETCODE=0") +self.addrule("PKG_EXIST=pkg1") +self.addrule("PKG_DESC=pkg1|%s" % p1.desc) +self.addrule("PKG_EXIST=pkg1") +self.addrule("!PKG_DESC=pkg1|%s" % p2.desc) |