summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2010-12-30 01:43:44 +0100
committerDan McGee <dan@archlinux.org>2010-12-30 01:43:44 +0100
commita58083459b096e935693d94b9cb51a447b3a1abd (patch)
treee21857a8cc0b3dbe562cfe015571280c41023729
parent126f50ab0b5ee3ed46c5a6ecae241e8af49b0fe2 (diff)
parente3c19569cfe7cd77674490b30624e71512417e0b (diff)
downloadpacman-a58083459b096e935693d94b9cb51a447b3a1abd.tar.gz
pacman-a58083459b096e935693d94b9cb51a447b3a1abd.tar.xz
Merge branch 'fgets-perf'
-rw-r--r--lib/libalpm/be_package.c13
-rw-r--r--lib/libalpm/be_sync.c15
-rw-r--r--lib/libalpm/util.c100
-rw-r--r--lib/libalpm/util.h18
-rwxr-xr-xtest/pacman/pmrule.py10
-rwxr-xr-xtest/pacman/pmtest.py7
-rw-r--r--test/pacman/tests/smoke002.py19
7 files changed, 140 insertions, 42 deletions
diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
index f9f18d38..df5b28d1 100644
--- a/lib/libalpm/be_package.c
+++ b/lib/libalpm/be_package.c
@@ -155,17 +155,22 @@ static struct pkg_operations *get_file_pkg_ops(void)
*/
static int parse_descfile(struct archive *a, pmpkg_t *newpkg)
{
- char line[PATH_MAX];
char *ptr = NULL;
char *key = NULL;
int linenum = 0;
+ struct archive_read_buffer buf;
ALPM_LOG_FUNC;
- /* loop until we reach EOF (where archive_fgets will return NULL) */
- while(_alpm_archive_fgets(line, PATH_MAX, a) != NULL) {
+ memset(&buf, 0, sizeof(buf));
+ /* 512K for a line length seems reasonable */
+ buf.max_line_size = 512 * 1024;
+
+ /* loop until we reach EOF or other error */
+ while(_alpm_archive_fgets(a, &buf) == ARCHIVE_OK) {
+ char *line = _alpm_strtrim(buf.line);
+
linenum++;
- _alpm_strtrim(line);
if(strlen(line) == 0 || line[0] == '#') {
continue;
}
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
index 137fc1b2..d0f98c20 100644
--- a/lib/libalpm/be_sync.c
+++ b/lib/libalpm/be_sync.c
@@ -219,8 +219,8 @@ static int sync_db_populate(pmdb_t *db)
}
#define READ_NEXT(s) do { \
- if(_alpm_archive_fgets(s, sizeof(s), archive) == NULL) goto error; \
- _alpm_strtrim(s); \
+ if(_alpm_archive_fgets(archive, &buf) != ARCHIVE_OK) goto error; \
+ s = _alpm_strtrim(buf.line); \
} while(0)
#define READ_AND_STORE(f) do { \
@@ -238,10 +238,10 @@ static int sync_db_populate(pmdb_t *db)
static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entry *entry)
{
- char line[1024];
const char *entryname = NULL;
char *filename, *pkgname, *p, *q;
pmpkg_t *pkg;
+ struct archive_read_buffer buf;
ALPM_LOG_FUNC;
@@ -260,6 +260,10 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr
_alpm_log(PM_LOG_FUNCTION, "loading package data from archive entry %s\n",
entryname);
+ memset(&buf, 0, sizeof(buf));
+ /* 512K for a line length seems reasonable */
+ buf.max_line_size = 512 * 1024;
+
/* get package and db file names */
STRDUP(pkgname, entryname, RET_ERR(PM_ERR_MEMORY, -1));
p = pkgname + strlen(pkgname);
@@ -279,8 +283,9 @@ static int sync_db_read(pmdb_t *db, struct archive *archive, struct archive_entr
if(strcmp(filename, "desc") == 0 || strcmp(filename, "depends") == 0
|| strcmp(filename, "deltas") == 0) {
- while(_alpm_archive_fgets(line, sizeof(line), archive) != NULL) {
- _alpm_strtrim(line);
+ while(_alpm_archive_fgets(archive, &buf) == ARCHIVE_OK) {
+ char *line = _alpm_strtrim(buf.line);
+
if(strcmp(line, "%NAME%") == 0) {
READ_NEXT(line);
if(strcmp(line, pkg->name) != 0) {
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index 1291ea0f..d34eab5e 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -771,33 +771,89 @@ int _alpm_test_md5sum(const char *filepath, const char *md5sum)
return(ret);
}
-char *_alpm_archive_fgets(char *line, size_t size, struct archive *a)
+/* Note: does NOT handle sparse files on purpose for speed. */
+int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b)
{
- /* for now, just read one char at a time until we get to a
- * '\n' char. we can optimize this later with an internal
- * buffer. */
- /* leave room for zero terminator */
- char *last = line + size - 1;
- char *i;
-
- for(i = line; i < last; i++) {
- int ret = archive_read_data(a, i, 1);
- /* special check for first read- if null, return null,
- * this indicates EOF */
- if(i == line && (ret <= 0 || *i == '\0')) {
- return(NULL);
+ char *i = NULL;
+ int64_t offset;
+ int done = 0;
+
+ while(1) {
+ /* have we processed this entire block? */
+ if(b->block + b->block_size == b->block_offset) {
+ if(b->ret == ARCHIVE_EOF) {
+ /* reached end of archive on the last read, now we are out of data */
+ goto cleanup;
+ }
+
+ /* zero-copy - this is the entire next block of data. */
+ b->ret = archive_read_data_block(a, (void*)&b->block,
+ &b->block_size, &offset);
+ b->block_offset = b->block;
+
+ /* error or end of archive with no data read, cleanup */
+ if(b->ret < ARCHIVE_OK ||
+ (b->block_size == 0 && b->ret == ARCHIVE_EOF)) {
+ goto cleanup;
+ }
}
- /* check if read value was null or newline */
- if(ret <= 0 || *i == '\0' || *i == '\n') {
- last = i + 1;
- break;
+
+ /* loop through the block looking for EOL characters */
+ for(i = b->block_offset; i < (b->block + b->block_size); i++) {
+ /* check if read value was null or newline */
+ if(*i == '\0' || *i == '\n') {
+ done = 1;
+ break;
+ }
}
- }
- /* always null terminate the buffer */
- *last = '\0';
+ /* allocate our buffer, or ensure our existing one is big enough */
+ if(!b->line) {
+ /* set the initial buffer to the read block_size */
+ CALLOC(b->line, b->block_size + 1, sizeof(char),
+ RET_ERR(PM_ERR_MEMORY, -1));
+ b->line_size = b->block_size + 1;
+ b->line_offset = b->line;
+ } else {
+ size_t needed = (b->line_offset - b->line) + (i - b->block_offset) + 1;
+ if(needed > b->max_line_size) {
+ RET_ERR(PM_ERR_MEMORY, -1);
+ }
+ if(needed > b->line_size) {
+ /* need to realloc + copy data to fit total length */
+ char *new;
+ CALLOC(new, needed, sizeof(char), RET_ERR(PM_ERR_MEMORY, -1));
+ memcpy(new, b->line, b->line_size);
+ b->line_size = needed;
+ b->line_offset = new + (b->line_offset - b->line);
+ free(b->line);
+ b->line = new;
+ }
+ }
+
+ if(done) {
+ size_t len = i - b->block_offset;
+ memcpy(b->line_offset, b->block_offset, len);
+ b->line_offset[len] = '\0';
+ b->block_offset = ++i;
+ /* this is the main return point; from here you can read b->line */
+ return(ARCHIVE_OK);
+ } else {
+ /* we've looked through the whole block but no newline, copy it */
+ size_t len = b->block + b->block_size - b->block_offset;
+ memcpy(b->line_offset, b->block_offset, len);
+ b->line_offset += len;
+ b->block_offset = i;
+ }
+ }
- return(line);
+cleanup:
+ {
+ int ret = b->ret;
+ FREE(b->line);
+ memset(b, 0, sizeof(b));
+ return(ret);
+ }
}
int _alpm_splitname(const char *target, pmpkg_t *pkg)
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index 5464b239..543643b1 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -59,6 +59,22 @@
_alpm_log(PM_LOG_DEBUG, "returning error %d from %s : %s\n", err, __func__, alpm_strerrorlast()); \
return(ret); } while(0)
+/**
+ * Used as a buffer/state holder for _alpm_archive_fgets().
+ */
+struct archive_read_buffer {
+ char *line;
+ char *line_offset;
+ size_t line_size;
+ size_t max_line_size;
+
+ char *block;
+ char *block_offset;
+ size_t block_size;
+
+ int ret;
+};
+
int _alpm_makepath(const char *path);
int _alpm_makepath_mode(const char *path, mode_t mode);
int _alpm_copyfile(const char *src, const char *dest);
@@ -76,7 +92,7 @@ char *_alpm_filecache_find(const char *filename);
const char *_alpm_filecache_setup(void);
int _alpm_lstat(const char *path, struct stat *buf);
int _alpm_test_md5sum(const char *filepath, const char *md5sum);
-char *_alpm_archive_fgets(char *line, size_t size, struct archive *a);
+int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b);
int _alpm_splitname(const char *target, pmpkg_t *pkg);
unsigned long _alpm_hash_sdbm(const char *str);
diff --git a/test/pacman/pmrule.py b/test/pacman/pmrule.py
index e7c9c44f..89ae3f49 100755
--- a/test/pacman/pmrule.py
+++ b/test/pacman/pmrule.py
@@ -29,7 +29,9 @@ class pmrule:
self.result = 0
def __str__(self):
- return "rule = %s" % self.rule
+ if len(self.rule) <= 40:
+ return self.rule
+ return self.rule[:37] + '...'
def check(self, root, retcode, localdb, files):
"""
@@ -76,6 +78,9 @@ class pmrule:
elif case == "VERSION":
if value != newpkg.version:
success = 0
+ elif case == "DESC":
+ if value != newpkg.desc:
+ success = 0
elif case == "GROUPS":
if not value in newpkg.groups:
success = 0
@@ -153,7 +158,4 @@ class pmrule:
self.result = success
return success
-
-if __name__ != "__main__":
- rule = pmrule("PKG_EXIST=dummy")
# vim: set ts=4 sw=4 et:
diff --git a/test/pacman/pmtest.py b/test/pacman/pmtest.py
index b7af5809..c70e41ae 100755
--- a/test/pacman/pmtest.py
+++ b/test/pacman/pmtest.py
@@ -260,11 +260,6 @@ class pmtest:
self.result["fail"] += 1
else:
msg = "SKIP"
- print "\t[%s] %s" % (msg, i.rule)
- i.result = success
-
-
-if __name__ == "__main__":
- pass
+ print "\t[%s] %s" % (msg, i)
# vim: set ts=4 sw=4 et:
diff --git a/test/pacman/tests/smoke002.py b/test/pacman/tests/smoke002.py
new file mode 100644
index 00000000..44f2d0ec
--- /dev/null
+++ b/test/pacman/tests/smoke002.py
@@ -0,0 +1,19 @@
+self.description = "Install packages with huge descriptions"
+
+p1 = pmpkg("pkg1")
+p1.desc = 'A' * 500 * 1024
+self.addpkg(p1)
+
+p2 = pmpkg("pkg2")
+p2.desc = 'A' * 600 * 1024
+self.addpkg(p2)
+
+self.args = "-U %s %s" % (p1.filename(), p2.filename())
+
+# Note that the current cutoff on line length is 512K, so the first package
+# will succeed while the second one will fail to record the description.
+self.addrule("PACMAN_RETCODE=0")
+self.addrule("PKG_EXIST=pkg1")
+self.addrule("PKG_DESC=pkg1|%s" % p1.desc)
+self.addrule("PKG_EXIST=pkg1")
+self.addrule("!PKG_DESC=pkg1|%s" % p2.desc)