summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorian Pritz <bluewind@xinu.at>2020-09-27 14:24:44 +0200
committerFlorian Pritz <bluewind@xinu.at>2020-09-27 14:52:23 +0200
commit62f5bc3ce90fedb396d103caae68dc2f211f1b16 (patch)
treedb81e08790295762ebfcf7559fa5b3e839ab30d7
parentf842d625980ca3a946c17c4b3c2846186496c1d2 (diff)
downloadApp-BorgRestore-62f5bc3ce90fedb396d103caae68dc2f211f1b16.tar.gz
App-BorgRestore-62f5bc3ce90fedb396d103caae68dc2f211f1b16.tar.xz
DB: Fix incorrect subpath handling for path that is a superstring of the
previous path Files that share a common substring (e.g. `/home/foo/.ssh/id_rsa` and `/home/foo/.ssh/id_rsa.pub`) could incorrectly trigger an optimization that was only supposed to be triggered if the first path is a directory. The first path would then not be added to the database cache even though it should have been. Signed-off-by: Florian Pritz <bluewind@xinu.at>
-rw-r--r--Changes4
-rw-r--r--lib/App/BorgRestore/DB.pm9
-rw-r--r--lib/App/BorgRestore/PathTimeTable/DB.pm31
-rw-r--r--t/handle_added_archives_with_db.t18
4 files changed, 56 insertions, 6 deletions
diff --git a/Changes b/Changes
index dd5812e..f77e9f9 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,10 @@
Revision history for Perl extension App-BorgRestore
{{$NEXT}}
+ - Fix missing cache data for files that exist with and without an
+ extension. For example '/home/*/.ssh/id_rsa' would be missing from the
+ database and only the accompanying `id_rsa.pub` file would be contained
+ in the database.
3.4.0 2019-09-28T13:28:49Z
- Remove archive name untaint restrictions (remove untaint_archive_name
diff --git a/lib/App/BorgRestore/DB.pm b/lib/App/BorgRestore/DB.pm
index 50f5854..319ce55 100644
--- a/lib/App/BorgRestore/DB.pm
+++ b/lib/App/BorgRestore/DB.pm
@@ -80,7 +80,14 @@ method _migrate() {
my $archive_id = $self->get_archive_id($archive);
$self->{dbh}->do("alter table `files` rename column `timestamp-$archive` to `$archive_id`");
}
-},
+ },
+ 3 => sub {
+ # Drop all cached files due to a bug in
+ # lib/App/BorgRestore/PathTimeTable/DB.pm that caused certain files
+ # to be skipped rather than being added to the `files` table.
+ $self->{dbh}->do('delete from `archives`');
+ $self->{dbh}->do('delete from `files`');
+ },
};
for my $target_version (sort { $a <=> $b } keys %$schema) {
diff --git a/lib/App/BorgRestore/PathTimeTable/DB.pm b/lib/App/BorgRestore/PathTimeTable/DB.pm
index 221063c..f92d24c 100644
--- a/lib/App/BorgRestore/PathTimeTable/DB.pm
+++ b/lib/App/BorgRestore/PathTimeTable/DB.pm
@@ -50,18 +50,46 @@ method set_archive_id($archive_id) {
}
method add_path($path, $time) {
+ $log->tracef("Adding path to cache: %s", $path) if TRACE;
$self->{stats}->{total_paths}++;
my $old_cache_path = $self->{current_path_in_cache};
+ # Check if the new path requires us to (partially) invalidate our cache and
+ # add any files/directories to the database. If the new path is a subpath
+ # (substring actually) of the cached path, we can keep it only in the cache
+ # and no flush is needed. Otherwise we need to flush all parts of the path
+ # that are no longer contained in the new path.
+ #
+ # We start by checking the currently cached path ($old_cache_path) against
+ # the new $path. Then we remove one part from the path at a time, until we
+ # reach a parent path (directory) of $path.
+ $log->tracef("Checking if cache invalidation is required") if TRACE;
while ((my $slash_index = rindex($old_cache_path, "/")) != -1) {
$self->{stats}->{cache_invalidation_loop_iterations}++;
- if ($old_cache_path eq substr($path, 0, length($old_cache_path))) {
+ # Directories in the borg output cannot be differentiated by their
+ # path, since their path looks just like a file path. I.e. there is no
+ # directory separator (/) at the end of a directory path.
+ #
+ # Since we want to keep any directory in our cache, if it contains
+ # $path, we can treat any cached path as a directory path. If the
+ # cached path was really a directory, the new $path will also contain a
+ # directory separator (/) between the old cached path (the parent
+ # directory) and the new path (a subdirectory or a file in the
+ # directory). If the cached path was not actually a directory,
+ # but a file, the new path cannot match the old one because a file name
+ # cannot contain a directory separator.
+ my $cache_check_path = $old_cache_path.'/';
+ $log->tracef("Checking if cached path '%s' contains '%s'", $cache_check_path, $path) if TRACE;
+ if ($cache_check_path eq substr($path, 0, length($cache_check_path))) {
+ $log->tracef("Cache path '%s' is a parent directory of new path '%s'", $old_cache_path, $path) if TRACE;
# keep the cache content for the part of the path that stays the same
last;
}
+ $log->tracef("Cached path '%s' requires flush to database", $old_cache_path) if TRACE;
my $removed_time = delete $self->{cache}->{$old_cache_path};
$self->_add_path_to_db($self->{archive_id}, $old_cache_path, $removed_time);
# strip last part of path
$old_cache_path = substr($old_cache_path, 0, $slash_index);
+ $log->tracef("Changed cache path to parent directory: %s", $old_cache_path) if TRACE;
# update parent timestamp
my $cached = $self->{cache}->{$old_cache_path};
@@ -70,6 +98,7 @@ method add_path($path, $time) {
$self->{cache}->{$old_cache_path} = $removed_time;
}
}
+ $log->tracef("Cache invalidation complete") if TRACE;
if ($old_cache_path ne substr($path, 0, length($old_cache_path))) {
# ensure that top level directory is also written
diff --git a/t/handle_added_archives_with_db.t b/t/handle_added_archives_with_db.t
index 6fd94ea..2225145 100644
--- a/t/handle_added_archives_with_db.t
+++ b/t/handle_added_archives_with_db.t
@@ -23,6 +23,7 @@ for my $in_memory (0,1) {
$cb->("XXX, 1970-01-01 00:00:10 boot");
$cb->("XXX, 1970-01-01 00:00:20 boot/grub");
$cb->("XXX, 1970-01-01 00:00:08 boot/grub/grub.cfg");
+ $cb->("XXX, 1970-01-01 00:00:09 boot/grub/grub.cfg.sig");
$cb->("XXX, 1970-01-01 00:00:13 boot/foo");
$cb->("XXX, 1970-01-01 00:00:13 boot/foo/blub");
$cb->("XXX, 1970-01-01 00:00:19 boot/foo/bar");
@@ -42,7 +43,7 @@ for my $in_memory (0,1) {
$app->_handle_added_archives(['archive-1']);
# check database content
- is($db->get_archive_row_count(), 15, 'correct row count (15 paths with timestamps)');
+ is($db->get_archive_row_count(), 16, 'correct row count (16 paths with timestamps)');
eq_or_diff($db->get_archives_for_path('.'), [{archive => 'archive-1', modification_time => undef},]);
eq_or_diff($db->get_archives_for_path('boot'), [{archive => 'archive-1', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/foo'), [{archive => 'archive-1', modification_time => 19},]);
@@ -50,6 +51,7 @@ for my $in_memory (0,1) {
eq_or_diff($db->get_archives_for_path('boot/foo/blub'), [{archive => 'archive-1', modification_time => 13},]);
eq_or_diff($db->get_archives_for_path('boot/grub'), [{archive => 'archive-1', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg'), [{archive => 'archive-1', modification_time => 8},]);
+ eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg.sig'), [{archive => 'archive-1', modification_time => 9},]);
eq_or_diff($db->get_archives_for_path('boot/test1'), [{archive => 'archive-1', modification_time => 4},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f1'), [{archive => 'archive-1', modification_time => 3},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f2'), [{archive => 'archive-1', modification_time => 4},]);
@@ -70,6 +72,7 @@ for my $in_memory (0,1) {
$cb->("XXX, 1970-01-01 00:00:10 boot");
$cb->("XXX, 1970-01-01 00:00:20 boot/grub");
$cb->("XXX, 1970-01-01 00:00:08 boot/grub/grub.cfg");
+ $cb->("XXX, 1970-01-01 00:00:09 boot/grub/grub.cfg.sig");
$cb->("XXX, 1970-01-01 00:00:13 boot/foo");
$cb->("XXX, 1970-01-01 00:00:13 boot/foo/blub");
$cb->("XXX, 1970-01-01 00:00:19 boot/foo/bar");
@@ -86,7 +89,7 @@ for my $in_memory (0,1) {
$app->_handle_added_archives(['archive-2']);
# check database content
- is($db->get_archive_row_count(), 16, 'correct row count (16 paths with timestamps)');
+ is($db->get_archive_row_count(), 17, 'correct row count (17 paths with timestamps)');
eq_or_diff($db->get_archives_for_path('.'), [
{archive => 'archive-1', modification_time => undef},
{archive => 'archive-2', modification_time => undef},
@@ -115,6 +118,10 @@ for my $in_memory (0,1) {
{archive => 'archive-1', modification_time => 8},
{archive => 'archive-2', modification_time => 8},
]);
+ eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg.sig'), [
+ {archive => 'archive-1', modification_time => 9},
+ {archive => 'archive-2', modification_time => 9},
+ ]);
eq_or_diff($db->get_archives_for_path('boot/test1'), [
{archive => 'archive-1', modification_time => 4},
{archive => 'archive-2', modification_time => 7},
@@ -164,7 +171,7 @@ for my $in_memory (0,1) {
$app->_handle_removed_archives(['archive-2']);
# check database contents
- is($db->get_archive_row_count(), 15, 'correct row count (15 paths with timestamps)');
+ is($db->get_archive_row_count(), 16, 'correct row count (16 paths with timestamps)');
eq_or_diff($db->get_archives_for_path('.'), [{archive => 'archive-2', modification_time => undef},]);
eq_or_diff($db->get_archives_for_path('boot'), [{archive => 'archive-2', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/foo'), [{archive => 'archive-2', modification_time => 19},]);
@@ -172,6 +179,7 @@ for my $in_memory (0,1) {
eq_or_diff($db->get_archives_for_path('boot/foo/blub'), [{archive => 'archive-2', modification_time => 13},]);
eq_or_diff($db->get_archives_for_path('boot/grub'), [{archive => 'archive-2', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg'), [{archive => 'archive-2', modification_time => 8},]);
+ eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg.sig'), [{archive => 'archive-2', modification_time => 9},]);
eq_or_diff($db->get_archives_for_path('boot/test1'), [{archive => 'archive-2', modification_time => 7},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f1'), [{archive => 'archive-2', modification_time => 3},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f2'), [{archive => 'archive-2', modification_time => 5},]);
@@ -186,7 +194,7 @@ for my $in_memory (0,1) {
# run remove again. shouldn't change anything
$app->_handle_removed_archives(['archive-2']);
- is($db->get_archive_row_count(), 15, 'correct row count (15 paths with timestamps)');
+ is($db->get_archive_row_count(), 16, 'correct row count (16 paths with timestamps)');
eq_or_diff($db->get_archives_for_path('.'), [{archive => 'archive-2', modification_time => undef},]);
eq_or_diff($db->get_archives_for_path('boot'), [{archive => 'archive-2', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/foo'), [{archive => 'archive-2', modification_time => 19},]);
@@ -194,6 +202,7 @@ for my $in_memory (0,1) {
eq_or_diff($db->get_archives_for_path('boot/foo/blub'), [{archive => 'archive-2', modification_time => 13},]);
eq_or_diff($db->get_archives_for_path('boot/grub'), [{archive => 'archive-2', modification_time => 20},]);
eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg'), [{archive => 'archive-2', modification_time => 8},]);
+ eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg.sig'), [{archive => 'archive-2', modification_time => 9},]);
eq_or_diff($db->get_archives_for_path('boot/test1'), [{archive => 'archive-2', modification_time => 7},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f1'), [{archive => 'archive-2', modification_time => 3},]);
eq_or_diff($db->get_archives_for_path('boot/test1/f2'), [{archive => 'archive-2', modification_time => 5},]);
@@ -218,6 +227,7 @@ for my $in_memory (0,1) {
eq_or_diff($db->get_archives_for_path('boot/foo/blub'), []);
eq_or_diff($db->get_archives_for_path('boot/grub'), []);
eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg'), []);
+ eq_or_diff($db->get_archives_for_path('boot/grub/grub.cfg.sig'), []);
eq_or_diff($db->get_archives_for_path('boot/test1'), []);
eq_or_diff($db->get_archives_for_path('boot/test1/f1'), []);
eq_or_diff($db->get_archives_for_path('boot/test1/f2'), []);