diff options
author | Florian Pritz <bluewind@xinu.at> | 2020-09-27 14:24:44 +0200 |
---|---|---|
committer | Florian Pritz <bluewind@xinu.at> | 2020-09-27 14:52:23 +0200 |
commit | 62f5bc3ce90fedb396d103caae68dc2f211f1b16 (patch) | |
tree | db81e08790295762ebfcf7559fa5b3e839ab30d7 /lib | |
parent | f842d625980ca3a946c17c4b3c2846186496c1d2 (diff) | |
download | App-BorgRestore-62f5bc3ce90fedb396d103caae68dc2f211f1b16.tar.gz App-BorgRestore-62f5bc3ce90fedb396d103caae68dc2f211f1b16.tar.xz |
DB: Fix incorrect subpath handling for a path that is a superstring of the previous path
Files where one path is a prefix of the next path (e.g. `/home/foo/.ssh/id_rsa`
followed by `/home/foo/.ssh/id_rsa.pub`) could incorrectly trigger an optimization
that was only supposed to be triggered if the first path is a directory
containing the second path. The first path would then not be added to the
database cache even though it should have been.
Signed-off-by: Florian Pritz <bluewind@xinu.at>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/App/BorgRestore/DB.pm | 9 | ||||
-rw-r--r-- | lib/App/BorgRestore/PathTimeTable/DB.pm | 31 |
2 files changed, 38 insertions, 2 deletions
diff --git a/lib/App/BorgRestore/DB.pm b/lib/App/BorgRestore/DB.pm index 50f5854..319ce55 100644 --- a/lib/App/BorgRestore/DB.pm +++ b/lib/App/BorgRestore/DB.pm @@ -80,7 +80,14 @@ method _migrate() { my $archive_id = $self->get_archive_id($archive); $self->{dbh}->do("alter table `files` rename column `timestamp-$archive` to `$archive_id`"); } -}, + }, + 3 => sub { + # Drop all cached files due to a bug in + # lib/App/BorgRestore/PathTimeTable/DB.pm that caused certain files + # to be skipped rather than being added to the `files` table. + $self->{dbh}->do('delete from `archives`'); + $self->{dbh}->do('delete from `files`'); + }, }; for my $target_version (sort { $a <=> $b } keys %$schema) { diff --git a/lib/App/BorgRestore/PathTimeTable/DB.pm b/lib/App/BorgRestore/PathTimeTable/DB.pm index 221063c..f92d24c 100644 --- a/lib/App/BorgRestore/PathTimeTable/DB.pm +++ b/lib/App/BorgRestore/PathTimeTable/DB.pm @@ -50,18 +50,46 @@ method set_archive_id($archive_id) { } method add_path($path, $time) { + $log->tracef("Adding path to cache: %s", $path) if TRACE; $self->{stats}->{total_paths}++; my $old_cache_path = $self->{current_path_in_cache}; + # Check if the new path requires us to (partially) invalidate our cache and + # add any files/directories to the database. If the new path is a subpath + # (substring actually) of the cached path, we can keep it only in the cache + # and no flush is needed. Otherwise we need to flush all parts of the path + # that are no longer contained in the new path. + # + # We start by checking the currently cached path ($old_cache_path) against + # the new $path. Then we remove one part from the path at a time, until we + # reach a parent path (directory) of $path. 
+ $log->tracef("Checking if cache invalidation is required") if TRACE; while ((my $slash_index = rindex($old_cache_path, "/")) != -1) { $self->{stats}->{cache_invalidation_loop_iterations}++; - if ($old_cache_path eq substr($path, 0, length($old_cache_path))) { + # Directories in the borg output cannot be differentiated by their + # path, since their path looks just like a file path. I.e. there is no + # directory separator (/) at the end of a directory path. + # + # Since we want to keep any directory in our cache, if it contains + # $path, we can treat any cached path as a directory path. If the + # cached path was really a directory, the new $path will also contain a + # directory separator (/) between the old cached path (the parent + # directory) and the new path (a subdirectory or a file in the + # directory). If the cached path was not actually a directory, + # but a file, the new path cannot match the old one because a file name + # cannot contain a directory separator. + my $cache_check_path = $old_cache_path.'/'; + $log->tracef("Checking if cached path '%s' contains '%s'", $cache_check_path, $path) if TRACE; + if ($cache_check_path eq substr($path, 0, length($cache_check_path))) { + $log->tracef("Cache path '%s' is a parent directory of new path '%s'", $old_cache_path, $path) if TRACE; # keep the cache content for the part of the path that stays the same last; } + $log->tracef("Cached path '%s' requires flush to database", $old_cache_path) if TRACE; my $removed_time = delete $self->{cache}->{$old_cache_path}; $self->_add_path_to_db($self->{archive_id}, $old_cache_path, $removed_time); # strip last part of path $old_cache_path = substr($old_cache_path, 0, $slash_index); + $log->tracef("Changed cache path to parent directory: %s", $old_cache_path) if TRACE; # update parent timestamp my $cached = $self->{cache}->{$old_cache_path}; @@ -70,6 +98,7 @@ method add_path($path, $time) { $self->{cache}->{$old_cache_path} = $removed_time; } } + $log->tracef("Cache 
invalidation complete") if TRACE; if ($old_cache_path ne substr($path, 0, length($old_cache_path))) { # ensure that top level directory is also written |