From 489f2eefcdfd5f5f6acf6628404aa5aa9c44beee Mon Sep 17 00:00:00 2001 From: Florian Pritz Date: Mon, 8 Aug 2016 00:12:52 +0200 Subject: Add WIP version of borg-restore.pl Signed-off-by: Florian Pritz --- borg-restore.pl | 455 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 455 insertions(+) create mode 100755 borg-restore.pl (limited to 'borg-restore.pl') diff --git a/borg-restore.pl b/borg-restore.pl new file mode 100755 index 0000000..dd76dee --- /dev/null +++ b/borg-restore.pl @@ -0,0 +1,455 @@ +#!/usr/bin/perl -T +use warnings; +use strict; + +=head1 NAME + +borg-restore.pl - Restore paths from borg backups + +=head1 DESCRIPTION + +Script that helps to restore files from borg backups. + +Takes one or more paths, looks for their backups, shows a list of distinct +versions and allows to select one to be restored. + +=cut + +=head1 SYNOPSIS + +borg-restore.pl [options] + + Options: + --help, -h short help message + --debug show debug messages + --update-cache, -u update cache files + +=cut + +use v5.10; + +package main; + +use autodie; +use Cwd qw(abs_path); +use Data::Dumper; +use DateTime; +use File::Basename; +use File::Path qw(mkpath); +use File::Slurp; +use Getopt::Long; +use IO::Compress::Gzip qw($GzipError); +use IPC::Run qw(run start); +use Pod::Usage; +use DB_File; +use MLDBM qw(GDBM_File Storable); +use Time::HiRes; +use Devel::Size qw(total_size); +use Storable; +use List::MoreUtils qw(firstidx); +use List::Util qw(any all); + +my %opts; +my $cache_path_base = "/home/flo/.cache/borg-restore.pl"; +my $backup_prefix = "/"; + +sub debug { + say STDERR @_ if $opts{debug}; +} + +sub untaint { + my $data = shift; + my $regex = shift; + + $data =~ m/^($regex)$/ or die "Failed to untaint: $data"; + return $1; +} + +sub borg_list { + my @archives; + + run [qw(borg list)], '>', \my $output or die "borg list returned $?"; + + for (split/^/, $output) { + if (m/^([^\s]+)\s/) { + push @archives, $1; + } + } + + splice @archives, 4; + #splice @archives, 1, 1; + + return \@archives; +} + +sub find_archives { + my $path = shift; + + my $archives = retrieve get_cache_path('archive_list') or die "Failed to read cache file: $!"; + + my %db; + tie %db, 'MLDBM', get_cache_path("archives.db"), O_RDWR, 0600 or die "Failed to open database: $!"; + my $modtimes = $db{$path}; + untie %db; + + my $last_modtime; + my @ret; + + debug("Building archive list"); + + for my $archive (@$archives) { + my $modtime = $$modtimes[get_archive_index($archive, $archives)]; + + if (defined($modtime) && (!defined($last_modtime) || $modtime > $last_modtime)) { + push @ret, { + modification_time => $modtime, + archive => $archive, + }; + $last_modtime = $modtime; + } + } + + if (!@ret) { + printf "\e[0;91mWarning:\e[0m Path '%s' not found in any archive.\n", $path; + } + + return \@ret; +} + +sub select_archive { + my $archives = shift; + + my $selected_archive; + + my $counter = 0; + + if (!@$archives) { + return undef; + } + + for my $archive (@$archives) { + my $dt = DateTime->from_epoch(epoch => $archive->{modification_time}); + printf "\e[0;33m%d: \e[1;33m%s\e[0m %s\n", $counter++, $dt->strftime("%a. %F %H:%M:%S"), $archive->{archive}; + } + + printf "\e[0;34m%s: \e[0m", "Enter ID to restore (Enter to skip)"; + my $selection = ; + return undef if !defined($selection); + chomp $selection; + + return undef unless ($selection =~ /^\d+$/ && defined(${$archives}[$selection])); + return ${$archives}[$selection]; +} + +sub restore { + my $path = shift; + my $archive = shift; + my $destination = shift; + + printf "Restoring %s to %s from archive %s\n", $path, $destination, $archive->{archive}; + + $destination = untaint($destination, qr(.*)); + $path = untaint($path, qr(.*)); + + my $components_to_strip =()= dirname($destination) =~ /\//g; + + chdir dirname($destination); + #File::Path::remove_tree("restore-test"); + #mkdir "restore-test"; + #chdir "restore-test"; + system(qw(echo borg extract -v --strip-components), $components_to_strip, "::".$archive->{archive}, $path); +} + +sub get_cache_dir { + return "$cache_path_base/v1"; +} + +sub get_cache_path { + my $item = shift; + return get_cache_dir()."/$item"; +} + +sub add_path_to_hash { + my $hash = shift; + my $path = shift; + my $time = shift; + + my @components = split /\//, $path; + + my $node = $hash; + + if ($path eq ".") { + if ($time > $$node[1]) { + $$node[1] = $time; + } + return; + } + + # each node is an arrayref of the format [$hashref_of_children, $mtime] + # $hashref_of_children is undef if there are no children + for my $component (@components) { + if (!defined($$node[0]->{$component})) { + $$node[0]->{$component} = [undef, $time]; + } + # update mtime per child + if ($time > $$node[1]) { + $$node[1] = $time; + } + $node = $$node[0]->{$component}; + } +} + +sub get_missing_items { + my $have = shift; + my $want = shift; + + my $ret = []; + + for my $item (@$want) { + my $exists = any { $_ eq $item } @$have; + push @$ret, $item if not $exists; + } + + return $ret; +} + +sub get_archive_index { + my $archive = shift; + my $archives = shift; + + return firstidx(sub { $_ eq $archive }, @$archives); +} + +sub handle_removed_archives { + my $db = shift; + my $archives = shift; + my $previous_archives = shift; + my $borg_archives = shift; + + my $start = Time::HiRes::gettimeofday(); + + # TODO this name is slightly confusing, but it works as expected and + # returns elements that are in the previous list, but missing in the new + # one + my $remove_archives = get_missing_items($borg_archives, $previous_archives); + + if (@$remove_archives) { + for my $archive (@$remove_archives) { + my $archive_index = get_archive_index($archive, $archives); + debug(sprintf("Removing archive %s at index %d", $archive, $archive_index)); + + while (my ($path, $data) = each %$db) { + # TODO remove archive indexes all at once + splice @$data, $archive_index, 1; + $db->{$path} = sanitize_db_data($data); + } + splice @$archives, $archive_index, 1; + } + + clean_db($db); + compact_db(); + + my $end = Time::HiRes::gettimeofday(); + debug(sprintf("Removing archives finished after: %.5fs", $end - $start)); + } +} + +sub sanitize_db_data { + my $data = shift; + + my @ret; + + for my $item (@$data) { + if (defined($item)) { + push @ret, $item + 0; + } else { + push @ret, undef; + } + } + + return \@ret; +} + +sub handle_added_archives { + my $db = shift; + my $archives = shift; + my $borg_archives = shift; + + my $add_archives = get_missing_items($archives, $borg_archives); + push @$archives, @$add_archives; + + for my $archive (@$add_archives) { + my $start = Time::HiRes::gettimeofday(); + my $archive_index = get_archive_index($archive, $archives); + my $lookuptable = [{}, 0]; + + debug(sprintf("Adding archive %s at index %d", $archive, $archive_index)); + + # FIXME: remove /dev/null redirect + my $proc = start [qw(borg list --list-format), '{isomtime} {path}{NEWLINE}', "::".$archive], ">pipe", \*OUT; + #my $counter = 20; + while () { + #close OUT if $counter--<0; + # roll our own parsing of timestamps for speed since we will be parsing + # a huge number of lines here + # example timestamp: "Wed, 2016-01-27 10:31:59" + if (m/^.{4} (?....)-(?..)-(?..) (?..):(?..):(?..) (?.+)$/) { + my $time = POSIX::mktime($+{second},$+{minute},$+{hour},$+{day},$+{month}-1,$+{year}-1900); + #debug(sprintf("Adding path %s with time %s", $+{path}, $time)); + add_path_to_hash($lookuptable, $+{path}, $time); + } + } + $proc->finish() or die "borg list returned $?"; + #$proc->finish(); + + #say "Total size lookup table: ", total_size($lookuptable); + #say "Total size output hash: ", total_size(\%output); + + save_node($db, $archive_index, undef, $lookuptable); + compact_db(); + my $end = Time::HiRes::gettimeofday(); + debug(sprintf("Adding archive finished after: %.5fs", $end - $start)); + #print Dumper($db); + } +} + +sub build_archive_cache { + my $borg_archives = borg_list(); + my $db_path = get_cache_path('archives.db'); + my $archive_cache = get_cache_path('archive_list'); + my $previous_archives = []; + my $archives = []; + + if (! -f $db_path and ! -f $archive_cache) { + debug("Unable to find both, archive list and database. Creating new ones"); + } else { + $previous_archives = retrieve $archive_cache or die "Failed to read archive list from cache: $!"; + } + + # start with the contents of the cache file and update it as we change the db + $archives = $previous_archives; + + # ensure the cache directory exists + mkpath(get_cache_dir(), {mode => 0700}); + + # TODO when the database is updated, create a temporary copy, create a copy + # of the settings database and then rename both. + # TODO save database to real location each time an archive has been added or removed + my %db; + tie %db, 'MLDBM', $db_path, O_RDWR|O_CREAT, 0600 or die "Failed to open db file: $!"; + + handle_removed_archives(\%db, $archives, $previous_archives, $borg_archives); + handle_added_archives(\%db, $archives, $borg_archives); + + print Dumper(0+keys %db, $db{"home/flo/TODO"}); + untie %db; + + store $archives, $archive_cache or die "Failed to save archive list to cache: $!"; + # TODO rename temp caches here +} + +sub save_node { + my $db = shift; + my $archive_index = shift; + my $prefix = shift; + my $node = shift; + + for my $child (keys %{$$node[0]}) { + my $path; + $path = $prefix."/" if defined($prefix); + $path .= $child; + + my $data = $db->{$path}; + + if (!defined($data)) { + $data = []; + } + $$data[$archive_index] = $$node[0]->{$child}[1]; + + $db->{$path} = sanitize_db_data($data); + + save_node($db, $archive_index, $path, $$node[0]->{$child}); + } +} + +sub get_mtime_from_lookuptable { + my $lookuptable = shift; + my $path = shift; + + my @components = split /\//, $path; + my $node = $lookuptable; + + for my $component (@components) { + $node = $$node[0]->{$component}; + if (!defined($node)) { + return undef; + } + } + return $$node[1]; +} + +sub clean_db { + my $db = shift; + + while (my ($path, $data) = each %$db) { + # check if data is empty or all fields in data are undef + if (!@$data || all { !defined($_) } @$data) { + debug("Deleting path because it's not part of any archive: ", $path); + delete $db->{$path}; + } + } +} + +sub compact_db { + my $db_path = get_cache_path("archives.db"); + run([qw(echo reorganize)], "|", ["gdbmtool", $db_path]) or die "Failed to reorganize database: $!"; +} + +sub update_cache { + debug("Checking if cache is complete"); + build_archive_cache(); + debug("Cache complete"); +} + +sub main { + # untaint PATH because we only expect this to run as root + $ENV{PATH} = untaint($ENV{PATH}, qr(.*)); + + Getopt::Long::Configure ("bundling"); + GetOptions(\%opts, "help|h", "debug", "update-cache|u") or pod2usage(2); + pod2usage(0) if $opts{help}; + + if ($opts{"update-cache"}) { + update_cache(); + return 0; + } + + pod2usage(-verbose => 0) if (@ARGV== 0); + + my @paths = @ARGV; + + #if (@paths > 1) { + #say STDERR "ERROR: more than one path is currently not supported"; + #exit 1; + #} + + for my $path (@paths) { + my $abs_path = abs_path($path); + my $destination = $abs_path; + my $backup_path = $abs_path; + $backup_path =~ s/^\Q$backup_prefix\E//; + + debug( "Asked to restore $backup_path to $destination"); + + my $archives = find_archives($backup_path); + + my $selected_archive = select_archive($archives); + next if not defined($selected_archive); + + restore($backup_path, $selected_archive, $destination); + } + + return 0; +} + +exit main() -- cgit v1.2.3-24-g4f1b