summaryrefslogtreecommitdiffstats
path: root/borg-restore.pl
diff options
context:
space:
mode:
authorFlorian Pritz <bluewind@xinu.at>2016-08-08 00:12:52 +0200
committerFlorian Pritz <bluewind@xinu.at>2016-08-08 00:12:52 +0200
commit489f2eefcdfd5f5f6acf6628404aa5aa9c44beee (patch)
tree566c25084d6f7f9634bc7de7d251bb2e15076ebd /borg-restore.pl
parent40d80853ce253690d1934e77c64076b6c1f038dd (diff)
downloadbin-489f2eefcdfd5f5f6acf6628404aa5aa9c44beee.tar.gz
bin-489f2eefcdfd5f5f6acf6628404aa5aa9c44beee.tar.xz
Add WIP version of borg-restore.pl
Signed-off-by: Florian Pritz <bluewind@xinu.at>
Diffstat (limited to 'borg-restore.pl')
-rwxr-xr-xborg-restore.pl455
1 files changed, 455 insertions, 0 deletions
diff --git a/borg-restore.pl b/borg-restore.pl
new file mode 100755
index 0000000..dd76dee
--- /dev/null
+++ b/borg-restore.pl
@@ -0,0 +1,455 @@
+#!/usr/bin/perl -T
+use warnings;
+use strict;
+
+=head1 NAME
+
+borg-restore.pl - Restore paths from borg backups
+
+=head1 DESCRIPTION
+
+Script that helps to restore files from borg backups.
+
+Takes one or more paths, looks for their backups, shows a list of distinct
+versions and allows to select one to be restored.
+
+=cut
+
+=head1 SYNOPSIS
+
+borg-restore.pl [options] <path>
+
+ Options:
+ --help, -h short help message
+ --debug show debug messages
+ --update-cache, -u update cache files
+
+=cut
+
+use v5.10;
+
+package main;
+
use autodie;
use Cwd qw(abs_path);
use DB_File;
use Data::Dumper;
use DateTime;
use Devel::Size qw(total_size);
use File::Basename;
use File::Path qw(mkpath);
use File::Slurp;
use Getopt::Long;
use IO::Compress::Gzip qw($GzipError);
use IPC::Run qw(run start);
use List::MoreUtils qw(firstidx);
use List::Util qw(any all);
use MLDBM qw(GDBM_File Storable);
use POSIX ();
use Pod::Usage;
use Storable;
use Time::HiRes;
+
my %opts;
# Derive the cache location from the environment instead of hard-coding a
# particular user's home directory. The pattern match also untaints the
# value (the shebang enables taint mode, -T), mirroring how PATH is
# untainted in main().
my ($cache_home) = (($ENV{XDG_CACHE_HOME} // ($ENV{HOME} // "") . "/.cache") =~ m{\A(.*)\z});
my $cache_path_base = "$cache_home/borg-restore.pl";
my $backup_prefix = "/";
+
sub debug {
	# Emit a diagnostic line on STDERR, but only when --debug was given.
	print STDERR join('', @_), "\n" if $opts{debug};
}
+
sub untaint {
	# Validate $data against $regex (anchored to the full string) and
	# return the captured copy, which Perl considers untainted.
	# Dies when the value does not match.
	my ($data, $regex) = @_;

	if ($data =~ m/^($regex)$/) {
		return $1;
	}
	die "Failed to untaint: $data";
}
+
sub borg_list {
	# Return an arrayref of all archive names known to borg, in the order
	# "borg list" prints them. Dies when borg exits non-zero.
	#
	# Fix: removed the leftover debug statement "splice @archives, 4;"
	# which silently truncated the result to the first four archives.
	my @archives;

	run [qw(borg list)], '>', \my $output or die "borg list returned $?";

	# The archive name is the first whitespace-delimited field of each line.
	for my $line (split /^/, $output) {
		if ($line =~ m/^(\S+)\s/) {
			push @archives, $1;
		}
	}

	return \@archives;
}
+
sub find_archives {
	# Look up $path in the cache database and return an arrayref of
	# { modification_time, archive } hashrefs: one entry per archive in
	# which the path's mtime increased compared to the previous entry,
	# i.e. one entry per distinct version.
	my $path = shift;

	my $archives = retrieve get_cache_path('archive_list') or die "Failed to read cache file: $!";

	# Fetch this path's per-archive mtime array from the tied database.
	my %db;
	tie %db, 'MLDBM', get_cache_path("archives.db"), O_RDWR, 0600 or die "Failed to open database: $!";
	my $modtimes = $db{$path};
	untie %db;

	debug("Building archive list");

	my @ret;
	my $last_modtime;

	for my $archive (@$archives) {
		my $modtime = $modtimes->[get_archive_index($archive, $archives)];

		# Skip archives that do not contain the path, and archives whose
		# copy is not newer than the last version already listed.
		next unless defined($modtime);
		next if defined($last_modtime) && $modtime <= $last_modtime;

		push @ret, {
			modification_time => $modtime,
			archive => $archive,
		};
		$last_modtime = $modtime;
	}

	printf "\e[0;91mWarning:\e[0m Path '%s' not found in any archive.\n", $path unless @ret;

	return \@ret;
}
+
sub select_archive {
	# Print a numbered list of archive versions and let the user pick one
	# via STDIN. Returns the chosen entry, or undef when the list is
	# empty, on EOF, or when the input is not a valid index.
	my $archives = shift;

	return undef if !@$archives;

	my $id = 0;
	for my $entry (@$archives) {
		my $dt = DateTime->from_epoch(epoch => $entry->{modification_time});
		printf "\e[0;33m%d: \e[1;33m%s\e[0m %s\n", $id++, $dt->strftime("%a. %F %H:%M:%S"), $entry->{archive};
	}

	printf "\e[0;34m%s: \e[0m", "Enter ID to restore (Enter to skip)";
	my $selection = <STDIN>;
	return undef if !defined($selection);
	chomp $selection;

	# Non-numeric input (including an empty line) means "skip"; an
	# out-of-range index yields undef from the array lookup, same result.
	return undef if $selection !~ /^\d+$/;
	return $archives->[$selection];
}
+
sub restore {
	# Restore $path from $archive (hashref with key "archive") into the
	# directory containing $destination by running "borg extract" there.
	#
	# Fixes: removed the leftover "echo" debug prefix that turned the
	# extract command into a no-op print, removed dead commented-out
	# scratch code, and the exit status of borg is now checked.
	my $path = shift;
	my $archive = shift;
	my $destination = shift;

	printf "Restoring %s to %s from archive %s\n", $path, $destination, $archive->{archive};

	# The accept-anything pattern only serves to satisfy taint mode (-T);
	# these values intentionally come from user input.
	$destination = untaint($destination, qr(.*));
	$path = untaint($path, qr(.*));

	# borg extract re-creates the full stored path below the current
	# directory, so strip one component per directory above the target.
	my $components_to_strip =()= dirname($destination) =~ /\//g;

	chdir dirname($destination);
	# List form of system() bypasses the shell — no quoting issues.
	system(qw(borg extract -v --strip-components), $components_to_strip, "::".$archive->{archive}, $path) == 0
		or die "borg extract failed: $?";
}
+
sub get_cache_dir {
	# Cache files live in a versioned subdirectory so an incompatible
	# format change only needs to bump the version component.
	return $cache_path_base."/v1";
}
+
sub get_cache_path {
	# Return the absolute path of a single item inside the cache directory.
	my ($item) = @_;
	return join('/', get_cache_dir(), $item);
}
+
sub add_path_to_hash {
	# Insert $path (with modification time $time) into the in-memory
	# lookup tree rooted at $hash, propagating the maximum mtime into
	# every node traversed along the way.
	#
	# $hash - root node; each node is an arrayref
	#         [$hashref_of_children, $mtime], children undef for leaves
	# $path - slash-separated path relative to the archive root
	# $time - epoch mtime of the path in the archive being indexed
	my $hash = shift;
	my $path = shift;
	my $time = shift;

	my @components = split /\//, $path;

	my $node = $hash;

	# "." is the archive root itself: only bump the root node's mtime.
	if ($path eq ".") {
		if ($time > $$node[1]) {
			$$node[1] = $time;
		}
		return;
	}

	# each node is an arrayref of the format [$hashref_of_children, $mtime]
	# $hashref_of_children is undef if there are no children
	for my $component (@components) {
		if (!defined($$node[0]->{$component})) {
			$$node[0]->{$component} = [undef, $time];
		}
		# update mtime per child
		# NOTE(review): the mtime of the *current* node is updated before
		# descending, so a leaf that already exists keeps its original
		# time. This assumes each path appears at most once per indexing
		# run — verify against the "borg list" output format.
		if ($time > $$node[1]) {
			$$node[1] = $time;
		}
		$node = $$node[0]->{$component};
	}
}
+
sub get_missing_items {
	# Return an arrayref of all elements of @$want that are not present
	# in @$have (string comparison). The order of @$want is preserved.
	my $have = shift;
	my $want = shift;

	# Index the "have" list once instead of rescanning it with any() for
	# every wanted item — O(n+m) instead of O(n*m), same result.
	my %have = map { $_ => 1 } @$have;

	return [grep { !$have{$_} } @$want];
}
+
sub get_archive_index {
	# Return the position of $archive in @$archives, or -1 when it is
	# not in the list (matching List::MoreUtils::firstidx semantics).
	my ($archive, $archives) = @_;

	for my $i (0 .. $#{$archives}) {
		return $i if $archives->[$i] eq $archive;
	}

	return -1;
}
+
sub handle_removed_archives {
	# Remove all archives that have disappeared from the borg repository
	# from both the database and the in-memory archive list. For every
	# removed archive, the matching slot is spliced out of each path's
	# per-archive mtime array.
	#
	# $db                - tied (MLDBM) database hash ref
	# $archives          - arrayref of archive names; mutated in place
	# $previous_archives - archive names recorded by the previous run
	# $borg_archives     - archive names borg currently reports
	my $db = shift;
	my $archives = shift;
	my $previous_archives = shift;
	my $borg_archives = shift;

	my $start = Time::HiRes::gettimeofday();

	# TODO this name is slightly confusing, but it works as expected and
	# returns elements that are in the previous list, but missing in the new
	# one
	my $remove_archives = get_missing_items($borg_archives, $previous_archives);

	if (@$remove_archives) {
		for my $archive (@$remove_archives) {
			# The index is looked up against the *current* @$archives, which
			# shrinks with every removal — this stays consistent because the
			# per-path mtime arrays in the database are spliced in lockstep.
			my $archive_index = get_archive_index($archive, $archives);
			debug(sprintf("Removing archive %s at index %d", $archive, $archive_index));

			while (my ($path, $data) = each %$db) {
				# TODO remove archive indexes all at once
				# NOTE(review): storing back into a tied hash while iterating
				# it with each() assumes the GDBM backend tolerates writes to
				# existing keys mid-iteration — confirm.
				splice @$data, $archive_index, 1;
				$db->{$path} = sanitize_db_data($data);
			}
			splice @$archives, $archive_index, 1;
		}

		clean_db($db);
		compact_db();

		my $end = Time::HiRes::gettimeofday();
		debug(sprintf("Removing archives finished after: %.5fs", $end - $start));
	}
}
+
sub sanitize_db_data {
	# Return a copy of @$data where every defined element is numified
	# (forced to a plain number) and undef entries are kept as undef.
	# Storing plain numbers keeps the serialized records small.
	my $data = shift;

	return [map { defined($_) ? $_ + 0 : undef } @$data];
}
+
sub handle_added_archives {
	# Index every archive that borg reports but the database does not yet
	# contain: the archive's file list is parsed into an in-memory lookup
	# tree which is then merged into the database.
	#
	# Fixes: POSIX is now loaded explicitly at the top of the file
	# (POSIX::mktime was previously only available by accident, via a
	# transitive load), and dead commented-out debug code was removed.
	my $db = shift;
	my $archives = shift;
	my $borg_archives = shift;

	my $add_archives = get_missing_items($archives, $borg_archives);
	push @$archives, @$add_archives;

	for my $archive (@$add_archives) {
		my $start = Time::HiRes::gettimeofday();
		my $archive_index = get_archive_index($archive, $archives);
		# Root node of the lookup tree: [children, mtime].
		my $lookuptable = [{}, 0];

		debug(sprintf("Adding archive %s at index %d", $archive, $archive_index));

		my $proc = start [qw(borg list --list-format), '{isomtime} {path}{NEWLINE}', "::".$archive], ">pipe", \*OUT;
		while (<OUT>) {
			# roll our own parsing of timestamps for speed since we will be parsing
			# a huge number of lines here
			# example timestamp: "Wed, 2016-01-27 10:31:59"
			if (m/^.{4} (?<year>....)-(?<month>..)-(?<day>..) (?<hour>..):(?<minute>..):(?<second>..) (?<path>.+)$/) {
				# mktime interprets the fields as local time, matching the
				# local timestamps borg prints.
				my $time = POSIX::mktime($+{second},$+{minute},$+{hour},$+{day},$+{month}-1,$+{year}-1900);
				add_path_to_hash($lookuptable, $+{path}, $time);
			}
		}
		$proc->finish() or die "borg list returned $?";

		save_node($db, $archive_index, undef, $lookuptable);
		compact_db();
		my $end = Time::HiRes::gettimeofday();
		debug(sprintf("Adding archive finished after: %.5fs", $end - $start));
	}
}
+
sub build_archive_cache {
	# Synchronize the on-disk database and the archive-list cache with
	# the archives that currently exist in the borg repository.
	#
	# Fix: removed a leftover debug statement that dumped the key count
	# and a hard-coded personal path ("home/flo/TODO") on every run.
	my $borg_archives = borg_list();
	my $db_path = get_cache_path('archives.db');
	my $archive_cache = get_cache_path('archive_list');
	my $previous_archives = [];
	my $archives = [];

	if (! -f $db_path and ! -f $archive_cache) {
		debug("Unable to find both, archive list and database. Creating new ones");
	} else {
		$previous_archives = retrieve $archive_cache or die "Failed to read archive list from cache: $!";
	}

	# start with the contents of the cache file and update it as we change the db
	# NOTE: after this assignment $archives and $previous_archives alias the
	# same array; handle_removed_archives() computes its removal list before
	# mutating it, so this is safe — keep that ordering in mind when editing.
	$archives = $previous_archives;

	# ensure the cache directory exists
	mkpath(get_cache_dir(), {mode => 0700});

	# TODO when the database is updated, create a temporary copy, create a copy
	# of the settings database and then rename both.
	# TODO save database to real location each time an archive has been added or removed
	my %db;
	tie %db, 'MLDBM', $db_path, O_RDWR|O_CREAT, 0600 or die "Failed to open db file: $!";

	handle_removed_archives(\%db, $archives, $previous_archives, $borg_archives);
	handle_added_archives(\%db, $archives, $borg_archives);

	untie %db;

	store $archives, $archive_cache or die "Failed to save archive list to cache: $!";
	# TODO rename temp caches here
}
+
sub save_node {
	# Recursively flatten the in-memory lookup tree into the database:
	# every path is stored as an array of per-archive mtimes, and the
	# slot for $archive_index is filled from the matching tree node.
	#
	# $prefix is the path accumulated so far (undef at the root).
	my ($db, $archive_index, $prefix, $node) = @_;

	for my $child (keys %{$$node[0]}) {
		my $path = defined($prefix) ? $prefix."/".$child : $child;

		# Extend an existing record, or start a fresh mtime array.
		my $data = $db->{$path} // [];
		$$data[$archive_index] = $$node[0]->{$child}[1];

		$db->{$path} = sanitize_db_data($data);

		save_node($db, $archive_index, $path, $$node[0]->{$child});
	}
}
+
sub get_mtime_from_lookuptable {
	# Walk the lookup tree along the slash-separated components of $path
	# and return the mtime stored in the final node, or undef when the
	# path does not exist in the tree.
	my ($lookuptable, $path) = @_;

	my $node = $lookuptable;

	for my $part (split /\//, $path) {
		$node = $$node[0]->{$part};
		return undef unless defined($node);
	}

	return $$node[1];
}
+
sub clean_db {
	# Delete database entries that are no longer part of any archive,
	# i.e. whose per-archive mtime array is empty or contains only undef.
	#
	# Fix: collect the stale keys first and delete afterwards — deleting
	# from a tied (GDBM-backed) hash while iterating it with each() can
	# skip entries or invalidate the iterator.
	my $db = shift;

	my @stale;
	while (my ($path, $data) = each %$db) {
		# check if data is empty or all fields in data are undef
		if (!@$data || all { !defined($_) } @$data) {
			push @stale, $path;
		}
	}

	for my $path (@stale) {
		debug("Deleting path because it's not part of any archive: ", $path);
		delete $db->{$path};
	}
}
+
sub compact_db {
	# Ask gdbmtool to reorganize (compact) the database file; GDBM does
	# not reclaim space freed by deleted records on its own.
	#
	# Fixes: feed the command on stdin via a scalar ref instead of
	# spawning an extra "echo" process, and report the child's exit
	# status ($?) on failure — $! is only meaningful for syscall errors.
	my $db_path = get_cache_path("archives.db");
	run ["gdbmtool", $db_path], "<", \"reorganize\n" or die "Failed to reorganize database: $?";
}
+
sub update_cache {
	# Entry point for --update-cache: rebuild the archive list and the
	# path database from the current state of the borg repository.
	debug("Checking if cache is complete");
	build_archive_cache();
	debug("Cache complete");
}
+
sub main {
	# Parse options, then either refresh the cache (--update-cache) or
	# interactively restore each path given on the command line.
	# Returns the process exit code.
	#
	# Fix: removed the dead commented-out multi-path rejection block;
	# multiple paths are handled by the loop below.

	# untaint PATH because we only expect this to run as root
	$ENV{PATH} = untaint($ENV{PATH}, qr(.*));

	Getopt::Long::Configure("bundling");
	GetOptions(\%opts, "help|h", "debug", "update-cache|u") or pod2usage(2);
	pod2usage(0) if $opts{help};

	if ($opts{"update-cache"}) {
		update_cache();
		return 0;
	}

	pod2usage(-verbose => 0) if @ARGV == 0;

	for my $path (@ARGV) {
		# Resolve to an absolute path, then strip the backup prefix to get
		# the path as it is stored inside the borg archives.
		my $abs_path = abs_path($path);
		my $destination = $abs_path;
		my $backup_path = $abs_path;
		$backup_path =~ s/^\Q$backup_prefix\E//;

		debug("Asked to restore $backup_path to $destination");

		my $archives = find_archives($backup_path);

		my $selected_archive = select_archive($archives);
		next if not defined($selected_archive);

		restore($backup_path, $selected_archive, $destination);
	}

	return 0;
}
+
+exit main()