diff options
author | Niko Tyni <ntyni@iki.fi> | 2005-09-14 21:44:00 +0200 |
---|---|---|
committer | Niko Tyni <ntyni@iki.fi> | 2005-09-14 21:44:00 +0200 |
commit | f0021857e52510645acf59997de6b5ee952186c3 (patch) | |
tree | ff20064d52eebad9d83ca9d1cebeff62e9795c3a /lib/Smokeping.pm | |
parent | d788fb4fa54adfb8cc2d8d0344bdb9a7af2d642d (diff) | |
download | smokeping-f0021857e52510645acf59997de6b5ee952186c3.tar.gz smokeping-f0021857e52510645acf59997de6b5ee952186c3.tar.xz |
* (trunk)/
lib/Smokeping.pm,
bin/smokeping.dist,
CHANGES,
TODO:
+ reload the config file on HUP signal and bail out if it's broken.
After reloading, restart gracefully so that no measurement is
interrupted. The signal can also be triggered by 'smokeping --reload'.
Diffstat (limited to 'lib/Smokeping.pm')
-rw-r--r-- | lib/Smokeping.pm | 206 |
1 files changed, 166 insertions, 40 deletions
diff --git a/lib/Smokeping.pm b/lib/Smokeping.pm index e1de056..9ae9c63 100644 --- a/lib/Smokeping.pm +++ b/lib/Smokeping.pm @@ -1776,6 +1776,7 @@ be appended to the process name as '[probe]', eg. '/usr/bin/smokeping [FPing]'. If you don't like this behaviour, set this variable to 'no'. If 'concurrentprobes' is not set to 'yes', this variable has no effect. DOC + _default => 'yes', }, tmail => { @@ -2395,16 +2396,23 @@ sub get_config ($$){ return $parser->parse( $cfgfile ) || die "ERROR: $parser->{err}\n"; } -sub kill_smoke ($) { +sub kill_smoke ($$) { my $pidfile = shift; + my $signal = shift; if (defined $pidfile){ if ( -f $pidfile && open PIDFILE, "<$pidfile" ) { <PIDFILE> =~ /(\d+)/; my $pid = $1; - kill 2, $pid if kill 0, $pid; - sleep 3; # let it die - die "ERROR: Can not stop running instance of SmokePing ($pid)\n" - if kill 0, $pid; + if ($signal == SIGINT || $signal == SIGTERM) { + kill $signal, $pid if kill 0, $pid; + sleep 3; # let it die + die "ERROR: Can not stop running instance of SmokePing ($pid)\n" + if kill 0, $pid; + } else { + die "ERROR: no instance of SmokePing running (pid $pid)?\n" + unless kill 0, $pid; + kill $signal, $pid; + } close PIDFILE; } else { die "ERROR: Can not read pid from $pidfile: $!\n"; @@ -2487,7 +2495,9 @@ sub daemonize_me ($) { } sub do_syslog ($){ - syslog("$syslog_facility|$syslog_priority", shift); + my $str = shift; + $str =~ s,%,%%,g; + syslog("$syslog_facility|$syslog_priority", $str); } sub do_cgilog ($){ @@ -2540,7 +2550,9 @@ sub load_cfg ($) { $cfg->{__probes} = $probes; init_alerts $cfg if $cfg->{Alerts}; init_target_tree $cfg, $probes, $cfg->{Targets}, $cfg->{General}{datadir}; - } + } else { + do_log("Config file unmodified, skipping reload") unless $cgimode; + } } @@ -2762,6 +2774,40 @@ sub verify_cfg { print "Configuration file '$cfgfile' syntax OK.\n"; } +sub make_kid { + my $sleep_count = 0; + my $pid; + do { + $pid = fork; + unless (defined $pid) { + do_log("Fatal: cannot fork: $!"); + die "bailing out" + if $sleep_count++ > 6; + sleep 10; + } + } until defined $pid; + srand(); + return $pid; +} + +sub start_probes { + my $pids = shift; + my $pid; + my $myprobe; + for my $p (keys %$probes) { + if ($probes->{$p}->target_count == 0) { + do_log("No targets defined for probe $p, skipping."); + next; + } + $pid = make_kid(); + $myprobe = $p; + $pids->{$pid} = $p; + last unless $pid; + do_log("Child process $pid started for probe $p."); + } + return $pid; +} + sub main (;$) { $cgimode = 0; umask 022; @@ -2769,7 +2815,7 @@ sub main (;$) { $opt{filter}=[]; GetOptions(\%opt, 'version', 'email', 'man:s','help','logfile=s','static-pages:s', 'debug-daemon', 'nosleep', 'makepod:s','debug','restart', 'filter=s', 'nodaemon|nodemon', - 'config=s', 'check', 'gen-examples') or pod2usage(2); + 'config=s', 'check', 'gen-examples', 'reload') or pod2usage(2); if($opt{version}) { print "$RCS_VERSION\n"; exit(0) }; if(exists $opt{man}) { if ($opt{man}) { @@ -2802,7 +2848,12 @@ sub main (;$) { load_cfg $cfgfile; if(defined $opt{'static-pages'}) { makestaticpages $cfg, $opt{'static-pages'}; exit 0 }; if($opt{email}) { enable_dynamic $cfg, $cfg->{Targets},"",""; exit 0 }; - if($opt{restart}) { kill_smoke $cfg->{General}{piddir}."/smokeping.pid";}; + if($opt{restart}) { kill_smoke $cfg->{General}{piddir}."/smokeping.pid", SIGINT;}; + if($opt{reload}) { + kill_smoke $cfg->{General}{piddir}."/smokeping.pid", SIGHUP; + print "HUP signal sent to the running SmokePing process, exiting.\n"; + exit 0; + }; if($opt{logfile}) { initialize_filelog($opt{logfile}) }; if (not keys %$probes) { do_log("No probes defined, exiting."); @@ -2817,35 +2868,22 @@ sub main (;$) { } do_log "Smokeping version $VERSION successfully launched."; +RESTART: my $myprobe; + my $multiprocessmode; my $forkprobes = $cfg->{General}{concurrentprobes} || 'yes'; if ($forkprobes eq "yes" and keys %$probes > 1 and not $opt{debug}) { + $multiprocessmode = 1; my %probepids; my $pid; do_log("Entering multiprocess mode."); - for my $p (keys %$probes) { - if ($probes->{$p}->target_count == 0) { - do_log("No targets defined for probe $p, skipping."); - next; - } - my $sleep_count = 0; - do { - $pid = fork; - unless (defined $pid) { - do_log("Fatal: cannot fork: $!"); - die "bailing out" - if $sleep_count++ > 6; - sleep 10; - } - } until defined $pid; - $myprobe = $p; - goto KID unless $pid; # child skips rest of loop - do_log("Child process $pid started for probe $myprobe."); - $probepids{$pid} = $myprobe; - } + $pid = start_probes(\%probepids); + $myprobe = $probepids{$pid}; + goto KID unless $pid; # child skips rest of loop # parent do_log("All probe processes started successfully."); my $exiting = 0; + my $reloading = 0; for my $sig (qw(INT TERM)) { $SIG{$sig} = sub { do_log("Got $sig signal, terminating child processes."); @@ -2854,7 +2892,7 @@ sub main (;$) { my $now = time; while(keys %probepids) { # SIGCHLD handler below removes the keys if (time - $now > 2) { - do_log("Can't terminate all child processes, giving up."); + do_log("Fatal: can't terminate all child processes, giving up."); exit 1; } sleep 1; @@ -2868,14 +2906,64 @@ sub main (;$) { my $p = $probepids{$dead}; $p = 'unknown' unless defined $p; do_log("Child process $dead (probe $p) exited unexpectedly with status $?.") - unless $exiting; + unless $exiting or $reloading; delete $probepids{$dead}; } }; - sleep while 1; # just wait for the signals + my $gothup = 0; + $SIG{HUP} = sub { + do_debuglog("Got HUP signal."); + $gothup = 1; + }; + while (1) { # just wait for the signals + sleep; + next unless $gothup; + $reloading = 1; + $gothup = 0; + my $oldprobes = $probes; + $reloading = 0, next unless reload_cfg($cfgfile); + do_debuglog("Restarting probe processes " . join(",", keys %probepids) . "."); + kill SIGHUP, $_ for (keys %probepids); + my $i=0; + while (keys %probepids) { + sleep 1; + if ($i % 10 == 0) { + do_log("Waiting for child processes to terminate."); + } + $i++; + my %termsent; + for (keys %probepids) { + my $step = $oldprobes->{$probepids{$_}}->step; + if ($i > $step) { + do_log("Child process $_ took over its step value to terminate, killing it with SIGTERM"); + if (kill SIGTERM, $_ == 0 and exists $probepids{$_}) { + do_log("Fatal: Child process $_ has disappeared? This shouldn't happen. Giving up."); + exit 1; + } else { + $termsent{$_} = time; + } + } + for (keys %termsent) { + if (exists $probepids{$_}) { + if (time() - $termsent{$_} > 2) { + do_log("Fatal: Child process $_ took over 2 seconds to exit on TERM signal. Giving up."); + exit 1; + } + } else { + delete $termsent{$_}; + } + } + } + } + $reloading = 0; + do_log("Child processes terminated, restarting with new configuration."); + $SIG{CHLD} = 'DEFAULT'; # restore + goto RESTART; + } do_log("Exiting abnormally - this should not happen."); exit 1; # not reached } else { + $multiprocessmode = 0; if ($forkprobes ne "yes") { do_log("Not entering multiprocess mode because the 'concurrentprobes' variable is not set."); for my $p (keys %$probes) { @@ -2890,21 +2978,26 @@ sub main (;$) { do_log("Not entering multiprocess mode for just a single probe."); $myprobe = (keys %$probes)[0]; # this way we won't ignore a probe-specific step parameter } - for my $sig (qw(INT TERM)) { - $SIG{$sig} = sub { - do_log("Got $sig signal, terminating."); - exit 1; - } - } } KID: my $offset; my $step; + my $gothup = 0; + my $changeprocessnames = $cfg->{General}{changeprocessnames} ne "no"; + $SIG{HUP} = sub { + do_log("Got HUP signal, " . ($multiprocessmode ? "exiting" : "restarting") . " gracefully."); + $gothup = 1; + }; + for my $sig (qw(INT TERM)) { + $SIG{$sig} = sub { + do_log("got $sig signal, terminating."); + exit 1; + } + } if (defined $myprobe) { $offset = $probes->{$myprobe}->offset || 'random'; $step = $probes->{$myprobe}->step; - $0 .= " [$myprobe]" unless defined $cfg->{General}{changeprocessnames} - and $cfg->{General}{changeprocessnames} eq "no"; + $0 .= " [$myprobe]" if $changeprocessnames; } else { $offset = $cfg->{General}{offset} || 'random'; $step = $cfg->{Database}{step}; @@ -2935,6 +3028,7 @@ KID: do_debuglog("Sleeping $sleeptime seconds."); } sleep $sleeptime; + last if checkhup($multiprocessmode, $gothup) && reload_cfg($cfgfile); } my $now = time; run_probes $probes, $myprobe; # $myprobe is undef if running without 'concurrentprobes' @@ -2951,9 +3045,41 @@ KID: do_log($warn); } } + last if checkhup($multiprocessmode, $gothup) && reload_cfg($cfgfile); } + $0 =~ s/ \[$myprobe\]$// if $changeprocessnames; + goto RESTART; +} + +sub checkhup ($$) { + my $multiprocessmode = shift; + my $gothup = shift; + if ($gothup) { + if ($multiprocessmode) { + do_log("Exiting due to HUP signal."); + exit 0; + } else { + do_log("Restarting due to HUP signal."); + return 1; + } + } + return 0; } +sub reload_cfg ($) { + my $cfgfile = shift; + my ($oldcfg, $oldprobes) = ($cfg, $probes); + do_log("Reloading configuration."); + eval { load_cfg($cfgfile) }; + if ($@) { + do_log("Reloading configuration from $cfgfile failed: $@"); + ($cfg, $probes) = ($oldcfg, $oldprobes); + return 0; + } + return 1; +} + + sub gen_imgs ($){ my $cfg = shift; |