summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGES5
-rw-r--r--TODO9
-rwxr-xr-xbin/smokeping.dist3
-rw-r--r--lib/Smokeping.pm206
4 files changed, 174 insertions, 49 deletions
diff --git a/CHANGES b/CHANGES
index 86f19f2..405a623 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,8 @@
+* reload the config file on HUP signal and bail out if it's broken.
+ After reloading, restart gracefully so that no measurement is
+ interrupted. The signal can also be triggered by 'smokeping --reload'.
+ -- niko, suggested by Taisuke Yamada <tyamadajp *list.rakugaki.org>
+
2005/9/14 -- released version 20050914_trunk
* made some alert docs in smokeping_config visible --niko
diff --git a/TODO b/TODO
index cd6e866..a2cb5d0 100644
--- a/TODO
+++ b/TODO
@@ -20,15 +20,6 @@
this through) visible (eg. different colour) in the CGI menu
- suggested by Cornel Badea <cornel *sbhost.ro>
-* DAEMON
- reread config periodically or with SIGHUP so that
- no measurement is interrupted
- - suggested by Taisuke Yamada,
- <http://lists.ee.ethz.ch/smokeping-users/msg01445.html>
- - concurrent probe processes need a signal anyway,
- they have to exit and new ones started so that
- we don't have to compare to the old config
-
* REMOTE EXECUTION
generic remote probe
- a possibility for basefork-derived probes to reuse the same
diff --git a/bin/smokeping.dist b/bin/smokeping.dist
index 5c50cb8..9980369 100755
--- a/bin/smokeping.dist
+++ b/bin/smokeping.dist
@@ -38,6 +38,9 @@ B<smokeping> [ B<--email> | B<--makepod> | B<--version> | B<--restart> ]
--restart Restart SmokePing
+ --reload Reload configuration in the running process without interrupting
+ any probes
+
--nodaemon Do no daemonize the process (no fork)
--filter=x Only measure entries which pass the filter x
diff --git a/lib/Smokeping.pm b/lib/Smokeping.pm
index e1de056..9ae9c63 100644
--- a/lib/Smokeping.pm
+++ b/lib/Smokeping.pm
@@ -1776,6 +1776,7 @@ be appended to the process name as '[probe]', eg. '/usr/bin/smokeping
[FPing]'. If you don't like this behaviour, set this variable to 'no'.
If 'concurrentprobes' is not set to 'yes', this variable has no effect.
DOC
+ _default => 'yes',
},
tmail =>
{
@@ -2395,16 +2396,23 @@ sub get_config ($$){
return $parser->parse( $cfgfile ) || die "ERROR: $parser->{err}\n";
}
-sub kill_smoke ($) {
+sub kill_smoke ($$) {
my $pidfile = shift;
+ my $signal = shift;
if (defined $pidfile){
if ( -f $pidfile && open PIDFILE, "<$pidfile" ) {
<PIDFILE> =~ /(\d+)/;
my $pid = $1;
- kill 2, $pid if kill 0, $pid;
- sleep 3; # let it die
- die "ERROR: Can not stop running instance of SmokePing ($pid)\n"
- if kill 0, $pid;
+ if ($signal == SIGINT || $signal == SIGTERM) {
+ kill $signal, $pid if kill 0, $pid;
+ sleep 3; # let it die
+ die "ERROR: Can not stop running instance of SmokePing ($pid)\n"
+ if kill 0, $pid;
+ } else {
+ die "ERROR: no instance of SmokePing running (pid $pid)?\n"
+ unless kill 0, $pid;
+ kill $signal, $pid;
+ }
close PIDFILE;
} else {
die "ERROR: Can not read pid from $pidfile: $!\n";
@@ -2487,7 +2495,9 @@ sub daemonize_me ($) {
}
sub do_syslog ($){
- syslog("$syslog_facility|$syslog_priority", shift);
+ my $str = shift;
+ $str =~ s,%,%%,g;
+ syslog("$syslog_facility|$syslog_priority", $str);
}
sub do_cgilog ($){
@@ -2540,7 +2550,9 @@ sub load_cfg ($) {
$cfg->{__probes} = $probes;
init_alerts $cfg if $cfg->{Alerts};
init_target_tree $cfg, $probes, $cfg->{Targets}, $cfg->{General}{datadir};
- }
+ } else {
+ do_log("Config file unmodified, skipping reload") unless $cgimode;
+ }
}
@@ -2762,6 +2774,40 @@ sub verify_cfg {
print "Configuration file '$cfgfile' syntax OK.\n";
}
+sub make_kid {
+ my $sleep_count = 0;
+ my $pid;
+ do {
+ $pid = fork;
+ unless (defined $pid) {
+ do_log("Fatal: cannot fork: $!");
+ die "bailing out"
+ if $sleep_count++ > 6;
+ sleep 10;
+ }
+ } until defined $pid;
+ srand();
+ return $pid;
+}
+
+sub start_probes {
+ my $pids = shift;
+ my $pid;
+ my $myprobe;
+ for my $p (keys %$probes) {
+ if ($probes->{$p}->target_count == 0) {
+ do_log("No targets defined for probe $p, skipping.");
+ next;
+ }
+ $pid = make_kid();
+ $myprobe = $p;
+ $pids->{$pid} = $p;
+ last unless $pid;
+ do_log("Child process $pid started for probe $p.");
+ }
+ return $pid;
+}
+
sub main (;$) {
$cgimode = 0;
umask 022;
@@ -2769,7 +2815,7 @@ sub main (;$) {
$opt{filter}=[];
GetOptions(\%opt, 'version', 'email', 'man:s','help','logfile=s','static-pages:s', 'debug-daemon',
'nosleep', 'makepod:s','debug','restart', 'filter=s', 'nodaemon|nodemon',
- 'config=s', 'check', 'gen-examples') or pod2usage(2);
+ 'config=s', 'check', 'gen-examples', 'reload') or pod2usage(2);
if($opt{version}) { print "$RCS_VERSION\n"; exit(0) };
if(exists $opt{man}) {
if ($opt{man}) {
@@ -2802,7 +2848,12 @@ sub main (;$) {
load_cfg $cfgfile;
if(defined $opt{'static-pages'}) { makestaticpages $cfg, $opt{'static-pages'}; exit 0 };
if($opt{email}) { enable_dynamic $cfg, $cfg->{Targets},"",""; exit 0 };
- if($opt{restart}) { kill_smoke $cfg->{General}{piddir}."/smokeping.pid";};
+ if($opt{restart}) { kill_smoke $cfg->{General}{piddir}."/smokeping.pid", SIGINT;};
+ if($opt{reload}) {
+ kill_smoke $cfg->{General}{piddir}."/smokeping.pid", SIGHUP;
+ print "HUP signal sent to the running SmokePing process, exiting.\n";
+ exit 0;
+ };
if($opt{logfile}) { initialize_filelog($opt{logfile}) };
if (not keys %$probes) {
do_log("No probes defined, exiting.");
@@ -2817,35 +2868,22 @@ sub main (;$) {
}
do_log "Smokeping version $VERSION successfully launched.";
+RESTART:
my $myprobe;
+ my $multiprocessmode;
my $forkprobes = $cfg->{General}{concurrentprobes} || 'yes';
if ($forkprobes eq "yes" and keys %$probes > 1 and not $opt{debug}) {
+ $multiprocessmode = 1;
my %probepids;
my $pid;
do_log("Entering multiprocess mode.");
- for my $p (keys %$probes) {
- if ($probes->{$p}->target_count == 0) {
- do_log("No targets defined for probe $p, skipping.");
- next;
- }
- my $sleep_count = 0;
- do {
- $pid = fork;
- unless (defined $pid) {
- do_log("Fatal: cannot fork: $!");
- die "bailing out"
- if $sleep_count++ > 6;
- sleep 10;
- }
- } until defined $pid;
- $myprobe = $p;
- goto KID unless $pid; # child skips rest of loop
- do_log("Child process $pid started for probe $myprobe.");
- $probepids{$pid} = $myprobe;
- }
+ $pid = start_probes(\%probepids);
+ $myprobe = $probepids{$pid};
+ goto KID unless $pid; # child skips rest of loop
# parent
do_log("All probe processes started successfully.");
my $exiting = 0;
+ my $reloading = 0;
for my $sig (qw(INT TERM)) {
$SIG{$sig} = sub {
do_log("Got $sig signal, terminating child processes.");
@@ -2854,7 +2892,7 @@ sub main (;$) {
my $now = time;
while(keys %probepids) { # SIGCHLD handler below removes the keys
if (time - $now > 2) {
- do_log("Can't terminate all child processes, giving up.");
+ do_log("Fatal: can't terminate all child processes, giving up.");
exit 1;
}
sleep 1;
@@ -2868,14 +2906,64 @@ sub main (;$) {
my $p = $probepids{$dead};
$p = 'unknown' unless defined $p;
do_log("Child process $dead (probe $p) exited unexpectedly with status $?.")
- unless $exiting;
+ unless $exiting or $reloading;
delete $probepids{$dead};
}
};
- sleep while 1; # just wait for the signals
+ my $gothup = 0;
+ $SIG{HUP} = sub {
+ do_debuglog("Got HUP signal.");
+ $gothup = 1;
+ };
+ while (1) { # just wait for the signals
+ sleep;
+ next unless $gothup;
+ $reloading = 1;
+ $gothup = 0;
+ my $oldprobes = $probes;
+ $reloading = 0, next unless reload_cfg($cfgfile);
+ do_debuglog("Restarting probe processes " . join(",", keys %probepids) . ".");
+ kill SIGHUP, $_ for (keys %probepids);
+ my $i=0;
+ while (keys %probepids) {
+ sleep 1;
+ if ($i % 10 == 0) {
+ do_log("Waiting for child processes to terminate.");
+ }
+ $i++;
+ my %termsent;
+ for (keys %probepids) {
+ my $step = $oldprobes->{$probepids{$_}}->step;
+ if ($i > $step) {
+ do_log("Child process $_ took over its step value to terminate, killing it with SIGTERM");
+ if (kill SIGTERM, $_ == 0 and exists $probepids{$_}) {
+ do_log("Fatal: Child process $_ has disappeared? This shouldn't happen. Giving up.");
+ exit 1;
+ } else {
+ $termsent{$_} = time;
+ }
+ }
+ for (keys %termsent) {
+ if (exists $probepids{$_}) {
+ if (time() - $termsent{$_} > 2) {
+ do_log("Fatal: Child process $_ took over 2 seconds to exit on TERM signal. Giving up.");
+ exit 1;
+ }
+ } else {
+ delete $termsent{$_};
+ }
+ }
+ }
+ }
+ $reloading = 0;
+ do_log("Child processes terminated, restarting with new configuration.");
+ $SIG{CHLD} = 'DEFAULT'; # restore
+ goto RESTART;
+ }
do_log("Exiting abnormally - this should not happen.");
exit 1; # not reached
} else {
+ $multiprocessmode = 0;
if ($forkprobes ne "yes") {
do_log("Not entering multiprocess mode because the 'concurrentprobes' variable is not set.");
for my $p (keys %$probes) {
@@ -2890,21 +2978,26 @@ sub main (;$) {
do_log("Not entering multiprocess mode for just a single probe.");
$myprobe = (keys %$probes)[0]; # this way we won't ignore a probe-specific step parameter
}
- for my $sig (qw(INT TERM)) {
- $SIG{$sig} = sub {
- do_log("Got $sig signal, terminating.");
- exit 1;
- }
- }
}
KID:
my $offset;
my $step;
+ my $gothup = 0;
+ my $changeprocessnames = $cfg->{General}{changeprocessnames} ne "no";
+ $SIG{HUP} = sub {
+ do_log("Got HUP signal, " . ($multiprocessmode ? "exiting" : "restarting") . " gracefully.");
+ $gothup = 1;
+ };
+ for my $sig (qw(INT TERM)) {
+ $SIG{$sig} = sub {
+ do_log("got $sig signal, terminating.");
+ exit 1;
+ }
+ }
if (defined $myprobe) {
$offset = $probes->{$myprobe}->offset || 'random';
$step = $probes->{$myprobe}->step;
- $0 .= " [$myprobe]" unless defined $cfg->{General}{changeprocessnames}
- and $cfg->{General}{changeprocessnames} eq "no";
+ $0 .= " [$myprobe]" if $changeprocessnames;
} else {
$offset = $cfg->{General}{offset} || 'random';
$step = $cfg->{Database}{step};
@@ -2935,6 +3028,7 @@ KID:
do_debuglog("Sleeping $sleeptime seconds.");
}
sleep $sleeptime;
+ last if checkhup($multiprocessmode, $gothup) && reload_cfg($cfgfile);
}
my $now = time;
run_probes $probes, $myprobe; # $myprobe is undef if running without 'concurrentprobes'
@@ -2951,9 +3045,41 @@ KID:
do_log($warn);
}
}
+ last if checkhup($multiprocessmode, $gothup) && reload_cfg($cfgfile);
}
+ $0 =~ s/ \[$myprobe\]$// if $changeprocessnames;
+ goto RESTART;
+}
+
+sub checkhup ($$) {
+ my $multiprocessmode = shift;
+ my $gothup = shift;
+ if ($gothup) {
+ if ($multiprocessmode) {
+ do_log("Exiting due to HUP signal.");
+ exit 0;
+ } else {
+ do_log("Restarting due to HUP signal.");
+ return 1;
+ }
+ }
+ return 0;
}
+sub reload_cfg ($) {
+ my $cfgfile = shift;
+ my ($oldcfg, $oldprobes) = ($cfg, $probes);
+ do_log("Reloading configuration.");
+ eval { load_cfg($cfgfile) };
+ if ($@) {
+ do_log("Reloading configuration from $cfgfile failed: $@");
+ ($cfg, $probes) = ($oldcfg, $oldprobes);
+ return 0;
+ }
+ return 1;
+}
+
+
sub gen_imgs ($){
my $cfg = shift;