From 6c15f78cac4a0b94907352c4184d6d7d5ca87164 Mon Sep 17 00:00:00 2001
From: Tobi Oetiker
Date: Wed, 5 Mar 2008 23:13:24 +0000
Subject: longer timeout for slaves and separate cache files per slave for improved scaling -- Phillip Moore

---
 CHANGES                 |  3 +++
 lib/Smokeping.pm        | 15 ++++-----------
 lib/Smokeping/Master.pm | 26 +++++++++++++++-----------
 lib/Smokeping/Slave.pm  |  2 +-
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/CHANGES b/CHANGES
index 4d2c27b..d0ed715 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+* use separate slave cache and longer slave timeout. this
+  helps with slave congestion -- Phillip Moore pdm yahoo-inc.com
+
 * add an optional --pid-dir option to specify the pid directory when running as a slave. --niko
 
 * slaves were not polling targets where nomasterpoll was set --tobi
diff --git a/lib/Smokeping.pm b/lib/Smokeping.pm
index c7726ca..b3e37b9 100644
--- a/lib/Smokeping.pm
+++ b/lib/Smokeping.pm
@@ -1849,22 +1849,15 @@ sub update_rrds($$$$$$) {
             my $probeobj = $probes->{$probe};
             my $pings = $probeobj->_pings($tree);
             if ($prop eq 'host' and check_filter($cfg,$name) and $tree->{$prop} !~ m|^/|) { # skip multihost
-                my %slave_test;
-                my $slaveupdates;
                 my @updates;
                 if (not $tree->{nomasterpoll} or $tree->{nomasterpoll} eq 'no'){
                     @updates = ([ "", time, $probeobj->rrdupdate_string($tree) ]);
                 }
                 if ($tree->{slaves}){
-                    %slave_test = ( map { $_,1 } split(/\s+/, $tree->{slaves}));
-                    $slaveupdates = Smokeping::Master::get_slaveupdates($name);
-                    for my $slave (@{$slaveupdates}){
-                        if (not $slave_test{$slave->[0]}){
-                            warn "WARNING: skipping update for $slave->[0] since it is not configured for $name\n";
-                            next;
-                        }
-                        push @updates, $slave;
-                    }
+                    my @slaves = split(/\s+/, $tree->{slaves});
+                    foreach my $slave (@slaves) {
+                        push @updates, Smokeping::Master::get_slaveupdates($name, $slave);
+                    } #foreach my $checkslave
                 }
                 for my $update (sort {$a->[1] <=> $b->[1]} @updates){ # make sure we put the updates in chronological order in
                     my $s = $update->[0] ? "~".$update->[0] : "";
diff --git a/lib/Smokeping/Master.pm b/lib/Smokeping/Master.pm
index 8560de3..d31c95b 100644
--- a/lib/Smokeping/Master.pm
+++ b/lib/Smokeping/Master.pm
@@ -97,16 +97,18 @@ sub save_updates {
     my $updates = shift;
     # name\ttime\tupdatestring
     # name\ttime\tupdatestring
+
     for my $update (split /\n/, $updates){
         my ($name, $time, $updatestring) = split /\t/, $update;
-        my $file = $cfg->{General}{datadir}."/${name}.slave_cache";
+        my $file = $cfg->{General}{datadir}."/${name}.${slave}.slave_cache";
         if ( ${name} =~ m{(^|/)\.\.($|/)} ){
-            warn "Skipping update for ${name}.slave_cache since ".
+            warn "Skipping update for ${name}.${slave}.slave_cache since ".
                  "you seem to try todo some directory magic here. Don't!";
         }
         elsif ( open (my $lock, '>>' , "$file.lock") ) {
-            for (my $i = 10; $i > 0; $i--){
-                if ( flock $lock, LOCK_EX ){
+
+            for (my $i = 3; $i > 0; $i--){
+                if ( flock($lock, LOCK_EX) ){
                     my $existing = [];
                     if ( -r $file ){
                         my $in = eval { retrieve $file };
@@ -116,12 +118,12 @@ sub save_updates {
                             $existing = $in;
                         };
                     };
-                    push @{$existing}, [ $slave, $time, $updatestring];
+                    push @{$existing}, [ $slave, $time, $updatestring ];
                     nstore($existing, $file);
                     last;
                 } else {
-                    warn "Could not lock $file. Trying again for $i rounds.\n";
-                    sleep rand(3);
+                    warn "Could not lock $file ($!). Trying again for $i rounds.\n";
+                    sleep rand(2);
                 }
             }
             close $lock;
@@ -133,13 +135,15 @@ sub save_updates {
 
 =head3 get_slaveupdates
 
-Read in all updates provided by slaves and return an array reference.
+Read in all updates provided by the selected slave and return an array reference.
 
 =cut
 
 sub get_slaveupdates {
     my $name = shift;
-    my $file = $name.".slave_cache";
+    my $slave = shift;
+    my $file = $name . "." . $slave. ".slave_cache";
+    my @empty = ();
     my $data;
     if ( -r $file and open (my $lock, '>>', "$file.lock") ) {
         if ( flock $lock, LOCK_EX ){
@@ -147,7 +151,7 @@ sub get_slaveupdates {
             unlink $file;
             if ($@) { #error
                 warn "Loading $file: $@";
-                return undef;
+                return @empty;
             }
         } else {
             warn "Could not lock $file. Will skip and try again in the next round. No harm done!\n";
@@ -155,7 +159,7 @@ sub get_slaveupdates {
         close $lock;
         return $data;
     }
-    return;
+    return @empty;
 }
 
 
diff --git a/lib/Smokeping/Slave.pm b/lib/Smokeping/Slave.pm
index 6214817..0747860 100644
--- a/lib/Smokeping/Slave.pm
+++ b/lib/Smokeping/Slave.pm
@@ -72,7 +72,7 @@ sub submit_results {
     my $data_dump = join("\n",@{$restore}) || "";
     my $ua = LWP::UserAgent->new(
         agent => 'smokeping-slave/1.0',
-        timeout => 10,
+        timeout => 60,
         env_proxy => 1 );
 
     my $response = $ua->post(
-- 
cgit v1.2.3-24-g4f1b