summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Florian Pritz <f-p@gmx.at> 2009-02-24 20:17:15 +0100
committer Florian Pritz <f-p@gmx.at> 2009-02-24 20:17:15 +0100
commit d5ee1a6157065363843e6367e932f7b11b17706c (patch)
tree f9030eefc5fa5e0dfec29eacd4a6095d9759c5ce
parent f129bf7a7bad968939838d8efbd7cd878cf58257 (diff)
download bin-d5ee1a6157065363843e6367e932f7b11b17706c.tar.gz
bin-d5ee1a6157065363843e6367e932f7b11b17706c.tar.xz
added watchdog script
-rw-r--r-- .gitignore 1
-rwxr-xr-x watchdog.sh 186
2 files changed, 187 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..35fcdf9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+watchdog_ips
diff --git a/watchdog.sh b/watchdog.sh
new file mode 100755
index 0000000..a0fb1ec
--- /dev/null
+++ b/watchdog.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+#----------------------------------------------------
+# File: watchdog.sh
+# Version: 0.1.7.0
+# Author: Florian "Bluewind" Pritz <f-p@gmx.at>
+#
+# Copyright (C) 2008-2009 Florian Pritz
+#
+# Licensed under GNU General Public License v3
+# (see COPYING for full license text)
+#
+#----------------------------------------------------
+# Script to check if services are working and take
+# actions if not
+#----------------------------------------------------
+# NOTE:
+# Format for service file
+# <IP or hostname> <service name>
+
+#-------------------------- CONFIGURATION -----------------------------#
+# Seconds between the start of two consecutive checks of a service.
+# Shouldn't be lower than 10 as that could cause overlap with the
+# timeouts of the check commands. Can be overridden with -t.
+WAIT=60
+
+# Number of consecutive failed checks after which action is taken
+# (a mail is sent to root). Can be overridden with -m.
+MAX_FAIL=2
+
+# Set to /dev/null if you don't want logs
+LOGFILE="$HOME/watchdog.log"
+
+# Paths (shouldn't need to be changed)
+CURL="/usr/bin/curl"
+WGET="/usr/bin/wget"
+PING="/bin/ping"
+#----------------------------------------------------------------------#
+
+# Name of this script without its directory part, used in the help text.
+# Parameter expansion is used instead of an unquoted $(basename $0),
+# which breaks as soon as the script path contains whitespace.
+SCRIPTNAME="${0##*/}"
+
+# Exit codes
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_ERROR=2
+EXIT_BUG=10
+
+# Colors for output (ANSI escape sequences, interpreted by "echo -e");
+# lower case names are the normal, upper case names the bold variants.
+# NC switches back to the terminal's default.
+red='\e[0;31m'
+RED='\e[1;31m'
+green='\e[0;32m'
+GREEN='\e[1;32m'
+blue='\e[0;34m'
+BLUE='\e[1;34m'
+cyan='\e[0;36m'
+CYAN='\e[1;36m'
+NC='\e[0m'
+
+function usage {
+ echo -e "${blue}Usage:${NC} ${SCRIPTNAME} [OPTIONS] [<hostname or IP> <service type>]" >&2
+ echo -e "Possible service types: http, ping (default)"
+ echo -e "Options:" >&2
+ echo -e "-h this help" >&2
+ echo -e "-f <file> loads IP list from a file" >&2
+ echo -e "-t <time> Time between 2 checks"
+ echo -e "-m <tries> Max. fails before taking actions"
+ [[ $# -eq 1 ]] && exit $1 || exit $EXIT_FAILURE
+}
+
+# Pick the command used for "http" checks: curl if it is installed,
+# wget as fallback. The watcher appends the host and treats a non-zero
+# exit status as a failed check.
+if [ -x "$CURL" ]; then
+  # -o FILE discards the body. (-O takes no argument, so "-O /dev/null"
+  # made curl treat /dev/null as a second URL.) --fail turns HTTP error
+  # responses into a non-zero exit status, matching the wget fallback.
+  HTTP="$CURL --fail --connect-timeout 1 --retry 2 -o /dev/null"
+elif [ -x "$WGET" ]; then
+  HTTP="$WGET -t 2 -T 1 -O /dev/null"
+else
+  echo -e "${red}Neither curl nor wget exist! Please fix the paths.$NC" >&2
+  exit $EXIT_ERROR
+fi
+
+# Ping is the default check and therefore mandatory.
+if [ ! -x "$PING" ]; then
+  echo -e "${red}Ping command doesn't exist! Please fix the path.$NC" >&2
+  exit $EXIT_ERROR
+fi
+# Send a single echo request and use a timeout of 1 for the reply
+PING="$PING -c 1 -W 1"
+
+# Called without any argument: just show the help.
+if [ ! "$1" ] && [ ! "$2" ]; then
+  usage $EXIT_SUCCESS
+fi
+
+# Parse the command line options. The leading ':' of the option string
+# selects getopts' silent error reporting: an unknown option is reported
+# as '?' and a missing option argument as ':', both with the offending
+# option letter in OPTARG.
+while getopts ':f:ht:m:' OPTION ; do
+  case "$OPTION" in
+    f)
+      IPFILE="$OPTARG"
+      ;;
+    t)
+      # The watcher does integer arithmetic and comparisons with WAIT,
+      # so anything but a plain number is rejected here.
+      if [[ ! "$OPTARG" =~ ^[0-9]+$ ]]; then
+        echo "Option \"-t\" needs a whole number of seconds." >&2
+        usage $EXIT_ERROR
+      fi
+      # 10# forces base 10 so that e.g. "08" is not taken as octal
+      WAIT=$(( 10#$OPTARG ))
+      ;;
+    m)
+      # At least one failed check is needed before anything can be
+      # reported as down.
+      if [[ ! "$OPTARG" =~ ^[0-9]+$ ]] || (( 10#$OPTARG < 1 )); then
+        echo "Option \"-m\" needs a whole number of at least 1." >&2
+        usage $EXIT_ERROR
+      fi
+      MAX_FAIL=$(( 10#$OPTARG ))
+      ;;
+    h)
+      usage $EXIT_SUCCESS
+      ;;
+    \?)
+      echo "Unknown option \"-$OPTARG\"." >&2
+      usage $EXIT_ERROR
+      ;;
+    :)
+      echo "Option \"-$OPTARG\" needs an argument" >&2
+      usage $EXIT_ERROR
+      ;;
+    *)
+      echo "This shouldn't happen, please file a bugreport.">&2
+      usage $EXIT_BUG
+      ;;
+  esac
+done
+
+
+# Drop the parsed options; what is left are the positional arguments.
+shift $(( OPTIND - 1 ))
+
+# Decide what to watch. A "<host> [<type>]" pair given on the command
+# line takes precedence; otherwise the list is read from the file passed
+# with -f. INPUT holds one "<IP or hostname> <service name>" per line.
+if (( $# >= 1 )) ; then
+  # The service type is optional; the watcher falls back to ping.
+  INPUT="$1 ${2:-}"
+elif [[ -n "${IPFILE:-}" && -r "$IPFILE" ]]; then
+  INPUT=$(< "$IPFILE")
+else
+  # Without this branch an unset IPFILE would make the script sit and
+  # read the list from stdin.
+  echo "No host given and no readable service file (-f)." >&2
+  usage $EXIT_ERROR
+fi
+
+watcher () {
+ IP="$1"
+ TYPE="$2"
+
+ case "$TYPE" in
+ http) COMMAND="$HTTP";;
+ *) COMMAND="$PING";;
+ esac
+
+ counter=0
+
+ while :;
+ do
+ precmd_time=$(date +%s)
+ $COMMAND $IP &> /dev/null
+ exitcode=$?
+ if [ "$exitcode" -ne "0" ]; then
+ if [ "$counter" -lt "1" ]; then
+ downtime=`date`
+ fi
+ let counter=$counter+1
+ if [ "$counter" -eq $MAX_FAIL ]; then
+ mail root -s "$IP - $TYPE DOWN! " <<< "$IP - $TYPE is down since $downtime"
+ fi
+ else
+ if [ "$counter" -gt $MAX_FAIL ] || [ "$counter" -eq $MAX_FAIL ]; then
+ mail root -s "$IP - $TYPE UP! " <<< "$IP - $TYPE is OK again.
+ Downtime: ${downtime} - $(date)"
+ echo "Downtime: ${downtime} - $(date)" > $LOGFILE
+ fi
+ counter=0
+ fi
+ current_time=$(date +%s)
+ processing_time=`expr ${current_time} - ${precmd_time}`
+ sleeptime=`expr ${WAIT} - ${processing_time}`
+ if [ "${sleeptime}" -lt "1" ]; then
+ sleeptime=0
+ fi
+ sleep ${sleeptime}
+ done
+}
+
+# stops watchers and exits
+cleanup() {
+ kill $jobs &> /dev/null
+ exit
+}
+trap cleanup 0 2 15
+
+# start watchers
+while read line
+do
+ watcher $line &
+done <<< $INPUT
+
+# Prepare killing all watchers
+jobs=$(jobs -p)
+disown -ar
+
+echo "Press any key to kill"
+read -n 1
+