diff options
author | Florian Pritz <f-p@gmx.at> | 2009-02-24 20:17:15 +0100 |
---|---|---|
committer | Florian Pritz <f-p@gmx.at> | 2009-02-24 20:17:15 +0100 |
commit | d5ee1a6157065363843e6367e932f7b11b17706c (patch) | |
tree | f9030eefc5fa5e0dfec29eacd4a6095d9759c5ce | |
parent | f129bf7a7bad968939838d8efbd7cd878cf58257 (diff) | |
download | bin-d5ee1a6157065363843e6367e932f7b11b17706c.tar.gz bin-d5ee1a6157065363843e6367e932f7b11b17706c.tar.xz |
added watchdog script
-rw-r--r-- | .gitignore | 1 | ||||
-rwxr-xr-x | watchdog.sh | 186 |
2 files changed, 187 insertions, 0 deletions
#!/bin/bash
#----------------------------------------------------
# File:        watchdog.sh
# Version:     0.1.7.0
# Author:      Florian "Bluewind" Pritz <f-p@gmx.at>
#
# Copyright (C) 2008-2009 Florian Pritz
#
# Licensed under GNU General Public License v3
# (see COPYING for full license text)
#
#----------------------------------------------------
# Script to check if services are working and take
# actions if not.
#
# Every service is polled by its own background watcher.
# After MAX_FAIL consecutive failed checks a "DOWN" mail is
# sent to root; when the service answers again an "UP" mail
# is sent and the downtime is appended to LOGFILE.
#----------------------------------------------------
# USAGE:
#   watchdog.sh [OPTIONS] [<hostname or IP> [<service type>]]
#   watchdog.sh -f <service file> [-t <seconds>] [-m <tries>]
#
# NOTE:
#   Format for service file (one service per line; blank
#   lines and lines starting with '#' are skipped):
#     <IP or hostname> <service name>
#   The repository's .gitignore lists "watchdog_ips";
#   presumably that is a local service file meant for -f.
#----------------------------------------------------

#-------------------------- CONFIGURATION -----------------------------#
# Time to wait between 2 checks; shouldn't be lower than 10 as that
# could cause overlap with the timeouts
WAIT=60

# Take action after x failed checks
MAX_FAIL=2

# set to /dev/null if you don't want logs
LOGFILE="$HOME/watchdog.log"

# Paths (shouldn't need to be changed)
CURL="/usr/bin/curl"
WGET="/usr/bin/wget"
PING="/bin/ping"
#----------------------------------------------------------------------#

SCRIPTNAME=${0##*/}

readonly EXIT_SUCCESS=0
readonly EXIT_FAILURE=1
readonly EXIT_ERROR=2
readonly EXIT_BUG=10

# Colors for output
readonly red='\e[0;31m'
readonly blue='\e[0;34m'
readonly NC='\e[0m'

#######################################
# Print the help text and terminate the script.
# Help requested with status 0 goes to stdout; in every other case the
# text goes to stderr so it does not pollute captured output.
# Arguments: $1 - exit status (optional, defaults to EXIT_FAILURE)
#######################################
usage() {
  local status=$EXIT_FAILURE
  local out_fd=2
  if (( $# == 1 )); then
    status=$1
  fi
  if [[ "$status" == "$EXIT_SUCCESS" ]]; then
    out_fd=1
  fi

  {
    printf '%b%s\n' "${blue}Usage:${NC} " \
      "${SCRIPTNAME} [OPTIONS] [<hostname or IP> [<service type>]]"
    printf '%s\n' \
      "Possible service types: http, ping (default)" \
      "Options:" \
      "  -h          this help" \
      "  -f <file>   loads IP list from a file" \
      "  -t <time>   Time between 2 checks" \
      "  -m <tries>  Max. fails before taking actions"
  } >&"$out_fd"

  exit "$status"
}

#######################################
# Succeeds when $1 is a positive decimal integer without leading zeros
# (leading zeros would be read as octal inside arithmetic expansion).
#######################################
is_positive_int() {
  [[ "$1" =~ ^[1-9][0-9]*$ ]]
}

# Pick the HTTP client. Commands are kept in arrays so that they are
# executed word by word without relying on string splitting.
if [[ -x "$CURL" ]]; then
  # --output (not -O, which takes no argument and would turn /dev/null
  # into a second URL); --fail makes HTTP error pages count as failures,
  # matching the behaviour of the wget fallback.
  HTTP_CMD=("$CURL" --silent --fail --connect-timeout 1 --retry 2 --output /dev/null)
elif [[ -x "$WGET" ]]; then
  HTTP_CMD=("$WGET" -t 2 -T 1 -O /dev/null)
else
  printf '%b\n' "${red}Neither curl nor wget exists! Please fix the paths.$NC" >&2
  exit "$EXIT_ERROR"
fi

if [[ ! -x "$PING" ]]; then
  printf '%b\n' "${red}Ping command doesn't exist! Please fix the path.$NC" >&2
  exit "$EXIT_ERROR"
fi
PING_CMD=("$PING" -c 1 -W 1)

if [[ -z "${1:-}" && -z "${2:-}" ]]; then
  usage "$EXIT_SUCCESS"
fi

IPFILE=""
while getopts ':f:ht:m:' OPTION; do
  case "$OPTION" in
    f)
      IPFILE="$OPTARG"
      ;;
    t)
      WAIT="$OPTARG"
      ;;
    m)
      MAX_FAIL="$OPTARG"
      ;;
    h)
      usage "$EXIT_SUCCESS"
      ;;
    \?)
      echo "Unknown option \"-$OPTARG\"." >&2
      usage "$EXIT_ERROR"
      ;;
    :)
      echo "Option \"-$OPTARG\" needs an argument" >&2
      usage "$EXIT_ERROR"
      ;;
    *)
      echo "This shouldn't happen, please file a bugreport." >&2
      usage "$EXIT_BUG"
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# Both values are used in arithmetic below; MAX_FAIL=0 would additionally
# send an "UP" mail after every successful check.
if ! is_positive_int "$WAIT"; then
  echo "Time between checks (-t) must be a positive integer, got \"$WAIT\"." >&2
  usage "$EXIT_ERROR"
fi
if ! is_positive_int "$MAX_FAIL"; then
  echo "Max. fails (-m) must be a positive integer, got \"$MAX_FAIL\"." >&2
  usage "$EXIT_ERROR"
fi

# Service list: an explicit "<host> <type>" pair wins, then the file given
# with -f, then a lone host (checked with ping).
if (( $# > 1 )); then
  INPUT="$1 $2"
elif [[ -n "$IPFILE" ]]; then
  if [[ ! -r "$IPFILE" ]]; then
    echo "Cannot read service file \"$IPFILE\"." >&2
    exit "$EXIT_ERROR"
  fi
  INPUT=$(< "$IPFILE")
elif (( $# == 1 )); then
  INPUT="$1 ping"
else
  echo "Neither a host nor a service file (-f) was given." >&2
  usage "$EXIT_ERROR"
fi

#######################################
# Poll one service forever, roughly every WAIT seconds.
# Globals:   HTTP_CMD, PING_CMD, WAIT, MAX_FAIL, LOGFILE (all read)
# Arguments: $1 - IP or hostname
#            $2 - service type: "http"; anything else means ping
# Outputs:   mails root on DOWN/UP transitions, appends downtimes
#            to LOGFILE
# Returns:   never returns; it is stopped by killing its process
#######################################
watcher() {
  local host=$1
  local type=${2:-ping}
  local -a check
  local fails=0
  local down_since=""
  local started elapsed pause now report

  case "$type" in
    http) check=("${HTTP_CMD[@]}") ;;
    *)    check=("${PING_CMD[@]}") ;;
  esac

  while :; do
    started=$(date +%s)

    if "${check[@]}" "$host" &> /dev/null; then
      # Only report recovery if the outage had been announced before.
      if (( fails >= MAX_FAIL )); then
        now=$(date)
        report="Downtime: ${down_since} - ${now}"
        mail -s "$host - $type UP! " root <<< "$host - $type is OK again.
${report}"
        # Append: several watchers share the log file.
        echo "$host - $type ${report}" >> "$LOGFILE"
      fi
      fails=0
    else
      if (( fails == 0 )); then
        down_since=$(date)
      fi
      fails=$(( fails + 1 ))
      # Mail exactly once, when the threshold is reached.
      if (( fails == MAX_FAIL )); then
        mail -s "$host - $type DOWN! " root <<< "$host - $type is down since $down_since"
      fi
    fi

    # Subtract the time the check itself took to keep the interval steady.
    elapsed=$(( $(date +%s) - started ))
    pause=$(( WAIT - elapsed ))
    if (( pause < 0 )); then
      pause=0
    fi
    sleep "$pause"
  done
}

# PIDs of all started watchers
WATCHER_PIDS=()

#######################################
# Stop all watchers. Runs as EXIT trap, so it must not call exit itself.
# Globals: WATCHER_PIDS (read)
#######################################
cleanup() {
  if (( ${#WATCHER_PIDS[@]} > 0 )); then
    kill "${WATCHER_PIDS[@]}" &> /dev/null
  fi
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap runs exactly once.
trap 'exit 130' INT
trap 'exit 143' TERM

# start watchers
# The here-string is quoted so a multi-line service list keeps its
# newlines on every bash version.
while read -r host type _; do
  if [[ -z "$host" || "$host" == \#* ]]; then
    continue
  fi
  watcher "$host" "$type" &
  WATCHER_PIDS+=("$!")
done <<< "$INPUT"

if (( ${#WATCHER_PIDS[@]} == 0 )); then
  echo "No services to watch." >&2
  exit "$EXIT_ERROR"
fi

# Drop the watchers from the job table; they are still reachable through
# WATCHER_PIDS when cleaning up.
disown -ar

echo "Press any key to kill"
read -r -n 1