#!/bin/bash
########################################################################
# check_backup v1.0
#
# Nagios plugin by John E.P. Hynes, HyTronix [03/22/2015] john@hytronix.com
#
# Arguments can be in any order: d<days> h<hours> m<minutes> s<seconds>
# x<multiplier> f<filename> u<user@host> and i<keyfile>.
#
# All except filename are optional. (Well, except that time defaulting
# to "0" will always fail...) and if you specify u<user@host>, you
# must also specify i<keyfile>.
#
# Example: ./check_backup h3 m45 x2 f/tmp/timefile
#
# u<user@host>, if provided, means to retrieve the contents of <filename>
# from a remote host via ssh. ssh must be in the search path.
#
# This string should contain user@host, so the complete declaration would
# be:
#
# ujohn@mybox (for user "john" at host "mybox")
#
# i<keyfile> tells ssh to log in using the ssh key provided.
# It would also allow you to just use a local key
# for which there was no local user. Make the remote user privilege-less
# for security. (So long as they can read <filename>...)
#
# I suppose ssh agent setup would work too; if you know about this you
# don't need me to tell you how to do it.
#
# ssh example:
#
# ./check_backup h3 m45 x2 f/tmp/timefile ujohn@mybox i~/.ssh/id_rsa
#
# Returns (from above example):
# OK if timestamp in /tmp/timefile is no older
# than 3 hours and 45 minutes ago, and the exit code in
# /tmp/timefile is 0. WARNING if time exceeded, FAIL if exceeded by
# multiplier - in this case, FAIL at 7 hours 30 minutes overdue.
#
# Always FAIL if exit code is not 0.
#
# If x<multiplier> is omitted, goes right to FAIL on time exceeded.
#
# Your backup scripts must produce this file. As implied, its format
# is:
#
# ------8<------ CUT
# <timestamp> <exitcode>
# ------8<------ CUT
#
# <timestamp> is seconds since the epoch,
# <exitcode> is the backup process exit code (make this 0 if successful).
#
# For example, your script could, when complete, do a:
#
# echo -n `date +%s | tr -d '\n'> /tmp/test`; \
# echo -n " " >> /tmp/test; echo "0" >> /tmp/test
#
# ...or somesuch. I'm sure you get the idea.
#
# Other than supporting getting timefile data via ssh, this script has
# no dependencies other than bash itself (bash 4.2 or newer: it uses
# associative arrays and printf's %(...)T time format).
#
# Questions, comments, and suggestions appreciated.
#
########################################################################

# Nagios exit codes
readonly OK=0
readonly WARNING=1
readonly CRITICAL=2
readonly UNKNOWN=3

########################################################################

# Argument values, keyed by name, pre-set to their defaults (times to
# zero, multiplier to one, file/ssh settings empty).
#
# NOTE: the seconds value is deliberately NOT kept in a shell variable
# called SECONDS. bash treats SECONDS specially (it keeps counting up
# after being assigned), which would let the time limit drift while the
# script waits on ssh.
declare -A ARG_VALUES=(
  [DAYS]=0
  [HOURS]=0
  [MINUTES]=0
  [SECONDS]=0
  [MULTIPLIER]=1
  [FILENAME]=""
  [SSH_USER]=""
  [SSH_KEY]=""
)

# Names of the arguments that have been given on the command line.
declare -A ARG_SEEN=()

# Total allowed backup age in seconds (filled by convert_time_to_seconds).
LIMIT_SECS=0

# Fields parsed from the timefile (filled by read_bu_data).
STAMPTIME=""
EXITCODE=""

########################################################################
# Function arg_set - records a value for a named argument.
# Arguments: $1 - argument name (a key of ARG_VALUES)
#            $2 - value to store
# Exits UNKNOWN if the same argument was already given.

arg_set() {
  local name=$1
  local value=${2-}

  if [[ -n "${ARG_SEEN[$name]:-}" ]]; then
    echo "$name already set."
    exit "$UNKNOWN"
  fi

  # Plain assignment - the value is never evaluated as shell code.
  ARG_VALUES[$name]=$value
  ARG_SEEN[$name]=1
}

########################################################################
# Function check_args - decodes one command line argument.
# Arguments: $1 - the whole argument; its first character selects the
#                 setting, the remainder is the value.
# Exits UNKNOWN on an unrecognised selector.

check_args() {
  local argument=${1-}
  local flag=${argument:0:1}
  local value=${argument:1}

  case "$flag" in
    d) arg_set DAYS "$value" ;;
    h) arg_set HOURS "$value" ;;
    m) arg_set MINUTES "$value" ;;
    s) arg_set SECONDS "$value" ;;
    f) arg_set FILENAME "$value" ;;
    x) arg_set MULTIPLIER "$value" ;;
    u) arg_set SSH_USER "$value" ;;
    i) arg_set SSH_KEY "$value" ;;
    *)
      echo "Invalid argument: $argument"
      exit "$UNKNOWN"
      ;;
  esac
}

########################################################################
# Function validate_input - check that a value is an unsigned integer.
# Arguments: $1 - value to check
# Exits UNKNOWN if the value is empty or contains a non-digit.

validate_input() {
  local -r num_regex='^[0-9]+$'

  if ! [[ ${1-} =~ $num_regex ]]; then
    echo "Invalid numeric input ${1-}."
    exit "$UNKNOWN"
  fi
}

########################################################################
# Function convert_time_to_seconds - so time differential can be calc'ed.
# Folds days/hours/minutes/seconds into LIMIT_SECS. The values must
# already have passed validate_input.

convert_time_to_seconds() {
  # 10# forces decimal so inputs such as "08" are not rejected as octal.
  local days=$((10#${ARG_VALUES[DAYS]}))
  local hours=$((10#${ARG_VALUES[HOURS]}))
  local minutes=$((10#${ARG_VALUES[MINUTES]}))
  local secs=$((10#${ARG_VALUES[SECONDS]}))

  LIMIT_SECS=$(( ((days * 24 + hours) * 60 + minutes) * 60 + secs ))
}

########################################################################
# Function read_bu_data - local or via ssh.
# Reads the first line of the timefile and splits it on whitespace into
# STAMPTIME and EXITCODE (any further fields are ignored). Exits UNKNOWN
# if the local file does not exist or the ssh command fails.

read_bu_data() {
  local filename=${ARG_VALUES[FILENAME]}
  local ssh_user=${ARG_VALUES[SSH_USER]}
  local ssh_key=${ARG_VALUES[SSH_KEY]}
  local data=""

  if [[ -z "$ssh_user" ]]; then
    if [[ ! -f "$filename" ]]; then
      echo "Bad filename: $filename."
      exit "$UNKNOWN"
    fi
    # read returns non-zero when the line lacks a trailing newline even
    # though it still captured the text, so its status is not checked;
    # an empty result is caught by the caller's format check.
    IFS= read -r data < "$filename" || true
  else
    local -a ssh_args=()
    # Only pass -i when a key was given, so ssh-agent/default keys work
    # and -i can never swallow the remote command as its argument.
    if [[ -n "$ssh_key" ]]; then
      ssh_args+=(-i "$ssh_key")
    fi
    # "--" stops a user@host value starting with "-" being parsed as an
    # ssh option.
    # NOTE(review): the remote shell still interprets $filename (this
    # keeps remote "~" expansion working); only use trusted filenames.
    if ! data=$(ssh "${ssh_args[@]}" -- "$ssh_user" "cat $filename"); then
      echo "Unable to read $filename from $ssh_user via ssh."
      exit "$UNKNOWN"
    fi
  fi

  read -r STAMPTIME EXITCODE _ <<< "$data"
}

########################################################################
# Main script
# Parses the arguments, reads the timefile and reports the Nagios state
# on stdout with the matching exit code.

main() {
  local argument
  for argument in "$@"; do
    check_args "$argument"
  done

  local name
  for name in DAYS HOURS MINUTES SECONDS MULTIPLIER; do
    validate_input "${ARG_VALUES[$name]}"
  done

  convert_time_to_seconds

  read_bu_data

  if [[ -z "$STAMPTIME" || -z "$EXITCODE" ]]; then
    echo "${ARG_VALUES[FILENAME]} not in proper format."
    exit "$UNKNOWN"
  fi
  validate_input "$STAMPTIME"
  validate_input "$EXITCODE"

  local now
  printf -v now '%(%s)T' -1

  local stamp=$((10#$STAMPTIME))
  local code=$((10#$EXITCODE))
  local multiplier=$((10#${ARG_VALUES[MULTIPLIER]}))

  local age=$(( now - stamp ))
  local margin=$(( LIMIT_SECS - age ))
  local warn_limit=$(( LIMIT_SECS * multiplier ))

  # "No older than" the limit is OK, so an age exactly equal to the
  # limit counts as in range.
  if (( age <= LIMIT_SECS && code == 0 )); then
    echo "OK: Backup completed within time range. | Time differential (in seconds): $margin"
    exit "$OK"
  fi

  # Overdue, but still inside limit * multiplier, and the backup itself
  # succeeded: warn only.
  if (( age > LIMIT_SECS && age < warn_limit && code == 0 )); then
    echo "WARNING: Last backup too long ago - Investigate. | Time differential (in seconds): $margin"
    exit "$WARNING"
  fi

  if (( age > LIMIT_SECS )); then
    echo "FAIL: Last backup too long ago! | Time differential (in seconds): $margin"
  fi
  if (( code != 0 )); then
    echo "FAIL: Backup exited with code $EXITCODE"
  fi
  exit "$CRITICAL"
}

main "$@"

########################################################################