#!/bin/bash
# Nagios plugin to do a Novell eDirectory partition continuity check
# Written by Jesse Pretorius, jesse.pretorius@gmail.com
# Version 1.2, 20 July 2011
# Project location: www.monitoringexchange.org
#
# Changelog:
# v1.2 : Added error handling for situation where all partitions have errors thanks to input from Magnus Felix
# v1.1 : Added specific critical and unknown errors thanks to input from Magnus Felix
#
# Overview:
#   Runs "ndsrepair -E", captures its output in a temporary file, picks the
#   oldest timestamp found on the "All servers" lines and compares its age
#   (in whole hours) against the warning and critical thresholds.
#
# Exit status (Nagios convention): 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN

errorlvl=0
output=""
tmpfile=""
warning=2
critical=6

### Beginning of functions ###

# Remove the temporary file, if one was created. Installed as an EXIT trap so
# that every exit path (including usage errors) cleans up after itself.
cleanup() {
  if [[ -n "$tmpfile" ]]; then
    rm -f -- "$tmpfile"
  fi
}

# If inappropriate parameters are provided, this usage statement is output.
# Always terminates the script with exit status 3 (UNKNOWN).
usage() {
  echo -e "\nNagios plugin to do a Novell eDirectory partition continuity check"
  echo -e "Written by Jesse Pretorius, jesse.pretorius@gmail.com"
  echo -e "Version 1.2, 20 July 2011"
  echo -e "Project location: www.monitoringexchange.org"
  echo -e "\nUsage:"
  echo -e "\t$0 "
  echo -e "\nOptions:"
  echo -e "\t[-w warning]\tThe number of hours old the oldest partition sync age must be to produce a warning state. Default: 2"
  echo -e "\t[-c critical]\tThe number of hours old the oldest partition sync age must be to produce a critical state. Default: 6"
  echo -e "\nSample:"
  echo -e "\t$0 -w 5 -c 10\n"
  exit 3
}

# Print the whole of file $1 as a single line: every run of whitespace
# (including newlines) becomes one space and the ends are trimmed. This keeps
# the plugin result on one line without relying on unquoted expansion, which
# would also expand glob characters found in the captured text.
flatten_file() {
  local text
  text=$(tr -s '[:space:]' ' ' <"$1")
  text=${text# }
  text=${text% }
  printf '%s' "$text"
}

# Work out the age of the oldest "All servers" timestamp in file $1.
# Globals:   warning, critical (read); output, errorlvl (written)
# Arguments: $1 - file holding the captured ndsrepair output
evaluate_sync_age() {
  local report=$1
  local oldest part_date part_hh part_mm part_ss
  local now_unix oldest_unix diff_unix age_abs
  local age_hours age_minutes age_seconds

  # Fields 7 and 8 of each "All servers" line are taken as MM-DD-YYYY and
  # HH:MM:SS and rewritten as "YYYYMMDD HH MM SS", so that a plain sort puts
  # the oldest timestamp first.
  oldest=$(grep "All servers" "$report" \
    | awk '{print $7"-"$8}' \
    | sed 's/:/-/g' \
    | awk -F"-" '{print $3 $1 $2" "$4" "$5" "$6}' \
    | sort -u \
    | head -n 1)

  # If there are errors in all partitions there is no timestamp to evaluate.
  if [[ -z "$oldest" ]]; then
    output="UNKNOWN: No partitions are able to completely synchronise!"
    errorlvl=3
    return
  fi

  read -r part_date part_hh part_mm part_ss <<<"$oldest"

  # Let date(1) convert the complete local timestamp. Adding h*3600+m*60+s to
  # local midnight is off by an hour on days with a daylight-saving change.
  now_unix=$(date +%s)
  if ! oldest_unix=$(date --date "$part_date ${part_hh:-0}:${part_mm:-0}:${part_ss:-0}" +%s 2>/dev/null); then
    output="UNKNOWN: Unable to interpret the partition synchronisation time '$oldest'."
    errorlvl=3
    return
  fi

  diff_unix=$((now_unix - oldest_unix))

  # Break the magnitude of the difference down into h/m/s so that a timestamp
  # lying in the future never yields a negative component.
  age_abs=${diff_unix#-}
  age_hours=$((age_abs / 3600))
  age_minutes=$((age_abs % 3600 / 60))
  age_seconds=$((age_abs % 60))

  # critical is guaranteed to be greater than warning, so test it first.
  if ((age_hours >= critical)); then
    output="CRITICAL: The oldest partition continuity age is ${age_hours}h${age_minutes}m${age_seconds}s old!"
    errorlvl=2
  elif ((age_hours >= warning)); then
    output="WARNING: The oldest partition continuity age is ${age_hours}h${age_minutes}m${age_seconds}s old!"
    errorlvl=1
  else
    output="OK: The oldest partition continuity age is ${age_hours}h${age_minutes}m${age_seconds}s old."
    errorlvl=0
  fi

  # Add the performance data (age and thresholds, all in seconds) to the output
  output="$output | 'oldest_partition_age'=$diff_unix;$((warning * 3600));$((critical * 3600));"
}

### End of functions ###

trap cleanup EXIT

while getopts w:c: OPTIONS; do
  case "$OPTIONS" in
    w) warning=$OPTARG ;;
    c) critical=$OPTARG ;;
    *) usage ;;
  esac
done

# Non-numeric thresholds would make every later comparison fail and fall
# through to an "OK" result, so reject them up front.
if ! [[ "$warning" =~ ^[0-9]+$ && "$critical" =~ ^[0-9]+$ ]]; then
  echo -e "\nThe warning and critical levels must be whole numbers of hours!"
  usage
fi

# Force base 10 so that values such as "08" are not treated as invalid octal.
warning=$((10#$warning))
critical=$((10#$critical))

if ((critical <= warning)); then
  echo -e "\nThe critical level must be higher than the warning level!"
  usage
fi

# Locate the ndsrepair binary: the two known install paths first, then $PATH
if [[ -e /opt/novell/eDirectory/bin/ndsrepair ]]; then
  NDSREPAIR="/opt/novell/eDirectory/bin/ndsrepair"
elif [[ -e /usr/bin/ndsrepair ]]; then
  NDSREPAIR="/usr/bin/ndsrepair"
else
  NDSREPAIR=$(command -v ndsrepair)
fi

# Run the ndsrepair command and save the output to a temporary file
if [[ -z "$NDSREPAIR" ]]; then
  # Nothing to run; report it the same way as a "command not found" status.
  ndsrepairerrorlvl=127
else
  if ! tmpfile=$(mktemp); then
    tmpfile=""
    echo "UNKNOWN: Unable to create a temporary file."
    exit 3
  fi
  "$NDSREPAIR" -E >"$tmpfile" 2>&1
  ndsrepairerrorlvl=$?
fi

# If the command produces an error, return the error; otherwise evaluate the
# captured report.
case "$ndsrepairerrorlvl" in
  0)
    evaluate_sync_age "$tmpfile"
    ;;
  5)
    output="CRITICAL: Unable to connect to eDirectory!"
    errorlvl=2
    ;;
  8)
    output="UNKNOWN: The ndsrepair module is already loaded."
    errorlvl=3
    ;;
  9)
    output="CRITICAL: eDirectory is in a DEFUNCT state!"
    errorlvl=2
    ;;
  127)
    output="UNKNOWN: The ndsrepair binary could not be found."
    errorlvl=3
    ;;
  *)
    output="UNKNOWN: Failed to run ndsrepair successfully! $(flatten_file "$tmpfile")"
    errorlvl=3
    ;;
esac

# Output the result and the error level (the EXIT trap removes the temp file)
printf '%s\n' "$output"
exit "$errorlvl"