#!/bin/ksh
# Nagios Performance
#
# Returns the average service check execution time and check latency
# (as reported by nagiostats) for graphing, and compares both values
# against warning/critical thresholds.
# by G. Stangl 18 02 2007
#
# Exit status: 0 = OK, 1 = warning threshold exceeded,
#              2 = critical threshold exceeded,
#              $STATE_UNKNOWN = usage error or nagiostats output unusable.
#

PROGNAME=$(/bin/basename "$0")
# Strip the last path component of $0 to get the directory of this script.
PROGPATH=$(printf '%s\n' "$0" | /bin/sed -e 's,[\/][^\/][^\/]*$,,')
REVISION="1.0"

# utils.sh is expected to provide print_revision and the STATE_* exit codes
# (neither is defined in this file).
. "$PROGPATH/utils.sh"

# Fallbacks so that "exit $STATE_..." is well defined even if utils.sh did
# not set these; the values follow the usual Nagios plugin return codes.
: "${STATE_OK:=0}" "${STATE_UNKNOWN:=3}"

# ====== Define default thresholds ======
ExeW=10
ExeC=30
LatW=30
LatC=60
Debug=no
# =======================================

# Print the command line synopsis.
print_usage() {
  echo "Usage: $PROGNAME -E war,crit -L warn,crit [-d]"
  echo " $PROGNAME -h"
  echo " $PROGNAME -V"
  echo ""
  echo " -d Debug Mode on"
  echo " -E warn,crit Service Check Execution Time Warning and Critical threshold [sec]"
  echo " -L warn,crit Service Check Latency Warning and Critical threshold [sec]"
  echo ""
}

# Print revision, usage and a short description.
print_help() {
  print_revision "$PROGNAME" "$REVISION"
  echo ""
  print_usage
  echo ""
  echo "Nagios Key Performance data plugin for Nagios"
  echo ""
}

# Print message $1 followed by the usage text, then exit with UNKNOWN.
usage_error() {
  echo "$1"
  print_usage
  exit "$STATE_UNKNOWN"
}

# Succeed when $1 is a non-negative decimal number (digits with at most
# one decimal point).
is_number() {
  case "$1" in
    '' | . | *[!0-9.]* | *.*.*) return 1 ;;
    *) return 0 ;;
  esac
}

# Succeed when $1 is numerically greater than $2.
# awk is used because the values reported by nagiostats are decimals and
# the -gt/-ge operators of test(1) only handle integers.
num_gt() {
  printf '%s %s\n' "$1" "$2" | awk '{ exit !($1 + 0 > $2 + 0) }'
}

# Validate a warn ($1) / crit ($2) threshold pair; exits via usage_error
# when a value is not numeric or crit is not greater than warn.
check_thresholds() {
  is_number "$1" || usage_error "Invalid warning threshold: '$1'"
  is_number "$2" || usage_error "Invalid critical threshold: '$2'"
  num_gt "$2" "$1" || usage_error "Crit must be greater than Warn threshold"
}

# ====== Command line parsing ======
while test -n "$1"; do
  if [ "$Debug" = "y" ]; then echo "DEBUG: processing Arg $1"; fi
  case "$1" in
    -h)
      print_help
      exit "$STATE_OK"
      ;;
    -V)
      # The original passed the undefined $VERSION here.
      print_revision "$PROGNAME" "$REVISION"
      exit "$STATE_OK"
      ;;
    -d)
      Debug=y
      echo "DEBUG mode swithed on"
      ;;
    -E)
      [ -n "$2" ] || usage_error "Option -E requires a warn,crit argument"
      # split tuple into Warning and Critical threshold
      ExeW=$(printf '%s\n' "$2" | cut -d ',' -f1)
      ExeC=$(printf '%s\n' "$2" | cut -d ',' -f2)
      if [ "$Debug" = "y" ]; then echo "DEBUG: Thresholds found Execution Time warn,crit: $ExeW,$ExeC"; fi
      check_thresholds "$ExeW" "$ExeC"
      shift
      ;;
    -L)
      [ -n "$2" ] || usage_error "Option -L requires a warn,crit argument"
      # split tuple into Warning and Critical threshold
      LatW=$(printf '%s\n' "$2" | cut -d ',' -f1)
      LatC=$(printf '%s\n' "$2" | cut -d ',' -f2)
      if [ "$Debug" = "y" ]; then echo "DEBUG: Thresholds found Latency warn,crit: $LatW,$LatC"; fi
      check_thresholds "$LatW" "$LatC"
      shift
      ;;
    *)
      usage_error "Unknown argument: $1"
      ;;
  esac
  shift
done

if [ "$Debug" = "y" ]; then echo "DEBUG: using thresholds for Exe: $ExeW,$ExeC and Lat: $LatW,$LatC"; fi

NS=/usr/local/nagios/bin/nagiostats

if [ ! -x "$NS" ]; then
  echo "UNKNOWN: cannot execute $NS"
  exit "$STATE_UNKNOWN"
fi

# Run nagiostats once so that all values below come from the same snapshot.
# raw input from nagiostats output:
# Active Service Latency: 0.001 / 2.189 / 0.360 %
# Active Service Execution Time: 0.041 / 40.317 / 1.009 sec
stats=$("$NS")

# The third value of each triple is the one reported (field 8 resp. 9 when
# the line is split on whitespace).
Lat=$(printf '%s\n' "$stats" | awk '/Active Service Latency:/ { print $8; exit }')
Exe=$(printf '%s\n' "$stats" | awk '/Active Service Execution Time:/ { print $9; exit }')

# Without usable numbers the checks below would be meaningless, so report
# UNKNOWN instead of a misleading OK.
if is_number "$Lat" && is_number "$Exe"; then
  :
else
  echo "UNKNOWN: could not parse latency/execution time from $NS output"
  exit "$STATE_UNKNOWN"
fi

# check against thresholds
# ideally latency and average execution time are <5sec
ES=0
if num_gt "$Lat" "$LatW"; then ES=1; fi
if num_gt "$Lat" "$LatC"; then ES=2; fi
if num_gt "$Exe" "$ExeW"; then ES=1; fi
if num_gt "$Exe" "$ExeC"; then ES=2; fi

# extract from uptime if a "marker shall be set" i.e. setting exe=-1
# Program Running Time: 0d 7h 40m 35s
# The pattern matches while the reported running time is 0d 0h and a
# single-digit minute count of 0-5. The exit status computed above is
# deliberately left untouched; only the reported value changes.
if printf '%s\n' "$stats" | grep 'Program Running Time:' | grep ' 0d 0h [0-5]m' > /dev/null; then
  Exe=-1
fi

# compose output
msg="Nagios average Service Execution time:$Exe sec, average latency:$Lat sec"
perf="exec_time=$Exe;$ExeW;$ExeC;0;0 latency=$Lat;$LatW;$LatC;0;0"
echo "$msg | $perf"
exit "$ES"