#!/bin/bash
#
# Log file pattern detector plugin for Nagios
# Written by Ethan Galstad (nagios@nagios.org)
# Last Modified: 07-31-1999
# Heavily modified by Thomas Sluyter (nagios@kilala.nl)
# Last Modified: 19-06-2006
#
# Thanks to Ali Khan and Kyle Tucker for troubleshooting, debugging
# and snippets of code.
#
# Usage: ./check_log3 -F <logfile> -O <oldlog> -C <CRITquery> -W <WARNquery>
#
# Description:
#
# This plugin will scan a log file (specified by the -F <logfile> option)
# for specific patterns (specified by the -C <CRITquery> and -W <WARNquery>
# options). Successive calls to the plugin script will only report *new*
# pattern matches in the log file, since a copy of the log file from the
# previous run is saved to <oldlog> (specified by the -O option).
#
# Output:
#
# On the first run of the plugin, it will return an OK state with a message
# of "Log check data initialized". On successive runs, it will return an OK
# state if *no* pattern matches have been found in the *difference* between the
# log file and the older copy of the log file. If the plugin detects any
# pattern matches in the log diff, it will return a CRITICAL state (or a
# WARNING state when only the WARNquery matched) and print out a message in
# the following format: "(x) last_match", where "x" is the total number of
# pattern matches found in the diff and "last_match" is the last entry in the
# diff which matches the pattern.
#
# Notes:
#
# If you use this plugin make sure to keep the following in mind:
#
#    1.  The "max_attempts" value for the service should be 1, as this
#        will prevent Nagios from retrying the service check (the
#        next time the check is run it will not produce the same results).
#
#    2.  The "notify_recovery" value for the service should be 0, so that
#        Nagios does not notify you of "recoveries" for the check. Since
#        pattern matches in the log file will only be reported once and not
#        the next time, there will always be "recoveries" for the service, even
#        though recoveries really don't apply to this type of check.
#
#    3.  You *must* supply a different <oldlog> for each service that
#        you define to use this plugin script - even if the different services
#        check the same <logfile> for pattern matches. This is necessary
#        because of the way the script operates.
#
#    4.  Changes to the script were made by Thomas Sluyter (cailin@kilala.nl).
#        * The first set of changes will allow the script to run properly on
#          Solaris, which it did not do by default. The second set of changes
#          will allow the following:
#        * State retention. In the original script, if a NOK was put into the
#          log file at point A in time and it is not repeated at A+1, then an
#          OK is sent to Nagios. Not something that you would like to happen.
#          I've added the $oldlog.STATE trigger file which retains the last
#          exitstatus. Should there be no new lines added to the log, check_log
#          will simply repeat the last state instead of give an OK.
#          In order for this state retention to work properly your client
#          system MUST HAVE THE DIRECTORY /USR/LOCAL/NAGIOS/VAR.
#          NOTE(review): the state file is written to "<oldlog>.STATE", so it
#          is the directory holding <oldlog> that has to exist and be writable.
#        * Two queries. In the original script you could only enter one query
#          which, when found, would result in a Critical message being sent to
#          Nagios. I've added the possibility to add another query, which will
#          result in a Warning message.
#        * Bugfix: changed all instances of "crit-count" and "warn-count" to
#          "critcount" and "warncount" after a tip from Kyle Tucker who ran
#          into problems running this script with bash on Solaris.
#

# Paths to commands used in this script. These
# may have to be modified to match your system setup.

PATH="/usr/bin:/usr/sbin:/bin:/sbin"

PROGNAME=${0##*/}
# Directory part of $0; only needed by the alternative utils.sh location below.
PROGPATH=$(printf '%s\n' "$0" | sed -e 's,[\/][^\/][^\/]*$,,')

#. $PROGPATH/utils.sh
. /usr/local/nagios/libexec/utils.sh

# utils.sh normally provides the STATE_* exit codes. Fall back to the standard
# Nagios plugin return codes so that a missing utils.sh cannot turn every
# "exit" below into an exit with an empty argument.
: "${STATE_OK:=0}"
: "${STATE_WARNING:=1}"
: "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}"

# Print the one-screen usage summary to stdout.
print_usage() {
  echo "Usage: $PROGNAME -F logfile -O oldlog -C CRITquery -W WARNquery"
  echo "Usage: $PROGNAME --help"
  echo "Usage: $PROGNAME --version"
}

# Print the long help text. "support" is expected to come from utils.sh.
print_help() {
  echo ""
  print_usage
  echo ""
  echo "Log file pattern detector plugin for Nagios"
  echo ""
  support
}

# Print the program name and the last-modified date from the file header.
print_version() {
  echo "$PROGNAME (last modified 19-06-2006)"
}

# Record exit status $1 in "<oldlog>.STATE" so that a later run which sees no
# new log lines can repeat it.
save_state() {
  echo "$1" > "$oldlog.STATE"
}

# (Re)create the saved copy of the log file and exit.
# Exits WARNING when the last line of the log matches either query
# (case-insensitively), OK otherwise. Never returns.
initialize_baseline() {
  local warn_hits crit_hits

  cat -- "$logfile" > "$oldlog"

  # "tail -1" (not "tail -n 1") is kept on purpose: the script is meant to run
  # with the stock Solaris tools found via the PATH set above.
  warn_hits=$(tail -1 < "$logfile" | grep -ci -- "$WARNquery")
  crit_hits=$(tail -1 < "$logfile" | grep -ci -- "$CRITquery")

  if [ $(( ${warn_hits:-0} + ${crit_hits:-0} )) -gt 0 ]; then
    echo "Log check data initialized... Last line contained error message."
    save_state "$STATE_WARNING"
    exit "$STATE_WARNING"
  fi

  echo "Log check data initialized..."
  save_state "$STATE_OK"
  exit "$STATE_OK"
}

# Grab the command line arguments. Parsing happens before the "all options
# present" check so that --help and --version work on their own.

logfile=""
oldlog=""
CRITquery=""
WARNquery=""

while [ $# -gt 0 ]; do
  case "$1" in
    --help | -h)
      print_help
      exit "$STATE_OK"
      ;;
    --version)
      print_version
      exit "$STATE_OK"
      ;;
    -F | -O | -C | -W)
      if [ $# -lt 2 ]; then
        echo "Missing value for argument: $1"
        print_usage
        exit "$STATE_UNKNOWN"
      fi
      case "$1" in
        -F) logfile=$2 ;;
        -O) oldlog=$2 ;;
        -C) CRITquery=$2 ;;
        -W) WARNquery=$2 ;;
      esac
      shift
      ;;
    *)
      echo "Unknown argument: $1"
      print_usage
      exit "$STATE_UNKNOWN"
      ;;
  esac
  shift
done

# Make sure all four required options have been supplied (and are non-empty:
# an empty query would match every line).

if [ -z "$logfile" ] || [ -z "$oldlog" ] \
  || [ -z "$CRITquery" ] || [ -z "$WARNquery" ]; then
  print_usage
  exit "$STATE_UNKNOWN"
fi

# If the source log file doesn't exist, exit

if [ ! -e "$logfile" ]; then
  echo "Log check error: Log file $logfile does not exist!"
  # Record the state *before* exiting; after "exit" it would never run.
  save_state "$STATE_UNKNOWN"
  exit "$STATE_UNKNOWN"
fi

# If the dump/temp log file doesn't exist, this must be the first time
# we're running this test, so copy the original log file over to
# the old diff file and exit

if [ ! -e "$oldlog" ]; then
  initialize_baseline
fi

# A bug which was caught very late:
# If newlog is shorter than oldlog, the diff used below will return
# false positives for the query because the old matches will still be in
# $oldlog. Why? Because $oldlog is not rolled over / rotated, like $newlog.
# I need to fix this in a kludgy way.

# $(( ... )) strips the padding some wc implementations put around the count.
new_lines=$(( $(wc -l < "$logfile") ))
old_lines=$(( $(wc -l < "$oldlog") ))

if [ "$new_lines" -lt "$old_lines" ]; then
  rm -f -- "$oldlog"
  initialize_baseline
fi

# The oldlog file exists, so compare it to the original log now

# The temporary file that the script should use while
# processing the log file.
if command -v mktemp > /dev/null 2>&1; then
  tempdiff=$(mktemp /tmp/check_log.XXXXXXXXXX) || tempdiff=""
else
  # No mktemp available: build a name from time and PID, and create it with
  # noclobber so an already existing file (or symlink) is never reused.
  tempdiff="/tmp/check_log.$(/bin/date '+%H%M%S').$$"
  (set -o noclobber; : > "$tempdiff") 2> /dev/null || tempdiff=""
fi

if [ -z "$tempdiff" ]; then
  echo "Log check error: Could not create temporary file in /tmp!"
  exit "$STATE_UNKNOWN"
fi

# Remove the temporary file on every exit path.
trap 'rm -f -- "$tempdiff"' EXIT

diff -- "$logfile" "$oldlog" > "$tempdiff"

if [ ! -s "$tempdiff" ]; then
  # Nothing changed since the previous run: repeat the retained state.
  touch -- "$oldlog.STATE"
  exitstatus=$(cat -- "$oldlog.STATE")
  case "$exitstatus" in
    '' | *[!0-9]*)
      # Empty or corrupt state file. Previously this ended in a bare "exit",
      # i.e. exit code 0, so fall back to OK.
      exitstatus=$STATE_OK
      ;;
  esac
  echo "LOG FILE - No status change detected. Status = $exitstatus"
  exit "$exitstatus"
fi

# Count the number of matching log entries we have.
# In "diff logfile oldlog" output the lines that exist only in the current log
# start with "<"; lines starting with ">" come from the old copy and the
# remaining lines are diff bookkeeping, so only "<" lines are searched.
# NOTE(review): these matches are case-sensitive, while the initialization
# check above uses "grep -i" - confirm which behaviour is intended.
CRITcount=$(grep '^<' "$tempdiff" | grep -c -- "$CRITquery")
WARNcount=$(grep '^<' "$tempdiff" | grep -c -- "$WARNquery")

# Get the last matching entry in the diff file
CRITlastentry=$(grep '^<' "$tempdiff" | grep -- "$CRITquery" | tail -1)
WARNlastentry=$(grep '^<' "$tempdiff" | grep -- "$WARNquery" | tail -1)

rm -f -- "$tempdiff"
cat -- "$logfile" > "$oldlog"

if [ "$CRITcount" -gt 0 ]; then
  echo "($CRITcount) $CRITlastentry"
  save_state "$STATE_CRITICAL"
  exit "$STATE_CRITICAL"
fi

if [ "$WARNcount" -gt 0 ]; then
  echo "($WARNcount) $WARNlastentry"
  save_state "$STATE_WARNING"
  exit "$STATE_WARNING"
fi

echo "Log check ok - 0 pattern matches found"
save_state "$STATE_OK"
exit "$STATE_OK"