#!/bin/bash
#
# Log file pattern detector plugin for Nagios
# Written by Ethan Galstad (nagios@nagios.org)
# Last Modified: 07-31-1999
# Heavily modified by Thomas Sluyter (nagios@kilala.nl)
# Last Modified: 19-06-2006
#
# Thanks to Ali Khan and Kyle Tucker for troubleshooting, debugging
# and snippets of code.
#
# Usage: ./check_log3 -F <log_file> -O <old_log_file> -C <crit-pattern> -W <warn-pattern>
#
# Description:
#
# This plugin will scan a log file (specified by the <log_file> option)
# for specific patterns (specified by the <XXX-pattern> options).  Successive
# calls to the plugin script will only report *new* pattern matches in the
# log file, since an copy of the log file from the previous run is saved
# to <old_log_file>.
#
# Output:
#
# On the first run of the plugin, it will return an OK state with a message
# of "Log check data initialized".  On successive runs, it will return an OK
# state if *no* pattern matches have been found in the *difference* between the
# log file and the older copy of the log file.  If the plugin detects any 
# pattern matches in the log diff, it will return a CRITICAL state and print
# out a message is the following format: "(x) last_match", where "x" is the
# total number of pattern matches found in the file and "last_match" is the
# last entry in the log file which matches the pattern.
#
# Notes:
#
# If you use this plugin make sure to keep the following in mind:
#
#    1.  The "max_attempts" value for the service should be 1, as this
#        will prevent Nagios from retrying the service check (the
#        next time the check is run it will not produce the same results).
#
#    2.  The "notify_recovery" value for the service should be 0, so that
#        Nagios does not notify you of "recoveries" for the check.  Since
#        pattern matches in the log file will only be reported once and not
#        the next time, there will always be "recoveries" for the service, even
#        though recoveries really don't apply to this type of check.
#
#    3.  You *must* supply a different <old_file_log> for each service that
#        you define to use this plugin script - even if the different services
#        check the same <log_file> for pattern matches.  This is necessary
#        because of the way the script operates.
#
#    4.  Changes to the script were made by Thomas Sluyter (cailin@kilala.nl).
#	 * The first set of changes will allow the script to run properly on Solaris, which
#	   it did not do by default. The second set of changes will allow the following:
#	 * State retention. In the original script, if a NOK was put into the log file
#	   at point A in time and it is not repeated at A+1, then an OK is sent to Nagios. 
# 	   Not something that you would like to happen.
#	      I've added the $oldlog.STATE trigger file which retains the last exitstatus. Should
# 	   there be no new lines added to the log, check_log will simply repeat the last state
#	   instead of give an OK.
#	      In order for this state retention to work properly your client system MUST
#	   HAVE THE DIRECTORY /USR/LOCAL/NAGIOS/VAR.
#        * Two queries. In the original script you could only enter one query which, when
#	   found, would result in  a Critical message being sent to Nagios. I've added the 
#	   possibility to add another query, which will result in a Warning message.
#	 * Bugfix: changed all instances of "crit-count" and "warn-count" to "critcount" and
#	   "warncount" after a tip from Kyle Tucker who ran into problems running this script
#	   with bash on Solaris.
#

# Paths to commands used in this script.  These
# may have to be modified to match your system setup.

PATH="/usr/bin:/usr/sbin:/bin:/sbin"

PROGNAME=`basename $0`
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`

#. $PROGPATH/utils.sh
. /usr/local/nagios/libexec/utils.sh

print_usage() {
    echo "Usage: $PROGNAME -F logfile -O oldlog -C CRITquery -W WARNquery"
    echo "Usage: $PROGNAME --help"
    echo "Usage: $PROGNAME --version"
}

print_help() {
    echo ""
    print_usage
    echo ""
    echo "Log file pattern detector plugin for Nagios"
    echo ""
    support
}

# Make sure the correct number of command line
# arguments have been supplied

if [ $# -lt 8 ]; then
    print_usage
    exit $STATE_UNKNOWN
fi

# Grab the command line arguments

exitstatus=$STATE_WARNING #default
while test -n "$1"; do
    case "$1" in
        --help)
            print_help
            exit $STATE_OK
            ;;
        -h)
            print_help
            exit $STATE_OK
            ;;
        -F)
            logfile=$2
            shift
            ;;
        -O)
            oldlog=$2
            shift
            ;;
        -C)
            CRITquery=$2
            shift
            ;;
        -W)
            WARNquery=$2
            shift
            ;;
        *)
            echo "Unknown argument: $1"
            print_usage
            exit $STATE_UNKNOWN
            ;;
    esac
    shift
done

# If the source log file doesn't exist, exit

if [ ! -e $logfile ]; then
    echo "Log check error: Log file $logfile does not exist!"
    exit $STATE_UNKNOWN
    echo $STATE_UNKNOWN > $oldlog.STATE
fi

# If the dump/temp log file doesn't exist, this must be the first time
# we're running this test, so copy the original log file over to
# the old diff file and exit

if [ ! -e $oldlog ]; then
    cat $logfile > $oldlog

    TEMPcount=0
    let TEMPcount=$TEMPcount+$(tail -1 $logfile | grep -i $WARNquery | wc -l | awk '{print $1}')
    let TEMPcount=$TEMPcount+$(tail -1 $logfile | grep -i $CRITquery | wc -l | awk '{print $1}')

    if [ $TEMPcount -gt 0 ]
    then
       echo "Log check data initialized... Last line contained error message."
       echo $STATE_WARNING > $oldlog.STATE
       exit $STATE_WARNING
    else
       echo "Log check data initialized..."
       echo $STATE_OK > $oldlog.STATE
       exit $STATE_OK
    fi
fi

# A bug which was caught very late:
# If newlog is shorter than oldlog, the diff used below will return
# false positives for the query because the will be in $oldlog. Why?
# Because $oldlog is not rolled over / rotated, like $newlog. I need
# to fix this in a kludgy way.

if [ `wc -l $logfile|awk '{print $1}'` -lt `wc -l $oldlog|awk '{print $1}'` ]
then
    rm $oldlog
    cat $logfile > $oldlog
    TEMPcount=0
    let TEMPcount=$TEMPcount+$(tail -1 $logfile | grep -i $WARNquery | wc -l | awk '{print $1}')
    let TEMPcount=$TEMPcount+$(tail -1 $logfile | grep -i $CRITquery | wc -l | awk '{print $1}')

    if [ $TEMPcount -gt 0 ]
    then
       echo "Log check data initialized... Last line contained error message."
       echo $STATE_WARNING > $oldlog.STATE
       exit $STATE_WARNING
    else
       echo "Log check data initialized..."
       echo $STATE_OK > $oldlog.STATE
       exit $STATE_OK
    fi
fi

# The oldlog file exists, so compare it to the original log now

# The temporary file that the script should use while
# processing the log file.
if [ -x mktemp ]; then
    tempdiff=`mktemp /tmp/check_log.XXXXXXXXXX`
else
    tempdate=`/bin/date '+%H%M%S'`
    tempdiff="/tmp/check_log.${tempdate}"
    touch $tempdiff
fi

diff $logfile $oldlog > $tempdiff

if [ `wc -l $tempdiff | awk '{print $1}'` -eq 0 ]
then
     rm $tempdiff
     touch $oldlog.STATE
     exitstatus=`cat $oldlog.STATE`
     echo "LOG FILE - No status change detected. Status = $exitstatus"
     exit $exitstatus
fi

# Count the number of matching log entries we have
CRITcount=`grep -c "$CRITquery" $tempdiff`
WARNcount=`grep -c "$WARNquery" $tempdiff`

# Get the last matching entry in the diff file
CRITlastentry=`grep "$CRITquery" $tempdiff | tail -1`
WARNlastentry=`grep "$WARNquery" $tempdiff | tail -1`

rm $tempdiff
cat $logfile > $oldlog

if [ "$CRITcount" -gt 0 ]; then
    	echo "($CRITcount) $CRITlastentry"
    	echo $STATE_CRITICAL > $oldlog.STATE
	exit $STATE_CRITICAL
fi

if [ "$WARNcount" -gt 0 ]; then
    	echo "($WARNcount) $WARNlastentry"
    	echo $STATE_WARNING > $oldlog.STATE
	exit $STATE_WARNING
fi

echo "Log check ok - 0 pattern matches found"
echo $STATE_OK > $oldlog.STATE
exit $STATE_OK