#!/bin/bash
#
# Nagios plug-in: reports how many Cassandra ConcurrentMarkSweep (CMS)
# garbage collections were logged within a recent time window.
#
# Options:
#   -d  Duration of the window to inspect, in seconds (default 900).
#   -f  Location of the Cassandra log file
#       (default /BigData/lib/cassandra/log/system.log).
#   -w  CMS GC count at or above which the state is WARNING (default 9).
#   -c  CMS GC count at or above which the state is CRITICAL (default 15).
#   -h  Print usage and exit with STATE_UNKNOWN.
#
# Author - Juned Memon
#
# The CMS GC log entries are expected to look like (note the leading space):
# " INFO [ScheduledTasks:1] 2011-10-21 00:09:22,753 GCInspector.java (line 128) GC for ConcurrentMarkSweep: 15828 ms, 11324439288 reclaimed leaving 1007293704 used; max is 17232297984"
# Otherwise adjust the columns that are cut in count_recent_cms_gc.
#
# Requires GNU date (--date) and tac.

######### This part is for Nagios ################################
PROGNAME=$(/usr/bin/basename "$0")
PROGPATH=$(echo "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,')
REVISION=$(echo '$Revision: 1749 $' | sed -e 's/[^0-9.]//g')

# Alternative location: $PROGPATH/utils.sh
NAGIOS_UTILS=/usr/local/nagios/libexec/utils.sh
if [[ -r "$NAGIOS_UTILS" ]]; then
  # shellcheck source=/dev/null
  . "$NAGIOS_UTILS"
fi

# Fall back to the standard Nagios plug-in return codes when utils.sh did
# not provide them; otherwise "exit $STATE_..." would expand to a bare
# "exit" and return an arbitrary status.
: "${STATE_OK:=0}" "${STATE_WARNING:=1}" "${STATE_CRITICAL:=2}" "${STATE_UNKNOWN:=3}"
######################################################################

#######################################
# Print the usage text and terminate the plug-in.
# Globals:   STATE_UNKNOWN (read)
# Outputs:   usage text on stdout
# Returns:   never; exits with STATE_UNKNOWN
#######################################
usage() {
  cat <<EOF
Usage: $0 [-d <duration>] [-f <logfile>] [-w <warn>] [-c <crit>]
 <duration> is the duration within which the count is monitored (in seconds); Default: 900 seconds (15 minutes)
 <logfile> is the location of the Cassandra log file. Default is /BigData/lib/cassandra/log/system.log
 <warn> is the CMS GC count for WARNING state. Default is 9.
 <crit> is the CMS GC count for CRITICAL state. Default is 15.
EOF
  exit "$STATE_UNKNOWN"
}

#######################################
# Count the most recent CMS GC log entries newer than a given epoch.
# The log is read once; entries are walked from newest to oldest and the
# walk stops at the first entry that is not newer than the window start,
# so the loop always terminates (it is bounded by the number of matches).
# Arguments: $1 - log file to scan
#            $2 - window start, seconds since the epoch (exclusive)
# Outputs:   the count on stdout
#######################################
count_recent_cms_gc() {
  local log_file=$1
  local start_epoch=$2
  local count=0
  local stamp stamp_epoch

  while IFS= read -r stamp; do
    # An empty string would be parsed by "date --date" as today's midnight,
    # so it must never reach date.
    [[ -n "$stamp" ]] || continue
    # Skip entries whose timestamp columns cannot be parsed.
    stamp_epoch=$(date --date "$stamp" +%s 2>/dev/null) || continue
    (( stamp_epoch > start_epoch )) || break
    count=$(( count + 1 ))
  done < <(
    # Fields 4 and 5 are the date and time (field 1 is empty because of the
    # leading space); the second cut drops the ",milliseconds" suffix.
    grep -F -- 'ConcurrentMarkSweep' "$log_file" \
      | cut -d ' ' -f4,5 \
      | cut -d ',' -f1 \
      | tac
  )

  printf '%s\n' "$count"
}

WARN=9
CRIT=15
DURATION=900 # in seconds
FILE=""

#####################################################################
# Get parameter values in variables
while [[ -n "$1" ]]; do
  case "$1" in
    -c | -w | -d | -f)
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Missing value for argument: $1"
        usage
      fi
      case "$1" in
        -c) CRIT=$2 ;;
        -w) WARN=$2 ;;
        -d) DURATION=$2 ;;
        -f) FILE=$2 ;;
      esac
      shift
      ;;
    -h)
      usage
      ;;
    *)
      echo "Unknown argument: $1"
      usage
      ;;
  esac
  shift
done

# The thresholds and the duration feed integer comparisons and arithmetic.
for numeric_value in "$WARN" "$CRIT" "$DURATION"; do
  if ! [[ "$numeric_value" =~ ^[0-9]+$ ]]; then
    echo "Not a non-negative integer: $numeric_value"
    usage
  fi
done
#####################################################################

# Honour -f; fall back to the default Cassandra log location.
LOG_FILE=${FILE:-/BigData/lib/cassandra/log/system.log}
if [[ ! -r "$LOG_FILE" ]]; then
  echo "UNKNOWN: cannot read log file $LOG_FILE"
  exit "$STATE_UNKNOWN"
fi

CURR_EPOCH=$(date +%s)
# 10# forces base 10 so a value such as 0900 is not treated as octal.
START_EPOCH=$(( CURR_EPOCH - 10#$DURATION ))
COUNT=$(count_recent_cms_gc "$LOG_FILE" "$START_EPOCH")

echo "$COUNT ConcurrentMarkSweep Garbage Collection Occoured in last $DURATION seconds."
####################################################################################
# COUNT >= CRIT: CRITICAL (checked first so it takes precedence over WARNING)
if [ "$COUNT" -ge "$CRIT" ]; then
  exitstatus=$STATE_CRITICAL
  exit "$exitstatus"
fi

# WARN <= COUNT < CRIT: WARNING
if [ "$COUNT" -ge "$WARN" ]; then
  exitstatus=$STATE_WARNING
  exit "$exitstatus"
fi

# COUNT < WARN: no exit is issued here, so execution continues past this
# point; if nothing follows, the script ends with the status of the "if"
# above, which is 0.