#!/bin/sh
###########################################################################
# This Nagios plugin checks the status of an ICP SATA RAID controller.
#
# Copyright (C) 2007 Ralf Kruedewagen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###########################################################################
PROGNAME="check_icp"
VERSION="0.4"
###########################################################################
# Last Updated: 2007-07-12
#
# Script developed for ICP Vortex RAID Adapter ICP9047MA, BIOS 11564, ISM v5.1.
# Tested under Linux with one controller and one logical RAID5 device.
# Optional Hot-Spare check is restricted to one dedicated disk.
#
# Prerequisites:
# 1) ICP Storage Manager (ISM) must be installed including arcconf tool.
# 2) arcconf must be run as root or with sudo. Configure /etc/sudoers like:
#    daemon ALL=NOPASSWD: /usr/StorMan/arcconf
#
# Usage: check_icp <CONTROLLER#> <LD#> [<CHANNEL#> <HOTSPARE#>]
#
# Output is a single line on stdout; the exit status is one of the
# STATE_* values defined below.
###########################################################################

################################################
# Adapt these variables to your needs:
################################################
ICP_CHECK_BIN=/usr/StorMan/arcconf
LOG_DIR=/data/raid/log # or e.g. /var/tmp
################################################

# Plugin exit states.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Lines searched for in the arcconf output and the values expected after
# the colon on those lines.
CHECK_LINE_AD_1="Controller Status"
CHECK_LINE_AD_2="Logical devices/Failed (error)/Degraded"
CHECK_LINE_LD_1="Status of logical device"
CHECK_LINE_LD_2="Dedicated Hot-Spare(s)"
OK_STATUS_AD_1="Optimal" # expected result
OK_STATUS_AD_2="0/0"
OK_STATUS_LD_1="Optimal"

# Print the usage text to stderr.
usage() {
  cat <<EOR >&2
----------------------------------------------------------
$PROGNAME version $VERSION - licensed under GPL v3.

Usage:   ${0} <CONTROLLER#> <LD#> [<CHANNEL#> <HOTSPARE#>]
Example: ${0} 1 1 0 3

CHANNEL# and HOTSPARE# are only needed for Hot-Spare check.
Script must run as root (or with sudo).
----------------------------------------------------------
EOR
}

# Terminate the single-line plugin output and exit.
# Arguments: $1 - exit status
exit_script() {
  printf '\n'
  exit "$1"
}

# Write the arguments to stdout without a trailing newline. The text is
# passed as data, never as the printf format, so a '%' or '\' in parsed
# arcconf output cannot corrupt the message.
say() {
  printf '%s' "$*"
}

# Succeed when STATE is OK or UNKNOWN, i.e. when raising it to WARNING
# would not hide a more severe state.
state_is_ok_or_unknown() {
  [ "$STATE" = "$STATE_OK" ] || [ "$STATE" = "$STATE_UNKNOWN" ]
}

# Succeed when a line containing the fixed string $1 exists in file $2.
has_line() {
  grep -F -q -- "$1" "$2"
}

# Print the second ':'-separated field, with all spaces removed, of the
# line(s) in file $2 that contain the fixed string $1.
value_of() {
  grep -F -- "$1" "$2" | cut -f2 -d: | tr -d ' '
}

################################################
# Check RAID controller
################################################
# Runs "arcconf GETCONFIG <controller> AD", stores the output in LOG_AD and
# updates STATE. Exits with UNKNOWN if the command (or writing the log) fails.
check_controller() {
  if ! sudo "$ICP_CHECK_BIN" GETCONFIG "$CONTROLLER" AD > "$LOG_AD"; then
    STATE=$STATE_UNKNOWN
    say "Command not valid (AD)"
    exit_script "$STATE"
  fi

  # check controller status
  if ! has_line "$CHECK_LINE_AD_1" "$LOG_AD"; then
    # set STATE to WARNING only if it was OK or UNKNOWN before
    if state_is_ok_or_unknown; then
      STATE=$STATE_WARNING
      say "String \"$CHECK_LINE_AD_1\" not found"
    fi
    return 0
  fi

  RAID_STATUS=$(value_of "$CHECK_LINE_AD_1" "$LOG_AD")
  say "Controller:"
  if [ "$RAID_STATUS" = "$OK_STATUS_AD_1" ]; then
    STATE=$STATE_OK
  else
    # all other results are critical for now
    STATE=$STATE_CRITICAL
  fi
  say "$RAID_STATUS"

  # check failed and degraded logical devices
  if ! has_line "$CHECK_LINE_AD_2" "$LOG_AD"; then
    # set STATE to WARNING only if it was OK or UNKNOWN before
    if state_is_ok_or_unknown; then
      STATE=$STATE_WARNING
      say " - String \"$CHECK_LINE_AD_2\" not found"
    fi
    return 0
  fi

  # The value has three '/'-separated counts; keep the 2nd and 3rd
  # (failed and degraded, per the label on the matched line).
  RAID_STATUS=$(value_of "$CHECK_LINE_AD_2" "$LOG_AD" | cut -f2-3 -d/)
  say ", Error/Degraded:"
  if [ "$RAID_STATUS" = "$OK_STATUS_AD_2" ]; then
    # don't overwrite STATES other than UNKNOWN
    if [ "$STATE" = "$STATE_UNKNOWN" ]; then
      STATE=$STATE_OK
    fi
  else
    # all other results are critical for now
    STATE=$STATE_CRITICAL
  fi
  say "$RAID_STATUS"
}

################################################
# Check logical device
################################################
# Runs "arcconf GETCONFIG <controller> LD <ld>", stores the output in LOG_LD
# and updates STATE. Exits with UNKNOWN if the command fails. The Hot-Spare
# line is only checked when a HOTSPARE argument was given.
check_logical_device() {
  if ! sudo "$ICP_CHECK_BIN" GETCONFIG "$CONTROLLER" LD "$LD" > "$LOG_LD"; then
    STATE=$STATE_UNKNOWN
    say "Command not valid (LD)"
    exit_script "$STATE"
  fi

  # if a non-existing LD is requested, arcconf gives a notice
  if has_line "Logical device $LD does not exist" "$LOG_LD"; then
    # set STATE to WARNING only if it was OK or UNKNOWN before
    if state_is_ok_or_unknown; then
      STATE=$STATE_WARNING
      say " - "
      say "Logical device $LD does not exist"
    fi
    return 0
  fi

  RAID_STATUS=$(value_of "$CHECK_LINE_LD_1" "$LOG_LD")
  say " - Logical Device:"
  if [ "$RAID_STATUS" = "$OK_STATUS_LD_1" ]; then
    STATE=$STATE_OK
  else
    # all other results are critical
    STATE=$STATE_CRITICAL
  fi
  say "$RAID_STATUS"

  # check Hot-Spares, you must provide the disk number
  if [ -z "$HOTSPARE" ]; then
    return 0
  fi

  if ! has_line "$CHECK_LINE_LD_2" "$LOG_LD"; then
    # set STATE to WARNING only if it was OK or UNKNOWN before
    if state_is_ok_or_unknown; then
      STATE=$STATE_WARNING
      say " - String \"$CHECK_LINE_LD_2\" not found"
    fi
    return 0
  fi

  RAID_STATUS=$(value_of "$CHECK_LINE_LD_2" "$LOG_LD")
  say ", Hot-Spare:"
  if [ "$RAID_STATUS" = "$OK_STATUS_LD_2" ]; then
    # don't overwrite STATES other than UNKNOWN
    if [ "$STATE" = "$STATE_UNKNOWN" ]; then
      STATE=$STATE_OK
    fi
  elif state_is_ok_or_unknown; then
    # An unexpected hot-spare value is a warning, but it must not
    # downgrade a CRITICAL logical-device state set just above.
    STATE=$STATE_WARNING
  fi
  say "$RAID_STATUS"
}

# Entry point.
# Arguments: $1 - number of the RAID controller, usually 1
#            $2 - logical device, begins with 1
#            $3 - channel number, begins with 0 (optional)
#            $4 - number of the Hot-Spare device (optional)
main() {
  if [ $# -lt 2 ]; then
    usage
    # invalid command line arguments are reported as UNKNOWN
    exit "$STATE_UNKNOWN"
  fi

  CONTROLLER=$1
  LD=$2
  CHANNEL=$3
  HOTSPARE=$4

  LOG_AD=$LOG_DIR/icp_C${CONTROLLER}_AD_status.log
  LOG_LD=$LOG_DIR/icp_C${CONTROLLER}_LD${LD}_status.log
  OK_STATUS_LD_2="$CHANNEL,$HOTSPARE" # e.g. 0,3

  # create log dir if not existing
  test -d "$LOG_DIR" || mkdir -p "$LOG_DIR"

  # set initial state to UNKNOWN
  STATE=$STATE_UNKNOWN

  check_controller

  # exit script here if STATE is not OK
  if [ "$STATE" != "$STATE_OK" ]; then
    exit_script "$STATE"
  fi

  # STATE is OK at this stage
  check_logical_device

  chmod 664 "$LOG_AD"
  chmod 664 "$LOG_LD"

  exit_script "$STATE"
}

main "$@"