#!/usr/bin/python # check_om - Nagios plugin to check the hardware status of a Dell machine using Dell's snmp extensions # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # Copyright 2004 Duke University # Written by Sean Dilda # Version: 0.2 # # Will check the overall chassis status. If it is non-ok, it will then check # a number of other status indicators in order to create an error message that # indicates where the problem lies. import sys import os import string import getopt import popen2 import signal version = '0.2' snmpgetPath = '/usr/bin/snmpget' snmpgetArgs = '-t 1 -r 9 -v 1 -c' # Three arguments will be appened onto this # this string. First, the community name; # second, the host name; third, the OID generalOID = '.1.3.6.1.4.1.674.10892.1.200.10.1.4.1' # The overall chassis status componentOIDs = { 'power supply' : '.1.3.6.1.4.1.674.10892.1.200.10.1.9.1', 'voltage' : '.1.3.6.1.4.1.674.10892.1.200.10.1.12.1', 'cooling device' : '.1.3.6.1.4.1.674.10892.1.200.10.1.21.1', 'temperature' : '.1.3.6.1.4.1.674.10892.1.200.10.1.24.1', 'memory' : '.1.3.6.1.4.1.674.10892.1.200.10.1.27.1', 'intrusion' : '.1.3.6.1.4.1.674.10892.1.200.10.1.30.1' } omStateUnknown = 2 omStateOk = 3 omStateWarning = 4 nagiosStateOk = 0 nagiosStateWarning = 1 nagiosStateCritical = 2 nagiosStateUnknown = 3 def printUsage(): print 'Usage: %s -H [-C ] [-t ]' % (sys.argv[0]) def printHelp(): printUsage() print '' print 'Options:' print '-H, --hostname=HOST' print ' The hostname or IP address of the machine you want to check' print '-C, --community=community' print ' The community name for connecting to the snmp server' print ' The default is \'public\'' print '-t, --timeout=TIMEOUT' print ' Plugin timeout in seconds. (default: 15)' communityName = 'public' hostName = '' childPid = 0 timeout = 15 if os.access(snmpgetPath, os.X_OK) == 0: print 'Cannot execute %s' % (snmpgetPath) sys.exit(nagiosStateUnknown) try: options, extraArgs = getopt.getopt(sys.argv[1:], 'H:C:t:vVh?', ['hostname=', 'community=', 'timeout=', 'verbose', 'version', 'help']) except getopt.GetoptError, errorStr: print errorStr printUsage() sys.exit(nagiosStateUnknown) if len(extraArgs) != 0: print 'Unknown arguments: %s' % (string.join(extraArgs)) printUsage() sys.exit(nagiosStateUnknown) for opt, arg in options: if opt in ('-H', '--hostname'): hostName = arg elif opt in ('-C', '--community'): communityName = arg elif opt in ('-t', '--timeout'): try: timeout = int(arg) except ValueError: print 'Invalid argument for %s: %s' % (opt, arg) sys.exit(nagiosStateUnknown) elif opt in ('-V', '--version'): print 'check_om %s' % (version) sys.exit(nagiosStateUnknown) elif opt in ('-h', '--help'): printHelp() sys.exit(nagiosStateUnknown) elif opt in ('-v', '--verbose'): pass elif opt == '-?': printUsage() sys.exit(nagiosStateUnknown) if hostName == '': print 'Unknown hostname' sys.exit(nagiosStateUnknown) def handleAlarm(signum, frame): try: if childPid != 0: os.kill(childPid, os.SIGKILL) except OSError: pass print 'Execution timeout exceeded' sys.exit(nagiosStateUnknown) def getState(oid): snmpgetPipe = popen2.Popen4('%s %s %s %s %s' % (snmpgetPath, snmpgetArgs, communityName, hostName, oid)) childPid = snmpgetPipe.pid exitStatus = snmpgetPipe.wait() childPid = 0 lines = snmpgetPipe.fromchild.readlines() if not os.WIFEXITED(exitStatus) or os.WEXITSTATUS(exitStatus) != 0 or len(lines) == 0: if len(lines) >= 1: print 'Error with snmpget: %s' % (lines[0][:-1]) else: print 'Error from snmpget' sys.exit(nagiosStateUnknown) # Lets hope the output format of snmpget doesn't change return int(string.split(lines[0])[-1]) unknowns = [] warnings = [] critical = [] signal.signal(signal.SIGALRM, handleAlarm) signal.alarm(15) globalState = getState(generalOID) if globalState == omStateOk: print 'Status Ok' sys.exit(nagiosStateOk) for pair in componentOIDs.items(): state = getState(pair[1]) if state == omStateOk: continue elif state == omStateUnknown: unknowns.append(pair[0]) elif state == omStateWarning: warnings.append(pair[0]) else: critical.append(pair[0]) if len(critical) > 0: print 'Critical: %s' % (string.join(critical, ', ')) sys.exit(nagiosStateCritical) elif len(warnings) > 0: print 'Warning: %s' % (string.join(warnings, ', ')) sys.exit(nagiosStateWarning) elif len(unknowns) > 0: print 'Unknown: %s' % (string.join(unknowns, ', ')) sys.exit(nagiosStateUnknown) else: # We got an error of some sort, but can't figure out what component.. if globalState == omStateUnknown: msg = 'Unknown State' exitCode = nagiosStateUnknown elif globalState == omStateWarning: msg = 'Warning' exitCode = nagiosStateWarning else: msg = 'Critical Error' exitCode = nagiosStateCritical print '%s in unknown component' % (msg) sys.exit(exitCode)