#!/usr/bin/env python
"""
#===================================================================================#
FILE          : check_failover.py
USAGE         : ./check_failover.py
DESCRIPTION   : Nagios plugin to check if Red Hat cluster services have moved to
                another node.
OPTION(S)     : -i / --init & -v / --version & -h / --help
REQUIREMENTS  : RedHat cluster. Tested with clustat 1.9.53
BUGS          : Search for XXX in the script.
NOTES         : Originally written with tab stop = 8; now indented with 4 spaces.
                I prefer '#' inside the code for comments
                I don't like the block comments with ''' so much as I am used to
                BASH and the colours that I work with in cream / vim.
AUTHOR(s)     : Martinus Nel (martinus.nel@linuxit.com)
COMPANY       : LinuxIT
VERSION       : 1.0
CREATED       : 16-07-08
WISH LIST     :

Copyright (C) 2008 LinuxIT Europe LTD, www.linuxit.com

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/.
#===================================================================================#

#============#
# CHANGE LOG #
#============#

#===============#
# V2 - xx-xx-08 #
#===============#
Features
--------

Bugs
----

Please send bug reports / feature requests to Martinus Nel (martinus.nel@linuxit.com)
"""

import os
import re
import sys
import getopt
import string
import pickle

#====================#
# Nagios exit status #
#====================#
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3
STATE_DEPENDENT = 4

#==================#
# Global variables #
#==================#
VERSION = 1
# Path of the file holding the pickled baseline. Must be set before use and
# must be readable / writable by the Nagios user.
DATA_FILE = ''
CLUSTAT = '/usr/sbin/clustat'

# Whitespace-separated tokens of the dashed line that precedes the service
# rows in the clustat output. Comparing tokens instead of the raw line keeps
# the match independent of the column padding clustat happens to use.
SERVICE_SEPARATOR = ['-------', '----', '-----', '------', '-----']

#===========#
# Functions #
#===========#


def usage(exit_status=STATE_OK):
    """Print the help text and exit.

    exit_status -- Nagios state to exit with. Defaults to STATE_OK so an
                   explicit -h / --help request is not reported as a problem.
    """
    print('check_failover.py version %s' % (VERSION))
    print('This is a Nagios check to see if Red Hat cluster services has moved to another node.')
    print('''
Copyright (C) 2008 LinuxIT Europe LTD, www.linuxit.com

Usage : check_failover.py

Options: -i / --init    -- Initialize for first run or use to re-initialize.
         -h / --help    -- Displays this help message.
         -v / --version -- Displays version.
''')
    sys.exit(exit_status)


def parse_clustat(lines):
    """Return the first two columns of every service row as one flat list.

    lines -- the clustat output as a list of lines.

    The result has the shape [service, owner, service, owner, ...], which is
    the layout stored in DATA_FILE and compared between runs.

    Raises ValueError when the dashed service separator line is missing.
    """
    separator_at = None
    for position, line in enumerate(lines):
        if line.split() == SERVICE_SEPARATOR:
            separator_at = position
            break
    if separator_at is None:
        raise ValueError('service separator line not found in clustat output')

    data = []
    for line in lines[separator_at + 1:]:
        columns = line.split()
        # Blank or truncated rows carry no service / owner pair; skipping
        # them keeps the pairs aligned.
        if len(columns) >= 2:
            data.extend(columns[:2])
    return data


def get_data():
    """Run clustat and return the current [service, owner, ...] list.

    Exits with STATE_UNKNOWN when the output cannot be parsed (for example
    when clustat is missing or printed no service table), so Nagios does not
    get a Python traceback as the plugin result.
    """
    pipe = os.popen(CLUSTAT)
    try:
        output = pipe.readlines()
    finally:
        pipe.close()

    try:
        return parse_clustat(output)
    except ValueError:
        print('Unable to parse the output of %s.' % (CLUSTAT))
        sys.exit(STATE_UNKNOWN)


def initialize():
    """Store the current cluster layout in DATA_FILE as the new baseline.

    Exits with STATE_OK on success and STATE_CRITICAL when DATA_FILE cannot
    be opened for writing.
    """
    print('Initializing now ...')
    # Collect the data before touching the file: opening with 'w' truncates,
    # so a clustat failure must not be able to destroy an existing baseline.
    data = get_data()
    try:
        # Binary mode is what pickle expects on every Python version.
        init_file = open(DATA_FILE, 'wb')
    except IOError:
        print('Unable to write to file. Make sure the Nagios user can read/write to %s.' % (DATA_FILE))
        sys.exit(STATE_CRITICAL)
    try:
        pickle.dump(data, init_file)
    finally:
        init_file.close()
    print('Initialization completed OK')
    sys.exit(STATE_OK)


def load_data():
    """Return the baseline stored in DATA_FILE.

    Exits with STATE_CRITICAL when the file cannot be read or unpickled.
    """
    # NOTE: pickle.load executes whatever the file describes. DATA_FILE must
    # therefore only be writable by trusted users (the Nagios user).
    try:
        saved_file = open(DATA_FILE, 'rb')
        try:
            return pickle.load(saved_file)
        finally:
            saved_file.close()
    except (IOError, EOFError, ValueError, pickle.UnpicklingError):
        print('Unable to read data file. Re-initialize with -i / --init.')
        sys.exit(STATE_CRITICAL)


#======#
# Main #
#======#
def main():
    """Parse the command line, then compare the current layout to the baseline."""
    #===========================#
    # Check options / arguments #
    #===========================#
    try:
        options, argument = getopt.getopt(sys.argv[1:], 'ihv', ["init", "help", "version"])
    except getopt.error:
        # An invalid option must not be reported to Nagios as OK.
        usage(STATE_UNKNOWN)

    flags = [option[0] for option in options]

    # Same precedence as before: init wins over help, help over version.
    if '-i' in flags or '--init' in flags:
        initialize()
    if '-h' in flags or '--help' in flags:
        usage()
    if '-v' in flags or '--version' in flags:
        print('check_failover.py version %s' % (VERSION))
        sys.exit(STATE_OK)

    if len(argument) != 0:
        print("Incorrect amount of arguments.")
        print("See 'check_failover.py -h' for more details")
        sys.exit(STATE_CRITICAL)

    if DATA_FILE == '':
        print('First set "DATA_FILE" variable in script. Make sure the Nagios user can read/write to it.')
        sys.exit(STATE_CRITICAL)

    if not os.path.exists(DATA_FILE):
        print('Could not find data file.')
        print('Make sure the Nagios user can read/write to it and that you have initialized with -i / --init')
        sys.exit(STATE_CRITICAL)

    #==================================#
    # Get old and new data and compare #
    #==================================#
    current_data = get_data()
    old_data = load_data()

    if current_data == old_data:
        print('No service failed over.')
        sys.exit(STATE_OK)
    else:
        print('Service failed over.')
        sys.exit(STATE_CRITICAL)


if __name__ == '__main__':
    main()