#!/usr/bin/python
#Author: Kyle Brandt
#License: GPLv3
#Date: Oct 2008
#Version: 0.1
#A script to watch the bean counters
#This script is designed to run on the OpenVZ host that has the vzs on it,
#and checked using check_by_ssh
#If you don't want to run this as root, create a setuid root script
#that just copies /proc/user_beancounters somewhere and owns it as your
#'nagios' user, and then change the COUNT_FILE variable in the Globals section
#Most systems will ignore setuid on a script, but you can compile it with shc
#
#The code below is kept free of version-specific syntax (no print statement,
#no f-strings) so it runs unchanged on Python 2.6+ and Python 3.
import re, pickle, os, optparse, sys

#Globals
#Helper that copies /proc/user_beancounters to COUNT_FILE (see header).
SETUID_HELPER = '/usr/local/nagios/libexec/nagios_vz_bean_setuid.sh.x'
#When running as root the helper is unnecessary and COUNT_FILE can point
#straight at '/proc/user_beancounters'.
COUNT_FILE = '/tmp/user_beancounters'

#Position of failcnt in [ held, maxheld, barrier, limit, failcnt ]
FAILCNT_INDEX = 4

#Nagios plugin exit codes
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3

#A container section starts with "<vzid>:" at the beginning of the line.
VZID_PATTERN = re.compile(r'^\s*(?P<vzid>\d+):')


def parse_options(argv=None):
    """Parse the command line and return the optparse options object.

    argv is the argument list to parse; None means sys.argv[1:].
    The -w/-c thresholds are declared as integers so that values given on
    the command line compare numerically with the failcnt increases.
    """
    cmd_parser = optparse.OptionParser(
        description="A Nagios Plugin the will track OpenVZ Values and Alert if FailCount Increases",
        prog="Nagios_UPC_Check",
        version="0.1")
    cmd_parser.add_option(
        '-w', action='store', dest='warning_range', type='int', nargs=1,
        default=10, help="Set increment for warning response, default=10")
    cmd_parser.add_option(
        '-c', action='store', dest='critical_range', type='int', nargs=1,
        default=20, help="Set increment for critical response, default=20")
    cmd_parser.add_option(
        '-f', action='store', dest='check_file', nargs=1,
        default='/tmp/bean_check.txt',
        help="This script uses a text file to store information, you can specify the file here")
    options, _arguments = cmd_parser.parse_args(argv)
    return options


def parse_beancounters(lines):
    """Parse the lines of a user_beancounters file.

    Returns a nested structure:
    { VZID: { VPS_RESOURCE: [ held, maxheld, barrier, limit, failcnt ] } }
    with every value kept as the string found in the file.
    Lines before the first "<vzid>:" line (version and column headers) and
    blank lines are ignored.
    """
    allvz = {}
    vzid = None
    for line in lines:
        match = VZID_PATTERN.match(line)
        if match:
            #First resource of a container shares the line with its id.
            vzid = match.group('vzid')
            allvz[vzid] = {}
            fields = line[match.end():].split()
        elif vzid is not None:
            fields = line.split()
        else:
            continue
        if fields:
            allvz[vzid][fields[0]] = fields[1:]
    return allvz


def failcnt_of(values):
    """Return the failcnt of one resource entry as an int, or None if the
    entry is too short or not numeric."""
    try:
        return int(values[FAILCNT_INDEX])
    except (IndexError, ValueError, TypeError):
        return None


def find_failcnt_increases(current, previous):
    """Compare two parsed beancounter structures.

    Returns a list of (message, increase) tuples, one for every resource
    whose failcnt is higher in current than in previous. A container or
    resource missing from previous (e.g. a newly created vz) is treated as
    having a previous failcnt of 0. Entries are visited in sorted order so
    the output is deterministic.
    """
    increases = []
    for vz in sorted(current):
        for item in sorted(current[vz]):
            current_failcnt = failcnt_of(current[vz][item])
            if current_failcnt is None:
                #Malformed line in the counters file, nothing to compare.
                continue
            try:
                previous_values = previous[vz][item]
            except (KeyError, TypeError):
                previous_failcnt = 0
            else:
                previous_failcnt = failcnt_of(previous_values)
                if previous_failcnt is None:
                    previous_failcnt = 0
            if current_failcnt > previous_failcnt:
                increases.append((
                    'The failcnt for the ' + item + ' parameter on vz ' + vz
                    + ', has increased from ' + str(previous_failcnt)
                    + ' to ' + str(current_failcnt),
                    current_failcnt - previous_failcnt))
    return increases


def classify_increases(increases, warning_range, critical_range):
    """Turn failcnt increases into a Nagios status and message list.

    An increase greater than critical_range is critical; an increase
    greater than warning_range (up to and including critical_range) is a
    warning; anything smaller is ignored. Returns (status, messages) with
    the warning messages first, followed by the critical ones.
    """
    warnings = [message for message, delta in increases
                if warning_range < delta <= critical_range]
    criticals = [message for message, delta in increases
                 if delta > critical_range]
    if criticals:
        status = STATUS_CRITICAL
    elif warnings:
        status = STATUS_WARNING
    else:
        status = STATUS_OK
    return status, warnings + criticals


def load_previous_check(path):
    """Return the structure saved by the last run, or None when there is
    no usable saved state (file missing, empty or not a valid pickle)."""
    if not os.path.exists(path):
        return None
    #NOTE(security): pickle.load executes whatever the file tells it to.
    #The default location is under /tmp, which other local users can write
    #to; point -f at a directory only the nagios user can write.
    try:
        with open(path, 'rb') as previous_check_file:
            return pickle.load(previous_check_file)
    except (EOFError, pickle.UnpicklingError):
        return None


def save_check(path, allvz):
    """Store the current structure so the next run can compare against it."""
    with open(path, 'wb') as check_file:
        pickle.dump(allvz, check_file)


def main(argv=None):
    """Run the check, print the plugin output and return the exit status."""
    options = parse_options(argv)

    #Refresh the readable copy of /proc/user_beancounters.
    os.system(SETUID_HELPER)
    try:
        with open(COUNT_FILE, 'r') as countfile:
            lines = countfile.readlines()
    except IOError as err:
        print('UNKNOWN: cannot read ' + COUNT_FILE + ': ' + str(err))
        return STATUS_UNKNOWN

    allvz = parse_beancounters(lines)

    #On the first run there is nothing to compare with, so the current
    #values become the baseline and no alert is raised.
    previous_check = load_previous_check(options.check_file)
    if previous_check is None:
        previous_check = allvz

    increases = find_failcnt_increases(allvz, previous_check)

    #Save when done with the check
    save_check(options.check_file, allvz)

    #The Alerting
    status, messages = classify_increases(
        increases, options.warning_range, options.critical_range)
    if messages:
        print(' '.join(messages).replace('\n', ''))
    else:
        print('OK: no failcnt increase above the warning range')
    return status


if __name__ == '__main__':
    sys.exit(main())