#!/usr/bin/python
# -*- Mode: Python -*-
# vi:si:et:sw=4:sts=4:ts=4

# check_process_cpu - plugin for nagios to check CPU use of processes
# (C) Copyright 2008 Ioannis Aslanidis (deathwing00 at deathwing00 dot org)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# exit status will be:
# 0 if everything is ok
# 1 if any of the processes is using more CPU than the warning threshold
# 2 if any of the processes is using more CPU than the critical threshold
# 3 if the check could not be carried out (bad arguments, no CPU reading)

try:
    import commands
except ImportError:
    # Python 3 no longer ships 'commands'; subprocess offers the same
    # getoutput() helper, so it is bound to the old name.
    import subprocess as commands
import sys
import string
import optparse
from threading import Thread, Semaphore
from time import time as _time
from collections import deque

try:
    from shlex import quote as shell_quote
except ImportError:
    # Python 2 keeps the shell quoting helper in 'pipes'.
    from pipes import quote as shell_quote


class Empty(Exception):
    "Exception raised by Queue.get(block=0)/get_nowait()."
    pass


class Full(Exception):
    "Exception raised by Queue.put(block=0)/put_nowait()."
    pass


class Queue:
    """Create a queue object with a given maximum size.

    If maxsize is <= 0, the queue size is infinite.
    """

    def __init__(self, maxsize=0):
        try:
            import threading
        except ImportError:
            import dummy_threading as threading
        self._init(maxsize)
        # mutex must be held whenever the queue is mutating.  All methods
        # that acquire mutex must release it before returning.  mutex
        # is shared between the three conditions, so acquiring and
        # releasing the conditions also acquires and releases mutex.
        self.mutex = threading.Lock()
        # Notify not_empty whenever an item is added to the queue; a
        # thread waiting to get is notified then.
        self.not_empty = threading.Condition(self.mutex)
        # Notify not_full whenever an item is removed from the queue;
        # a thread waiting to put is notified then.
        self.not_full = threading.Condition(self.mutex)
        # Notify all_tasks_done whenever the number of unfinished tasks
        # drops to zero; thread waiting to join() is notified to resume
        self.all_tasks_done = threading.Condition(self.mutex)
        self.unfinished_tasks = 0

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by Queue consumer threads.  For each get() used to fetch a task,
        a subsequent call to task_done() tells the queue that the processing
        on the task is complete.

        If a join() is currently blocking, it will resume when all items
        have been processed (meaning that a task_done() call was received
        for every item that had been put() into the queue).

        Raises a ValueError if called more times than there were items
        placed in the queue.
        """
        self.all_tasks_done.acquire()
        try:
            unfinished = self.unfinished_tasks - 1
            if unfinished <= 0:
                if unfinished < 0:
                    raise ValueError('task_done() called too many times')
                # Prefer notify_all() where it exists; notifyAll() is the
                # only spelling on very old interpreters and is deprecated
                # on recent ones.
                condition = self.all_tasks_done
                wake_all = (getattr(condition, 'notify_all', None)
                            or condition.notifyAll)
                wake_all()
            self.unfinished_tasks = unfinished
        finally:
            self.all_tasks_done.release()

    def join(self):
        """Blocks until all items in the Queue have been gotten and processed.

        The count of unfinished tasks goes up whenever an item is added to the
        queue. The count goes down whenever a consumer thread calls task_done()
        to indicate the item was retrieved and all work on it is complete.

        When the count of unfinished tasks drops to zero, join() unblocks.
        """
        self.all_tasks_done.acquire()
        try:
            while self.unfinished_tasks:
                self.all_tasks_done.wait()
        finally:
            self.all_tasks_done.release()

    def qsize(self):
        """Return the approximate size of the queue (not reliable!)."""
        self.mutex.acquire()
        n = self._qsize()
        self.mutex.release()
        return n

    def empty(self):
        """Return True if the queue is empty, False otherwise (not reliable!)."""
        self.mutex.acquire()
        n = self._empty()
        self.mutex.release()
        return n

    def full(self):
        """Return True if the queue is full, False otherwise (not reliable!)."""
        self.mutex.acquire()
        n = self._full()
        self.mutex.release()
        return n

    def put(self, item, block=True, timeout=None):
        """Put an item into the queue.

        If optional args 'block' is true and 'timeout' is None (the default),
        block if necessary until a free slot is available. If 'timeout' is
        a positive number, it blocks at most 'timeout' seconds and raises
        the Full exception if no free slot was available within that time.
        Otherwise ('block' is false), put an item on the queue if a free slot
        is immediately available, else raise the Full exception ('timeout'
        is ignored in that case).
        """
        self.not_full.acquire()
        try:
            if not block:
                if self._full():
                    raise Full
            elif timeout is None:
                while self._full():
                    self.not_full.wait()
            else:
                if timeout < 0:
                    raise ValueError("'timeout' must be a positive number")
                endtime = _time() + timeout
                while self._full():
                    remaining = endtime - _time()
                    if remaining <= 0.0:
                        raise Full
                    self.not_full.wait(remaining)
            self._put(item)
            self.unfinished_tasks += 1
            self.not_empty.notify()
        finally:
            self.not_full.release()

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        Only enqueue the item if a free slot is immediately available.
        Otherwise raise the Full exception.
        """
        return self.put(item, False)

    def get(self, block=True, timeout=None):
        """Remove and return an item from the queue.

        If optional args 'block' is true and 'timeout' is None (the default),
        block if necessary until an item is available. If 'timeout' is
        a positive number, it blocks at most 'timeout' seconds and raises
        the Empty exception if no item was available within that time.
        Otherwise ('block' is false), return an item if one is immediately
        available, else raise the Empty exception ('timeout' is ignored
        in that case).
        """
        self.not_empty.acquire()
        try:
            if not block:
                if self._empty():
                    raise Empty
            elif timeout is None:
                while self._empty():
                    self.not_empty.wait()
            else:
                if timeout < 0:
                    raise ValueError("'timeout' must be a positive number")
                endtime = _time() + timeout
                while self._empty():
                    remaining = endtime - _time()
                    if remaining <= 0.0:
                        raise Empty
                    self.not_empty.wait(remaining)
            item = self._get()
            self.not_full.notify()
            return item
        finally:
            self.not_empty.release()

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Only get an item if one is immediately available. Otherwise
        raise the Empty exception.
        """
        return self.get(False)

    # Override these methods to implement other queue organizations
    # (e.g. stack or priority queue).
    # These will only be called with appropriate locks held

    # Initialize the queue representation
    def _init(self, maxsize):
        self.maxsize = maxsize
        self.queue = deque()

    def _qsize(self):
        return len(self.queue)

    # Check whether the queue is empty
    def _empty(self):
        return not self.queue

    # Check whether the queue is full
    def _full(self):
        return self.maxsize > 0 and len(self.queue) == self.maxsize

    # Put a new item in the queue
    def _put(self, item):
        self.queue.append(item)

    # Get an item from the queue
    def _get(self):
        return self.queue.popleft()


parser = optparse.OptionParser()
# default=0 keeps the verbosity an integer when -v is not given, so the
# comparison in debug() never sees None.
parser.add_option('-v', '--verbose', action="count", dest="verbose",
    default=0, help="increase verbosity level (0-3)")
parser.add_option('-w', '--warning', action="store", dest="warning",
    default="50", help="CPU% to warn at")
parser.add_option('-c', '--critical', action="store", dest="critical",
    default="95", help="CPU% to critical at")

# top does not allow more than 20 PIDs per invocation.
TOP_MAX_PIDS = 20
# Upper bound on the number of worker threads started by main().
n_threads = 20

# State shared between main() and the worker threads.
_verbose = 0
highest_pid = -1
highest_cpu = -1
pool = Queue()
sem = Semaphore()


def debug(msg):
    """Write msg to stdout when the verbosity level is 3 or more."""
    global _verbose
    if (_verbose or 0) >= 3:
        sys.stdout.write('DEBUG: %s\n' % msg)


def ok(msg):
    """Print an OK status line and exit with status 0."""
    sys.stdout.write('OK: %s\n' % msg)
    sys.exit(0)


def warning(msg):
    """Print a WARNING status line and exit with status 1."""
    sys.stdout.write('WARNING: %s\n' % msg)
    sys.exit(1)


def critical(msg):
    """Print a CRITICAL status line and exit with status 2."""
    sys.stdout.write('CRITICAL: %s\n' % msg)
    sys.exit(2)


def unknown(msg):
    """Print an UNKNOWN status line and exit with status 3."""
    sys.stdout.write('UNKNOWN: %s\n' % msg)
    sys.exit(3)


def parse_top_row(row):
    """Extract (pid, cpu) from one process row of top's batch output.

    The PID is read from the first column and the CPU percentage from the
    ninth, both returned as strings.  Returns None when the row has fewer
    than nine columns, the PID is not numeric, or the CPU column is not a
    number (for instance when the header row or an error message is passed
    in because the process has already exited).
    """
    fields = row.split()
    if len(fields) < 9:
        return None
    pid = fields[0]
    # Accept a decimal comma, which top may print under some locales.
    cpu = fields[8].replace(',', '.')
    if not pid.isdigit():
        return None
    try:
        float(cpu)
    except ValueError:
        return None
    return pid, cpu


def group_pids(pids, size=TOP_MAX_PIDS):
    """Split pids into comma-separated strings of at most size PIDs each."""
    return [','.join([str(pid) for pid in pids[start:start + size]])
            for start in range(0, len(pids), size)]


def find_pids(process_name):
    """Return the PIDs (as strings) of processes named exactly process_name.

    The name is shell-quoted and preceded by '--' so that it is always
    passed to pgrep as a single literal pattern argument.  Only purely
    numeric tokens of the output are kept, which discards any diagnostic
    text pgrep may have printed.
    """
    # We use the -x flag because we want an exact match.
    output = commands.getoutput("pgrep -x -- %s" % shell_quote(process_name))
    return [token for token in output.split() if token.isdigit()]


def worker():
    """Drain the pool, recording the highest CPU reading seen.

    Each pool item is a comma-separated PID string.  top is run once per
    item and the first process row of its output is compared against the
    shared highest_pid / highest_cpu pair.  Returns when the pool is empty.
    """
    global highest_pid
    global highest_cpu
    while True:
        # get_nowait() instead of empty()+get(): another thread may take the
        # last item between the two calls, which would block this one forever.
        try:
            item = pool.get_nowait()
        except Empty:
            return
        try:
            item_result = commands.getoutput(
                """/usr/bin/top b n 1 c p %s | grep PID -A 1 | head -n 2 | tail -n 1""" % item
            )
            sample = parse_top_row(item_result)
            if sample is None:
                debug('Ignoring unparsable top output for PIDs %s: %r'
                      % (item, item_result))
            else:
                current_pid, current_cpu = sample
                # Prevent other threads to do this part in order to prevent
                # data corruption.
                sem.acquire()
                try:
                    if float(current_cpu) > float(highest_cpu):
                        highest_pid = current_pid
                        highest_cpu = current_cpu
                finally:
                    sem.release()
        finally:
            # Always account for the item, otherwise pool.join() in main()
            # would wait forever after a failure in this thread.
            pool.task_done()


def main(argv=None):
    """Run the check and exit with the matching Nagios status code.

    argv is the list of command line arguments without the program name;
    sys.argv[1:] is used when it is None.  The function does not return:
    it always ends in ok(), warning(), critical() or unknown().
    """
    global _verbose
    global highest_pid
    global highest_cpu

    if argv is None:
        argv = sys.argv[1:]
    opts, args = parser.parse_args(argv)
    _verbose = opts.verbose or 0

    debug('Options: %r' % opts)

    if not args:
        unknown("No process specified.")

    # Report unusable thresholds as UNKNOWN instead of dying with a traceback.
    try:
        warning_threshold = float(opts.warning)
        critical_threshold = float(opts.critical)
    except ValueError:
        unknown("Thresholds must be numbers (warning=%r, critical=%r)"
                % (opts.warning, opts.critical))

    # The argument is cut to 15 characters before being matched by pgrep.
    command_line = args[0]
    command_line = command_line[:15]

    proc_array = find_pids(command_line)
    if not proc_array:
        ok("There are no '%s' processes in the system" % command_line)

    highest_pid = -1
    highest_cpu = -1

    # Split the PIDs in groups of 20, because top does not allow more than
    # 20 PIDs, and put each comma-separated PID string into the pool.
    for procs_set in group_pids(proc_array):
        pool.put(procs_set)

    # We create a number of threads that will get a PID string from the pool
    # and process it.  There is no point in starting more threads than items.
    for i in range(min(n_threads, pool.qsize())):
        t = Thread(target=worker)
        if hasattr(t, 'daemon'):
            t.daemon = True
        else:
            t.setDaemon(True)
        t.start()

    # We wait for all the threads to finish processing.
    pool.join()

    # highest_pid is still the initial -1 when no row could be parsed.
    if highest_pid == -1:
        unknown("Could not read the CPU usage of the '%s' processes"
                % command_line)

    # We evaluate the result.
    report = {
        'command_line': command_line,
        'highest_pid': highest_pid,
        'highest_cpu': highest_cpu,
    }
    if float(highest_cpu) >= critical_threshold:
        critical("CPU for process '%(command_line)s' with PID %(highest_pid)s is %(highest_cpu)s%%" % report)
    elif float(highest_cpu) >= warning_threshold:
        warning("CPU for process '%(command_line)s' with PID %(highest_pid)s is %(highest_cpu)s%%" % report)
    else:
        ok("highest CPU for '%(command_line)s' with PID %(highest_pid)s is %(highest_cpu)s%%" % report)


if __name__ == '__main__':
    main()