#!/usr/bin/env python
"""
check_file_system: Compare checksums of files against a list of cached
checksums for use as a nagios plugin.

Returns warning if:
- new file found, i.e. file path not yet in cache
- file not readable, e.g. error on access
(not yet: - file not found, i.e. a path from the cache does no longer exist
 in the file system)

Returns critical if:
- checksum from cache does not match checksum of path
- checksum cache has been modified (without updating the separately kept
  checksum for the cache.)

Handles dangling symbolic links if:
- checksum cache contains special check sum "-1"
"""

import atexit
import datetime
import errno
import os
import re
import sys
import time
from copy import copy
from hashlib import md5
from stat import S_IWUSR, S_IRUSR, S_IRGRP, S_IROTH

# Setup

# Avoid memory issues by restricting how many bytes should be processed by a
# single read() operation
CFS_BUFSIZ = 8192

# Keep results from initial run in this location. Update whenever files had
# to be changed.
# Security: Do __not__ use a local cache.
# TODO: Download cache from server
CFS_CACHE = "/etc/nagios/file_system_cache.csv"

# Define character to use for separating file name from checksum in CFS_CACHE
CFS_SEPCHAR = ';'

# Default list of items (files or directories) to be checked. Command line
# overwrites.
CFS_CHECK_PATH = [
    '/etc',
    '/usr/lib',
]

# Separate checksum for CFS_CACHE, create by running this command:
#   check_file_system.py -i CFS_CACHE | cut -d'CFS_SEPCHAR' -f2 > CFS_CACHE_CHECKSUM
# Required if CFS_CACHE is to be checked by check_file_system.
CFS_CACHE_CHECKSUM = "/etc/nagios/file_system_cache.md5"

# Objects from CFS_CHECK_PATH matching any one of these regular expression
# patterns won't be checked. Always exclude CFS_CACHE_CHECKSUM since its
# checksum depends on CFS_CACHE, i.e. you will always produce a checksum error
# when checking CFS_CACHE_CHECKSUM.
CFS_EXCLUDE_PATTERN = [
    re.compile(r'^/proc'),
    re.compile(r'~$'),
    re.compile(r'\.swp$'),
    re.compile(r'\.tmp$'),
    # The path is a literal, not a regular expression: escape it so that '.'
    # does not match arbitrary characters.
    re.compile(re.escape(CFS_CACHE_CHECKSUM)),
]

# If nagios is running without root privileges (recommended and default) and
# check_file_system.py should check read-protected objects, you need to run
# check_file_system.py via password-less sudo. This requires changes to the
# /etc/sudoers.d directory and possibly /etc/sudoers:
# - Sudo should be allowed for non-interactive sessions, i.e. add or enable
#   the following entry in /etc/sudoers by running "sudo visudo"
#       Defaults !requiretty
# - Allow privileged execution of check_file_system.py for the user running
#   nagios plugins, e.g. if all your plugins are run by user "nagios" and
#   check_file_system.py has been added to the default plugin installation
#   path, create or add this entry to /etc/sudoers.d/nagios:
#       nagios ALL = NOPASSWD: /usr/local/libexec/nagios/plugins/check_file_system.py
# If password-less sudo is not an option and you need to check read-protected
# files, you could run check_file_system.py as privileged user, e.g. as root
# cron job and redirect its output to a file. Use the
# check_file_system_cron_job plugin to interpret this output from nagios.
# To enable output redirection, set CFS_CRON_MODE to True.
CFS_CRON_MODE = True
CFS_STDOUT_WHEN_IN_CRON_MODE = '/var/log/nagios/check_file_system.log'
WORKING_LOG = "{}.tmp".format(CFS_STDOUT_WHEN_IN_CRON_MODE)

# End Setup

# Globals

# Save global state for exit code
global_exit_code = 0
# Output streams; bound by init_globals() before any check is run.
out = None
err = None


def clean_exit():
    """Replace log file from last run by renaming current log when CFS_CRON_MODE is True.

    Registered via atexit by init_globals() in cron mode. Closes the working
    log, moves it over the previous log and makes it world-readable
    (rw-r--r--).
    """
    if CFS_CRON_MODE and out is not None:
        out.close()
        os.rename(WORKING_LOG, CFS_STDOUT_WHEN_IN_CRON_MODE)
        os.chmod(CFS_STDOUT_WHEN_IN_CRON_MODE,
                 S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH)


def init_globals(cron_mode=True):
    """Bind the module level output streams ``out`` and ``err``.

    @cron_mode: if True, ``out`` is a freshly created WORKING_LOG file (its
                directory is created when missing), clean_exit() is
                registered to run at interpreter exit and a start banner is
                written; otherwise ``out`` is sys.stdout.
    ``err`` is always sys.stderr.
    """
    global out
    global err
    if cron_mode:
        log_dir = os.path.dirname(WORKING_LOG)
        # log_dir is empty for a bare file name; makedirs('') would fail.
        if log_dir and not os.path.isdir(log_dir):
            os.makedirs(log_dir, mode=0o755)
        out = open(WORKING_LOG, 'w')
        atexit.register(clean_exit)
        timestamp = int(time.time())
        out.write("{} -- {} -- check_file_system.py started\n".format(
            timestamp,
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp)),
        ))
    else:
        out = sys.stdout
    err = sys.stderr


class ChecksumCache(object):
    """Read expected checksums from location given and provide access to
    stored checksum of previously known file system objects."""

    def __init__(self, cache_location):
        """Load all entries from the cache file at cache_location.

        Raises IOError if the cache file can not be read and ValueError if
        it contains a malformed line.
        """
        self.cache_location = cache_location
        self.cache = {}
        self._setup()
        # override checksum for checksum cache (must be created and kept
        # separately)
        if CFS_CACHE in self.cache:
            try:
                with open(CFS_CACHE_CHECKSUM, "r") as f:
                    self.cache[CFS_CACHE] = f.read().strip()
            except IOError:
                out.write("Error: If local checksum cache is included by itself, you must provide its checksum in a separate file, see configuration.\n")

    def _setup(self):
        """Parse the cache file into self.cache (path -> checksum).

        Blank lines and lines starting with '#' are ignored. Every other
        line must read <path><CFS_SEPCHAR><checksum>. The line is split on
        the last separator because a checksum never contains the separator
        while a path may. A later duplicate entry replaces an earlier one
        after a warning has been written to ``out``.
        """
        with open(self.cache_location, 'r') as f:
            for lineno, line in enumerate(f, 1):
                entry = line.rstrip('\r\n')
                if not entry.strip() or entry.startswith('#'):
                    continue
                p, sep, checksum = entry.rpartition(CFS_SEPCHAR)
                checksum = checksum.strip()
                if not sep or not p or not checksum:
                    raise ValueError("Read error in line {} of cache file {}: Could not split '{}' on '{}' to separate path from checksum".format(
                        lineno, self.cache_location, entry, CFS_SEPCHAR
                    ))
                if p in self.cache:
                    out.write("WARN: Duplicate entry for {} in cache {}\n".format(
                        p, self.cache_location
                    ))
                self.cache[p] = checksum

    def get_checksum(self, path):
        """Return the cached checksum for path; raises KeyError if unknown."""
        return self.cache[path]


def get_checksum(path):
    """Return MD5 checksum of file system object at path, if existing.

    The object is read as raw bytes in chunks of CFS_BUFSIZ so that binary
    content hashes correctly and large files do not exhaust memory.
    Raises IOError if path can not be opened or read.
    """
    cs = md5()
    with open(path, 'rb') as f:
        while True:
            buf = f.read(CFS_BUFSIZ)
            if not buf:
                break
            cs.update(buf)
    return cs.hexdigest()


def check_file(path, expected_checksum):
    """Compare checksum of file system object against expected_checksum.

    @path: file system object location (absolute)
    @expected_checksum: expected checksum value; the special value "-1"
                        marks a path known to be a dangling symbolic link
    @return: code, details
             code will be 0 if checksum matches expected_checksum, 1 if an
             exception occurred or 2 if checksum do not match
             details may contain additional information like exception
             messages.
    """
    try:
        cs = get_checksum(path)
    except IOError as e:
        if e.errno == errno.ENOENT and os.path.islink(path):
            # Dangling symbolic link: acceptable only if the cache says so.
            if expected_checksum == "-1":
                return 0, 'OK'
            return 1, "WARN: Dangling symbolic link: {}".format(path)
        # Any other read error is a warning, even if the cache holds "-1":
        # that marker vouches for dangling links only.
        return 1, "WARN: {}".format(e)
    if cs == expected_checksum:
        return 0, 'OK'
    return 2, "CRITICAL: Checksum mismatch for {}".format(path)


def _matching_exclude_pattern(path):
    """Return the first pattern of CFS_EXCLUDE_PATTERN matching path, or None."""
    for rexp in CFS_EXCLUDE_PATTERN:
        if rexp.search(path):
            return rexp
    return None


def check_path(cache, path, files):
    """Check (or, without a cache, list) every non-directory in files.

    @cache: ChecksumCache to compare against, or None for update mode in
            which "<path><CFS_SEPCHAR><checksum>" lines are printed to
            stdout instead of being checked
    @path: directory containing the entries of files
    @files: names relative to path; directories among them are skipped
    Raises global_exit_code to the worst result code seen.
    """
    global global_exit_code
    for name in files:
        f = os.path.normpath(os.path.join(path, name))
        if os.path.isdir(f):
            continue
        if cache is None:
            # Assume update mode, output forced to stdout
            try:
                checksum = get_checksum(f)
            except IOError as e:
                # handle special case of dangling symbolic link:
                if e.errno == errno.ENOENT and os.path.islink(f):
                    err.write("WARN: Dangling symbolic link in {}\n".format(f))
                    sys.stdout.write("{}{}-1\n".format(f, CFS_SEPCHAR))
                else:
                    err.write("WARN: Skipped {} due to IOError on read.\n".format(f))
            else:
                sys.stdout.write("{}{}{}\n".format(f, CFS_SEPCHAR, checksum))
            continue
        # match patterns vs. the real thing
        match_path = os.path.realpath(f)
        rexp = _matching_exclude_pattern(match_path)
        if rexp is not None:
            reason = "real path {} ".format(match_path) if match_path != f else ""
            err.write("Info: {} skipped since {}matched by exclude pattern '{}'.\n".format(f, reason, rexp.pattern))
            continue
        try:
            expected = cache.get_checksum(f)
        except KeyError:
            details = "WARN: {} not present in checksum cache".format(f)
            code = 1
        else:
            code, details = check_file(f, expected)
        if code != 0:
            out.write("{}\n".format(details))
        global_exit_code = max(code, global_exit_code)


def main(path_list, ignore_cache=False, cron_mode=True):
    """Check all files below the entries of path_list.

    @path_list: files and/or directories; directories are walked recursively
    @ignore_cache: if True run in checksum setup mode, i.e. print fresh
                   checksums instead of comparing against CFS_CACHE
    @cron_mode: if True additionally report a cache failure and the final
                exit code to ``out``
    @return: global_exit_code, i.e. the worst result code of all checks.
             Exits the process with code 2 if CFS_CACHE can not be loaded.
    """
    if ignore_cache:
        err.write("Checksum setup mode, ignoring entries from {}\n".format(CFS_CACHE))
        cache = None
    else:
        try:
            cache = ChecksumCache(CFS_CACHE)
        except Exception as e:
            errmsg = "Error: Failed to initialize file system cache from {}: {}\n".format(CFS_CACHE, e)
            err.write(errmsg)
            if cron_mode:
                out.write(errmsg)
            sys.exit(2)
    for path in path_list:
        if os.path.isdir(path):
            # os.walk lists symbolic links to directories as directories and
            # does not descend into them; check_path skips directories
            # anyway, so handing over the non-directories is sufficient.
            for dirpath, _dirnames, filenames in os.walk(path):
                check_path(cache, dirpath, filenames)
        else:
            check_path(cache, path, [""])
    if global_exit_code <= 0 and not ignore_cache:
        out.write("OK: Checksums matched for {}\n".format(path_list))
    if cron_mode:
        out.write("{} EXITCODE\n".format(global_exit_code))
    return global_exit_code


if __name__ == '__main__':
    ignore_cache = False
    path_list = []
    for arg in sys.argv[1:]:
        if arg == "-i":
            # Setup mode prints checksums to stdout, never to the cron log.
            ignore_cache = True
            CFS_CRON_MODE = False
        else:
            path_list.append(arg)
    if not path_list:
        path_list = CFS_CHECK_PATH
    init_globals(cron_mode=CFS_CRON_MODE)
    retval = main(path_list, ignore_cache=ignore_cache, cron_mode=CFS_CRON_MODE)
    if retval != 0 and CFS_CRON_MODE:
        err.write("Failed with exit code {}, see {} for details.\n".format(retval, CFS_STDOUT_WHEN_IN_CRON_MODE))
    sys.exit(retval)