#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
  if 0;    # not running under some shell

package main;

# check_smart checks the output of smartctl contained in a file for health,
# temperature and prefail attributes.
#
# A way to create a file containing the output of smartctl is to add a cron
# entry like the following:
# */20 * * * * root sh -c "/usr/sbin/smartctl -a /dev/sda > /var/log/smart.sda"
#
# Some parts of the plugin are based on the plugin check_updates from Matteo
# Corti
#
# Copyright (c) 2012, Bernhard Czech
#
# This module is free software; you can redistribute it and/or modify it
# under the terms of GNU general public license (gpl) version 3,
# or (at your option) any later version.
# See the COPYING file for details.
#
# RCS information
# enable substitution with:
#   $ svn propset svn:keywords "Id Revision HeadURL Source Date"
#
#   $Id$
#   $Revision$
#   $HeadURL$
#   $Date$

use 5.00800;

use strict;
use warnings;

our $VERSION = '0.0.1';

# Nagios::Plugin exports the status constants (OK, WARNING, CRITICAL, UNKNOWN)
# used throughout this script; Date::Parse provides str2time.
use Nagios::Plugin::Getopt;
use Nagios::Plugin::Threshold;
use Nagios::Plugin;
use Date::Parse;

use Readonly;

Readonly our $EXIT_UNKNOWN  => 3;
Readonly our $BITS_PER_BYTE => 8;

# IMPORTANT: Nagios plugins could be executed using embedded perl in this case
#            the main routine would be executed as a subroutine and all the
#            declared subroutines would therefore be inner subroutines
#            This will cause all the global lexical variables not to stay shared
#            in the subroutines!
#
# All variables are therefore declared as package variables...
#
## no critic (ProhibitPackageVars)
use vars qw(
  $exit_message
  $help
  $options
  $plugin
  $threshold
  $status
  @status_lines
);
## use critic

# the script is declared as a package so that it can be unit tested
# but it should not be used as a module: only start the check when the
# file is executed directly (no caller frame exists)
run() if !caller;

##############################################################################
# subroutines

##############################################################################
# Usage     : exit_with_error( $status, $message)
# Purpose   : terminate the check with the given status and message; uses
#             $plugin->nagios_exit once the plugin object exists, otherwise
#             prints "Error: <message>" and exits with $status
# Returns   : n/a
# Arguments : status  : exit status to report
#             message : text describing the problem
# Throws    : n/a
# Comments  : n/a
# See also  : n/a
sub exit_with_error {

    my ( $status, $message ) = @_;

    # no plugin object yet: report on stdout and leave directly
    if ( !$plugin ) {
        #<<<
        print "Error: $message"; ## no critic (RequireCheckedSyscalls)
        #>>>
        exit $status;
    }

    $plugin->nagios_exit( $status, $message );

    return;

}

##############################################################################
# Usage     : verbose("some message string", $optional_verbosity_level);
# Purpose   : print a message when its level is below the verbosity level
#             requested on the command line
# Returns   : n/a
# Arguments : message : message string (mandatory)
#             level   : verbosity level of the message (defaults to 0)
# Throws    : n/a
# Comments  : reads the verbosity level from the package variable $options
# See also  : n/a
sub verbose {

    my ( $message, $level ) = @_;

    exit_with_error( UNKNOWN,
        q{Internal error: not enough parameters for 'verbose'} )
      unless defined $message;

    # a message without an explicit level is treated as level 0
    my $message_level = defined $level ? $level : 0;

    #<<<
    print $message if $message_level < $options->verbose; ## no critic (RequireCheckedSyscalls)
    #>>>

    return;

}

##############################################################################
### subroutines
##
################################################################################
### Usage     : parse_smart($file)
### Purpose   : opens file and parses the contents.
### Returns   : a hash (as a flat key/value list) with the entries found:
###              - time        : epoch seconds parsed from "Local Time is:"
###              - health      : overall-health self-assessment result
###              - temperature : 10th whitespace-separated field of the
###                              Temperature_Celsius line
###              - attr        : { name => { value, worst, thresh } } for
###                              every line marked Pre-fail
###              - device      : text following a leading "DEVICE "
###             entries whose line is missing from the file are not set
### Arguments : $file - path of the file holding the smartctl output
### Throws    : n/a (calls exit_with_error with UNKNOWN if open fails)
### Comments  : n/a
### See also  : n/a
sub parse_smart {

    my $file = shift;

    my %smartvalue;

    # three-argument open with a lexical handle: the file name can never be
    # mistaken for an open mode and the handle is private to this sub
    open( my $smart_fh, '<', $file )
      || exit_with_error( UNKNOWN,
        q{'critical unable to open' (} . $file . q{)} );

    while ( my $line = <$smart_fh> ) {

        # strip the line terminator (including a stray CR) so that the
        # captured values carry no trailing whitespace
        $line =~ s/\s+\z//;

        if ( $line =~ /^Local Time is: (.*)$/ ) {
            my $timestamp = str2time($1);
            $smartvalue{'time'} = $timestamp;
            if ( defined $timestamp ) {
                verbose( 'parsed smart date as timestamp ' . $timestamp . "\n" );
            }
        }
        elsif ( $line
            =~ /SMART overall-health self-assessment test result: (.*)$/ )
        {
            $smartvalue{'health'} = $1;
        }
        elsif ( $line =~ /Temperature_Celsius/ ) {

            # split on q{ } skips leading whitespace, so the field index is
            # the same whether or not the attribute line is indented
            $smartvalue{'temperature'} = ( split q{ }, $line )[9];
        }
        elsif ( $line
            =~ /\d+\s+(\w+).*\s+(\d+)\s+(\d+)\s+(\d+)\s+Pre-fail/ )
        {
            my ( $name, $value, $worst, $thresh ) = ( $1, $2, $3, $4 );
            verbose( 'found pre-fail smart value: '
                  . $name . ' '
                  . $value . ' '
                  . $worst . ' '
                  . $thresh
                  . "\n" );
            $smartvalue{'attr'}{$name} = {
                'value'  => $value,
                'worst'  => $worst,
                'thresh' => $thresh,
            };
        }
        elsif ( $line =~ /^DEVICE (.*)$/ ) {
            $smartvalue{'device'} = $1;
        }

    }

    close $smart_fh;    ## no critic (RequireCheckedSyscalls RequireCheckedClose)

    return %smartvalue;

}

##############################################################################
# Usage     : run();
# Purpose   : main method: parses the command line, validates the options,
#             reads the smartctl dump and reports health, temperature and
#             Pre-fail attributes as a Nagios status line with perfdata
# Returns   : n/a (exits with the computed Nagios status)
# Arguments : n/a
# Throws    : n/a
# Comments  : exits with UNKNOWN on invalid options, unreadable file,
#             missing timestamp or data older than --maxage minutes
# See also  : n/a
sub run {

    $status = OK;

    ################
    # initialization
    $help   = q{};
    $plugin = Nagios::Plugin->new( shortname => 'CHECK_SMART' );

    ########################
    # Command line arguments

    $options = Nagios::Plugin::Getopt->new(
        usage   => 'Usage: %s [OPTIONS]',
        version => $VERSION,
        url     => 'http://example.org',
        blurb   => 'Checks disk states using smart',
    );

    $options->arg(
        spec => '',
        help =>
          'Checks the output of smartctl stored in files within a directory',
    );

    $options->arg(
        spec     => 'file|f=s',
        help     => 'File containing output of smartctl to be check',
        default  => undef,
        required => 1
    );

    $options->arg(
        spec => 'maxage|m=i',
        help =>
'Exit with UNKNOWN if the extended smartctl output is older than INTEGER minutes',
        default => 25
    );

    $options->arg(
        spec => 'tempwarn|W=i',
        help =>
          'Exit with WARNING if the temperature in celsius exceds INTEGER',
        default => 35
    );

    $options->arg(
        spec => 'tempcrit|C=i',
        help =>
          'Exit with CRITICAL if the temperature in celsius exceds INTEGER',
        default => 40
    );

    $options->arg(
        spec => 'warning|w=i',
        help =>
'Exit with WARNING status if Pre-fail attributes are below the INTEGER percentage of threshold',
        default => 120
    );

    $options->arg(
        spec => 'critical|c=i',
        help =>
'Exit with CRITICAL status if Pre-fail attributes are below the INTEGER percentage of threshold',
        default => 100
    );

    $options->getopts();

    ###############
    # Sanity checks

    if ( $options->get('warning') < $options->get('critical') ) {
        exit_with_error( UNKNOWN,
                q{'critical' (}
              . $options->get('critical')
              . q{) must be lower than 'warning' (}
              . $options->get('warning')
              . q{)} );
    }

    # each label is followed by the value of the option it names
    if ( $options->get('tempwarn') > $options->get('tempcrit') ) {
        exit_with_error( UNKNOWN,
                q{'tempcrit' (}
              . $options->get('tempcrit')
              . q{) must not be lower than 'tempwarn' (}
              . $options->get('tempwarn')
              . q{)} );
    }

    if ( $options->get('maxage') < 0 ) {
        exit_with_error( UNKNOWN,
                q{'maxage' (}
              . $options->get('maxage')
              . q{) must be greater or eqal to 0 } );
    }

    # file
    if ( !-r $options->get('file') ) {
        exit_with_error( UNKNOWN,
            q{'file' (} . $options->get('file') . q{) must be readable } );
    }

    $threshold = Nagios::Plugin::Threshold->set_thresholds(
        warning  => $options->get('warning'),
        critical => $options->get('critical'),
    );

    #########
    # Timeout

    alarm $options->timeout;

    my %smartvalue = parse_smart( $options->get('file') );

    # check data age; without a timestamp the age cannot be judged at all
    if ( !defined $smartvalue{'time'} ) {
        exit_with_error( UNKNOWN,
                q{no parsable 'Local Time is:' line found in (}
              . $options->get('file')
              . q{)} );
    }

    if ( $smartvalue{'time'} < time - $options->get('maxage') * 60 ) {
        exit_with_error( UNKNOWN,
                q{'age of data' (}
              . scalar( localtime $smartvalue{'time'} )
              . q{) older than (}
              . $options->get('maxage')
              . q{) minutes'} );
    }

    # general message and status vars
    my $overall_status = OK;
    my @message        = ();

    # raises the overall status for a WARNING/CRITICAL result and remembers
    # which check caused it; any other result is ignored
    my $note_problem = sub {
        my ( $check_status, $label ) = @_;
        if ( $check_status == CRITICAL ) {
            $overall_status = CRITICAL;
        }
        elsif ( $check_status == WARNING ) {
            if ( $overall_status != CRITICAL ) {
                $overall_status = WARNING;
            }
        }
        else {
            return;
        }
        push @message, $label;
        return;
    };

    # check attribute values; the file may contain no Pre-fail attribute, in
    # which case there is simply nothing to iterate over
    my %smart_attr = %{ $smartvalue{'attr'} || {} };

    # sorted so that messages and perfdata keep a stable order between runs
    foreach my $attr_name ( sort keys %smart_attr ) {

        my %values = %{ $smart_attr{$attr_name} };

        # "N:" ranges alert when the checked value drops below N
        my $attr_threshold = Nagios::Plugin::Threshold->set_thresholds(
            warning =>
              int( ( $options->get('warning') / 100 ) * $values{'thresh'} )
              . ':',
            critical =>
              int( ( $options->get('critical') / 100 ) * $values{'thresh'} )
              . ':'
        );

        $note_problem->(
            $attr_threshold->get_status( $values{'worst'} ), $attr_name
        );

        $plugin->add_perfdata(
            label     => $attr_name,
            value     => $values{'worst'},
            threshold => $attr_threshold
        );
    }

    # check temperature (skipped when the file has no Temperature_Celsius
    # line, so no undefined value reaches the threshold or the perfdata)
    if ( defined $smartvalue{'temperature'} ) {

        my $temp_threshold = Nagios::Plugin::Threshold->set_thresholds(
            warning  => $options->get('tempwarn'),
            critical => $options->get('tempcrit')
        );

        $status = $temp_threshold->get_status( $smartvalue{'temperature'} );
        $note_problem->( $status, 'temperature' );

        $plugin->add_perfdata(
            label     => 'temperature',
            uom       => 'C',
            value     => $smartvalue{'temperature'},
            threshold => $temp_threshold
        );
    }

    # check health; a missing health line is treated like a failed one
    if ( !defined $smartvalue{'health'} || $smartvalue{'health'} ne 'PASSED' )
    {
        $overall_status = CRITICAL;
        push @message, 'health';
    }

    if ( $overall_status == OK ) {
        push @message, 'Health, temperature and pre-fail attributes are ok';
    }

    # Nagios::Plugin does not support the addition Nagios 3 status lines
    # -> we do it manually
    print 'SMART '    ## no critic (RequireCheckedSyscalls)
      . $Nagios::Plugin::STATUS_TEXT{$overall_status} . ' - '
      . join( '; ', @message ) . ' |';

    for my $pdata ( @{ $plugin->perfdata } ) {
        print q{ } . $pdata->perfoutput; ## no critic (RequireCheckedSyscalls)
    }

    print "\n";

    exit $overall_status;

    return;

}

1;