#!/usr/bin/perl -w
#
# check_ospf_counters - nagios plugin
#
# by Frank Bulk <frnkblk@iname.com>
# inspiration by Douglas E. Warner <silfreed@silfreed.net>
# inspiration by Christoph Kron <ck@zet.net> - check_ifstatus.pl
# with code for SNMPv3 contributed by Duncan Ferguson
#
# This plugin is a sister to check_bgp_counters.
#
# For those that have no SNMP trap manager, this poll-based approach will
# still catch short OSPF outages because certain counters will increment or
# change on OSPF session failure and re-establishment.  There's no need to
# know or enumerate the IP addresses of the router's OSPF peers - the code
# handles that automatically.
#
# This code does not work with OSPFv3 at this time.
#
# Here is a suggested command definition:
#
# 'check_ospf_counters command definition
# define command{
#     command_name check_ospf_counters
#     command_line perl $USER1$/check_ospf_counters -H $HOSTADDRESS$ -C $ARG1$ -f /tmp/
# }
#
# Here is a suggested service configuration:
# define service{
#     use                     generic-service
#     host_name               router
#     service_description     OSPF
#     contact_groups          router-admins
#     notification_interval   15
#     normal_check_interval   2
#     max_check_attempts      1
#     notification_options    w,c,r
#     check_command           check_ospf_counters!community_string
# }
#
# Several notes:
# - increasing the max_check_attempts could result in missing counter
#   changes, so it's recommended to leave it at '1'
# - the host definition should use an IP address rather than a host name
#   or FQDN
# - make sure the directory and filename is writable by the NAGIOS process.
#   If you first test using 'root' or another user, the cached files may not
#   be overwritable
# - you can change the error level of a counter by modifying the definition
#   in the code below.  In some environments prefixes may change all the
#   time and so those OIDs could be commented out altogether.  Another
#   option is to change the notification option to just 'c,r'
# - if you silence notifications due to an issue with one OSPF session, you
#   won't be notified if another OSPF session on that host goes awry.  This
#   is potentially a feature request: to narrow checking to per OSPF session
#   basis, which would naturally require setting up a separate service check
#   for each OSPF session.
#
# To test from the command-line, try something like this:
#   ./check_ospf_counters -C community_string -H hostip -f /tmp
#
# Release notes
#   none

use strict;
use warnings;
use Net::SNMP;
use Getopt::Long;
use File::Basename;
use IO::Socket;    # provides inet_aton and AF_INET

Getopt::Long::Configure('auto_abbrev', 'no_ignore_case');

my $version = "0.8";
my $TIMEOUT = 30;

my %ERRORS = (
    'OK'       => '0',
    'WARNING'  => '1',
    'CRITICAL' => '2',
    'UNKNOWN'  => '3',
);

# default return value is UNKNOWN
my $state = "UNKNOWN";

# time this script was run
my $runtime = time();

# responses from script
my $answer  = "";
my $oidmsg  = "";
my $oidwarn = 0;    # never incremented: every detected change is critical
my $oidcrit = 0;

# command-line settings
my $needhelp = '';
my $hostname;
my $community = "public";
my $port      = 161;
my @ignorestring;
my $counterFilePath;
my $counterFile;
my $snmpversion         = "2c";
my $snmpv3_username     = "initial";    # SNMPv3 username
my $snmpv3_password     = "";           # SNMPv3 password
my $snmpv3_authprotocol = "md5";        # SNMPv3 hash algorithm (md5 / sha)
my $snmpv3_privprotocol = "des";        # SNMPv3 encryption protocol (des / aes / aes128)
my $snmpv3_privpassword = "";

# OSPF-MIB columns that are polled.  Every instance of these columns is
# indexed by an IPv4 address plus one "address-less" index, i.e. by five
# trailing sub-identifiers; the key-splitting code below relies on that.
my $OID_OSPF_IF_EVENTS  = "1.3.6.1.2.1.14.7.1.15";    # ospfIfEvents
my $OID_OSPF_NBR_STATE  = "1.3.6.1.2.1.14.10.1.6";    # ospfNbrState
my $OID_OSPF_NBR_EVENTS = "1.3.6.1.2.1.14.10.1.7";    # ospfNbrEvents

# polled column => text used when reporting a change
my %snmpOID = (
    $OID_OSPF_IF_EVENTS  => "The number of OSPF events on interface",
    $OID_OSPF_NBR_STATE  => "The OSPF neighbor state for",
    $OID_OSPF_NBR_EVENTS => "The number of OSPF neighbor events for",
);

# ospfNbrState enumeration => readable name
my %ospfNbrState = (
    1 => "down",
    2 => "attempt",
    3 => "init",
    4 => "twoWay",
    5 => "exchangeStart",
    6 => "exchange",
    7 => "loading",
    8 => "full",
);

# snmp related variables
my %snmpIndexes;                                # full OID => current value
my $snmpSysUpTime = "1.3.6.1.2.1.1.3.0";        # sysUpTime.0
my $snmpHostUptime;

# file related variables (stay undefined/empty when no usable cache exists)
my $fileRuntime;
my $fileHostUptime;
my %fileIndexes;                                # full OID => cached value

## main program

# Just in case of problems, let's not hang NetSaint
$SIG{'ALRM'} = sub {
    # the alarm can fire before the options have been parsed
    my $target = defined($hostname) ? $hostname : "(unknown host)";
    print("ERROR: No snmp response from $target (alarm)\n");
    exit $ERRORS{"UNKNOWN"};
};
alarm($TIMEOUT);

# we must have -some- arguments
usage() if scalar(@ARGV) == 0;

my $status = GetOptions(
    "h|help"            => \$needhelp,
    "C|snmpcommunity=s" => \$community,
    "i|ignore-string=s" => \@ignorestring,
    "p|port=i"          => \$port,
    "f|filepath=s"      => \$counterFilePath,
    "H|hostname=s"      => \$hostname,
    "U|username=s"      => \$snmpv3_username,
    "P|password=s"      => \$snmpv3_password,
    "a|authprotocol=s"  => \$snmpv3_authprotocol,
    "e|privprotocol=s"  => \$snmpv3_privprotocol,
    "v|snmpversion=s"   => \$snmpversion,
    "x|privpassword=s"  => \$snmpv3_privpassword,
);

# getting options failed or the user wants help
usage() if !$status || $needhelp;

if (!defined($counterFilePath)) {
    $state  = "UNKNOWN";
    $answer = "Filepath must be specified";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

if (!defined($hostname)) {
    $state  = "UNKNOWN";
    $answer = "Hostname must be specified";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

# setup counterFile now that we have the hostname
$counterFile = "$counterFilePath/$hostname.check_ospf_counters.nagioscache";

# Order matters: the old cache must be read before it is overwritten, and
# the new data is written before any comparison so that every early exit
# below still leaves a fresh cache behind ("recaching").
readolddata();
getSysUpTime();
foreach my $column (keys %snmpOID) {
    getCounters($column);
}
checkIgnores();
outputdata();

# check to see if we pulled data from the cache file or not
if (!defined($fileRuntime)) {
    $state  = "OK";
    $answer = "never cached - caching\n";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

# check host's uptime to see if it goes backward (reboot or counter wrap)
if ($fileHostUptime > $snmpHostUptime) {
    $state  = "WARNING";
    $answer = "uptime goes backward - recaching data\n";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

# check if number of indexes in file is different than our new data
if (scalar(keys(%fileIndexes)) != scalar(keys(%snmpIndexes))) {
    $state  = "WARNING";
    $answer = "number of indexes changed - recaching data\n";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

# check if there are no neighbors at all
if (scalar(keys(%snmpIndexes)) == 0) {
    $state  = "WARNING";
    $answer = "No neighbors\n";
    print "$state: $answer\n";
    exit $ERRORS{$state};
}

# compare every polled value with its cached counterpart
foreach my $key (sort numerically (keys %snmpIndexes)) {
    # a key can be new (same count, different peers) or hold no value
    my $current  = defined($snmpIndexes{$key}) ? $snmpIndexes{$key} : "";
    my $previous = defined($fileIndexes{$key}) ? $fileIndexes{$key} : "";
    next if $current eq $previous;

    my ($column, $int_ip) = split_index_key($key);
    my $what = defined($snmpOID{$column}) ? $snmpOID{$column} : $column;
    my $peer = describe_peer($int_ip);
    $previous = "null" if $previous eq "";

    $oidcrit++;
    $oidmsg .= "$what $peer has changed from '$previous' to '$current'\n";
}

# figure out what state we're in
if ($oidcrit > 0) {
    $state = "CRITICAL";
}
elsif ($oidwarn > 0) {
    $state = "WARNING";
}
else {
    $state = "OK";
}

# setup final message
$answer = "critical $oidcrit, warning $oidwarn\n$oidmsg";
print("$state: $answer");

# append the current state of every OSPF neighbor to the output
foreach my $key (sort numerically (keys %snmpIndexes)) {
    next unless $key =~ /^1\.3\.6\.1\.2\.1\.14\.10\.1\.6\./;
    my ($column, $int_ip) = split_index_key($key);
    my $value = defined($snmpIndexes{$key}) ? $snmpIndexes{$key} : "";
    print "OSPF state with " . describe_peer($int_ip) . ": $value\n";
}

exit $ERRORS{$state};

## subroutines ##

# Print the usage text and exit with the UNKNOWN status.
sub usage {
    print <<END;
== check_ospf_counters v$version ==
Perl SNMP Check Counter plugin for Nagios
Frank Bulk <frnkblk\@iname.com>
checks the OSPF interface/neighbor event counters and neighbor states of a
router and reports when any of them changed since the previous run
Usage:
  check_ospf_counters (-C|--snmpcommunity) <read_community>
      (-H|--hostname) <hostname>
      [-i|--ignore-string] <ignore_string>
      [-p|--port] <port>
      (-f|--filepath) <directory for the cache file>
      [-v|--snmpversion] <1|2c|3>
      [-U|--username] <SNMPv3 username>
      [-P|--password] <SNMPv3 authentication password>
      [-a|--authprotocol] <md5|sha>
      [-x|--privpassword] <SNMPv3 privacy password>
      [-e|--privprotocol] <des|aes|aes128>
END
    exit $ERRORS{"UNKNOWN"};
}

# Sort comparator: orders dotted OIDs component by component.
# Each sub-identifier is packed as a 32-bit big-endian integer so that a
# plain string comparison yields numeric ordering; an 8-bit pack would wrap
# sub-identifiers above 255.
sub numerically {
    pack('N*', split(/\./, $a)) cmp pack('N*', split(/\./, $b));
}

# Split a full instance OID into (column OID, IPv4 address).
# The last five sub-identifiers are the index: four address octets followed
# by one address-less index, which is dropped.
sub split_index_key {
    my $key   = shift;
    my @parts = split(/\./, $key);
    my $ip     = join(".", @parts[$#parts - 4 .. $#parts - 1]);
    my $column = join(".", @parts[0 .. $#parts - 5]);
    return ($column, $ip);
}

# Return "hostname [ip]" when the address reverse-resolves, else just the ip.
sub describe_peer {
    my $ip     = shift;
    my $packed = inet_aton($ip);
    my $name   = defined($packed) ? gethostbyaddr($packed, AF_INET) : undef;
    return $name ? "$name [$ip]" : $ip;
}

# Read in the old data (if it exists).
# Fills $fileRuntime, $fileHostUptime and %fileIndexes.  A missing,
# unreadable or truncated cache leaves $fileRuntime undefined, which the
# main program reports as "never cached".
sub readolddata {
    return unless -e $counterFile;

    open(my $cache, '<', $counterFile) or return;

    my $cachedRuntime = <$cache>;
    my $cachedUptime  = <$cache>;
    if (defined($cachedRuntime) && defined($cachedUptime)) {
        chomp($cachedRuntime);
        chomp($cachedUptime);
        $fileRuntime    = $cachedRuntime;
        $fileHostUptime = $cachedUptime;

        while (my $line = <$cache>) {
            chomp($line);
            next if $line eq "";
            # limit of 2 keeps values intact even if they contain spaces
            my ($cachedKey, $cachedValue) = split(/ /, $line, 2);
            $fileIndexes{$cachedKey} = defined($cachedValue) ? $cachedValue : "";
        }
    }
    close($cache);
    return;
}

# Write the current poll (run time, host uptime, one "oid value" line per
# index) to the cache file.  Exits with WARNING when that is impossible.
sub outputdata {
    if (!(-w $counterFile) && !(-w dirname($counterFile))) {
        $state  = "WARNING";
        $answer = "file $counterFile is not writable\n";
        print("$state: $answer\n");
        exit $ERRORS{$state};
    }

    my $cache;
    if (!open($cache, '>', $counterFile)) {
        $state  = "WARNING";
        $answer = "file $counterFile could not be opened for writing: $!\n";
        print("$state: $answer\n");
        exit $ERRORS{$state};
    }

    print {$cache} "$runtime\n";
    print {$cache} "$snmpHostUptime\n";
    foreach my $key (sort numerically (keys %snmpIndexes)) {
        my $value = defined($snmpIndexes{$key}) ? $snmpIndexes{$key} : "";
        print {$cache} "$key $value\n";
    }

    # buffered write errors (e.g. disk full) only surface at close
    if (!close($cache)) {
        $state  = "WARNING";
        $answer = "file $counterFile could not be written: $!\n";
        print("$state: $answer\n");
        exit $ERRORS{$state};
    }
    return;
}

# Open an SNMP session to $hostname using the command-line settings.
# Returns the session object; prints the error and exits with UNKNOWN when
# the session cannot be created.
sub open_snmp_session {
    my ($session, $error);

    if ($snmpversion eq "1" || $snmpversion eq "2" || $snmpversion eq "2c") {
        # -version must be passed explicitly: Net::SNMP defaults to SNMPv1,
        # which would silently ignore a requested "2c".
        ($session, $error) = Net::SNMP->session(
            -hostname  => $hostname,
            -community => $community,
            -port      => $port,
            -version   => $snmpversion,
        );
    }
    else {
        ($session, $error) = Net::SNMP->session(
            -hostname     => $hostname,
            -port         => $port,
            -username     => $snmpv3_username,
            -authpassword => $snmpv3_password,
            -authprotocol => $snmpv3_authprotocol,
            -privpassword => $snmpv3_privpassword,
            -privprotocol => $snmpv3_privprotocol,
            -version      => $snmpversion,
            -timeout      => 30,
        );
    }

    if (!defined($session)) {
        $state  = "UNKNOWN";
        $answer = $error;
        print "$state: $answer";
        exit $ERRORS{$state};
    }
    return $session;
}

# Get sysUpTime from the host and store it in $snmpHostUptime.
# Exits with WARNING when the host does not answer.
sub getSysUpTime {
    my $session = open_snmp_session();

    # keep TimeTicks as a plain number so uptimes can be compared
    $session->translate([-timeticks => 0x0]);

    my $response = $session->get_request(-varbindlist => [$snmpSysUpTime]);
    if (!defined($response)) {
        $answer = $session->error;
        $session->close;
        $state = "WARNING";
        # NOTE(review): this prints the community string into the plugin
        # output; kept for compatibility with the existing message format.
        print "$state: $answer,$community,$snmpSysUpTime";
        exit $ERRORS{$state};
    }

    $snmpHostUptime = $response->{$snmpSysUpTime};
    $session->close;
    return;
}

# Fetch every instance below the given column OID into %snmpIndexes.
# The column is walked as a table first and, if that fails, requested as a
# single object.  ospfNbrState values are stored by name (e.g. "full").
# Exits with WARNING when neither request succeeds.
sub getCounters {
    my $temp_snmpCounter = shift;

    my $session  = open_snmp_session();
    my $response = $session->get_table(-baseoid => $temp_snmpCounter);
    if (!defined($response)) {
        $response = $session->get_request(-varbindlist => [$temp_snmpCounter]);
    }

    if (!defined($response)) {
        $answer = $session->error;
        $session->close;
        $state = "WARNING";
        # NOTE(review): prints the community string, see getSysUpTime
        print "$state: $answer,$community,$temp_snmpCounter\n";
        exit $ERRORS{$state};
    }

    foreach my $snmpkey (keys %{$response}) {
        my $value = $response->{$snmpkey};

        # SNMPv2c/v3 report a missing object as an exception value instead
        # of an error; that is not a counter and must not be cached
        next if defined($value)
            && $value =~ /^(?:noSuchObject|noSuchInstance|endOfMibView)$/;

        my ($column) = split_index_key($snmpkey);
        if (   $column eq $OID_OSPF_NBR_STATE
            && defined($value)
            && exists $ospfNbrState{$value})
        {
            $value = $ospfNbrState{$value};
        }
        $snmpIndexes{$snmpkey} = $value;
    }

    $session->close;
    return;
}

# Drop every polled key that matches one of the -i/--ignore-string values.
sub checkIgnores {
    foreach my $key (keys(%snmpIndexes)) {
        delete($snmpIndexes{$key}) if arrayInScalar($key, @ignorestring);
    }
    return;
}

# Return 1 when $temp_key ends in one of the given strings followed by one
# more sub-identifier, 0 otherwise.  The strings are used as regular
# expressions, so an unescaped "." in them matches any character.
sub arrayInScalar {
    my ($temp_key, @temp_ignorestring) = @_;

    foreach my $pattern (@temp_ignorestring) {
        return 1 if $temp_key =~ /$pattern\.\d+$/;
    }
    return 0;
}