#!/usr/bin/perl
#
# AUTHORS:
#	Copyright (C) 2008 Altinity Limited
#	Written by Neil Ferguson, kindly sponsored by GotVMail
#
#    This file is part of Opsview
#
#    Opsview is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    Opsview is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Opsview; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
# Nagios-style plugin: reports the CPU usage of one VM (-v) or the sum over
# all VMs on a VMware ESX host, derived from the difference between the
# current SNMP CPU counter and the sample stored by the previous run.
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

use strict;
use warnings;
use lib qw ( /usr/local/nagios/perl/lib );
use Storable qw(lock_store lock_retrieve);
use Net::SNMP;
use Getopt::Std;

# About us
my $script         = "check_snmp_vmware_cpu";
my $script_version = "0.1";
my $hostname       = '';

# Our store (hash ref: VM name => [ sample time, CPU counter ])
my $store;
my $store_dir    = "/usr/local/nagios/var/";
my $store_basefn = "";

# SNMP variables
my $oid_sysDescr = ".1.3.6.1.2.1.1.1.0";    # Used to check whether SNMP is actually responding
my $oid_namebase = ".1.3.6.1.4.1.6876.2.1.1.2.";
my $oid_vmidbase = ".1.3.6.1.4.1.6876.2.1.1.7.";
my $oid_vmbase   = ".1.3.6.1.4.1.6876.";
my $oid_vm_cpu   = "3.1.2.1.3.";
my $community    = "public";                # Default community
my $timeout      = 10;                      # SNMP timeout
my $version      = "2c";
my $warning      = 0;                       # 0 disables the warning threshold
my $critical     = 0;                       # 0 disables the critical threshold
my $result_text  = 0;
my $cpu_util     = 0;
my $vm_name      = "";

# SNMP session and session-creation error text
my ( $s, $e );

# Command line arguments
our ( $opt_h, $opt_H, $opt_C, $opt_t, $opt_w, $opt_c, $opt_v );
getopts("hH:C:t:w:c:av:");

if ($opt_h) {
    usage();
    exit 0;
}

if ($opt_H) {
    $hostname = $opt_H;
}
else {
    print "No hostname specified\n";
    usage();
    exit 3;
}

if ($opt_C) {
    $community = $opt_C;
}

if ($opt_t) {

    # Validity test - must be numeric
    unless ( $opt_t =~ /^\d+$/ ) {
        print "Specify time in seconds - $opt_t is not a valid integer\n";
        exit 3;
    }
    $timeout = $opt_t;
}

# Thresholds
if ($opt_w) {
    if ( $opt_w =~ /^\d+$/ ) {
        $warning = $opt_w;
    }
    else {
        print "Warning value must be an integer\n";
        exit 3;
    }
}

if ($opt_c) {
    if ( $opt_c =~ /^\d+$/ ) {
        $critical = $opt_c;
    }
    else {
        print "Critical value must be an integer\n";
        exit 3;
    }
}

# Compare against the empty string so that a VM literally named "0" works
if ( defined $opt_v && $opt_v ne "" ) {
    $vm_name = $opt_v;
}

# Capture stderr; if the log cannot be opened, discard stderr rather than
# leaving the handle closed.
open( STDERR, '>>', '/tmp/cpu-stderr.log' )
  or open( STDERR, '>', '/dev/null' );

# Set our base store name as we now have our hostname
$store_basefn = "$store_dir/$script-$hostname";

# Print the help text.
# NOTE(review): the heredoc opener and the "Usage:" line were garbled in the
# source and have been reconstructed - confirm the wording against upstream.
sub usage {
    print <<EOF;
--------------------------------------------------------------------
$script $script_version

Usage: $script -H <hostname> -C <community> [...]

Options:
    -H      Hostname or IP address
    -C      SNMP community string
    -t      SNMP timeout (in seconds)
    -w      Warning threshold for CPU usage (%)
    -c      Critical threshold for CPU usage (%)
    -v      Report CPU usage of this VM only

This plugin returns the CPU usage of one VM, or all VMs
on the specified ESX host.

--------------------------------------------------------------------
Copyright 2008 Altinity Limited
Plugin development was sponsored by GotVMail (http://www.gotvmail.com)

This program is free software; you can redistribute it or modify
it under the terms of the GNU General Public License
------------------------------------------------------------------
EOF
    return;
}

# Truncate (not round) a number to two decimal places.
sub two_dp {
    my ($val) = @_;
    return int( $val * 100 ) / 100;
}

# Fetch a single OID. Call this when you know you'll get a single value back.
# Returns the value, or "" when the OID does not exist on the agent.
# Prints a message and exits 3 if the agent does not answer at all.
sub get_oid_value {
    my ($oid) = @_;

    my $response = $s->get_request($oid);
    if ( !defined $response ) {

        # Distinguish "OID missing" from "agent dead" by probing sysDescr
        if ( !defined( $s->get_request($oid_sysDescr) ) ) {
            print "SNMP agent not responding\n";
            exit 3;
        }
        return "";
    }

    # One OID was requested, so the response holds a single value. A lexical
    # is used so a stale value from an earlier call can never leak through.
    my ($result) = values %{$response};
    if (   !defined $result
        || $result eq "noSuchObject"
        || $result eq "noSuchInstance" )
    {
        return "";
    }
    return $result;
}

# Look up the VM id for a VM display name (case-insensitive, exact match).
# Returns undef when no VM with that name is found.
sub get_vm_id {
    my ($name) = @_;

    # Avoid accidental infinite loops by limiting
    # ourselves to 100 VMs. Real solution is to
    # get the whole table.
    for my $i ( 0 .. 99 ) {
        my $candidate = get_oid_value( $oid_namebase . $i );
        last if $candidate eq "";

        # \Q..\E: the name is literal text, not a pattern
        if ( $candidate =~ /\A\Q$name\E\z/i ) {
            my $vmid = get_oid_value( $oid_vmidbase . $i );
            return $vmid ne "" ? $vmid : undef;
        }
    }
    return;
}

# Build the Storable file name used for a VM's samples. Single-VM (-v) and
# all-VM runs use different files. "/" in a VM name is replaced so the name
# cannot escape the store directory.
sub store_filename {
    my ($vm) = @_;
    ( my $safe_vm = $vm ) =~ s{/}{_}g;
    my $suffix = $vm_name ne "" ? "-$safe_vm.dat" : "-$safe_vm-all.dat";
    return $store_basefn . $suffix;
}

# Load the previous sample for a VM.
#   $vm  - VM name (store key)
#   $cpu - current CPU counter, used as the fallback "previous" value
# Returns ( previous sample time, previous CPU counter ). With no usable
# stored sample this is ( 0, $cpu ), which makes the computed usage 0%.
sub get_vm_stats {
    my ( $vm, $cpu ) = @_;
    my $store_fn = store_filename($vm);

    my ( $prev_time, $prev_cpu ) = ( 0, $cpu );

    # Start from an empty store so another VM's data is never carried over
    $store = {};

    # Previous value?
    if ( -f $store_fn ) {

        # lock_retrieve can die on a corrupt file; treat that as "no data"
        my $saved = eval { lock_retrieve($store_fn) };
        if ( ref $saved eq 'HASH' ) {
            $store = $saved;
            my $sample = $store->{$vm};
            if (   ref $sample eq 'ARRAY'
                && defined $sample->[0]
                && defined $sample->[1] )
            {
                ( $prev_time, $prev_cpu ) = @{$sample}[ 0, 1 ];
            }
        }
    }

    print STDERR "Returning time, cpu as $prev_time, $prev_cpu\n";
    return ( $prev_time, $prev_cpu );
}

# Persist the current sample ( now, $cpu ) for a VM. A failed write is
# logged to STDERR but does not abort the check.
sub save_vm_stats {
    my ( $vm, $cpu ) = @_;
    my $store_fn = store_filename($vm);

    # Update the relevant storable
    $store->{$vm} = [ time(), $cpu ];
    my $stored = eval { lock_store( $store, $store_fn ) };
    if ( !$stored ) {
        my $reason = $@ || $! || "unknown error";
        print STDERR "Could not save stats to $store_fn: $reason\n";
    }
    return;
}

# CPU usage in percent (CPU-seconds per wall-clock second * 100) between the
# previous sample and now. Returns 0 when no time has elapsed or when the
# counter went backwards, so neither case can produce a division by zero
# or a negative contribution.
sub cpu_usage_percent {
    my ( $cur_cpu, $prev_cpu, $prev_time ) = @_;
    my $elapsed = time() - $prev_time;
    return 0 if $elapsed <= 0;
    my $usage = ( $cur_cpu - $prev_cpu ) / $elapsed * 100;
    return $usage < 0 ? 0 : $usage;
}

# Walk an SNMP table. Any trailing dot is dropped first so that the base OID
# handed to get_table is plain dotted-decimal.
sub get_snmp_table {
    my ($base_oid) = @_;
    ( my $clean_oid = $base_oid ) =~ s/\.\z//;
    return $s->get_table($clean_oid);
}

# Compute CPU usage for the VM named by -v, or the total across all VMs.
# Also stores the current sample(s) for the next run.
# Returns the usage in percent, or undef if the VM / SNMP data is missing.
sub get_vm_cpu_util {

    # Reporting on a single VM
    if ( $vm_name ne "" ) {
        my $vmid = get_vm_id($vm_name);
        return if !defined $vmid;

        my $cur_cpu = get_oid_value( $oid_vmbase . $oid_vm_cpu . $vmid );
        return if $cur_cpu !~ /\A\d+\z/;

        my ( $prev_time, $prev_cpu ) = get_vm_stats( $vm_name, $cur_cpu );

        # Work out the usage in that time (seconds/sec)
        my $cpu_usage = cpu_usage_percent( $cur_cpu, $prev_cpu, $prev_time );

        # Store the details under the same key get_vm_stats reads from
        save_vm_stats( $vm_name, $cur_cpu );
        return $cpu_usage;
    }

    # Total across all VMs
    my $idmap = get_snmp_table($oid_vmidbase);
    return if !defined $idmap;

    # vmid value => OID it was found at (the OID's last element is the
    # index into the name table)
    my %index_oid_for = reverse %{$idmap};

    my $cpu_table = get_snmp_table( $oid_vmbase . $oid_vm_cpu );
    return if !defined $cpu_table;

    my $total = 0;
    for my $cpu_oid ( keys %{$cpu_table} ) {
        my $cur_cpu = $cpu_table->{$cpu_oid};
        next if !defined $cur_cpu || $cur_cpu !~ /\A\d+\z/;

        # Last OID element of the CPU entry is the vmid
        ( my $snmp_id = $cpu_oid ) =~ s/.*\.//;
        my $index_oid = $index_oid_for{$snmp_id};
        next if !defined $index_oid;
        ( my $vm_index = $index_oid ) =~ s/.*\.//;

        # Get the name; fall back to the id so unnamed VMs do not collide
        my $vm_name_snmp = get_oid_value( $oid_namebase . $vm_index );
        $vm_name_snmp = "vmid-$snmp_id" if $vm_name_snmp eq "";

        # Previous info
        my ( $prev_time, $prev_cpu ) = get_vm_stats( $vm_name_snmp, $cur_cpu );

        # Store the details
        my $cpu_usage = cpu_usage_percent( $cur_cpu, $prev_cpu, $prev_time );
        save_vm_stats( $vm_name_snmp, $cur_cpu );

        # Finally, keep a total
        $total += $cpu_usage;
    }
    return $total;
}

# Create an SNMP session for the given protocol version.
# Returns ( session, error ); session is undef on failure.
sub open_snmp_session {
    my ($snmp_version) = @_;
    return Net::SNMP->session(
        -community => $community,
        -hostname  => $hostname,
        -version   => $snmp_version,
        -timeout   => $timeout,
    );
}

# Create the SNMP session
$version = "2c";
( $s, $e ) = open_snmp_session($version);

if ( !defined $s || !defined( $s->get_request($oid_sysDescr) ) ) {

    # If we can't connect using SNMPv2c lets try as SNMPv1
    $s->close() if defined $s;

    # Built-in sleep() only takes whole seconds; select gives a 0.5s pause
    select( undef, undef, undef, 0.5 );

    $version = "1";
    ( $s, $e ) = open_snmp_session($version);

    if ( !defined $s ) {
        print "UNKNOWN - could not create SNMP session: $e\n";
        exit 3;
    }
    if ( !defined( $s->get_request($oid_sysDescr) ) ) {
        print "Agent not responding, tried SNMP v1 and v2\n";
        exit 3;
    }
}

# Check for an SNMP error first...
if ( $s->error ) {
    print "UNKNOWN - " . $s->error . "|\n";
    exit 3;
}

# Get all the CPU info
$cpu_util = get_vm_cpu_util();
if ( !defined($cpu_util) ) {
    if ( $vm_name ne "" ) {
        print "Could not find VM $vm_name\n";
    }
    else {
        print "Error retrieving CPU data from SNMP\n";
    }
    exit 3;
}

# Sensible display
if ( $cpu_util < 0 ) {
    $cpu_util = 0;
}
$cpu_util = two_dp($cpu_util);

$result_text = "$cpu_util% " . "|cpu_util=$cpu_util%;$warning;$critical;;";

# The results are in...
if ( $cpu_util > $critical && $critical > 0 ) {
    print "CRITICAL - $result_text\n";
    exit 2;
}
elsif ( $cpu_util > $warning && $warning > 0 ) {
    print "WARNING - $result_text\n";
    exit 1;
}

close STDERR;

print "OK - $result_text\n";
exit 0;