#!/bin/bash
#
# $Id: check_ovm3 688 2013-02-01 21:36:09Z tbr $
#
# (c) Thorsten Bruhns (tbruhns@gmx.de)
# License: GNU General Public License
#
# This Nagios plugin was created to check Oracle status
#
# Requirements:
# - OracleVM 3.1.1 or newer
#   This plugin only works with OVM 3.1.1 or newer. Older versions don't have the
#   new SSHCLI.
#   Some details are missing when running under version 3.1.1.
#
# - expect
#   This plugin needs expect on the machine where the plugin is started.
#   http://expect.sourceforge.net/
#   The executable for expect must be at /usr/bin/expect
# - expect for OracleLinux5
#   wget http://mirror.centos.org/centos/5/os/x86_64/CentOS/expect-5.43.0-8.el5.x86_64.rpm
#   yum install -y --nogpgcheck expect-5.43.0-8.el5.x86_64.rpm
#
# - SSH-Connection to OVM-Manager on Port 10000
#   The information is gathered with the new OVMCLI. The connection is done
#   over Port 10000 (Default Port from OVM-Manager for SSHCLI)
#
# - Password for admin is required
#   The connection to the SSHCLI is done with a password. There is a stupid restriction
#   in OVM-Manager that makes public key authentication in monitoring impossible.
#   The password is visible on the process list while the script is executing!
#
# - ssh-hostkeys are not stored
#   This is needed because some NRPE-Servers are unable to write the known_hosts
#   => Connections would be impossible. :-(
#
# What could be checked?
# Currently there are 3 checks implemented:
#
# - filesystem
#   Checks used space in the filesystem and returns a Warning or Critical
#   Example:
#   check_ovm3 -p secret -a filesystem -n fs_Samsung_750GB -w 51 -c 75 -H localhost
#   OK - Filesystem Used Space 40% in fs_Samsung_750GB warn(51) crit(75) |pctused=40; Total=698; Used=281
#
# - server
#   Checks the state of the server. Result is CRITICAL when server is not in 'Running'-Mode
#   WARNING when Server is in Maintenance-Mode
#   Example:
#   check_ovm3 -p secret -a server -n ovm3server -H localhost
#   OK - ovm3server is Running ovs-release 3.2.1-494 Processors 3 OVM-Server-IP 192.168.100.200 Maintenance Mode=Off Memory=16.0 Memory Dom0=0.82 CPU-Type=AMD Phenom(tm) II X3 705e Processor CPU-Speed=2.494557
#
# - vm
#   Checks the state of the virtual machine. Result is CRITICAL when VM is not in 'Running'-Mode
#   There is some statistical data for the virtual machine like RAM and CPUs.
#   The data in '()' are the allowed maximum values while running the vm.
#   Example:
#   check_ovm3 -p secret -a vm -n rac1 -H localhost
#   OK - rac1 is Running on ovm3server Repository=Samsung_750GB HA-Mode=No CPUs 3(3) RAM 1500(3000) OS=Oracle Linux 6 Dom-Type=Xen PVM
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# History:
# Date
# 27.01.2013 Initial Version
# 01.02.2013 Added parameter -u for username

PROGNAME=$(basename "$0")
# Directory part of $0. Not referenced below; kept for backward compatibility.
PROGPATH=$(printf '%s\n' "$0" | sed -e 's,[\/][^\/][^\/]*$,,')
REVISION='$Revision: 688 $ $Date: 2013-02-01 22:36:09 +0100 (Fr, 01 Feb 2013) $'

#######################################
# Print the short usage synopsis.
# Globals:   PROGNAME (read)
# Outputs:   usage lines on stdout
#######################################
print_usage() {
  echo "Usage:"
  echo " $PROGNAME -p <password> -H <ovmhost> -a <action> -n <name> -w <warning> -c <critical>"
  echo " $PROGNAME -p <password> -H <ovmhost> -a filesystem -n <name> -w <warning> -c <critical>"
  echo " $PROGNAME -p <password> -H <ovmhost> -a server -n <name>"
  echo " $PROGNAME -p <password> -H <ovmhost> -a vm -n <name>"
  echo " $PROGNAME -h"
}

#######################################
# Print the full help text (revision, usage and option descriptions).
# Globals:   PROGNAME, REVISION (read)
# Outputs:   help text on stdout
#######################################
print_help() {
  echo "$PROGNAME $REVISION"
  echo ""
  print_usage
  echo ""
  echo "Check Resouce of an Oracle VM3 Environment"
  echo ""
  echo "-H/--ovmhost Hostname of OVM3-Manager"
  echo " defaults to localhost when not passed"
  echo "-u/--username Username for login into OVM-Manager (Default: admin)"
  echo "-p/--password Password of admin Account on OVM3-Manager"
  echo "-a/--action what should be checked? allowed values:"
  echo " filesystem : checks for filesystemusage for given name"
  echo " server : checks the state of given server"
  echo " vm : checks the state of given vm"
  echo "-n/--name Name for given Action"
  echo "-c/--critical Critical Value for Check"
  echo " only valid for action filesystem"
  echo "-w/--warning Warning Value for Check"
  echo " only valid for action filesystem"
  echo "-d/--debug Debug mode. Activates 'set -x' while executing the script"
  echo "-t/--timeout Timeout in expect for SSHCLI. Default 15 seconds"
  echo ""
  echo ""
  echo " action=filesystem"
  echo " warning and critical are used space in %"
}

#######################################
# Compare a value against the warning/critical thresholds.
# Globals:   CritValue, WarnValue (read, each defaults to 100),
#            NagiosOK, NagiosWarn, NagiosCrit (read)
# Arguments: $1 - non-negative integer to evaluate
# Returns:   NagiosCrit when $1 is missing or not an integer, or when it is
#            greater than the critical threshold; NagiosWarn when it is
#            greater than the warning threshold; NagiosOK otherwise
#######################################
eval_state() {
  local current=${1:-}

  # A missing or non-numeric value means the manager delivered nothing usable.
  # This must never be reported as OK.
  if [[ ! "${current}" =~ ^[0-9]+$ ]]; then
    return "${NagiosCrit}"
  fi

  # '[' compares as plain decimal; '[[ -lt ]]' and '(( ))' would read a
  # leading zero as octal.
  if [ "${CritValue:-100}" -lt "${current}" ]; then
    return "${NagiosCrit}"
  fi
  if [ "${WarnValue:-100}" -lt "${current}" ]; then
    return "${NagiosWarn}"
  fi
  return "${NagiosOK}"
}

#######################################
# Translate a numeric Nagios state into its text label.
# Globals:   NagiosOK, NagiosWarn, NagiosCrit, NagiosUnknown and the
#            matching *Text variables (read)
# Arguments: $1 - numeric state
# Outputs:   the label on stdout; nothing for an unrecognised state
#######################################
print_state() {
  local state=${1:-}
  case "${state}" in
    "${NagiosOK}")      echo "${NagiosOKText}" ;;
    "${NagiosWarn}")    echo "${NagiosWarnText}" ;;
    "${NagiosCrit}")    echo "${NAGIOSCritText}" ;;
    "${NagiosUnknown}") echo "${NagiosUnknownText}" ;;
  esac
}

#######################################
# Define the constants used by the plugin.
# Globals:   OVMSSHPort, EXPECTCMD, Nagios* (written)
#######################################
set_env() {
  # Default Port for OVM3 is 10000
  OVMSSHPort=10000
  EXPECTCMD=/usr/bin/expect

  NagiosOK=0
  NagiosWarn=1
  NagiosCrit=2
  NagiosUnknown=3

  NagiosOKText="OK"
  NagiosWarnText="WARNING"
  NAGIOSCritText="CRITICAL"
  NagiosUnknownText="UNKNOWN"
}

#######################################
# Parse the command line.
# Globals:   Debug, OVMUser, EXPECTTimeout, OVMHostname, CritValue, WarnValue,
#            OVMPPassword, OVMAction, OVMparameter (written)
# Arguments: the script's command line ("$@")
# Exits:     0 after printing help; UNKNOWN (3) on invalid options
#######################################
set_param() {
  Debug=N
  # we use admin as default login user. This could be changed with -u
  OVMUser=admin
  # we set Timeout for expect to 15 Seconds - could be overwritten with parameter -t
  EXPECTTimeout=15

  local short_opts="hdH:p:a:n:c:w:u:t:"
  # --name is the documented long form of -n. --parameter is the historical
  # spelling; it now takes an argument like -n does.
  local long_opts="help,ovmhost:,password:,action:,debug,timeout:,critical:,warning:,name:,parameter:,username:"
  local parsed

  if ! parsed=$(getopt -s bash --options "${short_opts}" \
    --longoptions "${long_opts}" --name "${PROGNAME}" -- "$@"); then
    # getopt already wrote the details to stderr. A bare 'exit' here would
    # return 0 and let Nagios show the service as OK.
    echo "UNKNOWN - invalid command line arguments"
    exit "${NagiosUnknown:-3}"
  fi

  # Standard getopt idiom: the output is shell-quoted by getopt itself.
  eval set -- "${parsed}"

  while true; do
    case "$1" in
      -h|--help)
        print_help
        exit 0
        ;;
      -d|--debug)
        Debug=Y
        set -x
        shift
        ;;
      -H|--ovmhost)
        if [[ -n "$2" ]]; then OVMHostname=${2}; fi
        shift 2
        ;;
      -u|--username)
        if [[ -n "$2" ]]; then OVMUser=${2}; fi
        shift 2
        ;;
      -t|--timeout)
        if [[ -n "$2" ]]; then EXPECTTimeout=${2}; fi
        shift 2
        ;;
      -c|--critical)
        if [[ -n "$2" ]]; then CritValue=${2}; fi
        shift 2
        ;;
      -w|--warning)
        if [[ -n "$2" ]]; then WarnValue=${2}; fi
        shift 2
        ;;
      -p|--password)
        if [[ -n "$2" ]]; then OVMPPassword=${2}; fi
        shift 2
        ;;
      -a|--action)
        if [[ -n "$2" ]]; then OVMAction=${2}; fi
        shift 2
        ;;
      -n|--name|--parameter)
        if [[ -n "$2" ]]; then OVMparameter=${2}; fi
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        # Guards against an endless loop if an option is registered with
        # getopt but not handled above.
        echo "UNKNOWN - unhandled option: $1"
        exit "${NagiosUnknown:-3}"
        ;;
    esac
  done
}

#######################################
# Validate the environment and the parsed parameters.
# Globals:   EXPECTCMD, OVMAction, OVMparameter, EXPECTTimeout (read),
#            OVMHostname (read, defaulted to localhost)
# Exits:     CRITICAL when expect, the action or the name is missing;
#            UNKNOWN when the timeout is not an integer
#######################################
check_parameter() {
  # is expect available?
  if [[ ! -x "${EXPECTCMD}" ]]; then
    echo "$(print_state "${NagiosCrit}") - Cannot find expect at ${EXPECTCMD}"
    exit "${NagiosCrit}"
  fi

  # if hostname is not given we go to localhost!
  if [[ -z "${OVMHostname:-}" ]]; then
    OVMHostname=localhost
  fi

  # we can't work without an action :-(
  if [[ -z "${OVMAction:-}" ]]; then
    echo "$(print_state "${NagiosCrit}") - Missing action parameter."
    exit "${NagiosCrit}"
  fi

  # every action queries one named object
  if [[ -z "${OVMparameter:-}" ]]; then
    echo "$(print_state "${NagiosCrit}") - Missing name parameter."
    exit "${NagiosCrit}"
  fi

  if [[ ! "${EXPECTTimeout}" =~ ^-?[0-9]+$ ]]; then
    echo "$(print_state "${NagiosUnknown}") - timeout must be an integer (got '${EXPECTTimeout}')"
    exit "${NagiosUnknown}"
  fi
}

#######################################
# Read one "tag>content" chunk (everything up to the next '<') from stdin.
# Globals:   ENTITY, CONTENT (written; the caller may declare them local)
# Returns:   status of 'read' (non-zero at end of input)
#######################################
read_dom() {
  # split the xml from ovm in entity and content!
  local IFS=\>
  read -r -d \< ENTITY CONTENT
}

#######################################
# Return the PropertyValue that follows the first PropertyName equal to $2.
# Arguments: $1 - XML text as captured from the CLI session
#            $2 - property name to look up
# Outputs:   the value on stdout, surrounding whitespace removed;
#            nothing when the property is not present
#######################################
get_value_XML1() {
  local __XMLString=${1}
  local __SearchEntity=${2}
  local entity_found=N
  local ENTITY CONTENT value

  # Runs of blanks/tabs/newlines are squeezed to one blank. This is what the
  # former unquoted 'echo' did, but without glob expansion of the output.
  while read_dom; do
    if [[ "${entity_found}" == 'N' && "${ENTITY}" == 'PropertyName' \
      && "${CONTENT}" == "${__SearchEntity}" ]]; then
      # we found the entity, now we need the next value
      entity_found=Y
    fi
    if [[ "${entity_found}" == 'Y' && "${ENTITY}" == 'PropertyValue' ]]; then
      value=${CONTENT#"${CONTENT%%[![:space:]]*}"}
      value=${value%"${value##*[![:space:]]}"}
      printf '%s\n' "${value}"
      return 0
    fi
  done < <(printf '%s\n' "${__XMLString}" | tr -s ' \t\n' ' ')
}

#######################################
# Run one OVM CLI command through ssh/expect and capture the session output.
# Globals:   EXPECTCMD, EXPECTTimeout, OVMSSHPort, OVMUser, OVMHostname,
#            OVMPPassword, NagiosCrit, NAGIOSCritText (read),
#            OVMCommand, XMLResult, retcode (written)
# Arguments: $1 - CLI command to execute
# Returns:   0 on success
# Exits:     CRITICAL on any failure reported by expect
#######################################
do_ssh() {
  OVMCommand=${1}
  local log_file=/tmp/nagios_checkfs.log
  local expect_script

  # we start ssh with no hostkeys due to problems when running the script
  # under NRPE. Sometimes NRPE is unable to open a ssh connection due to missing
  # ability to write in ~/.ssh
  # ignore the security risk due to 'man in the middle' attack
  #
  # All variable data reaches Tcl through the environment rather than being
  # pasted into the script text. Braces, brackets and quotes in the password
  # or object name then cannot break or inject Tcl code, and the password does
  # not appear in the argument list of the expect process.
  expect_script='
set timeout $env(OVM3_EXPECT_TIMEOUT)
set ovm_password $env(OVM3_SSH_PASSWORD)
set ovm_command $env(OVM3_CLI_COMMAND)
# keep the password out of the environment of the spawned ssh
unset env(OVM3_SSH_PASSWORD)
spawn ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p $env(OVM3_SSH_PORT) $env(OVM3_SSH_USER)@$env(OVM3_SSH_HOST)
# eof to return 1 for invalid logins or unknown host etc.
expect_after eof {exit 1}
expect {
  "*assword" {
    send -- $ovm_password
    send "\n"
  }
}
# we got a login. normal completion results in return 0
expect_after eof {exit 0}
expect {
  "OVM>" { send "set endlineChars=CRLF\r" }
  timeout { exit 1 }
}
expect {
  "Status: Success" { send "set outputMode=XML\r" }
  timeout { exit 2 }
}
expect {
  "Success" { send -- "$ovm_command\r" }
  timeout { exit 3 }
}
expect {
  "Success" { send "exit\r" }
  timeout { exit 4 }
}
expect eof'

  XMLResult=$(
    OVM3_EXPECT_TIMEOUT="${EXPECTTimeout}" \
    OVM3_SSH_PORT="${OVMSSHPort}" \
    OVM3_SSH_USER="${OVMUser}" \
    OVM3_SSH_HOST="${OVMHostname}" \
    OVM3_SSH_PASSWORD="${OVMPPassword:-}" \
    OVM3_CLI_COMMAND="${OVMCommand}" \
      "${EXPECTCMD}" -c "${expect_script}"
  )
  retcode=${?}

  # check some returncodes from expect
  case "${retcode}" in
    0)
      # all is fine!
      return 0
      ;;
    1)
      # Keep the session transcript for troubleshooting. The path is
      # predictable, so never write through a symlink, and never let a
      # logging problem change the plugin result.
      if [[ ! -L "${log_file}" ]]; then
        { printf '%s\n' "${XMLResult}" >> "${log_file}"; } 2>/dev/null
      fi
      echo "${NAGIOSCritText} - Login not possible! Timeout ${EXPECTTimeout} Try ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p ${OVMSSHPort} ${OVMUser}@${OVMHostname}"
      exit "${NagiosCrit}"
      ;;
    4)
      echo "${NAGIOSCritText} - Cannot get result for command: ${OVMCommand}"
      exit "${NagiosCrit}"
      ;;
    *)
      # we have a problem in getting the XMLResult from OVM-Manager
      # maybe the credentials are wrong?
      echo "${NAGIOSCritText} - Unable to get result from OVM-Manager Return Copde ${retcode}"
      exit "${NagiosCrit}"
      ;;
  esac
}

#######################################
# Extract NAME from a CLI reference of the form "id [NAME]".
# Arguments: $1 - reference string
# Outputs:   text between the first '[' and the following ']' on stdout;
#            the input unchanged when it contains no brackets
#######################################
_bracket_name() {
  printf '%s\n' "${1}" | cut -d"[" -f2 | cut -d"]" -f1
}

#######################################
# Filesystem check: used space in percent against -w/-c.
# Globals:   OVMparameter, WarnValue, CritValue, XMLResult (read),
#            NagiosState (written)
# Outputs:   Nagios status line with performance data
#######################################
_check_filesystem() {
  local total used pct total_int used_int pct_int threshold

  for threshold in "${WarnValue:-}" "${CritValue:-}"; do
    if [[ -n "${threshold}" && ! "${threshold}" =~ ^[0-9]+$ ]]; then
      echo "$(print_state "${NagiosUnknown}") - warning and critical must be integers (got '${threshold}')"
      exit "${NagiosUnknown}"
    fi
  done

  # are warn and crit values ok? Only comparable when both were given.
  if [[ -n "${WarnValue:-}" && -n "${CritValue:-}" ]] \
    && [ "${CritValue}" -lt "${WarnValue}" ]; then
    echo "$(print_state "${NagiosCrit}") - critical (${CritValue}) must be > warning (${WarnValue}) value!"
    exit "${NagiosCrit}"
  fi

  # get Data from OVM-Manager with SSHCLI
  do_ssh "show filesystem name=${OVMparameter}"

  total=$(get_value_XML1 "${XMLResult}" "Total Size (GiB)")
  used=$(get_value_XML1 "${XMLResult}" "Used Size (GiB)")
  pct=$(get_value_XML1 "${XMLResult}" "Used %")

  # Cut values at '.'
  total_int=${total%%.*}
  used_int=${used%%.*}
  pct_int=${pct%%.*}

  eval_state "${pct_int}"
  NagiosState=${?}

  # The thresholds shown are the ones eval_state really applied.
  printf '%s %s\n' \
    "$(print_state "${NagiosState}") - Filesystem Used Space ${pct_int:-Error }% in " \
    "${OVMparameter} warn(${WarnValue:-100}) crit(${CritValue:-100}) |pctused=${pct_int}; Total=${total_int}; Used=${used_int}"
}

#######################################
# Server check: CRITICAL unless 'Running', WARNING in maintenance mode.
# Globals:   OVMparameter, XMLResult (read), NagiosState (written)
# Outputs:   Nagios status line
#######################################
_check_server() {
  local state version processors ip maint memory memory_dom0 cpu_type cpu_speed
  local details

  # there is currently no warn or critical values for this check
  do_ssh "show server name=${OVMparameter}"

  state=$(get_value_XML1 "${XMLResult}" "Status")
  version=$(get_value_XML1 "${XMLResult}" "Version")
  processors=$(get_value_XML1 "${XMLResult}" "Processors")
  ip=$(get_value_XML1 "${XMLResult}" "IP Address")
  maint=$(get_value_XML1 "${XMLResult}" "Maintenance Mode")
  memory=$(get_value_XML1 "${XMLResult}" "Memory (GiB)")
  memory_dom0=$(get_value_XML1 "${XMLResult}" "Dom0 Memory (GiB)")
  cpu_type=$(get_value_XML1 "${XMLResult}" "Processor Model")
  cpu_speed=$(get_value_XML1 "${XMLResult}" "Processor Speed (GHz)")

  details="ovs-release ${version:-unknown} Processors ${processors} OVM-Server-IP ${ip}"
  details+=" Maintenance Mode=${maint} Memory=${memory} Memory Dom0=${memory_dom0}"
  details+=" CPU-Type=${cpu_type} CPU-Speed=${cpu_speed}"

  if [[ "${state}" == 'Running' ]]; then
    NagiosState=${NagiosOK}
    # Warning because we cannot run any VM on a server in Maintenance Mode!
    if [[ "${maint}" == 'On' ]]; then
      NagiosState=${NagiosWarn}
    fi
  else
    NagiosState=${NagiosCrit}
  fi

  printf '%s %s\n' \
    "$(print_state "${NagiosState}") - ${OVMparameter} is ${state}" \
    "${details}"
}

#######################################
# VM check: CRITICAL unless the VM is 'Running'.
# Globals:   OVMparameter, XMLResult (read), NagiosState (written)
# Outputs:   Nagios status line
#######################################
_check_vm() {
  local processors cpu_max memory memory_max ha_mode os domain_type
  local repository state server details

  # there is currently no warn or critical values for this check
  do_ssh "show vm name=${OVMparameter}"

  processors=$(get_value_XML1 "${XMLResult}" "Processors")
  cpu_max=$(get_value_XML1 "${XMLResult}" "Max. Processors")
  memory=$(get_value_XML1 "${XMLResult}" "Memory (MB)")
  memory_max=$(get_value_XML1 "${XMLResult}" "Max. Memory (MB)")
  ha_mode=$(get_value_XML1 "${XMLResult}" "High Availability")
  os=$(get_value_XML1 "${XMLResult}" "Operating System")
  domain_type=$(get_value_XML1 "${XMLResult}" "Domain Type")
  repository=$(_bracket_name "$(get_value_XML1 "${XMLResult}" "Repository")")
  state=$(get_value_XML1 "${XMLResult}" "Status")

  details="Repository=${repository} HA-Mode=${ha_mode} CPUs ${processors}(${cpu_max})"
  details+=" RAM ${memory}(${memory_max}) OS=${os} Dom-Type=${domain_type}"

  if [[ "${state}" == 'Running' ]]; then
    NagiosState=${NagiosOK}
    # The hosting server is only reported while the VM is running.
    server=$(_bracket_name "$(get_value_XML1 "${XMLResult}" "Server")")
    printf '%s %s\n' \
      "$(print_state "${NagiosState}") - ${OVMparameter} is ${state} on ${server}" \
      "${details}"
  else
    NagiosState=${NagiosCrit}
    printf '%s %s\n' \
      "$(print_state "${NagiosCrit}") - ${OVMparameter} is ${state}" \
      "${details}"
  fi
}

#######################################
# Dispatch to the requested check and leave the script with its state.
# Globals:   OVMAction (read), NagiosState (written by the check)
# Exits:     the state of the check; UNKNOWN for an unsupported action
#######################################
do_action() {
  case "${OVMAction}" in
    filesystem) _check_filesystem ;;
    server)     _check_server ;;
    vm)         _check_vm ;;
    *)
      # Without this branch the script ended silently with exit status 1.
      echo "$(print_state "${NagiosUnknown}") - Unknown action '${OVMAction}' (allowed: filesystem, server, vm)"
      exit "${NagiosUnknown}"
      ;;
  esac

  # we leave the script with last state
  exit "${NagiosState}"
}

###############################################################################
#                                                                             #
#                                   MAIN                                      #
#                                                                             #
###############################################################################
main() {
  # Constants first, so that option parsing can already use the Nagios codes.
  set_env
  set_param "$@"
  check_parameter
  do_action
}

main "$@"