#!/bin/pwsh -noni
<#
.SYNOPSIS
    Name: check_azureservicehealth.ps1
    Script to check defined Azure Tenant Service Health

.DESCRIPTION
    Nagios plugin to test given Azure tenant services, and report health.

    The script requests an OAuth token with the client-credentials grant, reads
    ServiceComms/CurrentStatus for the tenant from the Office 365 Management API,
    and either prints the status in human readable form (-CLI) or prints a single
    Nagios status line and exits with the matching Nagios exit code.

.PARAMETER help
    Provides help for the command.
    NOTE: the switch is accepted, but the script body does not currently act on it.

.PARAMETER TenantDomain
    (alias -H) The domain of the tenant to be checked (required).
    Yeah, you shouldn't need both this and the GUID, but the API is picky.

.PARAMETER TenantGUID
    (alias -T) The GUID of the tenant to be checked (required).

.PARAMETER ClientID
    (alias -C) The GUID of the registered application (required).
    Must have Office 365 Management APIs ServiceHealth.Read permissions.

.PARAMETER ClientSecret
    (alias -P) A valid client secret (required).

.PARAMETER Service
    (alias -S) Workload Id to check. Defaults to "all".
    "all" is only valid together with -CLI.

.PARAMETER CLI
    If provided, will return 'human readable' status for the service.
    Else will return Nagios formatted result.

.PARAMETER v
    Verbose: prints additional diagnostics.

.PARAMETER vv
    Even more verbose: also prints the supplied parameters (client secret redacted).

.NOTES
    There are (at time of this writing) 9 different possible service statuses.

    In Nagios mode the response is
        SERVICE <state> : <service> is <status>
    For WARNING and CRITICAL the line is followed by "|" and the timestamp and
    first line of the most recent incident message for that service, when one
    can be retrieved.

    Status mapping:
        OK       : ServiceRestored, PostIncidentReportPublished, ServiceOperational
        WARNING  : Investigating, ServiceDegradation, RestoringService,
                   ExtendedRecovery, VerifyingService
        CRITICAL : ServiceInterruption
        UNKNOWN  : anything else

    Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.

    Possible services (12/2/2019)

    Id                       DisplayName                             Features
    --                       -----------                             --------
    Exchange                 Exchange Online                         {@{DisplayName=Networking Issues; Name=Networking Issues}, @{DisplayName=Sign-in; Name=Sign...
    Forms                    Microsoft Forms                         {@{DisplayName=Service; Name=service}, @{DisplayName=Form functionality; Name=functionality...
    Intune                   Microsoft Intune                        {@{DisplayName=Microsoft Intune; Name=Intune}}
    kaizalamessagingservices Microsoft Kaizala                       {@{DisplayName=Kaizala Messaging; Name=kaizalamessaging}, @{DisplayName=Kaizala Management ...
    Lync                     Skype for Business                      {@{DisplayName=Audio and Video; Name=AudioVideo}, @{DisplayName=Federation; Name=Federation...
    MDATP                    Microsoft Defender ATP                  {@{DisplayName=Components/Features; Name=mdatpcomponents}}
    MicrosoftFlow            Microsoft Flow                          {@{DisplayName=Service and web access issues; Name=Service}}
    MicrosoftFlowM365        Flow in Microsoft 365                   {@{DisplayName=Service and web access issues; Name=Service}}
    microsoftteams           Microsoft Teams                         {@{DisplayName=Teams Components; Name=TeamsComponents}}
    MobileDeviceManagement   Mobile Device Management for Office 365 {@{DisplayName=Mobile Device Management; Name=MobileDeviceManagement}}
    O365Client               Office Client Applications              {@{DisplayName=Office Client issues; Name=OfficeClient}}
    officeonline             Office for the web                      {@{DisplayName=Excel Online; Name=excelonline}, @{DisplayName=OneNote Online; Name=onenoteo...
    OneDriveForBusiness      OneDrive for Business                   {@{DisplayName=OneDrive for Business; Name=OneDrive for Business}}
    OrgLiveID                Identity Service                        {@{DisplayName=Sign-In; Name=authentication}, @{DisplayName=Administration; Name=administra...
    OSDPPlatform             Office 365 Portal                       {@{DisplayName=Portal; Name=Portal}, @{DisplayName=Administration; Name=Administration}, @{...
    OSub                     Office Subscription                     {@{DisplayName=Licensing and Renewal; Name=LicensingRenewal}, @{DisplayName=Network Availab...
    Planner                  Planner                                 {@{DisplayName=Planner; Name=Planner}}
    PowerAppsM365            PowerApps in Microsoft 365              {@{DisplayName=Service and web access issues; Name=Service}}
    PowerBIcom               Power BI                                {@{DisplayName=PowerBI.com; Name=PowerBI}}
    RMS                      Azure Information Protection            {@{DisplayName=Azure RMS Available; Name=RMS}}
    SharePoint               SharePoint Online                       {@{DisplayName=Provisioning; Name=provisioning}, @{DisplayName=SharePoint Features; Name=sp...
    StaffHub                 Microsoft StaffHub                      {@{DisplayName=Service and web access issues; Name=Service}, @{DisplayName=Web client issue...
    yammer                   Yammer Enterprise                       {@{DisplayName=Yammer Components; Name=yammerfeatures}}

    @Author: Paul Davidson
    @Version: 0.3
    @Changelog
        0.1 12/14/2019 : Initial release. Basic ability to report Azure status
        0.2 01/10/2020 : Added ability to report most recent incident text as Perfdata.
                         This allows administrators to see highlevel issue status and react faster
        0.3 01/30/2020 : Reformatted comments and parameters for readability and publication
#>
[CmdletBinding()]
param(
    [Alias("T")]
    [Parameter(Mandatory=$True, HelpMessage="check_servicehealth.ps1 v0.2 `nParameters: `n-help`tThis help`n-H`ttenant domain`n-T`tTenant GUID`n-C`tClient ID`n-P`tClient Secret`n-S`tService`n-CLI`n")]
    [string]$TenantGUID,

    [Alias("C")]
    [Parameter(Mandatory=$True, HelpMessage="check_servicehealth.ps1 v0.2 `nParameters: `n-help`tThis help`n-H`ttenant domain`n-T`tTenant GUID`n-C`tClient ID`n-P`tClient Secret`n-S`tService`n-CLI`n")]
    [string]$ClientID,

    [Alias("P")]
    [Parameter(Mandatory=$True, HelpMessage="check_servicehealth.ps1 v0.2 `nParameters: `n-help`tThis help`n-H`ttenant domain`n-T`tTenant GUID`n-C`tClient ID`n-P`tClient Secret`n-S`tService`n-CLI`n")]
    [string]$ClientSecret,

    [Alias("H")]
    [Parameter(Mandatory=$True, HelpMessage="check_servicehealth.ps1 v0.2 `nParameters: `n-help`tThis help`n-H`ttenant domain`n-T`tTenant GUID`n-C`tClient ID`n-P`tClient Secret`n-S`tService`n-CLI`n")]
    [string]$TenantDomain,

    [Alias("S")]
    [Parameter(Mandatory=$false, HelpMessage="check_servicehealth.ps1 v0.2 `nParameters: `n-help`tThis help`n-H`ttenant domain`n-T`tTenant GUID`n-C`tClient ID`n-P`tClient Secret`n-S`tService`n-CLI`n")]
    [string]$Service,

    [switch]$help,
    [switch]$v,
    [switch]$vv,
    [switch]$CLI
)

# -v is for verbose, which dumps more. -vv is for even more verbose.
# -CLI says "don't output this as Nagios, make it more or less human readable".

# Turn every error into a terminating one so the try/catch blocks below see it.
$ErrorActionPreference = "Stop"

if ($Service -eq "") { $Service = 'all' }
$Chatty = ($v -or $vv)

$RedirectUri = "https://localhost"
$LoginUrl    = "https://login.microsoft.com"
$Resource    = "https://manage.office.com"

# A Nagios check needs exactly one service; reject "all" before spending any API calls.
if (-not $CLI -and $Service -eq "all") {
    Write-Host "UNKNOWN: Invalid parameter 'all' specified (CLI flag required)"
    exit 3
}

# --- Retrieve OAuth token -----------------------------------------------------
if ($vv) {
    # Never echo the secret itself: plugin output can end up in monitoring logs.
    Write-Host "CID: $ClientID `nDomain: $TenantDomain `nTID: $TenantGUID `nSecret: <redacted, $($ClientSecret.Length) chars> `nService: $Service"
}

$RequestBody = @{
    grant_type    = "client_credentials"
    redirect_uri  = $RedirectUri
    resource      = $Resource
    client_id     = $ClientID
    client_secret = $ClientSecret
}

try {
    $OAuth = Invoke-RestMethod -Method Post -Uri "$LoginUrl/$TenantDomain/oauth2/token?api-version=1.0" -Body $RequestBody
}
catch {
    if ($Chatty) { Write-Host "Error generating OAuth token- likely bad password" }
    if ($CLI) {
        Write-Host "Error generating OAuth token. Check parameters and re-try."
    }
    else {
        Write-Host "SERVICE UNKNOWN: Invalid authorization to service. Attempt aborted."
    }
    exit 3
}

$HeaderParams = @{ 'Authorization' = "$($OAuth.Token_Type) $($OAuth.Access_Token)" }
if ($vv) {
    # Report success without printing the bearer token.
    Write-Host "Successful authorization- token type:" $OAuth.Token_Type
}

# --- Retrieve current status --------------------------------------------------
try {
    # CurrentStatus returns every workload in the tenant; narrow it down unless
    # "all" was requested (only useful when called from a command line).
    $O365Status = (Invoke-RestMethod -Method Get -Uri "$Resource/api/v1.0/$TenantGUID/ServiceComms/CurrentStatus" -Headers $HeaderParams).value
    if ($Service -ne "all") {
        $O365Status = $O365Status | Where-Object { $_.Workload -eq $Service }
    }
}
catch {
    # Likely cause: the application is not registered properly, but we have no way to tell.
    if ($Chatty) {
        Write-Host "Error getting service status. Check application permissions in your tenant"
    }
    elseif ($CLI) {
        # Without this, a CLI run without -v would fail silently.
        Write-Host "Error getting service status. Check application permissions in your tenant"
    }
    if (-not $CLI) {
        Write-Host "SERVICE UNKNOWN: Unable to retrieve status. Check application permissions. Attempt aborted."
    }
    exit 3
}

# A specific workload was requested but the tenant does not report it.
if ($Service -ne "all" -and -not $O365Status) {
    if ($CLI) {
        Write-Host "$Service : not found in the tenant's service list"
    }
    else {
        Write-Host "SERVICE UNKNOWN: Service '$Service' not found in the tenant's service list." -NoNewline
    }
    exit 3
}

# --- Human readable output ----------------------------------------------------
if ($CLI) {
    if ($Service -eq "all") {
        foreach ($Entry in $O365Status) {
            Write-Host $Entry.Workload ": `t`t`t`t`t`t`t" $Entry.Status
        }
    }
    else {
        Write-Host $Service ": " $O365Status.Status
    }
    exit 0
}

# --- Nagios output ------------------------------------------------------------
# Hashtable literals compare keys case-insensitively, like the -eq filter above.
$StateByStatus = @{
    "Investigating"               = "WARNING"
    "ServiceDegradation"          = "WARNING"
    "ServiceInterruption"         = "CRITICAL"
    "RestoringService"            = "WARNING"
    "ExtendedRecovery"            = "WARNING"
    "ServiceRestored"             = "OK"
    "PostIncidentReportPublished" = "OK"
    "VerifyingService"            = "WARNING"
    "ServiceOperational"          = "OK"
}
$ExitCodeByState = @{ "OK" = 0; "WARNING" = 1; "CRITICAL" = 2; "UNKNOWN" = 3 }

$CurrentStatus = [string]($O365Status | Select-Object -First 1).Status
if ($StateByStatus.ContainsKey($CurrentStatus)) {
    $nagstatus = $StateByStatus[$CurrentStatus]
}
else {
    $nagstatus = "UNKNOWN"
}

# For a degraded service, attach the newest incident headline after the "|".
# This lookup is best effort: a failure here must not hide the status itself.
$PerfData = ""
if ($nagstatus -eq "WARNING" -or $nagstatus -eq "CRITICAL") {
    try {
        $Incident = (Invoke-RestMethod -Method Get -Uri "$Resource/api/v1.0/$TenantGUID/ServiceComms/Messages" -Headers $HeaderParams).value |
            Where-Object { $_.MessageType -eq "Incident" -and $_.Workload -eq $Service } |
            Sort-Object -Descending -Property StartTime |
            Select-Object -First 1

        if ($Incident) {
            $LatestMsgFull = $Incident.Messages |
                Sort-Object -Descending -Property PublishedTime |
                Select-Object -First 1

            $LatestMsg = ""
            if ($LatestMsgFull) {
                # Prefer the message body; fall back to the stringified object
                # if the entry does not carry a MessageText property.
                $MsgText = [string]$LatestMsgFull.MessageText
                if ($MsgText -eq "") { $MsgText = [string]$LatestMsgFull }

                # Keep only the first line, whatever line ending the API used.
                $LatestMsg = ($MsgText -split "\r?\n")[0]

                # Drop everything up to and including the first colon (the
                # leading label), without throwing when the colon is absent
                # or is the last character.
                $colpos = $LatestMsg.IndexOf(":")
                if ($colpos -ge 0) {
                    $LatestMsg = $LatestMsg.Substring($colpos + 1).TrimStart()
                }
            }

            $MsgDate  = (Get-Date $Incident.LastUpdatedTime).ToLocalTime().ToString("yyyy/MM/dd HH:mm:ss")
            # Nagios splits plugin output at "|"; keep the incident text from adding another one.
            $PerfData = ($MsgDate + ":" + $LatestMsg) -replace '\|', '/'
            if ($Chatty) { Write-Host $PerfData }
        }
    }
    catch {
        $PerfData = ""
        if ($Chatty) { Write-Host "Unable to retrieve incident details: $($_.Exception.Message)" }
    }
}

$StatusLine = "SERVICE $nagstatus : $Service is $CurrentStatus"
if ($nagstatus -eq "WARNING" -or $nagstatus -eq "CRITICAL") {
    $StatusLine = $StatusLine + "|" + $PerfData
}
Write-Host $StatusLine -NoNewline
exit $ExitCodeByState[$nagstatus]