check-health: split off plugins...

... from 'check-health', so the script works on all devices to monitor
CPU and RAM. The supported plugins for sensors in hardware are installed
automatically.
This commit is contained in:
Christian Hesse 2025-02-04 12:27:35 +01:00
parent 84ba3a463a
commit 23d38927bc
9 changed files with 239 additions and 100 deletions

48
check-health.d/state.rsc Normal file
View file

@ -0,0 +1,48 @@
#!rsc by RouterOS
# RouterOS script: check-health.d/state
# Copyright (c) 2019-2025 Christian Hesse <mail@eworm.de>
# https://rsc.eworm.de/COPYING.md
#
# requires RouterOS, version=7.14
#
# check for RouterOS health state - state plugin
# https://rsc.eworm.de/doc/check-health.md
:global CheckHealthPlugins;
:set ($CheckHealthPlugins->[ :jobname ]) do={
:local FuncName [ :tostr $0 ];
:global CheckHealthLast;
:global Identity;
:global LogPrint;
:global SendNotification2;
:global SymbolForNotification;
:if ([ :len [ /system/health/find where type="" name~"-state\$"] ] = 0) do={
$LogPrint debug $FuncName ("Your device does not provide any state health values.");
:return false;
}
:foreach State in=[ /system/health/find where type="" name~"-state\$" ] do={
:local Name [ /system/health/get $State name ];
:local Value [ /system/health/get $State value ];
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={
:if ($CheckHealthLast->$Name = "ok" && \
$Value != "ok") do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "cross-mark" ] . "Health warning: " . $Name); \
message=("The device '" . $Name . "' on " . $Identity . " failed!") });
}
:if ($CheckHealthLast->$Name != "ok" && \
$Value = "ok") do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "white-heavy-check-mark" ] . "Health recovery: " . $Name); \
message=("The device '" . $Name . "' on " . $Identity . " recovered!") });
}
}
:set ($CheckHealthLast->$Name) $Value;
}
}

View file

@ -0,0 +1,74 @@
#!rsc by RouterOS
# RouterOS script: check-health.d/temperature
# Copyright (c) 2019-2025 Christian Hesse <mail@eworm.de>
# https://rsc.eworm.de/COPYING.md
#
# requires RouterOS, version=7.14
#
# check for RouterOS health state - temperature plugin
# https://rsc.eworm.de/doc/check-health.md
:global CheckHealthPlugins;
:set ($CheckHealthPlugins->[ :jobname ]) do={
:local FuncName [ :tostr $0 ];
:global CheckHealthLast;
:global CheckHealthTemperature;
:global CheckHealthTemperatureDeviation;
:global CheckHealthTemperatureNotified;
:global Identity;
:global LogPrint;
:global SendNotification2;
:global SymbolForNotification;
:if ([ :len [ /system/health/find where type="C" ] ] = 0) do={
$LogPrint debug $FuncName ("Your device does not provide any voltage health values.");
:return false;
}
:local TempToNum do={
:global CharacterReplace;
:local T [ :toarray [ $CharacterReplace $1 "." "," ] ];
:return ($T->0 * 10 + $T->1);
}
:if ([ :typeof $CheckHealthTemperatureNotified ] != "array") do={
:set CheckHealthTemperatureNotified ({});
}
:foreach Temperature in=[ /system/health/find where type="C" ] do={
:local Name [ /system/health/get $Temperature name ];
:local Value [ /system/health/get $Temperature value ];
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={
:if ([ :typeof ($CheckHealthTemperature->$Name) ] != "num" ) do={
$LogPrint info $FuncName ("No threshold given for " . $Name . ", assuming 50C.");
:set ($CheckHealthTemperature->$Name) 50;
}
:local Validate [ /system/health/get [ find where name=$Name ] value ];
:while ($Value != $Validate) do={
:set Value $Validate;
:set Validate [ /system/health/get [ find where name=$Name ] value ];
}
:if ($Value > $CheckHealthTemperature->$Name && \
$CheckHealthTemperatureNotified->$Name != true) do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "fire" ] . "Health warning: " . $Name); \
message=("The " . $Name . " on " . $Identity . " is above threshold: " . \
$Value . "\C2\B0" . "C") });
:set ($CheckHealthTemperatureNotified->$Name) true;
}
:if ($Value <= ($CheckHealthTemperature->$Name - $CheckHealthTemperatureDeviation) && \
$CheckHealthTemperatureNotified->$Name = true) do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "white-heavy-check-mark" ] . "Health recovery: " . $Name); \
message=("The " . $Name . " on " . $Identity . " dropped below threshold: " . \
$Value . "\C2\B0" . "C") });
:set ($CheckHealthTemperatureNotified->$Name) false;
}
}
:set ($CheckHealthLast->$Name) $Value;
}
}

View file

@ -0,0 +1,63 @@
#!rsc by RouterOS
# RouterOS script: check-health.d/voltage
# Copyright (c) 2019-2025 Christian Hesse <mail@eworm.de>
# https://rsc.eworm.de/COPYING.md
#
# requires RouterOS, version=7.14
#
# check for RouterOS health state - voltage plugin
# https://rsc.eworm.de/doc/check-health.md
:global CheckHealthPlugins;
:set ($CheckHealthPlugins->[ :jobname ]) do={
:local FuncName [ :tostr $0 ];
:global CheckHealthLast;
:global CheckHealthVoltageLow;
:global CheckHealthVoltagePercent;
:global Identity;
:global FormatLine;
:global IfThenElse;
:global LogPrint;
:global SendNotification2;
:global SymbolForNotification;
:if ([ :len [ /system/health/find where type="V" ] ] = 0) do={
$LogPrint debug $FuncName ("Your device does not provide any voltage health values.");
:return false;
}
:foreach Voltage in=[ /system/health/find where type="V" ] do={
:local Name [ /system/health/get $Voltage name ];
:local Value [ /system/health/get $Voltage value ];
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={
:local NumCurr [ $TempToNum $Value ];
:local NumLast [ $TempToNum ($CheckHealthLast->$Name) ];
:if ($NumLast * (100 + $CheckHealthVoltagePercent) < $NumCurr * 100 || \
$NumLast * 100 > $NumCurr * (100 + $CheckHealthVoltagePercent)) do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification ("high-voltage-sign,chart-" . [ $IfThenElse ($NumLast < \
$NumCurr) "in" "de" ] . "creasing") ] . "Health warning: " . $Name); \
message=("The " . $Name . " on " . $Identity . " jumped more than " . $CheckHealthVoltagePercent . "%.\n\n" . \
[ $FormatLine "old value" ($CheckHealthLast->$Name . " V") 12 ] . "\n" . \
[ $FormatLine "new value" ($Value . " V") 12 ]) });
} else={
:if ($NumCurr <= $CheckHealthVoltageLow && $NumLast > $CheckHealthVoltageLow) do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "high-voltage-sign,chart-decreasing" ] . "Health warning: Low " . $Name); \
message=("The " . $Name . " on " . $Identity . " dropped to " . $Value . " V below hard limit.") });
}
:if ($NumCurr > $CheckHealthVoltageLow && $NumLast <= $CheckHealthVoltageLow) do={
$SendNotification2 ({ origin=$FuncName; \
subject=([ $SymbolForNotification "high-voltage-sign,chart-increasing" ] . "Health recovery: Low " . $Name); \
message=("The " . $Name . " on " . $Identity . " recovered to " . $Value . " V above hard limit.") });
}
}
}
:set ($CheckHealthLast->$Name) $Value;
}
}

View file

@ -19,11 +19,6 @@
:global CheckHealthCPUUtilizationNotified; :global CheckHealthCPUUtilizationNotified;
:global CheckHealthLast; :global CheckHealthLast;
:global CheckHealthRAMUtilizationNotified; :global CheckHealthRAMUtilizationNotified;
:global CheckHealthTemperature;
:global CheckHealthTemperatureDeviation;
:global CheckHealthTemperatureNotified;
:global CheckHealthVoltageLow;
:global CheckHealthVoltagePercent;
:global Identity; :global Identity;
:global FormatLine; :global FormatLine;
@ -33,6 +28,7 @@
:global ScriptLock; :global ScriptLock;
:global SendNotification2; :global SendNotification2;
:global SymbolForNotification; :global SymbolForNotification;
:global ValidateSyntax;
:local TempToNum do={ :local TempToNum do={
:global CharacterReplace; :global CharacterReplace;
@ -78,105 +74,37 @@
:set CheckHealthRAMUtilizationNotified false; :set CheckHealthRAMUtilizationNotified false;
} }
:if ([ :len [ /system/health/find ] ] = 0) do={ :local Plugins [ /system/script/find where name~"^check-health.d/." ];
$LogPrint debug $ScriptName ("Your device does not provide any health values."); :if ([ :len $Plugins ] = 0) do={
$LogPrint debug $ScriptName ("No plugins installed.");
:set ExitOK true; :set ExitOK true;
:error true; :error true;
} }
:global CheckHealthPlugins ({});
:if ([ :typeof $CheckHealthLast ] != "array") do={ :if ([ :typeof $CheckHealthLast ] != "array") do={
:set CheckHealthLast ({}); :set CheckHealthLast ({});
} }
:if ([ :typeof $CheckHealthTemperatureNotified ] != "array") do={
:set CheckHealthTemperatureNotified ({});
}
:foreach Voltage in=[ /system/health/find where type="V" ] do={ :foreach Plugin in=$Plugins do={
:local Name [ /system/health/get $Voltage name ]; :local PluginVal [ /system/script/get $Plugin ];
:local Value [ /system/health/get $Voltage value ]; :if ([ $ValidateSyntax ($PluginVal->"source") ] = true) do={
:do {
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={ /system/script/run $Plugin;
:local NumCurr [ $TempToNum $Value ]; } on-error={
:local NumLast [ $TempToNum ($CheckHealthLast->$Name) ]; $LogPrint error $ScriptName ("Plugin '" . $ScriptVal->"name" . "' failed to run.");
:if ($NumLast * (100 + $CheckHealthVoltagePercent) < $NumCurr * 100 || \
$NumLast * 100 > $NumCurr * (100 + $CheckHealthVoltagePercent)) do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification ("high-voltage-sign,chart-" . [ $IfThenElse ($NumLast < \
$NumCurr) "in" "de" ] . "creasing") ] . "Health warning: " . $Name); \
message=("The " . $Name . " on " . $Identity . " jumped more than " . $CheckHealthVoltagePercent . "%.\n\n" . \
[ $FormatLine "old value" ($CheckHealthLast->$Name . " V") 12 ] . "\n" . \
[ $FormatLine "new value" ($Value . " V") 12 ]) });
} else={
:if ($NumCurr <= $CheckHealthVoltageLow && $NumLast > $CheckHealthVoltageLow) do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "high-voltage-sign,chart-decreasing" ] . "Health warning: Low " . $Name); \
message=("The " . $Name . " on " . $Identity . " dropped to " . $Value . " V below hard limit.") });
}
:if ($NumCurr > $CheckHealthVoltageLow && $NumLast <= $CheckHealthVoltageLow) do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "high-voltage-sign,chart-increasing" ] . "Health recovery: Low " . $Name); \
message=("The " . $Name . " on " . $Identity . " recovered to " . $Value . " V above hard limit.") });
}
} }
} else={
$LogPrint error $ScriptName ("Plugin '" . $ScriptVal->"name" . "' failed syntax validation, skipping.");
} }
:set ($CheckHealthLast->$Name) $Value;
} }
:foreach PSU in=[ /system/health/find where name~"^psu.*-state\$" ] do={ :foreach PluginName,Discard in=$CheckHealthPlugins do={
:local Name [ /system/health/get $PSU name ]; ($CheckHealthPlugins->$PluginName) \
:local Value [ /system/health/get $PSU value ]; ("\$CheckHealthPlugins->\"" . $PluginName . "\"");
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={
:if ($CheckHealthLast->$Name = "ok" && \
$Value != "ok") do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "cross-mark" ] . "Health warning: " . $Name); \
message=("The power supply unit '" . $Name . "' on " . $Identity . " failed!") });
}
:if ($CheckHealthLast->$Name != "ok" && \
$Value = "ok") do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "white-heavy-check-mark" ] . "Health recovery: " . $Name); \
message=("The power supply unit '" . $Name . "' on " . $Identity . " recovered!") });
}
}
:set ($CheckHealthLast->$Name) $Value;
} }
:foreach Temperature in=[ /system/health/find where type="C" ] do={ :set CheckHealthPlugins;
:local Name [ /system/health/get $Temperature name ];
:local Value [ /system/health/get $Temperature value ];
:if ([ :typeof ($CheckHealthLast->$Name) ] != "nothing") do={
:if ([ :typeof ($CheckHealthTemperature->$Name) ] != "num" ) do={
$LogPrint info $ScriptName ("No threshold given for " . $Name . ", assuming 50C.");
:set ($CheckHealthTemperature->$Name) 50;
}
:local Validate [ /system/health/get [ find where name=$Name ] value ];
:while ($Value != $Validate) do={
:set Value $Validate;
:set Validate [ /system/health/get [ find where name=$Name ] value ];
}
:if ($Value > $CheckHealthTemperature->$Name && \
$CheckHealthTemperatureNotified->$Name != true) do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "fire" ] . "Health warning: " . $Name); \
message=("The " . $Name . " on " . $Identity . " is above threshold: " . \
$Value . "\C2\B0" . "C") });
:set ($CheckHealthTemperatureNotified->$Name) true;
}
:if ($Value <= ($CheckHealthTemperature->$Name - $CheckHealthTemperatureDeviation) && \
$CheckHealthTemperatureNotified->$Name = true) do={
$SendNotification2 ({ origin=$ScriptName; \
subject=([ $SymbolForNotification "white-heavy-check-mark" ] . "Health recovery: " . $Name); \
message=("The " . $Name . " on " . $Identity . " dropped below threshold: " . \
$Value . "\C2\B0" . "C") });
:set ($CheckHealthTemperatureNotified->$Name) false;
}
}
:set ($CheckHealthLast->$Name) $Value;
}
} on-error={ } on-error={
:global ExitError; $ExitError $ExitOK [ :jobname ]; :global ExitError; $ExitError $ExitOK [ :jobname ];
} }

View file

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

Before After
Before After

View file

@ -17,21 +17,21 @@ Description
----------- -----------
This script is run from scheduler periodically, sending notification on This script is run from scheduler periodically, sending notification on
health related events: health related events. Monitoring CPU and RAM utilization (available
processing and memory resources) works on all devices:
* high CPU utilization * high CPU utilization
* high RAM utilization (low available RAM) * high RAM utilization (low available RAM)
With additional plugins functionality can be extended, depending on
sensors available in hardware:
* voltage jumps up or down more than configured threshold * voltage jumps up or down more than configured threshold
* voltage drops below hard lower limit * voltage drops below hard lower limit
* fan failed or recovered
* power supply failed or recovered * power supply failed or recovered
* temperature is above or below threshold * temperature is above or below threshold
Monitoring CPU and RAM utilization (available processing and memory
resources) works on all devices. Other than that only sensors available
in hardware can be checked. See what your hardware supports:
/system/health/print;
> ⚠️ **Warning**: Note that bad initial state will not trigger an event! For > ⚠️ **Warning**: Note that bad initial state will not trigger an event! For
> example rebooting a device that is already too hot will not trigger an > example rebooting a device that is already too hot will not trigger an
> alert on high temperature. > alert on high temperature.
@ -59,8 +59,8 @@ in hardware can be checked. See what your hardware supports:
#### PSU state #### PSU state
![check-health notification psu fail](check-health.d/notification-08-psu-fail.avif) ![check-health notification state fail](check-health.d/notification-08-state-fail.avif)
![check-health notification psu ok](check-health.d/notification-09-psu-ok.avif) ![check-health notification state ok](check-health.d/notification-09-state-ok.avif)
Requirements and installation Requirements and installation
----------------------------- -----------------------------
@ -74,6 +74,30 @@ Just install the script and create a scheduler:
> precision of cpu utilization, escpecially on devices with limited > precision of cpu utilization, escpecially on devices with limited
> resources. Thus an unusual interval is used here. > resources. Thus an unusual interval is used here.
### Plugins
Additional plugins are available for sensors available in hardware. First
check what your hardware supports:
/system/health/print;
Then install the plugin for *fan* and *power supply unit* *state*:
$ScriptInstallUpdate check-health,check-health.d/state;
... or *temperature*:
$ScriptInstallUpdate check-health,check-health.d/temperature;
... or *voltage*:
$ScriptInstallUpdate check-health,check-health.d/voltage;
You can also combine the commands and install all or a subset of plugins
in one go:
$ScriptInstallUpdate check-health,check-health.d/state,check-health.d/temperature,check-health.d/voltage;
Configuration Configuration
------------- -------------

View file

@ -13,7 +13,7 @@
:local ScriptName [ :jobname ]; :local ScriptName [ :jobname ];
# expected configuration version # expected configuration version
:global ExpectedConfigVersion 131; :global ExpectedConfigVersion 132;
# global variables not to be changed by user # global variables not to be changed by user
:global GlobalFunctionsReady false; :global GlobalFunctionsReady false;

View file

@ -56,6 +56,7 @@
129="Extended 'backup-partition' to support RouterOS copy-over - interactively or before feature update."; 129="Extended 'backup-partition' to support RouterOS copy-over - interactively or before feature update.";
130="Dropped intermediate certificates, depending on just root certificates now."; 130="Dropped intermediate certificates, depending on just root certificates now.";
131="Enhanced certificate download to fallback to mkcert.org, so all (commonly trusted) root certificates are available now."; 131="Enhanced certificate download to fallback to mkcert.org, so all (commonly trusted) root certificates are available now.";
132="Split off plugins from 'check-health', so the script works on all devices to monitor CPU and RAM. The supported plugins for sensors in hardware are installed automatically.";
}; };
# Migration steps to be applied on script updates # Migration steps to be applied on script updates
@ -64,4 +65,5 @@
100=":global ScriptInstallUpdate; :if ([ :len [ /system/script/find where name=\"ssh-keys-import\" source~\"^#!rsc by RouterOS\\r?\\n\" ] ] > 0) do={ /system/script/set name=\"mod/ssh-keys-import\" ssh-keys-import; \$ScriptInstallUpdate; }"; 100=":global ScriptInstallUpdate; :if ([ :len [ /system/script/find where name=\"ssh-keys-import\" source~\"^#!rsc by RouterOS\\r?\\n\" ] ] > 0) do={ /system/script/set name=\"mod/ssh-keys-import\" ssh-keys-import; \$ScriptInstallUpdate; }";
104=":global CharacterReplace; :global ScriptInstallUpdate; :foreach Script in={ \"capsman-download-packages\"; \"capsman-rolling-upgrade\"; \"hotspot-to-wpa\"; \"hotspot-to-wpa-cleanup\" } do={ /system/script/set name=(\$Script . \".capsman\") [ find where name=\$Script ]; :foreach Scheduler in=[ /system/scheduler/find where on-event~(\$Script . \"([^-.]|\\\$)\") ] do={ /system/scheduler/set \$Scheduler on-event=[ \$CharacterReplace [ get \$Scheduler on-event ] \$Script (\$Script . \".capsman\") ]; }; }; /ip/hotspot/user/profile/set on-login=\"hotspot-to-wpa.capsman\" [ find where on-login=\"hotspot-to-wpa\" ]; \$ScriptInstallUpdate;"; 104=":global CharacterReplace; :global ScriptInstallUpdate; :foreach Script in={ \"capsman-download-packages\"; \"capsman-rolling-upgrade\"; \"hotspot-to-wpa\"; \"hotspot-to-wpa-cleanup\" } do={ /system/script/set name=(\$Script . \".capsman\") [ find where name=\$Script ]; :foreach Scheduler in=[ /system/scheduler/find where on-event~(\$Script . \"([^-.]|\\\$)\") ] do={ /system/scheduler/set \$Scheduler on-event=[ \$CharacterReplace [ get \$Scheduler on-event ] \$Script (\$Script . \".capsman\") ]; }; }; /ip/hotspot/user/profile/set on-login=\"hotspot-to-wpa.capsman\" [ find where on-login=\"hotspot-to-wpa\" ]; \$ScriptInstallUpdate;";
111=":local Rec [ /ip/dns/static/find where comment~\"^managed by dhcp-to-dns for \" ]; :if ([ :len \$Rec ] > 0) do={ /ip/dns/static/remove \$Rec; /system/script/run dhcp-to-dns; }"; 111=":local Rec [ /ip/dns/static/find where comment~\"^managed by dhcp-to-dns for \" ]; :if ([ :len \$Rec ] > 0) do={ /ip/dns/static/remove \$Rec; /system/script/run dhcp-to-dns; }";
132=":if ([ :len [ /system/script/find where name=\"check-health\" ] ] > 0) do={ :local Code \":local Install \\\"check-health\\\"; :if ([ :len [ /system/health/find where type=\\\"\\\" name~\\\"-state\\\\\\\$\\\" ] ] > 0) do={ :set Install (\\\$Install . \\\",check-health.d/state\\\"); }; :if ([ :len [ /system/health/find where type=\\\"C\\\" ] ] > 0) do={ :set Install (\\\$Install . \\\",check-health.d/temperature\\\"); }; :if ([ :len [ /system/health/find where type=\\\"V\\\" ] ] > 0) do={ :set Install (\\\$Install . \\\",check-health.d/voltage\\\"); }; :global ScriptInstallUpdate; \\\$ScriptInstallUpdate \\\$Install;\"; :global ValidateSyntax; :if ([ \$ValidateSyntax \$Code ] = true) do={ :do { [ :parse \$Code ]; } on-error={ }; }; }";
}; };