add missing routeros tools from dinotools

This commit is contained in:
sepehr 2024-08-09 12:48:59 +03:30
parent e4b6fed5f2
commit 60c4f1870f
35 changed files with 5085 additions and 1 deletions

View file

@ -0,0 +1,6 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from .helper import load_modules
# Run the package's module loader as an import-time side effect.
# NOTE(review): presumably this imports the individual check modules so that
# their commands get registered - confirm against helper.load_modules.
load_modules()

View file

@ -0,0 +1,6 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from .cli import cli
# Invoke the command line entry point as soon as this module is executed.
cli()

View file

@ -0,0 +1,2 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later

View file

@ -0,0 +1,450 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext, ScalarPercentContext
from ..helper import escape_filename, logger
from ..resource import RouterOSCheckResource
class InterfaceResource(RouterOSCheckResource):
    """Collect state and traffic counters of RouterOS interfaces.

    The resource reads ``/interface``, enriches each entry with the ``speed``
    reported by ``/interface/ethernet``, filters the entries by name and
    registers the matching contexts on the owning check while probing.
    """

    name = "Interface"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        check: nagiosplugin.Check,
        names: List[str],
        regex: bool,
        single_interface: bool,
        ignore_disabled: bool,
        cookie_filename: str,
        warning_values: List[str],
        critical_values: List[str],
        override_values: List[str],
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param check: Check that receives the contexts created in ``probe``.
        :param names: Interface names to select; an empty sequence selects all.
        :param regex: Treat ``names`` as regular expressions (``re.match``).
        :param single_interface: Emit un-prefixed metrics for exactly one
            matching interface.
        :param ignore_disabled: Skip interfaces whose ``disabled`` is truthy.
        :param cookie_filename: Template of the per-interface cookie file;
            ``{name}`` is replaced with the escaped interface name.
        :param warning_values: Raw warning thresholds (``prepare_thresholds``).
        :param critical_values: Raw critical thresholds (``prepare_thresholds``).
        :param override_values: Raw overrides (``prepare_override_values``),
            merged over every fetched interface entry.
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check
        # ``None`` means "not fetched yet"; an empty dict is a valid result.
        self._interface_data: Optional[Dict[str, Any]] = None

        self.regex = regex
        # Compile the patterns once so that fetch_data() can reuse them.
        self.names: List[Union[Any]] = [re.compile(name) for name in names] if regex else names

        self.single_interface = single_interface
        self.ignore_disabled = ignore_disabled
        self.cookie_filename = cookie_filename

        self._parsed_warning_values: Dict[str, str] = self.prepare_thresholds(warning_values)
        self._parsed_critical_values: Dict[str, str] = self.prepare_thresholds(critical_values)
        self._parsed_override_values: Dict[str, str] = self.prepare_override_values(override_values)

        self._routeros_metric_values = [
            # Later values depend on the speed
            {
                "name": "speed",
                "missing_ok": True,
                "dst_value_name": "speed-byte",
                "type": self.parse_routeros_speed,
                "factor": 1 / 8,
                "no_metric": True,
            },
            {"name": "speed", "missing_ok": True, "type": self.parse_routeros_speed, "min": 0},
            {"name": "disabled", "type": bool, "context_class": None},
            {"name": "running", "type": bool, "context_class": None},
            {"name": "actual-mtu", "type": int, "min": 0},
            {
                "name": "fp-rx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "fp-rx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            {
                "name": "fp-tx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "fp-tx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            # CHR devices don't report l2mtu
            {"name": "l2mtu", "type": int, "min": 0, "missing_ok": True},
            {"name": "link-downs", "type": int, "min": 0, "uom": "c"},
            # {"name": "mtu", "type": int, "min": 0},
            {
                "name": "rx-byte",
                "type": int,
                "min": 0,
                "uom": "B",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "rx-drop", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "rx-error", "type": int, "min": 0, "uom": "c", "rate": True},
            # NOTE(review): the packet counter is rated against "speed-byte"
            # while "tx-byte" below is not - this looks swapped; confirm.
            {
                "name": "rx-packet",
                "type": int,
                "min": 0,
                "uom": "c",
                "rate": True,
                "rate_percent_total_name": "speed-byte",
            },
            {"name": "tx-byte", "type": int, "min": 0, "uom": "B", "rate": True},
            {"name": "tx-drop", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-error", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-packet", "type": int, "min": 0, "uom": "c", "rate": True},
            {"name": "tx-queue-drop", "type": int, "min": 0, "uom": "c", "rate": True},
        ]

    def _add_contexts(self, name, values, metric_prefix=""):
        """Register the contexts for all metrics of one interface.

        :param name: Name of the interface the contexts belong to.
        :param values: Entry of that interface as returned by ``fetch_data``;
            used to look up the total for percent based rate contexts.
        :param metric_prefix: Format template (may contain ``{name}``) that is
            prepended to every context name.
        """
        prefix = metric_prefix.format(name=name)

        self._check.add(
            InterfaceDisabledContext(f"{prefix}disabled", interface_name=name),
            InterfaceRunningContext(f"{prefix}running", interface_name=name),
        )

        # These two got their dedicated contexts above.
        custom_metric_names = ["disabled", "running"]

        for metric_value in self._routeros_metric_values:
            source_name = metric_value["name"]
            metric_value_name = metric_value.get("dst", source_name)
            if metric_value_name in custom_metric_names or metric_value.get("no_metric"):
                continue

            context_class = metric_value.get("context_class", nagiosplugin.ScalarContext)
            self._check.add(
                context_class(
                    f"{prefix}{metric_value_name}",
                    warning=self._parsed_warning_values.get(source_name),
                    critical=self._parsed_critical_values.get(source_name),
                )
            )

            if not metric_value.get("rate"):
                continue

            rate_total_value = None
            rate_percent_total_name = metric_value.get("rate_percent_total_name")
            if rate_percent_total_name:
                rate_total_value = values.get(rate_percent_total_name)

            if rate_total_value is not None:
                # A total is known: evaluate the rate as a share of it.
                rate_context_class_percent = metric_value.get("context_class", ScalarPercentContext)
                self._check.add(
                    rate_context_class_percent(
                        name=f"{prefix}{metric_value_name}_rate",
                        total_value=rate_total_value,
                        warning=self._parsed_warning_values.get(f"{source_name}_rate"),
                        critical=self._parsed_critical_values.get(f"{source_name}_rate"),
                    )
                )
            else:
                # NOTE(review): unlike the percent branch this looks up the
                # thresholds of the plain counter, not "<name>_rate" - confirm
                # whether that is intended before changing it.
                rate_context_class = metric_value.get("context_class", nagiosplugin.ScalarContext)
                self._check.add(
                    rate_context_class(
                        name=f"{prefix}{metric_value_name}_rate",
                        warning=self._parsed_warning_values.get(source_name),
                        critical=self._parsed_critical_values.get(source_name),
                    )
                )

    def _matches(self, interface_name: str) -> bool:
        """Return whether ``interface_name`` is selected by ``self.names``."""
        if len(self.names) == 0:
            return True
        if self.regex:
            return any(pattern.match(interface_name) for pattern in self.names)
        return interface_name in self.names

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected interfaces keyed by name, fetching them once.

        The result is cached, including an empty one, so repeated accesses do
        not query the device again.
        """
        if self._interface_data is not None:
            return self._interface_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        # Only remember the speed when the device actually reports one; the
        # metric itself is declared as ``missing_ok``.
        ethernet_speeds = {
            entry["name"]: entry["speed"]
            for entry in tuple(api.path("/interface/ethernet"))
            if "speed" in entry
        }

        interface_data = {}
        for result in tuple(api.path("/interface")):
            if self.ignore_disabled and result["disabled"]:
                continue
            if result["name"] in ethernet_speeds:
                result["speed"] = ethernet_speeds[result["name"]]
            # Overrides given on the command line win over fetched values.
            result.update(self._parsed_override_values)
            if self._matches(result["name"]):
                interface_data[result["name"]] = result

        self._interface_data = interface_data
        return interface_data

    @property
    def interface_names(self):
        """Names of all selected interfaces as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Build the metrics and register the contexts of the interfaces.

        In single mode nothing is produced unless exactly one interface
        matched; otherwise every metric name is prefixed with the interface
        name. Each interface uses its own cookie file.
        """
        routeros_metrics = []
        data = self.fetch_data()

        if self.single_interface:
            if len(self.interface_names) == 1:
                only_name = self.interface_names[0]
                cookie_filename = self.cookie_filename.format(name=escape_filename(only_name))
                with nagiosplugin.Cookie(cookie_filename) as cookie:
                    routeros_metrics += self.get_routeros_metric_item(data[only_name], cookie=cookie)
                self._add_contexts(name=only_name, values=data[only_name])
        else:
            for name in self.interface_names:
                cookie_filename = self.cookie_filename.format(name=escape_filename(name))
                with nagiosplugin.Cookie(cookie_filename) as cookie:
                    routeros_metrics += self.get_routeros_metric_item(
                        data[name], name_prefix=f"{name} ", cookie=cookie
                    )
                self._add_contexts(name=name, values=data[name], metric_prefix="{name} ")

        return routeros_metrics
class InterfaceDisabledContext(BooleanContext):
    """Context that warns when an interface is reported as disabled."""

    def __init__(self, name, interface_name):
        """Remember the interface name used in the result hint.

        :param name: Name of the context.
        :param interface_name: Interface name shown in the hint.
        """
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceResource):
        """Return Warn when the metric value is ``True``, Ok otherwise."""
        if metric.value is True:
            return self.result_cls(
                nagiosplugin.state.Warn,
                # Must be an f-string, otherwise the placeholder is printed
                # literally instead of the interface name.
                hint=f"Interface '{self._interface_name}' is disabled",
                metric=metric
            )
        return self.result_cls(nagiosplugin.state.Ok)
class InterfaceRunningContext(BooleanContext):
    """Context that warns when an interface is reported as not running."""

    def __init__(self, name, interface_name):
        """Keep the interface name so it can be shown in the hint."""
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceResource):
        """Return Ok unless the metric value is exactly ``False``."""
        if metric.value is not False:
            return self.result_cls(nagiosplugin.state.Ok)

        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=f"Interface '{self._interface_name}' not running",
            metric=metric,
        )
@cli.command("interface")
@click.option(
    "--name",
    "names",
    default=[],
    multiple=True,
    help="The name of the interface to monitor. This can be specified multiple times",
)
@click.option(
    "--regex",
    "regex",
    default=False,
    is_flag=True,
    help="Treat the specified names as regular expressions and try to find all matching interfaces. (Default: not set)",
)
@click.option(
    "--single",
    "single",
    default=False,
    is_flag=True,
    help="If set the check expects the interface to exist",
)
@click.option(
    "--ignore-disabled/--no-ignore-disabled",
    default=True,
    is_flag=True,
    help="Ignore disabled interfaces",
)
@click.option(
    "--cookie-filename",
    "cookie_filename",
    default="/tmp/check_routeros_interface_{name}.data",
    help=(
        "The filename to use to store the information to calculate the rate. '{name}' will be replaced with an "
        "internal unique id. It will create one file per interface. "
        "(Default: /tmp/check_routeros_interface_{name}.data)"
    ),
)
@click.option(
    "override_values",
    "--value-override",
    multiple=True,
    help=(
        "Override a value read from the RouterOS device. "
        "Format of the value must be compatible with RouterOS values. "
        "Example: Override/Set the speed value for bridges or tunnels: "
        "--value-override speed:10Gbps"
    )
)
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If link-downs should be in the range of 0 to 5 you can set "
        "--value-warning link-downs:0:5. "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If link-downs should be in the range of 0 to 10 you can set "
        "--value-critical link-downs:0:10. "
        "Can be specified multiple times"
    )
)
@click.pass_context
def interface(
    ctx, names, regex, single, ignore_disabled, cookie_filename, warning_values, critical_values, override_values
):
    """Check the state and the stats of interfaces"""
    # The check is created first because the resource registers its contexts
    # on it while probing.
    check = nagiosplugin.Check()

    resource = InterfaceResource(
        cmd_options=ctx.obj,
        check=check,
        names=names,
        regex=regex,
        single_interface=single,
        ignore_disabled=ignore_disabled,
        cookie_filename=cookie_filename,
        warning_values=warning_values,
        critical_values=critical_values,
        override_values=override_values,
    )
    check.add(resource)

    # Baseline result added unconditionally before any metric is evaluated.
    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            "All interfaces UP"
        )
    )

    # --single requires exactly one match; report anything else as Unknown.
    if single and len(resource.interface_names) != 1:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                f"Only one matching interface is allowed. Found {len(resource.interface_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,186 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class InterfaceGREResource(RouterOSCheckResource):
    """Collect the state of GRE interfaces read from ``/interface/gre``."""

    name = "GRE"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        names: List[str],
        regex: bool,
        single_interface: bool,
        ignore_disabled: bool,
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param names: Interface names to select; an empty sequence selects all.
        :param regex: Treat ``names`` as regular expressions (``re.match``).
        :param single_interface: Emit un-prefixed metrics for exactly one
            matching interface.
        :param ignore_disabled: Skip interfaces whose ``disabled`` is truthy.
        """
        super().__init__(cmd_options=cmd_options)

        # ``None`` means "not fetched yet"; an empty dict is a valid result.
        self._interface_data: Optional[Dict[str, Any]] = None
        self.regex = regex
        # Compile the patterns once so that fetch_data() can reuse them.
        self.names: List[Union[Any]] = [re.compile(name) for name in names] if regex else names
        self.single_interface = single_interface
        self.ignore_disabled = ignore_disabled

        self._routeros_metric_values = [
            {"name": "disabled", "type": bool},
            {"name": "running", "type": bool},
            {"name": "actual-mtu", "type": int, "min": 0},
        ]

    def _matches(self, interface_name: str) -> bool:
        """Return whether ``interface_name`` is selected by ``self.names``."""
        if len(self.names) == 0:
            return True
        if self.regex:
            return any(pattern.match(interface_name) for pattern in self.names)
        return interface_name in self.names

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected GRE interfaces keyed by name, fetching once.

        The result is cached, including an empty one, so repeated accesses do
        not query the device again.
        """
        if self._interface_data is not None:
            return self._interface_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        interface_data = {}
        for result in tuple(api.path("/interface/gre")):
            if self.ignore_disabled and result["disabled"]:
                continue
            if self._matches(result["name"]):
                interface_data[result["name"]] = result

        self._interface_data = interface_data
        return interface_data

    @property
    def interface_names(self):
        """Names of all selected GRE interfaces as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Build the metrics of the selected interfaces.

        In single mode nothing is produced unless exactly one interface
        matched; otherwise every metric name is prefixed with the interface
        name.
        """
        routeros_metrics = []
        data = self.fetch_data()

        if self.single_interface:
            if len(self.interface_names) == 1:
                return self.get_routeros_metric_item(data[self.interface_names[0]])
        else:
            for name in self.interface_names:
                routeros_metrics += self.get_routeros_metric_item(data[name], name_prefix=f"{name} ")

        return routeros_metrics
class InterfaceGREDisabledContext(BooleanContext):
    """Context that warns when a GRE interface is reported as disabled."""

    def __init__(self, name, interface_name):
        """Remember the interface name used in the result hint.

        :param name: Name of the context.
        :param interface_name: Interface name shown in the hint.
        """
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceGREResource):
        """Return Warn when the metric value is ``True``, Ok otherwise."""
        if metric.value is True:
            return self.result_cls(
                nagiosplugin.state.Warn,
                # Must be an f-string, otherwise the placeholder is printed
                # literally instead of the interface name.
                hint=f"GRE interface '{self._interface_name}' is disabled",
                metric=metric
            )
        return self.result_cls(nagiosplugin.state.Ok)
class InterfaceGRERunningContext(BooleanContext):
    """Context that warns when a GRE interface is reported as not running."""

    def __init__(self, name, interface_name):
        """Keep the interface name so it can be shown in the hint."""
        super().__init__(name=name)
        self._interface_name = interface_name

    def evaluate(self, metric, resource: InterfaceGREResource):
        """Return Ok unless the metric value is exactly ``False``."""
        if metric.value is not False:
            return self.result_cls(nagiosplugin.state.Ok)

        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=f"GRE interface '{self._interface_name}' not running",
            metric=metric,
        )
@cli.command("interface.gre")
@click.option(
    "--name", "names", default=[], multiple=True,
    help="The name of the GRE interface to monitor. This can be specified multiple times",
)
@click.option(
    "--regex", "regex", default=False, is_flag=True,
    help="Treat the specified names as regular expressions and try to find all matching interfaces. (Default: not set)",
)
@click.option(
    "--single", "single", default=False, is_flag=True,
    help="If set the check expects the interface to exist",
)
@click.option(
    "--ignore-disabled/--no-ignore-disabled", default=True, is_flag=True,
    help="Ignore disabled interfaces",
)
@click.pass_context
def interface_gre(ctx, names, regex, single, ignore_disabled):
    """Check the state of a GRE interface."""
    gre_resource = InterfaceGREResource(
        cmd_options=ctx.obj,
        names=names,
        regex=regex,
        single_interface=single,
        ignore_disabled=ignore_disabled,
    )
    check = nagiosplugin.Check(gre_resource)

    if not single:
        # One set of contexts per interface, prefixed with the interface name
        # to match the prefixed metric names emitted by the resource.
        for gre_name in gre_resource.interface_names:
            check.add(
                InterfaceGREDisabledContext(f"{gre_name} disabled", interface_name=gre_name),
                InterfaceGRERunningContext(f"{gre_name} running", interface_name=gre_name),
                nagiosplugin.ScalarContext(f"{gre_name} actual-mtu"),
            )
    elif len(gre_resource.interface_names) == 1:
        gre_name = gre_resource.interface_names[0]
        check.add(
            InterfaceGREDisabledContext("disabled", interface_name=gre_name),
            InterfaceGRERunningContext("running", interface_name=gre_name),
            nagiosplugin.ScalarContext("actual-mtu"),
        )
    else:
        # --single was requested but the selection is not unambiguous.
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                f"Only one matching interface is allowed. Found {len(gre_resource.interface_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,124 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class InterfaceVrrpCheck(RouterOSCheckResource):
    """Read the state flags of one VRRP interface from ``/interface/vrrp``."""

    name = "VRRP"

    def __init__(self, cmd_options, name, master_must):
        """Store the interface to check.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param name: Name of the VRRP interface to query.
        :param master_must: Read by ``InterfaceVrrpMaster`` to decide whether
            a non-master interface is a problem.
        """
        super().__init__(cmd_options=cmd_options)

        self._name = name

        # Filled in by probe(); ``None`` until the device has been queried.
        self.backup = None
        self.disabled = None
        self.enabled = None
        self.invalid = None
        self.master = None
        self.master_must = master_must
        self.running = None

    def probe(self):
        """Yield the ``disabled`` metric and, if enabled, the state metrics.

        :raises nagiosplugin.CheckError: If the device reports no VRRP
            interface with the requested name.
        """
        key_name = librouteros.query.Key("name")
        api = self._connect_api()

        logger.info("Fetching data ...")
        call = api.path(
            "/interface/vrrp"
        ).select(
            key_name,
            librouteros.query.Key("backup"),
            librouteros.query.Key("disabled"),
            librouteros.query.Key("invalid"),
            librouteros.query.Key("master"),
            librouteros.query.Key("running"),
        ).where(
            key_name == self._name
        )
        results = tuple(call)
        if not results:
            # Give a clear message instead of a bare IndexError.
            raise nagiosplugin.CheckError(f"VRRP interface '{self._name}' not found")
        result = results[0]

        self.disabled = result["disabled"]
        self.enabled = not self.disabled

        yield nagiosplugin.Metric(
            name="disabled",
            value=self.disabled,
        )

        if self.enabled:
            # The remaining flags are only reported for enabled interfaces;
            # flags missing from the reply are skipped.
            for n in ("backup", "invalid", "master", "running"):
                if n not in result:
                    continue

                setattr(self, n, result[n])
                yield nagiosplugin.Metric(
                    name=n,
                    value=result[n],
                )
class InterfaceVrrpDisabled(BooleanContext):
    """Context that warns when the VRRP interface is disabled."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Return Warn when the metric value is ``True``, Ok otherwise."""
        if metric.value is not True:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(nagiosplugin.state.Warn, "VRRP is disabled", metric)
class InterfaceVrrpInvalid(BooleanContext):
    """Context that warns when the VRRP configuration is flagged invalid."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Return Warn when the metric value is ``True``, Ok otherwise."""
        if metric.value is not True:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(state=nagiosplugin.state.Warn, hint="VRRP config is invalid")
class InterfaceVrrpMaster(BooleanContext):
    """Context that warns when the interface has to be master but is not."""

    def evaluate(self, metric, resource: InterfaceVrrpCheck):
        """Return Warn only if master is required and the value is falsy."""
        if metric.value or not resource.master_must:
            return self.result_cls(nagiosplugin.state.Ok)
        return self.result_cls(state=nagiosplugin.state.Warn, hint="VRRP interface is not master")
@cli.command("interface.vrrp")
@click.option("--name", required=True, help="The name of the VRRP interface to check")
# NOTE(review): no ``is_flag`` here, so the option appears to expect a value
# even though the help text reads like a flag - confirm the intended usage.
@click.option("--master", default=False, help="If set the interface must be master")
@click.pass_context
def interface_vrrp(ctx, name, master):
    """Check the state of VRRP interfaces"""
    vrrp_resource = InterfaceVrrpCheck(
        cmd_options=ctx.obj,
        name=name,
        master_must=master,
    )
    # One context per metric name the resource may yield.
    contexts = (
        BooleanContext("backup"),
        InterfaceVrrpDisabled("disabled"),
        InterfaceVrrpInvalid("invalid"),
        InterfaceVrrpMaster("master"),
        BooleanContext("running"),
    )
    check = nagiosplugin.Check(vrrp_resource, *contexts)

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,195 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Any, Dict, List, Optional, Union
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class RoutingBGPPeerResource(RouterOSCheckResource):
    """Collect the state of BGP peers read from ``/routing/bgp/peer``."""

    name = "BGP Peer"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        names: List[str],
        regex: bool,
        single_peer: bool,
    ):
        """Store the selection options and describe the values to extract.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param names: Peer names to select.
        :param regex: Treat ``names`` as regular expressions (``re.match``).
        :param single_peer: Emit un-prefixed metrics for exactly one peer.
        """
        super().__init__(cmd_options=cmd_options)

        # ``None`` means "not fetched yet"; an empty dict is a valid result.
        self._peer_data: Optional[Dict[str, Any]] = None
        self.regex = regex
        # Compile the patterns once so that fetch_data() can reuse them.
        self.names: List[Union[Any]] = [re.compile(name) for name in names] if regex else names
        self.single_peer = single_peer
        self.state: Optional[str] = None

        self._routeros_metric_values = [
            {"name": "disabled", "type": bool},
            {"name": "prefix-count", "dst": "prefix_count", "type": int},
            {"name": "state", "type": str},
            {"name": "updates-received", "dst": "updates_received", "type": int},
            {"name": "updates-sent", "dst": "updates_sent", "type": int},
            {"name": "uptime", "type": self.parse_routeros_time_duration, "min": 0, "uom": "s"},
        ]

    def _matches(self, peer_name: str) -> bool:
        """Return whether ``peer_name`` is selected by ``self.names``.

        Unlike the interface checks, an empty ``names`` selects nothing.
        """
        if self.regex:
            return any(pattern.match(peer_name) for pattern in self.names)
        return peer_name in self.names

    def fetch_data(self) -> Dict[str, Dict]:
        """Return the selected peers keyed by name, fetching them once.

        The result is cached, including an empty one, so repeated accesses do
        not query the device again.
        """
        if self._peer_data is not None:
            return self._peer_data

        api = self._connect_api()

        logger.info("Fetching data ...")
        peer_data = {}
        for result in tuple(api.path("/routing/bgp/peer")):
            if self._matches(result["name"]):
                peer_data[result["name"]] = result

        self._peer_data = peer_data
        return peer_data

    @property
    def peer_names(self):
        """Names of all selected peers as a tuple."""
        return tuple(self.fetch_data().keys())

    def probe(self):
        """Build the metrics of the selected peers.

        In single mode nothing is produced unless exactly one peer matched;
        otherwise every metric name is prefixed with the peer name.
        """
        routeros_metrics = []
        data = self.fetch_data()

        if self.single_peer:
            if len(self.peer_names) == 1:
                return self.get_routeros_metric_item(data[self.peer_names[0]])
        else:
            for name in self.peer_names:
                routeros_metrics += self.get_routeros_metric_item(data[name], name_prefix=f"{name} ")

        return routeros_metrics
class RoutingBGPPeerState(BooleanContext):
    """Context that maps the textual BGP peer state to a check state."""

    # States of a session that is known but not (yet) established.
    _NOT_ESTABLISHED = ("idle", "connect", "active", "opensent", "openconfirm")

    def __init__(self, *args, **kwargs):
        """Initialise the base context and set the metric format string."""
        super().__init__(*args, **kwargs)
        self.fmt_metric = "{name} is {valueunit}"

    def evaluate(self, metric, resource: RoutingBGPPeerResource):
        """Return Ok for an established session, Critical or Unknown else."""
        peer_state = metric.value

        if peer_state is None:
            # No state at all: Critical, without a hint.
            return nagiosplugin.Result(state=nagiosplugin.state.Critical)

        if peer_state == "established":
            return self.result_cls(
                state=nagiosplugin.state.Ok,
                hint="Connection with peer established",
            )

        if peer_state in self._NOT_ESTABLISHED:
            return self.result_cls(
                state=nagiosplugin.state.Critical,
                hint=f"Connection to peer not established (State: {peer_state})",
            )

        return self.result_cls(
            state=nagiosplugin.state.Unknown,
            hint=f"Unable to find peer state (State: {peer_state})",
        )
class RoutingBGPPeerSummary(nagiosplugin.Summary):
    """Summary listing the connection state of every selected peer."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return one "Connection to ..." text per peer, joined by commas.

        The peers come from the first result that belongs to a
        ``RoutingBGPPeerResource``; without such a result the text is empty.
        """
        peer_resource = next(
            (entry.resource for entry in results if isinstance(entry.resource, RoutingBGPPeerResource)),
            None,
        )
        if peer_resource is None:
            return ""

        peers = peer_resource.fetch_data()
        return ", ".join(
            f"Connection to {peer} is {peers[peer]['state']}"
            for peer in peer_resource.peer_names
        )
@cli.command("routing.bgp.peers")
@click.option(
    "--name",
    "names",
    default=[],
    multiple=True,
    help="The name of the BGP peer to check. This can be specified multiple times",
)
@click.option(
    "--regex",
    "regex",
    default=False,
    is_flag=True,
    help="Treat the specified names as regular expressions and try to find all matching peers. (Default: not set)",
)
@click.option(
    "--single",
    "single",
    default=False,
    is_flag=True,
    help="If set the check expects the peer to exist",
)
@click.pass_context
def routing_bgp_peer(ctx, names, regex, single):
    """Check the state of BGP peers"""
    resource = RoutingBGPPeerResource(
        cmd_options=ctx.obj,
        names=names,
        regex=regex,
        single_peer=single,
    )
    check = nagiosplugin.Check(
        resource,
        RoutingBGPPeerSummary(),
    )

    if single:
        if len(resource.peer_names) == 1:
            # Single mode: the resource emits un-prefixed metric names.
            check.add(
                BooleanContext("disabled"),
                RoutingBGPPeerState("state"),
                nagiosplugin.ScalarContext("prefix_count"),
                nagiosplugin.ScalarContext("uptime"),
                nagiosplugin.ScalarContext("updates_received"),
                nagiosplugin.ScalarContext("updates_sent"),
            )
        else:
            # --single was requested but the selection is not unambiguous.
            check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Unknown,
                    f"Only one matching peer is allowed. Found {len(resource.peer_names)}"
                )
            )
    else:
        # One set of contexts per peer, prefixed with the peer name to match
        # the prefixed metric names emitted by the resource.
        for name in resource.peer_names:
            check.add(
                BooleanContext(f"{name} disabled"),
                RoutingBGPPeerState(f"{name} state"),
                nagiosplugin.ScalarContext(f"{name} prefix_count"),
                nagiosplugin.ScalarContext(f"{name} uptime"),
                nagiosplugin.ScalarContext(f"{name} updates_received"),
                nagiosplugin.ScalarContext(f"{name} updates_sent"),
            )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,160 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import Any, Dict, Optional
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class RoutingOSPFNeighborResource(RouterOSCheckResource):
    """Read the state of one OSPF neighbor from ``/routing/ospf/neighbor``."""

    name = "OSPF NEIGHBOR"

    def __init__(
        self,
        cmd_options: Dict[str, Any],
        instance: str,
        router_id: str,
        area: Optional[str] = None
    ):
        """Store the neighbor selector and describe the values to extract.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param instance: Name of the OSPF instance to filter on.
        :param router_id: Router id of the neighbor to filter on.
        :param area: Optional area to filter on; only applied when the
            device version is at least 7.
        """
        super().__init__(cmd_options=cmd_options)

        self.area = area
        self.instance = instance
        self.router_id = router_id
        self.state: Optional[str] = None

        self._routeros_metric_values = [
            {"name": "adjacency", "type": self.parse_routeros_time_duration, "min": 0, "uom": "s"},
            {"name": "state", "type": None},
            {"name": "state-changes", "dst": "state_changes", "type": int},
        ]
        # These values are only requested from devices older than version 7.
        if self.routeros_version < RouterOSVersion("7"):
            self._routeros_metric_values.extend([
                {"name": "priority", "type": int},
                {"name": "ls-retransmits", "dst": "ls_retransmits", "type": int},
                {"name": "ls-requests", "dst": "ls_requests", "type": int},
                {"name": "db-summaries", "dst": "db_summaries", "type": int},
            ])

    def probe(self):
        """Query the neighbor and return its metrics.

        When no neighbor matches, a single ``state`` metric with value
        ``None`` is returned instead.
        """
        # ToDo: Only available in v7.x
        area_key = librouteros.query.Key("area")
        instance_key = librouteros.query.Key("instance")
        router_id_key = librouteros.query.Key("router-id")

        logger.info("Fetching data ...")

        selected = [instance_key, router_id_key] + self.get_routeros_select_keys()
        if self.routeros_version >= RouterOSVersion("7"):
            selected.append(area_key)

        conditions = [
            instance_key == self.instance,
            router_id_key == self.router_id,
        ]
        if self.area is not None:
            if self.routeros_version >= RouterOSVersion("7"):
                conditions.append(area_key == self.area)
            else:
                logger.warning("The area selector is only available on RouterOS 7.x")

        query = self.api.path("/routing/ospf/neighbor").select(*selected).where(*conditions)
        neighbors = tuple(query)

        if not neighbors:
            return nagiosplugin.Metric(name="state", value=None)

        return self.get_routeros_metric_item(neighbors[0])
class RoutingOSPFNeighborState(BooleanContext):
    """Context that maps the OSPF neighbor state to a check state."""

    def evaluate(self, metric, resource: RoutingOSPFNeighborResource):
        """Return Ok for ``Full``, Critical for ``Down``/missing, Warn else."""
        neighbor_state = metric.value

        if neighbor_state is None:
            # No neighbor was found; name the selectors that were used.
            if resource.area is not None:
                hint = (
                    f"Neighbor for area '{resource.area}', instance '{resource.instance}' and "
                    f"router-id '{resource.router_id}' not found"
                )
            else:
                hint = f"Neighbor for instance '{resource.instance}' and router-id '{resource.router_id}' not found"
            return nagiosplugin.Result(state=nagiosplugin.state.Critical, hint=hint)

        if neighbor_state == "Down":
            return self.result_cls(state=nagiosplugin.state.Critical, hint="Link to neighbor down")

        if neighbor_state == "Full":
            return self.result_cls(state=nagiosplugin.state.Ok, hint="Communicating with neighbor")

        return self.result_cls(
            state=nagiosplugin.state.Warn,
            hint=f"Link to neighbor not fully up, state: {neighbor_state}",
        )
@cli.command("routing.ospf.neighbors")
@click.option(
    "--area",
    help="The area the neighbor router belongs to (only supported on RouterOS v7.x)",
)
@click.option(
    "--instance",
    required=True,
    help="The name of the OSPF instance",
)
@click.option(
    "--router-id",
    required=True,
    help="The ID of the neighbor router",
)
@click.pass_context
def routing_ospf_neighbors(ctx, area, instance, router_id):
    """Check the state of an OSPF neighbor"""
    resource = RoutingOSPFNeighborResource(
        cmd_options=ctx.obj,
        area=area,
        instance=instance,
        router_id=router_id,
    )
    # Contexts are registered for every metric name the resource can emit,
    # including the ones it only requests from devices older than version 7.
    check = nagiosplugin.Check(
        resource,
        nagiosplugin.ScalarContext("priority"),
        nagiosplugin.ScalarContext("adjacency"),
        nagiosplugin.ScalarContext("state_changes"),
        nagiosplugin.ScalarContext("ls_retransmits"),
        nagiosplugin.ScalarContext("ls_requests"),
        nagiosplugin.ScalarContext("db_summaries"),
        RoutingOSPFNeighborState("state")
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,94 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2023)
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
from pprint import pformat
from typing import List
import click
import nagiosplugin
from ..cli import cli
from ..context import SimplePositiveFloatContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemClockResource(RouterOSCheckResource):
    """Measure the difference between the local and the device clock."""

    name = "CLOCK"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
    ):
        """Keep a reference to the owning check.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param check: The check this resource belongs to.
        """
        super().__init__(cmd_options=cmd_options)
        self._check = check

    def probe(self):
        """Yield ``time-diff``: local time minus device time, in seconds."""
        connection = self._connect_api()

        logger.info("Fetching clock data ...")
        clock_entry = tuple(connection.path("/system/clock"))[0]
        logger.debug(f"Extracted values {pformat(clock_entry)}")

        device_now = self.parse_routeros_date_time(clock_entry["date"], clock_entry["time"])
        offset = datetime.now() - device_now

        yield nagiosplugin.Metric(
            name="time-diff",
            value=offset.total_seconds(),
            uom="s",
        )
class SystemClockSummary(nagiosplugin.Summary):
    """Summary that reports the measured time difference."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the text for the first ``time-diff`` result, else ``""``."""
        time_diff_result = next(
            (entry for entry in results if entry.metric and entry.metric.name == "time-diff"),
            None,
        )
        if time_diff_result is None:
            return ""
        return f"Time diff is {time_diff_result.metric.value:.2f}s"
@cli.command("system.clock")
@click.option("--warning", help="Warning threshold for time diff in seconds", type=float)
@click.option("--critical", help="Critical threshold for time diff in seconds", type=float)
@click.pass_context
@nagiosplugin.guarded
def system_clock(ctx, warning, critical):
    """This command reads the information from /system/clock to extract the required information."""
    check = nagiosplugin.Check()

    clock_resource = SystemClockResource(cmd_options=ctx.obj, check=check)
    # The context name has to match the metric name yielded by the resource.
    time_diff_context = SimplePositiveFloatContext(
        name="time-diff",
        warning=warning,
        critical=critical,
        fmt_metric="Time diff is {valueunit}",
    )
    check.add(clock_resource, time_diff_context, SystemClockSummary())

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from pprint import pformat
import re
from typing import Dict, List
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemCpuResource(RouterOSCheckResource):
    """Collect the global CPU load and the per-CPU load/irq/disk values."""

    name = "CPU"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        """Parse the thresholds either as plain names or as regex patterns.

        :param cmd_options: Options passed on unchanged to the base resource.
        :param check: Check that receives the per-CPU contexts in ``probe``.
        :param warning_values: Raw warning thresholds.
        :param critical_values: Raw critical thresholds.
        :param use_regex: Parse the thresholds with
            ``prepare_regex_thresholds`` instead of ``prepare_thresholds``.
        """
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self.values: Dict[str, float] = {}
        self.use_regex: bool = use_regex

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if not self.use_regex:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)
        else:
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)

    @staticmethod
    def _assign_first_match(patterns, value_name, thresholds):
        """Store the threshold of the first pattern matching ``value_name``."""
        for pattern, threshold in patterns.items():
            if pattern.match(value_name):
                thresholds[value_name] = threshold
                return

    def probe(self):
        """Yield the global ``cpu-load`` and then one metric per CPU value.

        The context of every per-CPU value is added to the check right before
        its metric is yielded.
        """
        cpu_load_key = librouteros.query.Key("cpu-load")
        api = self._connect_api()

        logger.info("Fetching global data ...")
        global_entry = tuple(api.path("/system/resource").select(cpu_load_key))[0]
        logger.debug(f"Extracted values {pformat(global_entry)}")

        yield nagiosplugin.Metric(
            name="cpu-load",
            value=global_entry["cpu-load"],
            uom="%",
            min=0,
            max=100,
        )

        logger.info("Fetching cpu data ...")
        cpu_entries = tuple(api.path("/system/resource/cpu"))
        logger.debug(f"Extracted values {pformat(cpu_entries)}")

        for cpu_entry in cpu_entries:
            cpu_name = cpu_entry["cpu"]
            for suffix in ("load", "irq", "disk"):
                value_name = f"{cpu_name}-{suffix}"
                if self.use_regex:
                    # Resolve the regex thresholds to this concrete name.
                    self._assign_first_match(self.warning_regex_values, value_name, self.warning_values)
                    self._assign_first_match(self.critical_regex_values, value_name, self.critical_values)
                self.values[value_name] = float(cpu_entry[suffix])

        for value_name, value in self.values.items():
            self._check.add(nagiosplugin.ScalarContext(
                name=value_name,
                warning=self.warning_values.get(value_name),
                critical=self.critical_values.get(value_name),
            ))
            yield nagiosplugin.Metric(
                name=value_name,
                value=value,
                uom="%",
                min=0,
                max=100,
            )
class SystemCpuSummary(nagiosplugin.Summary):
    """Summary that reports the global CPU load."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the text for the first ``cpu-load`` result, else ``""``."""
        load_result = next(
            (entry for entry in results if entry.metric and entry.metric.name == "cpu-load"),
            None,
        )
        if load_result is None:
            return ""
        return f"System load is {load_result.metric.value}%"
@cli.command("system.cpu")
@click.option(
    "--load-warning",
    help="Warning threshold for global cpu load",
)
@click.option(
    "--load-critical",
    help="Critical threshold for global cpu load",
)
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If cpu1-load should be in the range of 10% to 20% you can set "
        "--value-warning cpu1-load:10:20. "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If cpu1-load should be in the range of 10% to 20% you can set "
        "--value-critical cpu1-load:10:20. "
        "Can be specified multiple times"
    )
)
@click.option(
    "--regex",
    "use_regex",
    default=False,
    is_flag=True,
    help=(
        "Treat values from --value-warning and --value-critical as regex to find all matching values. "
        "Example: Warn if cpu load of at least one CPU is above 80%: --value-warning 'cpu\\d+-load:80'"
    )
)
@click.pass_context
@nagiosplugin.guarded
def system_cpu(ctx, load_warning, load_critical, warning_values, critical_values, use_regex):
    """This command reads the information from /system/resource and /system/resource/cpu to extract the required
    information.
    """
    # The check is created first because the resource adds the per-CPU
    # contexts to it while probing.
    check = nagiosplugin.Check()

    resource = SystemCpuResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(
        resource,
        # Context of the global load; the per-CPU ones are added by the resource.
        nagiosplugin.ScalarContext(
            name="cpu-load",
            warning=load_warning,
            critical=load_critical,
        ),
        SystemCpuSummary(),
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemFanResource(RouterOSCheckResource):
    """Resource collecting the ``fanN-speed`` values reported by /system/health.

    The data is fetched while the object is constructed, so ``fan_names`` and
    ``fan_values`` are available before the check runs.
    """
    name = "FAN"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self.use_regex: bool = use_regex

        # Names of the fans found (e.g. fan1) and the values keyed by the full item name
        self.fan_names: Set[str] = set()
        self.fan_values: Dict[str, int] = {}

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if use_regex:
            # Patterns are resolved to concrete value names while fetching the data
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)
        else:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)

        self._fetch_data()

    def _fetch_data(self):
        """Read /system/health and store all fan speed values."""
        logger.info("Fetching data ...")
        rows = tuple(self.api.path("/system/health"))

        if self.routeros_version < RouterOSVersion("7"):
            # Before version 7 the first row is turned into name/value items
            health_items = [{"name": key, "value": raw} for key, raw in rows[0].items()]
        else:
            health_items = rows

        fan_pattern = re.compile(r"(?P<name>fan\d+)-(?P<type>(speed))")
        # (regex thresholds, resolved thresholds) - warning first, then critical
        threshold_sources = (
            (self.warning_regex_values, self.warning_values),
            (self.critical_regex_values, self.critical_values),
        )
        for entry in health_items:
            entry_name = entry["name"]
            found = fan_pattern.match(entry_name)
            if found is None:
                continue

            if self.use_regex:
                for patterns, resolved in threshold_sources:
                    # Only the first matching pattern provides the threshold
                    for pattern, threshold in patterns.items():
                        if pattern.match(entry_name):
                            resolved[entry_name] = threshold
                            break

            if found.group("type") in ("speed",):
                self.fan_values[entry_name] = int(entry["value"])

            self.fan_names.add(found.group("name"))

    def probe(self):
        """Register a ScalarContext per fan value and yield the matching metric."""
        for metric_name, speed in self.fan_values.items():
            context = nagiosplugin.ScalarContext(
                name=metric_name,
                warning=self.warning_values.get(metric_name),
                critical=self.critical_values.get(metric_name),
            )
            self._check.add(context)
            yield nagiosplugin.Metric(name=metric_name, value=speed)
@cli.command("system.fan")
@click.option(
    "warning_values", "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If fan1-speed should be in the range of 4000 to 5000 you can set "
        "--value-warning fan1-speed:4000:5000 "
        "Can be specified multiple times"
    ),
)
@click.option(
    "critical_values", "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If fan1-speed should be in the range of 4000 to 5000 you can set "
        "--value-critical fan1-speed:4000:5000 "
        "Can be specified multiple times"
    ),
)
@click.option(
    "--regex", "use_regex",
    is_flag=True,
    default=False,
    help="Treat values from --value-warning and --value-critical as regex to find all matching values",
)
@click.option(
    "--no-fan-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if no fan is available. Provide this option to ignore this.",
)
@click.option(
    "expected_names", "--expect-fan",
    multiple=True,
    default=[],
    help="Name of the fan to expect. Can be specified multiple times.",
)
@click.pass_context
@nagiosplugin.guarded
def system_fan(ctx, warning_values, critical_values, use_regex, no_fan_ok, expected_names):
    # Deliberately no docstring: click would show it as the help text of the command.
    check = nagiosplugin.Check()

    # The resource fetches its data on construction, so the fan names are known right away
    fans = SystemFanResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(fans)

    found_names = fans.fan_names
    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            hint=f"Looks like all fans work as expected: {', '.join(sorted(found_names))}"
        )
    )

    if not (found_names or no_fan_ok):
        check.results.add(
            nagiosplugin.Result(nagiosplugin.state.Unknown, hint="No FANs found")
        )

    # Expected fans that have not been reported by the device
    absent_names = [wanted for wanted in expected_names if wanted not in found_names]
    if absent_names:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Warn,
                hint=f"Expected FAN(s) not found: {', '.join(absent_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,149 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemLicenseResource(RouterOSCheckResource):
    """Resource reading the license information from /system/license.

    On construction /system/resource is queried; devices with the board
    name ``chr`` are treated as having a renewable license and get the
    additional ``deadline-in`` and ``next-renewal-in`` values.
    """
    name = "License"

    def __init__(self, cmd_options):
        super().__init__(cmd_options=cmd_options)

        def days_left(value):
            # Time between now and the given RouterOS timestamp, expressed in days
            remaining = self.parse_routeros_datetime(value) - datetime.now()
            return int(remaining.total_seconds()) / 60 / 60 / 24

        logger.info("Fetching information ...")
        system_resource = tuple(self.api.path("/system/resource"))[0]

        self.has_renewal = system_resource["board-name"].lower() == "chr"
        # Filled by probe() if the device reports the values
        self.deadline_datetime: Optional[datetime] = None
        self.next_renewal_datetime: Optional[datetime] = None

        if self.has_renewal:
            metric_values = [
                {"name": "level", "type": None},
                {"name": "deadline-at", "dst": "deadline-in", "type": days_left, "missing_ok": True},
                {"name": "next-renewal-at", "dst": "next-renewal-in", "type": days_left, "missing_ok": True},
            ]
        else:
            metric_values = [
                {"name": "nlevel", "dst": "level", "type": None},
            ]
        self._routeros_metric_values = metric_values

    def probe(self):
        """Fetch /system/license, remember the renewal timestamps and return the metrics."""
        logger.info("Fetching data ...")
        license_info = tuple(self.api.path("/system/license"))[0]

        if self.has_renewal:
            # API key -> attribute that keeps the parsed timestamp for the summary
            timestamp_fields = (
                ("deadline-at", "deadline_datetime"),
                ("next-renewal-at", "next_renewal_datetime"),
            )
            for key, attribute in timestamp_fields:
                if key in license_info:
                    setattr(self, attribute, self.parse_routeros_datetime(license_info[key]))

        return self.get_routeros_metric_item(license_info)
class SystemLicenseRenewSummary(nagiosplugin.Summary):
    """Summary for renewable licenses: result hints plus the renewal/deadline dates."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Join all result hints and append the remaining days until renewal and deadline."""
        parts = []
        license_resource: Optional[SystemLicenseResource] = None
        for entry in results:
            # The last result that carries a resource wins
            if entry.resource:
                license_resource = entry.resource
            if entry.hint:
                parts.append(entry.hint)

        if not (license_resource and license_resource.has_renewal):
            return ", ".join(parts)

        timestamps = (
            ("Next renewal", license_resource.next_renewal_datetime),
            ("Deadline", license_resource.deadline_datetime),
        )
        for label, moment in timestamps:
            if moment:
                remaining = moment - datetime.now()
                parts.append(f"{label} in {remaining.days} day(s) ({moment})")

        return ", ".join(parts)
class SystemLicenseLevelContext(nagiosplugin.Context):
    """Context that warns if the license level is not one of the allowed levels."""

    def __init__(self, *args, levels=None, **kwargs):
        self._levels = levels
        super().__init__(*args, **kwargs)

    def evaluate(self, metric, resource):
        """Return Ok if no levels are configured or the level is allowed, Warn otherwise."""
        allowed = self._levels
        restricted = allowed is not None and len(allowed) != 0
        if restricted and metric.value not in allowed:
            return nagiosplugin.Result(
                nagiosplugin.Warn,
                hint=f"License level '{metric.value}' not in list with allowed levels: {', '.join(allowed)}"
            )

        return nagiosplugin.Result(
            nagiosplugin.Ok,
            hint=f"License level is '{metric.value}'"
        )
@cli.command("system.license")
@click.option(
    "--deadline-warning",
    default="28:",
    help="Number of days until deadline is reached (Default: '28:')",
)
@click.option(
    "--deadline-critical",
    default="14:",
    help="Number of days until deadline is reached (Default: '14:')",
)
@click.option(
    "--next-renewal-warning",
    default=None,
    help="Number of days until renewal is done (Default: None, Example: '-14:')",
)
@click.option(
    "--next-renewal-critical",
    default=None,
    help="Number of days until renewal is done (Default: None)",
)
@click.option(
    "--level", "levels",
    default=None,
    multiple=True,
    help="Allowed license levels. Repeat to use multiple values.",
)
@click.pass_context
@nagiosplugin.guarded
def system_license(ctx, deadline_warning, deadline_critical, next_renewal_warning, next_renewal_critical, levels):
    # Deliberately no docstring: click would show it as the help text of the command.
    license_resource = SystemLicenseResource(cmd_options=ctx.obj)
    check = nagiosplugin.Check(license_resource)

    if license_resource.has_renewal:
        # Renewal related values are only reported by devices with a renewable license
        renewal_items = [
            nagiosplugin.ScalarContext(
                name="deadline-in",
                warning=deadline_warning,
                critical=deadline_critical,
            ),
            nagiosplugin.ScalarContext(
                name="next-renewal-in",
                warning=next_renewal_warning,
                critical=next_renewal_critical,
            ),
            SystemLicenseRenewSummary(),
        ]
        check.add(*renewal_items)

    check.add(SystemLicenseLevelContext(name="level", levels=levels))

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,121 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import List
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..context import ScalarPercentContext
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemMemoryResource(RouterOSCheckResource):
    """Resource providing the free and used memory read from /system/resource."""
    name = "MEMORY"

    def __init__(self, cmd_options):
        super().__init__(cmd_options=cmd_options)

        # Set by probe(); referenced by name ("memory_total") from the percent context
        self.memory_total = None

    def probe(self):
        """Yield the ``free`` and ``used`` metrics in bytes."""
        connection = self._connect_api()

        logger.info("Fetching data ...")
        query = connection.path("/system/resource").select(
            librouteros.query.Key("free-memory"),
            librouteros.query.Key("total-memory")
        )
        values = tuple(query)[0]

        free_bytes = values["free-memory"]
        self.memory_total = values["total-memory"]

        yield nagiosplugin.Metric(name="free", value=free_bytes, uom="B", min=0, max=self.memory_total)
        yield nagiosplugin.Metric(
            name="used",
            value=self.memory_total - free_bytes,
            uom="B",
            min=0,
            max=self.memory_total,
        )
class SystemMemorySummary(nagiosplugin.summary.Summary):
    """Summary that prints only the results selected by name."""

    def __init__(self, result_names: List[str]):
        super().__init__()
        self._result_names = result_names

    def ok(self, results):
        """Return the string form of the selected results separated by spaces."""
        return " ".join(str(results[wanted]) for wanted in self._result_names)
@cli.command("system.memory")
@click.option(
    "--used/--free",
    is_flag=True,
    default=True,
    help="Set if used or free memory should be checked. (Default: used)",
)
@click.option(
    "--warning",
    required=True,
    help="Warning threshold in % or MB. Example (20% oder 20 = 20MB)",
)
@click.option(
    "--critical",
    required=True,
    help="Critical threshold in % or MB. Example (20% oder 20 = 20MB)",
)
@click.pass_context
@nagiosplugin.guarded
def system_memory(ctx, used, warning, critical):
    # Deliberately no docstring: click would show it as the help text of the command.
    check = nagiosplugin.Check(SystemMemoryResource(cmd_options=ctx.obj))

    if used:
        # Thresholds apply to the used memory, the free memory is only reported
        contexts = (
            nagiosplugin.ScalarContext(name="free"),
            ScalarPercentContext(
                name="used",
                total_name="memory_total",
                warning=warning,
                critical=critical
            ),
        )
        summary_names = ["used"]
    else:
        # For free memory the thresholds are passed with a trailing ":"
        contexts = (
            ScalarPercentContext(
                name="free",
                total_name="memory_total",
                warning=f"{warning}:",
                critical=f"{critical}:"
            ),
            nagiosplugin.ScalarContext(name="used"),
        )
        summary_names = ["free"]

    for context in contexts:
        check.add(context)
    check.add(SystemMemorySummary(result_names=summary_names))

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,261 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2023)
# SPDX-License-Identifier: GPL-3.0-or-later
from pprint import pformat
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext, PerfdataScalarContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
# Resource for the NTP client check. probe() reads /system/ntp/client and, depending on
# the RouterOS version, registers the matching contexts and extra results on the check
# object that was handed to the constructor.
# NOTE(review): the indentation of this block is not available, the statements are kept
# exactly as they are. In particular it cannot be told from here whether the RouterOS 6
# lookup of "last-update-from" (guarded by `if self._expected_servers:`) belongs to the
# preceding `else:` or to the enclosing version branch - confirm before re-indenting.
class SystemNtpClientResource(RouterOSCheckResource):
name = "NTP"
# Stores the check and all thresholds; nothing is fetched from the device here.
def __init__(
self,
cmd_options,
check: nagiosplugin.Check,
expected_servers: Optional[List[str]] = None,
last_update_before_warning: Optional[float] = None,
last_update_before_critical: Optional[float] = None,
offset_warning: Optional[float] = None,
offset_critical: Optional[float] = None,
stratum_warning: Optional[int] = None,
stratum_critical: Optional[int] = None,
):
super().__init__(cmd_options=cmd_options)
self._check = check
self._expected_servers = expected_servers
self._offset_warning = offset_warning
self._offset_critical = offset_critical
self._last_update_before_warning = last_update_before_warning
self._last_update_before_critical = last_update_before_critical
self._stratum_warning = stratum_warning
self._stratum_critical = stratum_critical
# Fetches the first row of /system/ntp/client, extends the list of values to extract and
# returns the result of get_routeros_metric_item() for that row.
def probe(self):
logger.info("Fetching ntp client data ...")
call = self.api.path(
"/system/ntp/client"
)
results = tuple(call)
result = results[0]
logger.debug(f"Extracted values {pformat(result)}")
# The "enabled" value is always extracted
self._routeros_metric_values += [
{"name": "enabled", "type": bool},
]
# A disabled client is reported as critical and nothing else is evaluated
if not result["enabled"]:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.Critical,
"NTP Client not enabled"
)
)
return self.get_routeros_metric_item(result)
#: Current address of the server the devices gets its time from
current_server_address: Optional[str] = None
# Versions before 7 are read via "last-adjustment" (stored as "offset") and
# "last-update-before"; both are parsed with parse_routeros_time_duration.
if self.routeros_version < RouterOSVersion("7"):
metric_values = [
{"name": "last-adjustment", "dst": "offset", "type": self.parse_routeros_time_duration, "uom": "s"},
{"name": "last-update-before", "type": self.parse_routeros_time_duration, "uom": "s"},
]
# Collect the names of the values that are missing in the API result
metric_value_names_not_found = []
for metric_value in metric_values:
if metric_value["name"] not in result:
metric_value_names_not_found.append(metric_value["name"])
if len(metric_value_names_not_found) > 0:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Critical,
(
f"Looks like NTP client not running. "
f"Unable to find values for {', '.join(metric_value_names_not_found)}"
)
)
)
else:
self._routeros_metric_values += metric_values
# The offset threshold is a symmetric range "-x:x". The truthiness test means that
# a threshold of None and also a threshold of 0 results in no range at all.
self._check.add(
nagiosplugin.ScalarContext(
name="last-update-before",
warning=self._last_update_before_warning,
critical=self._last_update_before_critical,
),
nagiosplugin.ScalarContext(
name="offset",
warning=f"-{self._offset_warning}:{self._offset_warning}" if self._offset_warning else None,
critical=f"-{self._offset_critical}:{self._offset_critical}" if self._offset_critical else None,
),
)
# The server address is only looked up if expected servers have been configured
if self._expected_servers:
current_server_address = result.get("last-update-from")
if current_server_address is None:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Unknown,
"Unable to get address of server (last-update-from)"
)
)
else:
# Other versions are read via "freq-drift", "synced-stratum" (stored as "stratum")
# and "system-offset" (stored as "offset").
# "system-offset" is divided by 1000 - presumably milliseconds to seconds, TODO confirm
self._routeros_metric_values += [
{"name": "freq-drift", "type": float},
{"name": "synced-stratum", "dst": "stratum", "type": int},
{"name": "system-offset", "dst": "offset", "type": lambda v: float(v) / 1000, "uom": "s"},
]
self._check.add(
PerfdataScalarContext(
name="freq-drift",
),
nagiosplugin.ScalarContext(
name="offset",
warning=f"-{self._offset_warning}:{self._offset_warning}" if self._offset_warning else None,
critical=f"-{self._offset_critical}:{self._offset_critical}" if self._offset_critical else None,
),
nagiosplugin.ScalarContext(
name="stratum",
warning=self._stratum_warning,
critical=self._stratum_critical,
),
)
if self._expected_servers:
current_server_address = result.get("synced-server")
if current_server_address is None:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Unknown,
"Unable to get address of server (synced-server)"
)
)
# Warn if a server address is known but not part of the expected servers
if current_server_address and current_server_address not in self._expected_servers:
self._check.results.add(
nagiosplugin.Result(
nagiosplugin.state.Warn,
(
f"Server '{current_server_address}' not in list of expected servers: "
f"{', '.join(self._expected_servers)}"
)
)
)
return self.get_routeros_metric_item(result)
class SystemNtpClientSummary(nagiosplugin.Summary):
    """Summary that reports stratum and offset when the check is OK."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Return the stratum and offset messages in result order, separated by commas."""
        parts: List[str] = []
        for entry in results:
            metric = entry.metric
            if not metric:
                continue
            if metric.name == "stratum":
                parts.append(f"Stratum is {metric.value}")
            elif metric.name == "offset":
                parts.append(f"Offset is {metric.value:.2f}s")

        return ", ".join(parts)
@cli.command("system.ntp.client")
@click.option(
    "--last-update-before-warning",
    type=float,
    default=60 * 30,
    help=(
        "The time from the NTP server should at least be synchronised in the last N seconds. "
        "Default: 30 minutes = 1800 seconds "
        "Note: This is only available on RouterOS 6.x"
    ),
)
@click.option(
    "--last-update-before-critical",
    type=float,
    default=60 * 60,
    help=(
        "The time from the NTP server should at least be synchronised in the last N seconds. "
        "Default: 60 minutes = 3600 seconds "
        "Note: This is only available on RouterOS 6.x"
    ),
)
@click.option(
    "--offset-warning",
    type=float,
    default=10.0,
    help="Warning threshold for offset from the NTP server in seconds",
)
@click.option(
    "--offset-critical",
    type=float,
    default=30.0,
    help="Critical threshold for offset from the NTP server in seconds",
)
@click.option(
    "--stratum-warning",
    type=int,
    help=(
        "Check the stratum and report warning state if it does not match. "
        "Note: The stratum is only available on RouterOS 7.x"
    ),
)
@click.option(
    "--stratum-critical",
    type=int,
    help=(
        "Check the stratum and report critical state if it does not match. "
        "Note: The stratum is only available on RouterOS 7.x"
    ),
)
@click.option(
    "expected_servers", "--expected-server",
    multiple=True,
    help=(
        "Address of the ntp server we expect to get our time from. "
        "This must be the IPv4/IPv6 address and not the FQDN. "
        "It can be provided multiple times. "
        "Example: --expected-server 10.0.0.1 --expected-server 192.168.1.1"
    ),
)
@click.pass_context
@nagiosplugin.guarded
def system_clock(ctx, last_update_before_warning, last_update_before_critical, offset_warning, offset_critical,
                 stratum_warning, stratum_critical, expected_servers):
    """
    This command reads the information from /system/ntp/client to extract the required information.
    It checks if is the NTP client enabled, if the NTP server is reachable and if is the offset in the threshold.
    """
    check = nagiosplugin.Check()

    # All version dependent contexts are registered by the resource while probing
    ntp_resource = SystemNtpClientResource(
        cmd_options=ctx.obj,
        check=check,
        expected_servers=expected_servers,
        last_update_before_warning=last_update_before_warning,
        last_update_before_critical=last_update_before_critical,
        offset_warning=offset_warning,
        offset_critical=offset_critical,
        stratum_warning=stratum_warning,
        stratum_critical=stratum_critical,
    )
    enabled_context = BooleanContext(name="enabled")

    check.add(ntp_resource, SystemNtpClientSummary(), enabled_context)

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,81 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemPowerResource(RouterOSCheckResource):
    """Resource reading the ``power-consumption`` value from /system/health."""
    name = "Power"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
    ):
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self._routeros_metric_values = [{"name": "power-consumption", "type": float}]

    def probe(self):
        """Return the power metrics; add an Unknown result if none has been found."""
        logger.info("Fetching data ...")
        health_call = self.api.path("/system/health")

        legacy_format = self.routeros_version < RouterOSVersion("7")
        if legacy_format:
            # Before version 7 only the required keys are selected
            health_call = health_call.select(*self.get_routeros_select_keys())

        health_items = tuple(health_call)
        if legacy_format:
            health_items = self._convert_v6_list_to_v7(health_items)

        metrics = self.get_routeros_metrics(health_items)
        if len(metrics) == 0:
            missing = nagiosplugin.Result(
                nagiosplugin.state.Unknown,
                hint="Power consumption not found."
            )
            self._check.results.add(missing)

        return metrics
@cli.command("system.power")
@click.option("--warning", help="Warning threshold for total power consumption")
@click.option("--critical", help="Critical threshold for total power consumption")
@click.pass_context
@nagiosplugin.guarded
def system_power(ctx, warning, critical):
    """Check the total power consumption of a device. This might not be available on all devices"""
    check = nagiosplugin.Check()

    # The resource adds an Unknown result to the check if the value is missing
    power_resource = SystemPowerResource(cmd_options=ctx.obj, check=check)
    power_context = nagiosplugin.ScalarContext(
        "power-consumption",
        warning=warning,
        critical=critical,
        fmt_metric="Power consumption {value}W",
    )
    check.add(power_resource, power_context)

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,182 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..context import BooleanContext
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemPsuResource(RouterOSCheckResource):
    """Resource collecting state, current and voltage of the PSUs from /system/health.

    The data is fetched while the object is constructed, so ``psu_names``,
    ``psu_states`` and ``psu_values`` are available before the check runs.
    """
    name = "PSU"

    def __init__(
        self, cmd_options, check: nagiosplugin.Check, warning_values: List[str], critical_values: List[str],
        no_psu_ok: bool,
    ):
        super().__init__(cmd_options=cmd_options)

        self._check = check

        # PSU names (e.g. psu1), states keyed by PSU name and values keyed by full item name
        self.psu_names: Set[str] = set()
        self.psu_states: Dict[str, str] = {}
        self.psu_values: Dict[str, float] = {}

        self.warning_values = self._prepare_thresholds(warning_values)
        self.critical_values = self._prepare_thresholds(critical_values)
        self.no_psu_ok = no_psu_ok

        self._fetch_data()

    def _fetch_data(self):
        """Read /system/health and store everything that belongs to a PSU."""
        logger.info("Fetching data ...")
        rows = tuple(self.api.path("/system/health"))

        if self.routeros_version < RouterOSVersion("7"):
            # Before version 7 the first row is turned into name/value items
            health_items = [{"name": key, "value": raw} for key, raw in rows[0].items()]
        else:
            health_items = rows

        psu_pattern = re.compile(r"(?P<name>psu\d+)-(?P<type>(state|current|voltage))")
        for entry in health_items:
            found = psu_pattern.match(entry["name"])
            if found is None:
                continue

            psu_name = found.group("name")
            kind = found.group("type")
            self.psu_names.add(psu_name)

            if kind in ("current", "voltage"):
                self.psu_values[entry["name"]] = float(entry["value"])
            elif kind == "state":
                self.psu_states[psu_name] = entry["value"]

        nothing_found = len(self.psu_values) == 0 and len(self.psu_states) == 0
        if nothing_found and not self.no_psu_ok:
            self._check.results.add(
                nagiosplugin.Result(
                    nagiosplugin.state.Unknown,
                    hint="No PSU values and stats found"
                )
            )

    @staticmethod
    def _prepare_thresholds(thresholds: List[str]):
        """Split ``name:threshold`` strings at the first colon into a dict.

        A missing threshold part is logged as a warning; the (empty) value is
        stored nevertheless.
        """
        parsed = {}
        for spec in thresholds:
            name, _, value = spec.partition(":")
            if not value:
                logger.warning(f"Unable to parse threshold for {name}")
            parsed[name] = value

        return parsed

    def probe(self):
        """Yield one metric per PSU value and one boolean ``<psu>-state-ok`` metric per PSU state."""
        for metric_name, reading in self.psu_values.items():
            self._check.add(nagiosplugin.ScalarContext(
                name=metric_name,
                warning=self.warning_values.get(metric_name),
                critical=self.critical_values.get(metric_name),
            ))
            yield nagiosplugin.Metric(name=metric_name, value=reading)

        for psu_name, state in self.psu_states.items():
            state_metric_name = f"{psu_name}-state-ok"
            state_ok = state == "ok"
            self._check.add(BooleanContext(state_metric_name))

            if not state_ok:
                self._check.results.add(
                    nagiosplugin.Result(
                        nagiosplugin.state.Warn,
                        hint=f"PSU: {psu_name} state {state}"
                    )
                )

            yield nagiosplugin.Metric(name=state_metric_name, value=state_ok)
@cli.command("system.psu")
@click.option(
    "warning_values",
    "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If psu1-voltage should be in the range of 12-12.1V you can set --value-warning psu1-voltage:12:12.1 "
        "Can be specified multiple times"
    )
)
@click.option(
    "critical_values",
    "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If psu1-voltage should be in the range of 12-12.1V you can set --value-critical psu1-voltage:12:12.1 "
        "Can be specified multiple times"
    )
)
@click.option(
    "--no-psu-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if not at least one psu stat or value is available. Set this to ignore this."
)
@click.option(
    "expected_psu_names",
    "--expect-psu",
    multiple=True,
    default=[],
    help="Name of the PSU to expect at least one value or state. Can be specified multiple times."
)
@click.pass_context
@nagiosplugin.guarded
def system_psu(ctx, warning_values, critical_values, no_psu_ok, expected_psu_names):
    """Check the power supply units (PSU)"""
    check = nagiosplugin.Check()

    # The resource fetches its data on construction, so the PSU names are known right away
    psu_resource = SystemPsuResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        no_psu_ok=no_psu_ok,
    )
    check.add(psu_resource)

    # psu_names is a set: sort it to get the same output on every run,
    # like the fan and temperature checks do.
    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            hint=f"Looks like all PSU work like expected: {', '.join(sorted(psu_resource.psu_names))}"
        )
    )

    # Expected PSUs that have not been reported by the device
    missing_psu_names = [
        psu_name for psu_name in expected_psu_names
        if psu_name not in psu_resource.psu_names
    ]
    if missing_psu_names:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Warn,
                hint=f"Expected PSU(s) not found: {', '.join(missing_psu_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,184 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Dict, List, Set
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemTemperatureResource(RouterOSCheckResource):
    """Resource collecting all /system/health values with ``temperature`` in their name.

    The data is fetched while the object is constructed, so ``names`` and
    ``values`` are available before the check runs.
    """
    name = "Temperature"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        warning_values: List[str],
        critical_values: List[str],
        use_regex: bool
    ):
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self.use_regex: bool = use_regex

        # Names of the temperature values found and the values keyed by name
        self.names: Set[str] = set()
        self.values: Dict[str, float] = {}

        self.warning_values: Dict[str, str] = {}
        self.critical_values: Dict[str, str] = {}
        self.warning_regex_values: Dict[re.Pattern, str] = {}
        self.critical_regex_values: Dict[re.Pattern, str] = {}

        if use_regex:
            # Patterns are resolved to concrete value names while fetching the data
            self.warning_regex_values = self.prepare_regex_thresholds(warning_values)
            self.critical_regex_values = self.prepare_regex_thresholds(critical_values)
        else:
            self.warning_values = self.prepare_thresholds(warning_values)
            self.critical_values = self.prepare_thresholds(critical_values)

        self._fetch_data()

    def _fetch_data(self):
        """Read /system/health and store all temperature values."""
        logger.info("Fetching data ...")
        health_items = tuple(self.api.path("/system/health"))
        if self.routeros_version < RouterOSVersion("7"):
            health_items = self._convert_v6_list_to_v7(health_items)

        temperature_pattern = re.compile(r".*temperature.*")
        # (regex thresholds, resolved thresholds) - warning first, then critical
        threshold_sources = (
            (self.warning_regex_values, self.warning_values),
            (self.critical_regex_values, self.critical_values),
        )
        for entry in health_items:
            entry_name = entry["name"]
            if not temperature_pattern.match(entry_name):
                continue

            if self.use_regex:
                for patterns, resolved in threshold_sources:
                    # Only the first matching pattern provides the threshold
                    for pattern, threshold in patterns.items():
                        if pattern.match(entry_name):
                            resolved[entry_name] = threshold
                            break

            self.names.add(entry_name)
            self.values[entry_name] = float(entry["value"])

    def probe(self):
        """Register a ScalarContext per temperature and yield the matching metric."""
        for metric_name, temperature in self.values.items():
            context = nagiosplugin.ScalarContext(
                name=metric_name,
                warning=self.warning_values.get(metric_name),
                critical=self.critical_values.get(metric_name),
            )
            self._check.add(context)
            yield nagiosplugin.Metric(name=metric_name, value=temperature)
@cli.command("system.temperature")
@click.option(
    "warning_values", "--value-warning",
    multiple=True,
    help=(
        "Set a warning threshold for a value. "
        "Example: If cpu-temperature should be in the range of 40 and 60°C you can set "
        "--value-warning cpu-temperature:40:60 "
        "If cpu-temperature should not be higher than 50.5°C you can set "
        "--value-warning cpu-temperature:50.5 "
        "Can be specified multiple times"
    ),
)
@click.option(
    "critical_values", "--value-critical",
    multiple=True,
    help=(
        "Set a critical threshold for a value. "
        "Example: If cpu-temperature should be in the range of 40 and 60°C you can set "
        "--value-critical cpu-temperature:40:60 "
        "If cpu-temperature should not be higher than 50.5°C you can set "
        "--value-critical cpu-temperature:50.5 "
        "Can be specified multiple times"
    ),
)
@click.option(
    "--regex", "use_regex",
    is_flag=True,
    default=False,
    help="Treat values from --value-warning and --value-critical as regex to find all matching values",
)
@click.option(
    "--no-temperature-ok",
    is_flag=True,
    default=False,
    help="The check will be unknown if no temperature is available. Provide this option to ignore this.",
)
@click.option(
    "expected_names", "--expect-temperature",
    multiple=True,
    default=[],
    help="Name of the temperature to expect. Can be specified multiple times. Example: board-temperature1",
)
@click.pass_context
@nagiosplugin.guarded
def system_temperature(ctx, warning_values, critical_values, use_regex, no_temperature_ok, expected_names):
    """This command reads the information from /system/health and extracts all values containing the
    word temperature in its name. Like 'board-temperature', 'board-temperature1', 'cpu-temperature', ...
    Be aware that not all devices return the same values.
    """
    check = nagiosplugin.Check()

    # The resource fetches its data on construction, so the names are known right away
    temperatures = SystemTemperatureResource(
        cmd_options=ctx.obj,
        check=check,
        warning_values=warning_values,
        critical_values=critical_values,
        use_regex=use_regex,
    )
    check.add(temperatures)

    found_names = temperatures.names
    check.results.add(
        nagiosplugin.Result(
            nagiosplugin.state.Ok,
            hint=f"Looks like all temperatures are OK: {', '.join(sorted(found_names))}"
        )
    )

    if not (found_names or no_temperature_ok):
        check.results.add(
            nagiosplugin.Result(nagiosplugin.state.Unknown, hint="No temperatures found")
        )

    # Expected temperatures that have not been reported by the device
    absent_names = [wanted for wanted in expected_names if wanted not in found_names]
    if absent_names:
        check.results.add(
            nagiosplugin.Result(
                nagiosplugin.state.Warn,
                hint=f"Expected temperature(s) not found: {', '.join(absent_names)}"
            )
        )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,175 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import List, Optional
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger, RouterOSVersion
from ..resource import RouterOSCheckResource
class SystemUpdateResource(RouterOSCheckResource):
    """Resource comparing the installed RouterOS version with the latest version.

    The latest version is either given by the caller or taken from
    /system/package/update. All findings are added as results to the check
    that was handed to the constructor.
    """
    name = "Update"

    def __init__(
        self,
        cmd_options,
        check: nagiosplugin.Check,
        check_for_update: bool = False,
        latest_version: Optional[str] = None,
    ):
        super().__init__(cmd_options=cmd_options)

        self._check = check
        self._check_for_update = check_for_update
        self._installed_version = None
        # A version given by the caller takes precedence over the one reported by the device
        self._latest_version = RouterOSVersion(latest_version) if latest_version else None

    def _add_result(self, state, hint):
        # Small helper to attach an additional result to the check
        self._check.results.add(nagiosplugin.Result(state, hint=hint))

    def probe(self):
        """Fetch the update information, add the version/status results and return the channel metric."""
        logger.info("Fetching data ...")
        if self._check_for_update:
            logger.debug("Run command to check for updates ...")
            update_call = self.api("/system/package/update/check-for-updates")
            logger.debug("Waiting that update command finished")
            # Consuming the response waits until the command has finished
            tuple(update_call)

        update_info = tuple(self.api.path("/system/package/update"))[0]

        self._routeros_metric_values = [{"name": "channel", "type": None}]

        reported_installed = update_info.get("installed-version")
        if reported_installed:
            self._installed_version = RouterOSVersion(reported_installed)
            self._add_result(nagiosplugin.Ok, f"Installed version: {self._installed_version}")
        else:
            self._add_result(nagiosplugin.Warn, "Unable to get installed version")

        reported_latest = update_info.get("latest-version")
        if self._latest_version is None and reported_latest:
            self._latest_version = RouterOSVersion(reported_latest)

        if (
            self._installed_version
            and self._latest_version
            and self._installed_version < self._latest_version
        ):
            self._add_result(
                nagiosplugin.Critical,
                (
                    f"Update version '{self._latest_version}' available. "
                    f"Version installed '{self._installed_version}'"
                ),
            )

        status = update_info.get("status")
        if isinstance(status, str) and "error" in status.lower():
            self._add_result(nagiosplugin.Critical, f"Looks like there was an error: '{status}'")

        return self.get_routeros_metric_item(update_info)
class SystemUpdateChannelContext(nagiosplugin.Context):
    """Context that compares the update channel with a list of allowed channels."""

    def __init__(self, *args, channels: Optional[List[str]] = None, **kwargs):
        """Create the context.

        :param channels: Allowed channels; no or an empty list accepts every channel
        """
        super().__init__(*args, **kwargs)
        self._channels = channels

    def evaluate(self, metric, resource):
        """Return Ok for an accepted channel and Warn for any other channel."""
        channel = metric.value
        allowed = self._channels

        if allowed and channel not in allowed:
            return nagiosplugin.Result(
                nagiosplugin.Warn,
                hint=f"Update channel '{channel}' not in list with allowed channels: {', '.join(allowed)}"
            )

        return nagiosplugin.Result(
            nagiosplugin.Ok,
            hint=f"Update channel is '{channel}'"
        )
class SystemUpdateSummary(nagiosplugin.Summary):
    """Summary that lists the hints of all results if the check is ok."""

    def ok(self, results: List[nagiosplugin.Result]):
        """Join the hints of all results, or return a default text if there are none."""
        hints = [result.hint for result in results]
        return ", ".join(hints) if hints else "Looks good"
@cli.command("system.update")
@click.option(
    "--channel",
    "channels",
    default=None,
    multiple=True,
    help="Allowed update channel. Repeat to use multiple values."
)
@click.option(
    "--latest-version",
    "latest_version",
    default=None,
    help=(
        "Set a version that should at least be installed. "
        "Use this if the update server is not available or if you want check with your own update policy."
    )
)
@click.option(
    "--check-for-update",
    "check_for_update",
    is_flag=True,
    default=False,
    help=(
        "Actively check for updates. "
        "This will run the command /system/package/update/check-for-updates . "
        "If you don't want to use this feature you have to schedule a task to look for updates."
    )
)
@click.pass_context
@nagiosplugin.guarded
def system_update(ctx, channels, latest_version, check_for_update):
    """Check the installed RouterOS version and the update channel"""
    # The docstring above is the help text click shows for this command.
    check = nagiosplugin.Check()

    # The resource gets the check itself because it registers additional
    # results (installed version, available update, error status) on it.
    check.add(
        SystemUpdateResource(
            cmd_options=ctx.obj,
            check=check,
            check_for_update=check_for_update,
            latest_version=latest_version,
        ),
        SystemUpdateChannelContext(
            name="channel",
            channels=channels,
        ),
        SystemUpdateSummary(),
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import click
import librouteros
import librouteros.query
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class SystemUptimeResource(RouterOSCheckResource):
    """Resource that reports the uptime of a device in seconds."""

    name = "UPTIME"

    def __init__(self, cmd_options):
        super().__init__(cmd_options=cmd_options)

    def probe(self):
        """Query ``/system/resource`` and yield the ``uptime`` metric."""
        connection = self._connect_api()

        logger.info("Fetching data ...")
        # Only the uptime value is requested from the device
        query = connection.path("/system/resource").select(
            librouteros.query.Key("uptime"),
        )
        first_row = tuple(query)[0]

        uptime_seconds = self.parse_routeros_time_duration(first_row["uptime"])
        yield nagiosplugin.Metric(name="uptime", value=uptime_seconds, uom="s", min=0)
@cli.command("system.uptime")
@click.pass_context
@nagiosplugin.guarded
def system_uptime(ctx):
    """Get Uptime of a device"""
    options = ctx.obj

    # The uptime is only reported; the context gets no thresholds
    uptime_resource = SystemUptimeResource(cmd_options=options)
    uptime_context = nagiosplugin.ScalarContext(name="uptime")

    check = nagiosplugin.Check(uptime_resource, uptime_context)
    check.main(verbose=options["verbose"])

View file

@ -0,0 +1,150 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Optional, Tuple
import click
import nagiosplugin
from ..cli import cli
from ..helper import logger
from ..resource import RouterOSCheckResource
class ToolPingCheck(RouterOSCheckResource):
    """Resource that runs ``/ping`` on the device and reports the outcome."""

    name = "PING"

    def __init__(self, cmd_options, address):
        """Store the address to ping.

        :param cmd_options: Options handed over to the base resource
        :param address: Address passed to the ping command
        """
        super().__init__(cmd_options=cmd_options)

        self._address = address
        # Passed as ``count`` to the ping command and used as upper bound
        # of the sent/received metrics
        self._max_packages = 1

    def probe(self):
        """Run the ping command and yield metrics from its last reply."""
        leading_number = re.compile(r"^(?P<time>[0-9]+)(?P<uom>.*)$")

        def rtt_number(raw_value) -> Optional[int]:
            # Only the leading digits are used, the unit suffix is dropped
            found = leading_number.match(raw_value)
            return int(found.group("time")) if found else None

        connection = self._connect_api()

        logger.info("Call /ping command ...")
        replies = tuple(connection("/ping", address=self._address, count=self._max_packages))
        last_reply = replies[-1]

        yield nagiosplugin.Metric(
            name="packet_loss",
            value=last_reply["packet-loss"],
            uom="%",
            min=0,
            max=100,
        )

        for counter_name in ("sent", "received"):
            yield nagiosplugin.Metric(
                name=counter_name,
                value=last_reply[counter_name],
                min=0,
                max=self._max_packages,
            )

        if last_reply["received"] > 0:
            # Round trip times, size and ttl are only read if a reply was received
            for kind in ("min", "max", "avg"):
                yield nagiosplugin.Metric(
                    name=f"rtt_{kind}",
                    value=rtt_number(last_reply[f"{kind}-rtt"]),
                    min=0,
                )

            yield nagiosplugin.Metric(name="size", value=last_reply["size"])
            yield nagiosplugin.Metric(
                name="ttl",
                value=last_reply["ttl"],
                min=0,
                max=255,
            )
@cli.command("tool.ping")
@click.option(
    "--address",
    required=True,
    help="Address of device to ping",
)
@click.option(
    "--packet-loss-warning",
    help="Warning threshold for packet loss",
)
@click.option(
    "--packet-loss-critical",
    help="Critical threshold for packet loss",
)
@click.option(
    "--ttl-warning",
    help="Warning threshold for the Time-To-Live (TTL) value",
)
@click.option(
    "--ttl-critical",
    help="Critical threshold for the Time-To-Live (TTL) value",
)
@click.pass_context
@nagiosplugin.guarded
def tool_ping(ctx, address, packet_loss_warning, packet_loss_critical, ttl_warning, ttl_critical):
    """Execute a ping command on the device to check other devices"""
    # ``guarded`` (used by the other commands as well) makes sure that an
    # exception raised here is reported as check result instead of a traceback.
    check = nagiosplugin.Check(
        ToolPingCheck(
            cmd_options=ctx.obj,
            address=address
        ),
        # Only packet loss and ttl can be checked against thresholds
        nagiosplugin.ScalarContext(
            name="packet_loss",
            warning=packet_loss_warning,
            critical=packet_loss_critical
        ),
        nagiosplugin.ScalarContext(name="sent"),
        nagiosplugin.ScalarContext(name="received"),
        nagiosplugin.ScalarContext(name="rtt_avg"),
        nagiosplugin.ScalarContext(name="rtt_min"),
        nagiosplugin.ScalarContext(name="rtt_max"),
        nagiosplugin.ScalarContext(name="size"),
        nagiosplugin.ScalarContext(
            name="ttl",
            warning=ttl_warning,
            critical=ttl_critical
        ),
    )

    check.main(verbose=ctx.obj["verbose"])

View file

@ -0,0 +1,93 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from typing import Optional
import click
import nagiosplugin
# Root command group: collects the connection options shared by all checks
# and stores them in the click context object.
@click.group()
@click.option(
    "--host",
    required=True,
    help="Hostname or IP address of the device to connect to",
)
@click.option(
    "--hostname",
    help="Use this hostname to check the SSL certificates",
)
@click.option(
    "--port",
    default=None,
    help="The port to use. Defaults to 8728 for non SSL connections and 8729 for SSL connections",
)
@click.option(
    "--username",
    required=True,
    help="The username of the monitoring user. Do NOT use a user with admin privileges",
)
@click.option(
    "--password",
    required=True,
    help="The password of the monitoring user",
)
@click.option(
    "--routeros-version",
    default="auto",
    help=(
        "Version of RouterOS running on the device. "
        "The value 'auto' is special and if set the check will try to detect the version automatically. "
        "The 'auto' option is recommended. "
        "Examples: '6', '6.48.8', '7', '7.8', 'auto' "
        "(Default: auto)"
    )
)
@click.option(
    "--ssl/--no-ssl",
    "use_ssl",
    default=True,
    help="Use a SSL encrypted connections to communicate with the device",
)
@click.option(
    "--ssl-cafile",
    help="Custom CA file to check SSL certificates",
)
@click.option(
    "--ssl-capath",
    help="Custom path to look for CA files to check SSL certificates",
)
@click.option(
    "--ssl-force-no-certificate",
    is_flag=True,
    default=False,
    help="Force an anonymous connection",
)
@click.option(
    "--ssl-verify/--no-ssl-verify",
    default=True,
    help="Verify the SSL certificate",
)
@click.option("--ssl-verify-hostname/--no-ssl-verify-hostname", default=True)
@click.option("-v", "--verbose", count=True)
@click.pass_context
def cli(ctx, host: str, hostname: Optional[str], port: int, username: str, password: str, routeros_version: str,
        use_ssl: bool, ssl_cafile: Optional[str], ssl_capath: Optional[str], ssl_force_no_certificate: bool,
        ssl_verify: bool, ssl_verify_hostname: bool, verbose: int):
    # No docstring on purpose: click would display it as help text of the group.
    ctx.ensure_object(dict)

    # The sub commands read their options from this dictionary
    ctx.obj.update({
        "host": host,
        "hostname": hostname,
        "port": port,
        "username": username,
        "password": password,
        "routeros_version": routeros_version,
        "ssl": use_ssl,
        "ssl_cafile": ssl_cafile,
        "ssl_capath": ssl_capath,
        "ssl_force_no_certificate": ssl_force_no_certificate,
        "ssl_verify": ssl_verify,
        "ssl_verify_hostname": ssl_verify_hostname,
        "verbose": verbose,
    })

    # Hand the verbosity over to the nagiosplugin runtime
    nagiosplugin.Runtime().verbose = verbose

View file

@ -0,0 +1,93 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
import re
from typing import Optional, Union
import nagiosplugin
from nagiosplugin.state import Ok as STATE_Ok, Warn as STATE_Warn, Critical as STATE_Critical
class BooleanContext(nagiosplugin.Context):
    """Context that writes the truth value of a metric as 1 or 0 to the performance data."""

    def performance(self, metric, resource):
        """Return performance data with value 1 for a truthy metric value, else 0."""
        numeric_value = int(bool(metric.value))
        return nagiosplugin.performance.Performance(label=metric.name, value=numeric_value)
class PerfdataScalarContext(nagiosplugin.ScalarContext):
    """Scalar context that only provides performance data.

    The evaluation always returns Ok, whatever the metric value is.
    """

    def evaluate(self, metric, resource):
        """Return an Ok result without looking at the metric value."""
        ok_result = self.result_cls(STATE_Ok, None, metric)
        return ok_result

    def performance(self, metric, resource):
        """Return the performance data created by the scalar context."""
        return super().performance(metric, resource)
class SimplePositiveFloatContext(nagiosplugin.ScalarContext):
    """Scalar context that compares the absolute metric value with plain thresholds.

    The thresholds are kept as given and compared directly; they are not
    handed over to the scalar context as ranges.
    """

    def __init__(self, name, warning=None, critical=None, fmt_metric='{name} is {valueunit}',
                 result_cls=nagiosplugin.Result):
        super().__init__(name, fmt_metric=fmt_metric, result_cls=result_cls)

        self._warning = warning
        self._critical = critical

    def evaluate(self, metric, resource):
        """Return Critical, Warn or Ok depending on the absolute metric value.

        A threshold that is not set (or otherwise falsy) is skipped.
        """
        magnitude = abs(metric.value)

        state = STATE_Ok
        if self._critical and magnitude > self._critical:
            state = STATE_Critical
        elif self._warning and magnitude > self._warning:
            state = STATE_Warn

        return self.result_cls(state, None, metric)

    def performance(self, metric, resource):
        """Return the performance data created by the scalar context."""
        return super().performance(metric, resource)
class ScalarPercentContext(nagiosplugin.ScalarContext):
    """Scalar context whose thresholds may contain percent values of a total.

    Every ``<number>%`` inside a threshold is replaced by the matching
    share of the total before the threshold is turned into a range. The
    total is either a fixed value or read from an attribute of the resource.
    """

    def __init__(self, name, total_name: Optional[str] = None, total_value: Optional[Union[int, float]] = None,
                 warning=None, critical=None, fmt_metric='{name} is {valueunit}', result_cls=nagiosplugin.Result):
        """Create the context.

        :param total_name: Name of the resource attribute that holds the total
        :param total_value: Fixed total; preferred over ``total_name`` if given
        :param warning: Warning threshold, may contain percent values
        :param critical: Critical threshold, may contain percent values
        :raises ValueError: If neither ``total_value`` nor ``total_name`` is given
        """
        super().__init__(name, fmt_metric=fmt_metric, result_cls=result_cls)

        self._warning = warning
        self._critical = critical
        self._total_name = total_name
        self._total_value = total_value

        if total_value is None and total_name is None:
            raise ValueError("At least total_value or total_name must be given.")

        # The real ranges are calculated when a metric is processed
        self.warning = nagiosplugin.Range(None)
        self.critical = nagiosplugin.Range(None)

    def _prepare_ranges(self, metric, resource):
        """Translate the stored thresholds into ranges with absolute values."""
        if self._total_value is None:
            total = getattr(resource, self._total_name)
        else:
            total = self._total_value

        def to_absolute(match):
            if match.group("unit") != "%":
                raise ValueError("Unable to convert type")
            return str(float(total) * (float(match.group("value")) / 100))

        percent_pattern = re.compile(r"(?P<value>[\d.]+)(?P<unit>[%])")

        if self._warning is not None:
            self.warning = nagiosplugin.Range(percent_pattern.sub(to_absolute, self._warning))
        if self._critical is not None:
            self.critical = nagiosplugin.Range(percent_pattern.sub(to_absolute, self._critical))

    def evaluate(self, metric, resource):
        """Refresh the ranges and evaluate the metric with the scalar context."""
        self._prepare_ranges(metric, resource)
        return super().evaluate(metric, resource)

    def performance(self, metric, resource):
        """Refresh the ranges and create the performance data with the scalar context."""
        self._prepare_ranges(metric, resource)
        return super().performance(metric, resource)

View file

@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
class MissingValue(ValueError):
    """Raised if a value that is required for a calculation is not available."""

View file

@ -0,0 +1,144 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
# Modified by sepehr.ha@gmail.com
import importlib
import logging
import os
import re
from typing import List, Optional
# Logger shared by the modules of this package; it logs under the name 'nagiosplugin'.
logger = logging.getLogger('nagiosplugin')
# Pattern of a version string like "7.8", "6.48.6" or "7.9rc1".
# It contains layout whitespace and must be compiled with re.VERBOSE.
REGEX_VERSION_PATTERN = r"""
    (?P<release>[0-9]+(?:\.[0-9]+)*)
    (?P<pre>
        [-_\.]?
        (?P<pre_type>(a|b|c|rc|alpha|beta|pre|preview))
        [-_\.]?
        (?P<pre_serial>[0-9]+)?
    )?
"""


class RouterOSVersion(object):
    """Parsed RouterOS version that can be compared with other versions.

    Versions are compared by major, minor and patch number followed by the
    number of the pre-release part (0 if there is none). Missing release
    components count as 0, so "7.8" equals "7.8.0".

    NOTE(review): The kind of the pre-release (alpha, beta, rc) is not part
    of the comparison and a pre-release like "7.8rc1" compares greater than
    the final "7.8". Confirm that this ordering is intended.
    """

    # Compiled once for all instances
    _regex = re.compile(r"^\s*" + REGEX_VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)

    def __init__(self, version_string: str):
        """Parse the version string.

        :param version_string: Version like "7.8", "6.48.6" or "7.9rc1"
        :raises ValueError: If the string is not a valid version
        """
        m = self._regex.match(version_string)
        if not m:
            raise ValueError(f"Unable to parse version string: '{version_string}'")

        self.release = tuple(int(v) for v in m.group("release").split("."))
        # Pre-release part exactly as written in the version string, "" if absent
        self.pres = m.group("pre") or ""

        # Only major, minor and patch of the release take part in comparisons
        self._cmp_attribute_names = ("major", "minor", "patch")
        self._cmp_pre_names = ("pre",)

    def _cmp_key(self):
        """Return the tuple used to compare two versions."""
        return tuple(getattr(self, name) for name in self._cmp_attribute_names) + (self.pre,)

    def __eq__(self, other):
        if not isinstance(other, RouterOSVersion):
            return NotImplemented
        return self._cmp_key() == other._cmp_key()

    def __ge__(self, other):
        if not isinstance(other, RouterOSVersion):
            return NotImplemented
        return self._cmp_key() >= other._cmp_key()

    def __gt__(self, other):
        if not isinstance(other, RouterOSVersion):
            return NotImplemented
        return self._cmp_key() > other._cmp_key()

    def __le__(self, other):
        if not isinstance(other, RouterOSVersion):
            return NotImplemented
        return self._cmp_key() <= other._cmp_key()

    def __lt__(self, other):
        if not isinstance(other, RouterOSVersion):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def __repr__(self):
        return f"{self.__class__.__name__}('{self}')"

    def __str__(self):
        return ".".join(str(v) for v in self.release) + self.pres

    @property
    def major(self) -> int:
        """First component of the release, 0 if missing."""
        return self.release[0] if len(self.release) >= 1 else 0

    @property
    def minor(self) -> int:
        """Second component of the release, 0 if missing."""
        return self.release[1] if len(self.release) >= 2 else 0

    @property
    def patch(self) -> int:
        """Third component of the release, 0 if missing."""
        return self.release[2] if len(self.release) >= 3 else 0

    @property
    def pre(self) -> int:
        """Number of the pre-release part, 0 if there is no such number."""
        digits = "".join(c for c in self.pres if c.isdigit()) if self.pres else ""
        # A pre-release without number (e.g. "rc") has no digits at all
        return int(digits) if digits else 0
def escape_filename(value):
    """Return a lower case version of ``value`` that is usable as a file name.

    Every character that is neither a word character, whitespace nor a
    hyphen becomes an underscore. Surrounding whitespace is removed and each
    run of hyphens and whitespace is collapsed into a single hyphen.
    """
    sanitized = re.sub(r"[^\w\s-]", "_", value)
    sanitized = sanitized.strip().lower()
    return re.sub(r"[-\s]+", '-', sanitized)
def load_modules(pkg_names: Optional[List] = None):
    """Import all modules and sub packages found in the given packages.

    :param pkg_names: Package names, resolved relative to this package;
        defaults to ``[".check"]``

    A module that fails with an ImportError is logged and skipped.
    """
    base_names = [".check"] if pkg_names is None else pkg_names

    for base_name in base_names:
        logger.debug("Base package name: %s", base_name)
        base_pkg = importlib.import_module(base_name, package=__package__)
        logger.debug("Base package: %s", base_pkg)

        base_path = base_pkg.__path__[0]
        logger.debug("Base path: %s", base_path)

        for entry in os.listdir(base_path):
            if entry == "__init__.py":
                continue

            entry_path = os.path.join(base_path, entry)
            if entry[-3:] == '.py':
                child_name = entry[:-3]
            elif os.path.isdir(entry_path) and os.path.exists(os.path.join(entry_path, "__init__.py")):
                child_name = entry
            else:
                # Neither a python file nor a package
                continue

            mod_name = f"{base_name}.{child_name}"
            try:
                importlib.import_module(mod_name, package=__package__)
            except ImportError:
                logger.warning("Unable to load: '%s'", mod_name)
                logger.debug("An error occurred while importing '%s'", mod_name, exc_info=True)
            else:
                logger.info("Loaded '%s' successfully", mod_name)

View file

@ -0,0 +1,483 @@
# SPDX-FileCopyrightText: PhiBo DinoTools (2021)
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import date, datetime, time
from decimal import Decimal
import re
import ssl
from typing import Any, Dict, List, Optional, Union
import librouteros
import librouteros.query
import nagiosplugin
from .helper import logger, RouterOSVersion
from .exeption import MissingValue
class RouterOSCheckResource(nagiosplugin.Resource):
    """Base class of the resources that query a RouterOS device.

    It provides the API connection, parsers for value formats returned by
    RouterOS and helpers that turn API results into nagiosplugin metrics
    based on the definitions stored in ``self._routeros_metric_values``.
    """

    # Month abbreviations used by the US style date format
    month_mapping: Dict[str, int] = {
        "jan": 1,
        "feb": 2,
        "mar": 3,
        "apr": 4,
        "may": 5,
        "jun": 6,
        "jul": 7,
        "aug": 8,
        "sep": 9,
        "oct": 10,
        "nov": 11,
        "dec": 12,
    }

    # US style date and time, e.g. "jan/02/2023 03:04:05"
    regex_datetime = re.compile(
        r"(?P<month>[a-z]{3})/(?P<day>\d+)/(?P<year>\d{4})\s+(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)",
        flags=re.IGNORECASE
    )

    # ISO style date and time, e.g. "2023-01-02 03:04:05"
    regex_datetime_iso = re.compile(
        r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})\s+(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)"
    )

    # US style date, e.g. "jan/02/2023"
    regex_date = re.compile(
        r"(?P<month>[a-z]{3})/(?P<day>\d+)/(?P<year>\d{4})",
        flags=re.IGNORECASE
    )

    # ISO style date, e.g. "2023-01-02"
    regex_date_iso = re.compile(
        r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})",
        flags=re.IGNORECASE
    )

    # Time of day, e.g. "03:04:05"
    regex_time = re.compile(
        r"(?P<hour>\d+):(?P<minute>\d+):(?P<second>\d+)",
        flags=re.IGNORECASE
    )

    def __init__(self, cmd_options: Dict[str, Any]):
        """Store the options and remember the current time for rate calculations.

        :param cmd_options: Connection and check options; the keys read by
            this class are set by the command line interface
        """
        self._cmd_options = cmd_options
        # Definitions of the values that are converted into metrics
        self._routeros_metric_values: List[Dict[str, Any]] = []
        self._routeros_version: Optional[RouterOSVersion] = None
        self._api: Optional[librouteros.api.Api] = None

        self.current_time = datetime.now()

    @property
    def api(self):
        """API connection that is created on first access and reused afterwards."""
        if self._api is None:
            self._api = self.connect_api()
        return self._api

    @property
    def routeros_version(self):
        """RouterOS version, taken from the options or detected if set to 'auto'."""
        if self._routeros_version is None:
            configured_version = self._cmd_options["routeros_version"].strip()
            if configured_version.lower() == "auto":
                self._routeros_version = self._get_routeros_version()
            else:
                self._routeros_version = RouterOSVersion(configured_version)
        return self._routeros_version

    @staticmethod
    def _calc_rate(
        cookie: nagiosplugin.Cookie,
        name: str,
        cur_value: int,
        elapsed_seconds: Optional[float],
        factor: int
    ) -> float:
        """Calculate the change per second of a value since the last run.

        The current value is always stored in the cookie, even if no rate
        can be calculated.

        :param cookie: Cookie that holds the value of the last run
        :param name: Name of the value inside the cookie
        :param cur_value: Current value
        :param elapsed_seconds: Seconds since the last run
        :param factor: Factor the rate is multiplied with
        :raises MissingValue: If there is no old value or no usable elapsed time
        """
        old_value: Optional[int] = cookie.get(name)
        cookie[name] = cur_value
        if old_value is None:
            raise MissingValue(f"Unable to find old value for '{name}'")
        if elapsed_seconds is None:
            raise MissingValue("Unable to get elapsed seconds")
        if elapsed_seconds <= 0:
            # No time has passed (or the clock went backwards): dividing
            # would fail or produce a meaningless rate
            raise MissingValue("Elapsed seconds must be greater than zero")
        return (cur_value - old_value) / elapsed_seconds * factor

    def _connect_api(self) -> librouteros.api.Api:
        """Open a new API connection based on the stored options.

        The port defaults to 8729 for SSL connections and to 8728 otherwise.
        """
        options = self._cmd_options
        port = options["port"]
        extra_kwargs = {}

        if options["ssl"]:
            if port is None:
                port = 8729

            context_kwargs = {}
            if options["ssl_cafile"]:
                context_kwargs["cafile"] = options["ssl_cafile"]
            if options["ssl_capath"]:
                context_kwargs["capath"] = options["ssl_capath"]

            ssl_ctx = ssl.create_default_context(**context_kwargs)

            if options["ssl_force_no_certificate"]:
                # Only offer the anonymous (ADH) cipher suites
                ssl_ctx.check_hostname = False
                ssl_ctx.set_ciphers("ADH:@SECLEVEL=0")
            elif not options["ssl_verify"]:
                # We have to disable the hostname check if we disable certificate verification
                ssl_ctx.check_hostname = False
                ssl_ctx.verify_mode = ssl.CERT_NONE
            elif not options["ssl_verify_hostname"]:
                ssl_ctx.check_hostname = False

            def wrap_socket(socket):
                # Use the dedicated hostname for the certificate check if given
                server_hostname: Optional[str] = options["hostname"]
                if server_hostname is None:
                    server_hostname = options["host"]
                return ssl_ctx.wrap_socket(socket, server_hostname=server_hostname)

            extra_kwargs["ssl_wrapper"] = wrap_socket
        elif port is None:
            port = 8728

        return librouteros.connect(
            host=options["host"],
            username=options["username"],
            password=options["password"],
            port=port,
            timeout=options.get("timeout", 5),
            **extra_kwargs
        )

    @staticmethod
    def _convert_v6_list_to_v7(api_results) -> List[Dict[str, Any]]:
        """Convert a result with one key per value into a list of name/value items.

        :param api_results: Either the dictionary itself or a sequence whose
            first element is the dictionary
        :return: List of ``{"name": ..., "value": ...}`` dictionaries
        """
        v6_item = api_results if isinstance(api_results, dict) else api_results[0]
        return [{"name": name, "value": value} for name, value in v6_item.items()]

    def _get_routeros_version(self) -> RouterOSVersion:
        """Read the RouterOS version from ``/system/resource``."""
        results = tuple(self.api.path("/system/resource"))
        result: Dict[str, str] = results[0]
        # version: 7.8 (stable)
        version_string = result["version"].partition(" ")[0]
        return RouterOSVersion(version_string)

    def connect_api(self) -> librouteros.api.Api:
        """Return the API connection and create it if it does not exist yet."""
        if self._api is None:
            self._api = self._connect_api()
        return self._api

    @classmethod
    def parse_routeros_date(cls, date_string: str) -> date:
        """Parse a date in ISO ("2023-01-02") or US ("jan/02/2023") format.

        :raises ValueError: If the string matches none of the formats
        """
        # Try iso date
        # Looks like they have switched date format in 7.11
        m = cls.regex_date_iso.match(date_string)
        if m:
            return date(
                year=int(m.group("year")),
                month=int(m.group("month")),
                day=int(m.group("day"))
            )

        # Try US date
        m = cls.regex_date.match(date_string)
        if m:
            return date(
                year=int(m.group("year")),
                month=cls.month_mapping[m.group("month").lower()],
                day=int(m.group("day"))
            )

        raise ValueError("Unable to parse datetime string")

    @classmethod
    def parse_routeros_date_time(cls, date_string: str, time_string: str) -> datetime:
        """Combine a separately given date and time into a datetime."""
        parsed_date = cls.parse_routeros_date(date_string=date_string)
        parsed_time = cls.parse_routeros_time(time_string=time_string)
        return datetime.combine(parsed_date, parsed_time)

    @classmethod
    def parse_routeros_datetime(cls, datetime_string: str) -> datetime:
        """Parse a date with time in ISO or US format.

        Accepts "2023-01-02 03:04:05" as well as "jan/02/2023 03:04:05", in
        line with the formats accepted by ``parse_routeros_date``.

        :raises ValueError: If the string matches none of the formats
        """
        m = cls.regex_datetime_iso.match(datetime_string)
        if m:
            month = int(m.group("month"))
        else:
            m = cls.regex_datetime.match(datetime_string)
            if not m:
                raise ValueError("Unable to parse datetime string")
            month = cls.month_mapping[m.group("month").lower()]

        return datetime(
            year=int(m.group("year")),
            month=month,
            day=int(m.group("day")),
            hour=int(m.group("hour")),
            minute=int(m.group("minute")),
            second=int(m.group("second"))
        )

    @staticmethod
    def parse_routeros_speed(value_string: str) -> int:
        """Parse a speed like "100Mbps" and return it in bits per second.

        :raises ValueError: If the string or its unit prefix is unknown
        """
        factors = {
            "": 1,
            "K": 1000,
            "M": 1000 * 1000,
            "G": 1000 * 1000 * 1000,
        }

        m = re.compile(r"(?P<value>\d+)(?P<factor>[A-Z]*)bps").match(value_string)
        if not m:
            raise ValueError(f"Unable to parse speed string: '{value_string}'")

        factor = factors.get(m.group("factor"))
        if factor is None:
            raise ValueError(f"Unable to parse element '{m.group()}' of speed string: '{value_string}'")

        return int(m.group("value")) * factor

    @classmethod
    def parse_routeros_time(cls, time_string: str) -> time:
        """Parse a time of day like "03:04:05".

        :raises ValueError: If the string does not start with a time
        """
        m = cls.regex_time.match(time_string)
        if not m:
            raise ValueError("Unable to parse datetime string")

        return time(
            hour=int(m.group("hour")),
            minute=int(m.group("minute")),
            second=int(m.group("second"))
        )

    @staticmethod
    def parse_routeros_time_duration(time_string: str) -> float:
        """Parse a duration like "1w2d3h4m5s" and return it in seconds.

        A leading "-" makes the result negative. The result is rounded to
        microseconds.

        :raises ValueError: If the string contains an unknown unit
        """
        factors: Dict[str, Union[int, Decimal]] = {
            "us": Decimal("0.000001"),
            "ms": Decimal("0.001"),
            "s": 1,
            "m": 60,
            "h": 60 * 60,
            "d": 24 * 60 * 60,
            "w": 7 * 24 * 60 * 60,
        }

        value_is_negativ = time_string.startswith("-")

        seconds = Decimal(0)
        for m in re.compile(r"(?P<value>\d+)(?P<type>[a-z]+)").finditer(time_string):
            factor = factors.get(m.group("type"))
            if factor is None:
                raise ValueError(f"Unable to parse element '{m.group()}' of time string: '{time_string}'")
            seconds += int(m.group("value")) * factor

        seconds_float = float(round(seconds, 6))
        return -seconds_float if value_is_negativ else seconds_float

    @staticmethod
    def prepare_override_values(override_values: List[str]) -> Dict[str, str]:
        """Split "name:value" strings into a dictionary of override values.

        An entry without value is logged, but still stored with an empty value.
        """
        results: Dict[str, str] = {}
        for override_value in override_values:
            name, _, value = override_value.partition(":")
            if value is None or value == "":
                logger.warning(f"Unable to parse override value for {name}")
            results[name] = value
        return results

    @staticmethod
    def prepare_thresholds(thresholds: List[str]) -> Dict[str, str]:
        """Split "name:value" strings into a dictionary of thresholds.

        An entry without value is logged, but still stored with an empty value.
        """
        results: Dict[str, str] = {}
        for threshold in thresholds:
            name, _, value = threshold.partition(":")
            if value is None or value == "":
                logger.warning(f"Unable to parse threshold for {name}")
            results[name] = value
        return results

    @staticmethod
    def prepare_regex_thresholds(thresholds: List[str]) -> Dict[re.Pattern, str]:
        """Split "regex:value" strings into a dictionary with compiled regex keys.

        An entry without value is logged, but still stored with an empty value.
        """
        results: Dict[re.Pattern, str] = {}
        for threshold in thresholds:
            name, _, value = threshold.partition(":")
            if value is None or value == "":
                logger.warning(f"Unable to parse threshold for {name}")
            results[re.compile(name)] = value
        return results

    def get_routeros_select_keys(self) -> List[librouteros.query.Key]:
        """Return a query key for every defined metric value."""
        return [
            librouteros.query.Key(metric_value["name"])
            for metric_value in self._routeros_metric_values
        ]

    def _elapsed_seconds_since_last_run(self, cookie) -> Optional[float]:
        """Return the seconds since the time stored in the cookie, if any."""
        if not cookie:
            return None
        last_time_tuple = cookie.get("last_time")
        if not isinstance(last_time_tuple, (list, tuple)):
            return None
        last_time = datetime(*last_time_tuple[0:6])
        return (self.current_time - last_time).total_seconds()

    @staticmethod
    def _convert_routeros_metric_value(metric_value: Dict[str, Any], raw_value):
        """Apply the type conversion and the factor of a metric definition."""
        value = raw_value
        converter = metric_value.get("type")
        if callable(converter):
            try:
                value = converter(value)
            except ValueError:
                logger.warning(f"Error parsing value with name {metric_value['name']}", exc_info=True)
                raise
        return value * metric_value.get("factor", 1)

    def _build_routeros_metrics(
        self, metric_value: Dict[str, Any], value, name_prefix: str, cookie, elapsed_seconds: Optional[float]
    ) -> List[nagiosplugin.Metric]:
        """Create the metric and, if requested, the rate metric of one value."""
        metrics = []
        metric_value_name = metric_value["name"]

        if not metric_value.get("no_metric"):
            extra_kwargs = {n: metric_value[n] for n in ("min", "max", "uom") if n in metric_value}
            metrics.append(
                nagiosplugin.Metric(
                    name=name_prefix + metric_value.get("dst", metric_value_name),
                    value=value,
                    **extra_kwargs,
                )
            )

        if metric_value.get("rate"):
            try:
                rate_value = self._calc_rate(
                    cookie=cookie,
                    name=metric_value_name,
                    cur_value=value,
                    elapsed_seconds=elapsed_seconds,
                    factor=metric_value.get("rate_factor", 1)
                )
            except MissingValue as e:
                # No rate without a previous value, e.g. on the first run
                logger.debug(f"{e}", exc_info=e)
            else:
                metrics.append(
                    nagiosplugin.Metric(
                        name=f"{name_prefix}{metric_value.get('dst', metric_value_name)}_rate",
                        value=rate_value,
                        uom=metric_value.get("rate_uom"),
                        min=metric_value.get("rate_min"),
                        max=metric_value.get("rate_max"),
                    )
                )

        return metrics

    def get_routeros_metric_item(
        self, api_result: Dict[str, Any], name_prefix="", cookie=None
    ) -> List[nagiosplugin.Metric]:
        """Create metrics from a result that holds one key per value.

        :param api_result: Result item; converted values with a
            ``dst_value_name`` are written back into this dictionary
        :param name_prefix: Prefix of the metric names
        :param cookie: Cookie used to calculate rates
        :return: Created metrics
        """
        metrics = []
        elapsed_seconds = self._elapsed_seconds_since_last_run(cookie)

        for metric_value in self._routeros_metric_values:
            metric_value_name = metric_value["name"]
            if metric_value.get("missing_ok", False) and metric_value_name not in api_result:
                continue

            value = self._convert_routeros_metric_value(metric_value, api_result[metric_value_name])

            dst_value_name = metric_value.get("dst_value_name")
            if isinstance(dst_value_name, str):
                api_result[dst_value_name] = value

            metrics.extend(
                self._build_routeros_metrics(metric_value, value, name_prefix, cookie, elapsed_seconds)
            )

        if cookie:
            cookie["last_time"] = self.current_time.timetuple()

        return metrics

    def get_routeros_metrics(
        self, api_results: Union[List[Dict[str, Any]], Dict[str, Any]], name_prefix="", cookie=None
    ) -> List[nagiosplugin.Metric]:
        """Create metrics from a list of name/value result items.

        :param api_results: List of ``{"name": ..., "value": ...}`` items or
            a dictionary with one key per value, which is converted first
        :param name_prefix: Prefix of the metric names
        :param cookie: Cookie used to calculate rates
        :return: Created metrics
        """
        def get_api_result_by_name(items, name):
            for item in items:
                if name == item["name"]:
                    return item
            return None

        metrics = []
        elapsed_seconds = self._elapsed_seconds_since_last_run(cookie)

        if isinstance(api_results, dict):
            api_results = self._convert_v6_list_to_v7(api_results=api_results)
        else:
            # Work on a copy: derived values are appended below
            api_results = list(api_results)

        for metric_value in self._routeros_metric_values:
            metric_value_name = metric_value["name"]

            api_result = get_api_result_by_name(api_results, metric_value_name)
            if metric_value.get("missing_ok", False) and api_result is None:
                continue

            value = self._convert_routeros_metric_value(metric_value, api_result["value"])

            # Make the converted value available under its new name, an
            # already existing item with that name is left untouched
            dst_value_name = metric_value.get("dst_value_name")
            if isinstance(dst_value_name, str) and get_api_result_by_name(api_results, dst_value_name) is None:
                api_results.append({
                    "name": dst_value_name,
                    "value": value,
                })

            metrics.extend(
                self._build_routeros_metrics(metric_value, value, name_prefix, cookie, elapsed_seconds)
            )

        if cookie:
            cookie["last_time"] = self.current_time.timetuple()

        return metrics