diff --git a/README.md b/README.md index 39bec85..e689ba0 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Custom timeouts (`-t`) and retries (`-r`) can be specified by using `-t` and `-r | storage | Detects and checks all disks (free, total, %) | if more used than w/c in % | | update | Shows the current DSM version and if DSM update is available | if update is "Unavailable", will trigger OK
if update is "Available", will trigger WARNING
otherwise: UNKNOWN | | status | Shows model, s/n, temp and status of system, fan, cpu fan and power supply | if temp higher than w/c in °C | - +| raid | Shows raid volume status | if status is 2-11 will trigger WARNING, 12 (Crashed) will trigger CRITICAL, any other unrecognized value: UNKNOWN | ## Example check diff --git a/check_synology.py b/check_synology.py index fe72ae5..bf28c0e 100755 --- a/check_synology.py +++ b/check_synology.py @@ -15,7 +15,7 @@ parser.add_argument("username", help="the snmp user name", type=str) parser.add_argument("authkey", help="the auth key", type=str) parser.add_argument("privkey", help="the priv key", type=str) -parser.add_argument("mode", help="the mode", type=str, choices=["load", "memory", "disk", "storage", "update", "status"]) +parser.add_argument("mode", help="the mode", type=str, choices=["load", "memory", "disk", "storage", "update", "status", "raid"]) parser.add_argument("-w", help="warning value for selected mode", type=int) parser.add_argument("-c", help="critical value for selected mode", type=int) parser.add_argument("-p", help="the snmp port", type=int, dest="port", default=161) @@ -106,7 +106,7 @@ def exitCode(): if critical and critical < int(math.ceil(float(load1))): state = 'CRITICAL' - print(state + ' - load average: %s, %s, %s' % (load1, load5, load15), '| load1=%sc' % load1, 'load5=%sc' % load5, 'load15=%sc' % load15) + print(state + ' - load average: %s, %s, %s' % (load1, load5, load15), '| load1=%s' % load1, 'load5=%s' % load5, 'load15=%s' % load15) exitCode() if mode == 'memory': @@ -121,7 +121,7 @@ def exitCode(): if critical and critical > int(memory_percent): state = 'CRITICAL' - print(state + ' - {:0.1f}% '.format(memory_percent) + 'usable ({0:0.1f} MB free and {1:0.1f} MB cached out of {2:0.1f} MB)'.format((memory_unused / 1024), (memory_cached / 1024), (memory_total / 1024)), '|memory_total=%dc' % memory_total, 'memory_unused=%dc' % memory_unused, 'memory_cached=%dc' % memory_cached, 'memory_usable=%dc' % memory_usable, 'memory_percent=%d' % 
memory_percent + '%') + print(state + ' - {:0.1f}% '.format(memory_percent) + 'usable ({0:0.1f} MB free and {1:0.1f} MB cached out of {2:0.1f} MB)'.format((memory_unused / 1024), (memory_cached / 1024), (memory_total / 1024)), '|memory_total=%dKB' % memory_total, 'memory_unused=%dKB' % memory_unused, 'memory_cached=%dKB' % memory_cached, 'memory_usable=%dKB' % memory_usable, 'memory_percent=%d' % memory_percent + '%') exitCode() if mode == 'disk': @@ -165,7 +165,7 @@ def exitCode(): # 2. Compute textual and perfdata output. output += ' - ' + disk_name + ': Status: ' + disk_status + ', Temperature: ' + disk_temp + ' C' + ', Health status: ' + disk_health_status - perfdata += 'temperature' + disk_name + '=' + disk_temp + 'c ' + perfdata += 'temperature' + disk_name + '=' + disk_temp + ' ' # 3. Collect outcome for individual sensor state. @@ -233,7 +233,7 @@ def exitCode(): state = 'CRITICAL' output += ' - free space: ' + storage_name + ' ' + str(storage_free) + ' GB (' + str(storage_used) + ' GB of ' + str(storage_size) + ' GB used, ' + str(storage_used_percent) + '%)' - perfdata += storage_name + '=' + str(storage_used) + 'c ' + perfdata += storage_name + '=' + str(storage_used) + 'GB ' print('%s%s %s' % (state, output, perfdata)) exitCode() @@ -300,5 +300,35 @@ def exitCode(): state = 'CRITICAL' # 3. Respond with textual and perfdata output and propagate exit code. 
- print(state + ' - Model: %s, S/N: %s, System Temperature: %s C, System Status: %s, System Fan: %s, CPU Fan: %s, Powersupply : %s' % (status_model, status_serial, status_temperature, status_system, status_system_fan, status_cpu_fan, status_power) + ' | system_temp=%sc' % status_temperature) + print(state + ' - Model: %s, S/N: %s, System Temperature: %s C, System Status: %s, System Fan: %s, CPU Fan: %s, Powersupply : %s' % (status_model, status_serial, status_temperature, status_system, status_system_fan, status_cpu_fan, status_power) + ' | system_temp=%s' % status_temperature) + exitCode() + +RAID_WARNING_STATUSES = {"2", "3", "4", "5", "6", "7", "8", "9", "10", "11"} +RAID_CRITICAL_STATUSES = {"12"} +def next_raid_state(current_state, raid_status): + raid_status = str(raid_status) + if raid_status in RAID_CRITICAL_STATUSES: + return "CRITICAL" + if raid_status in RAID_WARNING_STATUSES and current_state != "CRITICAL": + return "WARNING" + if raid_status != "1" and current_state == "OK": + return "UNKNOWN" + return current_state + +if mode == 'raid': + codes = { + "1": "Normal", "2": "Repairing", "3": "Migrating", "4": "Expanding", "5": "Deleting", "6": "Creating", "7": "RaidSyncing", + "8": "RaidParityChecking", "9": "RaidAssembling", "10": "Canceling", "11": "Degrade", "12": "Crashed" + } + output = '' + perfdata = '|' + for item in snmpwalk('1.3.6.1.4.1.6574.3.1.1.1'): + i = item.oid_index or item.oid.split('.')[-1] + storage_name = snmpget('1.3.6.1.4.1.6574.3.1.1.2.' + str(i)) + raid_status = str(snmpget('1.3.6.1.4.1.6574.3.1.1.3.' 
+ str(i))) + + state = next_raid_state(state, raid_status) + output += ' - raid status: [' + storage_name + '] status=' + (codes.get(str(raid_status)) or str(raid_status)) + perfdata += ' "' + storage_name + '"=' + raid_status + print('%s%s %s' % (state, output, perfdata)) exitCode() diff --git a/tests/test_raid.py b/tests/test_raid.py new file mode 100644 index 0000000..a436ad8 --- /dev/null +++ b/tests/test_raid.py @@ -0,0 +1,47 @@ +import ast +from pathlib import Path + +# Run with: python3 -m pytest -q tests/test_raid.py (requires pytest to be installed) + +def load_raid_logic(): + source = Path(__file__).resolve().parents[1] / "check_synology.py" + module = ast.parse(source.read_text()) + selected_nodes = [] + + for node in module.body: + if isinstance(node, ast.Assign): + targets = [target.id for target in node.targets if isinstance(target, ast.Name)] + if any(name in {"RAID_WARNING_STATUSES", "RAID_CRITICAL_STATUSES"} for name in targets): + selected_nodes.append(node) + elif isinstance(node, ast.FunctionDef) and node.name == "next_raid_state": + selected_nodes.append(node) + + namespace = {} + exec(compile(ast.Module(body=selected_nodes, type_ignores=[]), str(source), "exec"), namespace) + return namespace["next_raid_state"] + + +next_raid_state = load_raid_logic() + + +def test_raid_status_2_is_warning(): + assert next_raid_state("OK", "2") == "WARNING" + + +def test_raid_status_11_is_warning(): + assert next_raid_state("OK", "11") == "WARNING" + + +def test_raid_critical_is_not_downgraded(): + state = next_raid_state("OK", "12") + state = next_raid_state(state, "7") + + assert state == "CRITICAL" + + +def test_raid_unknown_status_becomes_unknown(): + assert next_raid_state("OK", "99") == "UNKNOWN" + + +def test_raid_ok_stays_ok_for_normal_status(): + assert next_raid_state("OK", "1") == "OK"