From f4a3a6406a90bec3a4c07d93fb2aaada4e4fe5d0 Mon Sep 17 00:00:00 2001 From: dropthemic <20729448+dropthemic@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:08:18 -0800 Subject: [PATCH 1/4] Added raid check Added check for raid status --- check_synology.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/check_synology.py b/check_synology.py index fe72ae5..68388fe 100755 --- a/check_synology.py +++ b/check_synology.py @@ -15,7 +15,7 @@ parser.add_argument("username", help="the snmp user name", type=str) parser.add_argument("authkey", help="the auth key", type=str) parser.add_argument("privkey", help="the priv key", type=str) -parser.add_argument("mode", help="the mode", type=str, choices=["load", "memory", "disk", "storage", "update", "status"]) +parser.add_argument("mode", help="the mode", type=str, choices=["load", "memory", "disk", "storage", "update", "status", "raid"]) parser.add_argument("-w", help="warning value for selected mode", type=int) parser.add_argument("-c", help="critical value for selected mode", type=int) parser.add_argument("-p", help="the snmp port", type=int, dest="port", default=161) @@ -302,3 +302,24 @@ def exitCode(): # 3. Respond with textual and perfdata output and propagate exit code. print(state + ' - Model: %s, S/N: %s, System Temperature: %s C, System Status: %s, System Fan: %s, CPU Fan: %s, Powersupply : %s' % (status_model, status_serial, status_temperature, status_system, status_system_fan, status_cpu_fan, status_power) + ' | system_temp=%sc' % status_temperature) exitCode() + +if mode == 'raid': + output = '' + perfdata = '|' + for item in snmpwalk('1.3.6.1.4.1.6574.3.1.1.1'): + i = item.oid_index or item.oid.split('.')[-1] + storage_name = snmpget('1.3.6.1.4.1.6574.3.1.1.2.' + str(i)) + raid_status = snmpget('1.3.6.1.4.1.6574.3.1.1.3.' + str(i)) + + #Normal(1):The raid functions normally. + #Degrade(11):Degrade happens when a tolerable failure of disk(s) occurs. 
+ #Crashed(12):Raid has crashed and just uses for read-only operation. + #Repairing(2), Migrating(3), Expanding(4), Deleting(5), Creating(6), RaidSyncing(7), RaidParityChecking(8), RaidAssembling(9) and Canceling(10). + + if str(raid_status) in ("3","4","5","6","7","8","9","10"): state = "WARNING" + elif str(raid_status) != "1": + state = "CRITICAL" + output += ' - raid status: [' + storage_name + '] status=' + str(raid_status) + perfdata += ' "' + storage_name + '"=' + str(raid_status) + '' + print('%s%s %s' % (state, output, perfdata)) + exitCode() From 55c09730e66fbdbe0a4cd065c86aa17166f81aa8 Mon Sep 17 00:00:00 2001 From: dropthemic <20729448+dropthemic@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:17:49 -0800 Subject: [PATCH 2/4] Update README.md Added raid mode --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39bec85..e689ba0 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Custom timeouts (`-t`) and retries (`-r`) can be specified by using `-t` and `-r | storage | Detects and checks all disks (free, total, %) | if more used than w/c in % | | update | Shows the current DSM version and if DSM update is available | if update is "Unavailable", will trigger OK
if update is "Available", will trigger WARNING
otherwise: UNKNOWN | | status | Shows model, s/n, temp and status of system, fan, cpu fan and power supply | if temp higher than w/c in °C | - +| raid | Shows raid volume status | if status is 1 (Normal), will trigger OK; if status is 2-11 (Repairing through Canceling, or Degrade), will trigger WARNING; if status is 12 (Crashed), will trigger CRITICAL; otherwise: UNKNOWN | ## Example check From b7c6ef84f6fc7a55f730b7dbc93a3cef3cfe3965 Mon Sep 17 00:00:00 2001 From: dropthemic <20729448+dropthemic@users.noreply.github.com> Date: Thu, 21 May 2026 07:37:36 -0700 Subject: [PATCH 3/4] Update check_synology.py Fix RAID state escalation and document/test status mapping --- check_synology.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/check_synology.py b/check_synology.py index 68388fe..bf28c0e 100755 --- a/check_synology.py +++ b/check_synology.py @@ -106,7 +106,7 @@ def exitCode(): if critical and critical < int(math.ceil(float(load1))): state = 'CRITICAL' - print(state + ' - load average: %s, %s, %s' % (load1, load5, load15), '| load1=%sc' % load1, 'load5=%sc' % load5, 'load15=%sc' % load15) + print(state + ' - load average: %s, %s, %s' % (load1, load5, load15), '| load1=%s' % load1, 'load5=%s' % load5, 'load15=%s' % load15) exitCode() if mode == 'memory': @@ -121,7 +121,7 @@ def exitCode(): if critical and critical > int(memory_percent): state = 'CRITICAL' - print(state + ' - {:0.1f}% '.format(memory_percent) + 'usable ({0:0.1f} MB free and {1:0.1f} MB cached out of {2:0.1f} MB)'.format((memory_unused / 1024), (memory_cached / 1024), (memory_total / 1024)), '|memory_total=%dc' % memory_total, 'memory_unused=%dc' % memory_unused, 'memory_cached=%dc' % memory_cached, 'memory_usable=%dc' % memory_usable, 'memory_percent=%d' % memory_percent + '%') + print(state + ' - {:0.1f}% '.format(memory_percent) + 'usable ({0:0.1f} MB free and {1:0.1f} MB cached out of {2:0.1f} MB)'.format((memory_unused / 1024), (memory_cached / 1024), (memory_total / 1024)), '|memory_total=%dKB' % memory_total, 'memory_unused=%dKB' % memory_unused, 
'memory_cached=%dKB' % memory_cached, 'memory_usable=%dKB' % memory_usable, 'memory_percent=%d' % memory_percent + '%') exitCode() if mode == 'disk': @@ -165,7 +165,7 @@ def exitCode(): # 2. Compute textual and perfdata output. output += ' - ' + disk_name + ': Status: ' + disk_status + ', Temperature: ' + disk_temp + ' C' + ', Health status: ' + disk_health_status - perfdata += 'temperature' + disk_name + '=' + disk_temp + 'c ' + perfdata += 'temperature' + disk_name + '=' + disk_temp + ' ' # 3. Collect outcome for individual sensor state. @@ -233,7 +233,7 @@ def exitCode(): state = 'CRITICAL' output += ' - free space: ' + storage_name + ' ' + str(storage_free) + ' GB (' + str(storage_used) + ' GB of ' + str(storage_size) + ' GB used, ' + str(storage_used_percent) + '%)' - perfdata += storage_name + '=' + str(storage_used) + 'c ' + perfdata += storage_name + '=' + str(storage_used) + 'GB ' print('%s%s %s' % (state, output, perfdata)) exitCode() @@ -300,26 +300,35 @@ def exitCode(): state = 'CRITICAL' # 3. Respond with textual and perfdata output and propagate exit code. 
- print(state + ' - Model: %s, S/N: %s, System Temperature: %s C, System Status: %s, System Fan: %s, CPU Fan: %s, Powersupply : %s' % (status_model, status_serial, status_temperature, status_system, status_system_fan, status_cpu_fan, status_power) + ' | system_temp=%sc' % status_temperature) + print(state + ' - Model: %s, S/N: %s, System Temperature: %s C, System Status: %s, System Fan: %s, CPU Fan: %s, Powersupply : %s' % (status_model, status_serial, status_temperature, status_system, status_system_fan, status_cpu_fan, status_power) + ' | system_temp=%s' % status_temperature) exitCode() +RAID_WARNING_STATUSES = {"2", "3", "4", "5", "6", "7", "8", "9", "10", "11"} +RAID_CRITICAL_STATUSES = {"12"} +def next_raid_state(current_state, raid_status): + raid_status = str(raid_status) + if raid_status in RAID_CRITICAL_STATUSES: + return "CRITICAL" + if raid_status in RAID_WARNING_STATUSES and current_state != "CRITICAL": + return "WARNING" + if raid_status != "1" and current_state == "OK": + return "UNKNOWN" + return current_state + if mode == 'raid': + codes = { + "1": "Normal", "2": "Repairing", "3": "Migrating", "4": "Expanding", "5": "Deleting", "6": "Creating", "7": "RaidSyncing", + "8": "RaidParityChecking", "9": "RaidAssembling", "10": "Canceling", "11": "Degrade", "12": "Crashed" + } output = '' perfdata = '|' for item in snmpwalk('1.3.6.1.4.1.6574.3.1.1.1'): i = item.oid_index or item.oid.split('.')[-1] storage_name = snmpget('1.3.6.1.4.1.6574.3.1.1.2.' + str(i)) - raid_status = snmpget('1.3.6.1.4.1.6574.3.1.1.3.' + str(i)) - - #Normal(1):The raid functions normally. - #Degrade(11):Degrade happens when a tolerable failure of disk(s) occurs. - #Crashed(12):Raid has crashed and just uses for read-only operation. - #Repairing(2), Migrating(3), Expanding(4), Deleting(5), Creating(6), RaidSyncing(7), RaidParityChecking(8), RaidAssembling(9) and Canceling(10). 
- - if str(raid_status) in ("3","4","5","6","7","8","9","10"): state = "WARNING" - elif str(raid_status) != "1": - state = "CRITICAL" - output += ' - raid status: [' + storage_name + '] status=' + str(raid_status) - perfdata += ' "' + storage_name + '"=' + str(raid_status) + '' + raid_status = str(snmpget('1.3.6.1.4.1.6574.3.1.1.3.' + str(i))) + + state = next_raid_state(state, raid_status) + output += ' - raid status: [' + storage_name + '] status=' + (codes.get(str(raid_status)) or str(raid_status)) + perfdata += ' "' + storage_name + '"=' + raid_status print('%s%s %s' % (state, output, perfdata)) exitCode() From a86eca14593e7db1844db76b0452e8b23e70f5f7 Mon Sep 17 00:00:00 2001 From: dropthemic <20729448+dropthemic@users.noreply.github.com> Date: Thu, 21 May 2026 07:38:20 -0700 Subject: [PATCH 4/4] Create test_raid.py --- tests/test_raid.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/test_raid.py diff --git a/tests/test_raid.py b/tests/test_raid.py new file mode 100644 index 0000000..a436ad8 --- /dev/null +++ b/tests/test_raid.py @@ -0,0 +1,47 @@ +import ast +from pathlib import Path + +# Run with: python3 -m pytest -q tests/test_raid.py (requires pytest to be installed) + +def load_raid_logic(): + source = Path(__file__).resolve().parents[1] / "check_synology.py" + module = ast.parse(source.read_text()) + selected_nodes = [] + + for node in module.body: + if isinstance(node, ast.Assign): + targets = [target.id for target in node.targets if isinstance(target, ast.Name)] + if any(name in {"RAID_WARNING_STATUSES", "RAID_CRITICAL_STATUSES"} for name in targets): + selected_nodes.append(node) + elif isinstance(node, ast.FunctionDef) and node.name == "next_raid_state": + selected_nodes.append(node) + + namespace = {} + exec(compile(ast.Module(body=selected_nodes, type_ignores=[]), str(source), "exec"), namespace) + return namespace["next_raid_state"] + + +next_raid_state = load_raid_logic() + + +def 
test_raid_status_2_is_warning(): + assert next_raid_state("OK", "2") == "WARNING" + + +def test_raid_status_11_is_warning(): + assert next_raid_state("OK", "11") == "WARNING" + + +def test_raid_critical_is_not_downgraded(): + state = next_raid_state("OK", "12") + state = next_raid_state(state, "7") + + assert state == "CRITICAL" + + +def test_raid_unknown_status_becomes_unknown(): + assert next_raid_state("OK", "99") == "UNKNOWN" + + +def test_raid_ok_stays_ok_for_normal_status(): + assert next_raid_state("OK", "1") == "OK"