Skip to content

Commit 6498df5

Browse files
pandafy, nemesifier
authored and committed
[fix] Alerts aren't sent for passive metrics #670
Closes #670
1 parent d9183f5 commit 6498df5

File tree

2 files changed

+81
-36
lines changed

2 files changed

+81
-36
lines changed

openwisp_monitoring/monitoring/base/models.py

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -992,39 +992,38 @@ def _is_crossed_by(self, current_value, time=None, retention_policy=None):
992992
extra_fields = []
993993
if self.metric.alert_on_related_field:
994994
extra_fields = [self.metric.alert_field]
995-
if time is None:
996-
# retrieves latest measurements, ordered by most recent first
997-
points = self.metric.read(
998-
since=timezone.now() - timedelta(minutes=self._tolerance_search_range),
999-
limit=None,
1000-
order="-time",
1001-
retention_policy=retention_policy,
1002-
extra_fields=extra_fields,
1003-
)
1004-
# store a list with the results
1005-
results = [value_crossed]
1006-
# loop on each measurement starting from the most recent
1007-
for i, point in enumerate(points, 1):
1008-
# skip the first point because it was just added before this
1009-
# check started and its value coincides with ``current_value``
1010-
if i <= 1:
1011-
continue
1012-
utc_time = utc.localize(datetime.utcfromtimestamp(point["time"]))
1013-
# did this point cross the threshold? Append to result list
1014-
results.append(self._value_crossed(point[self.metric.alert_field]))
1015-
# tolerance is trespassed
1016-
if self._time_crossed(utc_time):
1017-
# if the latest results are consistent, the metric being
1018-
# monitored is not flapping and we can confidently return
1019-
# whether the value crosses the threshold or not
1020-
if len(set(results)) == 1:
1021-
return value_crossed
1022-
# otherwise, the results are flapping, the situation has not changed
1023-
# we will return a value that will not trigger changes
1024-
return not self.metric.is_healthy_tolerant
1025-
# otherwise keep looking back
995+
time = time or timezone.now()
996+
# retrieves latest measurements, ordered by most recent first
997+
points = self.metric.read(
998+
since=timezone.now() - timedelta(minutes=self._tolerance_search_range),
999+
limit=None,
1000+
order="-time",
1001+
retention_policy=retention_policy,
1002+
extra_fields=extra_fields,
1003+
)
1004+
# store a list with the results
1005+
results = [value_crossed]
1006+
# loop on each measurement starting from the most recent
1007+
for i, point in enumerate(points, 1):
1008+
# skip the first point because it was just added before this
1009+
# check started and its value coincides with ``current_value``
1010+
if i <= 1:
10261011
continue
1027-
# the search has not yielded any conclusion
1028-
# return result based on the current value and time
1029-
time = timezone.now()
1012+
utc_time = utc.localize(datetime.utcfromtimestamp(point["time"]))
1013+
# did this point cross the threshold? Append to result list
1014+
results.append(self._value_crossed(point[self.metric.alert_field]))
1015+
# tolerance is trespassed
1016+
if self._time_crossed(utc_time):
1017+
# if the latest results are consistent, the metric being
1018+
# monitored is not flapping and we can confidently return
1019+
# whether the value crosses the threshold or not
1020+
if len(set(results)) == 1:
1021+
return value_crossed
1022+
# otherwise, the results are flapping, the situation has not changed
1023+
# we will return a value that will not trigger changes
1024+
return not self.metric.is_healthy_tolerant
1025+
# otherwise keep looking back
1026+
continue
1027+
# the search has not yielded any conclusion
1028+
# return result based on the current value and time
10301029
return self._time_crossed(time) and value_crossed

openwisp_monitoring/monitoring/tests/test_monitoring_notifications.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111

1212
Metric = load_model("monitoring", "Metric")
13+
AlertSettings = load_model("monitoring", "AlertSettings")
1314
Notification = load_model("openwisp_notifications", "Notification")
1415
Device = load_model("config", "Device")
1516
Config = load_model("config", "Config")
@@ -358,8 +359,8 @@ def test_object_check_threshold_crossed_historical_data(self):
358359
self._write_metric(om, 99, time=start_time)
359360
om.refresh_from_db()
360361
self.assertEqual(om.is_healthy, False)
361-
self.assertEqual(om.is_healthy_tolerant, True)
362-
self.assertEqual(Notification.objects.count(), 0)
362+
self.assertEqual(om.is_healthy_tolerant, False)
363+
self.assertEqual(Notification.objects.count(), 1)
363364

364365
def test_flapping_metric_with_tolerance(self):
365366
self._create_admin()
@@ -679,3 +680,48 @@ def test_multiple_notifications(self):
679680
self.assertEqual(Notification.objects.count(), 1)
680681
n = notification_queryset.first()
681682
self._check_notification_parameters(n, admin, om, user)
683+
684+
def test_passive_metric_alert(self):
685+
self._get_admin()
686+
data = {
687+
"type": "DeviceMonitoring",
688+
"resources": {
689+
"cpus": 1,
690+
"load": [0, 0, 0],
691+
},
692+
}
693+
device = self._create_device(organization=self._create_org())
694+
self._post_data(device.id, device.key, data)
695+
cpu_metric = Metric.objects.get(key="cpu")
696+
self.assertEqual(Notification.objects.count(), 0)
697+
698+
AlertSettings.objects.update(
699+
custom_tolerance=5,
700+
custom_threshold=90,
701+
)
702+
703+
data["resources"]["load"] = [100, 100, 100]
704+
705+
with freeze_time(timezone.now() + timedelta(minutes=1)):
706+
response = self._post_data(device.id, device.key, data)
707+
self.assertEqual(response.status_code, 200)
708+
self.assertEqual(Notification.objects.count(), 0)
709+
cpu_metric.refresh_from_db()
710+
self.assertEqual(cpu_metric.is_healthy, False)
711+
self.assertEqual(cpu_metric.is_healthy_tolerant, True)
712+
713+
with freeze_time(timezone.now() + timedelta(minutes=4)):
714+
response = self._post_data(device.id, device.key, data)
715+
self.assertEqual(response.status_code, 200)
716+
self.assertEqual(Notification.objects.count(), 0)
717+
cpu_metric.refresh_from_db()
718+
self.assertEqual(cpu_metric.is_healthy, False)
719+
self.assertEqual(cpu_metric.is_healthy_tolerant, True)
720+
721+
with freeze_time(timezone.now() + timedelta(minutes=6)):
722+
response = self._post_data(device.id, device.key, data)
723+
self.assertEqual(response.status_code, 200)
724+
cpu_metric.refresh_from_db()
725+
self.assertEqual(cpu_metric.is_healthy, False)
726+
self.assertEqual(cpu_metric.is_healthy_tolerant, False)
727+
self.assertEqual(Notification.objects.count(), 1)

0 commit comments

Comments
 (0)