Skip to content

Commit c869207

Browse files
committed
Unset triage status command
1 parent db32b1a commit c869207

4 files changed

Lines changed: 414 additions & 0 deletions

File tree

server/reportmanager/cron.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ def update_report_stats():
5858
ReportHit.objects.filter(last_update__lt=old_cutoff).delete()
5959

6060

61+
@app.task(ignore_result=True)
def unset_buckets_triage_status():
    """Task wrapper: run the ``unset_buckets_triage_status`` management command."""
    call_command("unset_buckets_triage_status")
64+
65+
6166
@app.task(ignore_result=True)
6267
def bug_update_status():
6368
call_command("bug_update_status")
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# This Source Code Form is subject to the terms of the Mozilla Public
2+
# License, v. 2.0. If a copy of the MPL was not distributed with this
3+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
from datetime import timedelta
5+
from logging import getLogger
6+
7+
from django.core.management import BaseCommand
8+
from django.db.models import Case, Count, Exists, FloatField, IntegerField, Max, OuterRef, Q, Subquery, When
9+
from django.db.models.functions import Coalesce
10+
from django.utils import timezone
11+
12+
from reportmanager.models import Bucket, ReportEntry
13+
14+
LOG = getLogger("reportmanager.autountriage")

# --- Spike-detection tuning -------------------------------------------------
# Window lengths are expressed in days.
SHORT_WINDOW = 2
LONG_WINDOW = 60
# A spike is a short-window daily average at least this many times the
# long-window daily average.
SPIKE_THRESHOLD = 3
# Minimum number of reports required in the long window.
MIN_TOTAL_REPORTS = 5
# Minimum number of reports required in the short window to trigger a spike.
MIN_SHORT_WINDOW_REPORTS = 2

# NOTE(review): neither of the two status sets below is referenced by the
# command class in this file; confirm whether they are used elsewhere.

# Statuses where new data plausibly changes the decision.
SPIKE_SENSITIVE = {Bucket.TriageStatus.INCOMPLETE, Bucket.TriageStatus.WORKS_FOR_ME}

# Statuses where the reason is structural; spikes rarely change anything.
SPIKE_INSENSITIVE = {
    Bucket.TriageStatus.CANT_TEST,
    Bucket.TriageStatus.NON_COMPAT,
    Bucket.TriageStatus.INVALID,
}
34+
35+
class Command(BaseCommand):
    """Clear the triage status of buckets that deserve another look.

    Two independent passes are run:

    * ``unset_incomplete`` handles buckets triaged as ``INCOMPLETE`` that have
      since received a report with a higher ``ml_valid_probability`` than any
      report they had at triage time.
    * ``unset_status_if_spike`` handles buckets with any triage status whose
      recent report volume spiked relative to the long-window average.
    """

    help = (
        "Check triaged buckets and automatically untriage them based on "
        "quality improvements (incomplete status) or spike detection (all statuses)"
    )

    def handle(self, *args, **options):
        """Run both untriage passes and log a summary of the results."""
        LOG.info("Starting auto-untriage check")

        # The incomplete pass runs first; buckets it clears no longer have a
        # triage status, so the spike pass will not pick them up again.
        incomplete_untriaged = self.unset_incomplete()
        spike_untriaged = self.unset_status_if_spike()

        total = incomplete_untriaged + spike_untriaged
        LOG.info(
            f"Auto-untriage complete: {incomplete_untriaged} incomplete, "
            f"{spike_untriaged} spike-based, {total} total"
        )

    def unset_incomplete(self):
        """Untriage incomplete buckets that have received better quality reports.

        A bucket qualifies when at least one report filed after ``triaged_at``
        has an ``ml_valid_probability`` strictly greater than the maximum seen
        among reports filed up to ``triaged_at`` (treated as 0.0 when there is
        no such maximum).

        Returns:
            The number of rows updated, as reported by ``QuerySet.update``.
        """
        # Highest probability among the reports the bucket had when triaged.
        max_probability_at_triage = Subquery(
            ReportEntry.objects.filter(
                bucket=OuterRef("pk"),
                reported_at__lte=OuterRef("triaged_at"),
            )
            .order_by()  # drop any default ordering so it cannot affect the GROUP BY
            .values("bucket")
            .annotate(max_probability_at_triage=Max("ml_valid_probability"))
            .values("max_probability_at_triage")[:1],
            output_field=FloatField(),
        )

        # Refers to the annotation added below, so annotation order matters.
        has_better_new_report = Exists(
            ReportEntry.objects.filter(
                bucket=OuterRef("pk"),
                reported_at__gt=OuterRef("triaged_at"),
                ml_valid_probability__gt=OuterRef("max_probability_at_triage"),
            )
        )

        to_untriage = (
            Bucket.objects.filter(triage_status=Bucket.TriageStatus.INCOMPLETE)
            .annotate(max_probability_at_triage=Coalesce(max_probability_at_triage, 0.0))
            .annotate(has_better_new_report=has_better_new_report)
            .filter(has_better_new_report=True)
        )

        # Materialize the ids first so the same set is both updated and logged.
        ids = list(to_untriage.values_list("id", flat=True))
        untriaged_count = Bucket.objects.filter(id__in=ids).update(
            triage_status=None, triaged_at=None
        )

        for bucket_id in ids:
            LOG.info(f"Auto-untriaged bucket {bucket_id} (incomplete): better quality report")

        return untriaged_count

    @staticmethod
    def _spike_ratio(short_count, long_count):
        """Return short-window/long-window daily-average ratio, or None if undefined."""
        long_avg = long_count / LONG_WINDOW
        if long_avg == 0:
            return None
        return (short_count / SHORT_WINDOW) / long_avg

    def unset_status_if_spike(self):
        """Untriage buckets with any status that are experiencing a report spike.

        Only buckets with at least one report (with a non-empty ``comments``
        value) filed after ``triaged_at`` are examined. For each, reports with
        non-empty comments are counted over the last ``LONG_WINDOW`` and
        ``SHORT_WINDOW`` days (both inclusive of today). The bucket is
        untriaged when both minimum-count thresholds are met and the ratio of
        daily averages is at least ``SPIKE_THRESHOLD``.

        Returns:
            The number of buckets whose triage status was cleared.
        """
        has_new_reports = Exists(
            ReportEntry.objects.filter(
                bucket=OuterRef("pk"),
                reported_at__gt=OuterRef("triaged_at"),
            ).exclude(comments="")
        )
        triaged_buckets = (
            Bucket.objects.filter(triage_status__isnull=False)
            .annotate(has_new_reports=has_new_reports)
            .filter(has_new_reports=True)
        )

        # NOTE(review): `timezone.now().date()` and the `__date` lookups below
        # may use different time zones depending on USE_TZ / TIME_ZONE;
        # confirm they agree for this deployment.
        end_date = timezone.now().date()
        # "- 1" because the windows include end_date itself.
        short_window_start = end_date - timedelta(days=SHORT_WINDOW - 1)
        long_window_start = end_date - timedelta(days=LONG_WINDOW - 1)
        LOG.debug(
            "Spike windows: short starts %s, long starts %s, both end %s",
            short_window_start,
            long_window_start,
            end_date,
        )

        untriaged_count = 0
        for bucket in triaged_buckets:
            reports = ReportEntry.objects.filter(
                bucket=bucket,
                reported_at__date__gte=long_window_start,
                reported_at__date__lte=end_date,
            ).exclude(comments="")

            short_count = reports.filter(reported_at__date__gte=short_window_start).count()
            long_count = reports.count()

            # Too little data to call anything a spike.
            if long_count < MIN_TOTAL_REPORTS or short_count < MIN_SHORT_WINDOW_REPORTS:
                continue

            ratio = self._spike_ratio(short_count, long_count)
            if ratio is None:
                continue

            LOG.debug(
                "Bucket %s: ratio=%.2f (short_count=%d, long_count=%d)",
                bucket.id,
                ratio,
                short_count,
                long_count,
            )

            if ratio < SPIKE_THRESHOLD:
                continue

            # Remember the old status for the log line before clearing it.
            status = bucket.triage_status
            bucket.triage_status = None
            bucket.triaged_at = None
            bucket.save(update_fields=["triage_status", "triaged_at"])
            untriaged_count += 1
            LOG.info(
                f"Auto-untriaged bucket {bucket.id} ({status}): "
                f"spike detected (ratio={ratio:.2f}, {short_count}/{long_count} reports)"
            )

        return untriaged_count

server/server/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ def resolver_context_processor(request):
311311
"task": "reportmanager.cron.cleanup_old_reports",
312312
"schedule": 30 * 60,
313313
},
314+
"Unset triage status for buckets every 6 hours": {
315+
"task": "reportmanager.cron.unset_buckets_triage_status",
316+
"schedule": 60 * 60 * 6,
317+
},
314318
"Backfill missing report data evry 12 hours": {
315319
"task": "reportmanager.cron.backfill_missing_report_data",
316320
"schedule": 60 * 60 * 12,

0 commit comments

Comments
 (0)