Unset triage status command

ksy36 · ksy36 · commit 66a967f09ad1 · 2026-05-04T14:37:00.000-04:00
diff --git a/server/reportmanager/cron.py b/server/reportmanager/cron.py
@@ -58,6 +58,11 @@ def update_report_stats():
     ReportHit.objects.filter(last_update__lt=old_cutoff).delete()
 
 
+@app.task(ignore_result=True)  # scheduled every 6 hours in settings
+def unset_buckets_triage_status():
+    call_command("unset_buckets_triage_status")  # run the management command
+
+
 @app.task(ignore_result=True)
 def bug_update_status():
     call_command("bug_update_status")
diff --git a/server/reportmanager/management/commands/unset_buckets_triage_status.py b/server/reportmanager/management/commands/unset_buckets_triage_status.py
@@ -0,0 +1,152 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+from datetime import timedelta
+from logging import getLogger
+
+from django.core.management import BaseCommand
+from django.db.models import Case, Count, Exists, FloatField, IntegerField, Max, OuterRef, Q, Subquery, When
+from django.db.models.functions import Coalesce
+from django.utils import timezone
+
+from reportmanager.models import Bucket, ReportEntry
+
+LOG = getLogger("reportmanager.autountriage")
+
+SHORT_WINDOW = 2  # days in the recent window (ends today, inclusive)
+LONG_WINDOW = 60  # days in the baseline window (ends today, inclusive)
+SPIKE_THRESHOLD = 3  # spike when short/long daily-rate ratio is >= this
+MIN_TOTAL_REPORTS = 5  # minimum reports in the long window to consider a spike
+MIN_SHORT_WINDOW_REPORTS = 2  # minimum reports in short window to trigger spike
+
+# Statuses where new data plausibly changes the decision
+SPIKE_SENSITIVE = {  # NOTE(review): not referenced by the command in this module
+    Bucket.TriageStatus.INCOMPLETE,
+    Bucket.TriageStatus.WORKS_FOR_ME,
+}
+
+# Statuses where the reason is structural — spikes rarely change anything
+SPIKE_INSENSITIVE = {  # NOTE(review): not referenced by the command in this module
+    Bucket.TriageStatus.CANT_TEST,
+    Bucket.TriageStatus.NON_COMPAT,
+    Bucket.TriageStatus.INVALID,
+}
+
+class Command(BaseCommand):
+    help = (
+        "Check triaged buckets and automatically untriage them based on "
+        "quality improvements (incomplete status) or spike detection (all statuses)"
+    )
+
+    def handle(self, *args, **options):
+        """Run both untriage passes and log how many buckets each one reset."""
+        LOG.info("Starting auto-untriage check")
+
+        incomplete_untriaged = self.unset_incomplete()
+        spike_untriaged = self.unset_status_if_spike()
+
+        total = incomplete_untriaged + spike_untriaged
+        LOG.info(
+            f"Auto-untriage complete: {incomplete_untriaged} incomplete, "
+            f"{spike_untriaged} spike-based, {total} total"
+        )
+
+    def unset_incomplete(self):
+        """Untriage incomplete buckets that received better quality reports.
+
+        A bucket is reset when a report made after ``triaged_at`` has a higher
+        ``ml_valid_probability`` than every report made up to ``triaged_at``.
+        Returns the number of buckets whose triage status was cleared.
+        """
+        # Highest probability among the reports that existed at triage time.
+        max_probability_at_triage = Subquery(
+            ReportEntry.objects.filter(
+                bucket=OuterRef('pk'),
+                reported_at__lte=OuterRef('triaged_at'),
+            ).order_by().values('bucket').annotate(
+                max_probability_at_triage=Max('ml_valid_probability')
+            ).values('max_probability_at_triage')[:1],
+            output_field=FloatField(),
+        )
+
+        has_better_new_report = Exists(
+            ReportEntry.objects.filter(
+                bucket=OuterRef('pk'),
+                reported_at__gt=OuterRef('triaged_at'),
+                ml_valid_probability__gt=OuterRef('max_probability_at_triage'),
+            )
+        )
+
+        # Coalesce to 0.0 so a bucket without a pre-triage maximum still compares.
+        to_untriage = (
+            Bucket.objects.filter(triage_status=Bucket.TriageStatus.INCOMPLETE)
+            .annotate(max_probability_at_triage=Coalesce(max_probability_at_triage, 0.0))
+            .annotate(has_better_new_report=has_better_new_report)
+            .filter(has_better_new_report=True)
+        )
+
+        ids = list(to_untriage.values_list('id', flat=True))
+        untriaged_count = Bucket.objects.filter(id__in=ids).update(
+            triage_status=None, triaged_at=None
+        )
+
+        for bucket_id in ids:
+            LOG.info(f"Auto-untriaged bucket {bucket_id} (incomplete): better quality report")
+
+        return untriaged_count
+
+    def unset_status_if_spike(self):
+        """Untriage buckets of any status whose report rate is spiking.
+
+        A bucket is reset when its reports per day over the last SHORT_WINDOW days
+        reach SPIKE_THRESHOLD times the rate over the last LONG_WINDOW days and both
+        MIN_* report counts are met. Only reports with non-empty comments count.
+        Returns the number of buckets reset.
+        """
+        # Only triaged buckets with a commented report after triaged_at are checked.
+        has_new_reports = Exists(
+            ReportEntry.objects.filter(
+                bucket=OuterRef('pk'),
+                reported_at__gt=OuterRef('triaged_at'),
+            ).exclude(comments="")
+        )
+        triaged_buckets = Bucket.objects.filter(
+            triage_status__isnull=False
+        ).annotate(has_new_reports=has_new_reports).filter(has_new_reports=True)
+
+        # Both windows end today and are inclusive, hence the "- 1".
+        end_date = timezone.now().date()
+        short_window_start = end_date - timedelta(days=SHORT_WINDOW - 1)
+        long_window_start = end_date - timedelta(days=LONG_WINDOW - 1)
+
+        untriaged_count = 0
+        for bucket in triaged_buckets:
+            reports = ReportEntry.objects.filter(
+                bucket=bucket,
+                reported_at__date__gte=long_window_start,
+                reported_at__date__lte=end_date,
+            ).exclude(comments="")
+
+            short_count = reports.filter(reported_at__date__gte=short_window_start).count()
+            long_count = reports.count()
+            if long_count < MIN_TOTAL_REPORTS or short_count < MIN_SHORT_WINDOW_REPORTS:
+                continue
+
+            short_avg = short_count / SHORT_WINDOW
+            long_avg = long_count / LONG_WINDOW
+            if long_avg == 0:
+                continue
+
+            ratio = short_avg / long_avg
+            if ratio >= SPIKE_THRESHOLD:
+                status = bucket.triage_status
+                bucket.triage_status = None
+                bucket.triaged_at = None
+                bucket.save(update_fields=['triage_status', 'triaged_at'])
+                untriaged_count += 1
+                LOG.info(
+                    f"Auto-untriaged bucket {bucket.id} ({status}): "
+                    f"spike detected (ratio={ratio:.2f}, {short_count}/{long_count} reports)"
+                )
+
+        return untriaged_count
diff --git a/server/server/settings.py b/server/server/settings.py
@@ -311,6 +311,10 @@ def resolver_context_processor(request):
         "task": "reportmanager.cron.cleanup_old_reports",
         "schedule": 30 * 60,
     },
+    "Unset triage status for buckets every 6 hours": {
+        "task": "reportmanager.cron.unset_buckets_triage_status",
+        "schedule": 60 * 60 * 6,
+    },
     "Backfill missing report data evry 12 hours": {
         "task": "reportmanager.cron.backfill_missing_report_data",
         "schedule": 60 * 60 * 12,
diff --git a/tests/test_unset_buckets_triage_status.py b/tests/test_unset_buckets_triage_status.py