RolnickLab · mihow · Jun 4, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -1098,6 +1098,8 @@ class SourceImageListSerializer(DefaultSerializer):
     deployment = DeploymentNestedSerializer(read_only=True)
     event = EventNestedSerializer(read_only=True)
     project = serializers.PrimaryKeyRelatedField(queryset=Project.objects.all(), required=False)
+    # Annotated in SourceImageViewSet.get_queryset (latest detection created_at).
+    last_processed = serializers.DateTimeField(read_only=True)
     # file = serializers.ImageField(allow_empty_file=False, use_url=True)
 
     class Meta:
@@ -1118,6 +1120,7 @@ class Meta:
             "detections_count",
             "occurrences_count",
             "taxa_count",
+            "last_processed",
             "detections",
             "project",
         ]

diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -315,6 +315,7 @@ def get_queryset(self) -> QuerySet:
         project = self.get_active_project()
         if project:
             qs = qs.filter(project=project)
+
         num_example_captures = 10
         if self.action == "retrieve":
             qs = qs.prefetch_related(
@@ -561,6 +562,7 @@ class SourceImageViewSet(DefaultViewSet, ProjectMixin):
         "deployment__name",
         "event__start",
         "path",
+        "last_processed",
     ]
     permission_classes = [ObjectPermission]
 
@@ -597,13 +599,16 @@ def get_queryset(self) -> QuerySet:
 
         if self.action == "list":
             # It's cumbersome to override the default list view, so customize the queryset here
+            queryset = self.filter_by_processed(queryset)
             queryset = self.filter_by_has_detections(queryset)
+            queryset = self.annotate_last_processed(queryset)
 
         elif self.action == "retrieve":
             # For detail view, include storage info and additional prefetches
             with_counts_default = True
             queryset = queryset.prefetch_related("jobs", "collections")
             queryset = self.add_adjacent_captures(queryset)
+            queryset = self.annotate_last_processed(queryset)
             with_detections_default = True
 
         with_detections = self.request.query_params.get("with_detections", with_detections_default)
@@ -627,15 +632,62 @@ def get_queryset(self) -> QuerySet:
 
         return queryset
 
+    def filter_by_processed(self, queryset: QuerySet) -> QuerySet:
+        """
+        Filter by whether a capture has been processed by a detection pipeline.
+
+        "Processed" means the capture has *any* Detection row, including the null
+        markers (``NULL_DETECTIONS_FILTER``) that record a "processed, found nothing"
+        result. This mirrors how the capture set list separates the processed count
+        from the (real) detections count. Use ``has_detections`` for real detections.
+
+        Reads the ``processed`` query param; when it is absent the queryset is
+        returned unchanged. Reuses the ``with_was_processed`` queryset annotation so
+        the "processed" definition stays in one place.
+        """
+        processed = self.request.query_params.get("processed")
+        if processed is not None:
+            processed = BooleanField(required=False).clean(processed)
+            queryset = queryset.with_was_processed().filter(was_processed=processed)
+        return queryset
+
     def filter_by_has_detections(self, queryset: QuerySet) -> QuerySet:
+        """
+        Filter on ``?has_detections``: whether a capture has any *real* detection (one
+        with a bounding box). Null markers (``NULL_DETECTIONS_FILTER``) are excluded, so a
+        processed-but-empty capture is ``has_detections=false``; with no param the queryset
+        is unchanged. Use ``processed`` to filter on processing status regardless of findings.
+        """
         has_detections = self.request.query_params.get("has_detections")
         if has_detections is not None:
             has_detections = BooleanField(required=False).clean(has_detections)
             queryset = queryset.annotate(
-                has_detections=models.Exists(Detection.objects.filter(source_image=models.OuterRef("pk"))),
+                has_detections=models.Exists(
+                    Detection.objects.filter(source_image=models.OuterRef("pk")).exclude(NULL_DETECTIONS_FILTER)
+                ),
             ).filter(has_detections=has_detections)
         return queryset
 
+    def annotate_last_processed(self, queryset: QuerySet) -> QuerySet:
+        """
+        Annotate each capture with ``last_processed`` — the most recent detection
+        ``created_at`` for that capture, i.e. when it was last run through a
+        detection pipeline. No Detection rows are excluded, so null markers count.
+        Null when the capture has never been processed; NullsLastOrderingFilter
+        sorts those last.
+
+        A correlated subquery (rather than a join + Max) keeps the row count stable
+        for pagination. The supporting index on Detection(source_image, -created_at)
+        keeps the per-row lookup cheap without denormalizing onto SourceImage.
+        """
+        return queryset.annotate(
+            last_processed=models.Subquery(
+                Detection.objects.filter(source_image=models.OuterRef("pk"))
+                .order_by("-created_at")
+                .values("created_at")[:1]
+            )
+        )
+
     def prefetch_detections(self, queryset: QuerySet, project: Project | None = None) -> QuerySet:
         """
         Return all detections for source images, but only include occurrence data

diff --git a/ami/main/migrations/0088_detection_det_srcimg_created_idx.py b/ami/main/migrations/0088_detection_det_srcimg_created_idx.py
@@ -0,0 +1,16 @@
+# Generated by Django 4.2.10 on 2026-05-29 12:14
+# Adds an index on Detection(source_image, -created_at) for the captures-list "last processed" lookup.
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("main", "0087_taxon_parents_json_gin_index"),
+    ]
+
+    operations = [
+        migrations.AddIndex(
+            model_name="detection",
+            index=models.Index(fields=["source_image", "-created_at"], name="det_srcimg_created_idx"),
+        ),
+    ]
diff --git a/ami/main/models.py b/ami/main/models.py
@@ -2852,6 +2852,11 @@ class Meta:
             "frame_num",
             "timestamp",
         ]
+        indexes = [
+            # Supports the "last processed" subquery on the captures list: the
+            # latest detection created_at per source image (index scan, top 1).
+            models.Index(fields=["source_image", "-created_at"], name="det_srcimg_created_idx"),
+        ]
 
     def best_classification(self):
         # @TODO where is this used?

diff --git a/ami/main/tests.py b/ami/main/tests.py
@@ -38,7 +38,13 @@
 from ami.ml.models.pipeline import Pipeline
 from ami.ml.models.processing_service import ProcessingService
 from ami.ml.models.project_pipeline_config import ProjectPipelineConfig
-from ami.tests.fixtures.main import create_captures, create_occurrences, create_taxa, setup_test_project
+from ami.tests.fixtures.main import (
+    create_captures,
+    create_detections,
+    create_occurrences,
+    create_taxa,
+    setup_test_project,
+)
 from ami.tests.fixtures.storage import populate_bucket
 from ami.users.models import User
 from ami.users.roles import BasicMember, Identifier, MLDataManager, ProjectManager, create_roles_for_project
@@ -1390,6 +1396,89 @@ def test_unrelated_list_endpoints_still_work_without_project_id(self):
                 self.assertEqual(response.status_code, status.HTTP_200_OK, path)
 
 
+class TestCapturesProcessedFilter(APITestCase):
+    """
+    The captures list distinguishes two related filters:
+
+    - ``?processed=true|false`` (the UI "Processing status" filter): a capture is
+      "processed" when it has *any* Detection row, including the null markers that
+      record a "processed, found nothing" result.
+    - ``?has_detections=true|false``: a capture has *real* detections (a detection
+      with a bounding box). Null markers are excluded.
+
+    Fixture: 4 captures — 2 with a real detection, 1 with only a null marker
+    (processed but found nothing), 1 untouched. So (no param -> 4):
+        processed=true       -> 3   has_detections=true  -> 2
+        processed=false      -> 1   has_detections=false -> 2
+    """
+
+    def setUp(self) -> None:
+        self.project, self.deployment = setup_test_project(reuse=False)
+        self.captures = create_captures(self.deployment, num_nights=1, images_per_night=4)
+        # Two captures get a real detection (bounding box present).
+        for capture in self.captures[:2]:
+            create_detections(capture, bboxes=[(0.1, 0.1, 0.2, 0.2)])
+        # One capture gets only a null marker: processed, but nothing found.
+        Detection.objects.create(
+            source_image=self.captures[2],
+            bbox=None,
+            timestamp=self.captures[2].timestamp,
+        )
+        # self.captures[3] is left untouched (never processed).
+        self.user = User.objects.create_user(email="proc-filter@insectai.org", is_staff=True)  # type: ignore
+        self.client.force_authenticate(user=self.user)
+        self.list_url = f"/api/v2/captures/?project_id={self.project.pk}"
+        return super().setUp()
+
+    def _count(self, query: str = "") -> int:
+        response = self.client.get(f"{self.list_url}{query}")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        return response.json()["count"]
+
+    def test_processed_counts_null_markers(self):
+        # Unfiltered baseline is all 4; processed = 2 real + 1 null marker, leaving 1 untouched.
+        self.assertEqual(self._count(), 4)
+        self.assertEqual(self._count("&processed=true"), 3)
+        self.assertEqual(self._count("&processed=false"), 1)
+
+    def test_has_detections_excludes_null_markers(self):
+        # Only the 2 real-detection captures; the processed-but-empty capture
+        # falls on the has_detections=false side.
+        self.assertEqual(self._count("&has_detections=true"), 2)
+        self.assertEqual(self._count("&has_detections=false"), 2)
+
+
+class TestCapturesLastProcessed(APITestCase):
+    """
+    The captures list annotates and can order by ``last_processed`` — the most
+    recent detection created_at for each capture. Captures that were never
+    processed expose ``last_processed = None`` and sort after processed ones.
+    """
+
+    def setUp(self) -> None:
+        self.project, self.deployment = setup_test_project(reuse=False)
+        self.captures = create_captures(self.deployment, num_nights=1, images_per_night=2)
+        # First capture is processed (has a detection); the second is left untouched.
+        create_detections(self.captures[0], bboxes=[(0.1, 0.1, 0.2, 0.2)])
+        self.user = User.objects.create_user(email="cap-lastproc@insectai.org", is_staff=True)  # type: ignore
+        self.client.force_authenticate(user=self.user)
+        self.url = f"/api/v2/captures/?project_id={self.project.pk}"
+        return super().setUp()
+
+    def _row(self, data: dict, capture_id: int) -> dict:
+        return next(c for c in data["results"] if c["id"] == capture_id)
+
+    def test_last_processed_annotated_and_orderable(self):
+        # One request exercises the annotation, the serializer field, and the ordering.
+        response = self.client.get(f"{self.url}&ordering=-last_processed")
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        data = response.json()
+        # Processed capture has a timestamp; the untouched one is null and must sort last.
+        self.assertIsNotNone(self._row(data, self.captures[0].pk)["last_processed"])
+        self.assertIsNone(self._row(data, self.captures[1].pk)["last_processed"])
+        self.assertEqual([c["id"] for c in data["results"]], [self.captures[0].pk, self.captures[1].pk])
+
+
 class TestProjectOwnerAutoAssignment(APITestCase):
     def setUp(self) -> None:
         self.user_1 = User.objects.create_user(email="testuser@insectai.org", is_staff=True, is_superuser=True)

diff --git a/docs/claude/planning/2026-05-28-captures-processed-filter-design.md b/docs/claude/planning/2026-05-28-captures-processed-filter-design.md
@@ -0,0 +1,80 @@
+# Captures list — "Processed / Not processed" filter
+
+Date: 2026-05-28
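+Status: design approved, pending spec review. NOTE: the shipped backend differs — it adds a dedicated `?processed=` param and narrows `has_detections` to real detections.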
+Scope: first of several planned captures-list filters; this PR ships the processed filter only.
+
+## Goal
+
+Add a "Processing status" filter to the Captures (SourceImage) list view, letting users
+narrow to captures that have been processed, not processed, or all (no filter). Lay the
+groundwork (a planned filter set) for additional filters in later PRs.
+
+"Processed" = the image has been run through detection. Because PR #1093 writes a null
+Detection marker for the "processed, found nothing" case, the presence of *any* Detection
+row is an accurate signal of "was processed."
+
+## Backend — original plan: no change required (superseded; see `filter_by_processed` in `ami/main/api/views.py`)
+
+The filter already exists and is exercised by the list endpoint:
+
+- `ami/main/api/views.py:630-636` — `SourceImageViewSet.filter_by_has_detections`
+  handles `?has_detections=true|false` by annotating
+  `Exists(Detection.objects.filter(source_image=OuterRef("pk")))` and filtering on it.
+  (`SourceImageViewSet` at `views.py:528`.)
+- Called from `get_queryset` only for the `list` action (`views.py:600`), which is what
+  the captures list uses.
+
+Decision: reuse the existing `has_detections` query param. Zero backend change, already
+tested behavior. The param name (`has_detections`) means "was processed" because of the
+null-marker convention; we surface it to users with the label "Processing status" and keep
+`has_detections` as the internal query key. This name/meaning gap is the one known wart and
+is documented here rather than fixed (a `was_processed` alias was considered and rejected to
+avoid extra surface area).
+
+## Frontend — four wiring changes
+
+1. **New component** `ui/src/components/filtering/filters/processing-status-filter.tsx`.
+   Model it on `verification-status-filter.tsx`. Two options: "Processed" (true) /
+   "Not processed" (false). Wire `onValueChange={onAdd}` directly so both true and false
+   are settable. (The generic `BooleanFilter` is unusable here: its "No" branch calls
+   `onClear()` instead of filtering to false — see `boolean-filter.tsx:21-27`.)
+   Use a translated label string for the two options (add to `utils/language` if needed).
+
+2. **Register the component** in `ui/src/components/filtering/filter-control.tsx`
+   `ComponentMap`: `has_detections: ProcessingStatusFilter`.
+
+3. **Register the filter** in `ui/src/utils/useFilters.ts` `AVAILABLE_FILTERS`:
+   `{ label: 'Processing status', field: 'has_detections', tooltip: { text: ... } }`.
+
+4. **Render it** on the captures page `ui/src/pages/captures/captures.tsx` (inside the
+   existing `FilterSection`, alongside `deployment` and `collections`):
+   `<FilterControl field="has_detections" />`.
+
+State, URL params, page reset, and the clear-X ("All") behavior all come from the existing
+`useFilters` machinery — no changes there.
+
+## Data flow
+
+UI select -> `addFilter('has_detections', 'true'|'false')` -> URL search param ->
+`useFilters` -> `useCaptures` builds `?has_detections=...` via `getFetchUrl`
+(`ui/src/data-services/utils.ts`) -> DRF `filter_by_has_detections` -> filtered queryset.
+Clear-X removes the param -> "All".
+
+## Testing
+
+- Backend: verify existing coverage for `?has_detections=true|false` on the captures list
+  endpoint; add a test if missing (both branches + absent param).
+- Frontend: manual verification against the running stack — select Processed, Not processed,
+  and clear; confirm result counts change and the URL param round-trips.
+
+## Out of scope (planned follow-up PRs)
+
+To live in a collapsible "Advanced" `FilterSection` on the captures page later:
+
+- **Date range** — `date_start`/`date_end` already in the FE registry with a `DateFilter`
+  component, but the SourceImage viewset needs backend support mapping them to a `timestamp`
+  range (new work).
+- **Station** — already available via the existing `deployment` filter.
+- **Site** — add `deployment__research_site` to `filterset_fields` + a Site filter component.
+- **Device** — add `deployment__device` to `filterset_fields` + a Device filter component.