Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,8 @@ class SourceImageListSerializer(DefaultSerializer):
deployment = DeploymentNestedSerializer(read_only=True)
event = EventNestedSerializer(read_only=True)
project = serializers.PrimaryKeyRelatedField(queryset=Project.objects.all(), required=False)
# Annotated in SourceImageViewSet.get_queryset (latest detection created_at).
last_processed = serializers.DateTimeField(read_only=True)
# file = serializers.ImageField(allow_empty_file=False, use_url=True)

class Meta:
Expand All @@ -1118,6 +1120,7 @@ class Meta:
"detections_count",
"occurrences_count",
"taxa_count",
"last_processed",
"detections",
"project",
]
Expand Down
54 changes: 53 additions & 1 deletion ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def get_queryset(self) -> QuerySet:
project = self.get_active_project()
if project:
qs = qs.filter(project=project)

num_example_captures = 10
if self.action == "retrieve":
qs = qs.prefetch_related(
Expand Down Expand Up @@ -561,6 +562,7 @@ class SourceImageViewSet(DefaultViewSet, ProjectMixin):
"deployment__name",
"event__start",
"path",
"last_processed",
]
permission_classes = [ObjectPermission]

Expand Down Expand Up @@ -597,13 +599,16 @@ def get_queryset(self) -> QuerySet:

if self.action == "list":
# It's cumbersome to override the default list view, so customize the queryset here
queryset = self.filter_by_processed(queryset)
queryset = self.filter_by_has_detections(queryset)
queryset = self.annotate_last_processed(queryset)

elif self.action == "retrieve":
# For detail view, include storage info and additional prefetches
with_counts_default = True
queryset = queryset.prefetch_related("jobs", "collections")
queryset = self.add_adjacent_captures(queryset)
queryset = self.annotate_last_processed(queryset)
with_detections_default = True

with_detections = self.request.query_params.get("with_detections", with_detections_default)
Expand All @@ -627,15 +632,62 @@ def get_queryset(self) -> QuerySet:

return queryset

def filter_by_processed(self, queryset: QuerySet) -> QuerySet:
    """
    Narrow captures by whether a detection pipeline has processed them.

    Driven by the optional ``processed`` query parameter. A capture counts as
    "processed" when it has *any* Detection row, and that includes the null
    markers (``NULL_DETECTIONS_FILTER``) written for a "processed, found
    nothing" result. This is the same split the capture set list uses between
    its processed count and its (real) detections count. To filter on real
    detections only, use ``has_detections`` instead.

    The definition of "processed" is not repeated here: it comes from the
    ``with_was_processed`` queryset annotation.

    Returns the queryset unchanged when the parameter is absent.
    """
    raw_value = self.request.query_params.get("processed")
    if raw_value is None:
        # No filter requested.
        return queryset

    wanted = BooleanField(required=False).clean(raw_value)
    annotated = queryset.with_was_processed()
    return annotated.filter(was_processed=wanted)

def filter_by_has_detections(self, queryset: QuerySet) -> QuerySet:
    """
    Filter by whether a capture has any *real* detections (a detection with a
    bounding box).

    Driven by the optional ``has_detections`` query parameter. Null detection
    markers (``NULL_DETECTIONS_FILTER``) are excluded from the existence check,
    so a capture that was processed but yielded nothing falls on the
    ``has_detections=false`` side. Use the ``processed`` param to filter on
    processing status regardless of findings.

    Args:
        queryset: The captures queryset to narrow.

    Returns:
        The queryset unchanged when the parameter is absent; otherwise the
        queryset annotated with a boolean ``has_detections`` and filtered to
        the requested value.
    """
    has_detections = self.request.query_params.get("has_detections")
    if has_detections is None:
        return queryset

    has_detections = BooleanField(required=False).clean(has_detections)
    # Exactly one ``has_detections`` annotation: the existence check must skip
    # null markers, otherwise this filter would collapse into ``processed``.
    real_detections = Detection.objects.filter(source_image=models.OuterRef("pk")).exclude(
        NULL_DETECTIONS_FILTER
    )
    return queryset.annotate(has_detections=models.Exists(real_detections)).filter(
        has_detections=has_detections
    )

def annotate_last_processed(self, queryset: QuerySet) -> QuerySet:
    """
    Add a ``last_processed`` annotation to every capture.

    The value is the ``created_at`` of the capture's most recent Detection,
    i.e. when it was last run through a detection pipeline. It is null for a
    capture that has never been processed; NullsLastOrderingFilter sorts those
    last.

    This uses a correlated subquery instead of a join + Max so the row count
    stays stable for pagination. The Detection(source_image, -created_at)
    index makes the per-row lookup an index scan, which keeps this cheap
    without denormalizing a timestamp onto SourceImage.
    """
    detections_for_capture = Detection.objects.filter(source_image=models.OuterRef("pk"))
    # Newest first, single column, single row: the shape Subquery expects.
    newest_created_at = detections_for_capture.order_by("-created_at").values("created_at")[:1]
    return queryset.annotate(last_processed=models.Subquery(newest_created_at))

def prefetch_detections(self, queryset: QuerySet, project: Project | None = None) -> QuerySet:
"""
Return all detections for source images, but only include occurrence data
Expand Down
16 changes: 16 additions & 0 deletions ami/main/migrations/0088_detection_det_srcimg_created_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.2.10 on 2026-05-29 12:14

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the Detection(source_image, -created_at) index ``det_srcimg_created_idx``."""

    dependencies = [("main", "0087_taxon_parents_json_gin_index")]

    operations = [
        migrations.AddIndex(
            model_name="detection",
            index=models.Index(
                name="det_srcimg_created_idx",
                fields=["source_image", "-created_at"],
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2852,6 +2852,11 @@ class Meta:
"frame_num",
"timestamp",
]
indexes = [
# Supports the "last processed" subquery on the captures list: the
# latest detection created_at per source image (index scan, top 1).
models.Index(fields=["source_image", "-created_at"], name="det_srcimg_created_idx"),
]

def best_classification(self):
# @TODO where is this used?
Expand Down
91 changes: 90 additions & 1 deletion ami/main/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@
from ami.ml.models.pipeline import Pipeline
from ami.ml.models.processing_service import ProcessingService
from ami.ml.models.project_pipeline_config import ProjectPipelineConfig
from ami.tests.fixtures.main import create_captures, create_occurrences, create_taxa, setup_test_project
from ami.tests.fixtures.main import (
create_captures,
create_detections,
create_occurrences,
create_taxa,
setup_test_project,
)
from ami.tests.fixtures.storage import populate_bucket
from ami.users.models import User
from ami.users.roles import BasicMember, Identifier, MLDataManager, ProjectManager, create_roles_for_project
Expand Down Expand Up @@ -1390,6 +1396,89 @@ def test_unrelated_list_endpoints_still_work_without_project_id(self):
self.assertEqual(response.status_code, status.HTTP_200_OK, path)


class TestCapturesProcessedFilter(APITestCase):
"""
The captures list distinguishes two related filters:

- ``?processed=true|false`` (the UI "Processing status" filter): a capture is
"processed" when it has *any* Detection row, including the null markers that
record a "processed, found nothing" result.
- ``?has_detections=true|false``: a capture has *real* detections (a detection
with a bounding box). Null markers are excluded.

Fixture: 4 captures — 2 with a real detection, 1 with only a null marker
(processed but found nothing), 1 untouched. So:
processed=true -> 3 has_detections=true -> 2
processed=false -> 1 has_detections=false -> 2
"""

def setUp(self) -> None:
self.project, self.deployment = setup_test_project(reuse=False)
self.captures = create_captures(self.deployment, num_nights=1, images_per_night=4)
# Two captures get a real detection (bounding box present).
for capture in self.captures[:2]:
create_detections(capture, bboxes=[(0.1, 0.1, 0.2, 0.2)])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Use pixel-space bbox fixtures in these new detection rows.

These new tests use normalized-looking bbox values, but this repo’s detection bbox convention is pixel coordinates. Please switch to integer pixel boxes to match canonical behavior and avoid brittle assumptions in future validation paths.

💡 Proposed test fixture update
-            create_detections(capture, bboxes=[(0.1, 0.1, 0.2, 0.2)])
+            create_detections(capture, bboxes=[(10, 10, 20, 20)])
...
-        create_detections(self.captures[0], bboxes=[(0.1, 0.1, 0.2, 0.2)])
+        create_detections(self.captures[0], bboxes=[(10, 10, 20, 20)])

Based on learnings: Detection.bbox/BoundingBox values in this repo use absolute pixel coordinate space (not normalized [0–1] floats).

Also applies to: 1460-1460

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@ami/main/tests.py` at line 1420, The test is passing normalized [0–1] bbox
values to create_detections but this codebase expects absolute pixel coordinates
(Detection.bbox/BoundingBox in pixel space); update the calls to
create_detections (e.g., the invocation with bboxes=[(0.1, 0.1, 0.2, 0.2)] and
the similar one at the other location) to use integer pixel boxes (x1,y1,x2,y2)
that match the capture fixture dimensions (use actual pixel coordinates or
compute pixels from the fixture size) so downstream validation uses canonical
pixel-space boxes.

# One capture gets only a null marker: processed, but nothing found.
Detection.objects.create(
source_image=self.captures[2],
bbox=None,
timestamp=self.captures[2].timestamp,
)
# self.captures[3] is left untouched (never processed).
self.user = User.objects.create_user(email="proc-filter@insectai.org", is_staff=True) # type: ignore
self.client.force_authenticate(user=self.user)
self.list_url = f"/api/v2/captures/?project_id={self.project.pk}"
return super().setUp()

def _count(self, query: str = "") -> int:
    """
    Return the ``count`` field of the captures list response for ``query``.

    ``query`` is appended verbatim to ``self.list_url``. The request is also
    asserted to succeed with HTTP 200.
    """
    url = self.list_url + query
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    payload = response.json()
    return payload["count"]

def test_processed_counts_null_markers(self):
    """``processed`` treats a null-marker capture as processed."""
    # processed=true  -> 2 real-detection captures + 1 null-marker capture.
    # processed=false -> the single untouched capture.
    expectations = (("&processed=true", 3), ("&processed=false", 1))
    for query, expected in expectations:
        self.assertEqual(self._count(query), expected)

def test_has_detections_excludes_null_markers(self):
    """``has_detections`` counts only captures with a real detection."""
    # Only the 2 captures with a real detection match "true".
    with_real = self._count("&has_detections=true")
    self.assertEqual(with_real, 2)
    # The processed-but-empty capture joins the "false" side.
    without_real = self._count("&has_detections=false")
    self.assertEqual(without_real, 2)


class TestCapturesLastProcessed(APITestCase):
    """
    The captures list annotates and can order by ``last_processed`` — the most
    recent detection created_at for each capture. Captures that were never
    processed expose ``last_processed = None``.

    Fixture: 2 captures — the first has one detection, the second has none.
    """

    def setUp(self) -> None:
        self.project, self.deployment = setup_test_project(reuse=False)
        self.captures = create_captures(self.deployment, num_nights=1, images_per_night=2)
        # First capture is processed (has a detection); the second is left untouched.
        # The bbox is given in absolute pixel coordinates rather than normalized
        # [0-1] floats, so the fixture matches how detection boxes are stored.
        create_detections(self.captures[0], bboxes=[(10, 10, 20, 20)])
        self.user = User.objects.create_user(email="cap-lastproc@insectai.org", is_staff=True)  # type: ignore
        self.client.force_authenticate(user=self.user)
        self.url = f"/api/v2/captures/?project_id={self.project.pk}"
        return super().setUp()

    def _row(self, data: dict, capture_id: int) -> dict:
        """Return the list-response row for ``capture_id`` (StopIteration if absent)."""
        return next(c for c in data["results"] if c["id"] == capture_id)

    def test_last_processed_annotated_and_orderable(self):
        # One request exercises the annotation, the serializer field, and the
        # ordering registration together.
        response = self.client.get(f"{self.url}&ordering=-last_processed")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        data = response.json()
        processed_pk = self.captures[0].pk
        untouched_pk = self.captures[1].pk
        # Processed capture has a timestamp; the untouched one is null.
        self.assertIsNotNone(self._row(data, processed_pk)["last_processed"])
        self.assertIsNone(self._row(data, untouched_pk)["last_processed"])
        # A 200 alone does not prove the ordering was applied, so check the
        # order itself: nulls sort last, which puts the processed capture first.
        ordered_ids = [row["id"] for row in data["results"]]
        self.assertLess(ordered_ids.index(processed_pk), ordered_ids.index(untouched_pk))


class TestProjectOwnerAutoAssignment(APITestCase):
def setUp(self) -> None:
self.user_1 = User.objects.create_user(email="testuser@insectai.org", is_staff=True, is_superuser=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Captures list — "Processed / Not processed" filter

Date: 2026-05-28
Status: design approved, pending spec review
Scope: first of several planned captures-list filters; this PR ships the processed filter only.

## Goal

Add a "Processing status" filter to the Captures (SourceImage) list view, letting users
narrow to captures that have been processed, not processed, or all (no filter). Lay the
groundwork (a planned filter set) for additional filters in later PRs.

"Processed" = the image has been run through detection. Because PR #1093 writes a null
Detection marker for the "processed, found nothing" case, the presence of *any* Detection
row is an accurate signal of "was processed."

## Backend — no change required

The filter already exists and is exercised by the list endpoint:

- `ami/main/api/views.py:630-636` — `SourceImageViewSet.filter_by_has_detections`
handles `?has_detections=true|false` by annotating
`Exists(Detection.objects.filter(source_image=OuterRef("pk")))` and filtering on it.
(`SourceImageViewSet` at `views.py:528`.)
- Called from `get_queryset` only for the `list` action (`views.py:600`), which is what
the captures list uses.

Decision: reuse the existing `has_detections` query param. This needs zero backend change
and relies on already tested behavior. Because of the null-marker convention, every
processed capture has at least one Detection row, so `has_detections` effectively means
"was processed"; we surface it to users with the label "Processing status" and keep
`has_detections` as the internal query key. This name/meaning gap is the one known wart and
is documented here rather than fixed (a `was_processed` alias was considered and rejected to
avoid extra surface area).

## Frontend — four wiring changes

1. **New component** `ui/src/components/filtering/filters/processing-status-filter.tsx`.
Model on `verification-status-filter.tsx`. Two options: "Processed" (true) /
"Not processed" (false). Wire `onValueChange={onAdd}` directly so both true and false
are settable. (The generic `BooleanFilter` is unusable here: its "No" branch calls
`onClear()` instead of filtering to false — see `boolean-filter.tsx:21-27`.)
Use a translated label string for the two options (add to `utils/language` if needed).

2. **Register the component** in `ui/src/components/filtering/filter-control.tsx`
`ComponentMap`: `has_detections: ProcessingStatusFilter`.

3. **Register the filter** in `ui/src/utils/useFilters.ts` `AVAILABLE_FILTERS`:
`{ label: 'Processing status', field: 'has_detections', tooltip: { text: ... } }`.

4. **Render it** on the captures page `ui/src/pages/captures/captures.tsx` (inside the
existing `FilterSection`, alongside `deployment` and `collections`):
`<FilterControl field="has_detections" />`.

State, URL params, page reset, and the clear-X ("All") behavior all come from the existing
`useFilters` machinery — no changes there.

## Data flow

UI select -> `addFilter('has_detections', 'true'|'false')` -> URL search param ->
`useFilters` -> `useCaptures` builds `?has_detections=...` via `getFetchUrl`
(`ui/src/data-services/utils.ts`) -> DRF `filter_by_has_detections` -> filtered queryset.
Clear-X removes the param -> "All".

## Testing

- Backend: verify existing coverage for `?has_detections=true|false` on the captures list
endpoint; add a test if missing (both branches + absent param).
- Frontend: manual verification against the running stack — select Processed, Not processed,
and clear; confirm result counts change and the URL param round-trips.

## Out of scope (planned follow-up PRs)

To live in a collapsible "Advanced" `FilterSection` on the captures page later:

- **Date range** — `date_start`/`date_end` already in the FE registry with a `DateFilter`
component, but the SourceImage viewset needs backend support mapping them to a `timestamp`
range (new work).
- **Station** — already available via the existing `deployment` filter.
- **Site** — add `deployment__research_site` to `filterset_fields` + a Site filter component.
- **Device** — add `deployment__device` to `filterset_fields` + a Device filter component.
Loading
Loading