RolnickLab · mihow · May 27, 2026 · May 22, 2026 · May 22, 2026 · May 26, 2026
diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py
@@ -588,13 +588,27 @@ def get_taxa(self, obj):
         return [{"id": taxon.id, "name": taxon.name} for taxon in obj.taxa.all()]
 
 
+def agreement_requested(request: Request | None) -> bool:
+    """Whether ``with_agreement=true`` is set, gating the heavier agreed_exact_count."""
+    if request is None:
+        return False
+    value = request.query_params.get("with_agreement", "")
+    return str(value).lower() in ("true", "1", "yes", "on")
+
+
 class TaxonListSerializer(DefaultSerializer):
     # latest_detection = DetectionNestedSerializer(read_only=True)
     occurrences = serializers.SerializerMethodField()
     parents = TaxonParentSerializer(many=True, read_only=True, source="parents_json")
     parent_id = serializers.PrimaryKeyRelatedField(queryset=Taxon.objects.all(), source="parent")
     tags = serializers.SerializerMethodField()
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # agreed_exact_count is a gated annotation: omit it unless with_agreement=true.
+        if not agreement_requested(self.context.get("request")):
+            self.fields.pop("agreed_exact_count", None)
+
     def get_tags(self, obj):
         tag_list = getattr(obj, "prefetched_tags", [])
         return TagSerializer(tag_list, many=True, context=self.context).data
@@ -609,6 +623,9 @@ class Meta:
             "parents",
             "details",
             "occurrences_count",
+            "verified_count",
+            "agreed_with_prediction_count",
+            "agreed_exact_count",
             "occurrences",
             "tags",
             "last_detected",
@@ -886,6 +903,9 @@ class Meta:
             "parents",
             "details",
             "occurrences_count",
+            "verified_count",
+            "agreed_with_prediction_count",
+            "agreed_exact_count",
             "events_count",
             "occurrences",
             "gbif_taxon_key",

diff --git a/ami/main/api/views.py b/ami/main/api/views.py
@@ -6,7 +6,6 @@
 from django.core import exceptions
 from django.db import models
 from django.db.models import OuterRef, Prefetch, Q, Subquery
-from django.db.models.functions import Coalesce
 from django.db.models.query import QuerySet
 from django.forms import BooleanField, CharField, IntegerField
 from django.shortcuts import get_object_or_404
@@ -37,6 +36,8 @@
 from ami.utils.storages import ConnectionTestResult
 
 from ..models import (
+    BEST_IDENTIFICATION_ORDER,
+    BEST_MACHINE_PREDICTION_ORDER,
     NULL_DETECTIONS_FILTER,
     Classification,
     Deployment,
@@ -1026,7 +1027,9 @@ class CustomOccurrenceDeterminationFilter(CustomTaxonFilter):
     def filter_queryset(self, request, queryset, view):
         taxon = self.get_filter_taxon(request, query_params=self.query_params)
         if taxon:
-            # Here the queryset is the Occurrence queryset
+            # Here the queryset is the Occurrence queryset.
+            # The literal parents_json containment (constant RHS) is what the GIN index from
+            # migration 0087 serves — this hierarchical taxon filter is the index's main consumer.
             return queryset.filter(
                 models.Q(determination=taxon) | models.Q(determination__parents_json__contains=[{"id": taxon.pk}])
             )
@@ -1477,6 +1480,7 @@ class TaxonViewSet(DefaultViewSet, ProjectMixin):
         "created_at",
         "updated_at",
         "occurrences_count",
+        "verified_count",
         "last_detected",
         "best_determination_score",
         "name",
@@ -1634,10 +1638,8 @@ def get_taxa_observed(
         If a project is passed, only return taxa that have been observed.
         Also add the number of occurrences and the last time it was detected.
 
-        Uses efficient subqueries with default filters applied directly via Q objects
-        to leverage composite indexes on (determination_id, project_id, event_id, determination_score).
-        This avoids the N+1 query problem by building a single Q filter that can be reused
-        across all subqueries.
+        Counts are computed by annotate_taxon_counts from one filtered occurrence set
+        (occurrence_filters + the project's default filters), not per-taxon subqueries.
         """
         occurrence_filters = self.get_occurrence_filters(project)
 
@@ -1654,54 +1656,181 @@ def get_taxa_observed(
             apply_default_taxa_filter=apply_default_taxa_filter,
         )
 
-        # Combine base occurrence filters with default filters
-        base_filter = models.Q(
+        qs = self.annotate_taxon_counts(
+            qs,
             occurrence_filters,
-            determination_id=models.OuterRef("id"),
+            default_filters_q,
+            restrict_to_observed=not include_unobserved,
         )
 
-        base_filter = base_filter & default_filters_q
+        return qs
 
-        # Count occurrences - uses composite index (determination_id, project_id, event_id, determination_score)
-        occurrences_count_subquery = models.Subquery(
-            Occurrence.objects.filter(base_filter)
-            .values("determination_id")
-            .annotate(count=models.Count("id"))
-            .values("count")[:1],
-            output_field=models.IntegerField(),
+    def _include_agreement(self) -> bool:
+        """Whether the heavier ``agreed_exact_count`` annotation should be computed."""
+        if self.action == "retrieve":
+            return True
+        return bool(BooleanField(required=False).clean(self.request.query_params.get("with_agreement")))
+
+    @staticmethod
+    def _case_from_map(mapping: dict, default, output_field: models.Field) -> models.expressions.Combinable:
+        """Turn a precomputed ``{taxon_id: value}`` map into a constant-time ``CASE``.
+
+        The result is constant per row, so it is DB-sortable, paginatable, and stripped
+        from the pagination ``COUNT`` — unlike a per-taxon correlated subquery, which is
+        re-evaluated for every row and (in ``COUNT``) for every taxon in the project.
+        """
+        if not mapping:
+            return models.Value(default, output_field=output_field)
+        return models.Case(
+            *(
+                models.When(id=taxon_id, then=models.Value(value, output_field=output_field))
+                for taxon_id, value in mapping.items()
+            ),
+            default=models.Value(default, output_field=output_field),
+            output_field=output_field,
         )
 
-        # Get best score - uses same composite index
-        best_score_subquery = models.Subquery(
-            Occurrence.objects.filter(base_filter)
-            .values("determination_id")
-            .annotate(max_score=models.Max("determination_score"))
-            .values("max_score")[:1],
-            output_field=models.FloatField(),
-        )
+    def annotate_taxon_counts(
+        self,
+        qs: QuerySet,
+        occurrence_filters: models.Q,
+        default_filters_q: models.Q,
+        *,
+        restrict_to_observed: bool,
+    ) -> QuerySet:
+        """Centralised per-(project, taxon) count annotations for the taxa endpoint.
+
+        Every count is derived from one filtered occurrence set
+        (``occurrence_filters`` + the project default filters) by precomputing
+        ``{taxon_id: value}`` maps in Python and applying them as constant-time ``CASE``
+        annotations (:meth:`_case_from_map`), rather than per-taxon correlated subqueries.
+        The subquery form does not scale once ``occurrence_filters`` joins detections
+        (``?collection=<id>``): each annotation degrades to a per-row scan — 25x on the
+        page, and once per taxon in the unbounded pagination ``COUNT``.
+
+        Two count shapes share the same base queryset:
+
+        - Direct aggregates (``occurrences_count``, ``best_determination_score``,
+          ``last_detected``), keyed by the occurrence's own determination — one GROUP BY.
+          ``Count(distinct)`` dedupes the detections-join fan-out under ``?collection=``.
+        - ``verified_count`` / ``agreed_*`` roll up to ancestors via ``parents_json`` over
+          the (sparse) verified subset — see :meth:`_annotate_verification_counts`.
+
+        The determination ids present in the filtered set are exactly the observed taxa,
+        so ``restrict_to_observed`` reuses them instead of a separate membership query.
+        """
+        base = Occurrence.objects.filter(occurrence_filters).filter(default_filters_q)
+
+        occurrences_count: dict[int, int] = {}
+        best_score: dict[int, float | None] = {}
+        last_detected: dict[int, object] = {}
+        for row in base.values("determination_id").annotate(
+            _n=models.Count("id", distinct=True),
+            _score=models.Max("determination_score"),
+            _last=models.Max("detections__timestamp"),
+        ):
+            determination_id = row["determination_id"]
+            if determination_id is None:
+                continue
+            occurrences_count[determination_id] = row["_n"]
+            best_score[determination_id] = row["_score"]
+            last_detected[determination_id] = row["_last"]
 
-        # Get last detected timestamp - requires join with detections
-        last_detected_subquery = models.Subquery(
-            Occurrence.objects.filter(
-                base_filter,
-                detections__timestamp__isnull=False,
-            )
-            .values("determination_id")
-            .annotate(last_detected=models.Max("detections__timestamp"))
-            .values("last_detected")[:1],
-            output_field=models.DateTimeField(),
+        qs = qs.annotate(
+            occurrences_count=self._case_from_map(occurrences_count, 0, models.IntegerField()),
+            best_determination_score=self._case_from_map(best_score, None, models.FloatField()),
+            last_detected=self._case_from_map(last_detected, None, models.DateTimeField()),
         )
 
-        # Apply annotations
+        if restrict_to_observed:
+            qs = qs.filter(id__in=list(occurrences_count.keys()))
+
+        return self._annotate_verification_counts(qs, base)
+
+    def _annotate_verification_counts(self, qs: QuerySet, base: QuerySet) -> QuerySet:
+        """
+        Annotate per-taxon verification / human-model agreement counts, and apply the
+        ``verified=true|false`` filter on list responses.
+
+        ``base`` is the shared filtered occurrence set from :meth:`annotate_taxon_counts`.
+        Counts roll up descendant occurrences (verifying a species also counts toward its
+        genus/family rows). They only concern *verified* occurrences (those with a
+        non-withdrawn Identification), which are sparse, so the hierarchical rollup is a
+        single Python pass over that small subset applied as constant-time ``CASE``
+        annotations. A correlated ``parents_json`` subquery per taxon does not scale: the
+        GIN index can't serve a containment whose RHS is an ``OuterRef``.
+        """
+        include_agreement = self._include_agreement()
+
+        # The chosen (best, non-withdrawn) identification's agreed_with_prediction FK.
+        best_identification_agreed_prediction = models.Subquery(
+            Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False)
+            .order_by(*BEST_IDENTIFICATION_ORDER)
+            .values("agreed_with_prediction_id")[:1]
+        )
+        verified_occurrences = base.filter(
+            models.Exists(Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False))
+        ).annotate(_agreed_prediction_id=best_identification_agreed_prediction)
+        # ``pk`` is selected only so ``.distinct()`` below dedupes by occurrence: when
+        # occurrence_filters joins to detections (e.g. ?collection=<id>), one Occurrence
+        # yields a row per matching Detection, which would otherwise inflate the counts.
+        value_fields = ["pk", "determination_id", "determination__parents_json", "_agreed_prediction_id"]
+        if include_agreement:
+            # Top machine prediction's taxon for the same occurrence.
+            verified_occurrences = verified_occurrences.annotate(
+                _best_machine_taxon_id=models.Subquery(
+                    Classification.objects.filter(detection__occurrence=models.OuterRef("pk"))
+                    .order_by(*BEST_MACHINE_PREDICTION_ORDER)
+                    .values("taxon_id")[:1]
+                )
+            )
+            value_fields.append("_best_machine_taxon_id")
+
+        verified_counts: dict[int, int] = {}
+        agreed_with_prediction_counts: dict[int, int] = {}
+        agreed_exact_counts: dict[int, int] = {}
+        for row in verified_occurrences.values(*value_fields).distinct():
+            determination_id = row["determination_id"]
+            # The taxon itself plus every ancestor — i.e. every row this occurrence rolls up to.
+            taxon_ids: set[int] = set()
+            if determination_id is not None:
+                taxon_ids.add(determination_id)
+            for parent in row["determination__parents_json"] or []:
+                # parents_json round-trips through the pydantic schema field, so elements
+                # may be dicts or ``TaxonParent`` objects depending on the query path.
+                parent_id = parent.get("id") if isinstance(parent, dict) else getattr(parent, "id", None)
+                if parent_id is not None:
+                    taxon_ids.add(int(parent_id))
+
+            for taxon_id in taxon_ids:
+                verified_counts[taxon_id] = verified_counts.get(taxon_id, 0) + 1
+            if row["_agreed_prediction_id"] is not None:
+                for taxon_id in taxon_ids:
+                    agreed_with_prediction_counts[taxon_id] = agreed_with_prediction_counts.get(taxon_id, 0) + 1
+            if (
+                include_agreement
+                and determination_id is not None
+                and determination_id == row["_best_machine_taxon_id"]
+            ):
+                for taxon_id in taxon_ids:
+                    agreed_exact_counts[taxon_id] = agreed_exact_counts.get(taxon_id, 0) + 1
+
+        int_field = models.IntegerField()
         qs = qs.annotate(
-            occurrences_count=Coalesce(occurrences_count_subquery, 0),
-            best_determination_score=best_score_subquery,
-            last_detected=last_detected_subquery,
+            verified_count=self._case_from_map(verified_counts, 0, int_field),
+            agreed_with_prediction_count=self._case_from_map(agreed_with_prediction_counts, 0, int_field),
         )
+        if include_agreement:
+            qs = qs.annotate(agreed_exact_count=self._case_from_map(agreed_exact_counts, 0, int_field))
 
-        if not include_unobserved:
-            # Efficient EXISTS check that uses the composite index
-            qs = qs.filter(models.Exists(Occurrence.objects.filter(base_filter)))
+        # verified=true|false filter (list only); verified=false is the strict complement.
+        if self.action == "list" and "verified" in self.request.query_params:
+            verified = BooleanField(required=False).clean(self.request.query_params.get("verified"))
+            verified_taxon_ids = list(verified_counts.keys())
+            if verified:
+                qs = qs.filter(id__in=verified_taxon_ids)
+            else:
+                qs = qs.exclude(id__in=verified_taxon_ids)
 
         return qs
 

diff --git a/ami/main/migrations/0087_taxon_parents_json_gin_index.py b/ami/main/migrations/0087_taxon_parents_json_gin_index.py
@@ -0,0 +1,36 @@
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """
+    GIN index on Taxon.parents_json to support the hierarchical (descendant) taxon
+    filters that issue a literal `parents_json @> [{"id": <id>}]` containment: the
+    occurrence-list `taxon=<id>` filter (CustomOccurrenceDeterminationFilter) and the
+    project default-taxa filter (build_occurrence_default_filters_q). The index applies
+    to these because the right-hand side is a constant.
+
+    Note it does NOT back the #1316 per-taxon verification / agreement rollup: that is
+    computed in a single Python pass over the (sparse) verified-occurrence set rather
+    than a correlated subquery, because a containment whose RHS is an OuterRef can't use
+    the index. See TaxonViewSet._annotate_verification_counts.
+
+    CREATE INDEX CONCURRENTLY can't run inside a transaction, so this migration is
+    non-atomic. IF NOT EXISTS keeps it safe to co-exist with the same index if it lands
+    separately via the #1307 follow-up.
+    """
+
+    atomic = False
+
+    dependencies = [
+        ("main", "0086_sourceimage_recent_capture_index"),
+    ]
+
+    operations = [
+        migrations.RunSQL(
+            sql=(
+                "CREATE INDEX CONCURRENTLY IF NOT EXISTS main_taxon_parents_json_gin_idx "
+                "ON main_taxon USING gin (parents_json jsonb_path_ops);"
+            ),
+            reverse_sql="DROP INDEX CONCURRENTLY IF EXISTS main_taxon_parents_json_gin_idx;",
+        ),
+    ]
diff --git a/ami/main/models.py b/ami/main/models.py
@@ -3817,6 +3817,18 @@ def best_determination_score(self) -> float | None:
         # This is handled by an annotation if we are filtering by project, deployment or event
         return None
 
+    def verified_count(self) -> int | None:
+        # Handled by an annotation when filtering by project (TaxonViewSet.annotate_taxon_counts)
+        return None
+
+    def agreed_with_prediction_count(self) -> int | None:
+        # Handled by an annotation when filtering by project (TaxonViewSet.annotate_taxon_counts)
+        return None
+
+    def agreed_exact_count(self) -> int | None:
+        # Handled by an annotation only when with_agreement is requested or on the detail view
+        return None
+
     def occurrence_images(self, limit: int | None = 10) -> list[str]:
         # This is handled by an annotation if we are filtering by project, deployment or event
         return []