From bbde4eade2a4ea921d63f3a1e5293a1b5dfbdd19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Fri, 19 Jun 2026 11:02:47 +0200
Subject: [PATCH 1/2] feat(admin-cli): add --skip-extension-reconcile to
 run-db-migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The post-migration step (ensure_vector_extension / ensure_text_search_extension)
runs per tenant schema, each opening a fresh connection and probing the catalog.
Across tens of thousands of tenant schemas this dominates the migration job, even
though the step only does real work when the configured backend
(HINDSIGHT_API_VECTOR_EXTENSION / HINDSIGHT_API_TEXT_SEARCH_EXTENSION) differs from
a schema's existing index/column shape — a rare, operator-driven change that, on
populated tables, refuses and asks you to re-embed anyway.

`run_migrations_for_schemas` already accepts `ensure_extensions`; this just exposes
it on the CLI as `--skip-extension-reconcile` (default off, so behavior is
unchanged) so an operator who has NOT changed the backend can skip the reconcile
on a routine re-migration over many tenants. A backend change still needs a normal
run to reshape the indexes.

Test: parametrized check that the flag threads ensure_extensions through to
run_migrations_for_schemas.
---
 hindsight-api-slim/hindsight_api/admin/cli.py | 18 +++++++++-
 .../tests/test_admin_backup_restore.py        | 33 +++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/hindsight-api-slim/hindsight_api/admin/cli.py b/hindsight-api-slim/hindsight_api/admin/cli.py
index eac03e502..8403c1102 100644
--- a/hindsight-api-slim/hindsight_api/admin/cli.py
+++ b/hindsight-api-slim/hindsight_api/admin/cli.py
@@ -256,6 +256,7 @@ async def _run_migration(
     schema: str | None = None,
     base_schema: str = DEFAULT_DATABASE_SCHEMA,
     embedding_dimension: int | None = None,
+    ensure_extensions: bool = True,
 ) -> list[str]:
     """Resolve database URL and run migrations for one schema or all discovered schemas."""
     from ..migrations import run_migrations_for_schemas
@@ -292,7 +293,7 @@ async def _run_migration(
         vector_extension=config.vector_extension,
         text_search_extension=config.text_search_extension,
         pg_search_tokenizer=config.text_search_extension_pg_search_tokenizer,
-        ensure_extensions=True,
+        ensure_extensions=ensure_extensions,
     )
 
     return schemas
@@ -311,6 +312,18 @@ def run_db_migration(
         "--embedding-dimension",
         help="Expected embedding dimension to enforce after migrations. Omit to skip dimension sync.",
     ),
+    skip_extension_reconcile: bool = typer.Option(
+        False,
+        "--skip-extension-reconcile",
+        help=(
+            "Skip the post-migration vector / text-search index reconcile. This step only does "
+            "work when the configured backend (HINDSIGHT_API_VECTOR_EXTENSION / "
+            "HINDSIGHT_API_TEXT_SEARCH_EXTENSION) differs from a schema's existing indexes — a "
+            "rare, operator-driven change. Skipping it makes a no-change re-migration over many "
+            "tenant schemas much faster. Only use when you have NOT changed the backend; a "
+            "backend change still needs a normal run to reshape the indexes."
+        ),
+    ),
 ):
     """Run database migrations to the latest version."""
     config = HindsightConfig.from_env()
@@ -324,6 +337,8 @@ def run_db_migration(
         typer.echo(f"Running database migrations for schema: {schema}...")
     else:
         typer.echo("Running database migrations for base schema and all discovered tenant schemas...")
+    if skip_extension_reconcile:
+        typer.echo("Skipping post-migration extension reconcile (--skip-extension-reconcile).")
 
     schemas = asyncio.run(
         _run_migration(
@@ -331,6 +346,7 @@ def run_db_migration(
             schema=schema,
             base_schema=config.database_schema,
             embedding_dimension=embedding_dimension,
+            ensure_extensions=not skip_extension_reconcile,
         )
     )
 
diff --git a/hindsight-api-slim/tests/test_admin_backup_restore.py b/hindsight-api-slim/tests/test_admin_backup_restore.py
index 16dd29ff4..ba371d444 100644
--- a/hindsight-api-slim/tests/test_admin_backup_restore.py
+++ b/hindsight-api-slim/tests/test_admin_backup_restore.py
@@ -547,3 +547,36 @@ def fake_ensure_text_search_extension(
     assert calls["run_migrations"] == [("resolved::postgresql://test", "tenant_demo")]
     assert calls["ensure_vector_extension"] == [("resolved::postgresql://test", "pgvector", "tenant_demo")]
     assert calls["ensure_text_search_extension"] == [("resolved::postgresql://test", "native", "", "tenant_demo")]
+
+
+@pytest.mark.parametrize(
+    ("ensure_extensions", "expected"),
+    [(True, True), (False, False)],
+)
+@pytest.mark.asyncio
+async def test_run_migration_threads_ensure_extensions_flag(monkeypatch, ensure_extensions, expected):
+    """The --skip-extension-reconcile flag (ensure_extensions=False) must reach run_migrations_for_schemas.
+
+    The post-migration vector/text-search reconcile only does work on a backend change, so operators
+    can skip it on a no-change re-migration over many tenant schemas. Verify the flag is threaded through
+    rather than silently dropped.
+    """
+    monkeypatch.setenv("HINDSIGHT_API_DATABASE_URL", "postgresql://test")
+    captured: dict = {}
+
+    async def fake_resolve_database_url(db_url: str) -> str:
+        return f"resolved::{db_url}"
+
+    def fake_run_migrations_for_schemas(database_url, schemas, **kwargs):
+        captured["ensure_extensions"] = kwargs.get("ensure_extensions")
+
+    monkeypatch.setattr(admin_cli, "load_extension", lambda *args, **kwargs: None)
+    monkeypatch.setattr(admin_cli, "resolve_database_url", fake_resolve_database_url)
+
+    from hindsight_api import migrations as migrations_module
+
+    monkeypatch.setattr(migrations_module, "run_migrations_for_schemas", fake_run_migrations_for_schemas)
+
+    await admin_cli._run_migration("postgresql://test", schema="tenant_demo", ensure_extensions=ensure_extensions)
+
+    assert captured["ensure_extensions"] is expected

From 202c872e40476e5d6008a4de9b36c15a3d384bfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Fri, 19 Jun 2026 15:40:41 +0200
Subject: [PATCH 2/2] perf(migrations): don't create the unused global
 memory_units vector index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For per-bank vector backends (pgvector / pgvectorscale / vchord) every vector
search is bank + fact_type scoped and served by the per-(bank, fact_type) partial
indexes created at bank-creation time (bank_utils.create_bank_vector_indexes).
The global idx_memory_units_embedding is never chosen by the planner when bank_id
is in the WHERE clause — which is exactly why migration d5e6f7a8b9c0 drops it for
these backends. Verified via EXPLAIN: the semantic query uses idx_mu_emb_* whether
or not the global index exists.

ensure_vector_extension nonetheless recreated that global index on a fresh/empty
schema (the "No embedding index found, will create it" path), adding an unused,
write-amplifying index back to every schema. Skip creating it for per-bank
backends. scann (which genuinely uses a global filtered index) is unaffected.

Test: a fresh pgvector schema run through migrate + ensure_vector_extension ends
up with no idx_memory_units_embedding (fails without the fix).
---
 .../hindsight_api/migrations.py               | 32 ++++-----
 .../test_ensure_vector_no_global_index.py     | 70 +++++++++++++++++++
 2 files changed, 84 insertions(+), 18 deletions(-)
 create mode 100644 hindsight-api-slim/tests/test_ensure_vector_no_global_index.py

diff --git a/hindsight-api-slim/hindsight_api/migrations.py b/hindsight-api-slim/hindsight_api/migrations.py
index d9e637859..d95b07b14 100644
--- a/hindsight-api-slim/hindsight_api/migrations.py
+++ b/hindsight-api-slim/hindsight_api/migrations.py
@@ -686,24 +686,20 @@ def ensure_vector_extension(
 
             if not current_index_info:
                 if table_name == "memory_units" and uses_per_bank_vector_indexes(target_ext):
-                    # Check whether per-bank partial vector indexes already cover this table
-                    # (created by the bank_utils lifecycle — no global index needed in that case)
-                    per_bank_index_count = conn.execute(
-                        text("""
-                            SELECT COUNT(*)
-                            FROM pg_indexes
-                            WHERE schemaname = :schema
-                              AND tablename = :table_name
-                              AND indexname LIKE 'idx_mu_emb_%'
-                        """),
-                        {"schema": schema_name, "table_name": table_name},
-                    ).scalar()
-                    if per_bank_index_count and per_bank_index_count > 0:
-                        logger.debug(
-                            f"No global embedding index on {table_name}, but {per_bank_index_count} "
-                            f"per-bank partial vector indexes exist — skipping global index creation"
-                        )
-                        continue
+                    # Per-bank backends never use a GLOBAL memory_units vector index.
+                    # Every vector search is bank + fact_type scoped and served by the
+                    # per-(bank, fact_type) partial indexes created at bank-creation time
+                    # (bank_utils.create_bank_vector_indexes); the planner never picks a
+                    # global index when bank_id is in the WHERE clause, which is exactly
+                    # why migration d5e6f7a8b9c0 drops it for these backends. So don't
+                    # create one here either — not even on an empty schema with no per-bank
+                    # indexes yet (those are built when the first bank is created). Verified
+                    # via EXPLAIN: the query uses idx_mu_emb_* whether or not the global
+                    # index exists, so creating it is dead weight.
+                    logger.debug(
+                        f"Per-bank vector backend ({target_ext}); skipping global {index_name} creation on {table_name}"
+                    )
+                    continue
                 logger.warning(f"No embedding index found for {table_name}, will create it if safe")
                 mismatched_tables.append((table_name, index_name, None, row_count))
                 continue
diff --git a/hindsight-api-slim/tests/test_ensure_vector_no_global_index.py b/hindsight-api-slim/tests/test_ensure_vector_no_global_index.py
new file mode 100644
index 000000000..fb778b178
--- /dev/null
+++ b/hindsight-api-slim/tests/test_ensure_vector_no_global_index.py
@@ -0,0 +1,70 @@
+"""ensure_vector_extension must not create the (unused) global memory_units index.
+
+For per-bank backends (pgvector / pgvectorscale / vchord) every vector search is
+bank + fact_type scoped and served by the per-(bank, fact_type) partial indexes
+created at bank-creation time. The global `idx_memory_units_embedding` is never
+chosen by the planner (migration d5e6f7a8b9c0 drops it for exactly this reason),
+so the post-migration reconcile must not recreate it on a fresh schema.
+"""
+
+import asyncio
+
+import pytest
+from sqlalchemy import create_engine, text
+
+from hindsight_api._vector_index import uses_per_bank_vector_indexes
+from hindsight_api.config import HindsightConfig
+from hindsight_api.migrations import ensure_vector_extension, run_migrations
+
+
+@pytest.fixture(scope="module")
+def vec_db_url():
+    """A dedicated pg0 instance so the test owns its schema/index state."""
+    from hindsight_api.pg0 import EmbeddedPostgres
+
+    pg0 = EmbeddedPostgres(name="hindsight-vecidx-test", port=5570)
+    loop = asyncio.new_event_loop()
+    try:
+        return loop.run_until_complete(pg0.ensure_running())
+    finally:
+        loop.close()
+
+
+def test_per_bank_backend_does_not_create_global_memory_units_index(vec_db_url):
+    config = HindsightConfig.from_env()
+    vec = config.vector_extension
+    if not uses_per_bank_vector_indexes(vec):
+        pytest.skip(f"backend {vec!r} uses a global vector index by design (no per-bank indexes)")
+
+    schema = "vecidx_fresh"
+
+    engine = create_engine(vec_db_url)
+    try:
+        with engine.connect() as conn:
+            conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
+            conn.commit()
+    finally:
+        engine.dispose()
+
+    run_migrations(vec_db_url, schema=schema)
+    # Fresh, empty schema (no banks yet) → the reconcile must be a no-op for the
+    # global index, not recreate it.
+    ensure_vector_extension(vec_db_url, vector_extension=vec, schema=schema)
+
+    engine = create_engine(vec_db_url)
+    try:
+        with engine.connect() as conn:
+            global_index_count = conn.execute(
+                text(
+                    "SELECT COUNT(*) FROM pg_indexes "
+                    "WHERE schemaname = :schema AND tablename = 'memory_units' "
+                    "AND indexname = 'idx_memory_units_embedding'"
+                ),
+                {"schema": schema},
+            ).scalar()
+            conn.execute(text(f'DROP SCHEMA IF EXISTS "{schema}" CASCADE'))
+            conn.commit()
+    finally:
+        engine.dispose()
+
+    assert global_index_count == 0