paradedb
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 77 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 13 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 25 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 128 additions & 1 deletion b/‎README.md‎
Lines changed: 128 additions & 1 deletion
diff --git a/‎examples/autocomplete.py‎
Lines changed: 39 additions & 0 deletions b/‎examples/autocomplete.py‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎examples/common.py‎
Lines changed: 84 additions & 0 deletions b/‎examples/common.py‎
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,77 @@
+# Continuous integration workflow: see the jobs below for what is checked.
+name: CI
+
+# Triggered by pushes to any branch and by every pull request.
+# NOTE(review): a push to a same-repository branch that has an open pull request
+# fires both events, so those commits are checked twice — confirm this is intended.
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+
+jobs:
+  # Static checks (Ruff, mypy) and unit tests on Python 3.11.
+  # This job declares no service containers.
+  lint-type-unit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      # Installs the checked-out package together with its `test` and `dev` extras.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install .[test,dev]
+      # Lints the whole repository.
+      - name: Ruff
+        run: ruff check .
+      # Type-checks only the `paradedb` package.
+      - name: Mypy
+        run: mypy paradedb
+      # Runs only the tests under tests/unit.
+      - name: Unit tests
+        run: python -m pytest tests/unit
+
+  # Integration tests and example scripts, run against a ParadeDB service container.
+  integration:
+    runs-on: ubuntu-latest
+    services:
+      # Database container published on localhost:5432 with postgres/postgres credentials.
+      paradedb:
+        # NOTE(review): `latest` is unpinned, so a newly published image can change
+        # results between runs — consider pinning a version.
+        image: paradedb/paradedb:latest
+        ports:
+          - 5432:5432
+        env:
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_USER: postgres
+          POSTGRES_DB: postgres
+        # Container health check: pg_isready every 10s (5s timeout), up to 12 retries.
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 12
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      # Installs the checked-out package together with its `test` and `dev` extras.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install .[test,dev]
+      # Installs the PostgreSQL client tools so `pg_isready` is available to the next step.
+      - name: Install pg client
+        run: sudo apt-get update && sudo apt-get install -y postgresql-client
+      # Polls the published port up to 30 times, 2 seconds apart, and fails the job
+      # if the server never reports ready.
+      - name: Wait for ParadeDB
+        env:
+          PGPASSWORD: postgres
+        run: |
+          for i in {1..30}; do
+            pg_isready -h localhost -p 5432 -U postgres && exit 0
+            sleep 2
+          done
+          echo "ParadeDB did not become ready" >&2
+          exit 1
+      # Runs the tests selected by the `integration` marker; the connection string
+      # is supplied through PARADEDB_TEST_DSN.
+      - name: Integration tests
+        env:
+          PARADEDB_TEST_DSN: postgres://postgres:postgres@localhost:5432/postgres
+        run: python -m pytest -m integration
+      # Executes each example script; the SQLAlchemy URL (psycopg driver) is supplied
+      # through DATABASE_URL.
+      - name: Run examples
+        env:
+          DATABASE_URL: postgresql+psycopg://postgres:postgres@localhost:5432/postgres
+        run: |
+          python examples/quickstart.py
+          python examples/autocomplete.py
+          python examples/more_like_this.py
+          python examples/faceted_search.py
+          python examples/hybrid_rrf.py
+          python examples/rag.py
@@ -4,3 +4,6 @@
 __pycache__/
 *.py[cod]
 *$py.class
+
+# macOS
+.DS_Store
@@ -0,0 +1,13 @@
+# Changelog
+
+## Unreleased
+
+### Added
+
+- Full BM25 search/query helper set with advanced operators.
+- Facet and aggregation builders plus rows+facets helper.
+- Alembic custom operations and autogenerate render hooks.
+- Centralized validation helpers and expanded runtime guard errors.
+- Unit/integration suites for indexing, querying, facets, and migrations.
+- CI workflow for lint, typing, unit, and integration checks.
+- Example scripts for quickstart, facets, autocomplete, MLT, hybrid RRF, and RAG retrieval.
@@ -0,0 +1,25 @@
+# Contributing
+
+## Setup
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -e ".[test,dev]"
+```
+
+## Run Checks
+
+```bash
+ruff check .
+mypy paradedb
+python -m pytest tests/unit
+PARADEDB_TEST_DSN=postgres://postgres:postgres@localhost:5432/postgres python -m pytest -m integration
+```
+
+## Guidelines
+
+- Keep helpers typed and composable with standard SQLAlchemy expressions.
+- Add integration tests for runtime behavior changes.
+- Add unit tests for SQL compilation and validation paths.
+- Preserve PostgreSQL-only safeguards for ParadeDB-specific expressions.
@@ -1,3 +1,130 @@
 # sqlalchemy-paradedb
 
-SQLAlchemy query helpers for ParadeDB.
+Typed SQLAlchemy helpers for ParadeDB BM25 indexing and query composition.
+
+## Requirements
+
+- Python 3.10+
+- PostgreSQL with ParadeDB (`pg_search`) available
+- SQLAlchemy 2.x
+
+## Install
+
+```bash
+pip install sqlalchemy-paradedb
+```
+
+For local development:
+
+```bash
+pip install -e ".[test,dev]"
+```
+
+## Core Modules
+
+- `paradedb.sqlalchemy.indexing`: BM25 field definitions and tokenizer specs.
+- `paradedb.sqlalchemy.search`: ParadeDB predicates (`match_all`, `fuzzy`, `parse`, `more_like_this`, etc.).
+- `paradedb.sqlalchemy.pdb`: function wrappers (`score`, `snippet`, `snippets`, `agg`).
+- `paradedb.sqlalchemy.facets`: aggregate/facet JSON builders and rows+facets helper.
+- `paradedb.sqlalchemy.select_with`: select decorators for score/snippet columns.
+- `paradedb.sqlalchemy.alembic`: Alembic operations for BM25 index lifecycle.
+
+## Quickstart
+
+```python
+from sqlalchemy import Index, select
+from paradedb.sqlalchemy import indexing, search
+
+products_bm25_idx = Index(
+    "products_bm25_idx",
+    indexing.BM25Field(Product.id),
+    indexing.BM25Field(Product.description, tokenizer=indexing.tokenize.unicode(lowercase=True)),
+    indexing.BM25Field(Product.category, tokenizer=indexing.tokenize.literal()),
+    postgresql_using="bm25",
+    postgresql_with={"key_field": "id"},
+)
+
+products_bm25_idx.create(engine)
+
+stmt = select(Product.id, Product.description).where(search.match_any(Product.description, "running", "shoes"))
+```
+
+## Query APIs
+
+- Basic predicates: `match_all`, `match_any`, `term`, `phrase`, `fuzzy`, `regex`, `all`
+- Advanced predicates: `parse`, `phrase_prefix`, `regex_phrase`, `near`, `proximity`, `more_like_this`
+- Scoring/snippets: `pdb.score`, `pdb.snippet`, `pdb.snippets`, `select_with.score`, `select_with.snippet`
+- Aggregations/facets: `facets.*` builders + `pdb.agg(...)`
+- Rows + facets: `facets.with_rows(...)`
+
+## Facets
+
+```python
+from sqlalchemy import select
+from paradedb.sqlalchemy import facets, pdb, search
+
+stmt = (
+    select(
+        pdb.agg(facets.value_count(field="id")).label("count"),
+        pdb.agg(facets.avg(field="rating")).label("avg_rating"),
+    )
+    .select_from(Product)
+    .where(search.match_all(Product.description, "running"))
+)
+```
+
+## Alembic Operations
+
+Import the module once during migration environment startup (for example in Alembic's `env.py`) so that the operations are registered:
+
+```python
+import paradedb.sqlalchemy.alembic  # noqa: F401
+```
+
+Usage:
+
+```python
+op.create_bm25_index("products_bm25_idx", "products", ["id", "description"], key_field="id")
+op.reindex_bm25("products_bm25_idx", concurrently=True)
+op.drop_bm25_index("products_bm25_idx", if_exists=True)
+```
+
+## Validation and Guardrails
+
+- Search and facet builders validate option bounds and shapes at build time.
+- `select_with.snippet*` raises `SnippetWithFuzzyPredicateError` when combined with fuzzy predicates.
+- `facets.with_rows` enforces `ORDER BY` + `LIMIT`, and can auto-inject a ParadeDB sentinel (`pdb.all()`).
+
+## Examples
+
+See `examples/`:
+
+- `quickstart.py`
+- `faceted_search.py`
+- `autocomplete.py`
+- `more_like_this.py`
+- `hybrid_rrf.py`
+- `rag.py`
+
+## Testing
+
+Unit tests:
+
+```bash
+python -m pytest tests/unit
+```
+
+Integration tests (requires running ParadeDB):
+
+```bash
+PARADEDB_TEST_DSN=postgres://postgres:postgres@localhost:5432/postgres python -m pytest -m integration
+```
+
+## CI
+
+GitHub Actions workflow at `.github/workflows/ci.yml` runs:
+
+- Ruff lint
+- Mypy type check
+- Unit tests
+- Integration tests against a ParadeDB service container
+- Example scripts from `examples/` against the same service container
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from sqlalchemy import Integer, String, Text, select
+from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
+
+from common import engine_from_env, setup_products
+from paradedb.sqlalchemy import search
+
+
+class Base(DeclarativeBase):
+    pass
+
+
+class Product(Base):
+    """ORM mapping of the ``products`` table used by the autocomplete query.
+
+    Maps the ``id``, ``description`` and ``category`` columns.
+    NOTE(review): the table is presumably the one created by ``setup_products``,
+    which may define further columns that are not mapped here — confirm.
+    """
+
+    __tablename__ = "products"
+
+    # Integer primary key.
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    # Required free-text column; the search predicate in ``main`` targets it.
+    description: Mapped[str] = mapped_column(Text, nullable=False)
+    # Required category label, declared as a string of up to 120 characters.
+    category: Mapped[str] = mapped_column(String(120), nullable=False)
+
+
+def main() -> None:
+    engine = engine_from_env()
+    setup_products(engine)
+
+    stmt = (
+        select(Product.id, Product.description)
+        .where(search.phrase_prefix(Product.description, ["running", "sh"]))
+        .order_by(Product.id)
+        .limit(10)
+    )
+
+    with Session(engine) as session:
+        for row in session.execute(stmt):
+            print(dict(row._mapping))
+
+
+# Run the example only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import os
+
+from sqlalchemy import text
+from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+
+
+# Seed rows for the ``products`` table, laid out as
+# (id, description, category, rating) — the order ``setup_products`` reads them in.
+PRODUCT_ROWS = [
+    (1, "Sleek running shoes for daily training", "Footwear", 5),
+    (2, "Trail running shoes with durable grip", "Footwear", 4),
+    (3, "Wireless noise-canceling headphones", "Electronics", 5),
+    (4, "Budget walking sneakers", "Footwear", 2),
+    (5, "Artistic ceramic vase", "Home", 3),
+]
+
+# Seed rows for the ``documents`` table, laid out as (id, content) — the order
+# ``setup_documents`` reads them in.
+DOCUMENT_ROWS = [
+    (1, "ParadeDB is a Postgres extension for full-text search."),
+    (2, "BM25 ranking helps relevance-based retrieval in PostgreSQL."),
+    (3, "RAG pipelines combine retrieval with LLM generation."),
+]
+
+
+def engine_from_env() -> Engine:
+    dsn = os.getenv("DATABASE_URL", "postgresql+psycopg://postgres:postgres@localhost:5432/postgres")
+    return create_engine(dsn)
+
+
+def setup_products(engine: Engine) -> None:
+    with engine.begin() as conn:
+        conn.execute(text("DROP INDEX IF EXISTS products_bm25_idx"))
+        conn.execute(text("DROP TABLE IF EXISTS products"))
+        conn.execute(
+            text(
+                """
+                CREATE TABLE products (
+                  id int primary key,
+                  description text not null,
+                  category text not null,
+                  rating int not null
+                )
+                """
+            )
+        )
+        conn.execute(
+            text(
+                "CREATE INDEX products_bm25_idx ON products USING bm25 (id, description, category, rating) WITH (key_field='id')"
+            )
+        )
+        for row in PRODUCT_ROWS:
+            conn.execute(
+                text(
+                    "INSERT INTO products (id, description, category, rating) VALUES (:id, :description, :category, :rating)"
+                ),
+                {
+                    "id": row[0],
+                    "description": row[1],
+                    "category": row[2],
+                    "rating": row[3],
+                },
+            )
+
+
+def setup_documents(engine: Engine) -> None:
+    with engine.begin() as conn:
+        conn.execute(text("DROP INDEX IF EXISTS documents_bm25_idx"))
+        conn.execute(text("DROP TABLE IF EXISTS documents"))
+        conn.execute(
+            text(
+                """
+                CREATE TABLE documents (
+                  id int primary key,
+                  content text not null
+                )
+                """
+            )
+        )
+        conn.execute(text("CREATE INDEX documents_bm25_idx ON documents USING bm25 (id, content) WITH (key_field='id')"))
+        for row in DOCUMENT_ROWS:
+            conn.execute(
+                text("INSERT INTO documents (id, content) VALUES (:id, :content)"),
+                {"id": row[0], "content": row[1]},
+            )