From c327e6b8f12afd86205b3aa1ac67dd737e87160a Mon Sep 17 00:00:00 2001 From: ntkathole Date: Fri, 8 May 2026 10:23:52 +0530 Subject: [PATCH] feat: Add demo noteboooks for users Signed-off-by: ntkathole --- docs/SUMMARY.md | 1 + docs/getting-started/quickstart.md | 1 + docs/reference/feast-cli-commands.md | 42 ++ docs/tutorials/demo-notebooks.md | 114 ++++ pixi.lock | 6 +- sdk/python/docs/source/feast.rst | 8 + sdk/python/feast/__init__.py | 2 + sdk/python/feast/cli/cli.py | 33 ++ sdk/python/feast/demos.py | 851 +++++++++++++++++++++++++++ sdk/python/tests/unit/test_demos.py | 360 +++++++++++ 10 files changed, 1415 insertions(+), 3 deletions(-) create mode 100644 docs/tutorials/demo-notebooks.md create mode 100644 sdk/python/feast/demos.py create mode 100644 sdk/python/tests/unit/test_demos.py diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 44c1cc09477..1b0b0961d79 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -56,6 +56,7 @@ * [RAG Fine Tuning with Feast and Milvus](../examples/rag-retriever/README.md) * [MCP - AI Agent Example](../examples/mcp_feature_store/README.md) * [Feast-Powered AI Agent](../examples/agent_feature_store/README.md) +* [Demo Notebooks](tutorials/demo-notebooks.md) ## How-to Guides diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index e98425f9149..aa56d09b1d8 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -666,6 +666,7 @@ show up in the upcoming concepts + architecture + tutorial pages as well. ## Next steps +* Run `feast demo-notebooks` to generate tailored Jupyter notebooks for your project. See [Demo Notebooks](../tutorials/demo-notebooks.md). * Read the [Concepts](concepts/) page to understand the Feast data model. * Read the [Architecture](architecture/) page. * Check out our [Tutorials](../tutorials/tutorials-overview/) section for more examples on how to use Feast. 
diff --git a/docs/reference/feast-cli-commands.md b/docs/reference/feast-cli-commands.md index 535065b5a98..99a99ab1707 100644 --- a/docs/reference/feast-cli-commands.md +++ b/docs/reference/feast-cli-commands.md @@ -21,6 +21,7 @@ Commands: apply Create or update a feature store deployment configuration Display Feast configuration delete Delete a Feast object from the registry + demo-notebooks Generate demo Jupyter notebooks for the project entities Access entities feature-views Access feature views init Create a new Feast repository @@ -142,6 +143,47 @@ The delete operation is permanent and will remove the object from the registry. If multiple objects have the same name across different types, `feast delete` will delete the first one it finds. For programmatic deletion with more control, use the Python SDK methods like `store.delete_feature_view()`, `store.delete_feature_service()`, etc. {% endhint %} +## Demo Notebooks + +Generate tailored demo Jupyter notebooks for each Feast project found in the current directory. + +```bash +feast demo-notebooks +``` + +The command searches for `feature_store.yaml` in the current directory and every file inside the `feast-config/` directory. Each file is treated as a separate project config, and notebooks are created under `./feast-demo-notebooks//`. + +The generated notebooks adapt to your project configuration (online/offline store types, authentication, vector search) and cover: + +* **Feature store overview** — explore registered entities, feature views, and services. +* **Historical feature retrieval** — build training datasets with point-in-time correct joins. +* **Online feature serving** — materialize features and retrieve them at low latency. + +**Options:** + +* `-o, --output-dir` — Directory where the notebooks are written. Default: `./feast-demo-notebooks`. +* `--overwrite` — Overwrite existing notebooks if the output directory already exists. 
+ +```bash +feast demo-notebooks -o ./my-notebooks --overwrite +``` + +You can also use the `--chdir` global option to point at a different feature repository: + +```bash +feast -c /path/to/feature_repo demo-notebooks +``` + +The same functionality is available via the Python SDK: + +```python +from feast import copy_demo_notebooks + +copy_demo_notebooks(output_dir="./feast-demo-notebooks", repo_path=".") +``` + +For more details see the [Demo Notebooks tutorial](../tutorials/demo-notebooks.md). + ## Entities List all registered entities diff --git a/docs/tutorials/demo-notebooks.md b/docs/tutorials/demo-notebooks.md new file mode 100644 index 00000000000..8c0ba059f81 --- /dev/null +++ b/docs/tutorials/demo-notebooks.md @@ -0,0 +1,114 @@ +# Demo Notebooks + +Feast can generate tailored Jupyter notebooks for any Feast project. The notebooks adapt to your `feature_store.yaml` configuration and provide a hands-on walkthrough of core Feast functionality. + +## What you get + +For each project discovered, Feast creates a directory with notebooks covering: + +| Notebook | Description | +|----------|-------------| +| **01 — Feature Store Overview** | Explore registered entities, feature views, feature services, and data sources. | +| **02 — Historical Feature Retrieval** | Build a training dataset with point-in-time correct joins using `get_historical_features`. | +| **03 — Online Feature Serving** | Materialize features to the online store and retrieve them at low latency with `get_online_features`. | + +The content adapts automatically based on: + +* **Online / offline store types** — descriptions reflect the actual backends configured. +* **Registry type** — local registries include `feast apply`; remote registries use `refresh_registry()`. +* **Authentication** — auth details from `feature_store.yaml` are surfaced when configured. +* **Vector search** — a vector/RAG retrieval section is included when embeddings are detected. 
+ +## Prerequisites + +* Python 3.9+ +* Feast installed (`pip install feast`) +* A feature repository with a valid `feature_store.yaml` + +## Using the CLI + +Run the command from (or pointing to) a directory containing `feature_store.yaml`: + +```bash +feast demo-notebooks +``` + +This searches for `feature_store.yaml` in the current directory and every file inside the `feast-config/` directory. Each file in `feast-config/` is treated as a separate project config. For each project found, notebooks are written to `./feast-demo-notebooks//`. + +### Options + +| Option | Default | Description | +|--------|---------|-------------| +| `-o, --output-dir` | `./feast-demo-notebooks` | Root directory for generated notebooks | +| `--overwrite` | `false` | Overwrite if the output directory already exists | + +```bash +# Write to a custom directory +feast demo-notebooks -o ./my-notebooks + +# Overwrite existing notebooks +feast demo-notebooks --overwrite + +# Use --chdir to point at a different feature repo +feast -c /path/to/feature_repo demo-notebooks +``` + +## Using the Python SDK + +```python +from feast import copy_demo_notebooks + +copy_demo_notebooks() +``` + +### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `output_dir` | `str` | `"./feast-demo-notebooks"` | Root directory for generated notebooks | +| `repo_path` | `str` | `"."` | Directory to search for `feature_store.yaml` files | +| `overwrite` | `bool` | `False` | Overwrite existing output directories | + +### Examples + +```python +from feast import copy_demo_notebooks + +# Default — searches current directory, writes to ./feast-demo-notebooks/ +copy_demo_notebooks() + +# Custom paths +copy_demo_notebooks( + output_dir="/home/user/notebooks", + repo_path="/home/user/feast-projects/my-repo/feature_repo", + overwrite=True, +) +``` + +## Multi-project repositories + +If your `feast-config/` directory contains multiple files, each is treated as a 
separate project and a dedicated notebook directory is created: + +``` +feast-demo-notebooks/ +├── project_alpha/ +│ ├── 01_feature_store_overview.ipynb +│ ├── 02_historical_features_training.ipynb +│ └── 03_online_features_serving.ipynb +└── project_beta/ + ├── 01_feature_store_overview.ipynb + ├── 02_historical_features_training.ipynb + └── 03_online_features_serving.ipynb +``` + +## Running the notebooks + +Open any generated notebook in Jupyter, JupyterLab, or VS Code and run cells from top to bottom. Each notebook: + +1. Configures the path to your `feature_store.yaml` automatically (no manual editing needed). +2. Connects to the feature store using the Feast Python SDK. +3. Walks through relevant operations with real data from your project. + +{% hint style="info" %} +The first notebook (**01 — Overview**) includes a prerequisites check and `feast apply` / registry sync step. Subsequent notebooks assume these have already been completed. +{% endhint %} diff --git a/pixi.lock b/pixi.lock index e6e724dce74..ffdb536b80d 100644 --- a/pixi.lock +++ b/pixi.lock @@ -2229,8 +2229,8 @@ packages: requires_python: '>=3.10' - pypi: ./ name: feast - version: 0.62.1.dev58+g4f142a3c1.d20260501 - sha256: 168dda185ac1fd0b97d388a4c5af598965dc17dfaaf09d35c7bb4ddbd3dafe43 + version: 0.63.1.dev11+g728aa2e03 + sha256: 0374de3999768cff5af26fcded970de628340809ce8d1466645c9ca0d209c166 requires_dist: - click>=7.0.0,<9.0.0 - colorama>=0.3.9,<1 @@ -2257,7 +2257,7 @@ packages: - uvicorn-worker - gunicorn ; sys_platform != 'win32' - dask[dataframe]>=2024.2.1 - - prometheus-client + - prometheus-client>=0.20.0,<0.25.0 - psutil - bigtree>=0.19.2 - pyjwt diff --git a/sdk/python/docs/source/feast.rst b/sdk/python/docs/source/feast.rst index 27231480da8..304083138d2 100644 --- a/sdk/python/docs/source/feast.rst +++ b/sdk/python/docs/source/feast.rst @@ -65,6 +65,14 @@ feast.data\_format module :undoc-members: :show-inheritance: +feast.demos module +------------------ + +.. 
automodule:: feast.demos + :members: + :undoc-members: + :show-inheritance: + feast.data\_source module ------------------------- diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index b1881c50150..b61bc2110ab 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -1,6 +1,7 @@ from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _version +from feast.demos import copy_demo_notebooks from feast.infra.offline_stores.bigquery_source import BigQuerySource from feast.infra.offline_stores.contrib.athena_offline_store.athena_source import ( AthenaSource, @@ -41,6 +42,7 @@ __all__ = [ "Aggregation", "BatchFeatureView", + "copy_demo_notebooks", "DataFrameEngine", "Entity", "KafkaSource", diff --git a/sdk/python/feast/cli/cli.py b/sdk/python/feast/cli/cli.py index 1e461af4a28..886c91f69ae 100644 --- a/sdk/python/feast/cli/cli.py +++ b/sdk/python/feast/cli/cli.py @@ -598,6 +598,39 @@ def validate( exit(1) +@cli.command("demo-notebooks") +@click.option( + "--output-dir", + "-o", + default="./feast-demo-notebooks", + show_default=True, + help="Directory where the demo notebooks are written.", +) +@click.option( + "--overwrite", + is_flag=True, + default=False, + help="Overwrite existing notebooks if the output directory already exists.", +) +@click.pass_context +def demo_notebooks_command(ctx: click.Context, output_dir: str, overwrite: bool): + """ + Generate demo Jupyter notebooks tailored to the feature store configuration. + + Searches for feature_store.yaml in the current directory and every file + inside feast-config/. Each file is treated as a separate project config. + For each project found, a sub-directory is created under OUTPUT_DIR. 
+ """ + from feast.demos import copy_demo_notebooks + + repo = ctx.obj["CHDIR"] + copy_demo_notebooks( + output_dir=output_dir, + repo_path=str(repo), + overwrite=overwrite, + ) + + cli.add_command(data_sources_cmd) cli.add_command(entities_cmd) cli.add_command(feature_services_cmd) diff --git a/sdk/python/feast/demos.py b/sdk/python/feast/demos.py new file mode 100644 index 00000000000..9970f90316b --- /dev/null +++ b/sdk/python/feast/demos.py @@ -0,0 +1,851 @@ +# Copyright 2026 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Demo notebook generation for Feast projects. + +Usage:: + + from feast import copy_demo_notebooks + copy_demo_notebooks() + +This will search for ``feature_store.yaml`` in the current directory and every +file inside the ``feast-config/`` directory, then write tailored Jupyter +notebooks into a ``./feast-demo-notebooks//`` directory for each +project found. +""" + +import json +import logging +import os +import pathlib +from typing import Any, Optional + +import yaml + +_logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Discovery helpers +# --------------------------------------------------------------------------- + + +def _find_feature_store_yamls(repo_path: pathlib.Path) -> list[pathlib.Path]: + """Return all feature-store config paths found under *repo_path*. + + Searches: + 1. ``repo_path/feature_store.yaml`` + 2. 
Every file directly inside ``repo_path/feast-config/`` + — each file is treated as a separate project config. + """ + found: list[pathlib.Path] = [] + + direct = repo_path / "feature_store.yaml" + if direct.exists(): + found.append(direct) + + feast_config_dir = repo_path / "feast-config" + if feast_config_dir.is_dir(): + for entry in sorted(feast_config_dir.iterdir()): + if entry.is_file(): + found.append(entry) + + return found + + +def _parse_yaml(yaml_path: pathlib.Path) -> dict[str, Any]: + with open(yaml_path) as fh: + return yaml.safe_load(os.path.expandvars(fh.read())) or {} + + +def _extract_store_info(config: dict[str, Any]) -> dict[str, Any]: + """Summarise the key fields from a raw ``feature_store.yaml`` dict.""" + info: dict[str, Any] = { + "project": config.get("project", "my_feast_project"), + "provider": config.get("provider", "local"), + "online_store_type": "sqlite", + "offline_store_type": "file", + "registry_type": "file", + "auth_type": "no_auth", + "vector_enabled": False, + "embedding_dim": None, + } + + online = config.get("online_store", {}) + if isinstance(online, dict): + info["online_store_type"] = online.get("type", "sqlite").lower() + info["vector_enabled"] = bool(online.get("vector_enabled", False)) + if online.get("embedding_dim"): + info["embedding_dim"] = online["embedding_dim"] + elif isinstance(online, str): + info["online_store_type"] = online.lower() + + offline = config.get("offline_store", {}) + if isinstance(offline, dict): + info["offline_store_type"] = offline.get("type", "file").lower() + elif isinstance(offline, str): + info["offline_store_type"] = offline.lower() + + registry = config.get("registry", {}) + if isinstance(registry, dict): + # Operator client YAML uses "registry_type" key; standard Feast uses "type" + info["registry_type"] = ( + registry.get("registry_type") or registry.get("type", "file") + ).lower() + # string registry value is a plain file path — keep default "file" + + auth = config.get("auth", {}) + 
if isinstance(auth, dict): + info["auth_type"] = auth.get("type", "no_auth").lower() + + return info + + +# --------------------------------------------------------------------------- +# Notebook cell builders +# --------------------------------------------------------------------------- + + +def _md(source: str) -> dict[str, Any]: + return { + "cell_type": "markdown", + "metadata": {}, + "source": source, + } + + +def _code(source: str, tags: Optional[list[str]] = None) -> dict[str, Any]: + meta: dict[str, Any] = {} + if tags: + meta["tags"] = tags + return { + "cell_type": "code", + "execution_count": None, + "metadata": meta, + "outputs": [], + "source": source, + } + + +def _notebook(cells: list[dict[str, Any]]) -> dict[str, Any]: + return { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3", + }, + }, + "cells": cells, + } + + +# --------------------------------------------------------------------------- +# Per-store setup snippets +# --------------------------------------------------------------------------- + + +def _is_operator_client(info: dict[str, Any]) -> bool: + """Return True when the feature_store.yaml was generated by the Feast operator. + + The operator sets provider=local with registry_type=remote, online_store.type=remote, + and offline_store.type=remote. + """ + return ( + info["registry_type"] == "remote" + and info["online_store_type"] == "remote" + and info["offline_store_type"] == "remote" + ) + + +# --------------------------------------------------------------------------- +# Notebook generators +# --------------------------------------------------------------------------- + + +def _apply_md(info: dict[str, Any]) -> dict[str, Any]: + """Return the markdown cell that introduces the apply / registry-sync section.""" + if info["registry_type"] == "remote": + return _md( + "## 4. 
Registry Sync\n\nRefresh the registry cache to load the latest feature definitions." + ) + return _md( + "## 4. Apply Feature Definitions\n\n" + "Register entities, feature views, and services into the registry. " + "Skip if already applied." + ) + + +def _apply_code(info: dict[str, Any]) -> dict[str, Any]: + """Return the code cell that applies (local) or refreshes (remote) the registry.""" + if info["registry_type"] == "remote": + return _code( + "store.refresh_registry()\n" + "fvs = store.list_feature_views()\n" + "print(f'Registry synced — {len(fvs)} feature view(s) available.')" + ) + # Local file registry — auto-apply if empty, then refresh. + return _code( + "fvs = store.list_feature_views()\n" + "entities = store.list_entities()\n" + "\n" + "if fvs or entities:\n" + " print(f'Registry ready: {len(entities)} entity/entities, {len(fvs)} feature view(s)')\n" + "else:\n" + " print('Registry is empty — running feast apply ...')\n" + " !feast -f {FEAST_FS_YAML} apply\n" + " store.refresh_registry()\n" + " print('Apply complete.')" + ) + + +def _path_setup_cell(yaml_abs: str) -> dict[str, Any]: + """Return a code cell that sets ``FEAST_FS_YAML`` to the absolute path of + the feature-store config resolved at generation time.""" + return _code( + "import os\n" + "\n" + f"FEAST_FS_YAML = r{repr(yaml_abs)}\n" + "\n" + "assert os.path.exists(FEAST_FS_YAML), (\n" + " f'Config not found at {FEAST_FS_YAML!r}. 
'\n" + " 'Update FEAST_FS_YAML to the correct path.'\n" + ")\n" + "print(f'Using feature_store.yaml: {FEAST_FS_YAML}')", + tags=["parameters"], + ) + + +def _nb_overview(info: dict[str, Any], yaml_abs: str) -> dict[str, Any]: + project = info["project"] + ost = info["online_store_type"] + offst = info["offline_store_type"] + auth = info["auth_type"] + provider = info["provider"] + vector_enabled = info["vector_enabled"] + + cells: list[dict[str, Any]] = [ + _md( + f"# Feature Store Overview — `{project}`\n\n" + "Explore the entities, feature views, feature services, and data sources " + "registered in this project." + ), + _md("## 1. Prerequisites"), + _code( + "# Verify feast installation\nimport feast\nprint(f'Feast version: {feast.__version__}')" + ), + _md("## 2. Feature Store Path"), + _path_setup_cell(yaml_abs), + _md( + f"## 3. Connect to the Feature Store\n" + f"The feature store for project **`{project}`** is configured with:\n\n" + f"| Setting | Value |\n" + f"|---------|-------|\n" + f"| Provider | `{provider}` |\n" + f"| Online store | `{ost}` |\n" + f"| Offline store | `{offst}` |\n" + f"| Auth | `{auth}` |\n" + + ( + f"| Vector search | enabled (embedding dim: {info['embedding_dim']}) |\n" + if vector_enabled + else "" + ) + ), + _code( + "from feast import FeatureStore\n" + "\n" + "store = FeatureStore(fs_yaml_file=FEAST_FS_YAML)\n" + "print(f'Connected to project: {store.project}')" + ), + _apply_md(info), + _apply_code(info), + _md("## 5. List Entities"), + _code( + "entities = store.list_entities()\n" + "print(f'Found {len(entities)} entity/entities\\n')\n" + "for e in entities:\n" + " print(f' • {e.name} (join_key={e.join_key}, type={e.value_type})')" + ), + _md("## 6. 
List Feature Views"), + _code( + "feature_views = store.list_feature_views()\n" + "print(f'Found {len(feature_views)} batch feature view(s)\\n')\n" + "for fv in feature_views:\n" + " feature_names = [f.name for f in fv.features]\n" + " print(f' • {fv.name}')\n" + " print(f' Features : {feature_names}')\n" + " print(f' Entities : {fv.entities}')\n" + " print(f' TTL : {fv.ttl}')\n" + ), + _md("## 7. List On-Demand Feature Views"), + _code( + "odfvs = store.list_on_demand_feature_views()\n" + "if odfvs:\n" + " print(f'Found {len(odfvs)} on-demand feature view(s)\\n')\n" + " for odfv in odfvs:\n" + " print(f' • {odfv.name}')\n" + "else:\n" + " print('No on-demand feature views defined.')" + ), + _md("## 8. List Feature Services"), + _code( + "services = store.list_feature_services()\n" + "if services:\n" + " print(f'Found {len(services)} feature service(s)\\n')\n" + " for svc in services:\n" + " views = [p.name for p in svc.feature_view_projections]\n" + " print(f' • {svc.name} -> views: {views}')\n" + "else:\n" + " print('No feature services defined.')" + ), + _md("## 9. List Data Sources"), + _code( + "sources = store.list_data_sources()\n" + "print(f'Found {len(sources)} data source(s)\\n')\n" + "for src in sources:\n" + " print(f' • {src.name} ({type(src).__name__})')" + ), + _md( + "## Next Steps\n\n" + "- **`02_historical_features_training.ipynb`** — retrieve historical features for training.\n" + "- **`03_online_features_serving.ipynb`** — materialize and serve online features." + ), + ] + return _notebook(cells) + + +def _nb_historical(info: dict[str, Any], yaml_abs: str) -> dict[str, Any]: + project = info["project"] + + cells: list[dict[str, Any]] = [ + _md( + f"# Historical Features & Training Datasets — `{project}`\n\n" + "Retrieve point-in-time correct feature values to build ML training datasets." + ), + _md("## 1. Feature Store Path"), + _path_setup_cell(yaml_abs), + _md("## 2. 
Connect to the Feature Store"), + _code( + "from feast import FeatureStore\n" + "\n" + "store = FeatureStore(fs_yaml_file=FEAST_FS_YAML)\n" + "print(f'Project : {store.project}')\n" + "print('Feature views:', [fv.name for fv in store.list_feature_views()])" + ), + _md( + "## 3. Discover Available Features\n\nList feature views and read a sample of entity data." + ), + _code( + "import pandas as pd\n" + "from datetime import datetime, timedelta, timezone\n" + "\n" + "fvs = store.list_feature_views()\n" + "entities = store.list_entities()\n" + "\n" + "if not fvs:\n" + " print('No feature views found — run `feast apply` first.')\n" + "else:\n" + " first_fv = fvs[0]\n" + "\n" + " # Identify the entity join key.\n" + " entity_name = entities[0].join_key if entities else 'entity_id'\n" + " if first_fv.entities:\n" + " fv_entity = next(\n" + " (e for e in entities if e.name in set(first_fv.entities)),\n" + " entities[0] if entities else None,\n" + " )\n" + " if fv_entity:\n" + " entity_name = fv_entity.join_key\n" + "\n" + " # Read latest entity values from the offline store.\n" + " # This uses the same mechanism Feast uses for materialization.\n" + " source = first_fv.batch_source\n" + " provider = store._get_provider()\n" + " sample_df = provider.offline_store.pull_latest_from_table_or_query(\n" + " config=store.config,\n" + " data_source=source,\n" + " join_key_columns=[entity_name],\n" + " feature_name_columns=[f.name for f in first_fv.features],\n" + " timestamp_field=source.timestamp_field,\n" + " created_timestamp_column=source.created_timestamp_column or '',\n" + " start_date=datetime(2000, 1, 1, tzinfo=timezone.utc),\n" + " end_date=datetime.now(tz=timezone.utc),\n" + " ).to_df()\n" + "\n" + " print(f'Feature view : {first_fv.name}')\n" + " print(f'Entity join key : {entity_name!r}')\n" + " print(f'Rows in source : {len(sample_df):,}')\n" + " print(f'Columns : {list(sample_df.columns)}')\n" + " if len(sample_df) > 0:\n" + " display(sample_df.head())\n" + " 
else:\n" + " print('No data found — check that your data source has been populated.')" + ), + _md( + "## 4. Build an Entity DataFrame\n\n" + "Specify which entity IDs and at what timestamps you want features for." + ), + _code( + "if not fvs:\n" + " raise SystemExit('No feature views — run feast apply first.')\n" + "\n" + "# Use real entity IDs and timestamps from the sample.\n" + "if entity_name in sample_df.columns and len(sample_df) > 0:\n" + " entity_ids = sample_df[entity_name].dropna().unique()[:5].tolist()\n" + " # Detect the timestamp column from the source's configuration.\n" + " ts_col = source.timestamp_field if source.timestamp_field in sample_df.columns else None\n" + " if not ts_col:\n" + " ts_col = next((c for c in sample_df.columns if 'timestamp' in c.lower()), None)\n" + " if ts_col:\n" + " timestamps = (\n" + " sample_df[sample_df[entity_name].isin(entity_ids)]\n" + " .sort_values(ts_col, ascending=False)\n" + " .drop_duplicates(subset=[entity_name])[ts_col]\n" + " .tolist()\n" + " )\n" + " else:\n" + " timestamps = [datetime.now() - timedelta(hours=i) for i in range(len(entity_ids))]\n" + "else:\n" + " entity_ids = [1001, 1002, 1003]\n" + " timestamps = [datetime.now() - timedelta(hours=i) for i in range(len(entity_ids))]\n" + " print('Using placeholder entity IDs — replace with real values from your data.')\n" + "\n" + "entity_df = pd.DataFrame(\n" + " {\n" + " entity_name: entity_ids[:len(timestamps)],\n" + " 'event_timestamp': timestamps[:len(entity_ids)],\n" + " }\n" + ")\n" + "print(f'Entity IDs : {entity_ids}')\n" + "print(f'Rows : {len(entity_df)}')\n" + "entity_df" + ), + _md("## 5. 
Choose Features to Retrieve"), + _code( + "# List all available feature views and their features.\n" + "print('Available feature views:')\n" + "for fv in fvs:\n" + " features = [f.name for f in fv.features]\n" + " print(f' {fv.name}: {features}')\n" + "\n" + "# Select features from the first feature view.\n" + "# Using a single view avoids name collisions across views with identical column names.\n" + "feature_refs = [f'{first_fv.name}:{f.name}' for f in first_fv.features]\n" + "print('\\nWill retrieve:', feature_refs)" + ), + _md("## 6. Retrieve Historical Features"), + _code( + "if feature_refs:\n" + " training_df = store.get_historical_features(\n" + " entity_df=entity_df,\n" + " features=feature_refs,\n" + " ).to_df()\n" + " print(f'Training dataset shape: {training_df.shape}')\n" + " training_df.head()\n" + "else:\n" + " print('No feature views found — run `feast apply` first.')" + ), + _md( + "## 7. (Optional) Retrieve via FeatureService\n\nRetrieve features using a versioned FeatureService instead of individual feature references." 
+ ), + _code( + "services = store.list_feature_services()\n" + "if not services:\n" + " print('No feature services found — define one in your feature repo.')\n" + "else:\n" + " svc = services[0]\n" + "\n" + " # Detect extra request-data columns required by ODFVs in this service.\n" + " odfv_map = {v.name: v for v in store.list_on_demand_feature_views()}\n" + " missing_cols = {\n" + " field.name: field.dtype\n" + " for proj in svc.feature_view_projections\n" + " if proj.name in odfv_map\n" + " for rs in odfv_map[proj.name].source_request_sources.values()\n" + " for field in rs.schema\n" + " if field.name not in entity_df.columns\n" + " }\n" + "\n" + " if missing_cols:\n" + " print('This service requires the following extra columns in entity_df:')\n" + " for col, dtype in missing_cols.items():\n" + " print(f' entity_df[{col!r}] = ')\n" + " print('Add them to entity_df above and re-run this cell.')\n" + " else:\n" + " # Check if service needs entity keys not already in entity_df.\n" + " svc_entities = set()\n" + " for proj in svc.feature_view_projections:\n" + " fv_match = next((fv for fv in fvs if fv.name == proj.name), None)\n" + " if fv_match:\n" + " for ent_name in fv_match.entities:\n" + " ent_obj = next((e for e in entities if e.name == ent_name), None)\n" + " if ent_obj:\n" + " svc_entities.add(ent_obj.join_key)\n" + " missing_keys = svc_entities - set(entity_df.columns)\n" + " if missing_keys:\n" + " print(f'This service requires additional entity columns: {missing_keys}')\n" + " print('Add them to entity_df above and re-run this cell.')\n" + " else:\n" + " print(f'Using feature service: {svc.name}')\n" + " training_df_svc = store.get_historical_features(\n" + " entity_df=entity_df,\n" + " features=svc,\n" + " full_feature_names=True,\n" + " ).to_df()\n" + " print(f'Dataset shape: {training_df_svc.shape}')\n" + " training_df_svc.head()" + ), + _md("## 8. 
Use the Training Dataset"),
        _code(
            "# Example: split into features (X) and labels (y)\n"
            "# Adjust column names to match your actual feature names and label.\n"
            "if feature_refs and 'training_df' in dir():\n"
            "    label_col = 'label'  # TODO: replace with your label column\n"
            "    feature_cols = [c for c in training_df.columns\n"
            "                    if c not in ('event_timestamp', entity_name, label_col)]\n"
            "    X = training_df[feature_cols]\n"
            "    print('Feature matrix shape:', X.shape)\n"
            "    print('Feature columns:', feature_cols)"
        ),
        _md(
            "## Next Steps\n\n"
            "- **`03_online_features_serving.ipynb`** — materialize and serve online features."
        ),
    ]
    return _notebook(cells)


def _nb_online(info: dict[str, Any], yaml_abs: str) -> dict[str, Any]:
    """Build the "Online Feature Serving" demo notebook.

    Parameters
    ----------
    info:
        Store metadata dict — this function reads ``project``,
        ``auth_type``, ``vector_enabled`` and ``embedding_dim``, and passes
        the whole dict to ``_is_operator_client``.
    yaml_abs:
        Absolute path of the project's ``feature_store.yaml``; embedded in
        the notebook's path-setup cell via ``_path_setup_cell``.

    Returns
    -------
    dict
        The notebook document produced by ``_notebook(cells)``.
    """
    project = info["project"]
    auth = info["auth_type"]
    vector_enabled = info["vector_enabled"]

    # Fixed intro cells: title, path setup, and connecting to the store.
    cells: list[dict[str, Any]] = [
        _md(
            f"# Online Feature Serving — `{project}`\n\n"
            "Materialize features and retrieve them at low latency for inference."
        ),
        _md("## 1. Feature Store Path"),
        _path_setup_cell(yaml_abs),
        _md("## 2. Connect to the Feature Store"),
        _code(
            "from feast import FeatureStore\n"
            "\n"
            "store = FeatureStore(fs_yaml_file=FEAST_FS_YAML)\n"
            "print(f'Project : {store.project}')"
        ),
    ]

    # Materialization section. For operator-client (all-remote) configs the
    # section is prefixed with an "Optional" banner, since materialization is
    # typically handled server-side in that topology.
    materialize_md = (
        "## 3. Materialize Features\n\n"
        + (
            "> **Optional** — materialization is typically handled server-side.\n\n"
            if _is_operator_client(info)
            else ""
        )
        + "Load feature values into the online store for low-latency serving.\n\n"
        "| Method | When to use |\n"
        "|--------|-------------|\n"
        "| `materialize_incremental` | Regular runs — only new data since last run |\n"
        "| `materialize` | First run or full refresh of a time window |"
    )
    cells += [
        _md(materialize_md),
        # The generated cell picks full vs. incremental materialization based
        # on whether any feature view has a materialization watermark.
        _code(
            "from datetime import datetime, timedelta, timezone\n"
            "\n"
            "fvs = store.list_feature_views()\n"
            "\n"
            "if not fvs:\n"
            "    print('No feature views found — run feast apply first (see section 3).')\n"
            "else:\n"
            "    # Check last materialization watermarks across all feature views.\n"
            "    last_written = [\n"
            "        fv.materialization_intervals[-1][1]\n"
            "        for fv in fvs\n"
            "        if fv.materialization_intervals\n"
            "    ]\n"
            "\n"
            "    if not last_written:\n"
            "        # No materialization history — do a full initial load.\n"
            "        end_date = datetime.now(tz=timezone.utc)\n"
            "        start_date = end_date - timedelta(days=30)\n"
            "        print(f'First materialization: loading {start_date.date()} → {end_date.date()} ...')\n"
            "        store.materialize(start_date=start_date, end_date=end_date)\n"
            "    else:\n"
            "        # Incremental: only pick up data since the last run.\n"
            "        end_date = datetime.now(tz=timezone.utc)\n"
            "        print(f'Incremental materialization up to {end_date} ...')\n"
            "        store.materialize_incremental(end_date=end_date)\n"
            "\n"
            "    print('Materialization complete.')"
        ),
        _md("### 3b. Force a Full Refresh"),
        # Commented-out template the user can uncomment for a full refresh.
        _code(
            "# from datetime import datetime, timedelta, timezone\n"
            "# store.materialize(\n"
            "#     start_date=datetime.now(tz=timezone.utc) - timedelta(days=7),\n"
            "#     end_date=datetime.now(tz=timezone.utc),\n"
            "# )"
        ),
    ]

    # NOTE(review): the section-4 cell below uses the name `datetime` but only
    # imports `timezone` itself — it relies on the section-3 cell (which does
    # `from datetime import datetime, timedelta, timezone`) having been run
    # first. Confirm the cells are intended to run strictly in order.
    cells += [
        _md("## 4. Retrieve Online Features"),
        _code(
            "entities = store.list_entities()\n"
            "fvs = store.list_feature_views()\n"
            "\n"
            "if not entities or not fvs:\n"
            "    print('No entities or feature views — run `feast apply` first.')\n"
            "else:\n"
            "    first_fv = fvs[0]\n"
            "    feature_refs = [f'{first_fv.name}:{f.name}' for f in first_fv.features[:3]]\n"
            "\n"
            "    # Resolve the correct entity join key for the first feature view.\n"
            "    entity_name = entities[0].join_key\n"
            "    if first_fv.entities:\n"
            "        fv_entity = next(\n"
            "            (e for e in entities if e.name in set(first_fv.entities)),\n"
            "            entities[0],\n"
            "        )\n"
            "        entity_name = fv_entity.join_key\n"
            "\n"
            "    # Discover real entity IDs from the offline source.\n"
            "    from datetime import timezone\n"
            "    source = first_fv.batch_source\n"
            "    provider = store._get_provider()\n"
            "    sample_df = provider.offline_store.pull_latest_from_table_or_query(\n"
            "        config=store.config,\n"
            "        data_source=source,\n"
            "        join_key_columns=[entity_name],\n"
            "        feature_name_columns=[f.name for f in first_fv.features],\n"
            "        timestamp_field=source.timestamp_field,\n"
            "        created_timestamp_column=source.created_timestamp_column or '',\n"
            "        start_date=datetime(2000, 1, 1, tzinfo=timezone.utc),\n"
            "        end_date=datetime.now(tz=timezone.utc),\n"
            "    ).to_df()\n"
            "\n"
            "    if len(sample_df) > 0 and entity_name in sample_df.columns:\n"
            "        entity_ids = sample_df[entity_name].dropna().unique()[:5].tolist()\n"
            "    else:\n"
            "        entity_ids = [1001, 1002]\n"
            "        print('Using placeholder IDs — replace with real values.')\n"
            "\n"
            "    entity_rows = [{entity_name: eid} for eid in entity_ids]\n"
            "\n"
            "    response = store.get_online_features(\n"
            "        features=feature_refs,\n"
            "        entity_rows=entity_rows,\n"
            "    )\n"
            "    import pandas as pd\n"
            "    print(pd.DataFrame(response.to_dict()))"
        ),
        _md(
            "## 5. Online Features via FeatureService\n\nRetrieve features using a versioned FeatureService."
        ),
        # NOTE(review): the f-string printing "{col!r}: " below ends with a
        # bare colon — it looks like the dtype placeholder was lost; verify
        # the intended output (dtype is otherwise unused in that loop).
        _code(
            "services = store.list_feature_services()\n"
            "if not services:\n"
            "    print('No feature services defined.')\n"
            "else:\n"
            "    svc = services[0]\n"
            "\n"
            "    # Detect extra request-data fields required by ODFVs in this service.\n"
            "    odfv_map = {v.name: v for v in store.list_on_demand_feature_views()}\n"
            "    current_keys = set(entity_rows[0].keys()) if entity_rows else set()\n"
            "    missing_fields = {\n"
            "        field.name: field.dtype\n"
            "        for proj in svc.feature_view_projections\n"
            "        if proj.name in odfv_map\n"
            "        for rs in odfv_map[proj.name].source_request_sources.values()\n"
            "        for field in rs.schema\n"
            "        if field.name not in current_keys\n"
            "    }\n"
            "\n"
            "    if missing_fields:\n"
            "        print('This service requires the following extra fields in each entity row:')\n"
            "        for col, dtype in missing_fields.items():\n"
            "            print(f'  {col!r}: ')\n"
            "        print('Add them to entity_rows above and re-run this cell.')\n"
            "    else:\n"
            "        # Check if service needs extra entity keys beyond what we have.\n"
            "        svc_entities = set()\n"
            "        for proj in svc.feature_view_projections:\n"
            "            fv_match = next((fv for fv in fvs if fv.name == proj.name), None)\n"
            "            if fv_match:\n"
            "                for ent_name in fv_match.entities:\n"
            "                    ent_obj = next((e for e in entities if e.name == ent_name), None)\n"
            "                    if ent_obj:\n"
            "                        svc_entities.add(ent_obj.join_key)\n"
            "        missing_keys = svc_entities - current_keys\n"
            "        if missing_keys:\n"
            "            print(f'This service requires additional entity keys: {missing_keys}')\n"
            "            print('Add them to entity_rows above and re-run this cell.')\n"
            "        else:\n"
            "            print(f'Using feature service: {svc.name}')\n"
            "            response = store.get_online_features(\n"
            "                features=svc,\n"
            "                entity_rows=entity_rows,\n"
            "                full_feature_names=True,\n"
            "            )\n"
            "            import pandas as pd\n"
            "            print(pd.DataFrame(response.to_dict()))"
        ),
    ]

    # Authentication section only for kubernetes / oidc configs.
    if auth in ("kubernetes", "oidc"):
        cells.append(_md(f"## 6. Authentication (`{auth}`)"))
        cells.append(_code("print(store.config.auth)"))

    # Vector/RAG section only when the online store has vector search enabled.
    # Section number shifts by one when the auth section was emitted above.
    if vector_enabled:
        dim = info.get("embedding_dim") or 384  # 384 used when dim is unset
        section = 7 if auth in ("kubernetes", "oidc") else 6
        cells.append(
            _md(
                f"## {section}. Vector / RAG Feature Retrieval\n\nSearch stored embeddings (dim: {dim})."
            )
        )
        cells.append(
            _code(
                "import numpy as np\n"
                "\n"
                "# TODO: replace with a real query embedding from your encoder model\n"
                f"query_embedding = np.random.rand({dim}).tolist()\n"
                "\n"
                "# List feature views with vector features\n"
                "fvs = store.list_feature_views()\n"
                "vec_fvs = [\n"
                "    fv for fv in fvs\n"
                "    if any(getattr(f, 'vector_index', False) for f in fv.features)\n"
                "]\n"
                "\n"
                "if vec_fvs:\n"
                "    fv = vec_fvs[0]\n"
                "    results = store.retrieve_online_documents(\n"
                "        feature=f'{fv.name}:{fv.features[0].name}',\n"
                "        query=query_embedding,\n"
                "        top_k=5,\n"
                "    )\n"
                "    import pandas as pd\n"
                "    print(pd.DataFrame(results.to_dict()))\n"
                "else:\n"
                "    print('No vector feature views found.')"
            )
        )

    cells.append(
        _md(
            "## Next Steps\n\n"
            "- Schedule `materialize_incremental` to keep the online store fresh.\n"
        )
    )

    return _notebook(cells)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def copy_demo_notebooks(
    output_dir: str = "./feast-demo-notebooks",
    repo_path: str = ".",
    overwrite: bool = False,
) -> None:
    """Generate tailored demo notebooks for each Feast project found nearby.

    The function searches *repo_path* (default: current working directory) for
    feature-store YAML files in:

    * ``<repo_path>/feature_store.yaml``
    * Every file inside ``<repo_path>/feast-config/``

    For each project discovered a sub-directory is created under *output_dir*
    and one or more notebooks are written (the exact set depends on the project
    configuration and may grow in future releases).
+ + Parameters + ---------- + output_dir: + Root directory where notebooks are written. + Defaults to ``./feast-demo-notebooks``. + repo_path: + Directory to search for ``feature_store.yaml`` files. + Defaults to the current working directory. + overwrite: + When *False* (default) raise :class:`FileExistsError` if *output_dir* + already exists. Set to *True* to update notebooks in place. + """ + out = pathlib.Path(output_dir).resolve() + + if not overwrite and out.exists(): + raise FileExistsError( + f"Directory '{out}' already exists. " + "Remove it or pass overwrite=True to update notebooks in place." + ) + + root = pathlib.Path(repo_path).absolute() + yaml_paths = _find_feature_store_yamls(root) + + if not yaml_paths: + _logger.warning( + "No feature_store.yaml found under '%s'. " + "Make sure you run this from a directory that contains feature_store.yaml " + "or a feast-config/ subdirectory.", + root, + ) + return + + out.mkdir(parents=True, exist_ok=True) + print(f"Writing demo notebooks to: {out}\n") + + for yaml_path in yaml_paths: + raw = _parse_yaml(yaml_path) + info = _extract_store_info(raw) + project = info["project"] + + project_dir = out / project + project_dir.mkdir(parents=True, exist_ok=True) + + # Absolute path — use absolute() instead of resolve() to preserve + # Kubernetes ConfigMap/Secret symlinks. 
+ yaml_abs_str = str(yaml_path.absolute()) + + notebooks = { + "01_feature_store_overview.ipynb": _nb_overview(info, yaml_abs_str), + "02_historical_features_training.ipynb": _nb_historical(info, yaml_abs_str), + "03_online_features_serving.ipynb": _nb_online(info, yaml_abs_str), + } + + for nb_name, nb_content in notebooks.items(): + nb_path = project_dir / nb_name + with open(nb_path, "w") as fh: + json.dump(nb_content, fh, indent=1) + + print( + f" [{project}]\n" + f" feature_store.yaml : {yaml_abs_str}\n" + f" online_store : {info['online_store_type']}\n" + f" offline_store : {info['offline_store_type']}\n" + f" auth : {info['auth_type']}\n" + + (" vector search : enabled\n" if info["vector_enabled"] else "") + + f" → {project_dir}/" + ) + for nb_name in notebooks: + print(f" ✓ {nb_name}") + print() diff --git a/sdk/python/tests/unit/test_demos.py b/sdk/python/tests/unit/test_demos.py new file mode 100644 index 00000000000..52d61b353a2 --- /dev/null +++ b/sdk/python/tests/unit/test_demos.py @@ -0,0 +1,360 @@ +"""Unit tests for feast.demos — demo notebook generation.""" + +import json +import pathlib +import textwrap + +import pytest + +from feast.demos import ( + _extract_store_info, + _find_feature_store_yamls, + _is_operator_client, + copy_demo_notebooks, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_LOCAL_YAML = textwrap.dedent("""\ + project: local_proj + provider: local + registry: + path: data/registry.db + registry_type: file + offline_store: + type: file + online_store: + type: sqlite + path: data/online_store.db + entity_key_serialization_version: 3 +""") + +_OPERATOR_YAML = textwrap.dedent("""\ + project: remote_proj + provider: local + offline_store: + host: feast-offline.svc.cluster.local + port: 80 + type: remote + online_store: + path: http://feast-online.svc.cluster.local:80 + type: remote + registry: + path: 
      path: feast-registry.svc.cluster.local:80
      registry_type: remote
    auth:
      type: oidc
    entity_key_serialization_version: 3
""")

# Vector-search-enabled online store (pgvector, 512-dim embeddings).
_VECTOR_YAML = textwrap.dedent("""\
    project: vec_proj
    provider: local
    registry:
      path: data/registry.db
      registry_type: file
    offline_store:
      type: file
    online_store:
      type: pgvector
      vector_enabled: true
      embedding_dim: 512
    entity_key_serialization_version: 3
""")


def _write(tmp_path: pathlib.Path, rel: str, content: str) -> pathlib.Path:
    # Create a file at tmp_path/rel (making parent dirs) and return its path.
    p = tmp_path / rel
    p.parent.mkdir(parents=True, exist_ok=True)
    p.write_text(content)
    return p


def _sections(nb: dict) -> list[str]:
    """Return the first line of every markdown cell that starts with #."""
    # startswith("#") also guarantees the joined source is non-empty, so
    # splitlines()[0] below cannot raise IndexError.
    return [
        "".join(cell["source"]).splitlines()[0]
        for cell in nb["cells"]
        if cell["cell_type"] == "markdown" and "".join(cell["source"]).startswith("#")
    ]


# ---------------------------------------------------------------------------
# _extract_store_info
# ---------------------------------------------------------------------------


class TestExtractStoreInfo:
    """Extraction of normalized store metadata from raw YAML config dicts."""

    def test_local_defaults(self):
        # An empty config falls back to all documented defaults.
        info = _extract_store_info({})
        assert info["project"] == "my_feast_project"
        assert info["provider"] == "local"
        assert info["online_store_type"] == "sqlite"
        assert info["offline_store_type"] == "file"
        assert info["registry_type"] == "file"
        assert info["auth_type"] == "no_auth"
        assert info["vector_enabled"] is False
        assert info["embedding_dim"] is None

    def test_operator_client_yaml(self):
        # All-remote config (as deployed by the Feast operator) is recognized.
        config = {
            "project": "sample",
            "provider": "local",
            "offline_store": {"type": "remote", "host": "h", "port": 80},
            "online_store": {"type": "remote", "path": "http://h:80"},
            "registry": {"registry_type": "remote", "path": "h:80"},
            "auth": {"type": "oidc"},
        }
        info = _extract_store_info(config)
        assert info["registry_type"] == "remote"
        assert info["online_store_type"] == "remote"
        assert info["offline_store_type"] == "remote"
        assert info["auth_type"] == "oidc"

    def test_registry_type_key_takes_priority_over_type(self):
        # "registry_type" wins when both keys are present.
        config = {"registry": {"registry_type": "remote", "type": "file"}}
        info = _extract_store_info(config)
        assert info["registry_type"] == "remote"

    def test_registry_type_fallback_to_type(self):
        config = {"registry": {"type": "snowflake"}}
        info = _extract_store_info(config)
        assert info["registry_type"] == "snowflake"

    def test_string_registry_path_stays_file(self):
        # A bare string registry path means a local file registry.
        info = _extract_store_info({"registry": "data/registry.db"})
        assert info["registry_type"] == "file"

    def test_vector_enabled(self):
        config = {
            "online_store": {
                "type": "pgvector",
                "vector_enabled": True,
                "embedding_dim": 512,
            }
        }
        info = _extract_store_info(config)
        assert info["vector_enabled"] is True
        assert info["embedding_dim"] == 512

    def test_online_store_as_string(self):
        # Shorthand string form is accepted and lower-cased.
        info = _extract_store_info({"online_store": "Redis"})
        assert info["online_store_type"] == "redis"

    def test_offline_store_as_string(self):
        info = _extract_store_info({"offline_store": "BigQuery"})
        assert info["offline_store_type"] == "bigquery"


# ---------------------------------------------------------------------------
# _is_operator_client
# ---------------------------------------------------------------------------


class TestIsOperatorClient:
    """A config is "operator client" only when all three stores are remote."""

    def _info(self, registry="remote", online="remote", offline="remote"):
        # Minimal info dict with only the keys _is_operator_client reads.
        return {
            "registry_type": registry,
            "online_store_type": online,
            "offline_store_type": offline,
        }

    def test_all_remote_is_operator(self):
        assert _is_operator_client(self._info()) is True

    def test_local_registry_not_operator(self):
        assert _is_operator_client(self._info(registry="file")) is False

    def test_local_online_not_operator(self):
        assert _is_operator_client(self._info(online="sqlite")) is False

    def test_local_offline_not_operator(self):
        assert _is_operator_client(self._info(offline="file")) is False
# ---------------------------------------------------------------------------
# _find_feature_store_yamls
# ---------------------------------------------------------------------------


class TestFindFeatureStoreYamls:
    """Discovery of feature-store YAML files under a repository root."""

    def test_direct(self, tmp_path):
        # A root-level feature_store.yaml is found directly.
        _write(tmp_path, "feature_store.yaml", "project: p")
        hits = _find_feature_store_yamls(tmp_path)
        assert len(hits) == 1
        assert hits[0].name == "feature_store.yaml"

    def test_feast_config_root(self, tmp_path):
        _write(tmp_path, "feast-config/feature_store.yaml", "project: p")
        assert len(_find_feature_store_yamls(tmp_path)) == 1

    def test_feast_config_multiple_files(self, tmp_path):
        # Every file in feast-config/ counts, .yaml and .yml alike.
        _write(tmp_path, "feast-config/rag.yaml", "project: rag")
        _write(tmp_path, "feast-config/rec.yml", "project: rec")
        assert len(_find_feature_store_yamls(tmp_path)) == 2

    def test_feast_config_any_extension(self, tmp_path):
        # Extension-less files inside feast-config/ are picked up too.
        _write(tmp_path, "feast-config/project_a.yaml", "project: a")
        _write(tmp_path, "feast-config/project_b", "project: b")
        assert len(_find_feature_store_yamls(tmp_path)) == 2

    def test_feast_config_ignores_directories(self, tmp_path):
        _write(tmp_path, "feast-config/valid.yaml", "project: p")
        (tmp_path / "feast-config" / "subdir").mkdir()
        assert len(_find_feature_store_yamls(tmp_path)) == 1

    def test_multiple_sources(self, tmp_path):
        # Root yaml plus feast-config/ entries are all combined.
        _write(tmp_path, "feature_store.yaml", "project: root")
        _write(tmp_path, "feast-config/a.yaml", "project: a")
        _write(tmp_path, "feast-config/b", "project: b")
        assert len(_find_feature_store_yamls(tmp_path)) == 3

    def test_no_yaml_returns_empty(self, tmp_path):
        assert _find_feature_store_yamls(tmp_path) == []


# ---------------------------------------------------------------------------
# copy_demo_notebooks — file generation
# ---------------------------------------------------------------------------


class TestCopyDemoNotebooks:
    """End-to-end generation of notebook files on disk."""

    def test_generates_notebooks(self, tmp_path):
        _write(tmp_path, "feature_store.yaml", _LOCAL_YAML)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))

        # All three notebooks land in the per-project sub-directory.
        proj_dir = out / "local_proj"
        for nb_name in (
            "01_feature_store_overview.ipynb",
            "02_historical_features_training.ipynb",
            "03_online_features_serving.ipynb",
        ):
            assert (proj_dir / nb_name).exists()

    def test_valid_notebook_json(self, tmp_path):
        _write(tmp_path, "feature_store.yaml", _LOCAL_YAML)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))

        raw = (out / "local_proj" / "01_feature_store_overview.ipynb").read_text()
        nb = json.loads(raw)
        assert nb["nbformat"] == 4
        assert isinstance(nb["cells"], list)

    def test_raises_if_output_exists(self, tmp_path):
        _write(tmp_path, "feature_store.yaml", _LOCAL_YAML)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))

        # A second run without overwrite must refuse to touch the directory.
        with pytest.raises(FileExistsError):
            copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))

    def test_overwrite_flag(self, tmp_path):
        _write(tmp_path, "feature_store.yaml", _LOCAL_YAML)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))
        # overwrite=True allows regenerating in place without raising.
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path), overwrite=True)

    def test_no_yaml_returns_without_creating_output(self, tmp_path):
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))
        assert not out.exists()

    def test_multiple_projects(self, tmp_path):
        _write(tmp_path, "feast-config/proj_a.yaml", "project: proj_a\nprovider: local\n")
        _write(tmp_path, "feast-config/proj_b.yaml", "project: proj_b\nprovider: local\n")
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))
        assert (out / "proj_a").is_dir()
        assert (out / "proj_b").is_dir()
# ---------------------------------------------------------------------------
# Notebook content — section headings
# ---------------------------------------------------------------------------


class TestNotebookContent:
    """Which markdown section headings each generated notebook contains,
    depending on the store configuration (local vs. remote, auth, vector)."""

    def _notebooks(self, tmp_path, yaml_content):
        # Generate notebooks for *yaml_content* and load all three back as
        # parsed JSON dicts keyed by file name.
        _write(tmp_path, "feature_store.yaml", yaml_content)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))
        # __import__("yaml") avoids a module-level PyYAML import; the project
        # name determines the per-project output sub-directory.
        project = _extract_store_info(__import__("yaml").safe_load(yaml_content))[
            "project"
        ]
        return {
            name: json.loads((out / project / name).read_text())
            for name in [
                "01_feature_store_overview.ipynb",
                "02_historical_features_training.ipynb",
                "03_online_features_serving.ipynb",
            ]
        }

    def test_local_overview_has_apply_section(self, tmp_path):
        nbs = self._notebooks(tmp_path, _LOCAL_YAML)
        sections = _sections(nbs["01_feature_store_overview.ipynb"])
        assert any("Apply Feature Definitions" in s for s in sections)

    def test_remote_overview_has_registry_sync(self, tmp_path):
        # Remote (operator-client) configs document registry sync instead.
        nbs = self._notebooks(tmp_path, _OPERATOR_YAML)
        sections = _sections(nbs["01_feature_store_overview.ipynb"])
        assert any("Registry Sync" in s for s in sections)

    def test_historical_no_apply_section(self, tmp_path):
        # Only the overview notebook carries the apply walkthrough.
        nbs = self._notebooks(tmp_path, _LOCAL_YAML)
        sections = _sections(nbs["02_historical_features_training.ipynb"])
        assert not any("Apply" in s for s in sections)

    def test_online_no_apply_section(self, tmp_path):
        nbs = self._notebooks(tmp_path, _LOCAL_YAML)
        sections = _sections(nbs["03_online_features_serving.ipynb"])
        assert not any("Apply" in s for s in sections)

    def test_vector_notebook_has_vector_section(self, tmp_path):
        nbs = self._notebooks(tmp_path, _VECTOR_YAML)
        sections = _sections(nbs["03_online_features_serving.ipynb"])
        assert any("Vector" in s for s in sections)

    def test_non_vector_notebook_no_vector_section(self, tmp_path):
        nbs = self._notebooks(tmp_path, _LOCAL_YAML)
        sections = _sections(nbs["03_online_features_serving.ipynb"])
        assert not any("Vector" in s for s in sections)

    def test_auth_section_present_for_oidc(self, tmp_path):
        nbs = self._notebooks(tmp_path, _OPERATOR_YAML)
        sections = _sections(nbs["03_online_features_serving.ipynb"])
        assert any("Authentication" in s for s in sections)

    def test_auth_section_absent_for_no_auth(self, tmp_path):
        nbs = self._notebooks(tmp_path, _LOCAL_YAML)
        sections = _sections(nbs["03_online_features_serving.ipynb"])
        assert not any("Authentication" in s for s in sections)

    def test_path_setup_cell_contains_yaml_path(self, tmp_path):
        # The generated path-setup cell embeds the absolute yaml path.
        _write(tmp_path, "feature_store.yaml", _LOCAL_YAML)
        out = tmp_path / "out"
        copy_demo_notebooks(output_dir=str(out), repo_path=str(tmp_path))
        nb = json.loads(
            (out / "local_proj" / "01_feature_store_overview.ipynb").read_text()
        )
        code_sources = [
            "".join(c["source"]) for c in nb["cells"] if c["cell_type"] == "code"
        ]
        yaml_path = str((tmp_path / "feature_store.yaml").resolve())
        assert any(yaml_path in src for src in code_sources)