From e985426c7ec5c7d1edd3837338f8d4504359fb6a Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sat, 13 Jun 2026 11:33:09 -0700 Subject: [PATCH 01/25] feat(snowflake): import Cortex Analyst measures and enrichment keys Read the table-level measures key as a legacy alias of facts so current Cortex Analyst files (e.g. the revenue_timeseries tutorial) no longer import with zero metrics. Also import synonyms on dimensions/measures/ metrics, sample_values and cortex_search_service(_name) on dimensions, top-level verified_queries/custom_instructions/module_custom_instructions, and preserve newer per-field keys (access_modifier, is_enum, unique, labels, tags, non_additive_dimensions, using_relationships) in metadata. Export round-trips the new fields. Adds synonyms/sample_values/ cortex_search_service_name fields to core Dimension/Metric. --- sidemantic/adapters/snowflake.py | 166 +++++++++++++++++- sidemantic/core/dimension.py | 7 + sidemantic/core/metric.py | 3 + .../snowflake/test_cortex_features.py | 143 +++++++++++++++ tests/adapters/snowflake/test_fixtures.py | 25 +-- tests/fixtures/snowflake/cortex_features.yaml | 79 +++++++++ 6 files changed, 398 insertions(+), 25 deletions(-) create mode 100644 tests/adapters/snowflake/test_cortex_features.py create mode 100644 tests/fixtures/snowflake/cortex_features.yaml diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index a34ba470..ce45de8d 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -111,12 +111,19 @@ class SnowflakeAdapter(BaseAdapter): - tables -> Models - dimensions -> Dimensions (categorical) - time_dimensions -> Dimensions (time) - - facts -> Metrics (with default_aggregation) + - facts (a.k.a. legacy `measures`) -> Metrics (with default_aggregation) - metrics -> Metrics (derived, table-scoped aggregations) - relationships -> Relationships - filters -> Segments - Reference: https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-analyst/semantic-model-spec + Also imports newer Cortex Analyst spec features: + - `synonyms` on dimensions/facts/measures/metrics + - `sample_values` and `cortex_search_service` / `cortex_search_service_name` on dimensions + - top-level `verified_queries`, `custom_instructions`, `module_custom_instructions` + - per-field keys preserved in metadata: access_modifier, is_enum, unique, labels, + tags, non_additive_dimensions, using_relationships + + Reference: https://docs.snowflake.com/en/user-guide/views-semantic/semantic-view-yaml-spec """ def parse(self, source: str | Path) -> SemanticGraph: @@ -169,6 +176,20 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: relationships_def = data.get("relationships") or [] self._apply_relationships(relationships_def, graph) + # Parse top-level metrics (semantic-model-scoped metrics referencing tables) + for metric_def in data.get("metrics") or []: + metric = self._parse_metric(metric_def) + if metric is None: + continue + table_name = metric_def.get("table") + if table_name and table_name in graph.models: + graph.models[table_name].metrics.append(metric) + else: + graph.metrics[metric.name] = metric + + # Parse top-level Cortex Analyst sections onto the graph. + self._apply_top_level_sections(data, graph) + def _parse_table(self, table_def: dict) -> Model | None: """Parse Snowflake table definition into Model. @@ -212,9 +233,11 @@ def _parse_table(self, table_def: dict) -> Model | None: if dim: dimensions.append(dim) - # Parse facts (row-level measures with default aggregation) + # Parse facts (row-level measures with default aggregation). + # Cortex Analyst's table-level `measures:` key is a legacy alias of `facts:`; + # accept both so current Cortex Analyst files import without silent data loss. metrics = [] - for fact_def in table_def.get("facts") or []: + for fact_def in (table_def.get("facts") or []) + (table_def.get("measures") or []): metric = self._parse_fact(fact_def) if metric: metrics.append(metric) @@ -270,6 +293,10 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None: type=dim_type, sql=dim_def.get("expr"), description=dim_def.get("description"), + synonyms=dim_def.get("synonyms"), + sample_values=dim_def.get("sample_values"), + cortex_search_service_name=self._cortex_search_service_name(dim_def), + metadata=self._dimension_metadata(dim_def), ) def _parse_time_dimension(self, dim_def: dict) -> Dimension | None: @@ -291,6 +318,10 @@ def _parse_time_dimension(self, dim_def: dict) -> Dimension | None: sql=dim_def.get("expr"), description=dim_def.get("description"), granularity="day", # Default granularity + synonyms=dim_def.get("synonyms"), + sample_values=dim_def.get("sample_values"), + cortex_search_service_name=self._cortex_search_service_name(dim_def), + metadata=self._dimension_metadata(dim_def), ) def _parse_fact(self, fact_def: dict) -> Metric | None: @@ -309,7 +340,7 @@ def _parse_fact(self, fact_def: dict) -> Metric | None: return None # Map Snowflake default_aggregation to Sidemantic agg - default_agg = fact_def.get("default_aggregation", "sum").lower() + default_agg = (fact_def.get("default_aggregation") or "sum").lower() agg_mapping = { "sum": "sum", "avg": "avg", @@ -327,6 +358,8 @@ def _parse_fact(self, fact_def: dict) -> Metric | None: agg=agg, sql=fact_def.get("expr"), description=fact_def.get("description"), + synonyms=fact_def.get("synonyms"), + metadata=self._measure_metadata(fact_def), ) def _parse_metric(self, metric_def: dict) -> Metric | None: @@ -375,6 +408,8 @@ def _parse_metric(self, metric_def: dict) -> Metric | None: agg=agg_func, sql=inner_expr, description=metric_def.get("description"), + synonyms=metric_def.get("synonyms"), + metadata=self._metric_metadata(metric_def), ) # Complex expression (multiple aggregations or couldn't parse simple one) @@ -385,8 +420,89 @@ def _parse_metric(self, metric_def: dict) -> Metric | None: type="derived", sql=qualified_expr, description=metric_def.get("description"), + synonyms=metric_def.get("synonyms"), + metadata=self._metric_metadata(metric_def), ) + @staticmethod + def _cortex_search_service_name(dim_def: dict) -> str | None: + """Resolve the linked Cortex Search service name for a dimension. + + Supports both the legacy flat ``cortex_search_service_name`` string and + the newer nested ``cortex_search_service`` object (``{service, ...}``). + """ + flat = dim_def.get("cortex_search_service_name") + if flat: + return flat + nested = dim_def.get("cortex_search_service") + if isinstance(nested, dict): + return nested.get("service") + if isinstance(nested, str): + return nested + return None + + @staticmethod + def _collect_metadata(definition: dict, keys: tuple[str, ...]) -> dict | None: + """Preserve newer Cortex Analyst per-field keys under a snowflake namespace.""" + extra = {key: definition[key] for key in keys if definition.get(key) is not None} + if not extra: + return None + return {"snowflake": extra} + + def _dimension_metadata(self, dim_def: dict) -> dict | None: + return self._collect_metadata( + dim_def, + ("unique", "is_enum", "access_modifier", "labels", "tags", "cortex_search_service"), + ) + + def _measure_metadata(self, measure_def: dict) -> dict | None: + return self._collect_metadata( + measure_def, + ("access_modifier", "is_enum", "labels", "tags", "non_additive_dimensions"), + ) + + def _metric_metadata(self, metric_def: dict) -> dict | None: + return self._collect_metadata( + metric_def, + ("access_modifier", "labels", "tags", "non_additive_dimensions", "using_relationships"), + ) + + @staticmethod + def _apply_top_level_sections(data: dict, graph: SemanticGraph) -> None: + """Attach top-level Cortex Analyst sections to the graph. + + Cortex Analyst defines several semantic-model-level sections that have no + direct Sidemantic equivalent. We expose them both as direct attributes on + the graph (for ergonomic access) and inside ``graph.metadata`` so they + survive serialization. + """ + verified_queries = data.get("verified_queries") or [] + custom_instructions = data.get("custom_instructions") + module_custom_instructions = data.get("module_custom_instructions") + + # Accumulate verified queries across files in a directory parse. + existing = list(getattr(graph, "verified_queries", []) or []) + existing.extend(verified_queries) + graph.verified_queries = existing + + if custom_instructions is not None: + graph.custom_instructions = custom_instructions + elif not hasattr(graph, "custom_instructions"): + graph.custom_instructions = None + + if module_custom_instructions is not None: + graph.module_custom_instructions = module_custom_instructions + elif not hasattr(graph, "module_custom_instructions"): + graph.module_custom_instructions = None + + snowflake_meta = graph.metadata.setdefault("snowflake", {}) + if existing: + snowflake_meta["verified_queries"] = existing + if graph.custom_instructions is not None: + snowflake_meta["custom_instructions"] = graph.custom_instructions + if graph.module_custom_instructions is not None: + snowflake_meta["module_custom_instructions"] = graph.module_custom_instructions + def _parse_filter(self, filter_def: dict) -> Segment | None: """Parse Snowflake filter into Sidemantic segment. @@ -493,6 +609,17 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: if not semantic_model["relationships"]: del semantic_model["relationships"] + # Export top-level Cortex Analyst sections if present on the graph. + verified_queries = getattr(graph, "verified_queries", None) + if verified_queries: + semantic_model["verified_queries"] = verified_queries + custom_instructions = getattr(graph, "custom_instructions", None) + if custom_instructions: + semantic_model["custom_instructions"] = custom_instructions + module_custom_instructions = getattr(graph, "module_custom_instructions", None) + if module_custom_instructions: + semantic_model["module_custom_instructions"] = module_custom_instructions + output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: @@ -603,6 +730,8 @@ def _export_dimension(self, dim: Dimension) -> dict: } dim_def["data_type"] = type_mapping.get(dim.type, "TEXT") + self._export_dimension_extras(dim, dim_def) + return dim_def def _export_time_dimension(self, dim: Dimension) -> dict: @@ -624,8 +753,23 @@ def _export_time_dimension(self, dim: Dimension) -> dict: dim_def["data_type"] = "TIMESTAMP" + self._export_dimension_extras(dim, dim_def) + return dim_def + @staticmethod + def _export_dimension_extras(dim: Dimension, dim_def: dict) -> None: + """Attach Cortex Analyst enrichment keys to an exported dimension.""" + if dim.synonyms: + dim_def["synonyms"] = dim.synonyms + if dim.sample_values: + dim_def["sample_values"] = dim.sample_values + if dim.cortex_search_service_name: + dim_def["cortex_search_service_name"] = dim.cortex_search_service_name + snowflake_meta = (dim.metadata or {}).get("snowflake", {}) + for key, value in snowflake_meta.items(): + dim_def.setdefault(key, value) + def _export_fact(self, metric: Metric) -> dict: """Export metric as Snowflake fact. @@ -657,6 +801,12 @@ def _export_fact(self, metric: Metric) -> dict: fact["data_type"] = "NUMBER" + if metric.synonyms: + fact["synonyms"] = metric.synonyms + snowflake_meta = (metric.metadata or {}).get("snowflake", {}) + for key, value in snowflake_meta.items(): + fact.setdefault(key, value) + return fact def _export_metric(self, metric: Metric) -> dict: @@ -691,6 +841,12 @@ def _export_metric(self, metric: Metric) -> dict: elif metric.sql: metric_def["expr"] = metric.sql + if metric.synonyms: + metric_def["synonyms"] = metric.synonyms + snowflake_meta = (metric.metadata or {}).get("snowflake", {}) + for key, value in snowflake_meta.items(): + metric_def.setdefault(key, value) + return metric_def def _export_filter(self, segment: Segment) -> dict: diff --git a/sidemantic/core/dimension.py b/sidemantic/core/dimension.py index 92d4b582..d91a8fba 100644 --- a/sidemantic/core/dimension.py +++ b/sidemantic/core/dimension.py @@ -26,6 +26,13 @@ class Dimension(BaseModel): label: str | None = Field(None, description="Display label") metadata: dict[str, Any] | None = Field(None, description="Adapter-specific metadata payload") + # Synonyms / sample values (e.g. Snowflake Cortex Analyst, Cube) + synonyms: list[str] | None = Field(None, description="Alternative names for this dimension") + sample_values: list[str] | None = Field(None, description="Representative sample values for this dimension") + cortex_search_service_name: str | None = Field( + None, description="Linked Cortex Search service name (Snowflake Cortex Analyst)" + ) + # Display formatting format: str | None = Field(None, description="Display format string (e.g., '$#,##0.00', '0.00%')") value_format_name: str | None = Field(None, description="Named format (e.g., 'usd', 'percent', 'decimal_2')") diff --git a/sidemantic/core/metric.py b/sidemantic/core/metric.py index bfd1270e..411b2c39 100644 --- a/sidemantic/core/metric.py +++ b/sidemantic/core/metric.py @@ -331,6 +331,9 @@ def validate_type_specific_fields(self): label: str | None = Field(None, description="Display label") metadata: dict[str, Any] | None = Field(None, description="Adapter-specific metadata payload") + # Synonyms (e.g. Snowflake Cortex Analyst measures/metrics, Cube) + synonyms: list[str] | None = Field(None, description="Alternative names for this measure/metric") + # Display formatting format: str | None = Field(None, description="Display format string (e.g., '$#,##0.00', '0.00%')") value_format_name: str | None = Field(None, description="Named format (e.g., 'usd', 'percent', 'decimal_2')") diff --git a/tests/adapters/snowflake/test_cortex_features.py b/tests/adapters/snowflake/test_cortex_features.py new file mode 100644 index 00000000..219ef8fa --- /dev/null +++ b/tests/adapters/snowflake/test_cortex_features.py @@ -0,0 +1,143 @@ +"""Tests for newer Snowflake Cortex Analyst spec features. + +Covers the keys added to the adapter on top of the legacy facts-based format: +- table-level `measures` (legacy alias of `facts`) +- `synonyms` on dimensions/measures/metrics +- `sample_values`, nested `cortex_search_service`, `is_enum`, `unique`, + `access_modifier`, `labels`, `tags` on dimensions +- `non_additive_dimensions` / `using_relationships` preserved in metadata +- top-level `verified_queries`, `custom_instructions`, `module_custom_instructions` +- export round-trip preservation of all of the above +""" + +from pathlib import Path + +import pytest +import yaml + +from sidemantic.adapters.snowflake import SnowflakeAdapter + + +@pytest.fixture +def adapter(): + return SnowflakeAdapter() + + +@pytest.fixture +def fixture_path(): + return Path(__file__).parent.parent.parent / "fixtures" / "snowflake" / "cortex_features.yaml" + + +@pytest.fixture +def graph(adapter, fixture_path): + return adapter.parse(fixture_path) + + +class TestMeasuresAlias: + def test_measures_parsed_as_metrics(self, graph): + model = graph.models["orders"] + names = {m.name for m in model.metrics} + assert "order_total" in names + assert "distinct_orders" in names + + def test_measure_default_aggregation(self, graph): + model = graph.models["orders"] + order_total = model.get_metric("order_total") + assert order_total.agg == "sum" + assert order_total.sql == "total" + + +class TestSynonyms: + def test_dimension_synonyms(self, graph): + model = graph.models["orders"] + status = model.get_dimension("status") + assert status.synonyms == ["state"] + + def test_measure_synonyms(self, graph): + model = graph.models["orders"] + order_total = model.get_metric("order_total") + assert order_total.synonyms == ["revenue"] + + def test_metric_synonyms(self, graph): + model = graph.models["orders"] + distinct_orders = model.get_metric("distinct_orders") + assert distinct_orders.synonyms == ["order count"] + + +class TestDimensionEnrichment: + def test_sample_values(self, graph): + model = graph.models["orders"] + status = model.get_dimension("status") + assert "delivered" in status.sample_values + + def test_nested_cortex_search_service(self, graph): + model = graph.models["orders"] + cust = model.get_dimension("customer_name") + assert cust.cortex_search_service_name == "customer_name_search" + + def test_is_enum_and_modifier_in_metadata(self, graph): + model = graph.models["orders"] + status = model.get_dimension("status") + sf = status.metadata["snowflake"] + assert sf["is_enum"] is True + assert sf["access_modifier"] == "public_access" + assert sf["labels"] == ["Order Status"] + assert sf["tags"] == ["core"] + + +class TestMeasureMetricMetadata: + def test_non_additive_dimensions_preserved(self, graph): + model = graph.models["orders"] + order_total = model.get_metric("order_total") + sf = order_total.metadata["snowflake"] + assert sf["non_additive_dimensions"][0]["dimension"] == "order_date" + assert sf["access_modifier"] == "public_access" + + def test_using_relationships_preserved(self, graph): + model = graph.models["orders"] + distinct_orders = model.get_metric("distinct_orders") + sf = distinct_orders.metadata["snowflake"] + assert sf["using_relationships"] == ["orders_to_customers"] + + +class TestTopLevelSections: + def test_verified_queries(self, graph): + assert len(graph.verified_queries) == 1 + assert graph.verified_queries[0]["name"] == "total revenue" + + def test_custom_instructions(self, graph): + assert graph.custom_instructions == "Always prefer revenue over total when answering." + + def test_module_custom_instructions(self, graph): + mci = graph.module_custom_instructions + assert mci["sql_generation"] == "Prefer explicit column references." + assert mci["question_categorization"] == "Treat revenue questions as financial." + + +class TestRoundtrip: + def test_roundtrip_preserves_cortex_features(self, adapter, graph, tmp_path): + output = tmp_path / "out.yaml" + adapter.export(graph, output) + + data = yaml.safe_load(output.read_text()) + + # Top-level sections survive export. + assert "verified_queries" in data + assert data["custom_instructions"] == "Always prefer revenue over total when answering." + assert "module_custom_instructions" in data + + # Re-parse and confirm key fields persist. + graph2 = adapter.parse(output) + model = graph2.models["orders"] + + status = model.get_dimension("status") + assert status.synonyms == ["state"] + assert "delivered" in status.sample_values + + cust = model.get_dimension("customer_name") + assert cust.cortex_search_service_name == "customer_name_search" + + order_total = model.get_metric("order_total") + assert order_total.synonyms == ["revenue"] + + assert len(graph2.verified_queries) == 1 diff --git a/tests/adapters/snowflake/test_fixtures.py b/tests/adapters/snowflake/test_fixtures.py index 40de1b9d..ce4b087d 100644 --- a/tests/adapters/snowflake/test_fixtures.py +++ b/tests/adapters/snowflake/test_fixtures.py @@ -174,25 +174,21 @@ def test_varchar_dimension_type(self, graph): class TestRevenueTimeseriesMeasures: """Verify measures parsing. - The Cortex Analyst format uses `measures` (not `facts`). The adapter - currently only looks for `facts` and `metrics`, so measures from the - Cortex Analyst format are not imported. These tests are marked xfail - to document the gap. + The Cortex Analyst format uses the table-level `measures` key as a legacy + alias of `facts`. The adapter reads both, so measures from the tutorial + fixture are imported as Sidemantic metrics. """ - @pytest.mark.xfail(reason="Adapter parses 'facts' key, not 'measures' (Cortex Analyst format)") def test_daily_revenue_has_measures(self, graph): model = graph.models["daily_revenue"] metric_names = {m.name for m in model.metrics} assert "daily_revenue" in metric_names - @pytest.mark.xfail(reason="Adapter parses 'facts' key, not 'measures' (Cortex Analyst format)") def test_daily_revenue_measure_count(self, graph): """daily_revenue table defines 5 measures.""" model = graph.models["daily_revenue"] assert len(model.metrics) == 5 - @pytest.mark.xfail(reason="Adapter parses 'facts' key, not 'measures' (Cortex Analyst format)") def test_daily_cogs_measure(self, graph): model = graph.models["daily_revenue"] cogs = model.get_metric("daily_cogs") @@ -200,14 +196,12 @@ def test_daily_cogs_measure(self, graph): assert cogs.agg == "sum" assert cogs.sql == "cogs" - @pytest.mark.xfail(reason="Adapter parses 'facts' key, not 'measures' (Cortex Analyst format)") def test_daily_profit_computed_measure(self, graph): """daily_profit has expr 'revenue - cogs' and no default_aggregation.""" model = graph.models["daily_revenue"] profit = model.get_metric("daily_profit") assert profit is not None - @pytest.mark.xfail(reason="Adapter parses 'facts' key, not 'measures' (Cortex Analyst format)") def test_forecast_error_avg_aggregation(self, graph): """daily_forecast_abs_error has default_aggregation: avg.""" model = graph.models["daily_revenue"] @@ -255,21 +249,14 @@ def test_region_relationship_keys(self, graph): assert region_rel.primary_key == "region_id" -class TestRevenueTimeseriesUnsupportedFeatures: - """Test features present in the fixture that the adapter does not yet handle. +class TestRevenueTimeseriesCortexFeatures: + """Test newer Cortex Analyst features the adapter now imports.""" - These are marked xfail to document what a Cortex Analyst model can contain - that sidemantic does not yet import. - """ - - @pytest.mark.xfail(reason="verified_queries not imported by adapter") def test_verified_queries_imported(self, graph): """The fixture has 2 verified_queries; adapter should expose them.""" - # SemanticGraph has no verified_queries attribute yet assert hasattr(graph, "verified_queries") assert len(graph.verified_queries) == 2 - @pytest.mark.xfail(reason="synonyms on measures not imported by adapter") def test_measure_synonyms(self, graph): """daily_revenue measure has synonyms ['sales', 'income'].""" model = graph.models["daily_revenue"] @@ -277,7 +264,6 @@ def test_measure_synonyms(self, graph): assert hasattr(rev, "synonyms") assert "sales" in rev.synonyms - @pytest.mark.xfail(reason="sample_values on dimensions not imported by adapter") def test_dimension_sample_values(self, graph): """product_line dimension has sample_values.""" model = graph.models["product"] @@ -285,7 +271,6 @@ def test_dimension_sample_values(self, graph): assert hasattr(pl, "sample_values") assert "Electronics" in pl.sample_values - @pytest.mark.xfail(reason="cortex_search_service_name not imported by adapter") def test_cortex_search_service_name(self, graph): """product_dimension table has cortex_search_service_name on product_line.""" model = graph.models["product_dimension"] diff --git a/tests/fixtures/snowflake/cortex_features.yaml b/tests/fixtures/snowflake/cortex_features.yaml new file mode 100644 index 00000000..e639b6fe --- /dev/null +++ b/tests/fixtures/snowflake/cortex_features.yaml @@ -0,0 +1,79 @@ +name: cortex_features +description: Cortex Analyst model exercising newer spec keys. +tables: + - name: orders + description: Orders fact table. + base_table: + database: analytics + schema: sales + table: orders + primary_key: + columns: + - order_id + time_dimensions: + - name: order_date + expr: order_date + data_type: timestamp + unique: false + dimensions: + - name: order_id + expr: order_id + data_type: number + - name: status + expr: status + data_type: varchar + synonyms: + - state + is_enum: true + sample_values: + - delivered + - shipped + access_modifier: public_access + labels: + - Order Status + tags: + - core + - name: customer_name + expr: customer_name + data_type: varchar + cortex_search_service: + service: customer_name_search + literal_column: customer_name + database: analytics + schema: sales + measures: + - name: order_total + expr: total + data_type: number + default_aggregation: sum + synonyms: + - revenue + access_modifier: public_access + non_additive_dimensions: + - table: orders + dimension: order_date + labels: + - Order Total + tags: + - finance + metrics: + - name: distinct_orders + expr: COUNT(DISTINCT order_id) + synonyms: + - order count + access_modifier: public_access + using_relationships: + - orders_to_customers + +verified_queries: + - name: total revenue + question: what is the total revenue + sql: "SELECT SUM(total) FROM orders" + verified_at: 1700000000 + verified_by: analyst + +custom_instructions: Always prefer revenue over total when answering. + +module_custom_instructions: + sql_generation: Prefer explicit column references. + question_categorization: Treat revenue questions as financial. From 9124e4b5d1cd6a32444df8459e85de74794b380c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 13 Jun 2026 18:42:53 +0000 Subject: [PATCH 02/25] Auto-update JSON schema --- sidemantic-schema.json | 138 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/sidemantic-schema.json b/sidemantic-schema.json index 8b0dd150..5ddfda19 100644 --- a/sidemantic-schema.json +++ b/sidemantic-schema.json @@ -3,6 +3,19 @@ "Dimension": { "description": "Dimension (attribute) definition.\n\nDimensions are used for grouping and filtering in queries.", "properties": { + "cortex_search_service_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Linked Cortex Search service name (Snowflake Cortex Analyst)", + "title": "Cortex Search Service Name" + }, "dax": { "anyOf": [ { @@ -147,6 +160,22 @@ "title": "Public", "type": "boolean" }, + "sample_values": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Representative sample values for this dimension", + "title": "Sample Values" + }, "sql": { "anyOf": [ { @@ -176,6 +205,22 @@ "description": "Supported granularities for time dimensions", "title": "Supported Granularities" }, + "synonyms": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternative names for this dimension", + "title": "Synonyms" + }, "type": { "description": "Dimension type", "enum": [ @@ -812,6 +857,22 @@ "description": "N-step funnel filter expressions (overrides base_event/conversion_event)", "title": "Steps" }, + "synonyms": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternative names for this measure/metric", + "title": "Synonyms" + }, "time_offset": { "anyOf": [ { @@ -2013,6 +2074,22 @@ "description": "N-step funnel filter expressions (overrides base_event/conversion_event)", "title": "Steps" }, + "synonyms": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternative names for this measure/metric", + "title": "Synonyms" + }, "time_offset": { "anyOf": [ { @@ -2129,6 +2206,19 @@ "Dimension": { "description": "Dimension (attribute) definition.\n\nDimensions are used for grouping and filtering in queries.", "properties": { + "cortex_search_service_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Linked Cortex Search service name (Snowflake Cortex Analyst)", + "title": "Cortex Search Service Name" + }, "dax": { "anyOf": [ { @@ -2273,6 +2363,22 @@ "title": "Public", "type": "boolean" }, + "sample_values": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Representative sample values for this dimension", + "title": "Sample Values" + }, "sql": { "anyOf": [ { @@ -2302,6 +2408,22 @@ "description": "Supported granularities for time dimensions", "title": "Supported Granularities" }, + "synonyms": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternative names for this dimension", + "title": "Synonyms" + }, "type": { "description": "Dimension type", "enum": [ @@ -2938,6 +3060,22 @@ "description": "N-step funnel filter expressions (overrides base_event/conversion_event)", "title": "Steps" }, + "synonyms": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternative names for this measure/metric", + "title": "Synonyms" + }, "time_offset": { "anyOf": [ { From b7f01278c0b88ada4482b4728438e969cc007029 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 08:20:41 -0700 Subject: [PATCH 03/25] Fix Snowflake top-level metric qualification and round-trip export Graph-level metrics (top-level metrics with no owning table) were passed through _qualify_columns(), which corrupted their model.field references into {model}.model.field. They were also never serialized by export(), so they silently disappeared on parse/export round-trips. Parse graph-level metric expressions without {model} qualification and re-export graph.metrics into the top-level metrics section. --- sidemantic/adapters/snowflake.py | 38 +++++++-- tests/adapters/snowflake/test_roundtrip.py | 92 ++++++++++++++++++++++ 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index ce45de8d..f24b91a6 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -178,13 +178,21 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: # Parse top-level metrics (semantic-model-scoped metrics referencing tables) for metric_def in data.get("metrics") or []: - metric = self._parse_metric(metric_def) - if metric is None: - continue table_name = metric_def.get("table") if table_name and table_name in graph.models: + # Table-scoped: bare column refs are local to the table, so qualify + # complex expressions with the {model} placeholder. + metric = self._parse_metric(metric_def) + if metric is None: + continue graph.models[table_name].metrics.append(metric) else: + # Graph-level metric: expressions reference other fields as + # `model.field` (already qualified), so leave them untouched + # instead of corrupting them with the {model} placeholder. + metric = self._parse_metric(metric_def, qualify=False) + if metric is None: + continue graph.metrics[metric.name] = metric # Parse top-level Cortex Analyst sections onto the graph. @@ -362,13 +370,18 @@ def _parse_fact(self, fact_def: dict) -> Metric | None: metadata=self._measure_metadata(fact_def), ) - def _parse_metric(self, metric_def: dict) -> Metric | None: + def _parse_metric(self, metric_def: dict, qualify: bool = True) -> Metric | None: """Parse Snowflake metric into Sidemantic metric. Metrics in Snowflake are table-scoped aggregations (already contain aggregate functions). Args: metric_def: Metric definition dictionary + qualify: When True (table-scoped metrics), bare column references in + complex/derived expressions are qualified with the {model} + placeholder. When False (graph-level metrics), the expression is + left as-is because it already uses ``model.field`` references that + must not be rewritten. Returns: Metric instance or None @@ -413,12 +426,14 @@ def _parse_metric(self, metric_def: dict) -> Metric | None: ) # Complex expression (multiple aggregations or couldn't parse simple one) - # Mark as derived and qualify column references with {model} placeholder - qualified_expr = _qualify_columns(expr) + # Mark as derived. Table-scoped metrics qualify bare column references with + # the {model} placeholder; graph-level metrics already use `model.field` + # references and must be left untouched. + derived_expr = _qualify_columns(expr) if qualify else expr return Metric( name=name, type="derived", - sql=qualified_expr, + sql=derived_expr, description=metric_def.get("description"), synonyms=metric_def.get("synonyms"), metadata=self._metric_metadata(metric_def), @@ -609,6 +624,15 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: if not semantic_model["relationships"]: del semantic_model["relationships"] + # Export graph-level (top-level) metrics. These have no owning table and + # were parsed from the semantic model's top-level `metrics:` section, so + # they must be serialized back there to survive a parse/export round-trip. + top_level_metrics = [] + for metric in graph.metrics.values(): + top_level_metrics.append(self._export_metric(metric)) + if top_level_metrics: + semantic_model["metrics"] = top_level_metrics + # Export top-level Cortex Analyst sections if present on the graph. verified_queries = getattr(graph, "verified_queries", None) if verified_queries: diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 9bc3fd00..7fedd6f5 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -331,3 +331,95 @@ def test_export_creates_valid_snowflake_yaml(self, adapter, examples_dir, tmp_pa # metrics should have expr for metric in table.get("metrics", []): assert "name" in metric + + +class TestSnowflakeTopLevelMetrics: + """Test parsing/exporting of graph-level (top-level) metrics. + + Snowflake semantic-view metrics that omit ``table`` (or reference a table not + present in the model) become graph-level Sidemantic metrics that reference + other fields with ``model.field`` syntax. + """ + + @pytest.fixture + def top_level_yaml(self, tmp_path): + path = tmp_path / "top_level.yaml" + path.write_text(""" +name: shop +tables: + - name: orders + base_table: + table: orders + primary_key: + columns: + - id + facts: + - name: total_revenue + expr: amount + default_aggregation: sum + - name: order_count + expr: id + default_aggregation: count +metrics: + - name: revenue_per_order + expr: orders.total_revenue / orders.order_count +""") + return path + + def test_top_level_metric_is_not_overqualified(self, adapter, top_level_yaml): + """Graph-level metric expressions must keep model.field references intact.""" + graph = adapter.parse(top_level_yaml) + + assert "revenue_per_order" in graph.metrics + metric = graph.metrics["revenue_per_order"] + assert metric.type == "derived" + # Must NOT be corrupted with the {model} placeholder. + assert "{model}" not in metric.sql + assert metric.sql == "orders.total_revenue / orders.order_count" + + def test_top_level_metric_survives_roundtrip(self, adapter, top_level_yaml, tmp_path): + """Graph-level metrics must be re-exported into the top-level metrics section.""" + graph = adapter.parse(top_level_yaml) + + output_file = tmp_path / "roundtrip.yaml" + adapter.export(graph, output_file) + + with open(output_file) as f: + data = yaml.safe_load(f) + + # Top-level metrics section must be present after export. + assert "metrics" in data + names = {m["name"]: m for m in data["metrics"]} + assert "revenue_per_order" in names + assert names["revenue_per_order"]["expr"] == "orders.total_revenue / orders.order_count" + + # And it must survive a full re-parse without being lost or corrupted. + graph2 = adapter.parse(output_file) + assert "revenue_per_order" in graph2.metrics + assert graph2.metrics["revenue_per_order"].sql == "orders.total_revenue / orders.order_count" + + def test_table_scoped_metric_still_qualified(self, adapter, tmp_path): + """Table-scoped derived metrics must still get the {model} placeholder.""" + path = tmp_path / "scoped.yaml" + path.write_text(""" +name: shop +tables: + - name: orders + base_table: + table: orders + primary_key: + columns: + - id +metrics: + - name: weird_ratio + table: orders + expr: SUM(amount) / COUNT(id) +""") + graph = adapter.parse(path) + + metric = graph.models["orders"].get_metric("weird_ratio") + assert metric is not None + assert metric.type == "derived" + # Bare table-local columns must be qualified with {model}. + assert "{model}.amount" in metric.sql + assert "{model}.id" in metric.sql From 6a14c81f4c70569b88f47e4951aa8490f9dd7105 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 09:17:12 -0700 Subject: [PATCH 04/25] Skip unrepresentable Snowflake metrics and wire Cortex fields into native YAML Snowflake export now skips metrics it cannot represent: model-owned metrics auto-registered at graph level (time_comparison/conversion) were being double-exported as top-level stubs, and any metric type without a Snowflake expr produced invalid stubs that failed to re-parse. Also wire the Cortex enrichment fields (dimension synonyms/sample_values/ cortex_search_service_name and metric synonyms) into the native Sidemantic adapter so export-native round-trips them instead of silently dropping them. --- sidemantic/adapters/sidemantic.py | 16 +++++ sidemantic/adapters/snowflake.py | 25 +++++++- .../sidemantic_adapter/test_parsing.py | 62 +++++++++++++++++++ tests/adapters/snowflake/test_roundtrip.py | 32 ++++++++++ 4 files changed, 132 insertions(+), 3 deletions(-) diff --git a/sidemantic/adapters/sidemantic.py b/sidemantic/adapters/sidemantic.py index 95daaf40..0683f28e 100644 --- a/sidemantic/adapters/sidemantic.py +++ b/sidemantic/adapters/sidemantic.py @@ -79,6 +79,9 @@ "label", "metadata", "meta", + "synonyms", + "sample_values", + "cortex_search_service_name", "format", "value_format_name", "parent", @@ -118,6 +121,7 @@ "periods", "retention_granularity", "granularity", + "synonyms", "inner_metrics", "entity_dimensions", "having", @@ -561,6 +565,9 @@ def _parse_model(self, model_def: dict, *, source_path: Path | None = None) -> M parent=dim_def.get("parent"), metadata=dim_def.get("metadata"), meta=dim_def.get("meta"), + synonyms=dim_def.get("synonyms"), + sample_values=dim_def.get("sample_values"), + cortex_search_service_name=dim_def.get("cortex_search_service_name"), window=dim_def.get("window"), ) dimensions.append(dimension) @@ -781,6 +788,7 @@ def _parse_metric( "value_format_name", "drill_fields", "non_additive_dimension", + "synonyms", "meta", "public", ]: @@ -928,6 +936,12 @@ def _export_model(self, model: Model) -> dict: dim_def["metadata"] = dim.metadata if dim.meta: dim_def["meta"] = dim.meta + if dim.synonyms: + dim_def["synonyms"] = dim.synonyms + if dim.sample_values: + dim_def["sample_values"] = dim.sample_values + if dim.cortex_search_service_name: + dim_def["cortex_search_service_name"] = dim.cortex_search_service_name if dim.format: dim_def["format"] = dim.format if dim.value_format_name: @@ -966,6 +980,8 @@ def _export_model(self, model: Model) -> dict: measure_def["metadata"] = measure.metadata if measure.meta: measure_def["meta"] = measure.meta + if measure.synonyms: + measure_def["synonyms"] = measure.synonyms if not measure.public: measure_def["public"] = measure.public if measure.format: diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index f24b91a6..3e64e1e3 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -627,9 +627,22 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: # Export graph-level (top-level) metrics. These have no owning table and # were parsed from the semantic model's top-level `metrics:` section, so # they must be serialized back there to survive a parse/export round-trip. + # + # ``graph.metrics`` also contains model-owned metrics that ``add_model()`` + # auto-registers at graph level (``time_comparison``/``conversion``). Those + # are already serialized inside their table and have no valid Snowflake + # top-level representation, so skip any metric that is owned by a model. + owned_metric_names = {metric.name for model in resolved_models.values() for metric in model.metrics} top_level_metrics = [] - for metric in graph.metrics.values(): - top_level_metrics.append(self._export_metric(metric)) + for name, metric in graph.metrics.items(): + if name in owned_metric_names: + continue + metric_def = self._export_metric(metric) + # Skip metric types Snowflake cannot represent (no `expr`) rather than + # emitting an invalid stub that would fail to re-parse. + if "expr" not in metric_def: + continue + top_level_metrics.append(metric_def) if top_level_metrics: semantic_model["metrics"] = top_level_metrics @@ -711,8 +724,14 @@ def _export_table(self, model: Model) -> dict: fact = self._export_fact(metric) facts.append(fact) else: - # Complex metric or derived -> metric + # Complex metric or derived -> metric. Snowflake has no + # representation for metric types like time_comparison or + # conversion, so _export_metric() cannot build an `expr` for + # them; skip those rather than emitting an invalid stub that + # would fail to re-parse. metric_def = self._export_metric(metric) + if "expr" not in metric_def: + continue metrics.append(metric_def) if facts: diff --git a/tests/adapters/sidemantic_adapter/test_parsing.py b/tests/adapters/sidemantic_adapter/test_parsing.py index c3529398..8bd68405 100644 --- a/tests/adapters/sidemantic_adapter/test_parsing.py +++ b/tests/adapters/sidemantic_adapter/test_parsing.py @@ -1118,5 +1118,67 @@ def test_dimension_window_in_sql_generation(): assert "LEAD(event) OVER (PARTITION BY person_id ORDER BY timestamp)" in sql +def test_parse_native_yaml_round_trips_cortex_enrichment_fields(tmp_path): + """Cortex enrichment fields (synonyms/sample_values/search service) survive round-trip. + + These fields are populated when importing a Snowflake Cortex model; the native + adapter must both accept them in hand-authored YAML and re-emit them on export + so `sidemantic export-native` does not silently drop them. + """ + adapter = SidemanticAdapter() + yaml_path = tmp_path / "orders.yml" + yaml_path.write_text( + """ +version: 1 +models: + - name: orders + table: orders + primary_key: id + dimensions: + - name: status + type: categorical + synonyms: + - state + - order_status + sample_values: + - delivered + - shipped + cortex_search_service_name: ORDERS_STATUS_SEARCH + metrics: + - name: order_count + agg: count + synonyms: + - num_orders +""" + ) + + graph = adapter.parse(yaml_path) + model = graph.models["orders"] + + status = model.get_dimension("status") + assert status.synonyms == ["state", "order_status"] + assert status.sample_values == ["delivered", "shipped"] + assert status.cortex_search_service_name == "ORDERS_STATUS_SEARCH" + order_count = model.get_metric("order_count") + assert order_count.synonyms == ["num_orders"] + + export_path = tmp_path / "exported.yml" + adapter.export(graph, export_path) + exported = yaml.safe_load(export_path.read_text()) + + exported_dim = exported["models"][0]["dimensions"][0] + assert exported_dim["synonyms"] == ["state", "order_status"] + assert exported_dim["sample_values"] == ["delivered", "shipped"] + assert exported_dim["cortex_search_service_name"] == "ORDERS_STATUS_SEARCH" + exported_metric = exported["models"][0]["metrics"][0] + assert exported_metric["synonyms"] == ["num_orders"] + + # And a full re-parse preserves them. + graph2 = adapter.parse(export_path) + status2 = graph2.models["orders"].get_dimension("status") + assert status2.synonyms == ["state", "order_status"] + assert status2.cortex_search_service_name == "ORDERS_STATUS_SEARCH" + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 7fedd6f5..5c439baa 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -423,3 +423,35 @@ def test_table_scoped_metric_still_qualified(self, adapter, tmp_path): # Bare table-local columns must be qualified with {model}. assert "{model}.amount" in metric.sql assert "{model}.id" in metric.sql + + def test_export_skips_auto_registered_model_metrics(self, adapter, tmp_path): + """Model-owned metrics auto-registered at graph level must not leak into top-level metrics. + + ``graph.add_model()`` registers ``time_comparison``/``conversion`` metrics in + ``graph.metrics``. These are already serialized inside their owning table and + have no valid Snowflake top-level representation, so export must skip them. + """ + model = Model( + name="orders", + table="ORDERS", + primary_key="id", + metrics=[ + Metric(name="total_revenue", agg="sum", sql="amount"), + Metric(name="revenue_yoy", type="time_comparison", base_metric="total_revenue", comparison_type="yoy"), + ], + ) + graph = SemanticGraph() + graph.add_model(model) + # Sanity check: the time_comparison metric is auto-registered at graph level. + assert "revenue_yoy" in graph.metrics + + output_file = tmp_path / "export.yaml" + adapter.export(graph, output_file) + + with open(output_file) as f: + data = yaml.safe_load(f) + + # No top-level metrics section should be emitted for model-owned metrics. + assert "metrics" not in data + # The export must still re-parse cleanly. + adapter.parse(output_file) From b19d9877275422f00332c2973ed900b976f3b0f7 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 11:10:10 -0700 Subject: [PATCH 05/25] Coerce Snowflake sample_values to str and export graph-level metric synonyms Snowflake sample_values can be numeric/time scalars per the Cortex spec; coerce them to str so import does not reject valid files. Also emit synonyms when exporting graph-level metrics so native round-trips no longer drop them. --- sidemantic/adapters/sidemantic.py | 2 ++ sidemantic/adapters/snowflake.py | 19 ++++++++-- .../sidemantic_adapter/test_parsing.py | 36 +++++++++++++++++++ .../snowflake/test_cortex_features.py | 12 +++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/sidemantic/adapters/sidemantic.py b/sidemantic/adapters/sidemantic.py index 0683f28e..762c4fb4 100644 --- a/sidemantic/adapters/sidemantic.py +++ b/sidemantic/adapters/sidemantic.py @@ -1105,6 +1105,8 @@ def _export_metric(self, measure: Metric, graph) -> dict: result["metadata"] = measure.metadata if measure.meta: result["meta"] = measure.meta + if measure.synonyms: + result["synonyms"] = measure.synonyms if not measure.public: result["public"] = measure.public diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 3e64e1e3..194b92ab 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -302,7 +302,7 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None: sql=dim_def.get("expr"), description=dim_def.get("description"), synonyms=dim_def.get("synonyms"), - sample_values=dim_def.get("sample_values"), + sample_values=self._sample_values(dim_def), cortex_search_service_name=self._cortex_search_service_name(dim_def), metadata=self._dimension_metadata(dim_def), ) @@ -327,7 +327,7 @@ def _parse_time_dimension(self, dim_def: dict) -> Dimension | None: description=dim_def.get("description"), granularity="day", # Default granularity synonyms=dim_def.get("synonyms"), - sample_values=dim_def.get("sample_values"), + sample_values=self._sample_values(dim_def), cortex_search_service_name=self._cortex_search_service_name(dim_def), metadata=self._dimension_metadata(dim_def), ) @@ -456,6 +456,21 @@ def _cortex_search_service_name(dim_def: dict) -> str | None: return nested return None + @staticmethod + def _sample_values(dim_def: dict) -> list[str] | None: + """Coerce Snowflake ``sample_values`` to strings. + + Snowflake documents ``sample_values`` as raw column values, so numeric or + time dimensions can legally contain unquoted YAML scalars (e.g. + ``sample_values: [1001, 1002]``). ``Dimension.sample_values`` is typed as + ``list[str]``, so coerce any scalar to ``str`` to avoid rejecting valid + Cortex files. + """ + values = dim_def.get("sample_values") + if values is None: + return None + return [str(value) for value in values] + @staticmethod def _collect_metadata(definition: dict, keys: tuple[str, ...]) -> dict | None: """Preserve newer Cortex Analyst per-field keys under a snowflake namespace.""" diff --git a/tests/adapters/sidemantic_adapter/test_parsing.py b/tests/adapters/sidemantic_adapter/test_parsing.py index 8bd68405..7e6184ed 100644 --- a/tests/adapters/sidemantic_adapter/test_parsing.py +++ b/tests/adapters/sidemantic_adapter/test_parsing.py @@ -410,6 +410,42 @@ def test_parse_export_preserves_native_metadata_visibility_and_granularity(tmp_p assert graph2.metrics["revenue_per_order"].public is False +def test_parse_export_preserves_graph_level_metric_synonyms(tmp_path): + """Top-level derived metrics must keep `synonyms` through export round-trips.""" + adapter = SidemanticAdapter() + yaml_path = tmp_path / "orders.yml" + yaml_path.write_text( + """ +version: 1 +models: + - name: orders + table: orders + metrics: + - name: total_revenue + agg: sum + sql: amount + - name: order_count + agg: count +metrics: + - name: revenue_per_order + type: derived + sql: orders.total_revenue / orders.order_count + synonyms: [aov, average order value] +""" + ) + + graph = adapter.parse(yaml_path) + assert graph.metrics["revenue_per_order"].synonyms == ["aov", "average order value"] + + export_path = tmp_path / "exported.yml" + adapter.export(graph, export_path) + exported = yaml.safe_load(export_path.read_text()) + assert exported["metrics"][0]["synonyms"] == ["aov", "average order value"] + + graph2 = adapter.parse(export_path) + assert graph2.metrics["revenue_per_order"].synonyms == ["aov", "average order value"] + + def test_parse_export_preserves_top_level_parameters(tmp_path): adapter = SidemanticAdapter() yaml_path = tmp_path / "orders.yml" diff --git a/tests/adapters/snowflake/test_cortex_features.py b/tests/adapters/snowflake/test_cortex_features.py index 219ef8fa..5a1debf1 100644 --- a/tests/adapters/snowflake/test_cortex_features.py +++ b/tests/adapters/snowflake/test_cortex_features.py @@ -84,6 +84,18 @@ def test_is_enum_and_modifier_in_metadata(self, graph): assert sf["labels"] == ["Order Status"] assert sf["tags"] == ["core"] + def test_non_string_sample_values_coerced(self, adapter): + """Numeric/time sample_values (valid per the Cortex spec) are coerced to str.""" + dim = adapter._parse_dimension( + {"name": "order_id", "data_type": "NUMBER", "expr": "order_id", "sample_values": [1001, 1002]} + ) + assert dim.sample_values == ["1001", "1002"] + + time_dim = adapter._parse_time_dimension( + {"name": "order_ts", "expr": "order_ts", "sample_values": [1700000000, 1700000001]} + ) + assert time_dim.sample_values == ["1700000000", "1700000001"] + class TestMeasureMetricMetadata: def test_non_additive_dimensions_preserved(self, graph): From f7180510cb9d6f4abe9291e7280a9cbda3e64403 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 11:29:17 -0700 Subject: [PATCH 06/25] Preserve graph-level metadata across native export round-trips SidemanticAdapter now round-trips graph.metadata as a top-level metadata key, so Snowflake Cortex top-level sections (verified_queries and the custom-instruction blocks) survive import snowflake -> export-native -> re-parse. The Snowflake export falls back to graph.metadata['snowflake'] when the dynamic attributes are absent, so re-export to Snowflake still emits those sections. --- sidemantic/adapters/sidemantic.py | 9 +++++++ sidemantic/adapters/snowflake.py | 14 +++++++---- .../snowflake/test_cortex_features.py | 24 +++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/sidemantic/adapters/sidemantic.py b/sidemantic/adapters/sidemantic.py index 762c4fb4..b343fddc 100644 --- a/sidemantic/adapters/sidemantic.py +++ b/sidemantic/adapters/sidemantic.py @@ -29,6 +29,7 @@ "models", "metrics", "parameters", + "metadata", "sql_metrics", "sql_segments", } @@ -391,6 +392,11 @@ def parse(self, source: str | Path) -> SemanticGraph: validate_native_format_version(data) reject_unknown_fields(data, ROOT_FIELDS, "root", source_path=source_path) + # Preserve graph-level metadata (e.g. Snowflake Cortex top-level sections). + graph_metadata = data.get("metadata") + if isinstance(graph_metadata, dict): + graph.metadata.update(graph_metadata) + # Parse models for model_def in data.get("models") or []: model = self._parse_model(model_def, source_path=source_path) @@ -488,6 +494,9 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: if graph.parameters: data["parameters"] = [self._export_parameter(parameter) for parameter in graph.parameters.values()] + if graph.metadata: + data["metadata"] = graph.metadata + output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 194b92ab..872543b8 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -661,14 +661,20 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: if top_level_metrics: semantic_model["metrics"] = top_level_metrics - # Export top-level Cortex Analyst sections if present on the graph. - verified_queries = getattr(graph, "verified_queries", None) + # Export top-level Cortex Analyst sections if present on the graph. These + # live as dynamic attributes when parsed directly, but only survive a + # native (SidemanticAdapter) round-trip via ``graph.metadata["snowflake"]``, + # so fall back to that when the attributes are absent. + snowflake_meta = graph.metadata.get("snowflake") or {} + verified_queries = getattr(graph, "verified_queries", None) or snowflake_meta.get("verified_queries") if verified_queries: semantic_model["verified_queries"] = verified_queries - custom_instructions = getattr(graph, "custom_instructions", None) + custom_instructions = getattr(graph, "custom_instructions", None) or snowflake_meta.get("custom_instructions") if custom_instructions: semantic_model["custom_instructions"] = custom_instructions - module_custom_instructions = getattr(graph, "module_custom_instructions", None) + module_custom_instructions = getattr(graph, "module_custom_instructions", None) or snowflake_meta.get( + "module_custom_instructions" + ) if module_custom_instructions: semantic_model["module_custom_instructions"] = module_custom_instructions diff --git a/tests/adapters/snowflake/test_cortex_features.py b/tests/adapters/snowflake/test_cortex_features.py index 5a1debf1..61735ccd 100644 --- a/tests/adapters/snowflake/test_cortex_features.py +++ b/tests/adapters/snowflake/test_cortex_features.py @@ -153,3 +153,27 @@ def test_roundtrip_preserves_cortex_features(self, adapter, graph, tmp_path): assert order_total.synonyms == ["revenue"] assert len(graph2.verified_queries) == 1 + + def test_top_level_sections_survive_native_roundtrip(self, adapter, graph, tmp_path): + """Snowflake -> native (export-native) -> Snowflake preserves top-level sections.""" + from sidemantic.adapters.sidemantic import SidemanticAdapter + + native = SidemanticAdapter() + native_path = tmp_path / "native.yml" + native.export(graph, native_path) + + native_data = yaml.safe_load(native_path.read_text()) + assert native_data["metadata"]["snowflake"]["verified_queries"] + + # Re-parse native YAML into a fresh graph (no dynamic Snowflake attributes). + graph2 = native.parse(native_path) + assert not hasattr(graph2, "verified_queries") + assert graph2.metadata["snowflake"]["verified_queries"] + + # Re-export to Snowflake; top-level sections come back from graph.metadata. + sf_out = tmp_path / "out_snowflake.yaml" + adapter.export(graph2, sf_out) + sf_data = yaml.safe_load(sf_out.read_text()) + assert len(sf_data["verified_queries"]) == 1 + assert sf_data["custom_instructions"] == "Always prefer revenue over total when answering." + assert "module_custom_instructions" in sf_data From 6640a930406634a9c97dd210fd2424337a7a8cd1 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 11:48:01 -0700 Subject: [PATCH 07/25] Preserve Snowflake relationship names and metric using_relationships The Snowflake relationship name referenced by a metric using_relationships was discarded (Relationship.name holds the related model), so the metric referenced a relationship that no longer existed after export. Stash the original name in relationship metadata and re-emit it. A simple aggregate metric carrying using_relationships was also routed through facts, which do not collect that metric-only key on re-parse; export such metrics through the metrics block so the reference survives. --- sidemantic/adapters/snowflake.py | 22 +++++++- tests/adapters/snowflake/test_roundtrip.py | 63 ++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 872543b8..65a63c66 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -588,6 +588,15 @@ def _apply_relationships(self, relationships_def: list, graph: SemanticGraph) -> left_column = first_col.get("left_column") right_column = first_col.get("right_column") + # The Snowflake relationship name is referenced by metric + # `using_relationships`; preserve it so those references stay valid + # after export. `Relationship.name` is the related-model identifier and + # cannot hold it, so stash it in adapter metadata instead. + metadata = None + snowflake_name = rel_def.get("name") + if snowflake_name: + metadata = {"snowflake": {"name": snowflake_name}} + # In Snowflake, left_table is the "many" side, right_table is the "one" side # Add relationship to left_table pointing to right_table if left_table in graph.models: @@ -597,6 +606,7 @@ def _apply_relationships(self, relationships_def: list, graph: SemanticGraph) -> type=rel_type, foreign_key=left_column, primary_key=right_column, + metadata=metadata, ) model.relationships.append(relationship) # Rebuild adjacency after adding relationship @@ -740,7 +750,11 @@ def _export_table(self, model: Model) -> dict: metrics = [] for metric in model.metrics: - if metric.agg and not metric.type: + snowflake_meta = (metric.metadata or {}).get("snowflake", {}) + # `using_relationships` is a metric-only Snowflake key that facts do + # not collect on re-parse, so a simple aggregation carrying it must be + # exported as a metric (not a fact) to survive a round-trip. + if metric.agg and not metric.type and "using_relationships" not in snowflake_meta: # Simple aggregation -> fact fact = self._export_fact(metric) facts.append(fact) @@ -955,4 +969,10 @@ def _export_relationship(self, model: Model, rel: Relationship) -> dict: "join_type": "left_outer", } + # Preserve the original Snowflake relationship name so metric + # `using_relationships` references resolve after a round-trip. + snowflake_name = (rel.metadata or {}).get("snowflake", {}).get("name") + if snowflake_name: + rel_def = {"name": snowflake_name, **rel_def} + return rel_def diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 5c439baa..85bae822 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -455,3 +455,66 @@ def test_export_skips_auto_registered_model_metrics(self, adapter, tmp_path): assert "metrics" not in data # The export must still re-parse cleanly. adapter.parse(output_file) + + def test_roundtrip_preserves_using_relationships_and_relationship_name(self, adapter, tmp_path): + """A metric `using_relationships` and the named relationship it points to must survive. + + Snowflake relationship `name` is referenced by metric `using_relationships`. + Both the relationship name and the metric reference must round-trip, and the + aggregate metric carrying `using_relationships` must be exported as a metric + (not a fact) so the key is not dropped on re-parse. + """ + source = tmp_path / "rel.yaml" + source.write_text( + """ +name: rel_test +tables: + - name: orders + base_table: {database: db, schema: s, table: orders} + primary_key: {columns: [order_id]} + dimensions: + - {name: order_id, expr: order_id, data_type: number} + - {name: customer_id, expr: customer_id, data_type: number} + metrics: + - name: distinct_orders + expr: COUNT(DISTINCT order_id) + using_relationships: [orders_to_customers] + - name: customers + base_table: {database: db, schema: s, table: customers} + primary_key: {columns: [id]} + dimensions: + - {name: id, expr: id, data_type: number} +relationships: + - name: orders_to_customers + left_table: orders + right_table: customers + relationship_columns: + - {left_column: customer_id, right_column: id} + relationship_type: many_to_one + join_type: left_outer +""" + ) + + graph = adapter.parse(source) + rel = graph.models["orders"].relationships[0] + assert rel.metadata["snowflake"]["name"] == "orders_to_customers" + + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + + # The relationship name is re-emitted so references stay resolvable. + assert [r["name"] for r in data["relationships"]] == ["orders_to_customers"] + + # The aggregate metric carrying using_relationships goes to metrics, not facts. + orders_table = next(t for t in data["tables"] if t["name"] == "orders") + assert "facts" not in orders_table or all(f["name"] != "distinct_orders" for f in orders_table["facts"]) + exported_metric = next(m for m in orders_table["metrics"] if m["name"] == "distinct_orders") + assert exported_metric["using_relationships"] == ["orders_to_customers"] + + # Re-parse preserves both the relationship name and the metric reference. + graph2 = adapter.parse(output_file) + rel2 = graph2.models["orders"].relationships[0] + assert rel2.metadata["snowflake"]["name"] == "orders_to_customers" + metric2 = graph2.models["orders"].get_metric("distinct_orders") + assert metric2.metadata["snowflake"]["using_relationships"] == ["orders_to_customers"] From 228441757e41913fada260503ef352426462998c Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 12:02:36 -0700 Subject: [PATCH 08/25] Match owned Snowflake metrics by identity so same-named top-level metrics export The top-level metric export skipped any graph metric whose name matched a model-local metric, silently dropping a distinct top-level metric that merely shared a name. Match by object identity instead, so auto-registered model-owned metrics are still skipped while genuine graph-level metrics round-trip. --- sidemantic/adapters/snowflake.py | 6 ++- tests/adapters/snowflake/test_roundtrip.py | 53 ++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 65a63c66..07f3be45 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -657,10 +657,12 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: # auto-registers at graph level (``time_comparison``/``conversion``). Those # are already serialized inside their table and have no valid Snowflake # top-level representation, so skip any metric that is owned by a model. - owned_metric_names = {metric.name for model in resolved_models.values() for metric in model.metrics} + # Match by object identity, not name, so a distinct top-level metric that + # merely shares a name with a model-local metric still round-trips. + owned_metric_ids = {id(metric) for model in resolved_models.values() for metric in model.metrics} top_level_metrics = [] for name, metric in graph.metrics.items(): - if name in owned_metric_names: + if id(metric) in owned_metric_ids: continue metric_def = self._export_metric(metric) # Skip metric types Snowflake cannot represent (no `expr`) rather than diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 85bae822..61c8d8e6 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -456,6 +456,59 @@ def test_export_skips_auto_registered_model_metrics(self, adapter, tmp_path): # The export must still re-parse cleanly. adapter.parse(output_file) + def test_export_preserves_top_level_metric_sharing_model_metric_name(self, adapter, tmp_path): + """A distinct top-level metric must survive even if it shares a model-local name. + + The owned-metric skip in export must match by object identity, not name, so + a genuine graph-level metric that merely shares a name with a table-local + metric is not dropped on export. + """ + model = Model( + name="orders", + table="ORDERS", + primary_key="id", + metrics=[Metric(name="summary", agg="sum", sql="amount")], + ) + graph = SemanticGraph() + graph.add_model(model) + # Distinct graph-level derived metric that shares the name "summary". + top_level = Metric(name="summary", type="derived", sql="orders.summary * 2") + graph.metrics["summary"] = top_level + assert graph.metrics["summary"] is not model.metrics[0] + + output_file = tmp_path / "export.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + + # The distinct top-level metric is serialized to the top-level metrics block. + assert [m["name"] for m in data.get("metrics", [])] == ["summary"] + assert data["metrics"][0]["expr"] == "orders.summary * 2" + # And the export still re-parses cleanly. + adapter.parse(output_file) + + def test_export_skips_auto_registered_metric_by_identity_not_name(self, adapter, tmp_path): + """Auto-registered model metrics (same object) are still skipped at top level.""" + model = Model( + name="orders", + table="ORDERS", + primary_key="id", + metrics=[ + Metric(name="total_revenue", agg="sum", sql="amount"), + Metric(name="revenue_yoy", type="time_comparison", base_metric="total_revenue", comparison_type="yoy"), + ], + ) + graph = SemanticGraph() + graph.add_model(model) + # The time_comparison metric is the same object registered at graph level. + assert graph.metrics["revenue_yoy"] is model.metrics[1] + + output_file = tmp_path / "export.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + + assert "metrics" not in data + adapter.parse(output_file) + def test_roundtrip_preserves_using_relationships_and_relationship_name(self, adapter, tmp_path): """A metric `using_relationships` and the named relationship it points to must survive. From 55c8517d0e70f9dea8fde45df75d60570d24f8a9 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 14:15:51 -0700 Subject: [PATCH 09/25] Accept Snowflake enrichment fields in the Rust native schema The Python native adapter now emits a root metadata block plus dimension synonyms/sample_values/cortex_search_service_name and metric synonyms. The Rust native loader uses deny_unknown_fields, so add these optional fields to SidemanticConfig/DimensionConfig/MetricConfig to keep native YAML produced by export-native loadable by the Rust/wasm parser. --- sidemantic-rs/src/config/schema.rs | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/sidemantic-rs/src/config/schema.rs b/sidemantic-rs/src/config/schema.rs index e5a33b87..01dc1c9b 100644 --- a/sidemantic-rs/src/config/schema.rs +++ b/sidemantic-rs/src/config/schema.rs @@ -29,6 +29,9 @@ pub struct SidemanticConfig { pub metrics: Vec, #[serde(default)] pub parameters: Vec, + /// Graph-level adapter metadata (e.g. Snowflake Cortex top-level sections). + #[serde(default)] + pub metadata: Option, #[serde(default)] pub sql_metrics: Option, #[serde(default)] @@ -129,6 +132,15 @@ pub struct DimensionConfig { pub metadata: Option, #[serde(default)] pub meta: Option, + /// Alternative names (e.g. Snowflake Cortex Analyst, Cube). + #[serde(default)] + pub synonyms: Option>, + /// Representative sample values for this dimension. + #[serde(default)] + pub sample_values: Option>, + /// Linked Cortex Search service name (Snowflake Cortex Analyst). + #[serde(default)] + pub cortex_search_service_name: Option, pub format: Option, pub value_format_name: Option, pub parent: Option, @@ -187,6 +199,9 @@ pub struct MetricConfig { pub metadata: Option, #[serde(default)] pub meta: Option, + /// Alternative names (e.g. Snowflake Cortex Analyst, Cube). + #[serde(default)] + pub synonyms: Option>, #[serde(default = "default_public")] pub public: bool, } @@ -1687,6 +1702,61 @@ models: ); } + #[test] + fn test_native_contract_accepts_snowflake_enrichment_fields() { + // Native YAML produced by Python `export-native` after a Snowflake import + // carries root `metadata`, dimension synonyms/sample_values/cortex search, + // and metric synonyms. The Rust native loader must accept (not reject) it. + let yaml = r#" +metadata: + snowflake: + verified_queries: + - name: total revenue + custom_instructions: Prefer revenue. +models: + - name: orders + table: orders + dimensions: + - name: status + type: categorical + synonyms: [state] + sample_values: ["1001", "1002"] + cortex_search_service_name: status_search + metrics: + - name: revenue + agg: sum + sql: amount + synonyms: [total revenue] +"#; + + let config: SidemanticConfig = serde_yaml::from_str(yaml).unwrap(); + + assert_eq!( + config.metadata.as_ref().unwrap()["snowflake"]["custom_instructions"], + "Prefer revenue." + ); + + let dim = &config.models[0].dimensions[0]; + assert_eq!(dim.synonyms.as_deref(), Some(&["state".to_string()][..])); + assert_eq!( + dim.sample_values.as_deref(), + Some(&["1001".to_string(), "1002".to_string()][..]) + ); + assert_eq!( + dim.cortex_search_service_name.as_deref(), + Some("status_search") + ); + + let metric = &config.models[0].metrics[0]; + assert_eq!( + metric.synonyms.as_deref(), + Some(&["total revenue".to_string()][..]) + ); + + // The config must still convert into the internal model without error. + config.into_parts().unwrap(); + } + #[test] fn test_parse_cube_yaml() { let yaml = r#" From a322a3945f3e51517013f3666a27d07f08c35e17 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 17:00:42 -0700 Subject: [PATCH 10/25] Merge Snowflake graph metadata during directory loading The CLI-first load_from_directory path only copied _tmdl_ passthrough attrs into layer.graph, dropping graph.metadata and the Snowflake dynamic top-level attributes (verified_queries / custom instructions). Merge those too so import snowflake -> export-native re-emits the root metadata block and the sections can be re-exported to Snowflake. --- sidemantic/loaders.py | 17 +++++++++ tests/core/test_directory_loaders.py | 57 ++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 40c622b2..568e0545 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -796,6 +796,23 @@ def _merge_graph_passthrough_metadata(target_graph: object, source_graph: object continue setattr(target_graph, name, copy.deepcopy(value)) + # Merge graph-level metadata (e.g. Snowflake Cortex top-level sections) so the + # CLI-first load -> export-native path round-trips them. + source_metadata = getattr(source_graph, "metadata", None) + if isinstance(source_metadata, dict) and source_metadata: + target_metadata = getattr(target_graph, "metadata", None) + if not isinstance(target_metadata, dict): + target_metadata = {} + target_graph.metadata = target_metadata + for key, value in source_metadata.items(): + target_metadata[key] = copy.deepcopy(value) + + # Carry over Snowflake dynamic top-level attributes set by the adapter. + for attr in ("verified_queries", "custom_instructions", "module_custom_instructions"): + value = getattr(source_graph, attr, None) + if value: + setattr(target_graph, attr, copy.deepcopy(value)) + def _infer_relationships(models: dict) -> None: """Infer relationships between models based on foreign key naming conventions. diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 1190d02b..445b2611 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -226,3 +226,60 @@ def test_load_from_directory_scopes_reused_bsl_join_aliases(tmp_path): events_sql = layer.compile(metrics=["events.count"], dimensions=["events_user.name"]) assert "events_user_cte" in events_sql assert "FROM accounts" in events_sql + + +def test_load_from_directory_preserves_snowflake_top_level_sections(tmp_path): + """CLI-first load -> export-native must round-trip Snowflake Cortex top-level sections.""" + import yaml + + from sidemantic.adapters.sidemantic import SidemanticAdapter + + (tmp_path / "cortex.yaml").write_text( + """ +name: cortex +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + measures: + - name: order_total + expr: total + data_type: number + default_aggregation: sum +verified_queries: + - name: total revenue + question: what is the total revenue + sql: "SELECT SUM(total) FROM orders" +custom_instructions: Prefer revenue. +module_custom_instructions: + sql_generation: Use explicit columns. +""" + ) + + layer = SemanticLayer(auto_register=False) + load_from_directory(layer, tmp_path) + graph = layer.graph + + # Top-level sections reach layer.graph (both as metadata and dynamic attrs). + assert graph.metadata["snowflake"]["verified_queries"] + assert graph.metadata["snowflake"]["custom_instructions"] == "Prefer revenue." + assert getattr(graph, "verified_queries", None) + assert getattr(graph, "custom_instructions", None) == "Prefer revenue." + + # export-native emits a root metadata block carrying them. + out = tmp_path / "native.yml" + SidemanticAdapter().export(graph, out) + data = yaml.safe_load(out.read_text()) + assert data["metadata"]["snowflake"]["custom_instructions"] == "Prefer revenue." + + # And a native re-parse keeps them on graph.metadata. + graph2 = SidemanticAdapter().parse(out) + assert graph2.metadata["snowflake"]["verified_queries"] From b68e9e232a08d6af8927dbd88e018b1aab46837f Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 17:36:50 -0700 Subject: [PATCH 11/25] Round-trip Snowflake non-additive metrics, private access, multi-file metadata - Route simple aggregate metrics carrying non_additive_dimensions through the metrics block (not facts) so the metric-only key keeps them metrics. - Map access_modifier private_access onto Dimension/Metric public=False on import and re-emit access_modifier on export, so private Cortex fields are hidden in info/catalog/native YAML while round-tripping. - Deep-merge Snowflake graph metadata across directory-loaded files so multi-file Cortex projects accumulate verified_queries and custom instructions instead of overwriting. --- sidemantic/adapters/snowflake.py | 32 ++++++++-- sidemantic/loaders.py | 38 +++++++++-- tests/adapters/snowflake/test_roundtrip.py | 73 ++++++++++++++++++++++ tests/core/test_directory_loaders.py | 56 +++++++++++++++++ 4 files changed, 190 insertions(+), 9 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 07f3be45..232b4bb4 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -305,6 +305,7 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None: sample_values=self._sample_values(dim_def), cortex_search_service_name=self._cortex_search_service_name(dim_def), metadata=self._dimension_metadata(dim_def), + public=self._public_from_access_modifier(dim_def), ) def _parse_time_dimension(self, dim_def: dict) -> Dimension | None: @@ -330,6 +331,7 @@ def _parse_time_dimension(self, dim_def: dict) -> Dimension | None: sample_values=self._sample_values(dim_def), cortex_search_service_name=self._cortex_search_service_name(dim_def), metadata=self._dimension_metadata(dim_def), + public=self._public_from_access_modifier(dim_def), ) def _parse_fact(self, fact_def: dict) -> Metric | None: @@ -368,6 +370,7 @@ def _parse_fact(self, fact_def: dict) -> Metric | None: description=fact_def.get("description"), synonyms=fact_def.get("synonyms"), metadata=self._measure_metadata(fact_def), + public=self._public_from_access_modifier(fact_def), ) def _parse_metric(self, metric_def: dict, qualify: bool = True) -> Metric | None: @@ -423,6 +426,7 @@ def _parse_metric(self, metric_def: dict, qualify: bool = True) -> Metric | None description=metric_def.get("description"), synonyms=metric_def.get("synonyms"), metadata=self._metric_metadata(metric_def), + public=self._public_from_access_modifier(metric_def), ) # Complex expression (multiple aggregations or couldn't parse simple one) @@ -437,6 +441,7 @@ def _parse_metric(self, metric_def: dict, qualify: bool = True) -> Metric | None description=metric_def.get("description"), synonyms=metric_def.get("synonyms"), metadata=self._metric_metadata(metric_def), + public=self._public_from_access_modifier(metric_def), ) @staticmethod @@ -456,6 +461,16 @@ def _cortex_search_service_name(dim_def: dict) -> str | None: return nested return None + @staticmethod + def _public_from_access_modifier(definition: dict) -> bool: + """Map Snowflake ``access_modifier`` onto Sidemantic visibility. + + Snowflake uses ``private_access`` for hidden helper fields. The original + modifier is still preserved in metadata, but reflect it on ``public`` so + CLI ``info``/catalog and native export treat the field as non-public. + """ + return definition.get("access_modifier") != "private_access" + @staticmethod def _sample_values(dim_def: dict) -> list[str] | None: """Coerce Snowflake ``sample_values`` to strings. @@ -751,12 +766,15 @@ def _export_table(self, model: Model) -> dict: facts = [] metrics = [] + # Snowflake table `metrics` carry metric-only keys (e.g. using_relationships, + # non_additive_dimensions). A simple aggregation that carries one of these + # was authored as a metric, so re-export it as a metric (not a fact) to keep + # the original representation across a round-trip. + metric_only_keys = ("using_relationships", "non_additive_dimensions") for metric in model.metrics: snowflake_meta = (metric.metadata or {}).get("snowflake", {}) - # `using_relationships` is a metric-only Snowflake key that facts do - # not collect on re-parse, so a simple aggregation carrying it must be - # exported as a metric (not a fact) to survive a round-trip. - if metric.agg and not metric.type and "using_relationships" not in snowflake_meta: + has_metric_only_key = any(key in snowflake_meta for key in metric_only_keys) + if metric.agg and not metric.type and not has_metric_only_key: # Simple aggregation -> fact fact = self._export_fact(metric) facts.append(fact) @@ -849,6 +867,8 @@ def _export_dimension_extras(dim: Dimension, dim_def: dict) -> None: snowflake_meta = (dim.metadata or {}).get("snowflake", {}) for key, value in snowflake_meta.items(): dim_def.setdefault(key, value) + if not dim.public: + dim_def.setdefault("access_modifier", "private_access") def _export_fact(self, metric: Metric) -> dict: """Export metric as Snowflake fact. @@ -886,6 +906,8 @@ def _export_fact(self, metric: Metric) -> dict: snowflake_meta = (metric.metadata or {}).get("snowflake", {}) for key, value in snowflake_meta.items(): fact.setdefault(key, value) + if not metric.public: + fact.setdefault("access_modifier", "private_access") return fact @@ -926,6 +948,8 @@ def _export_metric(self, metric: Metric) -> dict: snowflake_meta = (metric.metadata or {}).get("snowflake", {}) for key, value in snowflake_meta.items(): metric_def.setdefault(key, value) + if not metric.public: + metric_def.setdefault("access_modifier", "private_access") return metric_def diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 568e0545..ec6d42e8 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -790,6 +790,26 @@ def _merge_import_warnings(graph: object, warnings: list[dict[str, object]]) -> graph.import_warnings = merged +def _deep_merge_metadata(target: dict, source: dict) -> None: + """Recursively merge ``source`` into ``target``. + + Nested dicts are merged, list values are appended (deduplicated by value), + and scalars from ``source`` overwrite. This keeps multi-file payloads such as + Snowflake Cortex ``verified_queries`` from clobbering one another when several + files are loaded from a directory. + """ + for key, value in source.items(): + existing = target.get(key) + if isinstance(existing, dict) and isinstance(value, dict): + _deep_merge_metadata(existing, value) + elif isinstance(existing, list) and isinstance(value, list): + for item in value: + if item not in existing: + existing.append(copy.deepcopy(item)) + else: + target[key] = copy.deepcopy(value) + + def _merge_graph_passthrough_metadata(target_graph: object, source_graph: object) -> None: for name, value in vars(source_graph).items(): if not name.startswith("_tmdl_"): @@ -797,20 +817,28 @@ def _merge_graph_passthrough_metadata(target_graph: object, source_graph: object setattr(target_graph, name, copy.deepcopy(value)) # Merge graph-level metadata (e.g. Snowflake Cortex top-level sections) so the - # CLI-first load -> export-native path round-trips them. + # CLI-first load -> export-native path round-trips them. Deep-merge so multiple + # files in a directory each contribute their sections instead of overwriting. source_metadata = getattr(source_graph, "metadata", None) if isinstance(source_metadata, dict) and source_metadata: target_metadata = getattr(target_graph, "metadata", None) if not isinstance(target_metadata, dict): target_metadata = {} target_graph.metadata = target_metadata - for key, value in source_metadata.items(): - target_metadata[key] = copy.deepcopy(value) + _deep_merge_metadata(target_metadata, source_metadata) - # Carry over Snowflake dynamic top-level attributes set by the adapter. + # Carry over Snowflake dynamic top-level attributes set by the adapter. Lists + # (verified_queries) accumulate across files; scalars take the latest value. for attr in ("verified_queries", "custom_instructions", "module_custom_instructions"): value = getattr(source_graph, attr, None) - if value: + if not value: + continue + existing = getattr(target_graph, attr, None) + if isinstance(existing, list) and isinstance(value, list): + for item in value: + if item not in existing: + existing.append(copy.deepcopy(item)) + else: setattr(target_graph, attr, copy.deepcopy(value)) diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 61c8d8e6..ecfa8b78 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -571,3 +571,76 @@ def test_roundtrip_preserves_using_relationships_and_relationship_name(self, ada assert rel2.metadata["snowflake"]["name"] == "orders_to_customers" metric2 = graph2.models["orders"].get_metric("distinct_orders") assert metric2.metadata["snowflake"]["using_relationships"] == ["orders_to_customers"] + + def test_roundtrip_aggregate_metric_with_non_additive_dimensions_stays_metric(self, adapter, tmp_path): + """A simple aggregate metric carrying non_additive_dimensions exports as a metric.""" + source = tmp_path / "na.yaml" + source.write_text( + """ +name: na_test +tables: + - name: accounts + base_table: {database: db, schema: s, table: accounts} + primary_key: {columns: [id]} + dimensions: + - {name: id, expr: id, data_type: number} + metrics: + - name: max_balance + expr: MAX(balance) + non_additive_dimensions: + - {table: accounts, dimension: snapshot_date} +""" + ) + + graph = adapter.parse(source) + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + + accounts = next(t for t in data["tables"] if t["name"] == "accounts") + # Routed to metrics, not facts, so the metric-only key keeps it a metric. + assert "facts" not in accounts or all(f["name"] != "max_balance" for f in accounts["facts"]) + exported = next(m for m in accounts["metrics"] if m["name"] == "max_balance") + assert exported["non_additive_dimensions"][0]["dimension"] == "snapshot_date" + + graph2 = adapter.parse(output_file) + metric2 = graph2.models["accounts"].get_metric("max_balance") + assert metric2.metadata["snowflake"]["non_additive_dimensions"][0]["dimension"] == "snapshot_date" + + def test_roundtrip_private_access_modifier_maps_to_public_false(self, adapter, tmp_path): + """access_modifier: private_access marks the field non-public and round-trips.""" + source = tmp_path / "priv.yaml" + source.write_text( + """ +name: priv_test +tables: + - name: orders + base_table: {database: db, schema: s, table: orders} + primary_key: {columns: [id]} + dimensions: + - name: ssn + expr: ssn + data_type: text + access_modifier: private_access + - name: status + expr: status + data_type: text + access_modifier: public_access +""" + ) + + graph = adapter.parse(source) + ssn = graph.models["orders"].get_dimension("ssn") + status = graph.models["orders"].get_dimension("status") + assert ssn.public is False + assert status.public is True + + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + orders = data["tables"][0] + exported_ssn = next(d for d in orders["dimensions"] if d["name"] == "ssn") + assert exported_ssn["access_modifier"] == "private_access" + + graph2 = adapter.parse(output_file) + assert graph2.models["orders"].get_dimension("ssn").public is False diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 445b2611..a1d3cf6a 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -283,3 +283,59 @@ def test_load_from_directory_preserves_snowflake_top_level_sections(tmp_path): # And a native re-parse keeps them on graph.metadata. graph2 = SidemanticAdapter().parse(out) assert graph2.metadata["snowflake"]["verified_queries"] + + +def test_load_from_directory_merges_snowflake_metadata_across_files(tmp_path): + """Multi-file Cortex projects must accumulate top-level sections, not overwrite.""" + (tmp_path / "a.yaml").write_text( + """ +name: a +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [id] + dimensions: + - name: id + expr: id + data_type: number +verified_queries: + - name: q1 + question: x + sql: SELECT 1 +custom_instructions: from A +""" + ) + (tmp_path / "b.yaml").write_text( + """ +name: b +tables: + - name: customers + base_table: + database: db + schema: s + table: customers + primary_key: + columns: [id] + dimensions: + - name: id + expr: id + data_type: number +verified_queries: + - name: q2 + question: y + sql: SELECT 2 +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + merged = graph.metadata["snowflake"]["verified_queries"] + assert sorted(q["name"] for q in merged) == ["q1", "q2"] + # Dynamic attribute accumulates too. + assert len(getattr(graph, "verified_queries", [])) == 2 From 71d521603ed49be46ea312c17136bc6a0235735d Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 18:09:08 -0700 Subject: [PATCH 12/25] Keep Snowflake metric expressions resolvable across contexts - Strip the {model} placeholder when exporting table-scoped metrics so Snowflake receives bare column references instead of unresolvable tokens. - Preserve model.field qualifiers when exporting graph-level ratio metrics so Snowflake can resolve cross-table members in view-level metrics. --- sidemantic/adapters/snowflake.py | 42 +++++++++++++---- tests/adapters/snowflake/test_roundtrip.py | 55 ++++++++++++++++++++++ 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 232b4bb4..c8702523 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -679,7 +679,7 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None: for name, metric in graph.metrics.items(): if id(metric) in owned_metric_ids: continue - metric_def = self._export_metric(metric) + metric_def = self._export_metric(metric, top_level=True) # Skip metric types Snowflake cannot represent (no `expr`) rather than # emitting an invalid stub that would fail to re-parse. if "expr" not in metric_def: @@ -911,11 +911,28 @@ def _export_fact(self, metric: Metric) -> dict: return fact - def _export_metric(self, metric: Metric) -> dict: + @staticmethod + def _strip_model_placeholder(sql: str | None) -> str | None: + """Drop the ``{model}.`` placeholder so Snowflake sees bare column refs. + + Table-scoped metric expressions are parsed with the ``{model}`` placeholder + for table-local columns; Snowflake cannot resolve that token, so it must be + removed when re-exporting these metrics to Snowflake. + """ + if sql is None: + return None + return sql.replace("{model}.", "").replace("{model}", "") + + def _export_metric(self, metric: Metric, *, top_level: bool = False) -> dict: """Export metric to Snowflake metric format. Args: metric: Metric to export + top_level: When True the metric is a graph-level (view) metric whose + references already use ``model.field`` qualifiers that Snowflake + needs to resolve cross-table references, so they are preserved. + When False the metric is table-scoped and ``{model}`` placeholders + are stripped to bare column references. Returns: Metric definition dictionary @@ -927,21 +944,28 @@ def _export_metric(self, metric: Metric) -> dict: # Build expression based on metric type if metric.type == "ratio" and metric.numerator and metric.denominator: - # Extract measure names from qualified references - num = metric.numerator.split(".")[-1] if "." in metric.numerator else metric.numerator - denom = metric.denominator.split(".")[-1] if "." in metric.denominator else metric.denominator + if top_level: + # Graph-level metric: keep qualified references so Snowflake can + # resolve cross-table members (e.g. ``orders.revenue``). + num = metric.numerator + denom = metric.denominator + else: + # Table-scoped metric: Snowflake expressions use bare column names. + num = metric.numerator.split(".")[-1] if "." in metric.numerator else metric.numerator + denom = metric.denominator.split(".")[-1] if "." in metric.denominator else metric.denominator metric_def["expr"] = f"{num} / NULLIF({denom}, 0)" elif metric.type == "derived" and metric.sql: - metric_def["expr"] = metric.sql + metric_def["expr"] = metric.sql if top_level else self._strip_model_placeholder(metric.sql) elif metric.agg and metric.sql: # Simple aggregation - wrap in aggregate function agg_func = metric.agg.upper() + sql = metric.sql if top_level else self._strip_model_placeholder(metric.sql) if agg_func == "COUNT_DISTINCT": - metric_def["expr"] = f"COUNT(DISTINCT {metric.sql})" + metric_def["expr"] = f"COUNT(DISTINCT {sql})" else: - metric_def["expr"] = f"{agg_func}({metric.sql})" + metric_def["expr"] = f"{agg_func}({sql})" elif metric.sql: - metric_def["expr"] = metric.sql + metric_def["expr"] = metric.sql if top_level else self._strip_model_placeholder(metric.sql) if metric.synonyms: metric_def["synonyms"] = metric.synonyms diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index ecfa8b78..692acee0 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -644,3 +644,58 @@ def test_roundtrip_private_access_modifier_maps_to_public_false(self, adapter, t graph2 = adapter.parse(output_file) assert graph2.models["orders"].get_dimension("ssn").public is False + + def test_export_strips_model_placeholder_from_table_scoped_metric(self, adapter, tmp_path): + """Table-scoped derived metrics must not leak {model} placeholders into Snowflake.""" + source = tmp_path / "ph.yaml" + source.write_text( + """ +name: ph_test +tables: + - name: orders + base_table: {database: db, schema: s, table: orders} + primary_key: {columns: [id]} + dimensions: + - {name: id, expr: id, data_type: number} + facts: + - {name: amount, expr: amount, data_type: number} +metrics: + - name: avg_order + table: orders + expr: SUM(amount) / COUNT(id) +""" + ) + + graph = adapter.parse(source) + # Internally the table-scoped expression is qualified for queryability. + assert "{model}" in graph.models["orders"].get_metric("avg_order").sql + + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + orders = next(t for t in data["tables"] if t["name"] == "orders") + expr = next(m["expr"] for m in orders["metrics"] if m["name"] == "avg_order") + assert "{model}" not in expr + assert expr == "SUM(amount) / COUNT(id)" + + def test_export_top_level_ratio_keeps_model_qualifiers(self, adapter, tmp_path): + """Graph-level ratio metrics keep model.field qualifiers for cross-table refs.""" + graph = SemanticGraph() + graph.add_model( + Model( + name="orders", + table="ORDERS", + primary_key="id", + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + Metric(name="order_count", agg="count"), + ], + ) + ) + graph.add_metric(Metric(name="aov", type="ratio", numerator="orders.revenue", denominator="orders.order_count")) + + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + expr = next(m["expr"] for m in data["metrics"] if m["name"] == "aov") + assert expr == "orders.revenue / NULLIF(orders.order_count, 0)" From 7cf5af001ac48e901f10428965334dcaf70d7ddf Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 18:42:50 -0700 Subject: [PATCH 13/25] Defer Snowflake top-level metrics until all tables are parsed Parsing a directory attached a top-level metric to its table only if the table file was visited first; otherwise the metric fell into the graph-level branch and lost its table context on export. Collect top-level metric definitions across all files and resolve them after every table is loaded so attachment no longer depends on traversal order. --- sidemantic/adapters/snowflake.py | 59 ++++++++++++++-------- tests/adapters/snowflake/test_roundtrip.py | 43 ++++++++++++++++ 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index c8702523..3b3f4b36 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -138,24 +138,53 @@ def parse(self, source: str | Path) -> SemanticGraph: graph = SemanticGraph() source_path = Path(source) + # Top-level metrics are resolved after every file's tables are loaded, so a + # metric referencing a table defined in a later file still attaches to it + # regardless of directory traversal order. + deferred_metrics: list[dict] = [] + if source_path.is_dir(): # Parse all YAML files in directory for yaml_file in source_path.rglob("*.yml"): - self._parse_file(yaml_file, graph) + self._parse_file(yaml_file, graph, deferred_metrics) for yaml_file in source_path.rglob("*.yaml"): - self._parse_file(yaml_file, graph) + self._parse_file(yaml_file, graph, deferred_metrics) else: # Parse single file - self._parse_file(source_path, graph) + self._parse_file(source_path, graph, deferred_metrics) + + self._apply_top_level_metrics(deferred_metrics, graph) return graph - def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: + def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph) -> None: + """Attach collected top-level metrics once all tables are loaded.""" + for metric_def in metric_defs: + table_name = metric_def.get("table") + if table_name and table_name in graph.models: + # Table-scoped: bare column refs are local to the table, so qualify + # complex expressions with the {model} placeholder. + metric = self._parse_metric(metric_def) + if metric is None: + continue + graph.models[table_name].metrics.append(metric) + else: + # Graph-level metric: expressions reference other fields as + # `model.field` (already qualified), so leave them untouched + # instead of corrupting them with the {model} placeholder. + metric = self._parse_metric(metric_def, qualify=False) + if metric is None: + continue + graph.metrics[metric.name] = metric + + def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: list[dict]) -> None: """Parse a single Snowflake semantic model YAML file. Args: file_path: Path to YAML file graph: Semantic graph to add models/metrics to + deferred_metrics: Accumulator for top-level metric definitions, resolved + after every file's tables are loaded. """ with open(file_path) as f: data = yaml.safe_load(f) @@ -176,24 +205,10 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: relationships_def = data.get("relationships") or [] self._apply_relationships(relationships_def, graph) - # Parse top-level metrics (semantic-model-scoped metrics referencing tables) - for metric_def in data.get("metrics") or []: - table_name = metric_def.get("table") - if table_name and table_name in graph.models: - # Table-scoped: bare column refs are local to the table, so qualify - # complex expressions with the {model} placeholder. - metric = self._parse_metric(metric_def) - if metric is None: - continue - graph.models[table_name].metrics.append(metric) - else: - # Graph-level metric: expressions reference other fields as - # `model.field` (already qualified), so leave them untouched - # instead of corrupting them with the {model} placeholder. - metric = self._parse_metric(metric_def, qualify=False) - if metric is None: - continue - graph.metrics[metric.name] = metric + # Defer top-level metrics (semantic-model-scoped metrics referencing tables) + # until all files are parsed, so a metric whose table lives in a later file + # still attaches correctly regardless of traversal order. + deferred_metrics.extend(data.get("metrics") or []) # Parse top-level Cortex Analyst sections onto the graph. self._apply_top_level_sections(data, graph) diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 692acee0..7001a401 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -699,3 +699,46 @@ def test_export_top_level_ratio_keeps_model_qualifiers(self, adapter, tmp_path): data = yaml.safe_load(output_file.read_text()) expr = next(m["expr"] for m in data["metrics"] if m["name"] == "aov") assert expr == "orders.revenue / NULLIF(orders.order_count, 0)" + + def test_parse_directory_attaches_top_level_metric_regardless_of_file_order(self, adapter, tmp_path): + """A top-level metric must attach to its table even if defined in an earlier file.""" + # rglob visits files in sorted order, so a_metrics is parsed before z_tables. + (tmp_path / "a_metrics.yaml").write_text( + """ +name: a_metrics +metrics: + - name: avg_order + table: orders + expr: SUM(amount) / COUNT(order_id) +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: z_tables +tables: + - name: orders + base_table: {database: db, schema: s, table: orders} + primary_key: {columns: [order_id]} + dimensions: + - {name: order_id, expr: order_id, data_type: number} + facts: + - {name: amount, expr: amount, data_type: number} +""" + ) + + graph = adapter.parse(tmp_path) + + # The metric attaches to its table (not the graph-level branch). + orders = graph.models["orders"] + assert "avg_order" in [m.name for m in orders.metrics] + assert "avg_order" not in graph.metrics + # Table-scoped: complex expression is qualified for queryability. + assert "{model}" in orders.get_metric("avg_order").sql + + # Export drops the placeholder and keeps the metric under the orders table. + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + orders_table = next(t for t in data["tables"] if t["name"] == "orders") + expr = next(m["expr"] for m in orders_table["metrics"] if m["name"] == "avg_order") + assert "{model}" not in expr From da115d3185623592be3dbd19006a14ff105d6363 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 19:20:40 -0700 Subject: [PATCH 14/25] Resolve Snowflake cross-file top-level metrics in the CLI loader The CLI loader parses each Snowflake file separately, so a top-level metric referencing a table defined in another file could not attach. Mark such metrics with a pending table reference during parse, detect Cortex files (tables + base_table) before the generic MetricFlow heuristic, and re-attach the metric to its model once every file is loaded, re-qualifying its SQL. --- sidemantic/adapters/snowflake.py | 35 ++++++++++++++++ sidemantic/loaders.py | 23 +++++++++-- tests/core/test_directory_loaders.py | 62 ++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 3b3f4b36..7c861f08 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -157,6 +157,32 @@ def parse(self, source: str | Path) -> SemanticGraph: return graph + @staticmethod + def resolve_pending_table_metrics(models: dict, metrics: dict) -> None: + """Attach graph-level metrics that reference a now-loaded table. + + Multi-file CLI loads parse each Snowflake file separately, so a top-level + metric with ``table: orders`` defined before the file that declares + ``orders`` lands in ``metrics`` with a ``pending_table`` marker. Once every + file's models are loaded, move such a metric onto its table and re-qualify + its expression with the ``{model}`` placeholder. ``models``/``metrics`` are + the name-keyed dicts accumulated during directory loading. + """ + for name in list(metrics.keys()): + metric = metrics[name] + snowflake_meta = (metric.metadata or {}).get("snowflake") or {} + table_name = snowflake_meta.get("pending_table") + if not table_name: + continue + cleaned_meta = {k: v for k, v in snowflake_meta.items() if k != "pending_table"} + metric.metadata = {**(metric.metadata or {}), "snowflake": cleaned_meta} if cleaned_meta else None + model = models.get(table_name) + if model is not None: + if metric.type == "derived" and metric.sql: + metric.sql = _qualify_columns(metric.sql) + del metrics[name] + model.metrics.append(metric) + def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph) -> None: """Attach collected top-level metrics once all tables are loaded.""" for metric_def in metric_defs: @@ -175,6 +201,15 @@ def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph metric = self._parse_metric(metric_def, qualify=False) if metric is None: continue + if table_name: + # The referenced table is not in this graph (multi-file CLI load + # parses each file separately); record it so the directory loader + # can re-attach the metric once that table is loaded. + metadata = metric.metadata or {} + snowflake_meta = dict(metadata.get("snowflake") or {}) + snowflake_meta["pending_table"] = table_name + metadata = {**metadata, "snowflake": snowflake_meta} + metric.metadata = metadata graph.metrics[metric.name] = metric def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: list[dict]) -> None: diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index ec6d42e8..e92106db 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -172,6 +172,12 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict adapter = SidemanticAdapter() elif _looks_like_native_sidemantic_yaml(yaml_data): adapter = SidemanticAdapter() + elif _yaml_has_top_level_key(yaml_data, "tables") and _contains_yaml_key(yaml_data, "base_table"): + # Snowflake Cortex Semantic Model format. Checked before the generic + # MetricFlow `metrics:` + `type:` heuristic because a Cortex file may + # carry top-level `metrics:` and `data_type:` while `base_table` is a + # Snowflake-only signal MetricFlow never has. + adapter = SnowflakeAdapter() elif _yaml_has_top_level_key(yaml_data, "metrics") and "type: " in content: adapter = MetricFlowAdapter() elif _contains_yaml_key(yaml_data, "base_sql_table") and _contains_yaml_key(yaml_data, "measures"): @@ -184,9 +190,6 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict adapter = ThoughtSpotAdapter() elif _contains_yaml_key(yaml_data, "worksheet") and _contains_yaml_key(yaml_data, "worksheet_columns"): adapter = ThoughtSpotAdapter() - elif _yaml_has_top_level_key(yaml_data, "tables") and _contains_yaml_key(yaml_data, "base_table"): - # Snowflake Cortex Semantic Model format - adapter = SnowflakeAdapter() elif _looks_like_bsl_yaml(yaml_data): # BSL format uses _.column syntax for expressions adapter = BSLAdapter() @@ -247,6 +250,11 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict # declared in separate files. _finalize_bsl_join_aliases(all_models) + # Attach Snowflake top-level metrics whose referenced table was defined in a + # different file (each Snowflake file is parsed separately, so the table may + # not have been known when the metric file was parsed). + _resolve_snowflake_pending_table_metrics(all_models, all_metrics) + # Infer cross-model relationships based on naming conventions _infer_relationships(all_models) @@ -790,6 +798,15 @@ def _merge_import_warnings(graph: object, warnings: list[dict[str, object]]) -> graph.import_warnings = merged +def _resolve_snowflake_pending_table_metrics(all_models: dict, all_metrics: dict) -> None: + """Re-attach Snowflake top-level metrics to tables defined in other files.""" + if not any((metric.metadata or {}).get("snowflake", {}).get("pending_table") for metric in all_metrics.values()): + return + from sidemantic.adapters.snowflake import SnowflakeAdapter + + SnowflakeAdapter.resolve_pending_table_metrics(all_models, all_metrics) + + def _deep_merge_metadata(target: dict, source: dict) -> None: """Recursively merge ``source`` into ``target``. diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index a1d3cf6a..2f491186 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -339,3 +339,65 @@ def test_load_from_directory_merges_snowflake_metadata_across_files(tmp_path): assert sorted(q["name"] for q in merged) == ["q1", "q2"] # Dynamic attribute accumulates too. assert len(getattr(graph, "verified_queries", [])) == 2 + + +def test_load_from_directory_attaches_snowflake_metric_to_table_in_another_file(tmp_path): + """A Snowflake top-level metric attaches to its table even if defined in another file.""" + # File A is Snowflake-detected (tables + base_table) and carries a top-level + # metric referencing `orders`, which lives in file B. + (tmp_path / "a_model.yaml").write_text( + """ +name: a_model +tables: + - name: products + base_table: + database: db + schema: s + table: products + primary_key: + columns: [id] + dimensions: + - name: id + expr: id + data_type: number +metrics: + - name: avg_order + table: orders + expr: SUM(amount) / COUNT(order_id) +""" + ) + (tmp_path / "b_model.yaml").write_text( + """ +name: b_model +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + assert set(graph.models) == {"products", "orders"} + orders = graph.models["orders"] + assert "avg_order" in [m.name for m in orders.metrics] + assert "avg_order" not in graph.metrics + metric = orders.get_metric("avg_order") + # Table-scoped: complex expression re-qualified for queryability. + assert "{model}" in metric.sql + # The internal pending marker is cleaned up after attachment. + assert (metric.metadata or {}).get("snowflake", {}).get("pending_table") is None From 87ea4155184c13e6238570427f42e59e261afa2d Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 19:55:59 -0700 Subject: [PATCH 15/25] Detect metric-only Snowflake Cortex files in the directory loader A Cortex project may split top-level metrics (table + expr, no tables section) into their own file. Such a file was never routed to the Snowflake adapter, so the metrics silently dropped during load_from_directory. Detect these files by their Cortex-only table/expr shape and route them to Snowflake so the metrics defer and attach to tables defined in sibling files. --- sidemantic/loaders.py | 30 +++++++++++++++++++ tests/core/test_directory_loaders.py | 43 ++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index e92106db..e962299a 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -178,6 +178,11 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict # carry top-level `metrics:` and `data_type:` while `base_table` is a # Snowflake-only signal MetricFlow never has. adapter = SnowflakeAdapter() + elif _looks_like_snowflake_metrics_file(yaml_data): + # Cortex top-level metrics split into their own file (table + expr, + # no tables section). Route to Snowflake so the metrics defer and + # attach to tables defined in sibling files. + adapter = SnowflakeAdapter() elif _yaml_has_top_level_key(yaml_data, "metrics") and "type: " in content: adapter = MetricFlowAdapter() elif _contains_yaml_key(yaml_data, "base_sql_table") and _contains_yaml_key(yaml_data, "measures"): @@ -437,6 +442,31 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: return isinstance(data, dict) and key in data +def _looks_like_snowflake_metrics_file(data: dict) -> bool: + """Detect a Snowflake Cortex file that contains only top-level metrics. + + Cortex projects may split top-level ``metrics:`` (entries with ``table`` and + ``expr``) into their own file without any ``tables`` section. Such a file must + route to the Snowflake adapter so the metrics can be deferred and attached to + tables defined in sibling files. MetricFlow metrics use ``type``/``type_params`` + /``measure`` instead, so require Cortex-only ``table`` + ``expr`` keys and the + absence of those MetricFlow markers. + """ + if not isinstance(data, dict) or "tables" in data: + return False + metrics = data.get("metrics") + if not isinstance(metrics, list) or not metrics: + return False + for metric in metrics: + if not isinstance(metric, dict): + return False + if "table" not in metric or "expr" not in metric: + return False + if "type_params" in metric or "measure" in metric: + return False + return True + + def _contains_yaml_key(value: object, key: str) -> bool: """Return True when a parsed YAML object contains an exact key anywhere.""" if isinstance(value, dict): diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 2f491186..48be85ab 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -401,3 +401,46 @@ def test_load_from_directory_attaches_snowflake_metric_to_table_in_another_file( assert "{model}" in metric.sql # The internal pending marker is cleaned up after attachment. assert (metric.metadata or {}).get("snowflake", {}).get("pending_table") is None + + +def test_load_from_directory_detects_metric_only_snowflake_file(tmp_path): + """A Cortex file with only top-level metrics (table + expr) is routed to Snowflake.""" + # Metric-only file (no tables section) parsed before the table file. + (tmp_path / "a_metrics.yaml").write_text( + """ +metrics: + - name: avg_order + table: orders + expr: SUM(amount) / COUNT(order_id) +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tables_model +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + orders = graph.models["orders"] + assert "avg_order" in [m.name for m in orders.metrics] + assert "avg_order" not in graph.metrics + assert "{model}" in orders.get_metric("avg_order").sql From c684a99b47d659419a6c6e2b9764e3ec189e2882 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 20:30:41 -0700 Subject: [PATCH 16/25] Allow tableless metrics in split Snowflake Cortex metrics files The metrics-only Cortex detector rejected the whole file when any metric lacked a table, dropping mixed files that combine table-scoped metrics with graph-level view metrics. Require expr and no MetricFlow markers on every metric and at least one table reference, so mixed files route to Snowflake while tableless-only files are left to native detection. --- sidemantic/loaders.py | 19 ++++++----- tests/core/test_directory_loaders.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index e962299a..6de06b82 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -445,26 +445,29 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: def _looks_like_snowflake_metrics_file(data: dict) -> bool: """Detect a Snowflake Cortex file that contains only top-level metrics. - Cortex projects may split top-level ``metrics:`` (entries with ``table`` and - ``expr``) into their own file without any ``tables`` section. Such a file must - route to the Snowflake adapter so the metrics can be deferred and attached to - tables defined in sibling files. MetricFlow metrics use ``type``/``type_params`` - /``measure`` instead, so require Cortex-only ``table`` + ``expr`` keys and the - absence of those MetricFlow markers. + Cortex projects may split top-level ``metrics:`` into their own file without a + ``tables`` section. Such a file mixes table-scoped metrics (with ``table``) and + graph-level view metrics (without ``table``); both use ``expr`` and never the + MetricFlow ``type_params``/``measure`` markers. Require every metric to use + ``expr`` with no MetricFlow markers, plus at least one ``table`` reference so + native detection (which already handles tableless graph metrics) keeps those. """ if not isinstance(data, dict) or "tables" in data: return False metrics = data.get("metrics") if not isinstance(metrics, list) or not metrics: return False + has_table_scoped = False for metric in metrics: if not isinstance(metric, dict): return False - if "table" not in metric or "expr" not in metric: + if "expr" not in metric: return False if "type_params" in metric or "measure" in metric: return False - return True + if "table" in metric: + has_table_scoped = True + return has_table_scoped def _contains_yaml_key(value: object, key: str) -> bool: diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 48be85ab..2cd7f70b 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -444,3 +444,53 @@ def test_load_from_directory_detects_metric_only_snowflake_file(tmp_path): assert "avg_order" in [m.name for m in orders.metrics] assert "avg_order" not in graph.metrics assert "{model}" in orders.get_metric("avg_order").sql + + +def test_load_from_directory_detects_mixed_snowflake_metrics_file(tmp_path): + """A metrics-only Cortex file may mix table-scoped and tableless view metrics.""" + # No tables section; one metric has table (table-scoped), one omits it (graph-level). + (tmp_path / "a_metrics.yaml").write_text( + """ +metrics: + - name: avg_order + table: orders + expr: SUM(amount) / COUNT(order_id) + - name: global_ratio + expr: orders.revenue / orders.order_count +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tables_model +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number + metrics: + - name: revenue + expr: SUM(amount) + - name: order_count + expr: COUNT(order_id) +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + # Table-scoped metric attaches to its table; tableless metric stays graph-level. + assert "avg_order" in [m.name for m in graph.models["orders"].metrics] + assert "global_ratio" in graph.metrics + assert "avg_order" not in graph.metrics From d20f85dfb9e127b6a4e098ba9d6bcee597d315d6 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 21:01:16 -0700 Subject: [PATCH 17/25] Route tableless Snowflake view-metric sidecars with top-level sections A split Cortex file with only view-level metrics (no table) plus Snowflake top-level sections such as verified_queries matched neither native nor Snowflake detection, so it was skipped and its metrics/metadata dropped. Treat a Snowflake-only top-level section as a Cortex signal so these sidecar files route to the Snowflake adapter. --- sidemantic/loaders.py | 14 +++++--- tests/core/test_directory_loaders.py | 51 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 6de06b82..a397bc5f 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -442,15 +442,20 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: return isinstance(data, dict) and key in data +_SNOWFLAKE_TOP_LEVEL_SECTIONS = ("verified_queries", "custom_instructions", "module_custom_instructions") + + def _looks_like_snowflake_metrics_file(data: dict) -> bool: """Detect a Snowflake Cortex file that contains only top-level metrics. Cortex projects may split top-level ``metrics:`` into their own file without a ``tables`` section. Such a file mixes table-scoped metrics (with ``table``) and graph-level view metrics (without ``table``); both use ``expr`` and never the - MetricFlow ``type_params``/``measure`` markers. Require every metric to use - ``expr`` with no MetricFlow markers, plus at least one ``table`` reference so - native detection (which already handles tableless graph metrics) keeps those. + MetricFlow ``type_params``/``measure`` markers. Route the file to Snowflake when + every metric is Cortex-shaped and it carries a Cortex-only signal: at least one + ``table`` reference, or a Snowflake-only top-level section (verified_queries / + custom instructions). A tableless metrics file with none of these is left to + native detection. """ if not isinstance(data, dict) or "tables" in data: return False @@ -467,7 +472,8 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: return False if "table" in metric: has_table_scoped = True - return has_table_scoped + has_snowflake_section = any(section in data for section in _SNOWFLAKE_TOP_LEVEL_SECTIONS) + return has_table_scoped or has_snowflake_section def _contains_yaml_key(value: object, key: str) -> bool: diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 2cd7f70b..01b08576 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -494,3 +494,54 @@ def test_load_from_directory_detects_mixed_snowflake_metrics_file(tmp_path): assert "avg_order" in [m.name for m in graph.models["orders"].metrics] assert "global_ratio" in graph.metrics assert "avg_order" not in graph.metrics + + +def test_load_from_directory_detects_view_metric_sidecar_with_snowflake_sections(tmp_path): + """A tableless Cortex sidecar with verified_queries routes to Snowflake.""" + # Pure view-level metrics (no table) plus Snowflake-only top-level sections. + (tmp_path / "a_sidecar.yaml").write_text( + """ +metrics: + - name: global_ratio + expr: orders.revenue / orders.order_count +verified_queries: + - name: total revenue + sql: SELECT SUM(amount) FROM orders +custom_instructions: Prefer revenue. +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number + metrics: + - name: revenue + expr: SUM(amount) + - name: order_count + expr: COUNT(order_id) +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + assert "global_ratio" in graph.metrics + snowflake_meta = graph.metadata.get("snowflake", {}) + assert snowflake_meta.get("verified_queries") + assert snowflake_meta.get("custom_instructions") == "Prefer revenue." From 5ef649c3f3dec1117889a11147040cc189fc27ff Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 21:32:58 -0700 Subject: [PATCH 18/25] Route instruction-only Snowflake Cortex sidecars A Cortex sidecar holding only verified_queries/custom_instructions and no metrics returned early from the metrics-file detector before the Snowflake-section check, so it was skipped and its metadata lost. Treat the metrics key as optional so files carrying only Snowflake-only top-level sections route to the Snowflake adapter. --- sidemantic/loaders.py | 46 ++++++++++++++++------------ tests/core/test_directory_loaders.py | 44 ++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 20 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index a397bc5f..9830cdee 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -446,32 +446,38 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: def _looks_like_snowflake_metrics_file(data: dict) -> bool: - """Detect a Snowflake Cortex file that contains only top-level metrics. - - Cortex projects may split top-level ``metrics:`` into their own file without a - ``tables`` section. Such a file mixes table-scoped metrics (with ``table``) and - graph-level view metrics (without ``table``); both use ``expr`` and never the - MetricFlow ``type_params``/``measure`` markers. Route the file to Snowflake when - every metric is Cortex-shaped and it carries a Cortex-only signal: at least one - ``table`` reference, or a Snowflake-only top-level section (verified_queries / - custom instructions). A tableless metrics file with none of these is left to - native detection. + """Detect a split Snowflake Cortex sidecar without a ``tables`` section. + + Cortex projects may split top-level ``metrics:`` and/or the Snowflake-only + sections (verified_queries / custom instructions) into their own file. Route + such a file to the Snowflake adapter when it carries a Cortex-only signal: + + - a Snowflake-only top-level section (verified_queries / custom instructions), + even when no ``metrics`` are present (instruction-only sidecar), or + - top-level ``metrics`` with at least one ``table`` reference. + + Any present metrics must be Cortex-shaped (``expr`` with no MetricFlow + ``type_params``/``measure`` markers). A tableless metrics file with none of + these signals is left to native detection. """ if not isinstance(data, dict) or "tables" in data: return False + metrics = data.get("metrics") - if not isinstance(metrics, list) or not metrics: - return False has_table_scoped = False - for metric in metrics: - if not isinstance(metric, dict): - return False - if "expr" not in metric: + if metrics is not None: + if not isinstance(metrics, list) or not metrics: return False - if "type_params" in metric or "measure" in metric: - return False - if "table" in metric: - has_table_scoped = True + for metric in metrics: + if not isinstance(metric, dict): + return False + if "expr" not in metric: + return False + if "type_params" in metric or "measure" in metric: + return False + if "table" in metric: + has_table_scoped = True + has_snowflake_section = any(section in data for section in _SNOWFLAKE_TOP_LEVEL_SECTIONS) return has_table_scoped or has_snowflake_section diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 01b08576..8a6c377b 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -545,3 +545,47 @@ def test_load_from_directory_detects_view_metric_sidecar_with_snowflake_sections snowflake_meta = graph.metadata.get("snowflake", {}) assert snowflake_meta.get("verified_queries") assert snowflake_meta.get("custom_instructions") == "Prefer revenue." + + +def test_load_from_directory_detects_instruction_only_snowflake_sidecar(tmp_path): + """A Cortex sidecar with only verified_queries/custom_instructions routes to Snowflake.""" + # No metrics and no tables: only Snowflake-only top-level sections. + (tmp_path / "a_instructions.yaml").write_text( + """ +verified_queries: + - name: total revenue + sql: SELECT SUM(amount) FROM orders +custom_instructions: Prefer revenue. +module_custom_instructions: + sql_generation: Use explicit columns. +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + snowflake_meta = layer.graph.metadata.get("snowflake", {}) + + assert snowflake_meta.get("verified_queries") + assert snowflake_meta.get("custom_instructions") == "Prefer revenue." + assert "module_custom_instructions" in snowflake_meta From fca46034b1e41fa430ef70d71a851f3b016306cc Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 22:07:01 -0700 Subject: [PATCH 19/25] Avoid pending-metric name collisions and merge non-OSI root metadata in Rust - Hold Snowflake table-scoped metrics whose table is in another file in a table-qualified pending list instead of the graph-level name map, so two tables defining a same-named scoped metric no longer overwrite each other; unresolved entries fall back to graph-level metrics. - Replace the Rust directory loader's OSI-only metadata merge with a generic deep merge so root metadata.snowflake (verified queries / custom instructions) from Python export-native files survives directory loads. --- sidemantic-rs/src/config/loader.rs | 111 ++++++++++++++++++++------- sidemantic/adapters/snowflake.py | 67 +++++++++------- sidemantic/loaders.py | 17 +++- tests/core/test_directory_loaders.py | 65 ++++++++++++++++ 4 files changed, 201 insertions(+), 59 deletions(-) diff --git a/sidemantic-rs/src/config/loader.rs b/sidemantic-rs/src/config/loader.rs index a81af10a..5a6fa6b2 100644 --- a/sidemantic-rs/src/config/loader.rs +++ b/sidemantic-rs/src/config/loader.rs @@ -494,7 +494,7 @@ pub fn load_from_directory_with_metadata(dir: impl AsRef) -> Result { let content = fs::read_to_string(&path).map_err(|e| { @@ -590,39 +590,37 @@ pub fn load_from_directory_with_metadata(dir: impl AsRef) -> Result, incoming: Option) { +fn merge_graph_metadata(acc: &mut Option, incoming: Option) { let Some(incoming) = incoming else { return; }; - let Some(incoming_osi) = incoming.get("osi").and_then(|v| v.as_object()).cloned() else { - return; - }; - - let acc_value = - acc.get_or_insert_with(|| serde_json::json!({ "osi": { "semantic_models": [] } })); - let Some(acc_osi) = acc_value - .as_object_mut() - .and_then(|m| m.get_mut("osi")) - .and_then(serde_json::Value::as_object_mut) - else { - return; - }; - - if let Some(serde_json::Value::Array(incoming_models)) = incoming_osi.get("semantic_models") { - let entry = acc_osi - .entry("semantic_models") - .or_insert_with(|| serde_json::Value::Array(Vec::new())); - if let serde_json::Value::Array(acc_models) = entry { - acc_models.extend(incoming_models.iter().cloned()); - } + match acc { + Some(existing) => deep_merge_json(existing, incoming), + None => *acc = Some(incoming), } +} - for key in ["version", "ontology"] { - if !acc_osi.contains_key(key) { - if let Some(value) = incoming_osi.get(key) { - acc_osi.insert(key.to_string(), value.clone()); +/// Recursively merge `incoming` into `target`: objects merge, arrays append, and +/// scalars keep the existing (first-wins) value. This preserves OSI accumulation +/// (semantic_models arrays append, version/ontology keep first) while also merging +/// non-OSI payloads such as `metadata.snowflake` from Python `export-native` files. +fn deep_merge_json(target: &mut serde_json::Value, incoming: serde_json::Value) { + match (target, incoming) { + (serde_json::Value::Object(target_map), serde_json::Value::Object(incoming_map)) => { + for (key, value) in incoming_map { + match target_map.get_mut(&key) { + Some(existing) => deep_merge_json(existing, value), + None => { + target_map.insert(key, value); + } + } } } + (serde_json::Value::Array(target_arr), serde_json::Value::Array(incoming_arr)) => { + target_arr.extend(incoming_arr); + } + // Scalars (or type mismatches): keep the existing value. + _ => {} } } @@ -1875,6 +1873,65 @@ models: assert!(orders.get_metric("net_revenue").is_some()); } + #[test] + fn test_load_from_directory_merges_non_osi_root_metadata() { + let dir = std::env::temp_dir().join(format!( + "sidemantic-rs-loader-metadata-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + fs::create_dir_all(&dir).unwrap(); + // Python `export-native` writes root `metadata.snowflake` (no `osi` key). + fs::write( + dir.join("a.yml"), + r#" +models: + - name: orders + table: orders + primary_key: order_id +metadata: + snowflake: + custom_instructions: Prefer revenue. + verified_queries: + - name: q1 +"#, + ) + .unwrap(); + fs::write( + dir.join("b.yml"), + r#" +models: + - name: customers + table: customers + primary_key: id +metadata: + snowflake: + verified_queries: + - name: q2 +"#, + ) + .unwrap(); + + let loaded = load_from_directory_with_metadata(&dir).unwrap(); + fs::remove_dir_all(&dir).unwrap(); + + let metadata = loaded.graph.metadata().expect("graph metadata preserved"); + let snowflake = &metadata["snowflake"]; + assert_eq!(snowflake["custom_instructions"], "Prefer revenue."); + // verified_queries from both files accumulate. + let names: Vec<&str> = snowflake["verified_queries"] + .as_array() + .unwrap() + .iter() + .map(|entry| entry["name"].as_str().unwrap()) + .collect(); + assert!(names.contains(&"q1")); + assert!(names.contains(&"q2")); + } + #[test] fn test_walkdir_returns_deterministic_lexical_order() { let dir = std::env::temp_dir().join(format!( diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 7c861f08..624e8295 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -155,33 +155,42 @@ def parse(self, source: str | Path) -> SemanticGraph: self._apply_top_level_metrics(deferred_metrics, graph) + # For a directory parse every file is seen here, so resolve pending metrics + # against the loaded tables; anything still unresolved (table truly absent) + # falls back to a graph-level metric so it is not dropped. For a single-file + # parse the pending list is left intact so the directory loader can resolve + # it across files. + pending = getattr(graph, "_pending_table_metrics", None) + if pending and source_path.is_dir(): + self.resolve_pending_table_metrics(graph.models, pending) + for _table_name, metric in pending: + graph.metrics.setdefault(metric.name, metric) + pending.clear() + return graph @staticmethod - def resolve_pending_table_metrics(models: dict, metrics: dict) -> None: - """Attach graph-level metrics that reference a now-loaded table. + def resolve_pending_table_metrics(models: dict, pending_metrics: list) -> None: + """Attach pending metrics that reference a now-loaded table. Multi-file CLI loads parse each Snowflake file separately, so a top-level metric with ``table: orders`` defined before the file that declares - ``orders`` lands in ``metrics`` with a ``pending_table`` marker. Once every - file's models are loaded, move such a metric onto its table and re-qualify - its expression with the ``{model}`` placeholder. ``models``/``metrics`` are - the name-keyed dicts accumulated during directory loading. + ``orders`` is collected as a ``(table_name, Metric)`` pending entry. Once + every file's models are loaded, attach each to its table and re-qualify its + expression with the ``{model}`` placeholder. Pending entries are a list (not + a name-keyed map) so same-named scoped metrics on different tables do not + overwrite one another. Unresolved entries are left in place. """ - for name in list(metrics.keys()): - metric = metrics[name] - snowflake_meta = (metric.metadata or {}).get("snowflake") or {} - table_name = snowflake_meta.get("pending_table") - if not table_name: - continue - cleaned_meta = {k: v for k, v in snowflake_meta.items() if k != "pending_table"} - metric.metadata = {**(metric.metadata or {}), "snowflake": cleaned_meta} if cleaned_meta else None + remaining = [] + for table_name, metric in pending_metrics: model = models.get(table_name) - if model is not None: - if metric.type == "derived" and metric.sql: - metric.sql = _qualify_columns(metric.sql) - del metrics[name] - model.metrics.append(metric) + if model is None: + remaining.append((table_name, metric)) + continue + if metric.type == "derived" and metric.sql: + metric.sql = _qualify_columns(metric.sql) + model.metrics.append(metric) + pending_metrics[:] = remaining def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph) -> None: """Attach collected top-level metrics once all tables are loaded.""" @@ -194,6 +203,17 @@ def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph if metric is None: continue graph.models[table_name].metrics.append(metric) + elif table_name: + # The referenced table is not in this graph (multi-file CLI load + # parses each file separately). Hold the metric in a table-qualified + # pending list so the directory loader can attach it once that table + # is loaded, without colliding on metric name. + metric = self._parse_metric(metric_def, qualify=False) + if metric is None: + continue + if not hasattr(graph, "_pending_table_metrics"): + graph._pending_table_metrics = [] + graph._pending_table_metrics.append((table_name, metric)) else: # Graph-level metric: expressions reference other fields as # `model.field` (already qualified), so leave them untouched @@ -201,15 +221,6 @@ def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph metric = self._parse_metric(metric_def, qualify=False) if metric is None: continue - if table_name: - # The referenced table is not in this graph (multi-file CLI load - # parses each file separately); record it so the directory loader - # can re-attach the metric once that table is loaded. - metadata = metric.metadata or {} - snowflake_meta = dict(metadata.get("snowflake") or {}) - snowflake_meta["pending_table"] = table_name - metadata = {**metadata, "snowflake": snowflake_meta} - metric.metadata = metadata graph.metrics[metric.name] = metric def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: list[dict]) -> None: diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 9830cdee..1df7d573 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -54,6 +54,9 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict all_models = {} all_metrics = {} all_parameters = {} + # Snowflake table-scoped metrics whose table lives in another file, held as + # (table_name, Metric) pairs so same-named scoped metrics never collide. + all_pending_table_metrics: list = [] import_warnings: list[dict[str, object]] = [] # Check for SML repository (catalog.yml/atscale.yml or object_type files) @@ -237,6 +240,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict all_models.update(graph.models) all_metrics.update(graph.metrics) all_parameters.update(graph.parameters) + all_pending_table_metrics.extend(getattr(graph, "_pending_table_metrics", [])) except Exception as e: _append_import_warning( import_warnings, @@ -258,7 +262,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict # Attach Snowflake top-level metrics whose referenced table was defined in a # different file (each Snowflake file is parsed separately, so the table may # not have been known when the metric file was parsed). - _resolve_snowflake_pending_table_metrics(all_models, all_metrics) + _resolve_snowflake_pending_table_metrics(all_models, all_metrics, all_pending_table_metrics) # Infer cross-model relationships based on naming conventions _infer_relationships(all_models) @@ -843,13 +847,18 @@ def _merge_import_warnings(graph: object, warnings: list[dict[str, object]]) -> graph.import_warnings = merged -def _resolve_snowflake_pending_table_metrics(all_models: dict, all_metrics: dict) -> None: +def _resolve_snowflake_pending_table_metrics(all_models: dict, all_metrics: dict, pending: list) -> None: """Re-attach Snowflake top-level metrics to tables defined in other files.""" - if not any((metric.metadata or {}).get("snowflake", {}).get("pending_table") for metric in all_metrics.values()): + if not pending: return from sidemantic.adapters.snowflake import SnowflakeAdapter - SnowflakeAdapter.resolve_pending_table_metrics(all_models, all_metrics) + SnowflakeAdapter.resolve_pending_table_metrics(all_models, pending) + # Any metric whose table is still unknown falls back to a graph-level metric + # so it is not silently dropped. + for _table_name, metric in pending: + all_metrics.setdefault(metric.name, metric) + pending.clear() def _deep_merge_metadata(target: dict, source: dict) -> None: diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 8a6c377b..6f17a117 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -589,3 +589,68 @@ def test_load_from_directory_detects_instruction_only_snowflake_sidecar(tmp_path assert snowflake_meta.get("verified_queries") assert snowflake_meta.get("custom_instructions") == "Prefer revenue." assert "module_custom_instructions" in snowflake_meta + + +def test_load_from_directory_same_named_scoped_metrics_on_different_tables(tmp_path): + """Same-named table-scoped metrics on different tables must not overwrite each other.""" + # Two metric sidecars each define a metric named "total" for a different table. + (tmp_path / "a_metrics.yaml").write_text( + """ +metrics: + - name: total + table: orders + expr: SUM(amount) +""" + ) + (tmp_path / "b_metrics.yaml").write_text( + """ +metrics: + - name: total + table: customers + expr: SUM(balance) +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [id] + dimensions: + - name: id + expr: id + data_type: number + facts: + - name: amount + expr: amount + data_type: number + - name: customers + base_table: + database: db + schema: s + table: customers + primary_key: + columns: [id] + dimensions: + - name: id + expr: id + data_type: number + facts: + - name: balance + expr: balance + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + # Both scoped metrics attach to their respective tables. + assert "total" in [m.name for m in graph.models["orders"].metrics] + assert "total" in [m.name for m in graph.models["customers"].metrics] From 0ffb3a8e2e4605ba490c1a1d34d34ad7a5b536f4 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 22:40:28 -0700 Subject: [PATCH 20/25] Route and defer relationship-only Snowflake Cortex sidecars A split Cortex project may keep top-level relationships in their own file. The detector now recognizes Snowflake-shaped relationship-only sidecars, the adapter defers relationships until all tables load (and stashes unresolved ones), and the directory loader applies pending relationships once every referenced table is loaded, so declared joins survive the CLI-first flow. --- sidemantic/adapters/snowflake.py | 45 ++++++++++++++++----- sidemantic/loaders.py | 40 ++++++++++++++++--- tests/core/test_directory_loaders.py | 58 ++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 15 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 624e8295..4fdcab5c 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -138,21 +138,23 @@ def parse(self, source: str | Path) -> SemanticGraph: graph = SemanticGraph() source_path = Path(source) - # Top-level metrics are resolved after every file's tables are loaded, so a - # metric referencing a table defined in a later file still attaches to it - # regardless of directory traversal order. + # Top-level metrics and relationships are resolved after every file's tables + # are loaded, so a metric or relationship referencing a table defined in a + # later file still resolves regardless of directory traversal order. deferred_metrics: list[dict] = [] + deferred_relationships: list[dict] = [] if source_path.is_dir(): # Parse all YAML files in directory for yaml_file in source_path.rglob("*.yml"): - self._parse_file(yaml_file, graph, deferred_metrics) + self._parse_file(yaml_file, graph, deferred_metrics, deferred_relationships) for yaml_file in source_path.rglob("*.yaml"): - self._parse_file(yaml_file, graph, deferred_metrics) + self._parse_file(yaml_file, graph, deferred_metrics, deferred_relationships) else: # Parse single file - self._parse_file(source_path, graph, deferred_metrics) + self._parse_file(source_path, graph, deferred_metrics, deferred_relationships) + self._apply_relationships(deferred_relationships, graph) self._apply_top_level_metrics(deferred_metrics, graph) # For a directory parse every file is seen here, so resolve pending metrics @@ -223,7 +225,13 @@ def _apply_top_level_metrics(self, metric_defs: list[dict], graph: SemanticGraph continue graph.metrics[metric.name] = metric - def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: list[dict]) -> None: + def _parse_file( + self, + file_path: Path, + graph: SemanticGraph, + deferred_metrics: list[dict], + deferred_relationships: list[dict], + ) -> None: """Parse a single Snowflake semantic model YAML file. Args: @@ -231,6 +239,8 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: l graph: Semantic graph to add models/metrics to deferred_metrics: Accumulator for top-level metric definitions, resolved after every file's tables are loaded. + deferred_relationships: Accumulator for top-level relationship + definitions, applied after every file's tables are loaded. """ with open(file_path) as f: data = yaml.safe_load(f) @@ -247,9 +257,9 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph, deferred_metrics: l if model: graph.add_model(model) - # Parse relationships (defined at semantic model level, not table level) - relationships_def = data.get("relationships") or [] - self._apply_relationships(relationships_def, graph) + # Defer relationships (defined at the semantic-model level) until all files' + # tables are loaded so they resolve regardless of traversal order. + deferred_relationships.extend(data.get("relationships") or []) # Defer top-level metrics (semantic-model-scoped metrics referencing tables) # until all files are parsed, so a metric whose table lives in a later file @@ -636,6 +646,14 @@ def _parse_filter(self, filter_def: dict) -> Segment | None: description=filter_def.get("description"), ) + def apply_pending_relationships(self, relationships_def: list, graph: SemanticGraph) -> None: + """Apply relationship definitions collected from separately-parsed files. + + Used by the directory loader after every file's models are loaded so a + relationship-only Cortex sidecar still attaches its joins. + """ + self._apply_relationships(relationships_def, graph) + def _apply_relationships(self, relationships_def: list, graph: SemanticGraph) -> None: """Apply relationships from semantic model to models in graph. @@ -687,6 +705,13 @@ def _apply_relationships(self, relationships_def: list, graph: SemanticGraph) -> model.relationships.append(relationship) # Rebuild adjacency after adding relationship graph.build_adjacency() + else: + # The left table is not in this graph (a multi-file CLI load parses + # each file separately). Hold the definition so the directory loader + # can apply it once that table is loaded. + if not hasattr(graph, "_pending_relationships"): + graph._pending_relationships = [] + graph._pending_relationships.append(rel_def) def export(self, graph: SemanticGraph, output_path: str | Path) -> None: """Export semantic graph to Snowflake semantic model YAML format. diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 1df7d573..ebd189ba 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -57,6 +57,8 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict # Snowflake table-scoped metrics whose table lives in another file, held as # (table_name, Metric) pairs so same-named scoped metrics never collide. all_pending_table_metrics: list = [] + # Snowflake relationship definitions whose tables live in other files. + all_pending_relationships: list = [] import_warnings: list[dict[str, object]] = [] # Check for SML repository (catalog.yml/atscale.yml or object_type files) @@ -241,6 +243,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict all_metrics.update(graph.metrics) all_parameters.update(graph.parameters) all_pending_table_metrics.extend(getattr(graph, "_pending_table_metrics", [])) + all_pending_relationships.extend(getattr(graph, "_pending_relationships", [])) except Exception as e: _append_import_warning( import_warnings, @@ -272,6 +275,10 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict if model.name not in layer.graph.models: layer.add_model(model) + # Apply Snowflake relationships declared in a separate file now that every + # referenced table is loaded. + _apply_snowflake_pending_relationships(layer.graph, all_pending_relationships) + # Register graph-level metrics and parameters after models. for metric in all_metrics.values(): if metric.name not in layer.graph.metrics: @@ -449,15 +456,28 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: _SNOWFLAKE_TOP_LEVEL_SECTIONS = ("verified_queries", "custom_instructions", "module_custom_instructions") +def _looks_like_snowflake_relationships(data: dict) -> bool: + """Return True when a file's top-level ``relationships`` are Snowflake-shaped.""" + relationships = data.get("relationships") + if not isinstance(relationships, list) or not relationships: + return False + return all( + isinstance(rel, dict) and "left_table" in rel and "right_table" in rel and "relationship_columns" in rel + for rel in relationships + ) + + def _looks_like_snowflake_metrics_file(data: dict) -> bool: """Detect a split Snowflake Cortex sidecar without a ``tables`` section. - Cortex projects may split top-level ``metrics:`` and/or the Snowflake-only - sections (verified_queries / custom instructions) into their own file. Route - such a file to the Snowflake adapter when it carries a Cortex-only signal: + Cortex projects may split top-level ``metrics:``, ``relationships:`` and/or the + Snowflake-only sections (verified_queries / custom instructions) into their own + file. Route such a file to the Snowflake adapter when it carries a Cortex-only + signal: - a Snowflake-only top-level section (verified_queries / custom instructions), - even when no ``metrics`` are present (instruction-only sidecar), or + even when no ``metrics`` are present (instruction-only sidecar), + - Snowflake-shaped top-level ``relationships`` (relationship-only sidecar), or - top-level ``metrics`` with at least one ``table`` reference. Any present metrics must be Cortex-shaped (``expr`` with no MetricFlow @@ -483,7 +503,7 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: has_table_scoped = True has_snowflake_section = any(section in data for section in _SNOWFLAKE_TOP_LEVEL_SECTIONS) - return has_table_scoped or has_snowflake_section + return has_table_scoped or has_snowflake_section or _looks_like_snowflake_relationships(data) def _contains_yaml_key(value: object, key: str) -> bool: @@ -861,6 +881,16 @@ def _resolve_snowflake_pending_table_metrics(all_models: dict, all_metrics: dict pending.clear() +def _apply_snowflake_pending_relationships(graph: object, pending: list) -> None: + """Apply Snowflake relationship definitions whose tables live in other files.""" + if not pending: + return + from sidemantic.adapters.snowflake import SnowflakeAdapter + + SnowflakeAdapter().apply_pending_relationships(pending, graph) + pending.clear() + + def _deep_merge_metadata(target: dict, source: dict) -> None: """Recursively merge ``source`` into ``target``. diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 6f17a117..b4d20b6c 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -654,3 +654,61 @@ def test_load_from_directory_same_named_scoped_metrics_on_different_tables(tmp_p # Both scoped metrics attach to their respective tables. assert "total" in [m.name for m in graph.models["orders"].metrics] assert "total" in [m.name for m in graph.models["customers"].metrics] + + +def test_load_from_directory_detects_relationship_only_snowflake_sidecar(tmp_path): + """A Cortex sidecar with only top-level relationships routes to Snowflake and attaches joins.""" + # Non-standard join columns so foreign-key inference would NOT recreate the join. + (tmp_path / "a_rels.yaml").write_text( + """ +relationships: + - name: orders_to_customers + left_table: orders + right_table: customers + relationship_columns: + - left_column: cust_ref + right_column: cust_pk + relationship_type: many_to_one +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + - name: cust_ref + expr: cust_ref + data_type: number + - name: customers + base_table: + database: db + schema: s + table: customers + primary_key: + columns: [cust_pk] + dimensions: + - name: cust_pk + expr: cust_pk + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + orders = graph.models["orders"] + rel = next(r for r in orders.relationships if r.name == "customers") + assert rel.metadata["snowflake"]["name"] == "orders_to_customers" + assert rel.foreign_key == "cust_ref" + assert graph.find_relationship_path("orders", "customers") From ef6e7603fa64c6d0cedf6b0d6185524352cf865c Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 14 Jun 2026 23:12:27 -0700 Subject: [PATCH 21/25] Prefer explicit Snowflake joins and route metric-key sidecars - Apply Snowflake relationships from sidecar files before foreign-key inference so an explicit Cortex join wins over a guessed one for the same table pair, de-duplicating existing edges. - Detect a tableless Cortex metrics sidecar by Snowflake-only per-metric keys (access_modifier / labels / tags / non_additive_dimensions / using_relationships), so files native detection rejects still route to Snowflake instead of being dropped. --- sidemantic/adapters/snowflake.py | 46 +++++++++++- sidemantic/loaders.py | 34 ++++++--- tests/core/test_directory_loaders.py | 108 +++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 15 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 4fdcab5c..7433451f 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -646,13 +646,51 @@ def _parse_filter(self, filter_def: dict) -> Segment | None: description=filter_def.get("description"), ) - def apply_pending_relationships(self, relationships_def: list, graph: SemanticGraph) -> None: + def apply_pending_relationships(self, relationships_def: list, models: dict) -> None: """Apply relationship definitions collected from separately-parsed files. - Used by the directory loader after every file's models are loaded so a - relationship-only Cortex sidecar still attaches its joins. + Used by the directory loader after every file's models are loaded (and + before foreign-key inference) so a relationship-only Cortex sidecar attaches + its joins and an explicit join takes precedence over a guessed one. Operates + on the name-keyed ``models`` dict; adjacency is rebuilt later by the loader. """ - self._apply_relationships(relationships_def, graph) + for rel_def in relationships_def: + left_table = rel_def.get("left_table") + right_table = rel_def.get("right_table") + rel_type = rel_def.get("relationship_type", "many_to_one") + + if not left_table or not right_table: + continue + + rel_columns = rel_def.get("relationship_columns") or [] + if not rel_columns: + continue + + first_col = rel_columns[0] + left_column = first_col.get("left_column") + right_column = first_col.get("right_column") + + metadata = None + snowflake_name = rel_def.get("name") + if snowflake_name: + metadata = {"snowflake": {"name": snowflake_name}} + + model = models.get(left_table) + if model is None: + continue + # Skip if a relationship to the same target already exists (e.g. another + # sidecar declared it) to avoid duplicates. + if any(r.name == right_table for r in model.relationships): + continue + model.relationships.append( + Relationship( + name=right_table, + type=rel_type, + foreign_key=left_column, + primary_key=right_column, + metadata=metadata, + ) + ) def _apply_relationships(self, relationships_def: list, graph: SemanticGraph) -> None: """Apply relationships from semantic model to models in graph. diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index ebd189ba..4ce4fa51 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -267,6 +267,11 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict # not have been known when the metric file was parsed). _resolve_snowflake_pending_table_metrics(all_models, all_metrics, all_pending_table_metrics) + # Apply Snowflake relationships declared in a separate file before FK inference + # so an explicit Cortex join takes precedence over a guessed one for the same + # table pair. + _apply_snowflake_pending_relationships(all_models, all_pending_relationships) + # Infer cross-model relationships based on naming conventions _infer_relationships(all_models) @@ -275,10 +280,6 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict if model.name not in layer.graph.models: layer.add_model(model) - # Apply Snowflake relationships declared in a separate file now that every - # referenced table is loaded. - _apply_snowflake_pending_relationships(layer.graph, all_pending_relationships) - # Register graph-level metrics and parameters after models. for metric in all_metrics.values(): if metric.name not in layer.graph.metrics: @@ -454,6 +455,15 @@ def _yaml_has_top_level_key(data: dict, key: str) -> bool: _SNOWFLAKE_TOP_LEVEL_SECTIONS = ("verified_queries", "custom_instructions", "module_custom_instructions") +# Per-metric keys that only Snowflake Cortex uses (not in the native METRIC_FIELDS). +_SNOWFLAKE_METRIC_KEYS = ( + "table", + "access_modifier", + "labels", + "tags", + "non_additive_dimensions", + "using_relationships", +) def _looks_like_snowflake_relationships(data: dict) -> bool: @@ -478,7 +488,9 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: - a Snowflake-only top-level section (verified_queries / custom instructions), even when no ``metrics`` are present (instruction-only sidecar), - Snowflake-shaped top-level ``relationships`` (relationship-only sidecar), or - - top-level ``metrics`` with at least one ``table`` reference. + - top-level ``metrics`` carrying a Snowflake-only metric key (``table`` or per- + metric ``access_modifier``/``labels``/``tags``/``non_additive_dimensions``/ + ``using_relationships``). Any present metrics must be Cortex-shaped (``expr`` with no MetricFlow ``type_params``/``measure`` markers). A tableless metrics file with none of @@ -488,7 +500,7 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: return False metrics = data.get("metrics") - has_table_scoped = False + has_snowflake_metric_key = False if metrics is not None: if not isinstance(metrics, list) or not metrics: return False @@ -499,11 +511,11 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: return False if "type_params" in metric or "measure" in metric: return False - if "table" in metric: - has_table_scoped = True + if any(key in metric for key in _SNOWFLAKE_METRIC_KEYS): + has_snowflake_metric_key = True has_snowflake_section = any(section in data for section in _SNOWFLAKE_TOP_LEVEL_SECTIONS) - return has_table_scoped or has_snowflake_section or _looks_like_snowflake_relationships(data) + return has_snowflake_metric_key or has_snowflake_section or _looks_like_snowflake_relationships(data) def _contains_yaml_key(value: object, key: str) -> bool: @@ -881,13 +893,13 @@ def _resolve_snowflake_pending_table_metrics(all_models: dict, all_metrics: dict pending.clear() -def _apply_snowflake_pending_relationships(graph: object, pending: list) -> None: +def _apply_snowflake_pending_relationships(all_models: dict, pending: list) -> None: """Apply Snowflake relationship definitions whose tables live in other files.""" if not pending: return from sidemantic.adapters.snowflake import SnowflakeAdapter - SnowflakeAdapter().apply_pending_relationships(pending, graph) + SnowflakeAdapter().apply_pending_relationships(pending, all_models) pending.clear() diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index b4d20b6c..2e57a7ba 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -712,3 +712,111 @@ def test_load_from_directory_detects_relationship_only_snowflake_sidecar(tmp_pat assert rel.metadata["snowflake"]["name"] == "orders_to_customers" assert rel.foreign_key == "cust_ref" assert graph.find_relationship_path("orders", "customers") + + +def test_load_from_directory_explicit_snowflake_relationship_beats_inference(tmp_path): + """An explicit Cortex relationship takes precedence over a guessed foreign key.""" + # orders has customer_id (inferable to customers) AND an explicit Snowflake join. + (tmp_path / "a_rels.yaml").write_text( + """ +relationships: + - name: orders_to_customers + left_table: orders + right_table: customers + relationship_columns: + - left_column: cust_ref + right_column: cust_pk + relationship_type: many_to_one +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + - name: customer_id + expr: customer_id + data_type: number + - name: cust_ref + expr: cust_ref + data_type: number + - name: customers + base_table: + database: db + schema: s + table: customers + primary_key: + columns: [cust_pk] + dimensions: + - name: cust_pk + expr: cust_pk + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + orders = layer.graph.models["orders"] + + customer_rels = [r for r in orders.relationships if r.name == "customers"] + assert len(customer_rels) == 1 + assert customer_rels[0].foreign_key == "cust_ref" + assert customer_rels[0].metadata["snowflake"]["name"] == "orders_to_customers" + + +def test_load_from_directory_detects_metric_sidecar_with_snowflake_metric_keys(tmp_path): + """A tableless metrics sidecar carrying Snowflake-only metric keys routes to Snowflake.""" + (tmp_path / "a_metrics.yaml").write_text( + """ +metrics: + - name: global_ratio + expr: orders.revenue / orders.order_count + access_modifier: public_access + labels: [KPI] +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number + metrics: + - name: revenue + expr: SUM(amount) + - name: order_count + expr: COUNT(order_id) +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + graph = layer.graph + + assert "global_ratio" in graph.metrics + sf = graph.metrics["global_ratio"].metadata["snowflake"] + assert sf["access_modifier"] == "public_access" + assert sf["labels"] == ["KPI"] From 892f84ea5d807dae2d46a2f8f090e0ddbc97e44e Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Mon, 15 Jun 2026 07:27:09 -0700 Subject: [PATCH 22/25] Keep distinct same-target Snowflake relationships in directory loader apply_pending_relationships de-duplicated by target table alone, dropping every named relationship after the first when a split Cortex project declared two joins between the same two tables. De-dup by the preserved Snowflake relationship name (or join columns) instead so alternate joins survive and using_relationships references keep resolving. --- sidemantic/adapters/snowflake.py | 20 ++++++-- tests/core/test_directory_loaders.py | 69 ++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 7433451f..4f76dee5 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -654,6 +654,20 @@ def apply_pending_relationships(self, relationships_def: list, models: dict) -> its joins and an explicit join takes precedence over a guessed one. Operates on the name-keyed ``models`` dict; adjacency is rebuilt later by the loader. """ + + def _is_duplicate(existing: Relationship) -> bool: + # Same Snowflake relationship: matched by the preserved Snowflake name + # when both carry one, otherwise by target + join columns. Distinct + # named relationships between the same two tables are NOT duplicates and + # must all survive so metrics referencing them via ``using_relationships`` + # keep resolving and alternate joins still round-trip on export. + if existing.name != right_table: + return False + existing_name = existing.metadata.get("snowflake", {}).get("name") if existing.metadata else None + if snowflake_name and existing_name: + return existing_name == snowflake_name + return existing.foreign_key == left_column and existing.primary_key == right_column + for rel_def in relationships_def: left_table = rel_def.get("left_table") right_table = rel_def.get("right_table") @@ -678,9 +692,9 @@ def apply_pending_relationships(self, relationships_def: list, models: dict) -> model = models.get(left_table) if model is None: continue - # Skip if a relationship to the same target already exists (e.g. another - # sidecar declared it) to avoid duplicates. - if any(r.name == right_table for r in model.relationships): + # Skip only an exact duplicate (same Snowflake name, or same join + # columns to the same target); keep distinct alternate joins. + if any(_is_duplicate(r) for r in model.relationships): continue model.relationships.append( Relationship( diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 56c771e2..0d0f3d41 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -757,6 +757,75 @@ def test_load_from_directory_detects_relationship_only_snowflake_sidecar(tmp_pat assert graph.find_relationship_path("orders", "customers") +def test_load_from_directory_keeps_same_target_snowflake_relationships(tmp_path): + """Two distinct named relationships between the same tables in a split project + must both survive: de-dup is by Snowflake name/columns, not the target table.""" + (tmp_path / "a_rels.yaml").write_text( + """ +relationships: + - name: orders_to_customers_billing + left_table: orders + right_table: customers + relationship_columns: + - left_column: billing_cust_ref + right_column: cust_pk + relationship_type: many_to_one + - name: orders_to_customers_shipping + left_table: orders + right_table: customers + relationship_columns: + - left_column: shipping_cust_ref + right_column: cust_pk + relationship_type: many_to_one +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + - name: billing_cust_ref + expr: billing_cust_ref + data_type: number + - name: shipping_cust_ref + expr: shipping_cust_ref + data_type: number + - name: customers + base_table: + database: db + schema: s + table: customers + primary_key: + columns: [cust_pk] + dimensions: + - name: cust_pk + expr: cust_pk + data_type: number +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + orders = layer.graph.models["orders"] + + customer_rels = [r for r in orders.relationships if r.name == "customers"] + assert {r.metadata["snowflake"]["name"] for r in customer_rels} == { + "orders_to_customers_billing", + "orders_to_customers_shipping", + } + assert {r.foreign_key for r in customer_rels} == {"billing_cust_ref", "shipping_cust_ref"} + + def test_load_from_directory_detects_view_metric_sidecar_with_snowflake_sections(tmp_path): """A tableless Cortex sidecar with verified_queries routes to Snowflake.""" # Pure view-level metrics (no table) plus Snowflake-only top-level sections. From 976b38236969ac4ceedbc658d96a2a5f53d48125 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Mon, 15 Jun 2026 08:02:21 -0700 Subject: [PATCH 23/25] Route named tableless Cortex view-metric sidecars to Snowflake A split Cortex sidecar with a root name and only tableless view metrics (no per-metric Snowflake key, no verified_queries/instructions/relationships) was skipped by load_from_directory: native detection rejects the root name and the Snowflake predicate found no Cortex signal, so the view metric was dropped. Treat a root name alongside Cortex-shaped metrics as a Snowflake signal. --- sidemantic/loaders.py | 23 ++++++++++--- tests/core/test_directory_loaders.py | 49 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 92167fef..e6357e8f 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -545,17 +545,20 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: - Snowflake-shaped top-level ``relationships`` (relationship-only sidecar), or - top-level ``metrics`` carrying a Snowflake-only metric key (``table`` or per- metric ``access_modifier``/``labels``/``tags``/``non_additive_dimensions``/ - ``using_relationships``). + ``using_relationships``), or + - a root ``name`` alongside Cortex-shaped ``metrics`` -- a tableless view-metric + sidecar whose only Cortex signal is the root ``name`` the native format rejects. Any present metrics must be Cortex-shaped (``expr`` with no MetricFlow - ``type_params``/``measure`` markers). A tableless metrics file with none of - these signals is left to native detection. + ``type_params``/``measure`` markers). A tableless metrics file with no root + ``name`` and none of these signals is left to native detection. """ if not isinstance(data, dict) or "tables" in data: return False metrics = data.get("metrics") has_snowflake_metric_key = False + has_cortex_metrics = False if metrics is not None: if not isinstance(metrics, list) or not metrics: return False @@ -568,9 +571,21 @@ def _looks_like_snowflake_metrics_file(data: dict) -> bool: return False if any(key in metric for key in _SNOWFLAKE_METRIC_KEYS): has_snowflake_metric_key = True + has_cortex_metrics = True has_snowflake_section = any(section in data for section in _SNOWFLAKE_TOP_LEVEL_SECTIONS) - return has_snowflake_metric_key or has_snowflake_section or _looks_like_snowflake_relationships(data) + # A tableless Cortex sidecar may carry only a root ``name`` plus view-level + # metrics (no per-metric Snowflake key, no Snowflake sections). The root + # ``name`` is a Cortex semantic-model field the native format rejects, so its + # presence alongside Cortex-shaped metrics is a reliable Snowflake signal -- + # without it the file is dropped by both native and Snowflake detection. + has_snowflake_root_name = has_cortex_metrics and isinstance(data.get("name"), str) + return ( + has_snowflake_metric_key + or has_snowflake_section + or has_snowflake_root_name + or _looks_like_snowflake_relationships(data) + ) def _contains_yaml_key(value: object, key: str) -> bool: diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 0d0f3d41..f5ed86e4 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -877,6 +877,55 @@ def test_load_from_directory_detects_view_metric_sidecar_with_snowflake_sections assert snowflake_meta.get("custom_instructions") == "Prefer revenue." +def test_load_from_directory_detects_named_view_metric_sidecar(tmp_path): + """A Cortex sidecar with a root ``name`` and only tableless view metrics (no + Snowflake-only key or section) still routes to Snowflake, not silently dropped. + + The root ``name`` is the sole Cortex signal: native detection rejects ``name`` + so the file is not native-compatible, and without this routing the view metric + is lost on the CLI load_from_directory / export-native path. + """ + (tmp_path / "a_sidecar.yaml").write_text( + """ +name: view_metrics +metrics: + - name: global_ratio + expr: orders.revenue / orders.order_count +""" + ) + (tmp_path / "z_tables.yaml").write_text( + """ +name: tm +tables: + - name: orders + base_table: + database: db + schema: s + table: orders + primary_key: + columns: [order_id] + dimensions: + - name: order_id + expr: order_id + data_type: number + facts: + - name: amount + expr: amount + data_type: number + metrics: + - name: revenue + expr: SUM(amount) + - name: order_count + expr: COUNT(order_id) +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + + assert "global_ratio" in layer.graph.metrics + + def test_load_from_directory_explicit_snowflake_relationship_beats_inference(tmp_path): """An explicit Cortex relationship takes precedence over a guessed foreign key.""" # orders has customer_id (inferable to customers) AND an explicit Snowflake join. From 3d47659c49b9c3a77461d52b3dbbb0e9ced9b219 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Mon, 15 Jun 2026 09:13:45 -0700 Subject: [PATCH 24/25] Fix Snowflake access override and SQL metadata frontmatter Snowflake export: when public is false, write access_modifier private_access unconditionally so a stale public_access value carried in metadata no longer keeps the field public. Applies to dimensions, facts, and metrics. Native SQL frontmatter: strip root-only metadata before deciding whether a .sql frontmatter is a model, so graph-level SQL files containing only metadata plus METRIC/PARAMETER definitions still load. Graph metadata is preserved on the graph; model metadata is reattached when the frontmatter is a model. --- sidemantic/adapters/sidemantic.py | 17 ++++- sidemantic/adapters/snowflake.py | 12 +++- .../sidemantic_adapter/test_parsing.py | 40 ++++++++++++ tests/adapters/snowflake/test_roundtrip.py | 64 +++++++++++++++++++ 4 files changed, 128 insertions(+), 5 deletions(-) diff --git a/sidemantic/adapters/sidemantic.py b/sidemantic/adapters/sidemantic.py index b343fddc..fdcfcbff 100644 --- a/sidemantic/adapters/sidemantic.py +++ b/sidemantic/adapters/sidemantic.py @@ -286,6 +286,10 @@ def normalize_sql_frontmatter(frontmatter: dict) -> dict: normalized.pop("connection", None) normalized.pop("models", None) normalized.pop("parameters", None) + # ``metadata`` is a root-only native field (graph-level), so it must not by + # itself make the frontmatter look like a model definition. Graph metadata is + # extracted separately by the caller before this decision. + normalized.pop("metadata", None) return normalized @@ -356,9 +360,13 @@ def parse(self, source: str | Path) -> SemanticGraph: raise ValueError(f"{source_path}: invalid SQL definitions: {exc}") from exc # Parse frontmatter as a model only when it still contains model fields - # after native contract metadata such as `version` is removed. + # after native contract metadata such as `version`/`metadata` is removed. normalized_frontmatter = normalize_sql_frontmatter(frontmatter) if frontmatter else {} if normalized_frontmatter: + # ``metadata`` is a valid model field, so re-attach it when the + # frontmatter is a model so the model keeps its own metadata. + if frontmatter.get("metadata") is not None: + normalized_frontmatter["metadata"] = frontmatter["metadata"] model = self._parse_model(normalized_frontmatter, source_path=source_path) if model: # Add SQL-defined metrics/segments to the model @@ -368,7 +376,12 @@ def parse(self, source: str | Path) -> SemanticGraph: model.pre_aggregations.extend(sql_preaggs) graph.add_model(model) else: - # No frontmatter - treat as graph-level metrics/segments + # No model frontmatter - treat as graph-level metrics/segments. + # Root-only ``metadata`` (e.g. Snowflake Cortex top-level + # sections) is preserved on the graph here. + graph_metadata = frontmatter.get("metadata") if frontmatter else None + if isinstance(graph_metadata, dict): + graph.metadata.update(graph_metadata) for metric in sql_metrics: graph.add_metric(metric) for param in sql_parameters: diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index 4f76dee5..f339c1a5 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -1006,7 +1006,9 @@ def _export_dimension_extras(dim: Dimension, dim_def: dict) -> None: for key, value in snowflake_meta.items(): dim_def.setdefault(key, value) if not dim.public: - dim_def.setdefault("access_modifier", "private_access") + # Override any stale ``public_access`` carried over in metadata so the + # Sidemantic visibility flag wins. + dim_def["access_modifier"] = "private_access" def _export_fact(self, metric: Metric) -> dict: """Export metric as Snowflake fact. @@ -1045,7 +1047,9 @@ def _export_fact(self, metric: Metric) -> dict: for key, value in snowflake_meta.items(): fact.setdefault(key, value) if not metric.public: - fact.setdefault("access_modifier", "private_access") + # Override any stale ``public_access`` carried over in metadata so the + # Sidemantic visibility flag wins. + fact["access_modifier"] = "private_access" return fact @@ -1111,7 +1115,9 @@ def _export_metric(self, metric: Metric, *, top_level: bool = False) -> dict: for key, value in snowflake_meta.items(): metric_def.setdefault(key, value) if not metric.public: - metric_def.setdefault("access_modifier", "private_access") + # Override any stale ``public_access`` carried over in metadata so the + # Sidemantic visibility flag wins. + metric_def["access_modifier"] = "private_access" return metric_def diff --git a/tests/adapters/sidemantic_adapter/test_parsing.py b/tests/adapters/sidemantic_adapter/test_parsing.py index 7e6184ed..fe9442af 100644 --- a/tests/adapters/sidemantic_adapter/test_parsing.py +++ b/tests/adapters/sidemantic_adapter/test_parsing.py @@ -920,6 +920,46 @@ def test_parse_native_sql_version_only_frontmatter_preserves_graph_parameter(tmp assert graph.parameters["status_filter"].type == "string" +def test_parse_native_sql_metadata_only_frontmatter_preserves_graph_definitions(tmp_path): + """Root-only metadata frontmatter must not swallow graph-level SQL metrics/params.""" + adapter = SidemanticAdapter() + sql_path = tmp_path / "metrics.sql" + sql_path.write_text( + """ +--- +version: 1 +metadata: + description: Top-level Cortex sections + owner: analytics +--- + +METRIC ( + name order_count, + agg count +); + +PARAMETER ( + name status_filter, + type string, + default_value 'paid' +); +""" + ) + + graph = adapter.parse(sql_path) + + # No model is created from root-only metadata frontmatter. + assert len(graph.models) == 0 + # Graph-level metric and parameter still load. + assert "order_count" in graph.metrics + assert graph.metrics["order_count"].agg == "count" + assert "status_filter" in graph.parameters + assert graph.parameters["status_filter"].type == "string" + # Root metadata is preserved on the graph. + assert graph.metadata.get("description") == "Top-level Cortex sections" + assert graph.metadata.get("owner") == "analytics" + + def test_parse_native_sql_frontmatter_rejects_unsupported_version(tmp_path): """Test unsupported native SQL frontmatter versions fail early.""" adapter = SidemanticAdapter() diff --git a/tests/adapters/snowflake/test_roundtrip.py b/tests/adapters/snowflake/test_roundtrip.py index 7001a401..ce686236 100644 --- a/tests/adapters/snowflake/test_roundtrip.py +++ b/tests/adapters/snowflake/test_roundtrip.py @@ -645,6 +645,70 @@ def test_roundtrip_private_access_modifier_maps_to_public_false(self, adapter, t graph2 = adapter.parse(output_file) assert graph2.models["orders"].get_dimension("ssn").public is False + def test_public_false_overrides_stale_public_access_metadata(self, adapter, tmp_path): + """public=False must win over a public_access modifier carried in metadata. + + A field imported with access_modifier: public_access keeps that value in + metadata. If a user later sets public=False (native YAML/API) and exports + back to Snowflake, the field must become private_access, not stay public. + """ + source = tmp_path / "pub.yaml" + source.write_text( + """ +name: pub_test +tables: + - name: orders + base_table: {database: db, schema: s, table: orders} + primary_key: {columns: [id]} + dimensions: + - name: ssn + expr: ssn + data_type: text + access_modifier: public_access + facts: + - name: amount + expr: amount + data_type: number + access_modifier: public_access + metrics: + - name: total + expr: SUM(amount) + access_modifier: public_access + non_additive_dimensions: + - {table: orders, dimension: snapshot_date} +""" + ) + + graph = adapter.parse(source) + orders = graph.models["orders"] + # Imported public_access is preserved in metadata. + assert orders.get_dimension("ssn").metadata["snowflake"]["access_modifier"] == "public_access" + assert orders.get_metric("amount").metadata["snowflake"]["access_modifier"] == "public_access" + assert orders.get_metric("total").metadata["snowflake"]["access_modifier"] == "public_access" + + # User flips visibility to private via the native API. + orders.get_dimension("ssn").public = False + orders.get_metric("amount").public = False + orders.get_metric("total").public = False + + output_file = tmp_path / "out.yaml" + adapter.export(graph, output_file) + data = yaml.safe_load(output_file.read_text()) + table = data["tables"][0] + exported_dim = next(d for d in table["dimensions"] if d["name"] == "ssn") + exported_fact = next(f for f in table["facts"] if f["name"] == "amount") + exported_metric = next(m for m in table["metrics"] if m["name"] == "total") + assert exported_dim["access_modifier"] == "private_access" + assert exported_fact["access_modifier"] == "private_access" + assert exported_metric["access_modifier"] == "private_access" + + # Re-parsing keeps them non-public. + graph2 = adapter.parse(output_file) + orders2 = graph2.models["orders"] + assert orders2.get_dimension("ssn").public is False + assert orders2.get_metric("amount").public is False + assert orders2.get_metric("total").public is False + def test_export_strips_model_placeholder_from_table_scoped_metric(self, adapter, tmp_path): """Table-scoped derived metrics must not leak {model} placeholders into Snowflake.""" source = tmp_path / "ph.yaml" From 924caf08b95342cedbfba32d36684dffa3ca03b8 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Tue, 16 Jun 2026 20:40:26 -0700 Subject: [PATCH 25/25] Export Snowflake Cortex search service as single nested shape Snowflake documents the nested cortex_search_service object as replacing the deprecated flat cortex_search_service_name. The dimension export path emitted both: the flat key from the first-class name plus the nested object carried in metadata, so round-tripped YAML duplicated the service and a user edit to the native name left a stale conflicting nested value. Emit only the nested cortex_search_service, syncing its service to the first-class cortex_search_service_name so native edits win, and drop the deprecated flat key. --- sidemantic/adapters/snowflake.py | 17 ++++++- .../snowflake/test_cortex_features.py | 44 +++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/sidemantic/adapters/snowflake.py b/sidemantic/adapters/snowflake.py index f339c1a5..b9959a5b 100644 --- a/sidemantic/adapters/snowflake.py +++ b/sidemantic/adapters/snowflake.py @@ -1000,9 +1000,22 @@ def _export_dimension_extras(dim: Dimension, dim_def: dict) -> None: dim_def["synonyms"] = dim.synonyms if dim.sample_values: dim_def["sample_values"] = dim.sample_values + snowflake_meta = dict((dim.metadata or {}).get("snowflake", {})) + # Snowflake documents the nested ``cortex_search_service`` object as + # replacing the deprecated flat ``cortex_search_service_name`` string, so + # emit only one shape. Prefer the nested object (it can carry extra keys + # like ``literal_column``/``database``/``schema``) and keep its ``service`` + # in sync with the first-class ``cortex_search_service_name`` so a user + # edit to the native name wins instead of leaving a stale nested value. + nested = snowflake_meta.pop("cortex_search_service", None) if dim.cortex_search_service_name: - dim_def["cortex_search_service_name"] = dim.cortex_search_service_name - snowflake_meta = (dim.metadata or {}).get("snowflake", {}) + if isinstance(nested, dict): + nested = {**nested, "service": dim.cortex_search_service_name} + else: + nested = {"service": dim.cortex_search_service_name} + dim_def["cortex_search_service"] = nested + elif nested is not None: + dim_def["cortex_search_service"] = nested for key, value in snowflake_meta.items(): dim_def.setdefault(key, value) if not dim.public: diff --git a/tests/adapters/snowflake/test_cortex_features.py b/tests/adapters/snowflake/test_cortex_features.py index 61735ccd..e96c9456 100644 --- a/tests/adapters/snowflake/test_cortex_features.py +++ b/tests/adapters/snowflake/test_cortex_features.py @@ -154,6 +154,50 @@ def test_roundtrip_preserves_cortex_features(self, adapter, graph, tmp_path): assert len(graph2.verified_queries) == 1 + def _exported_customer_name(self, adapter, graph, tmp_path): + output = tmp_path / "out.yaml" + adapter.export(graph, output) + data = yaml.safe_load(output.read_text()) + for table in data["tables"]: + for dim in table.get("dimensions", []): + if dim["name"] == "customer_name": + return dim + raise AssertionError("customer_name dimension missing from export") + + def test_cortex_search_service_exports_only_nested_shape(self, adapter, graph, tmp_path): + """A dimension imported from the nested ``cortex_search_service`` must export + only that nested shape, never the deprecated flat ``cortex_search_service_name``. + + Snowflake documents ``cortex_search_service`` as replacing the deprecated + ``cortex_search_service_name``, so round-tripped YAML must not carry both + keys (a stale flat/nested pair can conflict once a user edits one). + """ + dim = self._exported_customer_name(adapter, graph, tmp_path) + + # Only the nested object is emitted; the flat deprecated key is gone. + assert "cortex_search_service_name" not in dim + nested = dim["cortex_search_service"] + assert nested["service"] == "customer_name_search" + # Extra nested keys from the source survive. + assert nested["literal_column"] == "customer_name" + assert nested["database"] == "analytics" + assert nested["schema"] == "sales" + + def test_edited_native_search_service_name_wins_over_stale_nested(self, adapter, graph, tmp_path): + """Editing the first-class ``cortex_search_service_name`` updates the nested + ``service`` on export instead of leaving a stale conflicting value.""" + cust = graph.models["orders"].get_dimension("customer_name") + cust.cortex_search_service_name = "renamed_search" + + dim = self._exported_customer_name(adapter, graph, tmp_path) + + assert "cortex_search_service_name" not in dim + nested = dim["cortex_search_service"] + # The edited native name wins; the nested ``service`` is not stale. + assert nested["service"] == "renamed_search" + # Other nested keys are still preserved. + assert nested["database"] == "analytics" + def test_top_level_sections_survive_native_roundtrip(self, adapter, graph, tmp_path): """Snowflake -> native (export-native) -> Snowflake preserves top-level sections.""" from sidemantic.adapters.sidemantic import SidemanticAdapter