Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e985426
feat(snowflake): import Cortex Analyst measures and enrichment keys
nicosuave Jun 13, 2026
9124e4b
Auto-update JSON schema
github-actions[bot] Jun 13, 2026
b7f0127
Fix Snowflake top-level metric qualification and round-trip export
nicosuave Jun 14, 2026
6a14c81
Skip unrepresentable Snowflake metrics and wire Cortex fields into na…
nicosuave Jun 14, 2026
b19d987
Coerce Snowflake sample_values to str and export graph-level metric s…
nicosuave Jun 14, 2026
f718051
Preserve graph-level metadata across native export round-trips
nicosuave Jun 14, 2026
6640a93
Preserve Snowflake relationship names and metric using_relationships
nicosuave Jun 14, 2026
2284417
Match owned Snowflake metrics by identity so same-named top-level met…
nicosuave Jun 14, 2026
55c8517
Accept Snowflake enrichment fields in the Rust native schema
nicosuave Jun 14, 2026
642cde3
Merge remote-tracking branch 'origin/main' into wf-snowflake-200
nicosuave Jun 14, 2026
a322a39
Merge Snowflake graph metadata during directory loading
nicosuave Jun 15, 2026
b68e9e2
Round-trip Snowflake non-additive metrics, private access, multi-file…
nicosuave Jun 15, 2026
71d5216
Keep Snowflake metric expressions resolvable across contexts
nicosuave Jun 15, 2026
7cf5af0
Defer Snowflake top-level metrics until all tables are parsed
nicosuave Jun 15, 2026
da115d3
Resolve Snowflake cross-file top-level metrics in the CLI loader
nicosuave Jun 15, 2026
87ea415
Detect metric-only Snowflake Cortex files in the directory loader
nicosuave Jun 15, 2026
c684a99
Allow tableless metrics in split Snowflake Cortex metrics files
nicosuave Jun 15, 2026
d20f85d
Route tableless Snowflake view-metric sidecars with top-level sections
nicosuave Jun 15, 2026
5ef649c
Route instruction-only Snowflake Cortex sidecars
nicosuave Jun 15, 2026
fca4603
Avoid pending-metric name collisions and merge non-OSI root metadata …
nicosuave Jun 15, 2026
0ffb3a8
Route and defer relationship-only Snowflake Cortex sidecars
nicosuave Jun 15, 2026
ef6e760
Prefer explicit Snowflake joins and route metric-key sidecars
nicosuave Jun 15, 2026
4db2970
Merge origin/main into update-snowflake-adapter
nicosuave Jun 15, 2026
7cb7c30
Merge origin/main into update-snowflake-adapter
nicosuave Jun 15, 2026
892f84e
Keep distinct same-target Snowflake relationships in directory loader
nicosuave Jun 15, 2026
976b382
Route named tableless Cortex view-metric sidecars to Snowflake
nicosuave Jun 15, 2026
3d47659
Fix Snowflake access override and SQL metadata frontmatter
nicosuave Jun 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 161 additions & 5 deletions sidemantic/adapters/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,19 @@ class SnowflakeAdapter(BaseAdapter):
- tables -> Models
- dimensions -> Dimensions (categorical)
- time_dimensions -> Dimensions (time)
- facts -> Metrics (with default_aggregation)
- facts (a.k.a. legacy `measures`) -> Metrics (with default_aggregation)
- metrics -> Metrics (derived, table-scoped aggregations)
- relationships -> Relationships
- filters -> Segments

Reference: https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-analyst/semantic-model-spec
Also imports newer Cortex Analyst spec features:
- `synonyms` on dimensions/facts/measures/metrics
- `sample_values` and `cortex_search_service` / `cortex_search_service_name` on dimensions
- top-level `verified_queries`, `custom_instructions`, `module_custom_instructions`
- per-field keys preserved in metadata: access_modifier, is_enum, unique, labels,
tags, non_additive_dimensions, using_relationships

Reference: https://docs.snowflake.com/en/user-guide/views-semantic/semantic-view-yaml-spec
"""

def parse(self, source: str | Path) -> SemanticGraph:
Expand Down Expand Up @@ -169,6 +176,20 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None:
relationships_def = data.get("relationships") or []
self._apply_relationships(relationships_def, graph)

# Parse top-level metrics (semantic-model-scoped metrics referencing tables)
for metric_def in data.get("metrics") or []:
metric = self._parse_metric(metric_def)
Comment thread
nicosuave marked this conversation as resolved.
Outdated
if metric is None:
continue
table_name = metric_def.get("table")
if table_name and table_name in graph.models:
graph.models[table_name].metrics.append(metric)
else:
graph.metrics[metric.name] = metric
Comment thread
nicosuave marked this conversation as resolved.
Outdated

# Parse top-level Cortex Analyst sections onto the graph.
self._apply_top_level_sections(data, graph)

def _parse_table(self, table_def: dict) -> Model | None:
"""Parse Snowflake table definition into Model.

Expand Down Expand Up @@ -212,9 +233,11 @@ def _parse_table(self, table_def: dict) -> Model | None:
if dim:
dimensions.append(dim)

# Parse facts (row-level measures with default aggregation)
# Parse facts (row-level measures with default aggregation).
# Cortex Analyst's table-level `measures:` key is a legacy alias of `facts:`;
# accept both so current Cortex Analyst files import without silent data loss.
metrics = []
for fact_def in table_def.get("facts") or []:
for fact_def in (table_def.get("facts") or []) + (table_def.get("measures") or []):
metric = self._parse_fact(fact_def)
if metric:
metrics.append(metric)
Expand Down Expand Up @@ -270,6 +293,10 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None:
type=dim_type,
sql=dim_def.get("expr"),
description=dim_def.get("description"),
synonyms=dim_def.get("synonyms"),
sample_values=dim_def.get("sample_values"),
Comment thread
nicosuave marked this conversation as resolved.
Outdated
cortex_search_service_name=self._cortex_search_service_name(dim_def),
metadata=self._dimension_metadata(dim_def),
Comment thread
nicosuave marked this conversation as resolved.
)

def _parse_time_dimension(self, dim_def: dict) -> Dimension | None:
Expand All @@ -291,6 +318,10 @@ def _parse_time_dimension(self, dim_def: dict) -> Dimension | None:
sql=dim_def.get("expr"),
description=dim_def.get("description"),
granularity="day", # Default granularity
synonyms=dim_def.get("synonyms"),
sample_values=dim_def.get("sample_values"),
cortex_search_service_name=self._cortex_search_service_name(dim_def),
metadata=self._dimension_metadata(dim_def),
)

def _parse_fact(self, fact_def: dict) -> Metric | None:
Expand All @@ -309,7 +340,7 @@ def _parse_fact(self, fact_def: dict) -> Metric | None:
return None

# Map Snowflake default_aggregation to Sidemantic agg
default_agg = fact_def.get("default_aggregation", "sum").lower()
default_agg = (fact_def.get("default_aggregation") or "sum").lower()
agg_mapping = {
"sum": "sum",
"avg": "avg",
Expand All @@ -327,6 +358,8 @@ def _parse_fact(self, fact_def: dict) -> Metric | None:
agg=agg,
sql=fact_def.get("expr"),
description=fact_def.get("description"),
synonyms=fact_def.get("synonyms"),
metadata=self._measure_metadata(fact_def),
)

def _parse_metric(self, metric_def: dict) -> Metric | None:
Expand Down Expand Up @@ -375,6 +408,8 @@ def _parse_metric(self, metric_def: dict) -> Metric | None:
agg=agg_func,
sql=inner_expr,
description=metric_def.get("description"),
synonyms=metric_def.get("synonyms"),
metadata=self._metric_metadata(metric_def),
)

# Complex expression (multiple aggregations or couldn't parse simple one)
Expand All @@ -385,8 +420,89 @@ def _parse_metric(self, metric_def: dict) -> Metric | None:
type="derived",
sql=qualified_expr,
description=metric_def.get("description"),
synonyms=metric_def.get("synonyms"),
metadata=self._metric_metadata(metric_def),
)

@staticmethod
def _cortex_search_service_name(dim_def: dict) -> str | None:
    """Return the Cortex Search service name linked to a dimension, if any.

    Two spellings are understood:

    * the legacy flat ``cortex_search_service_name`` value, which wins
      whenever it is truthy;
    * the newer ``cortex_search_service`` entry, which may be a mapping (its
      ``service`` value is returned as-is) or a bare string.

    Args:
        dim_def: Raw dimension definition as loaded from the YAML file.

    Returns:
        The resolved service name, or ``None`` when neither spelling yields
        one (including when ``cortex_search_service`` has an unexpected type).
    """
    legacy_name = dim_def.get("cortex_search_service_name")
    if legacy_name:
        return legacy_name

    service_ref = dim_def.get("cortex_search_service")
    if isinstance(service_ref, str):
        return service_ref
    if isinstance(service_ref, dict):
        # A mapping without a "service" key resolves to None.
        return service_ref.get("service")
    return None

@staticmethod
def _collect_metadata(definition: dict, keys: tuple[str, ...]) -> dict | None:
    """Preserve selected Cortex Analyst per-field keys under a snowflake namespace.

    Args:
        definition: Raw field definition (dimension, fact, or metric).
        keys: Names of the keys to carry over, in the order they should
            appear in the result.

    Returns:
        ``{"snowflake": {...}}`` holding every requested key whose value is
        not ``None`` (falsy values such as ``False`` or ``[]`` are kept, and
        values are stored by reference, not copied), or ``None`` when no
        requested key has a value.
    """
    preserved = {}
    for field_name in keys:
        value = definition.get(field_name)
        if value is not None:
            preserved[field_name] = value
    return {"snowflake": preserved} if preserved else None

def _dimension_metadata(self, dim_def: dict) -> dict | None:
    """Collect the Snowflake-specific keys preserved for a dimension.

    Delegates to ``_collect_metadata`` with the dimension-level key list and
    returns whatever that helper returns.
    """
    preserved_keys = (
        "unique",
        "is_enum",
        "access_modifier",
        "labels",
        "tags",
        "cortex_search_service",
    )
    return self._collect_metadata(dim_def, preserved_keys)

def _measure_metadata(self, measure_def: dict) -> dict | None:
    """Collect the Snowflake-specific keys preserved for a fact/measure.

    Delegates to ``_collect_metadata`` with the measure-level key list and
    returns whatever that helper returns.
    """
    preserved_keys = (
        "access_modifier",
        "is_enum",
        "labels",
        "tags",
        "non_additive_dimensions",
    )
    return self._collect_metadata(measure_def, preserved_keys)

def _metric_metadata(self, metric_def: dict) -> dict | None:
    """Collect the Snowflake-specific keys preserved for a metric.

    Delegates to ``_collect_metadata`` with the metric-level key list and
    returns whatever that helper returns.
    """
    preserved_keys = (
        "access_modifier",
        "labels",
        "tags",
        "non_additive_dimensions",
        "using_relationships",
    )
    return self._collect_metadata(metric_def, preserved_keys)

@staticmethod
def _apply_top_level_sections(data: dict, graph: SemanticGraph) -> None:
    """Attach top-level Cortex Analyst sections to the graph.

    Reads ``verified_queries``, ``custom_instructions`` and
    ``module_custom_instructions`` from ``data``. Each is exposed as an
    attribute on ``graph`` and mirrored under ``graph.metadata["snowflake"]``.

    Verified queries accumulate across calls, so a directory parse collects
    the queries of every file. An instruction value is replaced only when the
    current file supplies one; otherwise the previously stored value is kept,
    and the attribute is initialised to ``None`` if it did not exist yet.

    The ``snowflake`` metadata namespace is created only when there is
    something to mirror, so files without any top-level section leave
    ``graph.metadata`` untouched.

    Args:
        data: Parsed document for a single file.
        graph: Graph being populated; mutated in place.
    """
    # Accumulate verified queries across files in a directory parse.
    accumulated = list(getattr(graph, "verified_queries", None) or [])
    accumulated.extend(data.get("verified_queries") or [])
    graph.verified_queries = accumulated

    instruction_sections = ("custom_instructions", "module_custom_instructions")
    for section in instruction_sections:
        supplied = data.get(section)
        if supplied is not None:
            setattr(graph, section, supplied)
        elif not hasattr(graph, section):
            setattr(graph, section, None)

    # Gather what needs mirroring before touching graph.metadata.
    mirrored = {}
    if accumulated:
        mirrored["verified_queries"] = accumulated
    for section in instruction_sections:
        current = getattr(graph, section)
        if current is not None:
            mirrored[section] = current

    if not mirrored:
        # Nothing to record: do not leave an empty "snowflake" namespace behind.
        return

    # NOTE(review): guards a graph whose metadata is unset; confirm whether
    # SemanticGraph.metadata can actually be None.
    if graph.metadata is None:
        graph.metadata = {}
    graph.metadata.setdefault("snowflake", {}).update(mirrored)

def _parse_filter(self, filter_def: dict) -> Segment | None:
"""Parse Snowflake filter into Sidemantic segment.

Expand Down Expand Up @@ -493,6 +609,17 @@ def export(self, graph: SemanticGraph, output_path: str | Path) -> None:
if not semantic_model["relationships"]:
del semantic_model["relationships"]

# Export top-level Cortex Analyst sections if present on the graph.
verified_queries = getattr(graph, "verified_queries", None)
if verified_queries:
semantic_model["verified_queries"] = verified_queries
custom_instructions = getattr(graph, "custom_instructions", None)
if custom_instructions:
semantic_model["custom_instructions"] = custom_instructions
module_custom_instructions = getattr(graph, "module_custom_instructions", None)
if module_custom_instructions:
semantic_model["module_custom_instructions"] = module_custom_instructions

output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, "w") as f:
Expand Down Expand Up @@ -603,6 +730,8 @@ def _export_dimension(self, dim: Dimension) -> dict:
}
dim_def["data_type"] = type_mapping.get(dim.type, "TEXT")

self._export_dimension_extras(dim, dim_def)

return dim_def

def _export_time_dimension(self, dim: Dimension) -> dict:
Expand All @@ -624,8 +753,23 @@ def _export_time_dimension(self, dim: Dimension) -> dict:

dim_def["data_type"] = "TIMESTAMP"

self._export_dimension_extras(dim, dim_def)

return dim_def

@staticmethod
def _export_dimension_extras(dim: Dimension, dim_def: dict) -> None:
    """Attach Cortex Analyst enrichment keys to an exported dimension.

    Writes ``synonyms``, ``sample_values`` and the Cortex Search link onto
    ``dim_def``, then fills in every key preserved under
    ``dim.metadata["snowflake"]`` that ``dim_def`` does not already define
    (existing keys are never overwritten).

    The legacy flat ``cortex_search_service_name`` key is omitted when the
    preserved nested ``cortex_search_service`` entry already names the same
    service, so a dimension imported from the nested form alone is not
    exported with both spellings. When the two disagree, both are written.

    Args:
        dim: Dimension being exported.
        dim_def: Exported dimension mapping; mutated in place.
    """
    preserved = (dim.metadata or {}).get("snowflake")
    if not isinstance(preserved, dict):
        # Missing, None, or malformed namespace: nothing to restore.
        preserved = {}

    if dim.synonyms:
        dim_def["synonyms"] = dim.synonyms
    if dim.sample_values:
        dim_def["sample_values"] = dim.sample_values

    service_name = dim.cortex_search_service_name
    nested = preserved.get("cortex_search_service")
    nested_service = nested.get("service") if isinstance(nested, dict) else nested
    if service_name and nested_service != service_name:
        dim_def["cortex_search_service_name"] = service_name

    for key, value in preserved.items():
        dim_def.setdefault(key, value)
Comment thread
nicosuave marked this conversation as resolved.

def _export_fact(self, metric: Metric) -> dict:
"""Export metric as Snowflake fact.

Expand Down Expand Up @@ -657,6 +801,12 @@ def _export_fact(self, metric: Metric) -> dict:

fact["data_type"] = "NUMBER"

if metric.synonyms:
fact["synonyms"] = metric.synonyms
snowflake_meta = (metric.metadata or {}).get("snowflake", {})
for key, value in snowflake_meta.items():
fact.setdefault(key, value)
Comment thread
nicosuave marked this conversation as resolved.

return fact

def _export_metric(self, metric: Metric) -> dict:
Expand Down Expand Up @@ -691,6 +841,12 @@ def _export_metric(self, metric: Metric) -> dict:
elif metric.sql:
metric_def["expr"] = metric.sql

if metric.synonyms:
metric_def["synonyms"] = metric.synonyms
snowflake_meta = (metric.metadata or {}).get("snowflake", {})
for key, value in snowflake_meta.items():
metric_def.setdefault(key, value)

return metric_def

def _export_filter(self, segment: Segment) -> dict:
Expand Down
7 changes: 7 additions & 0 deletions sidemantic/core/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ class Dimension(BaseModel):
label: str | None = Field(None, description="Display label")
metadata: dict[str, Any] | None = Field(None, description="Adapter-specific metadata payload")

# Synonyms / sample values (e.g. Snowflake Cortex Analyst, Cube)
synonyms: list[str] | None = Field(None, description="Alternative names for this dimension")
sample_values: list[str] | None = Field(None, description="Representative sample values for this dimension")
cortex_search_service_name: str | None = Field(
None, description="Linked Cortex Search service name (Snowflake Cortex Analyst)"
)
Comment thread
nicosuave marked this conversation as resolved.

# Display formatting
format: str | None = Field(None, description="Display format string (e.g., '$#,##0.00', '0.00%')")
value_format_name: str | None = Field(None, description="Named format (e.g., 'usd', 'percent', 'decimal_2')")
Expand Down
3 changes: 3 additions & 0 deletions sidemantic/core/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ def validate_type_specific_fields(self):
label: str | None = Field(None, description="Display label")
metadata: dict[str, Any] | None = Field(None, description="Adapter-specific metadata payload")

# Synonyms (e.g. Snowflake Cortex Analyst measures/metrics, Cube)
synonyms: list[str] | None = Field(None, description="Alternative names for this measure/metric")

# Display formatting
format: str | None = Field(None, description="Display format string (e.g., '$#,##0.00', '0.00%')")
value_format_name: str | None = Field(None, description="Named format (e.g., 'usd', 'percent', 'decimal_2')")
Expand Down
Loading
Loading