Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 209 additions & 8 deletions sidemantic/adapters/hex.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,42 @@ def parse(self, source: str | Path) -> SemanticGraph:
def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None:
    """Parse a single Hex YAML file and add its resources to ``graph``.

    A file may contain multiple resources separated by ``---`` (multi-document
    YAML). Each document carries a top-level ``type:`` discriminator
    (``model`` or ``view``). Legacy single-document files without a ``type``
    are treated as models.

    Args:
        file_path: Path to YAML file
        graph: Semantic graph to add models to
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(file_path, encoding="utf-8") as f:
        # ``safe_load_all`` yields documents lazily, so the stream must be
        # consumed while the file is still open.
        for data in yaml.safe_load_all(f):
            # Empty documents and non-mapping documents carry no resource.
            if not data or not isinstance(data, dict):
                continue

            model = self._parse_resource(data)
            if model:
                graph.add_model(model)

def _parse_resource(self, resource_def: dict) -> Model | None:
"""Dispatch a Hex resource to the correct parser based on ``type``.

if not data:
return
Args:
resource_def: Resource definition dictionary

Returns:
Model instance or None
"""
# ``type`` is the resource discriminator on current Hex YAML. Legacy
# files omit it and are always models.
resource_type = resource_def.get("type", "model")

# Each file is a single model
model = self._parse_model(data)
if model:
graph.add_model(model)
if resource_type == "view":
return self._parse_view(resource_def)
return self._parse_model(resource_def)

def _parse_model(self, model_def: dict) -> Model | None:
"""Parse a Hex model definition into a Model.
Expand Down Expand Up @@ -108,6 +130,16 @@ def _parse_model(self, model_def: dict) -> Model | None:
if relation:
relationships.append(relation)

# Visibility: public/internal/private. Only "public" stays visible.
visibility = model_def.get("visibility")
meta = {}
if visibility is not None:
meta["visibility"] = visibility

# Display label (Model has no `label`, so it rides on `metadata`).
name = model_def.get("name")
metadata = {"label": name} if name else None

return Model(
name=model_id,
table=table,
Expand All @@ -117,6 +149,48 @@ def _parse_model(self, model_def: dict) -> Model | None:
relationships=relationships,
dimensions=dimensions,
metrics=measures,
metadata=metadata,
meta=meta or None,
)

def _parse_view(self, view_def: dict) -> Model | None:
"""Parse a Hex ``view`` resource into a Model.

Views (``type: view``) are fit-for-purpose entrypoints layered on top of
a base model. Sidemantic has no native view concept, so the view's
structure (``base`` model reference and ``contents`` groups) is preserved
on the model's ``meta`` payload for faithful round-tripping.

Args:
view_def: View definition dictionary

Returns:
Model instance or None if parsing fails
"""
view_id = view_def.get("id")
if not view_id:
return None

meta = {"hex_resource_type": "view"}

base = view_def.get("base")
if base is not None:
meta["base"] = base

contents = view_def.get("contents")
if contents is not None:
meta["contents"] = contents

name = view_def.get("name")
visibility = view_def.get("visibility")
if visibility is not None:
meta["visibility"] = visibility

return Model(
name=view_id,
description=view_def.get("description"),
metadata={"label": name} if name else None,
meta=meta,
)

def _parse_dimension(self, dim_def: dict) -> Dimension | None:
Expand Down Expand Up @@ -168,12 +242,20 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None:
elif "timestamp" in dim_type:
granularity = "hour" # Default to hour for timestamps

# Visibility: public/internal/private. Only "public" stays visible.
visibility = dim_def.get("visibility")
meta = {"visibility": visibility} if visibility is not None else None
public = visibility is None or visibility == "public"

return Dimension(
name=dim_id,
type=sidemantic_type,
sql=expr,
granularity=granularity,
description=dim_def.get("description"),
label=dim_def.get("name"),
public=public,
meta=meta,
)

def _parse_measure(self, measure_def: dict) -> Metric | None:
Expand Down Expand Up @@ -240,15 +322,63 @@ def _parse_measure(self, measure_def: dict) -> Metric | None:
# Reference to existing dimension
filters.append(filter_def)

# Semi-additive measures: non-additive across the given dimension(s).
non_additive_dimension = self._parse_semi_additive(measure_def.get("semi_additive"))

# Visibility: public/internal/private. Only "public" stays visible.
visibility = measure_def.get("visibility")
meta = {"visibility": visibility} if visibility is not None else None
public = visibility is None or visibility == "public"

return Metric(
name=measure_id,
type=metric_type,
agg=agg_type,
sql=expr,
filters=filters if filters else None,
description=measure_def.get("description"),
label=measure_def.get("name"),
non_additive_dimension=non_additive_dimension,
public=public,
meta=meta,
)

@staticmethod
def _parse_semi_additive(semi_additive) -> str | None:
"""Extract the non-additive dimension from a Hex ``semi_additive`` config.

Current Hex YAML uses an object form::

semi_additive:
over:
- dimension: <dimension_id>
pick: min | max
groupings:
- <dimension_id>

Legacy/shorthand string forms (e.g. ``semi_additive: last``) are also
accepted and ignored for the dimension extraction (there is no associated
dimension to record). Returns the first ``over`` dimension id, which maps
to Sidemantic's single ``non_additive_dimension``.

Args:
semi_additive: Raw value of the ``semi_additive`` field

Returns:
Dimension id the measure is non-additive across, or None
"""
if not semi_additive:
return None

if isinstance(semi_additive, dict):
over = semi_additive.get("over") or []
for entry in over:
if isinstance(entry, dict) and entry.get("dimension"):
return entry["dimension"]
Comment thread
nicosuave marked this conversation as resolved.
Outdated
Comment thread
nicosuave marked this conversation as resolved.
Outdated
if isinstance(entry, str):
return entry
return None

def _parse_relation(self, relation_def: dict) -> Relationship | None:
"""Parse Hex relation into Sidemantic relationship.

Expand Down Expand Up @@ -334,7 +464,18 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
Returns:
Model definition dictionary
"""
model_def = {"id": model.name}
meta = model.meta or {}

# Round-trip Hex views back to ``type: view`` resources.
if meta.get("hex_resource_type") == "view":
return self._export_view(model)

# ``type`` is the resource discriminator required on current Hex YAML.
model_def = {"id": model.name, "type": "model"}

label = (model.metadata or {}).get("label")
if label:
model_def["name"] = label

if model.sql:
model_def["base_sql_query"] = model.sql
Expand All @@ -344,11 +485,17 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
if model.description:
model_def["description"] = model.description

if meta.get("visibility"):
model_def["visibility"] = meta["visibility"]

# Export dimensions
dimensions = []
for dim in model.dimensions:
dim_def = {"id": dim.name}

if dim.label:
dim_def["name"] = dim.label

# Map Sidemantic types to Hex types
type_mapping = {
"categorical": "string",
Expand Down Expand Up @@ -377,6 +524,13 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
if dim.description:
dim_def["description"] = dim.description

# Visibility: prefer recorded value, otherwise derive from public flag.
dim_visibility = (dim.meta or {}).get("visibility")
if dim_visibility:
dim_def["visibility"] = dim_visibility
elif not dim.public:
dim_def["visibility"] = "internal"

# Mark unique dimensions
if dim.name == model.primary_key:
dim_def["unique"] = True
Expand All @@ -391,6 +545,9 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
for metric in model.metrics:
measure_def = {"id": metric.name}

if metric.label:
measure_def["name"] = metric.label

# Handle different metric types
if metric.type == "derived":
# Custom SQL aggregation
Expand Down Expand Up @@ -428,6 +585,17 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
if metric.description:
measure_def["description"] = metric.description

# Semi-additive: export as the current Hex object form.
if metric.non_additive_dimension:
measure_def["semi_additive"] = {"over": [{"dimension": metric.non_additive_dimension}]}

# Visibility: prefer recorded value, otherwise derive from public flag.
measure_visibility = (metric.meta or {}).get("visibility")
if measure_visibility:
measure_def["visibility"] = measure_visibility
elif not metric.public:
measure_def["visibility"] = "internal"

measures.append(measure_def)

if measures:
Expand All @@ -454,3 +622,36 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict:
model_def["relations"] = relations

return model_def

def _export_view(self, model: Model) -> dict:
"""Export a model that was imported from a Hex ``view`` resource.

Reconstructs the ``type: view`` resource from the metadata captured during
import (``base`` and ``contents``).

Args:
model: Model carrying ``hex_resource_type == "view"`` metadata

Returns:
View definition dictionary
"""
meta = model.meta or {}
view_def = {"id": model.name, "type": "view"}
Comment thread
nicosuave marked this conversation as resolved.

label = (model.metadata or {}).get("label")
if label:
view_def["name"] = label

if model.description:
view_def["description"] = model.description

if meta.get("visibility"):
view_def["visibility"] = meta["visibility"]

if meta.get("base") is not None:
view_def["base"] = meta["base"]

if meta.get("contents") is not None:
view_def["contents"] = meta["contents"]

return view_def
25 changes: 15 additions & 10 deletions tests/adapters/hex/test_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,11 @@ def test_stddev_aggregation(self, model):
assert stddev is not None
assert stddev.agg is not None

@pytest.mark.xfail(reason="semi_additive not yet supported in adapter")
def test_semi_additive_measure(self, model):
    """Semi-additive measures expose their ``over`` dimension."""
    metric = model.get_metric("current_mrr")
    non_additive = metric.non_additive_dimension
    # The measure's 'over' dimension is recorded as non_additive_dimension.
    assert non_additive is not None
    assert non_additive == "subscription_quarter"

def test_count_if_aggregation(self, model):
"""count_if func is parsed as conditional count."""
Expand Down Expand Up @@ -310,26 +309,25 @@ def test_measure_with_dimension_filter(self, model):
assert conv.filters is not None
assert len(conv.filters) > 0

@pytest.mark.xfail(reason="semi_additive not yet supported in adapter")
def test_semi_additive_daily_active_users(self, model):
    """Semi-additive DAU measure preserves non-additivity."""
    metric = model.get_metric("daily_active_users")
    non_additive = metric.non_additive_dimension
    assert non_additive is not None
    assert non_additive == "event_date"

@pytest.mark.xfail(reason="visibility not yet mapped to model metadata")
def test_dimension_visibility(self, model):
    """Visibility settings are preserved on dimensions."""
    dim = model.get_dimension("session_id")
    # The raw visibility value lives in ``meta`` and is mirrored by ``public``.
    meta = dim.meta
    assert meta is not None
    assert meta.get("visibility") == "internal"
    assert dim.public is False

@pytest.mark.xfail(reason="visibility not yet mapped to metric metadata")
def test_measure_visibility(self, model):
    """Visibility settings are preserved on measures."""
    metric = model.get_metric("unique_sessions")
    # The raw visibility value lives in ``meta`` and is mirrored by ``public``.
    meta = metric.meta
    assert meta is not None
    assert meta.get("visibility") == "internal"
    assert metric.public is False

def test_multiple_relations(self, model):
"""Multiple relations are parsed."""
Expand Down Expand Up @@ -372,10 +370,17 @@ def test_all_models_loaded(self, graph):
assert "employees" in graph.models
assert "support_tickets" in graph.models
assert "page_views" in graph.models
# Multi-document typed fixture contributes a model and a view.
assert "subscriptions" in graph.models
assert "revenue_overview" in graph.models

def test_total_model_count(self, graph):
    """All fixture files produce the expected number of resources.

    The multi-document `subscriptions_project.yml` adds two resources
    (a model + a view), so 9 legacy files + 2 = 11 resources.
    """
    # Views are loaded into ``graph.models`` alongside regular models, so
    # they count towards the total.
    assert len(graph.models) == 11

def test_cross_model_measure_reference(self, graph):
"""Organizations model has cross-model measure referencing users."""
Expand Down
Loading
Loading