diff --git a/sidemantic/adapters/hex.py b/sidemantic/adapters/hex.py index 0a0dc0b7..7dce5e25 100644 --- a/sidemantic/adapters/hex.py +++ b/sidemantic/adapters/hex.py @@ -49,20 +49,42 @@ def parse(self, source: str | Path) -> SemanticGraph: def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: """Parse a single Hex YAML file. + A file may contain multiple resources separated by ``---`` (multi-document + YAML). Each document carries a top-level ``type:`` discriminator + (``model`` or ``view``). Legacy single-document files without a ``type`` + are treated as models. + Args: file_path: Path to YAML file graph: Semantic graph to add models to """ with open(file_path) as f: - data = yaml.safe_load(f) + documents = yaml.safe_load_all(f) + + for data in documents: + if not data or not isinstance(data, dict): + continue - if not data: - return + model = self._parse_resource(data) + if model: + graph.add_model(model) - # Each file is a single model - model = self._parse_model(data) - if model: - graph.add_model(model) + def _parse_resource(self, resource_def: dict) -> Model | None: + """Dispatch a Hex resource to the correct parser based on ``type``. + + Args: + resource_def: Resource definition dictionary + + Returns: + Model instance or None + """ + # ``type`` is the resource discriminator on current Hex YAML. Legacy + # files omit it and are always models. + resource_type = resource_def.get("type", "model") + + if resource_type == "view": + return self._parse_view(resource_def) + return self._parse_model(resource_def) def _parse_model(self, model_def: dict) -> Model | None: """Parse a Hex model definition into a Model. @@ -108,6 +130,16 @@ def _parse_model(self, model_def: dict) -> Model | None: if relation: relationships.append(relation) + # Visibility: public/internal/private. Only "public" stays visible. + visibility = model_def.get("visibility") + meta = {} + if visibility is not None: + meta["visibility"] = visibility + + # Display label (Model has no `label`, so it rides on `metadata`). + name = model_def.get("name") + metadata = {"label": name} if name else None + return Model( name=model_id, table=table, @@ -117,6 +149,48 @@ def _parse_model(self, model_def: dict) -> Model | None: relationships=relationships, dimensions=dimensions, metrics=measures, + metadata=metadata, + meta=meta or None, + ) + + def _parse_view(self, view_def: dict) -> Model | None: + """Parse a Hex ``view`` resource into a Model. + + Views (``type: view``) are fit-for-purpose entrypoints layered on top of + a base model. Sidemantic has no native view concept, so the view's + structure (``base`` model reference and ``contents`` groups) is preserved + on the model's ``meta`` payload for faithful round-tripping. + + Args: + view_def: View definition dictionary + + Returns: + Model instance or None if parsing fails + """ + view_id = view_def.get("id") + if not view_id: + return None + + meta = {"hex_resource_type": "view"} + + base = view_def.get("base") + if base is not None: + meta["base"] = base + + contents = view_def.get("contents") + if contents is not None: + meta["contents"] = contents + + name = view_def.get("name") + visibility = view_def.get("visibility") + if visibility is not None: + meta["visibility"] = visibility + + return Model( + name=view_id, + description=view_def.get("description"), + metadata={"label": name} if name else None, + meta=meta, ) def _parse_dimension(self, dim_def: dict) -> Dimension | None: @@ -168,12 +242,20 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None: elif "timestamp" in dim_type: granularity = "hour" # Default to hour for timestamps + # Visibility: public/internal/private. Only "public" stays visible. + visibility = dim_def.get("visibility") + meta = {"visibility": visibility} if visibility is not None else None + public = visibility is None or visibility == "public" + return Dimension( name=dim_id, type=sidemantic_type, sql=expr, granularity=granularity, description=dim_def.get("description"), + label=dim_def.get("name"), + public=public, + meta=meta, ) def _parse_measure(self, measure_def: dict) -> Metric | None: @@ -240,6 +322,29 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: # Reference to existing dimension filters.append(filter_def) + # Semi-additive measures: non-additive across the given dimension(s). + semi_additive = measure_def.get("semi_additive") + non_additive_dimension = self._parse_semi_additive(semi_additive) + + # Build metadata payload. + meta = {} + + # Visibility: public/internal/private. Only "public" stays visible. + visibility = measure_def.get("visibility") + if visibility is not None: + meta["visibility"] = visibility + public = visibility is None or visibility == "public" + + # Preserve the full object-form ``semi_additive`` config so that + # ``pick``/``groupings`` survive a round-trip. Sidemantic only models a + # single ``non_additive_dimension``; without stashing the original, an + # export would drop ``pick`` and the Hex spec would default it to ``max``, + # silently corrupting opening-balance (``pick: min``) snapshots. + if isinstance(semi_additive, dict): + meta["hex_semi_additive"] = semi_additive + + meta = meta or None + return Metric( name=measure_id, type=metric_type, @@ -247,8 +352,60 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: sql=expr, filters=filters if filters else None, description=measure_def.get("description"), + label=measure_def.get("name"), + non_additive_dimension=non_additive_dimension, + public=public, + meta=meta, ) + @staticmethod + def _parse_semi_additive(semi_additive) -> str | None: + """Extract the non-additive dimension from a Hex ``semi_additive`` config. + + Current Hex YAML uses an object form:: + + semi_additive: + over: + - dimension: + pick: min | max + groupings: + - + + Legacy/shorthand string forms (e.g. ``semi_additive: last``) are also + accepted and ignored for the dimension extraction (there is no associated + dimension to record). Returns the first ``over`` dimension id, which maps + to Sidemantic's single ``non_additive_dimension``. + + Args: + semi_additive: Raw value of the ``semi_additive`` field + + Returns: + Dimension id the measure is non-additive across, or None + """ + if not semi_additive: + return None + + if isinstance(semi_additive, dict): + over = semi_additive.get("over") or [] + for entry in over: + if isinstance(entry, dict) and entry.get("dimension") is not None: + dimension = entry["dimension"] + # The Hex spec allows ``dimension`` to be either a bare + # dimension id or an inline Dimension object (``{id: ...}``). + # Sidemantic's ``non_additive_dimension`` is a plain string, + # so extract the id from the object form. + if isinstance(dimension, dict): + dimension_id = dimension.get("id") + if isinstance(dimension_id, str): + return dimension_id + continue + if isinstance(dimension, str): + return dimension + continue + if isinstance(entry, str): + return entry + return None + def _parse_relation(self, relation_def: dict) -> Relationship | None: """Parse Hex relation into Sidemantic relationship. @@ -334,7 +491,18 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: Returns: Model definition dictionary """ - model_def = {"id": model.name} + meta = model.meta or {} + + # Round-trip Hex views back to ``type: view`` resources. + if meta.get("hex_resource_type") == "view": + return self._export_view(model) + + # ``type`` is the resource discriminator required on current Hex YAML. + model_def = {"id": model.name, "type": "model"} + + label = (model.metadata or {}).get("label") + if label: + model_def["name"] = label if model.sql: model_def["base_sql_query"] = model.sql @@ -344,11 +512,17 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: if model.description: model_def["description"] = model.description + if meta.get("visibility"): + model_def["visibility"] = meta["visibility"] + # Export dimensions dimensions = [] for dim in model.dimensions: dim_def = {"id": dim.name} + if dim.label: + dim_def["name"] = dim.label + # Map Sidemantic types to Hex types type_mapping = { "categorical": "string", @@ -377,6 +551,13 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: if dim.description: dim_def["description"] = dim.description + # Visibility: prefer recorded value, otherwise derive from public flag. + dim_visibility = (dim.meta or {}).get("visibility") + if dim_visibility: + dim_def["visibility"] = dim_visibility + elif not dim.public: + dim_def["visibility"] = "internal" + # Mark unique dimensions if dim.name == model.primary_key: dim_def["unique"] = True @@ -391,6 +572,9 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: for metric in model.metrics: measure_def = {"id": metric.name} + if metric.label: + measure_def["name"] = metric.label + # Handle different metric types if metric.type == "derived": # Custom SQL aggregation @@ -428,6 +612,22 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: if metric.description: measure_def["description"] = metric.description + # Semi-additive: prefer the preserved object-form config (keeps + # ``pick``/``groupings`` intact on round-trip), otherwise emit the + # minimal form derived from ``non_additive_dimension``. + preserved_semi_additive = (metric.meta or {}).get("hex_semi_additive") + if preserved_semi_additive: + measure_def["semi_additive"] = preserved_semi_additive + elif metric.non_additive_dimension: + measure_def["semi_additive"] = {"over": [{"dimension": metric.non_additive_dimension}]} + + # Visibility: prefer recorded value, otherwise derive from public flag. + measure_visibility = (metric.meta or {}).get("visibility") + if measure_visibility: + measure_def["visibility"] = measure_visibility + elif not metric.public: + measure_def["visibility"] = "internal" + measures.append(measure_def) if measures: @@ -454,3 +654,36 @@ def _export_model(self, model: Model, graph: SemanticGraph) -> dict: model_def["relations"] = relations return model_def + + def _export_view(self, model: Model) -> dict: + """Export a model that was imported from a Hex ``view`` resource. + + Reconstructs the ``type: view`` resource from the metadata captured during + import (``base`` and ``contents``). + + Args: + model: Model carrying ``hex_resource_type == "view"`` metadata + + Returns: + View definition dictionary + """ + meta = model.meta or {} + view_def = {"id": model.name, "type": "view"} + + label = (model.metadata or {}).get("label") + if label: + view_def["name"] = label + + if model.description: + view_def["description"] = model.description + + if meta.get("visibility"): + view_def["visibility"] = meta["visibility"] + + if meta.get("base") is not None: + view_def["base"] = meta["base"] + + if meta.get("contents") is not None: + view_def["contents"] = meta["contents"] + + return view_def diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py index 40c622b2..5dc0ff15 100644 --- a/sidemantic/loaders.py +++ b/sidemantic/loaders.py @@ -152,12 +152,24 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict try: yaml_data = _load_yaml_mapping(content) except Exception as e: - if _looks_like_semantic_yaml_text(content): + # Current Hex Semantic Authoring files are multi-document YAML + # (``---``-separated, typed resources). ``yaml.safe_load`` rejects + # those before any single-document detection runs, so check for + # Hex explicitly here before treating the file as unparseable. + if _looks_like_hex_yaml(content): + adapter = HexAdapter() + elif _looks_like_semantic_yaml_text(content): _handle_parse_error(file_path, e, strict=strict) - continue + continue + else: + continue + yaml_data = None # Check for MetricFlow before Sidemantic native since # "semantic_models:" contains "models:" as a substring - if _yaml_has_top_level_key(yaml_data, "semantic_models"): + if yaml_data is None: + # Format already resolved on the multi-document fallback path. + pass + elif _yaml_has_top_level_key(yaml_data, "semantic_models"): adapter = MetricFlowAdapter() elif _yaml_has_top_level_key(yaml_data, "semantic_model") and _yaml_has_top_level_key( yaml_data, "datasets" @@ -174,7 +186,9 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict adapter = SidemanticAdapter() elif _yaml_has_top_level_key(yaml_data, "metrics") and "type: " in content: adapter = MetricFlowAdapter() - elif _contains_yaml_key(yaml_data, "base_sql_table") and _contains_yaml_key(yaml_data, "measures"): + elif _is_hex_resource_mapping(yaml_data): + # Single-document Hex (legacy ``base_sql_table``/``measures`` form + # or a current typed ``type: model``/``type: view`` resource). adapter = HexAdapter() elif ( _contains_yaml_key(yaml_data, "table") @@ -379,6 +393,40 @@ def _load_yaml_mapping(content: str) -> dict: return data if isinstance(data, dict) else {} +def _is_hex_resource_mapping(data: object) -> bool: + """Return True when a single YAML mapping is a Hex Semantic Authoring resource. + + Covers both the legacy single-document form (``base_sql_table``/ + ``base_sql_query`` + ``measures``) and the current typed form where each + resource carries a ``type: model`` / ``type: view`` discriminator alongside + an ``id``. + """ + if not isinstance(data, dict): + return False + if data.get("type") in ("model", "view") and "id" in data: + return True + # ``HexAdapter`` accepts query-backed models (``base_sql_query``) in addition + # to table-backed ones; both must be recognized so directory auto-discovery + # does not silently skip query-backed Hex models on the CLI/MCP path. + if not _contains_yaml_key(data, "measures"): + return False + return _contains_yaml_key(data, "base_sql_table") or _contains_yaml_key(data, "base_sql_query") + + +def _looks_like_hex_yaml(content: str) -> bool: + """Detect Hex YAML, including multi-document (``---``-separated) files. + + Current Hex Semantic Authoring projects emit multiple typed resources in one + file separated by ``---``. ``yaml.safe_load`` rejects those, so this helper + uses ``safe_load_all`` and returns True when any document is a Hex resource. + """ + try: + documents = list(yaml.safe_load_all(content)) + except Exception: + return False + return any(_is_hex_resource_mapping(doc) for doc in documents) + + def _looks_like_semantic_yaml_text(content: str) -> bool: """Return True when malformed YAML text contains a known semantic-layer key.""" semantic_keys = ( diff --git a/sidemantic/validation.py b/sidemantic/validation.py index 608c3304..e3c7c6cd 100644 --- a/sidemantic/validation.py +++ b/sidemantic/validation.py @@ -69,7 +69,16 @@ def validate_model(model: "Model") -> list[str]: errors.append(f"Model '{model.name}' must have a primary_key defined") # Check for a physical, SQL, DAX, or externally sourced model definition. - if not model.table and not model.sql and not getattr(model, "source_uri", None) and not getattr(model, "dax", None): + # Hex ``view`` resources are presentation layers over a base model and are + # intentionally table-less, so they are exempt from this requirement. + is_hex_view = bool((getattr(model, "meta", None) or {}).get("hex_resource_type") == "view") + if ( + not is_hex_view + and not model.table + and not model.sql + and not getattr(model, "source_uri", None) + and not getattr(model, "dax", None) + ): errors.append(f"Model '{model.name}' must have one of 'table', 'sql', 'dax', or 'source_uri' defined") for label, items in [ diff --git a/sidemantic/validation_runner.py b/sidemantic/validation_runner.py index 27a54193..a6d5d30e 100644 --- a/sidemantic/validation_runner.py +++ b/sidemantic/validation_runner.py @@ -48,6 +48,21 @@ def validate_directory(directory: str | Path) -> ValidationReport: if rel.name not in layer.graph.models: report.errors.append(f"Model '{model_name}' has relationship to '{rel.name}' which doesn't exist") + # Hex ``view`` resources reference a base model by name and carry their + # own ``contents``. Both are required by the Hex spec, but views are + # exempt from the physical-source check in ``validate_model``, so a + # missing/misspelled base or absent contents would otherwise pass + # silently on the CLI validation path. + model_meta = getattr(model, "meta", None) or {} + if model_meta.get("hex_resource_type") == "view": + base = model_meta.get("base") + if not base: + report.errors.append(f"Hex view '{model_name}' must have a 'base' model reference defined") + elif base not in layer.graph.models: + report.errors.append(f"Hex view '{model_name}' references base model '{base}' which doesn't exist") + if not model_meta.get("contents"): + report.errors.append(f"Hex view '{model_name}' must have non-empty 'contents' defined") + for metric in layer.graph.metrics.values(): report.errors.extend(validate_metric(metric, layer.graph)) diff --git a/tests/adapters/hex/test_fixtures.py b/tests/adapters/hex/test_fixtures.py index 08c272a0..fc6a15d1 100644 --- a/tests/adapters/hex/test_fixtures.py +++ b/tests/adapters/hex/test_fixtures.py @@ -178,12 +178,11 @@ def test_stddev_aggregation(self, model): assert stddev is not None assert stddev.agg is not None - @pytest.mark.xfail(reason="semi_additive not yet supported in adapter") def test_semi_additive_measure(self, model): """Semi-additive measures preserve semi_additive setting.""" current_mrr = model.get_metric("current_mrr") - # Semi-additive measures should have non_additive_dimension or similar - assert current_mrr.non_additive_dimension is not None + # Semi-additive measures map to non_additive_dimension (the 'over' dimension) + assert current_mrr.non_additive_dimension == "subscription_quarter" def test_count_if_aggregation(self, model): """count_if func is parsed as conditional count.""" @@ -310,26 +309,25 @@ def test_measure_with_dimension_filter(self, model): assert conv.filters is not None assert len(conv.filters) > 0 - @pytest.mark.xfail(reason="semi_additive not yet supported in adapter") def test_semi_additive_daily_active_users(self, model): """Semi-additive DAU measure preserves non-additivity.""" dau = model.get_metric("daily_active_users") - assert dau.non_additive_dimension is not None + assert dau.non_additive_dimension == "event_date" - @pytest.mark.xfail(reason="visibility not yet mapped to model metadata") def test_dimension_visibility(self, model): """Visibility settings are preserved on dimensions.""" - # Visibility would be stored in dimension meta + # Visibility is stored in dimension meta and reflected in public flag session_dim = model.get_dimension("session_id") assert session_dim.meta is not None assert session_dim.meta.get("visibility") == "internal" + assert session_dim.public is False - @pytest.mark.xfail(reason="visibility not yet mapped to metric metadata") def test_measure_visibility(self, model): """Visibility settings are preserved on measures.""" unique_sessions = model.get_metric("unique_sessions") assert unique_sessions.meta is not None assert unique_sessions.meta.get("visibility") == "internal" + assert unique_sessions.public is False def test_multiple_relations(self, model): """Multiple relations are parsed.""" @@ -372,10 +370,17 @@ def test_all_models_loaded(self, graph): assert "employees" in graph.models assert "support_tickets" in graph.models assert "page_views" in graph.models + # Multi-document typed fixture contributes a model and a view. + assert "subscriptions" in graph.models + assert "revenue_overview" in graph.models def test_total_model_count(self, graph): - """All 9 fixture files produce 9 models.""" - assert len(graph.models) == 9 + """All fixture files produce the expected number of resources. + + The multi-document `subscriptions_project.yml` adds two resources + (a model + a view), so 9 legacy files + 2 = 11 resources. + """ + assert len(graph.models) == 11 def test_cross_model_measure_reference(self, graph): """Organizations model has cross-model measure referencing users.""" diff --git a/tests/adapters/hex/test_typed_resources.py b/tests/adapters/hex/test_typed_resources.py new file mode 100644 index 00000000..97961aae --- /dev/null +++ b/tests/adapters/hex/test_typed_resources.py @@ -0,0 +1,370 @@ +"""Tests for current Hex Semantic Authoring YAML. + +Covers the post-Aug-2025 schema: the top-level `type:` discriminator, +multi-document files separated by `---`, the `view` resource type, display +`name` labels, `visibility`, and object-form `semi_additive` measures. +""" + +import tempfile +from pathlib import Path + +import pytest +import yaml + +from sidemantic.adapters.hex import HexAdapter + +FIXTURE = "tests/fixtures/hex/subscriptions_project.yml" + + +# ============================================================================= +# MULTI-DOCUMENT + TYPED RESOURCE PARSING +# ============================================================================= + + +class TestTypedMultiDocParsing: + @pytest.fixture + def graph(self): + return HexAdapter().parse(FIXTURE) + + def test_multi_doc_yields_both_resources(self, graph): + """A `---`-separated file yields every resource, not just the first.""" + assert "subscriptions" in graph.models + assert "revenue_overview" in graph.models + + def test_typed_model_parsed(self, graph): + """`type: model` is parsed as a model with its table.""" + model = graph.models["subscriptions"] + assert model.table == "analytics.subscriptions" + + def test_model_visibility_in_meta(self, graph): + """Model-level visibility is preserved in meta.""" + model = graph.models["subscriptions"] + assert model.meta is not None + assert model.meta.get("visibility") == "public" + + def test_model_display_name_label(self, graph): + """Model `name` maps to the display label on metadata.""" + model = graph.models["subscriptions"] + assert (model.metadata or {}).get("label") == "Subscriptions" + + def test_dimension_display_name_label(self, graph): + """Dimension `name` maps to the Sidemantic display label.""" + model = graph.models["subscriptions"] + assert model.get_dimension("plan").label == "Plan Tier" + assert model.get_dimension("customer_id").label == "Customer" + + def test_measure_display_name_label(self, graph): + """Measure `name` maps to the Sidemantic display label.""" + model = graph.models["subscriptions"] + assert model.get_metric("total_mrr").label == "Total MRR" + assert model.get_metric("current_mrr").label == "Current MRR" + + def test_dimension_visibility_internal(self, graph): + """`visibility: internal` hides the dimension and is recorded in meta.""" + model = graph.models["subscriptions"] + dim = model.get_dimension("internal_notes") + assert dim.public is False + assert dim.meta.get("visibility") == "internal" + + def test_dimension_visibility_private(self, graph): + """`visibility: private` hides the dimension.""" + model = graph.models["subscriptions"] + dim = model.get_dimension("secret_token") + assert dim.public is False + assert dim.meta.get("visibility") == "private" + + def test_dimension_visibility_public_default(self, graph): + """Dimensions without a visibility stay public.""" + model = graph.models["subscriptions"] + assert model.get_dimension("plan").public is True + + def test_measure_visibility_internal(self, graph): + """`visibility: internal` hides the measure.""" + model = graph.models["subscriptions"] + m = model.get_metric("internal_mrr") + assert m.public is False + assert m.meta.get("visibility") == "internal" + + def test_semi_additive_object_form(self, graph): + """Object-form `semi_additive.over[].dimension` maps to non_additive_dimension.""" + model = graph.models["subscriptions"] + assert model.get_metric("current_mrr").non_additive_dimension == "snapshot_date" + + +# ============================================================================= +# VIEW RESOURCE PARSING +# ============================================================================= + + +class TestViewResource: + @pytest.fixture + def view(self): + graph = HexAdapter().parse(FIXTURE) + return graph.models["revenue_overview"] + + def test_view_recorded_as_view(self, view): + """View resources are tagged so they round-trip back to `type: view`.""" + assert view.meta.get("hex_resource_type") == "view" + + def test_view_base_reference(self, view): + """View `base` model reference is preserved.""" + assert view.meta.get("base") == "subscriptions" + + def test_view_contents_preserved(self, view): + """View `contents` groups are preserved verbatim.""" + contents = view.meta.get("contents") + assert contents is not None + assert contents[0]["name"] == "Revenue" + assert "total_mrr" in contents[0]["measures"] + + def test_view_label_and_description(self, view): + """View display name and description are preserved.""" + assert view.description == "Curated revenue entrypoint" + assert (view.metadata or {}).get("label") == "Revenue Overview" + + def test_view_has_no_table(self, view): + """Views are not backed by a table or SQL of their own.""" + assert view.table is None + assert view.sql is None + + +# ============================================================================= +# TYPE DISCRIMINATOR / BACKWARD COMPATIBILITY +# ============================================================================= + + +def test_explicit_type_model(): + """`type: model` is accepted explicitly.""" + hex_def = { + "id": "m", + "type": "model", + "base_sql_table": "t", + "dimensions": [{"id": "id", "type": "number", "unique": True}], + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + yaml.dump(hex_def, f) + temp_path = Path(f.name) + try: + graph = HexAdapter().parse(temp_path) + assert "m" in graph.models + assert graph.models["m"].table == "t" + finally: + temp_path.unlink() + + +def test_legacy_untyped_model_still_parses(): + """Legacy single-doc files without a `type` are still treated as models.""" + hex_def = { + "id": "legacy", + "base_sql_table": "t", + "dimensions": [{"id": "id", "type": "number", "unique": True}], + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + yaml.dump(hex_def, f) + temp_path = Path(f.name) + try: + graph = HexAdapter().parse(temp_path) + assert "legacy" in graph.models + finally: + temp_path.unlink() + + +# ============================================================================= +# EXPORT / ROUNDTRIP OF NEW FEATURES +# ============================================================================= + + +def test_export_emits_type_discriminator(): + """Exported models carry the required `type: model` discriminator.""" + adapter = HexAdapter() + graph = adapter.parse(FIXTURE) + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + temp_path = Path(f.name) + try: + # Export just the model (single-file export writes the first model). + adapter.export(graph, temp_path) + with open(temp_path) as fh: + data = yaml.safe_load(fh) + assert data["type"] == "model" + finally: + temp_path.unlink(missing_ok=True) + + +def test_roundtrip_view_and_typed_features(tmp_path): + """Typed model + view survive an import/export/import roundtrip to a directory.""" + adapter = HexAdapter() + graph1 = adapter.parse(FIXTURE) + + out_dir = tmp_path / "hex_out" + adapter.export(graph1, out_dir) + + graph2 = adapter.parse(out_dir) + + # Model survives with label, visibility, and semi-additive. + model = graph2.models["subscriptions"] + assert model.meta.get("visibility") == "public" + assert model.get_dimension("plan").label == "Plan Tier" + assert model.get_metric("current_mrr").non_additive_dimension == "snapshot_date" + assert model.get_dimension("internal_notes").public is False + assert model.get_metric("internal_mrr").public is False + + # View survives as a view with its base + contents. + view = graph2.models["revenue_overview"] + assert view.meta.get("hex_resource_type") == "view" + assert view.meta.get("base") == "subscriptions" + assert view.meta.get("contents")[0]["name"] == "Revenue" + + +def test_export_view_emits_type_view(tmp_path): + """A view model exports back to a `type: view` resource file.""" + adapter = HexAdapter() + graph = adapter.parse(FIXTURE) + + out_dir = tmp_path / "hex_out" + adapter.export(graph, out_dir) + + with open(out_dir / "revenue_overview.yml") as fh: + data = yaml.safe_load(fh) + + assert data["type"] == "view" + assert data["base"] == "subscriptions" + assert data["name"] == "Revenue Overview" + assert data["contents"][0]["measures"] == ["total_mrr", "current_mrr"] + + +def test_semi_additive_pick_and_groupings_preserved_in_meta(): + """The full object-form `semi_additive` config is stashed in measure meta.""" + doc = { + "type": "model", + "id": "balances", + "sql_table": "analytics.balances", + "measures": [ + { + "id": "opening_balance", + "func": "sum", + "of": "amount", + "semi_additive": { + "over": [{"dimension": "snapshot_date", "pick": "min"}], + "groupings": ["account_id"], + }, + } + ], + } + adapter = HexAdapter() + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + Path(f.name).write_text(yaml.safe_dump(doc)) + in_path = Path(f.name) + try: + graph = adapter.parse(in_path) + metric = graph.models["balances"].get_metric("opening_balance") + # The single non-additive dimension still maps through. + assert metric.non_additive_dimension == "snapshot_date" + # The full config (including the non-default `pick: min` and `groupings`) + # is retained so it can be re-emitted on export. + preserved = metric.meta["hex_semi_additive"] + assert preserved["over"][0]["pick"] == "min" + assert preserved["groupings"] == ["account_id"] + finally: + in_path.unlink(missing_ok=True) + + +def test_semi_additive_pick_min_survives_roundtrip(tmp_path): + """`pick: min`/`groupings` survive parse -> export -> parse without corruption. + + Without preservation the export would default `pick` to `max` per the Hex + spec, silently changing an opening-balance snapshot's semantics. + """ + doc = { + "type": "model", + "id": "balances", + "sql_table": "analytics.balances", + "measures": [ + { + "id": "opening_balance", + "func": "sum", + "of": "amount", + "semi_additive": { + "over": [{"dimension": "snapshot_date", "pick": "min"}], + "groupings": ["account_id"], + }, + } + ], + } + adapter = HexAdapter() + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + Path(f.name).write_text(yaml.safe_dump(doc)) + in_path = Path(f.name) + out_dir = tmp_path / "hex_out" + try: + graph1 = adapter.parse(in_path) + adapter.export(graph1, out_dir) + + with open(out_dir / "balances.yml") as fh: + exported = yaml.safe_load(fh) + measure = exported["measures"][0] + assert measure["semi_additive"]["over"][0]["pick"] == "min" + assert measure["semi_additive"]["groupings"] == ["account_id"] + + # And it re-imports identically. + graph2 = adapter.parse(out_dir) + metric = graph2.models["balances"].get_metric("opening_balance") + assert metric.meta["hex_semi_additive"]["over"][0]["pick"] == "min" + finally: + in_path.unlink(missing_ok=True) + + +def test_semi_additive_inline_dimension_object(tmp_path): + """`semi_additive.over[].dimension` as an inline Dimension object parses. + + The Hex spec allows the `over` dimension to be either a bare dimension id or + an inline Dimension object (`{id: ..., type: ...}`). Sidemantic's + `non_additive_dimension` is a plain string, so the inline object form must be + reduced to its id; passing the dict through fails Pydantic validation and + breaks the CLI load path for otherwise-valid snapshot measures. + """ + doc = { + "type": "model", + "id": "balances", + "sql_table": "analytics.balances", + "measures": [ + { + "id": "ending_balance", + "func": "sum", + "of": "amount", + "semi_additive": { + "over": [ + { + "dimension": {"id": "snapshot_date", "type": "date"}, + "pick": "last", + } + ], + "groupings": ["account_id"], + }, + } + ], + } + adapter = HexAdapter() + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + Path(f.name).write_text(yaml.safe_dump(doc)) + in_path = Path(f.name) + out_dir = tmp_path / "hex_out" + try: + graph = adapter.parse(in_path) + metric = graph.models["balances"].get_metric("ending_balance") + # The inline dimension object is reduced to its id. + assert metric.non_additive_dimension == "snapshot_date" + # The full inline config still round-trips through preserved meta. + adapter.export(graph, out_dir) + with open(out_dir / "balances.yml") as fh: + exported = yaml.safe_load(fh) + measure = exported["measures"][0] + assert measure["semi_additive"]["over"][0]["dimension"] == {"id": "snapshot_date", "type": "date"} + assert measure["semi_additive"]["over"][0]["pick"] == "last" + assert measure["semi_additive"]["groupings"] == ["account_id"] + finally: + in_path.unlink(missing_ok=True) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/core/test_directory_loaders.py b/tests/core/test_directory_loaders.py index 1190d02b..7d27a52a 100644 --- a/tests/core/test_directory_loaders.py +++ b/tests/core/test_directory_loaders.py @@ -226,3 +226,227 @@ def test_load_from_directory_scopes_reused_bsl_join_aliases(tmp_path): events_sql = layer.compile(metrics=["events.count"], dimensions=["events_user.name"]) assert "events_user_cte" in events_sql assert "FROM accounts" in events_sql + + +def test_load_from_directory_detects_multi_document_hex(tmp_path): + """Multi-document (``---``-separated) typed Hex files load via auto-discovery. + + ``yaml.safe_load`` rejects multi-document files, so without explicit Hex + detection the documented CLI workflow could not load current Hex projects. + """ + hex_file = tmp_path / "subscriptions_project.yml" + hex_file.write_text( + """ +id: subscriptions +type: model +base_sql_table: analytics.subscriptions +dimensions: + - id: customer_id + type: string + unique: true + - id: snapshot_date + type: date +measures: + - id: total_mrr + func: sum + of: mrr + - id: current_mrr + func: sum + of: mrr + semi_additive: + over: + - dimension: snapshot_date + pick: max +--- +id: revenue_overview +type: view +base: subscriptions +contents: + - name: Revenue + measures: + - total_mrr +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + + # Both the model and the table-less view resource are registered. + assert "subscriptions" in layer.graph.models + assert "revenue_overview" in layer.graph.models + + view = layer.graph.models["revenue_overview"] + assert view.meta.get("hex_resource_type") == "view" + assert view.table is None + + # The typed model's semi-additive config survives through the CLI load path. + assert layer.graph.models["subscriptions"].get_metric("current_mrr").non_additive_dimension == "snapshot_date" + + +def test_load_from_directory_detects_exported_hex_view(tmp_path): + """A standalone exported ``type: view`` Hex file is detected by auto-discovery.""" + view_file = tmp_path / "revenue_overview.yml" + view_file.write_text( + """ +id: revenue_overview +type: view +base: subscriptions +contents: + - name: Revenue + measures: + - total_mrr +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + + assert "revenue_overview" in layer.graph.models + assert layer.graph.models["revenue_overview"].meta.get("hex_resource_type") == "view" + + +def test_load_from_directory_detects_query_backed_hex(tmp_path): + """Untyped query-backed Hex models (``base_sql_query``) load via auto-discovery. + + ``HexAdapter`` accepts ``base_sql_query`` as well as ``base_sql_table``, but + directory auto-discovery previously required ``base_sql_table`` to select the + Hex adapter, so query-backed Hex models were silently skipped on the + documented CLI/MCP load path. + """ + hex_file = tmp_path / "support_tickets.yml" + hex_file.write_text( + """ +id: support_tickets +base_sql_query: | + SELECT id, customer_id, status + FROM support.tickets +dimensions: + - id: id + type: number + unique: true + - id: customer_id + type: number + - id: status + type: string +measures: + - id: ticket_count + func: count +""" + ) + + layer = SemanticLayer() + load_from_directory(layer, tmp_path) + + assert "support_tickets" in layer.graph.models + model = layer.graph.models["support_tickets"] + assert model._source_format == "Hex" + # The query-backed model carries its SQL, not a physical table reference. + assert model.sql is not None + assert model.table is None + assert model.get_metric("ticket_count") is not None + + +_HEX_VIEW_BASE_MODEL = """ +id: subscriptions +type: model +base_sql_table: analytics.subscriptions +dimensions: + - id: id + type: number + unique: true +measures: + - id: total + func: count +--- +""" + + +def test_validate_directory_flags_missing_hex_view_base(tmp_path): + """A Hex view without a `base` reference is reported as an error, not a pass. + + Views are exempt from the physical-source check, so an omitted base would + otherwise let `sidemantic validate` report Validation Passed for an + unresolvable view. + """ + from sidemantic.validation_runner import validate_directory + + (tmp_path / "project.yml").write_text( + _HEX_VIEW_BASE_MODEL + + """ +id: revenue_overview +type: view +contents: + - name: Revenue + measures: + - total +""" + ) + + report = validate_directory(tmp_path) + assert not report.passed + assert any("must have a 'base'" in err and "revenue_overview" in err for err in report.errors) + + +def test_validate_directory_flags_unknown_hex_view_base(tmp_path): + """A Hex view whose `base` names no loaded model is reported as an error.""" + from sidemantic.validation_runner import validate_directory + + (tmp_path / "project.yml").write_text( + _HEX_VIEW_BASE_MODEL + + """ +id: revenue_overview +type: view +base: subscriptionz +contents: + - name: Revenue + measures: + - total +""" + ) + + report = validate_directory(tmp_path) + assert not report.passed + assert any("subscriptionz" in err and "doesn't exist" in err for err in report.errors) + + +def test_validate_directory_flags_hex_view_without_contents(tmp_path): + """A Hex view with a valid `base` but no `contents` is reported as an error. + + Hex views require `contents`; without this check a view that omits it would + report Validation Passed because views are exempt from the source check. + """ + from sidemantic.validation_runner import validate_directory + + (tmp_path / "project.yml").write_text( + _HEX_VIEW_BASE_MODEL + + """ +id: revenue_overview +type: view +base: subscriptions +""" + ) + + report = validate_directory(tmp_path) + assert not report.passed + assert any("contents" in err and "revenue_overview" in err for err in report.errors) + + +def test_validate_directory_accepts_valid_hex_view_base(tmp_path): + """A Hex view with a `base` naming a loaded model emits no view errors.""" + from sidemantic.validation_runner import validate_directory + + (tmp_path / "project.yml").write_text( + _HEX_VIEW_BASE_MODEL + + """ +id: revenue_overview +type: view +base: subscriptions +contents: + - name: Revenue + measures: + - total +""" + ) + + report = validate_directory(tmp_path) + assert not any("view" in err.lower() for err in report.errors) diff --git a/tests/fixtures/hex/product_events.yml b/tests/fixtures/hex/product_events.yml index 5ec54fdf..9144d5e4 100644 --- a/tests/fixtures/hex/product_events.yml +++ b/tests/fixtures/hex/product_events.yml @@ -1,4 +1,5 @@ id: product_events +type: model base_sql_table: analytics.product_events description: Product event tracking with visibility controls @@ -91,7 +92,10 @@ measures: - id: daily_active_users func: count_distinct of: user_id - semi_additive: last + semi_additive: + over: + - dimension: event_date + pick: max visibility: public description: Daily active users (non-additive across days) diff --git a/tests/fixtures/hex/saas_analytics.yml b/tests/fixtures/hex/saas_analytics.yml index bf0d0c63..09cba659 100644 --- a/tests/fixtures/hex/saas_analytics.yml +++ b/tests/fixtures/hex/saas_analytics.yml @@ -1,4 +1,5 @@ id: saas_analytics +type: model base_sql_query: | SELECT s.id, @@ -135,7 +136,10 @@ measures: name: Current MRR func: sum of: mrr - semi_additive: end_of_period + semi_additive: + over: + - dimension: subscription_quarter + pick: max description: MRR as of end of period (semi-additive) - id: enterprise_mrr diff --git a/tests/fixtures/hex/subscriptions_project.yml b/tests/fixtures/hex/subscriptions_project.yml new file mode 100644 index 00000000..2db869e2 --- /dev/null +++ b/tests/fixtures/hex/subscriptions_project.yml @@ -0,0 +1,85 @@ +# Current Hex Semantic Authoring YAML: a typed, multi-document file. +# Two resources separated by `---`: a `model` and a `view` layered on it. +id: subscriptions +type: model +name: Subscriptions +base_sql_table: analytics.subscriptions +description: Subscription facts with current Hex typed schema +visibility: public + +dimensions: + - id: id + type: number + unique: true + + - id: customer_id + name: Customer + type: number + + - id: plan + name: Plan Tier + type: string + description: Plan tier (free, pro, enterprise) + + - id: mrr + type: number + + - id: internal_notes + type: string + visibility: internal + description: Internal-only annotation + + - id: secret_token + type: string + visibility: private + + - id: snapshot_date + type: date + description: Daily snapshot date + +measures: + - id: subscription_count + name: Subscriptions + func: count + description: Number of subscriptions + + - id: total_mrr + name: Total MRR + func: sum + of: mrr + description: Sum of monthly recurring revenue + + - id: current_mrr + name: Current MRR + func: sum + of: mrr + semi_additive: + over: + - dimension: snapshot_date + pick: max + description: MRR as of the latest snapshot (semi-additive) + + - id: internal_mrr + func: sum + of: mrr + visibility: internal + +relations: + - id: customers + type: many_to_one + join_sql: customer_id = ${customers}.id + +--- +id: revenue_overview +type: view +name: Revenue Overview +base: subscriptions +description: Curated revenue entrypoint +contents: + - name: Revenue + dimensions: + - plan + - snapshot_date + measures: + - total_mrr + - current_mrr