diff --git a/sidemantic/adapters/holistics.py b/sidemantic/adapters/holistics.py index 67a7fd19..e8459663 100644 --- a/sidemantic/adapters/holistics.py +++ b/sidemantic/adapters/holistics.py @@ -147,6 +147,12 @@ class FuncDeclaration: class AmlProperty: key: str value: AmlValue + # Originating file context for this property's definition. Set when a block is + # composed across modules (e.g. a Dataset extending a PartialDataset declared + # in another file) so property values such as `relationships` qualify their + # model/field references against the file that defined them rather than the + # consuming file. + context: _FileContext | None = None @dataclass @@ -154,6 +160,11 @@ class AmlBlock: kind: str name: str | None items: list[AmlItem] + # Originating file context for this block's definition. Set when a block is + # composed across modules (e.g. a Dataset extending a PartialDataset declared + # in another file) so its child fields resolve constants/`use` aliases against + # the file that defined them rather than the consuming file. + context: _FileContext | None = None AmlItem = ( @@ -250,6 +261,26 @@ def parse(self, source: str | Path) -> SemanticGraph: for model in resolved_models.values(): graph.add_model(model) + # Dataset-level dimensions/metrics, standalone Metric blocks, and + # PartialDataset blocks composed via .extend(). Datasets surface as + # models so their cross-model dimensions/metrics are not dropped, and + # standalone metrics register as graph-level metrics. + dataset_models, standalone_metrics, assignment_dataset_blocks = _resolve_dataset_artifacts( + documents, constants, resolved_models + ) + for model in dataset_models.values(): + if model.name not in graph.models: + graph.add_model(model) + # Dataset metrics are first-class graph metrics. add_model() only + # auto-registers time_comparison/conversion types, so surface the + # rest (derived/simple) at graph scope too for unqualified access. + for metric in model.metrics: + if metric.name not in graph.metrics: + graph.add_metric(metric) + for metric in standalone_metrics.values(): + if metric.name not in graph.metrics: + graph.add_metric(metric) + pending_relationships: list[_AmlRelationship] = [] pending_relationship_refs: list[_RelationshipRef] = [] relationships_by_name: dict[str, _AmlRelationship] = {} @@ -268,6 +299,13 @@ def parse(self, source: str | Path) -> SemanticGraph: pending_relationships.extend(dataset_relationships) pending_relationship_refs.extend(dataset_rel_refs) + # Assignment-form datasets (Dataset x = Dataset { ... }) are not top-level + # AmlBlocks, so attach their relationships from the resolved blocks. + for dataset_block, dataset_context in assignment_dataset_blocks: + dataset_relationships, dataset_rel_refs = _parse_dataset_relationships(dataset_block, dataset_context) + pending_relationships.extend(dataset_relationships) + pending_relationship_refs.extend(dataset_rel_refs) + for rel_ref in pending_relationship_refs: rel = relationships_by_name.get(rel_ref.name) if rel: @@ -533,12 +571,13 @@ def resolve_named_block(name: str) -> AmlBlock | None: if name in resolving: return None if name in model_blocks: - block, _ = model_blocks[name] - resolved_blocks[name] = block - return block + block, block_ctx = model_blocks[name] + stamped = _stamp_block_context(block, block_ctx) + resolved_blocks[name] = stamped + return stamped if name in partial_blocks: - block, _ = partial_blocks[name] - return block + block, block_ctx = partial_blocks[name] + return _stamp_block_context(block, block_ctx) assignment_entry = assignments.get(name) if assignment_entry: assignment, context = assignment_entry @@ -589,11 +628,14 @@ def _resolve_block_from_value( merged = _merge_blocks(merged, _as_model_block(extension_block)) return merged - if isinstance(value, TypedBlock) and value.type_name in {"Model", "PartialModel"}: - return AmlBlock(kind=value.type_name, name=None, items=value.items) + if isinstance(value, TypedBlock) and value.type_name in {"Model", "PartialModel", "Dataset", "PartialDataset"}: + # Inlined block body was authored in `context`'s file; stamp child fields + # so they resolve constants/`use` aliases against that file when this block + # is merged into a dataset/model defined elsewhere. + return _stamp_block_context(AmlBlock(kind=value.type_name, name=None, items=value.items), context) if isinstance(value, InlineBlock): - return AmlBlock(kind="Model", name=None, items=value.items) + return _stamp_block_context(AmlBlock(kind="Model", name=None, items=value.items), context) if isinstance(value, Identifier): qualified_name = _qualify_name(value.name, context) @@ -609,7 +651,29 @@ def _resolve_block_from_value( def _as_model_block(block: AmlBlock) -> AmlBlock: if block.kind == "Model": return block - return AmlBlock(kind="Model", name=block.name, items=block.items) + return AmlBlock(kind="Model", name=block.name, items=block.items, context=block.context) + + +def _stamp_block_context(block: AmlBlock, context: _FileContext) -> AmlBlock: + """Tag a block's child field blocks with their defining context. + + Used when a block is pulled into a cross-module composition (e.g. a Dataset + extending a PartialDataset from another file). Each child field (dimension / + measure / metric) records the file it was authored in so its constants and + `use` aliases resolve against that file, not the consuming dataset's file. + Properties (e.g. `relationships`) are stamped too so their model/field + references qualify against the authoring file. Child items that already carry + a context keep it (the closest origin wins). + """ + stamped_items: list[AmlItem] = [] + for item in block.items: + if isinstance(item, AmlBlock): + stamped_items.append(_stamp_block_context(item, context) if item.context is None else item) + elif isinstance(item, AmlProperty): + stamped_items.append(item if item.context is not None else AmlProperty(item.key, item.value, context)) + else: + stamped_items.append(item) + return AmlBlock(kind=block.kind, name=block.name, items=stamped_items, context=block.context or context) def _merge_blocks(base: AmlBlock, extension: AmlBlock) -> AmlBlock: @@ -648,7 +712,16 @@ def _merge_blocks(base: AmlBlock, extension: AmlBlock) -> AmlBlock: merged_items.append(item) - return AmlBlock(kind=base.kind, name=base.name, items=merged_items) + # Preserve the defining context so child fields composed across modules keep + # resolving constants/`use` aliases against their authoring file. The + # extension wins when it overrides the base (its block is the one taking + # effect), falling back to the base's context. + return AmlBlock( + kind=base.kind, + name=base.name, + items=merged_items, + context=extension.context or base.context, + ) def _qualify_declared_name(name: str, module_prefix: str | None) -> str: @@ -1001,13 +1074,17 @@ def _parse_model_block(block: AmlBlock, constants: dict[str, AmlValue], context: for item in block.items: if isinstance(item, AmlBlock) and item.kind == "dimension": - dimension, is_primary = _parse_dimension_block(item, constants, context) + # Fields composed from a PartialModel in another file keep their own + # defining context so their constants/`use` aliases resolve correctly. + item_context = item.context or context + dimension, is_primary = _parse_dimension_block(item, constants, item_context) if dimension: dimensions.append(dimension) if is_primary and primary_key is None: primary_key = dimension.name elif isinstance(item, AmlBlock) and item.kind in {"measure", "metric"}: - metric = _parse_measure_block(item, constants, context) + item_context = item.context or context + metric = _parse_measure_block(item, constants, item_context) if metric: metrics.append(metric) @@ -1032,15 +1109,19 @@ def _parse_dimension_block( return None, False properties = _properties_from_items(block.items) + prop_contexts = _property_contexts_from_items(block.items) - dim_type_raw = _value_as_string(properties.get("type"), constants, context) - label = _value_as_string(properties.get("label"), constants, context) - description = _value_as_string(properties.get("description"), constants, context) - fmt = _value_as_string(properties.get("format"), constants, context) + def ctx_for(key: str) -> _FileContext: + return prop_contexts.get(key) or context + + dim_type_raw = _value_as_string(properties.get("type"), constants, ctx_for("type")) + label = _value_as_string(properties.get("label"), constants, ctx_for("label")) + description = _value_as_string(properties.get("description"), constants, ctx_for("description")) + fmt = _value_as_string(properties.get("format"), constants, ctx_for("format")) is_primary = _value_as_bool(properties.get("primary_key")) is True dim_type, granularity = _map_dimension_type(dim_type_raw) - sql_expr = _normalize_definition(properties.get("definition"), constants, context) + sql_expr = _normalize_definition(properties.get("definition"), constants, ctx_for("definition")) if sql_expr == block.name: sql_expr = None @@ -1063,13 +1144,19 @@ def _parse_measure_block(block: AmlBlock, constants: dict[str, AmlValue], contex return None properties = _properties_from_items(block.items) + prop_contexts = _property_contexts_from_items(block.items) - label = _value_as_string(properties.get("label"), constants, context) - description = _value_as_string(properties.get("description"), constants, context) - fmt = _value_as_string(properties.get("format"), constants, context) + def ctx_for(key: str) -> _FileContext: + return prop_contexts.get(key) or context + + label = _value_as_string(properties.get("label"), constants, ctx_for("label")) + description = _value_as_string(properties.get("description"), constants, ctx_for("description")) + fmt = _value_as_string(properties.get("format"), constants, ctx_for("format")) - aggregation_type = _normalize_agg_type(_value_as_string(properties.get("aggregation_type"), constants, context)) - expr = _normalize_definition(properties.get("definition"), constants, context) + aggregation_type = _normalize_agg_type( + _value_as_string(properties.get("aggregation_type"), constants, ctx_for("aggregation_type")) + ) + expr = _normalize_definition(properties.get("definition"), constants, ctx_for("definition")) agg_map = { "count": "count", @@ -1136,6 +1223,195 @@ def _parse_measure_block(block: AmlBlock, constants: dict[str, AmlValue], contex ) +def _collect_dataset_definitions( + documents: Iterable[_AmlDocument], +) -> tuple[ + dict[str, tuple[AmlBlock, _FileContext]], + dict[str, tuple[AmlBlock, _FileContext]], + dict[str, tuple[AmlBlock, _FileContext]], + dict[str, tuple[ObjectAssignment, _FileContext]], +]: + """Collect Dataset / PartialDataset / standalone Metric blocks and Dataset assignments. + + Returns (dataset_blocks, partial_dataset_blocks, metric_blocks, dataset_assignments). + """ + dataset_blocks: dict[str, tuple[AmlBlock, _FileContext]] = {} + partial_dataset_blocks: dict[str, tuple[AmlBlock, _FileContext]] = {} + metric_blocks: dict[str, tuple[AmlBlock, _FileContext]] = {} + dataset_assignments: dict[str, tuple[ObjectAssignment, _FileContext]] = {} + + for document in documents: + context = document.context + for item in document.items: + if isinstance(item, AmlBlock) and item.name: + if item.kind == "Dataset": + name = _qualify_declared_name(item.name, context.module_prefix) + dataset_blocks[name] = (AmlBlock(kind="Dataset", name=name, items=item.items), context) + elif item.kind == "PartialDataset": + name = _qualify_declared_name(item.name, context.module_prefix) + partial_dataset_blocks[name] = ( + AmlBlock(kind="PartialDataset", name=name, items=item.items), + context, + ) + elif item.kind == "Metric": + name = _qualify_declared_name(item.name, context.module_prefix) + metric_blocks[name] = (AmlBlock(kind="Metric", name=name, items=item.items), context) + continue + if isinstance(item, ObjectAssignment) and item.kind in {"Dataset", "PartialDataset"}: + name = _qualify_declared_name(item.name, context.module_prefix) + dataset_assignments[name] = (item, context) + elif isinstance(item, ObjectAssignment) and item.kind == "Metric": + # Inline assignment form: `Metric x = Metric { ... }`. Materialize + # the value's items into a metric block, matching block-form metrics. + value = item.value + if isinstance(value, (TypedBlock, InlineBlock)): + name = _qualify_declared_name(item.name, context.module_prefix) + metric_blocks[name] = (AmlBlock(kind="Metric", name=name, items=value.items), context) + + return dataset_blocks, partial_dataset_blocks, metric_blocks, dataset_assignments + + +def _resolve_dataset_artifacts( + documents: Iterable[_AmlDocument], + constants: dict[str, AmlValue], + existing_models: dict[str, Model], +) -> tuple[dict[str, Model], dict[str, Metric], list[tuple[AmlBlock, _FileContext]]]: + """Resolve dataset-level fields and standalone metrics into models / graph metrics. + + Also returns the resolved assignment-form Dataset blocks (with their + relationships property intact) so the relationship pass can attach join + edges declared inside inline `Dataset x = Dataset { ... }` assignments. + """ + documents = list(documents) + dataset_blocks, partial_dataset_blocks, metric_blocks, dataset_assignments = _collect_dataset_definitions(documents) + + # PartialDataset blocks resolve like partial models for .extend() composition. + resolved_blocks: dict[str, AmlBlock] = {} + resolving: set[str] = set() + + def resolve_named_block(name: str) -> AmlBlock | None: + if name in resolved_blocks: + return resolved_blocks[name] + if name in resolving: + return None + if name in dataset_blocks: + block, block_ctx = dataset_blocks[name] + stamped = _stamp_block_context(block, block_ctx) + resolved_blocks[name] = stamped + return stamped + if name in partial_dataset_blocks: + block, block_ctx = partial_dataset_blocks[name] + return _stamp_block_context(block, block_ctx) + assignment_entry = dataset_assignments.get(name) + if assignment_entry: + assignment, ctx = assignment_entry + resolving.add(name) + resolved_value = _resolve_block_from_value(assignment.value, ctx, resolve_named_block) + resolving.remove(name) + if resolved_value: + resolved_blocks[name] = AmlBlock(kind="Dataset", name=name, items=resolved_value.items) + return resolved_blocks[name] + return None + + dataset_models: dict[str, Model] = {} + + for name, (block, context) in dataset_blocks.items(): + model = _parse_dataset_block(block, constants, context) + if model and model.name not in existing_models: + dataset_models[model.name] = model + + # Resolved assignment-form dataset blocks, surfaced so the relationship pass + # can attach join edges declared inside them. + assignment_dataset_blocks: list[tuple[AmlBlock, _FileContext]] = [] + + for name, (assignment, context) in dataset_assignments.items(): + # PartialDataset assignments are reusable composition fragments (consumed + # via .extend()), not queryable datasets, mirroring named PartialDataset + # blocks which never surface as standalone models. + if assignment.kind == "PartialDataset": + continue + block = resolve_named_block(name) + if not block: + continue + block_with_name = AmlBlock(kind="Dataset", name=name, items=block.items) + assignment_dataset_blocks.append((block_with_name, context)) + model = _parse_dataset_block(block_with_name, constants, context) + if model and model.name not in existing_models: + dataset_models[model.name] = model + + # Standalone Metric blocks become graph-level metrics. + standalone_metrics: dict[str, Metric] = {} + for name, (block, context) in metric_blocks.items(): + metric_block = AmlBlock(kind="metric", name=block.name, items=block.items) + metric = _parse_measure_block(metric_block, constants, context) + if metric: + metric.name = name + standalone_metrics[name] = metric + + return dataset_models, standalone_metrics, assignment_dataset_blocks + + +def _parse_dataset_block(block: AmlBlock, constants: dict[str, AmlValue], context: _FileContext) -> Model | None: + """Parse a Dataset block's dataset-level dimension/metric blocks into a Model. + + Dataset dimensions and metrics use cross-model AQL definitions (the only + style Holistics allows in datasets). They are surfaced on a synthetic model + named after the dataset so they are not silently dropped. + """ + if not block.name: + return None + + properties = _properties_from_items(block.items) + label = _value_as_string(properties.get("label"), constants, context) + description = _value_as_string(properties.get("description"), constants, context) + + dimensions: list[Dimension] = [] + metrics: list[Metric] = [] + + for item in block.items: + if not isinstance(item, AmlBlock): + continue + # A child field carries its own defining context when the dataset was + # composed across modules (Dataset extending a PartialDataset from another + # file); resolve its constants/`use` aliases against that file. + item_context = item.context or context + if item.kind == "dimension": + dimension, _ = _parse_dimension_block(item, constants, item_context) + if not dimension: + continue + # Dataset dimensions usually combine fields across models, but some + # are authored with an aggregate AQL (e.g. count(...)). An aggregate + # cannot be a groupable dimension, so surface it as a derived metric. + if _sql_is_aggregate(dimension.sql): + metrics.append( + Metric( + name=dimension.name, + type="derived", + sql=dimension.sql, + label=dimension.label, + description=dimension.description, + format=dimension.format, + ) + ) + else: + dimensions.append(dimension) + elif item.kind in {"measure", "metric"}: + metric = _parse_measure_block(item, constants, item_context) + if metric: + metrics.append(metric) + + if not dimensions and not metrics: + return None + + return Model( + name=block.name, + description=description or label, + primary_key="id", + dimensions=dimensions, + metrics=metrics, + ) + + def _parse_relationship_definition(block: AmlBlock, context: _FileContext) -> _AmlRelationship | None: rel = _parse_relationship_block(block, context) if not rel: @@ -1188,11 +1464,23 @@ def _parse_dataset_relationships( relationships: list[_AmlRelationship] = [] refs: list[_RelationshipRef] = [] - properties = _properties_from_items(block.items) - relationships_value = properties.get("relationships") + relationships_property: AmlProperty | None = None + for item in block.items: + if isinstance(item, AmlProperty) and item.key == "relationships": + relationships_property = item + if relationships_property is None: + return relationships, refs + + relationships_value = relationships_property.value if not isinstance(relationships_value, list): return relationships, refs + # When the `relationships` property was contributed by a PartialDataset from + # another module, it carries that module's context so its model/field + # references (e.g. `rel(orders.customer_id > customers.id)`) qualify against + # the authoring file rather than the consuming dataset's file. + context = relationships_property.context or context + for value in relationships_value: if isinstance(value, TypedBlock): if value.type_name == "RelationshipConfig": @@ -1405,6 +1693,22 @@ def _properties_from_items(items: Iterable[AmlItem]) -> dict[str, AmlValue]: return props +def _property_contexts_from_items(items: Iterable[AmlItem]) -> dict[str, _FileContext | None]: + """Map each property key to the file context it was authored in. + + When a field block is composed across modules (a PartialDataset metric from + one file extended/overridden by a partial in another), individual properties + carry their own origin context. Resolving each property against that context + keeps constants/`use` aliases (e.g. `definition: rev_def`) bound to the file + that defined them rather than the file that overrode a sibling property. + """ + contexts: dict[str, _FileContext | None] = {} + for item in items: + if isinstance(item, AmlProperty): + contexts[item.key] = item.context + return contexts + + def _normalize_definition( value: AmlValue, constants: dict[str, AmlValue] | None = None, context: _FileContext | None = None ) -> str | None: @@ -1482,7 +1786,9 @@ def _translate_aql_to_sql(expr: str) -> str: if len(segments) == 1: return _translate_aql_inline(base) - current = _replace_aql_macros(base) + # Translate the base segment too: it may itself be a function call + # (e.g. count(orders.id) | of_all(...)), not just a bare field reference. + current = _translate_aql_inline(base) for segment in segments[1:]: current = _apply_aql_pipe(current, segment.strip()) return current @@ -1610,29 +1916,65 @@ def _find_matching_paren(expr: str, start: int) -> int | None: return None +# Aggregation functions: map directly to SQL aggregates. +_AQL_AGG_SQL = { + "count": "COUNT", + "count_all": "COUNT", + "sum": "SUM", + "avg": "AVG", + "average": "AVG", + "min": "MIN", + "max": "MAX", + "median": "MEDIAN", +} + +# Table-shaping functions (filter/group/select rows of a table). At the +# SQL-fragment level the surrounding aggregation produces the value, so these +# are best-effort passed through (the base flows to the next pipe stage). +_AQL_TABLE_FUNCS = {"filter", "group", "select", "where"} + +# Metric-modifier functions that change a metric's grouping/period scope. +# We cannot express their windowing at the fragment level, so we preserve the +# inner metric expression rather than dropping it. +_AQL_METRIC_MODIFIERS = {"of_all", "exclude", "keep_grains", "relative_period", "period_to_date", "running_total"} + + def _apply_aql_function(name: str, args: list[str], base: str | None) -> str: normalized = name.strip().lower() cleaned_args = [_replace_aql_macros(arg.strip()) for arg in args if arg.strip()] target = cleaned_args[0] if cleaned_args else base or "*" - if normalized in {"count", "count_all"}: - return f"COUNT({target})" + if normalized in _AQL_AGG_SQL: + sql_func = _AQL_AGG_SQL[normalized] + # Two-arg aggregation form: sum(table, expr) aggregates expr over table. + if base is None and len(cleaned_args) >= 2: + return f"{sql_func}({cleaned_args[1]})" + return f"{sql_func}({target})" if normalized in {"count_distinct", "countdistinct"}: + if base is None and len(cleaned_args) >= 2: + return f"COUNT(DISTINCT {cleaned_args[1]})" return f"COUNT(DISTINCT {target})" - if normalized in {"sum"}: - return f"SUM({target})" - if normalized in {"avg", "average"}: - return f"AVG({target})" - if normalized in {"min"}: - return f"MIN({target})" - if normalized in {"max"}: - return f"MAX({target})" if normalized in {"count_if", "countif"}: condition = cleaned_args[0] if cleaned_args else (base or "") if not condition: return "COUNT(*)" return f"SUM(CASE WHEN {condition} THEN 1 ELSE 0 END)" + if normalized in _AQL_TABLE_FUNCS: + # Best-effort: keep the upstream table expression flowing through the + # pipeline; the aggregation that follows still produces the value. + if base is not None: + return base + if cleaned_args: + return cleaned_args[0] + return "*" + + if normalized in _AQL_METRIC_MODIFIERS: + # Preserve the metric being modified so it is not lost. The scope change + # (ignore grouping / shift period) cannot be expressed in a fragment. + inner = base if base is not None else (cleaned_args[0] if cleaned_args else "") + return inner + if base: combined_args = [base] + cleaned_args if cleaned_args else [base] else: @@ -1749,6 +2091,19 @@ def _detect_ratio(expr: str) -> tuple[str, str] | None: return None +_AGGREGATE_FUNC_RE = re.compile( + r"\b(COUNT|SUM|AVG|MIN|MAX|MEDIAN|STDDEV|STDDEV_SAMP|STDDEV_POP|VAR_SAMP|VAR_POP|VARIANCE)\s*\(", + re.IGNORECASE, +) + + +def _sql_is_aggregate(sql: str | None) -> bool: + """Return True if the SQL fragment contains a SQL aggregate function call.""" + if not sql: + return False + return bool(_AGGREGATE_FUNC_RE.search(sql)) + + def _map_dimension_type(dim_type: str | None) -> tuple[str, str | None]: if not dim_type: return "categorical", None diff --git a/tests/adapters/holistics/test_kitchen_sink.py b/tests/adapters/holistics/test_kitchen_sink.py index f155ef0a..80c05c0f 100644 --- a/tests/adapters/holistics/test_kitchen_sink.py +++ b/tests/adapters/holistics/test_kitchen_sink.py @@ -121,6 +121,87 @@ def test_relationships(self, kitchen_sink_layer): assert summary is not None + def test_dataset_level_metrics_and_dimensions(self, kitchen_sink_layer): + """Dataset-level metric/dimension blocks (AQL, cross-model) are surfaced.""" + graph = kitchen_sink_layer.graph + + # Datasets surface as models named after the dataset. + assert "kitchen_transactions" in graph.models + assert "kitchen_sink" in graph.models + + transactions = graph.models["kitchen_transactions"] + + # Dataset dimension authored with an aggregate AQL (count(...)) is + # surfaced as a derived metric, since an aggregate cannot be a + # groupable dimension. + total_buyer_orders = transactions.get_metric("total_buyer_orders") + assert total_buyer_orders is not None + assert total_buyer_orders.sql == "COUNT(kitchen_orders.order_id)" + + # Dataset metric with no aggregation_type, defined purely via @aql. + avg_order_amount = transactions.get_metric("avg_order_amount") + assert avg_order_amount is not None + assert "SUM(kitchen_orders.amount)" in avg_order_amount.sql + assert "COUNT(kitchen_orders.order_id)" in avg_order_amount.sql + + buyer_event_ratio = transactions.get_metric("buyer_event_ratio") + assert buyer_event_ratio is not None + assert "COUNT(kitchen_events.event_id)" in buyer_event_ratio.sql + assert "COUNT(DISTINCT kitchen_buyers.person_id)" in buyer_event_ratio.sql + + sink = graph.models["kitchen_sink"] + revenue_per_customer = sink.get_metric("revenue_per_customer") + assert revenue_per_customer is not None + assert "SUM(kitchen_orders.amount)" in revenue_per_customer.sql + + def test_dataset_metrics_registered_at_graph_scope(self, kitchen_sink_layer): + """Dataset metrics are first-class graph metrics, reachable without a + model prefix via graph.metrics, list_metrics(), and get_metric().""" + layer = kitchen_sink_layer + graph = layer.graph + + for name in ("avg_order_amount", "buyer_event_ratio", "total_buyer_orders"): + assert name in graph.metrics, f"{name} should be registered at graph scope" + assert name in layer.list_metrics() + assert layer.get_metric(name).name == name + + # Dataset metric from a different dataset is also surfaced. + assert "revenue_per_customer" in graph.metrics + + def test_standalone_metric_and_partial_dataset(self, kitchen_sink_layer): + """Standalone Metric blocks register as graph metrics; PartialDataset + metrics compose into a Dataset via .extend().""" + graph = kitchen_sink_layer.graph + + # Standalone top-level Metric -> graph-level metric. + assert "kitchen_global_revenue" in graph.metrics + global_revenue = graph.metrics["kitchen_global_revenue"] + assert global_revenue.label == "Global Revenue" + assert global_revenue.sql == "SUM(kitchen_orders.amount)" + + # Dataset = base.extend(partial_dataset) surfaces the partial's metrics. + assert "kitchen_metric_store" in graph.models + store = graph.models["kitchen_metric_store"] + + order_count = store.get_metric("reusable_order_count") + assert order_count is not None + assert order_count.sql == "COUNT(kitchen_orders.order_id)" + + # where() table function preserved through the pipe; count still applies. + high_value = store.get_metric("high_value_orders") + assert high_value is not None + assert high_value.sql == "COUNT(kitchen_orders.order_id)" + + # of_all() metric modifier preserves the inner aggregation. + revenue_share = store.get_metric("revenue_share") + assert revenue_share is not None + assert revenue_share.sql == "SUM(kitchen_orders.amount)" + + # relative_period() period-over-period preserves the inner aggregation. + last_month = store.get_metric("revenue_last_month") + assert last_month is not None + assert last_month.sql == "SUM(kitchen_orders.amount)" + def test_extends_merge(self, kitchen_sink_layer): extended = kitchen_sink_layer.graph.models["kitchen_orders_extended"] inline = kitchen_sink_layer.graph.models["kitchen_orders_inline"] diff --git a/tests/adapters/holistics/test_parsing.py b/tests/adapters/holistics/test_parsing.py index d12e3e63..5c0782c2 100644 --- a/tests/adapters/holistics/test_parsing.py +++ b/tests/adapters/holistics/test_parsing.py @@ -308,6 +308,432 @@ def test_holistics_aql_translation_and_macros(): temp_path.unlink() +def test_holistics_aql_table_and_metric_functions(): + """Richer AQL functions (where/filter/group/of_all/exclude/relative_period) + are handled best-effort without losing the underlying aggregation.""" + from sidemantic.adapters.holistics import _translate_aql_to_sql + + # Table functions in a pipe pass through; the aggregation still applies. + assert _translate_aql_to_sql("orders | filter(orders.amount > 100) | count(orders.id)") == "COUNT(orders.id)" + assert _translate_aql_to_sql("orders | group(orders.status) | sum(orders.amount)") == "SUM(orders.amount)" + assert _translate_aql_to_sql("orders | where(orders.status == 'x') | count(orders.id)") == "COUNT(orders.id)" + + # Metric modifiers preserve the inner metric expression. + assert _translate_aql_to_sql("count(orders.id) | of_all(products)") == "COUNT(orders.id)" + assert _translate_aql_to_sql("sum(orders.amount) | exclude(orders.status)") == "SUM(orders.amount)" + assert ( + _translate_aql_to_sql("sum(orders.amount) | relative_period(orders.created_at, interval(-1 month))") + == "SUM(orders.amount)" + ) + + # Two-argument aggregation form: sum(table, expr) aggregates expr. + assert _translate_aql_to_sql("sum(order_items, order_items.quantity * products.price)") == ( + "SUM(order_items.quantity * products.price)" + ) + + # Nested aggregation in a ratio still translates each side. + assert _translate_aql_to_sql("sum(orders.amount) / count(orders.id)") == "SUM(orders.amount) / COUNT(orders.id)" + + +def test_holistics_inline_dataset_assignment(): + """Datasets declared with an inline object assignment (Dataset foo = Dataset {...}) + are resolved, not silently dropped, surfacing their metrics at graph scope.""" + aml_content = """ +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + measure order_count { type: 'number' aggregation_type: 'count' } +} + +Dataset inline_ds = Dataset { + label: 'Inline DS' + models: [base_orders] + metric inline_total { + label: 'Inline Total' + type: 'number' + definition: @aql count(base_orders.order_id);; + } +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".aml", delete=False) as f: + f.write(aml_content) + temp_path = Path(f.name) + + try: + graph = HolisticsAdapter().parse(temp_path) + + # The inline dataset surfaces as a model and its metric is graph-scoped. + assert "inline_ds" in graph.models + assert "inline_total" in graph.metrics + assert graph.models["inline_ds"].get_metric("inline_total").sql == "COUNT(base_orders.order_id)" + finally: + temp_path.unlink() + + +def test_holistics_partial_dataset_assignment_extend(): + """A PartialDataset declared via object assignment is collected and its + metrics compose into a Dataset that extends it. The partial itself is a + composition fragment, not a standalone queryable model.""" + aml_content = """ +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + measure order_count { type: 'number' aggregation_type: 'count' } +} + +PartialDataset reusable = PartialDataset { + metric reusable_count { + label: 'Reusable Count' + type: 'number' + definition: @aql count(base_orders.order_id);; + } +} + +Dataset base_ds { + label: 'Base DS' + models: [base_orders] +} + +Dataset combined = base_ds.extend(reusable) +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".aml", delete=False) as f: + f.write(aml_content) + temp_path = Path(f.name) + + try: + graph = HolisticsAdapter().parse(temp_path) + + # The extending dataset surfaces with the partial's metric, graph-scoped. + assert "combined" in graph.models + assert "reusable_count" in graph.metrics + assert graph.models["combined"].get_metric("reusable_count").sql == "COUNT(base_orders.order_id)" + # The partial fragment is not a standalone queryable model. + assert "reusable" not in graph.models + finally: + temp_path.unlink() + + +def test_holistics_extend_partial_preserves_defining_context(tmp_path): + """A Dataset that extends a PartialDataset declared in another module must + resolve the partial's field definitions against the partial's own file. A + metric whose definition references a const from the partial's module should + import as that const's AQL, not the literal const identifier.""" + finance_dir = tmp_path / "modules" / "finance" + finance_dir.mkdir(parents=True) + + # The partial and the const it references live in the `finance` module. + (finance_dir / "rev.aml").write_text( + """ +const rev_def = @aql sum(base_orders.amount);; + +PartialDataset reusable = PartialDataset { + metric reusable_rev { + label: 'Reusable Rev' + type: 'number' + definition: rev_def + } +} +""" + ) + + # The extending dataset lives at the project root (no module prefix) and pulls + # the partial in via a `use` alias. + (tmp_path / "root.aml").write_text( + """ +use finance { reusable } + +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension amount { type: 'number' } +} + +Dataset base_ds { + label: 'Base DS' + models: [base_orders] +} + +Dataset combined = base_ds.extend(reusable) +""" + ) + + graph = HolisticsAdapter().parse(tmp_path) + + assert "combined" in graph.models + assert "reusable_rev" in graph.metrics + # The const from the partial's module is resolved, not dropped as a literal. + assert graph.metrics["reusable_rev"].sql == "SUM(base_orders.amount)" + assert graph.models["combined"].get_metric("reusable_rev").sql == "SUM(base_orders.amount)" + + +def test_holistics_extend_override_preserves_defining_context(tmp_path): + """When a PartialDataset from another module overrides an existing field of + the same name, the merged child block must keep the overriding partial's + defining context. Otherwise the field's `definition: rev_def` const resolves + against the consuming dataset's file (where `rev_def` is unknown) and imports + as the literal `rev_def` identifier instead of the const's AQL.""" + finance_dir = tmp_path / "modules" / "finance" + finance_dir.mkdir(parents=True) + + # The overriding partial and the const it references live in the `finance` + # module. It redefines `revenue` with a definition built from `rev_def`. + (finance_dir / "rev.aml").write_text( + """ +const rev_def = @aql sum(base_orders.amount);; + +PartialDataset override = PartialDataset { + metric revenue { + label: 'Finance Revenue' + type: 'number' + definition: rev_def + } +} +""" + ) + + # The root file declares a base partial that already defines `revenue`, then + # extends it with the `finance` partial which overrides that same field. + (tmp_path / "root.aml").write_text( + """ +use finance { override } + +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension amount { type: 'number' } +} + +PartialDataset base_part = PartialDataset { + metric revenue { + label: 'Base Revenue' + type: 'number' + definition: @aql count(base_orders.order_id);; + } +} + +Dataset combined = base_part.extend(override) +""" + ) + + graph = HolisticsAdapter().parse(tmp_path) + + assert "combined" in graph.models + assert "revenue" in graph.metrics + # The overriding field wins and its const from the `finance` module resolves, + # rather than falling back to the consuming file and importing as a literal. + assert graph.metrics["revenue"].sql == "SUM(base_orders.amount)" + assert graph.models["combined"].get_metric("revenue").sql == "SUM(base_orders.amount)" + + +def test_holistics_merge_preserves_per_property_context(tmp_path): + """When a partial from another module defines a field whose `definition` + references a const from that module, and a consuming partial overrides only a + sibling property (e.g. `label`), the merged field block carries the consumer's + block-level context. Each property must still resolve against its own authoring + file, so `definition: rev_def` resolves to the finance const's AQL instead of + importing as the literal `rev_def` identifier.""" + finance_dir = tmp_path / "modules" / "finance" + finance_dir.mkdir(parents=True) + + # The const and the field that uses it live in the `finance` module. + (finance_dir / "rev.aml").write_text( + """ +const rev_def = @aql sum(base_orders.amount);; + +PartialDataset finance_part = PartialDataset { + metric revenue { + label: 'Finance Revenue' + type: 'number' + definition: rev_def + } +} +""" + ) + + # The root partial overrides ONLY the label of `revenue`, leaving the finance + # `definition` property untouched in the merged block. + (tmp_path / "root.aml").write_text( + """ +use finance { finance_part } + +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension amount { type: 'number' } +} + +PartialDataset root_over = PartialDataset { + metric revenue { + label: 'Root Label Override' + } +} + +Dataset combined = finance_part.extend(root_over) +""" + ) + + graph = HolisticsAdapter().parse(tmp_path) + + assert "combined" in graph.models + assert "revenue" in graph.metrics + # The override applies to the label, but the finance `definition` property keeps + # resolving against the finance module rather than the consuming root file. + assert graph.metrics["revenue"].label == "Root Label Override" + assert graph.metrics["revenue"].sql == "SUM(base_orders.amount)" + assert graph.models["combined"].get_metric("revenue").sql == "SUM(base_orders.amount)" + + +def test_holistics_extend_partial_preserves_relationship_context(tmp_path): + """When a Dataset extends a PartialDataset from another module and that partial + contributes a `relationships` property, the relationship refs must qualify + against the partial's module. Otherwise `rel(orders.customer_id > customers.id)` + resolves against the consuming root file (`orders`/`customers`) instead of the + actual `finance.orders`/`finance.customers`, so the join edge is dropped.""" + finance_dir = tmp_path / "modules" / "finance" + finance_dir.mkdir(parents=True) + + # The models and the partial that joins them all live in the `finance` module. + # Its relationship refs are written unqualified, relative to that module. + (finance_dir / "ds.aml").write_text( + """ +Model orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension customer_id { type: 'number' } + dimension amount { type: 'number' } +} + +Model customers { + type: 'table' + table_name: 'customers' + dimension id { type: 'number' } + dimension name { type: 'text' } +} + +PartialDataset joins = PartialDataset { + relationships: [ + rel(orders.customer_id > customers.id, true) + ] +} +""" + ) + + # The extending dataset lives at the project root (no module prefix) and pulls + # the partial in via a `use` alias. + (tmp_path / "root.aml").write_text( + """ +use finance { joins } + +Dataset base_ds { + label: 'Base DS' + models: [finance.orders, finance.customers] +} + +Dataset combined = base_ds.extend(joins) +""" + ) + + graph = HolisticsAdapter().parse(tmp_path) + + assert "finance.orders" in graph.models + assert "finance.customers" in graph.models + # The relationship from the partial's module resolves against `finance`, so the + # join edge lands on finance.orders -> finance.customers rather than being + # skipped because `orders`/`customers` don't exist in the graph. + rels = graph.models["finance.orders"].relationships + assert any( + r.name == "finance.customers" and r.type == "many_to_one" and r.foreign_key == "customer_id" for r in rels + ), f"expected join edge attached, got {[(r.name, r.type) for r in rels]}" + + +def test_holistics_inline_metric_assignment(): + """A standalone metric written in inline assignment form (Metric x = Metric {...}) + registers as a graph-level metric, matching block-form standalone metrics.""" + aml_content = """ +Model base_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension amount { type: 'number' } +} + +Metric revenue = Metric { + label: 'Revenue' + type: 'number' + definition: @aql sum(base_orders.amount);; +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".aml", delete=False) as f: + f.write(aml_content) + temp_path = Path(f.name) + + try: + graph = HolisticsAdapter().parse(temp_path) + + assert "revenue" in graph.metrics + assert graph.metrics["revenue"].sql == "SUM(base_orders.amount)" + finally: + temp_path.unlink() + + +def test_holistics_inline_dataset_assignment_relationships(): + """Relationships declared inside an inline Dataset assignment are attached to + the referenced models, not dropped (so cross-model metrics have a join path).""" + aml_content = """ +Model rel_orders { + type: 'table' + table_name: 'orders' + dimension order_id { type: 'number' } + dimension customer_id { type: 'number' } + dimension amount { type: 'number' } +} + +Model rel_customers { + type: 'table' + table_name: 'customers' + dimension id { type: 'number' } + dimension name { type: 'text' } +} + +Dataset rel_ds = Dataset { + label: 'Rel DS' + models: [rel_orders, rel_customers] + relationships: [ + rel(rel_orders.customer_id > rel_customers.id, true) + ] + metric total_amount { + label: 'Total Amount' + type: 'number' + definition: @aql sum(rel_orders.amount);; + } +} +""" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".aml", delete=False) as f: + f.write(aml_content) + temp_path = Path(f.name) + + try: + graph = HolisticsAdapter().parse(temp_path) + + assert "total_amount" in graph.metrics + rels = graph.models["rel_orders"].relationships + assert any( + r.name == "rel_customers" and r.type == "many_to_one" and r.foreign_key == "customer_id" for r in rels + ), f"expected join edge attached, got {[(r.name, r.type) for r in rels]}" + finally: + temp_path.unlink() + + # ============================================================================= # RELATIONSHIP PARSING TESTS # ============================================================================= diff --git a/tests/adapters/test_added_fixture_coverage.py b/tests/adapters/test_added_fixture_coverage.py index c6abe6de..8fb423ab 100644 --- a/tests/adapters/test_added_fixture_coverage.py +++ b/tests/adapters/test_added_fixture_coverage.py @@ -175,7 +175,6 @@ "tests/fixtures/atscale_sml_kitchen_sink/model_internet_sales.yml", "tests/fixtures/atscale_sml_kitchen_sink/row_security_country.yml", "tests/fixtures/cube/rbac_views.yaml", - "tests/fixtures/holistics_kitchen_sink/transactions.dataset.aml", "tests/fixtures/lookml/lkml_model_all_fields.model.lkml", "tests/fixtures/lookml/lkml_parameter_join.model.lkml", "tests/fixtures/lookml/pylookml_aggregate_tables.model.lkml", diff --git a/tests/adapters/test_fixture_functionality_contracts.py b/tests/adapters/test_fixture_functionality_contracts.py index d961a60a..8cbe9a6a 100644 --- a/tests/adapters/test_fixture_functionality_contracts.py +++ b/tests/adapters/test_fixture_functionality_contracts.py @@ -85,9 +85,7 @@ "tests/fixtures/holistics/relationships.aml", "tests/fixtures/holistics_kitchen_sink/constants.aml", "tests/fixtures/holistics_kitchen_sink/extensions.aml", - "tests/fixtures/holistics_kitchen_sink/kitchen_sink.dataset.aml", "tests/fixtures/holistics_kitchen_sink/relationships.aml", - "tests/fixtures/holistics_kitchen_sink/transactions.dataset.aml", "tests/fixtures/lookml/ecommerce_explores.lkml", "tests/fixtures/lookml/kitchen_sink_explores.lkml", "tests/fixtures/lookml/lkml_model_all_fields.model.lkml", @@ -150,6 +148,7 @@ "source_fragments_without_fields": {"AtScaleSMLAdapter", "MalloyAdapter"}, "semantic_only_no_sources": { "AtScaleSMLAdapter", + "HolisticsAdapter", "LookMLAdapter", "MalloyAdapter", "OmniAdapter", diff --git a/tests/fixtures/holistics_kitchen_sink/metric_store.aml b/tests/fixtures/holistics_kitchen_sink/metric_store.aml new file mode 100644 index 00000000..c4e0e952 --- /dev/null +++ b/tests/fixtures/holistics_kitchen_sink/metric_store.aml @@ -0,0 +1,46 @@ +// Reusable metric store pattern from docs.holistics.io: +// - standalone top-level Metric blocks (graph-level, independently reusable) +// - a PartialDataset grouping reusable metrics +// - a base Dataset extended with the PartialDataset via .extend() +// Also exercises richer AQL functions: where(), filter(), group(), +// of_all(), exclude(), and relative_period() (period-over-period). + +Metric kitchen_global_revenue { + label: 'Global Revenue' + type: 'number' + definition: @aql sum(kitchen_orders.amount) ;; +} + +PartialDataset kitchen_reusable_metrics { + metric reusable_order_count { + label: 'Reusable Order Count' + type: 'number' + definition: @aql count(kitchen_orders.order_id) ;; + } + + metric high_value_orders { + label: 'High Value Orders' + type: 'number' + definition: @aql kitchen_orders | where(kitchen_orders.amount > 100) | count(kitchen_orders.order_id) ;; + } + + metric revenue_share { + label: 'Revenue Share' + type: 'number' + definition: @aql sum(kitchen_orders.amount) | of_all(kitchen_products) ;; + } + + metric revenue_last_month { + label: 'Revenue Last Month' + type: 'number' + definition: @aql sum(kitchen_orders.amount) | relative_period(kitchen_orders.order_date, interval(-1 month)) ;; + } +} + +Dataset kitchen_metric_base { + label: 'Metric Store Base' + data_source_name: 'demo' + models: [kitchen_orders, kitchen_products] +} + +Dataset kitchen_metric_store = kitchen_metric_base.extend(kitchen_reusable_metrics)