diff --git a/sidemantic/adapters/omni.py b/sidemantic/adapters/omni.py index 8c4693f3..209fed06 100644 --- a/sidemantic/adapters/omni.py +++ b/sidemantic/adapters/omni.py @@ -31,42 +31,134 @@ def parse(self, source: str | Path) -> SemanticGraph: Returns: Semantic graph with imported models + + The Omni export layout this handles: + + - ``views/*.view.yaml`` (current Omni) or ``views/*.yaml`` (older exports) + define views (models). When ``views/`` is absent, view files are + discovered recursively. + - ``topics/*.topic.yaml`` define topics (a base view + nested joins). Topics + are recorded on ``graph.topics`` and their joins are realized as + relationships. + - ``relationships.yaml``/``relationships.yml`` is a bare top-level *list* of + joins (current Omni). A nested ``relationships:`` key inside + ``model.yaml`` is also still supported (older exports). """ graph = SemanticGraph() + # Topics are an Omni concept with no native graph slot; expose as attribute. + graph.topics = [] source_path = Path(source) - # Collect all view .yaml files - view_files = [] + # Collect view, topic and relationship files. + view_files: list[Path] = [] + topic_files: list[Path] = [] + relationships_files: list[Path] = [] + model_files: list[Path] = [] + if source_path.is_dir(): - # Look for views in views/ subdirectory or all yaml files + # Prefer the conventional subdirectory layout, fall back to recursive. views_dir = source_path / "views" if views_dir.exists(): - view_files = list(views_dir.glob("*.yaml")) + list(views_dir.glob("*.yml")) + candidate_views = list(views_dir.glob("*.yaml")) + list(views_dir.glob("*.yml")) + else: + candidate_views = list(source_path.rglob("*.yaml")) + list(source_path.rglob("*.yml")) + + topics_dir = source_path / "topics" + if topics_dir.exists(): + topic_files = self._glob_topics(topics_dir) else: - view_files = list(source_path.rglob("*.yaml")) + list(source_path.rglob("*.yml")) + topic_files = self._glob_topics(source_path) + + # Relationships and model files live at the project root. + for candidate in ("relationships.yaml", "relationships.yml"): + candidate_path = source_path / candidate + if candidate_path.exists(): + relationships_files.append(candidate_path) + for candidate in ("model.yaml", "model.yml"): + candidate_path = source_path / candidate + if candidate_path.exists(): + model_files.append(candidate_path) + + topic_set = {p.resolve() for p in topic_files} + rel_set = {p.resolve() for p in relationships_files} + model_set = {p.resolve() for p in model_files} + for candidate in candidate_views: + resolved = candidate.resolve() + if resolved in topic_set or resolved in rel_set or resolved in model_set: + continue + if self._is_model_or_relationships_file(candidate): + continue + view_files.append(candidate) else: - view_files = [source_path] + # Single file - dispatch by suffix. + if self._is_topic_file(source_path): + topic_files = [source_path] + elif source_path.name in ("relationships.yaml", "relationships.yml"): + relationships_files = [source_path] + elif source_path.name in ("model.yaml", "model.yml"): + model_files = [source_path] + else: + view_files = [source_path] - # Parse all views + # Parse all views first so relationships/topics can attach to them. for view_file in view_files: - # Skip model files (relationships defined separately) - if "model.yaml" in str(view_file) or "model.yml" in str(view_file): - continue - model = self._parse_view(view_file) if model: graph.add_model(model) - # Parse relationships from model file if present - if source_path.is_dir(): - model_file = source_path / "model.yaml" - if not model_file.exists(): - model_file = source_path / "model.yml" - if model_file.exists(): - self._parse_relationships(model_file, graph) + # Parse a global relationships file (bare list of joins). + for relationships_file in relationships_files: + self._parse_relationships_list(self._load_relationships_list(relationships_file), graph) + + # Parse relationships nested inside a model file (older Omni layout). + for model_file in model_files: + self._parse_relationships(model_file, graph) + + # Parse topics (base view + nested joins). + for topic_file in topic_files: + self._parse_topic(topic_file, graph) return graph + @staticmethod + def _is_topic_file(path: Path) -> bool: + """Whether a path is an Omni topic file (``*.topic.yaml``/``*.topic.yml``).""" + name = path.name.lower() + return name.endswith(".topic.yaml") or name.endswith(".topic.yml") + + @classmethod + def _glob_topics(cls, directory: Path) -> list[Path]: + """Find all topic files under a directory.""" + topics = list(directory.glob("*.topic.yaml")) + list(directory.glob("*.topic.yml")) + if directory.name != "topics": + # When scanning recursively also pick up nested topic files. + topics = list(directory.rglob("*.topic.yaml")) + list(directory.rglob("*.topic.yml")) + return topics + + @classmethod + def _is_model_or_relationships_file(cls, path: Path) -> bool: + """Whether a candidate view file is actually a model/relationships file.""" + name = path.name.lower() + if name in ("model.yaml", "model.yml", "relationships.yaml", "relationships.yml"): + return True + # Topic files are handled separately; never treat them as views. + return cls._is_topic_file(path) + + @staticmethod + def _load_relationships_list(relationships_file: Path) -> list[dict[str, Any]]: + """Load a bare top-level list of joins from a relationships file.""" + with open(relationships_file) as f: + data = yaml.safe_load(f) + + if data is None: + return [] + # Current Omni: bare list. Be tolerant of a wrapping ``relationships:`` key. + if isinstance(data, dict): + data = data.get("relationships") or [] + if not isinstance(data, list): + return [] + return [rel for rel in data if isinstance(rel, dict)] + def _parse_view(self, file_path: Path) -> Model | None: """Parse Omni view YAML into Sidemantic model. @@ -82,9 +174,6 @@ def _parse_view(self, file_path: Path) -> Model | None: if not view or not isinstance(view, dict): return None - # Get view name from filename or name field - name = view.get("name") or file_path.stem - # Get table reference schema = view.get("schema") table_name = view.get("table_name") or view.get("table") @@ -96,6 +185,27 @@ def _parse_view(self, file_path: Path) -> Model | None: else: table = None + # Derive the view name. Omni references a view as ``{schema}__{table_name}`` + # when it is scoped to a schema (see the "Reference this view as ..." header + # Omni emits), otherwise by its file stem. An explicit ``name:`` always wins. + # For schema-less views the file stem is the identifier that relationships and + # topics reference, so it must be used even when ``table_name`` differs from it + # (e.g. ``views/orders.yaml`` with ``table_name: fact_orders`` is the ``orders`` + # view, not ``fact_orders``). Using ``table_name`` there would silently drop any + # ``join_from_view: orders`` relationship. + name = view.get("name") + if not name: + if schema and table_name: + name = f"{schema}__{table_name}" + else: + # Strip the ``.view`` suffix Omni adds to view filenames. + stem = file_path.name + for suffix in (".view.yaml", ".view.yml", ".yaml", ".yml"): + if stem.lower().endswith(suffix): + stem = stem[: -len(suffix)] + break + name = stem + # Get SQL for query-based views sql = view.get("sql") if not sql and "query" in view: @@ -178,22 +288,41 @@ def _parse_dimension(self, name: str, dim_def: dict[str, Any]) -> Dimension | No sql = re.sub(r"\$\{[^.]+\.([^}]+)\}", r"\1", sql) - # Handle timeframes for time dimensions - timeframes = dim_def.get("timeframes") or [] + # Handle timeframes for time dimensions. Omni allows multiple timeframes per + # time dimension; map the first to the base granularity and keep the full + # list as supported_granularities. + timeframe_mapping = { + "date": "day", + "day": "day", + "week": "week", + "month": "month", + "quarter": "quarter", + "year": "year", + "hour": "hour", + "minute": "minute", + "second": "second", + } + timeframes = dim_def.get("timeframes") granularity = None + supported_granularities = None if dim_type == "time" and timeframes: - # Map first timeframe to granularity - timeframe_mapping = { - "date": "day", - "week": "week", - "month": "month", - "quarter": "quarter", - "year": "year", - "hour": "hour", - } - if timeframes: - first_timeframe = timeframes[0] if isinstance(timeframes, list) else timeframes - granularity = timeframe_mapping.get(first_timeframe, "day") + if not isinstance(timeframes, list): + timeframes = [timeframes] + mapped = [timeframe_mapping[tf] for tf in timeframes if tf in timeframe_mapping] + if mapped: + granularity = mapped[0] + # De-duplicate while preserving order. + supported_granularities = list(dict.fromkeys(mapped)) + + # Preserve Omni-specific dimension metadata that has no first-class field. + metadata: dict[str, Any] = {} + for key in ("synonyms", "all_values", "sample_values", "suggestion_list", "bin_boundaries"): + if key in dim_def and dim_def[key] is not None: + metadata[key] = dim_def[key] + if dim_def.get("order_by_field") is not None: + metadata["order_by_field"] = dim_def["order_by_field"] + if timeframes: + metadata["timeframes"] = timeframes return Dimension( name=name, @@ -201,7 +330,10 @@ def _parse_dimension(self, name: str, dim_def: dict[str, Any]) -> Dimension | No sql=sql, label=dim_def.get("label"), granularity=granularity, + supported_granularities=supported_granularities, description=dim_def.get("description"), + format=dim_def.get("format"), + metadata=metadata or None, ) def _parse_measure(self, name: str, measure_def: dict[str, Any]) -> Metric | None: @@ -245,7 +377,9 @@ def _parse_measure(self, name: str, measure_def: dict[str, Any]) -> Metric | Non description=measure_def.get("description"), ) - # Map Omni aggregate types + # Map Omni aggregate types to Sidemantic aggregations. Omni has several + # aggregate types that Sidemantic does not model natively; map them to the + # closest supported aggregation and preserve the original in metadata. agg_type_str = measure_def.get("aggregate_type", "") type_mapping = { "count": "count", @@ -255,6 +389,14 @@ def _parse_measure(self, name: str, measure_def: dict[str, Any]) -> Metric | Non "avg": "avg", "min": "min", "max": "max", + "median": "median", + # Omni percentile/list and the *_distinct_on variants have no direct + # Sidemantic aggregation. The distinct-on measures deduplicate rows by a + # custom primary key before aggregating; collapsing them to a plain + # sum/avg/median would silently drop the dedup and overcount whenever the + # source rows fan out. Leave agg unset so they parse as derived/custom-SQL + # measures (with the original aggregate_type and custom_primary_key_sql in + # metadata) rather than mislabeling the aggregation. } agg = type_mapping.get(agg_type_str) @@ -277,11 +419,20 @@ def _parse_measure(self, name: str, measure_def: dict[str, Any]) -> Metric | Non continue for operator, value in conditions.items(): - if operator == "is": - filters.append(f"{field} = '{value}'") - elif operator == "greater_than_or_equal_to": - filters.append(f"{field} >= {value}") - # Add more operators as needed + rendered = self._render_filter(field, operator, value) + if rendered: + filters.append(rendered) + + # Preserve Omni-specific measure metadata that has no first-class field. + metadata: dict[str, Any] = {} + if agg_type_str: + metadata["aggregate_type"] = agg_type_str + if measure_def.get("synonyms") is not None: + metadata["synonyms"] = measure_def["synonyms"] + if measure_def.get("percentile") is not None: + metadata["percentile"] = measure_def["percentile"] + if measure_def.get("custom_primary_key_sql") is not None: + metadata["custom_primary_key_sql"] = measure_def["custom_primary_key_sql"] # Determine metric type metric_type = None @@ -297,8 +448,48 @@ def _parse_measure(self, name: str, measure_def: dict[str, Any]) -> Metric | Non filters=filters if filters else None, label=measure_def.get("label"), description=measure_def.get("description"), + format=measure_def.get("format"), + metadata=metadata or None, ) + @staticmethod + def _render_filter(field: str, operator: str, value: Any) -> str | None: + """Render an Omni filter condition into a SQL WHERE fragment. + + Supports the documented Omni filter operators. Unknown operators are + skipped (returns ``None``). + """ + + def quote(val: Any) -> str: + # Numbers and booleans are emitted bare; everything else is quoted. + if isinstance(val, bool): + return "TRUE" if val else "FALSE" + if isinstance(val, (int, float)): + return str(val) + return f"'{val}'" + + if operator == "is": + return f"{field} = {quote(value)}" + if operator in ("is_not", "not"): + return f"{field} != {quote(value)}" + if operator == "greater_than": + return f"{field} > {quote(value)}" + if operator == "greater_than_or_equal_to": + return f"{field} >= {quote(value)}" + if operator == "less_than": + return f"{field} < {quote(value)}" + if operator == "less_than_or_equal_to": + return f"{field} <= {quote(value)}" + if operator == "contains": + return f"{field} LIKE '%{value}%'" + if operator == "starts_with": + return f"{field} LIKE '{value}%'" + if operator == "ends_with": + return f"{field} LIKE '%{value}'" + if operator == "between" and isinstance(value, (list, tuple)) and len(value) == 2: + return f"{field} BETWEEN {quote(value[0])} AND {quote(value[1])}" + return None + def _parse_time_offset_to_comparison(self, offset: str) -> str: """Parse Omni time offset string to comparison_type. @@ -358,7 +549,7 @@ def _extract_base_metric_name(self, comparison_name: str) -> str: return comparison_name def _parse_relationships(self, model_file: Path, graph: SemanticGraph) -> None: - """Parse relationships from Omni model file. + """Parse relationships nested inside an Omni model file (older layout). Args: model_file: Path to model.yaml file @@ -367,50 +558,160 @@ def _parse_relationships(self, model_file: Path, graph: SemanticGraph) -> None: with open(model_file) as f: model_def = yaml.safe_load(f) - if not model_def: + if not model_def or not isinstance(model_def, dict): return relationships_list = model_def.get("relationships") or [] + self._parse_relationships_list(relationships_list, graph) + + def _parse_relationships_list(self, relationships_list: list[dict[str, Any]], graph: SemanticGraph) -> None: + """Parse a list of Omni join definitions into relationships. + + Args: + relationships_list: List of join dicts (``join_from_view``, + ``join_to_view``, ``relationship_type``, ``on_sql``, ...). + graph: Semantic graph to add relationships to + """ + # Omni cardinalities mapped to Sidemantic relationship types. + # ``assumed_many_to_one`` is Omni's auto-inferred variant of many_to_one. + type_mapping = { + "one_to_one": "one_to_one", + "many_to_one": "many_to_one", + "assumed_many_to_one": "many_to_one", + "one_to_many": "one_to_many", + "many_to_many": "many_to_many", + } for rel_def in relationships_list: + if not isinstance(rel_def, dict): + continue + from_view = rel_def.get("join_from_view") to_view = rel_def.get("join_to_view") if not from_view or not to_view: continue - # Get relationship type rel_type_str = rel_def.get("relationship_type", "many_to_one") - type_mapping = { - "one_to_one": "one_to_one", - "many_to_one": "many_to_one", - "one_to_many": "one_to_many", - "many_to_many": "many_to_many", - } rel_type = type_mapping.get(rel_type_str, "many_to_one") - # Extract foreign key from on_sql - on_sql = rel_def.get("on_sql", "") - foreign_key = None - if on_sql: - import re - - # Try to extract ${from_view.column} = ${to_view.column} - matches = re.findall(r"\$\{([^.]+)\.([^}]+)\}", on_sql) - for view, column in matches: - if view == from_view: - foreign_key = column - break + # Extract foreign/primary keys from on_sql: ${from.col} = ${to.col} + foreign_key, primary_key = self._keys_from_on_sql(rel_def.get("on_sql", ""), from_view, to_view, rel_type) + + # Preserve Omni join metadata with no first-class field. + metadata: dict[str, Any] = {} + if rel_def.get("join_type") is not None: + metadata["join_type"] = rel_def["join_type"] + if rel_def.get("reversible") is not None: + metadata["reversible"] = rel_def["reversible"] + if rel_type_str == "assumed_many_to_one": + metadata["assumed"] = True - # Add relationship to from_view if from_view in graph.models: relationship = Relationship( name=to_view, type=rel_type, foreign_key=foreign_key, + primary_key=primary_key, + metadata=metadata or None, ) graph.models[from_view].relationships.append(relationship) + @staticmethod + def _keys_from_on_sql( + on_sql: str, from_view: str, to_view: str, rel_type: str = "many_to_one" + ) -> tuple[str | None, str | None]: + """Extract ``(foreign_key, primary_key)`` from an Omni ``on_sql`` join. + + The relationship is always attached to ``from_view`` with ``name=to_view``, + but Sidemantic's interpretation of ``foreign_key``/``primary_key`` depends on + the cardinality (see ``Relationship``): + + - ``many_to_one`` (from_view holds the FK): ``foreign_key`` is the from_view + column and ``primary_key`` is the to_view column. + - ``one_to_many`` / ``one_to_one`` (to_view holds the FK): ``foreign_key`` is + the to_view column (the FK in the related model) and ``primary_key`` is the + from_view column (the local key). Assigning these the other way around + reverses the join and produces invalid SQL. + """ + from_column = None + to_column = None + if on_sql: + import re + + matches = re.findall(r"\$\{([^.]+)\.([^}]+)\}", on_sql) + for view, column in matches: + if view == from_view and from_column is None: + from_column = column + elif view == to_view and to_column is None: + to_column = column + + if rel_type in ("one_to_many", "one_to_one"): + # to_view (related model) holds the foreign key; from_view contributes its key. + return to_column, from_column + # many_to_one (and default): from_view holds the foreign key. + return from_column, to_column + + def _parse_topic(self, topic_file: Path, graph: SemanticGraph) -> None: + """Parse an Omni topic file (base view + nested joins). + + Topics are recorded on ``graph.topics`` and their joins are realized as + ``many_to_one`` relationships from each parent view to its joined views + (mirroring how Omni traverses joins from a base view). + + Args: + topic_file: Path to ``*.topic.yaml`` file + graph: Semantic graph to add topic + relationships to + """ + with open(topic_file) as f: + topic_def = yaml.safe_load(f) + + if not topic_def or not isinstance(topic_def, dict): + return + + base_view = topic_def.get("base_view") + if not base_view: + return + + # Flatten the nested joins map into (parent_view, joined_view) edges. + joins = topic_def.get("joins") or {} + joined_views: list[str] = [] + edges: list[tuple[str, str]] = [] + + def walk(parent: str, joins_map: Any) -> None: + if not isinstance(joins_map, dict): + return + for joined_view, nested in joins_map.items(): + joined_views.append(joined_view) + edges.append((parent, joined_view)) + walk(joined_view, nested) + + walk(base_view, joins) + + topic_record = { + "name": topic_file.name.lower().split(".topic.")[0] or topic_file.stem, + "label": topic_def.get("label"), + "description": topic_def.get("description"), + "base_view": base_view, + "joined_views": joined_views, + } + graph.topics.append(topic_record) + + # Realize the join graph as relationships (skip duplicates already present). + for parent, joined_view in edges: + parent_model = graph.models.get(parent) + if parent_model is None: + continue + if any(rel.name == joined_view for rel in parent_model.relationships): + continue + parent_model.relationships.append( + Relationship( + name=joined_view, + type="many_to_one", + metadata={"source": "topic", "topic": topic_record["name"]}, + ) + ) + def export(self, graph: SemanticGraph, output_path: str | Path) -> None: """Export semantic graph to Omni view format. @@ -651,9 +952,16 @@ def _export_relationships(self, models: dict[str, Model], output_dir: Path) -> N } rel_def["relationship_type"] = type_mapping.get(rel.type, "many_to_one") - # Build on_sql - from_key = rel.foreign_key or f"{rel.name}_id" - to_key = rel.primary_key or "id" + # Build on_sql. Which side holds the foreign vs. primary key + # depends on cardinality (see Relationship): for many_to_one the + # from_view holds the FK; for one_to_many / one_to_one the to_view + # (related model) holds the FK and the from_view contributes its key. + if rel.type in ("one_to_many", "one_to_one"): + from_key = rel.primary_key or "id" + to_key = rel.foreign_key or f"{model.name}_id" + else: + from_key = rel.foreign_key or f"{rel.name}_id" + to_key = rel.primary_key or "id" rel_def["on_sql"] = f"${{{model.name}.{from_key}}} = ${{{rel.name}.{to_key}}}" relationships.append(rel_def) diff --git a/tests/adapters/omni/test_estore.py b/tests/adapters/omni/test_estore.py index bfef6c19..2ce458a5 100644 --- a/tests/adapters/omni/test_estore.py +++ b/tests/adapters/omni/test_estore.py @@ -1,10 +1,13 @@ """Tests for estore-analytics Omni fixtures. Fixtures sourced from vbalalian/estore-analytics (MIT license). -Tests parsing of real-world Omni views with advanced features: -bin_boundaries, all_values, sample_values, format, filtered measures, -computed measures, synonyms, custom SQL dims, funnel measures, ratio measures, -hierarchical categories, RFM scores, and SCD Type 2 snapshots. +Tests parsing of a real-world Omni export with advanced features: +- a global ``relationships.yaml`` file (bare top-level list of joins), +- ``*.topic.yaml`` topic files (base view + nested joins), +- ``*.view.yaml`` views named by Omni's ``{schema}__{table_name}`` convention, +- dimension/measure metadata: bin_boundaries, all_values, sample_values, format, + synonyms, filtered measures, computed measures, custom SQL dims, funnel + measures, ratio measures, hierarchical categories, and RFM scores. """ from pathlib import Path @@ -32,21 +35,22 @@ def test_estore_loads_all_views(estore_graph): """All 6 estore view files parse into models.""" model_names = sorted(estore_graph.models.keys()) assert len(model_names) == 6 - # Names come from file stems (dim_users.view.yaml -> dim_users.view) + # Views are named by Omni's reference convention {schema}__{table_name} + # (e.g. dim_users.view.yaml in schema omni_dbt_marts -> omni_dbt_marts__dim_users). for expected in [ - "dim_categories.view", - "dim_products.view", - "dim_user_rfm.view", - "dim_users.view", - "fct_events.view", - "fct_sessions.view", + "omni_dbt_marts__dim_categories", + "omni_dbt_marts__dim_products", + "omni_dbt_marts__dim_user_rfm", + "omni_dbt_marts__dim_users", + "omni_dbt_marts__fct_events", + "omni_dbt_marts__fct_sessions", ]: assert expected in model_names, f"Missing model: {expected}" def test_estore_dim_users_dimensions(estore_graph): """dim_users has 24 dimensions including custom SQL and bin_boundaries.""" - model = estore_graph.models["dim_users.view"] + model = estore_graph.models["omni_dbt_marts__dim_users"] dim_names = [d.name for d in model.dimensions] # Basic dims @@ -70,7 +74,7 @@ def test_estore_dim_users_dimensions(estore_graph): def test_estore_dim_users_measures(estore_graph): """dim_users has 16 measures including filtered and computed.""" - model = estore_graph.models["dim_users.view"] + model = estore_graph.models["omni_dbt_marts__dim_users"] metric_names = [m.name for m in model.metrics] # Standard aggregates @@ -93,7 +97,7 @@ def test_estore_dim_users_measures(estore_graph): def test_estore_fct_events_dimensions(estore_graph): """fct_events has 16 dimensions including event metadata.""" - model = estore_graph.models["fct_events.view"] + model = estore_graph.models["omni_dbt_marts__fct_events"] dim_names = [d.name for d in model.dimensions] assert "event_id" in dim_names @@ -114,7 +118,7 @@ def test_estore_fct_events_dimensions(estore_graph): def test_estore_fct_events_measures(estore_graph): """fct_events has 13 measures including count_distinct and filtered.""" - model = estore_graph.models["fct_events.view"] + model = estore_graph.models["omni_dbt_marts__fct_events"] metric_names = [m.name for m in model.metrics] # count_distinct measures @@ -136,7 +140,7 @@ def test_estore_fct_events_measures(estore_graph): def test_estore_fct_sessions_dimensions(estore_graph): """fct_sessions has 19 dimensions including custom SQL dims.""" - model = estore_graph.models["fct_sessions.view"] + model = estore_graph.models["omni_dbt_marts__fct_sessions"] dim_names = [d.name for d in model.dimensions] # Funnel dim @@ -152,7 +156,7 @@ def test_estore_fct_sessions_dimensions(estore_graph): def test_estore_fct_sessions_measures(estore_graph): """fct_sessions has 10 measures including funnel and ratio measures.""" - model = estore_graph.models["fct_sessions.view"] + model = estore_graph.models["omni_dbt_marts__fct_sessions"] metric_names = [m.name for m in model.metrics] # Funnel measures @@ -172,7 +176,7 @@ def test_estore_fct_sessions_measures(estore_graph): def test_estore_dim_products(estore_graph): """dim_products has brand/category dims and a count measure.""" - model = estore_graph.models["dim_products.view"] + model = estore_graph.models["omni_dbt_marts__dim_products"] dim_names = [d.name for d in model.dimensions] assert "brand" in dim_names @@ -186,7 +190,7 @@ def test_estore_dim_products(estore_graph): def test_estore_dim_categories(estore_graph): """dim_categories has hierarchical category levels.""" - model = estore_graph.models["dim_categories.view"] + model = estore_graph.models["omni_dbt_marts__dim_categories"] dim_names = [d.name for d in model.dimensions] assert "category_lvl_1" in dim_names @@ -200,7 +204,7 @@ def test_estore_dim_categories(estore_graph): def test_estore_dim_user_rfm(estore_graph): """dim_user_rfm has RFM score dimensions.""" - model = estore_graph.models["dim_user_rfm.view"] + model = estore_graph.models["omni_dbt_marts__dim_user_rfm"] dim_names = [d.name for d in model.dimensions] assert "recency_score" in dim_names @@ -221,12 +225,12 @@ def test_estore_dim_user_rfm(estore_graph): def test_estore_table_references(estore_graph): """All estore views have schema.table_name format.""" expected_tables = { - "dim_users.view": "omni_dbt_marts.dim_users", - "fct_events.view": "omni_dbt_marts.fct_events", - "fct_sessions.view": "omni_dbt_marts.fct_sessions", - "dim_products.view": "omni_dbt_marts.dim_products", - "dim_categories.view": "omni_dbt_marts.dim_categories", - "dim_user_rfm.view": "omni_dbt_marts.dim_user_rfm", + "omni_dbt_marts__dim_users": "omni_dbt_marts.dim_users", + "omni_dbt_marts__fct_events": "omni_dbt_marts.fct_events", + "omni_dbt_marts__fct_sessions": "omni_dbt_marts.fct_sessions", + "omni_dbt_marts__dim_products": "omni_dbt_marts.dim_products", + "omni_dbt_marts__dim_categories": "omni_dbt_marts.dim_categories", + "omni_dbt_marts__dim_user_rfm": "omni_dbt_marts.dim_user_rfm", } for model_name, expected_table in expected_tables.items(): model = estore_graph.models[model_name] @@ -241,15 +245,15 @@ def test_estore_table_references(estore_graph): def test_estore_primary_keys(estore_graph): """Views with primary_key: true are detected.""" # fct_events has event_id as primary key - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] assert events.primary_key == "event_id" # dim_products has product_id as primary key - products = estore_graph.models["dim_products.view"] + products = estore_graph.models["omni_dbt_marts__dim_products"] assert products.primary_key == "product_id" # dim_categories has raw_category_id as primary key - categories = estore_graph.models["dim_categories.view"] + categories = estore_graph.models["omni_dbt_marts__dim_categories"] assert categories.primary_key == "raw_category_id" @@ -260,7 +264,7 @@ def test_estore_primary_keys(estore_graph): def test_estore_count_distinct_measures(estore_graph): """count_distinct aggregation is correctly parsed.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] unique_users = events.get_metric("unique_users") assert unique_users.agg == "count_distinct" @@ -275,7 +279,7 @@ def test_estore_count_distinct_measures(estore_graph): def test_estore_filtered_measures(estore_graph): """Filtered measures parse the filter conditions.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] purchase_count = events.get_metric("purchase_count") assert purchase_count.agg == "count" @@ -293,7 +297,7 @@ def test_estore_filtered_measures(estore_graph): def test_estore_dim_users_filtered_measures(estore_graph): """dim_users filtered measures (churned_user_count, purchaser_count).""" - users = estore_graph.models["dim_users.view"] + users = estore_graph.models["omni_dbt_marts__dim_users"] churned = users.get_metric("churned_user_count") assert churned.agg == "count" @@ -311,13 +315,13 @@ def test_estore_dim_users_filtered_measures(estore_graph): def test_estore_derived_measures(estore_graph): """Computed measures without aggregate_type parse as derived.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] purchase_rate = events.get_metric("purchase_rate") assert purchase_rate.sql is not None assert "is_purchase" in purchase_rate.sql - users = estore_graph.models["dim_users.view"] + users = estore_graph.models["omni_dbt_marts__dim_users"] churn_rate = users.get_metric("churn_rate") assert churn_rate.sql is not None @@ -325,7 +329,7 @@ def test_estore_derived_measures(estore_graph): def test_estore_sessions_ratio_measures(estore_graph): """fct_sessions ratio measures have SQL expressions.""" - sessions = estore_graph.models["fct_sessions.view"] + sessions = estore_graph.models["omni_dbt_marts__fct_sessions"] conversion_rate = sessions.get_metric("conversion_rate") assert conversion_rate.sql is not None @@ -345,7 +349,7 @@ def test_estore_sessions_ratio_measures(estore_graph): def test_estore_sql_references_cleaned(estore_graph): """${view.field} references are cleaned to just field names.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] sum_revenue = events.get_metric("sum_revenue") # ${omni_dbt_marts__fct_events.revenue} should become just "revenue" @@ -355,7 +359,7 @@ def test_estore_sql_references_cleaned(estore_graph): def test_estore_custom_sql_dims_cleaned(estore_graph): """Custom SQL dimensions have ${view.field} references cleaned.""" - sessions = estore_graph.models["fct_sessions.view"] + sessions = estore_graph.models["omni_dbt_marts__fct_sessions"] converting = sessions.get_dimension("is_converting_session") assert converting.sql is not None @@ -370,18 +374,18 @@ def test_estore_custom_sql_dims_cleaned(estore_graph): def test_estore_model_descriptions(estore_graph): """Models pick up description from view YAML.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] assert events.description is not None assert "events" in events.description.lower() - sessions = estore_graph.models["fct_sessions.view"] + sessions = estore_graph.models["omni_dbt_marts__fct_sessions"] assert sessions.description is not None assert "session" in sessions.description.lower() def test_estore_dimension_descriptions(estore_graph): """Dimensions carry their descriptions through.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] event_type = events.get_dimension("event_type") assert event_type.description is not None assert "event" in event_type.description.lower() @@ -389,7 +393,7 @@ def test_estore_dimension_descriptions(estore_graph): def test_estore_metric_labels(estore_graph): """Metric labels are preserved.""" - events = estore_graph.models["fct_events.view"] + events = estore_graph.models["omni_dbt_marts__fct_events"] sum_rev = events.get_metric("sum_revenue") assert sum_rev.label == "Total Revenue" @@ -398,5 +402,161 @@ def test_estore_metric_labels(estore_graph): assert unique_users.label == "Unique Users" +# ============================================================================= +# RELATIONSHIPS (global relationships.yaml — bare top-level list of joins) +# ============================================================================= + + +def test_estore_relationships_parsed(estore_graph): + """The global relationships.yaml is parsed into model relationships. + + Before this fix the estore export yielded 6 models but 0 relationships + because the adapter only read a nested ``relationships:`` key inside + model.yaml. Omni now ships a bare top-level list in relationships.yaml. + """ + all_rels = [] + for model in estore_graph.models.values(): + for rel in model.relationships: + all_rels.append((model.name, rel.name, rel.type)) + + # 4 joins defined in relationships.yaml + assert ("omni_dbt_marts__fct_events", "omni_dbt_marts__dim_products", "many_to_one") in all_rels + assert ("omni_dbt_marts__fct_events", "omni_dbt_marts__dim_users", "many_to_one") in all_rels + assert ("omni_dbt_marts__dim_users", "omni_dbt_marts__dim_user_rfm", "one_to_one") in all_rels + assert ("omni_dbt_marts__fct_sessions", "omni_dbt_marts__dim_users", "many_to_one") in all_rels + + +def test_estore_relationship_keys_from_on_sql(estore_graph): + """Foreign and primary keys are extracted from on_sql.""" + events = estore_graph.models["omni_dbt_marts__fct_events"] + + to_products = next(r for r in events.relationships if r.name == "omni_dbt_marts__dim_products") + assert to_products.foreign_key == "product_id" + assert to_products.primary_key == "product_id" + + to_users = next(r for r in events.relationships if r.name == "omni_dbt_marts__dim_users") + assert to_users.foreign_key == "user_id" + assert to_users.primary_key == "user_id" + + +def test_estore_relationship_metadata(estore_graph): + """join_type / reversible metadata is preserved on relationships.""" + events = estore_graph.models["omni_dbt_marts__fct_events"] + to_products = next(r for r in events.relationships if r.name == "omni_dbt_marts__dim_products") + assert to_products.metadata is not None + assert to_products.metadata["join_type"] == "always_left" + assert to_products.metadata["reversible"] is False + + +def test_estore_one_to_one_relationship(estore_graph): + """dim_users -> dim_user_rfm is a one_to_one relationship.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + rfm = next(r for r in users.relationships if r.name == "omni_dbt_marts__dim_user_rfm") + assert rfm.type == "one_to_one" + + +# ============================================================================= +# TOPICS (*.topic.yaml — base view + nested joins) +# ============================================================================= + + +def test_estore_topics_parsed(estore_graph): + """All three topic files are recorded on graph.topics. + + Before this fix topics were never read (parse only globbed views/*.yaml). + """ + topics = {t["name"]: t for t in estore_graph.topics} + assert set(topics) == {"events", "customers", "sessions"} + + +def test_estore_topic_base_views_and_labels(estore_graph): + """Topics expose base_view and label.""" + topics = {t["name"]: t for t in estore_graph.topics} + + assert topics["events"]["base_view"] == "omni_dbt_marts__fct_events" + assert topics["events"]["label"] == "Events" + + assert topics["customers"]["base_view"] == "omni_dbt_marts__dim_users" + assert topics["customers"]["label"] == "Customers" + + assert topics["sessions"]["base_view"] == "omni_dbt_marts__fct_sessions" + assert topics["sessions"]["label"] == "Sessions" + + +def test_estore_topic_nested_joins_flattened(estore_graph): + """Nested joins in a topic are flattened into the joined_views list.""" + topics = {t["name"]: t for t in estore_graph.topics} + + # Events topic: dim_users -> dim_user_rfm (nested), dim_products + events_joins = set(topics["events"]["joined_views"]) + assert "omni_dbt_marts__dim_users" in events_joins + assert "omni_dbt_marts__dim_user_rfm" in events_joins # nested under dim_users + assert "omni_dbt_marts__dim_products" in events_joins + + # Sessions topic: dim_users -> dim_user_rfm (nested) + sessions_joins = set(topics["sessions"]["joined_views"]) + assert "omni_dbt_marts__dim_users" in sessions_joins + assert "omni_dbt_marts__dim_user_rfm" in sessions_joins + + +# ============================================================================= +# DIMENSION / MEASURE METADATA +# ============================================================================= + + +def test_estore_dimension_format(estore_graph): + """Dimension format is captured.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + assert users.get_dimension("total_revenue").format == "currency" + assert users.get_dimension("user_id").format == "ID" + + +def test_estore_dimension_bin_boundaries(estore_graph): + """bin_boundaries are preserved in dimension metadata.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + aov_bin = users.get_dimension("avg_order_value_bin") + assert aov_bin.metadata is not None + assert aov_bin.metadata["bin_boundaries"] == [50, 100, 200, 400] + + +def test_estore_dimension_all_values(estore_graph): + """all_values are preserved in dimension metadata.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + status = users.get_dimension("activity_status") + assert status.metadata is not None + assert status.metadata["all_values"] == ["active", "declining", "at_risk", "churned", "prospect"] + + +def test_estore_dimension_sample_values(estore_graph): + """sample_values are preserved in dimension metadata.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + flag = users.get_dimension("data_quality_flag") + assert flag.metadata is not None + assert flag.metadata["sample_values"] == ["missing_sessions", "anomalous_session_ratio"] + + +def test_estore_measure_format_and_synonyms(estore_graph): + """Measure format and synonyms are captured.""" + events = estore_graph.models["omni_dbt_marts__fct_events"] + sum_revenue = events.get_metric("sum_revenue") + assert sum_revenue.format == "BIGUSDCURRENCY_2" + assert sum_revenue.metadata is not None + assert sum_revenue.metadata["synonyms"] == ["sales"] + assert sum_revenue.metadata["aggregate_type"] == "sum" + + +def test_estore_average_aggregate_mapped(estore_graph): + """Omni 'average' aggregate_type maps to avg.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + assert users.get_metric("avg_revenue_per_user").agg == "avg" + + +def test_estore_boolean_filter_value(estore_graph): + """A measure filter on a boolean field renders TRUE/FALSE, not quoted.""" + users = estore_graph.models["omni_dbt_marts__dim_users"] + purchasers = users.get_metric("purchaser_count") + assert purchasers.filters == ["has_purchase_history = TRUE"] + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/adapters/omni/test_parsing.py b/tests/adapters/omni/test_parsing.py index d2e811da..72d30452 100644 --- a/tests/adapters/omni/test_parsing.py +++ b/tests/adapters/omni/test_parsing.py @@ -215,6 +215,152 @@ def test_omni_time_comparison_export(): assert has_offset, "Expected date_offset_from_query filter in exported measure" +# ============================================================================= +# RELATIONSHIP KEY DIRECTION TESTS +# ============================================================================= + + +def _write_one_to_many_fixture(tmpdir_path: Path) -> None: + """Write a customers --one_to_many--> orders fixture with distinct key names. + + The join key on each side has a different column name (``customers.id`` vs + ``orders.customer_id``) so that key-direction bugs surface (when the names + match the direction is masked). + """ + views_dir = tmpdir_path / "views" + views_dir.mkdir() + + with open(views_dir / "customers.yaml", "w") as f: + yaml.dump( + { + "name": "customers", + "table_name": "customers", + "dimensions": { + "id": {"type": "number", "sql": "${TABLE}.id", "primary_key": True}, + "name": {"type": "string", "sql": "${TABLE}.name"}, + }, + "measures": {"count": {"aggregate_type": "count"}}, + }, + f, + ) + + with open(views_dir / "orders.yaml", "w") as f: + yaml.dump( + { + "name": "orders", + "table_name": "orders", + "dimensions": { + "id": {"type": "number", "sql": "${TABLE}.id", "primary_key": True}, + "customer_id": {"type": "number", "sql": "${TABLE}.customer_id"}, + "amount": {"type": "number", "sql": "${TABLE}.amount"}, + }, + "measures": {"total": {"aggregate_type": "sum", "sql": "${orders.amount}"}}, + }, + f, + ) + + with open(tmpdir_path / "relationships.yaml", "w") as f: + yaml.dump( + [ + { + "join_from_view": "customers", + "join_to_view": "orders", + "relationship_type": "one_to_many", + "on_sql": "${customers.id} = ${orders.customer_id}", + } + ], + f, + ) + + +def test_omni_one_to_many_key_direction(tmp_path): + """one_to_many keys must keep the local/related sides straight. + + For ``customers one_to_many orders`` joined on + ``${customers.id} = ${orders.customer_id}``, Sidemantic interprets + ``primary_key`` as the local (customers) key and ``foreign_key`` as the + related (orders) key. Assigning the on_sql sides naively (from->foreign, + to->primary) reverses the join and produces invalid SQL. + """ + _write_one_to_many_fixture(tmp_path) + + adapter = OmniAdapter() + graph = adapter.parse(tmp_path) + + rel = next(r for r in graph.models["customers"].relationships if r.name == "orders") + assert rel.type == "one_to_many" + # Local (customers) key carried as primary_key; related (orders) FK as foreign_key. + assert rel.primary_key == "id" + assert rel.foreign_key == "customer_id" + + +def test_omni_one_to_many_join_sql_not_reversed(tmp_path): + """The compiled join condition must be customers.id = orders.customer_id.""" + from sidemantic import SemanticLayer + + _write_one_to_many_fixture(tmp_path) + + adapter = OmniAdapter() + graph = adapter.parse(tmp_path) + + layer = SemanticLayer() + for model in graph.models.values(): + layer.add_model(model) + + sql = layer.compile(metrics=["orders.total"], dimensions=["customers.name"]).lower() + + # Normalize whitespace so we can match the join condition regardless of layout. + flat = " ".join(sql.split()) + + # Correct join pairs customers.id with orders.customer_id. + assert "customers_cte.id = orders_cte.customer_id" in flat + # The reversed (buggy) pairing joins the customers FK to the orders PK, and + # even references a customer_id column that customers does not have. + assert "customers_cte.customer_id = orders_cte.id" not in flat + assert "customers_cte.customer_id" not in flat + + +def test_omni_one_to_many_export_round_trip(tmp_path): + """Exporting a one_to_many relationship preserves key direction on re-import.""" + from sidemantic.core.relationship import Relationship + + customers = Model( + name="customers", + table="customers", + primary_key="id", + dimensions=[Dimension(name="id", sql="id", type="numeric")], + relationships=[Relationship(name="orders", type="one_to_many", primary_key="id", foreign_key="customer_id")], + ) + orders = Model( + name="orders", + table="orders", + primary_key="id", + dimensions=[ + Dimension(name="id", sql="id", type="numeric"), + Dimension(name="customer_id", sql="customer_id", type="numeric"), + ], + ) + + graph = SemanticGraph() + graph.add_model(customers) + graph.add_model(orders) + + adapter = OmniAdapter() + adapter.export(graph, tmp_path) + + # The exported on_sql must place the local key on the customers side and the + # related FK on the orders side: ${customers.id} = ${orders.customer_id}. + model_def = yaml.safe_load((tmp_path / "model.yaml").read_text()) + on_sql = model_def["relationships"][0]["on_sql"] + assert on_sql == "${customers.id} = ${orders.customer_id}" + + reimported = adapter.parse(tmp_path) + rel = next(r for r in reimported.models["customers"].relationships if r.name == "orders") + assert rel.type == "one_to_many" + assert rel.primary_key == "id" + assert rel.foreign_key == "customer_id" + + # ============================================================================= # DIMENSION TYPE MAPPING TESTS # ============================================================================= @@ -357,6 +503,259 @@ def test_omni_measure_aggregation_types(): temp_path.unlink() +def test_omni_extended_aggregate_types(): + """Test extended Omni aggregate types map to closest Sidemantic aggregation.""" + view_def = { + "name": "test", + "table_name": "test_table", + "measures": { + "med": {"aggregate_type": "median", "sql": "${test.amount}"}, + "med_distinct": { + "aggregate_type": "median_distinct_on", + "sql": "${test.amount}", + "custom_primary_key_sql": "${test.id}", + }, + "sum_distinct": {"aggregate_type": "sum_distinct_on", "sql": "${test.amount}"}, + "avg_distinct": {"aggregate_type": "average_distinct_on", "sql": "${test.amount}"}, + "p95": {"aggregate_type": "percentile", "sql": "${test.amount}", "percentile": 95}, + "names": {"aggregate_type": "list", "sql": "${test.name}"}, + }, + } + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(view_def, f) + temp_path = Path(f.name) + + try: + adapter = OmniAdapter() + graph = adapter.parse(temp_path) + model = graph.models["test"] + + # plain median maps to median + assert model.get_metric("med").agg == "median" + # *_distinct_on variants dedupe by a custom key before aggregating, which + # Sidemantic cannot model natively; they must NOT collapse to a plain + # sum/avg/median (that would silently drop the dedup and overcount on + # fan-out). They parse with agg unset and preserve the intent in metadata. + assert model.get_metric("med_distinct").agg is None + assert model.get_metric("sum_distinct").agg is None + assert model.get_metric("avg_distinct").agg is None + assert model.get_metric("sum_distinct").metadata["aggregate_type"] == "sum_distinct_on" + assert model.get_metric("avg_distinct").metadata["aggregate_type"] == "average_distinct_on" + # percentile and list have no native agg -> parsed as custom SQL (derived) + assert model.get_metric("p95").agg is None + assert model.get_metric("p95").metadata["aggregate_type"] == "percentile" + assert model.get_metric("p95").metadata["percentile"] == 95 + assert model.get_metric("names").agg is None + assert model.get_metric("names").metadata["aggregate_type"] == "list" + # original aggregate_type always preserved in metadata + assert model.get_metric("med_distinct").metadata["aggregate_type"] == "median_distinct_on" + # custom_primary_key_sql is preserved verbatim (Omni reference form retained) + assert model.get_metric("med_distinct").metadata["custom_primary_key_sql"] == "${test.id}" + finally: + temp_path.unlink() + + +def test_omni_filter_operators(): + """Test the documented Omni filter operators render to SQL.""" + view_def = { + "name": "test", + "table_name": "test_table", + "measures": { + "m_is": {"aggregate_type": "count", "filters": {"status": {"is": "open"}}}, + "m_not": {"aggregate_type": "count", "filters": {"status": {"not": "closed"}}}, + "m_gt": {"aggregate_type": "count", "filters": {"amount": {"greater_than": 100}}}, + "m_lt": {"aggregate_type": "count", "filters": {"amount": {"less_than": 10}}}, + "m_lte": {"aggregate_type": "count", "filters": {"amount": {"less_than_or_equal_to": 5}}}, + "m_contains": {"aggregate_type": "count", "filters": {"name": {"contains": "abc"}}}, + "m_starts": {"aggregate_type": "count", "filters": {"name": {"starts_with": "a"}}}, + "m_ends": {"aggregate_type": "count", "filters": {"name": {"ends_with": "z"}}}, + "m_between": {"aggregate_type": "count", "filters": {"amount": {"between": [1, 10]}}}, + }, + } + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(view_def, f) + temp_path = Path(f.name) + + try: + adapter = OmniAdapter() + model = adapter.parse(temp_path).models["test"] + + assert model.get_metric("m_is").filters == ["status = 'open'"] + assert model.get_metric("m_not").filters == ["status != 'closed'"] + assert model.get_metric("m_gt").filters == ["amount > 100"] + assert model.get_metric("m_lt").filters == ["amount < 10"] + assert model.get_metric("m_lte").filters == ["amount <= 5"] + assert model.get_metric("m_contains").filters == ["name LIKE '%abc%'"] + assert model.get_metric("m_starts").filters == ["name LIKE 'a%'"] + assert model.get_metric("m_ends").filters == ["name LIKE '%z'"] + assert model.get_metric("m_between").filters == ["amount BETWEEN 1 AND 10"] + finally: + temp_path.unlink() + + +def test_omni_multiple_timeframes(): + """Test multiple timeframes set base granularity and supported_granularities.""" + view_def = { + "name": "test", + "table_name": "test_table", + "dimensions": { + "created_at": { + "type": "timestamp", + "sql": "${TABLE}.created_at", + "timeframes": ["date", "week", "month", "year"], + }, + }, + } + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(view_def, f) + temp_path = Path(f.name) + + try: + adapter = OmniAdapter() + created_at = adapter.parse(temp_path).models["test"].get_dimension("created_at") + assert created_at.granularity == "day" # first timeframe "date" -> day + assert created_at.supported_granularities == ["day", "week", "month", "year"] + assert created_at.metadata["timeframes"] == ["date", "week", "month", "year"] + finally: + temp_path.unlink() + + +def test_omni_global_relationships_file(): + """Test a bare top-level relationships.yaml list (current Omni format).""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + views_dir = tmpdir_path / "views" + views_dir.mkdir() + + with open(views_dir / "orders.yaml", "w") as f: + yaml.dump({"name": "orders", "table_name": "orders"}, f) + with open(views_dir / "customers.yaml", "w") as f: + yaml.dump({"name": "customers", "table_name": "customers"}, f) + + # Bare top-level list of joins, including an assumed_many_to_one. + relationships = [ + { + "join_from_view": "orders", + "join_to_view": "customers", + "join_type": "always_left", + "on_sql": "${orders.customer_id} = ${customers.id}", + "relationship_type": "assumed_many_to_one", + "reversible": False, + } + ] + with open(tmpdir_path / "relationships.yaml", "w") as f: + yaml.dump(relationships, f) + + adapter = OmniAdapter() + graph = adapter.parse(tmpdir_path) + + orders = graph.models["orders"] + rel = next(r for r in orders.relationships if r.name == "customers") + # assumed_many_to_one collapses to many_to_one but is flagged in metadata. + assert rel.type == "many_to_one" + assert rel.foreign_key == "customer_id" + assert rel.primary_key == "id" + assert rel.metadata["assumed"] is True + + +def test_omni_schemaless_view_name_from_file_stem(): + """Schema-less views are named by their file stem, not ``table_name``. + + Omni relationships/topics reference a schema-less view by its file name. When + ``table_name`` differs from the stem (e.g. ``views/orders.yaml`` with + ``table_name: fact_orders``), the model must still be registered as ``orders`` + so a ``join_from_view: orders`` relationship attaches instead of being dropped. + """ + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + views_dir = tmpdir_path / "views" + views_dir.mkdir() + + # No name, no schema, and table_name differs from the file stem. + with open(views_dir / "orders.yaml", "w") as f: + yaml.dump( + { + "table_name": "fact_orders", + "dimensions": {"customer_id": {"type": "number", "sql": "${TABLE}.customer_id"}}, + }, + f, + ) + with open(views_dir / "customers.yaml", "w") as f: + yaml.dump( + { + "table_name": "dim_customers", + "dimensions": {"id": {"type": "number", "sql": "${TABLE}.id", "primary_key": True}}, + }, + f, + ) + + with open(tmpdir_path / "relationships.yaml", "w") as f: + yaml.dump( + [ + { + "join_from_view": "orders", + "join_to_view": "customers", + "relationship_type": "many_to_one", + "on_sql": "${orders.customer_id} = ${customers.id}", + } + ], + f, + ) + + adapter = OmniAdapter() + graph = adapter.parse(tmpdir_path) + + # Registered under the file stem, with the physical table preserved. + assert "orders" in graph.models + assert "fact_orders" not in graph.models + assert graph.models["orders"].table == "fact_orders" + + # The relationship referencing the stem-named view attaches (not dropped). + rel = next(r for r in graph.models["orders"].relationships if r.name == "customers") + assert rel.type == "many_to_one" + assert rel.foreign_key == "customer_id" + assert rel.primary_key == "id" + + +def test_omni_topic_realizes_relationships(): + """Test that a topic file's joins are realized as relationships.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + views_dir = tmpdir_path / "views" + views_dir.mkdir() + topics_dir = tmpdir_path / "topics" + topics_dir.mkdir() + + for view in ("orders", "customers", "addresses"): + with open(views_dir / f"{view}.yaml", "w") as f: + yaml.dump({"name": view, "table_name": view}, f) + + topic = { + "base_view": "orders", + "label": "Orders", + "joins": {"customers": {"addresses": {}}}, + } + with open(topics_dir / "orders.topic.yaml", "w") as f: + yaml.dump(topic, f) + + adapter = OmniAdapter() + graph = adapter.parse(tmpdir_path) + + topics = {t["name"]: t for t in graph.topics} + assert "orders" in topics + assert topics["orders"]["base_view"] == "orders" + assert set(topics["orders"]["joined_views"]) == {"customers", "addresses"} + + # orders -> customers and customers -> addresses become relationships. + orders_rels = {r.name for r in graph.models["orders"].relationships} + assert "customers" in orders_rels + customers_rels = {r.name for r in graph.models["customers"].relationships} + assert "addresses" in customers_rels + + def test_omni_measure_sql_reference_cleanup(): """Test that ${view.field} SQL references are cleaned up during parse.""" view_def = { diff --git a/tests/adapters/test_added_fixture_coverage.py b/tests/adapters/test_added_fixture_coverage.py index 3aa89d3f..135ed255 100644 --- a/tests/adapters/test_added_fixture_coverage.py +++ b/tests/adapters/test_added_fixture_coverage.py @@ -193,6 +193,11 @@ "tests/fixtures/lookml/segment_attribution_model.model.lkml", "tests/fixtures/omni/estore/model.yaml", "tests/fixtures/omni/estore/relationships.yaml", + # Topic files reference views but define no models of their own; parsed + # standalone they yield an empty graph. + "tests/fixtures/omni/estore/topics/Customers.topic.yaml", + "tests/fixtures/omni/estore/topics/Events.topic.yaml", + "tests/fixtures/omni/estore/topics/sessions.topic.yaml", "tests/fixtures/rill/bids_canvas.yaml", "tests/fixtures/rill/bids_explore.yaml", "tests/fixtures/rill/nyc_trips_dashboard.yaml", @@ -210,9 +215,6 @@ "tests/fixtures/malloy/ecommerce_malloydata.malloy", "tests/fixtures/malloy/flights_cube.malloy", "tests/fixtures/malloy/ga4_config.malloy", - "tests/fixtures/omni/estore/topics/Customers.topic.yaml", - "tests/fixtures/omni/estore/topics/Events.topic.yaml", - "tests/fixtures/omni/estore/topics/sessions.topic.yaml", } ADDED_EXPECTED_NO_COMPILE_QUERY_FIXTURES = { @@ -250,16 +252,11 @@ "tests/fixtures/metricflow/scd_metrics.yaml", "tests/fixtures/omni/estore/model.yaml", "tests/fixtures/omni/estore/relationships.yaml", - "tests/fixtures/omni/estore/snapshots/snap_user_rfm.view.yaml", "tests/fixtures/omni/estore/topics/Customers.topic.yaml", "tests/fixtures/omni/estore/topics/Events.topic.yaml", "tests/fixtures/omni/estore/topics/sessions.topic.yaml", - "tests/fixtures/omni/estore/views/dim_categories.view.yaml", - "tests/fixtures/omni/estore/views/dim_products.view.yaml", - "tests/fixtures/omni/estore/views/dim_user_rfm.view.yaml", - "tests/fixtures/omni/estore/views/dim_users.view.yaml", - "tests/fixtures/omni/estore/views/fct_events.view.yaml", - "tests/fixtures/omni/estore/views/fct_sessions.view.yaml", + # Note: views/*.view.yaml and snapshots/*.view.yaml now compile (named + # {schema}__{table}, no dot in name). "tests/fixtures/rill/bids_canvas.yaml", "tests/fixtures/rill/bids_explore.yaml", "tests/fixtures/rill/nyc_trips_dashboard.yaml", diff --git a/tests/adapters/test_fixture_functionality_contracts.py b/tests/adapters/test_fixture_functionality_contracts.py index ecde29a4..72f7036d 100644 --- a/tests/adapters/test_fixture_functionality_contracts.py +++ b/tests/adapters/test_fixture_functionality_contracts.py @@ -100,6 +100,11 @@ "tests/fixtures/lookml/segment_attribution_model.model.lkml", "tests/fixtures/omni/estore/model.yaml", "tests/fixtures/omni/estore/relationships.yaml", + # Topic files reference views but define no models of their own; parsed + # standalone they yield an empty graph. + "tests/fixtures/omni/estore/topics/Customers.topic.yaml", + "tests/fixtures/omni/estore/topics/Events.topic.yaml", + "tests/fixtures/omni/estore/topics/sessions.topic.yaml", "tests/fixtures/omni/model.yaml", "tests/fixtures/rill/bids_canvas.yaml", "tests/fixtures/rill/bids_explore.yaml", @@ -127,9 +132,6 @@ "tests/fixtures/malloy/ecommerce_malloydata.malloy", "tests/fixtures/malloy/flights_cube.malloy", "tests/fixtures/malloy/ga4_config.malloy", - "tests/fixtures/omni/estore/topics/Customers.topic.yaml", - "tests/fixtures/omni/estore/topics/Events.topic.yaml", - "tests/fixtures/omni/estore/topics/sessions.topic.yaml", "tests/fixtures/tableau/real_world/document_api_tableau10.tds", "tests/fixtures/tableau/real_world/document_api_tableau93.tds", }