diff --git a/sidemantic/adapters/tableau.py b/sidemantic/adapters/tableau.py index 78def200..56163e1a 100644 --- a/sidemantic/adapters/tableau.py +++ b/sidemantic/adapters/tableau.py @@ -23,6 +23,9 @@ "date": "time", "datetime": "time", "boolean": "boolean", + # Geospatial columns (Tableau's TABLEAU.TABGEOGRAPHY); treated as + # categorical since they group/filter rather than aggregate numerically. + "spatial": "categorical", } _DATATYPE_GRANULARITY: dict[str, str] = { @@ -583,6 +586,25 @@ def _normalize_column_name(name: str) -> str: return stripped +# --- Relation type groupings --- +# Physical-layer set operations: members are stacked vertically (UNION ALL). +# "union" is an explicit multi-table union; "batch-union" is a wildcard/pattern +# union over many same-shaped tables. Both expose the same nested-relation shape. +_SET_OPERATION_RELATIONS: set[str] = {"union", "batch-union"} + +# Derived/wrapper relations that transform or wrap a single child relation +# (or raw SQL) rather than referencing a base table directly: +# - pivot: reshapes columns of its child relation (top-level pivots +# are emitted as synthesized UNPIVOT SQL; see +# _build_pivot_sql. Listed here so a pivot nested inside a +# join/union tree still resolves to its child's grain.) 
+# - subquery: wraps SQL as a derived subquery (like "text") +# - stored-proc: references a stored procedure as a relation +# - project: column projection over a child relation +# - text-transform: applies a text/parse transform over a child relation +_WRAPPER_RELATIONS: set[str] = {"pivot", "subquery", "stored-proc", "project", "text-transform"} + + def _extract_table_name(relation_elem: ET.Element) -> str | None: """Extract qualified table name from a element.""" table_attr = relation_elem.get("table") @@ -591,6 +613,99 @@ def _extract_table_name(relation_elem: ET.Element) -> str | None: return None +def _iter_child_relations(relation_elem: ET.Element) -> list[ET.Element]: + """Return direct child elements, tolerating namespaced tags.""" + return [child for child in relation_elem if _is_relation_tag(child.tag)] + + +def _local_tag(tag: str) -> str: + """Return the local part of a possibly namespace-prefixed XML tag.""" + local = tag + for sep in ("}", ":"): + if sep in local: + local = local.rsplit(sep, 1)[-1] + return local + + +def _extract_pivot_source_columns(relation_elem: ET.Element) -> list[str]: + """Collect the wide source columns a unpivots. + + Tableau stores the source (wide) columns being unpivoted as ```` + children (either directly under the pivot relation or nested inside each + ```` element). Returns the de-duplicated, normalized column + names in document order. Returns an empty list when no source columns are + declared (older / minimal datasources), in which case UNPIVOT SQL cannot be + synthesized. + """ + sources: list[str] = [] + seen: set[str] = set() + for elem in relation_elem.iter(): + if _local_tag(elem.tag) != "map": + continue + source = elem.get("source") + if not source: + continue + name = _normalize_column_name(source) + if name and name not in seen: + seen.add(name) + sources.append(name) + return sources + + +def _strip_derived_alias(expr: str) -> str: + """Strip a trailing "AS " from a parenthesized derived-table expression. 
+ + ``_parse_relation_tree`` returns subquery/custom-SQL/union sources wrapped as + ``() AS x) AS t``. Strip the + outer alias so the generator supplies the single alias it expects. + + Only the outer ``() AS `` shape is unwrapped; any other expression + is returned unchanged. + """ + text = expr.strip() + if not text.startswith("("): + return expr + # Find the parenthesis that closes the leading "(", skipping parentheses that + # appear inside SQL string literals ('...') and quoted identifiers ("..."). + # Without this, a body like ``SELECT ')' AS amount`` would terminate the scan + # early and leave the outer alias attached. + depth = 0 + close_idx = -1 + quote: str | None = None + i = 0 + length = len(text) + while i < length: + ch = text[i] + if quote is not None: + if ch == quote: + # Two consecutive quote chars are an escaped quote, not a close. + if i + 1 < length and text[i + 1] == quote: + i += 2 + continue + quote = None + i += 1 + continue + if ch in ("'", '"'): + quote = ch + elif ch == "(": + depth += 1 + elif ch == ")": + depth -= 1 + if depth == 0: + close_idx = i + break + i += 1 + if close_idx == -1: + return expr + remainder = text[close_idx + 1 :].lstrip() + if remainder[:3].upper() == "AS " or remainder.upper() == "AS": + # "() AS " -> "()"; generator adds its own alias. + return text[: close_idx + 1] + return expr + + # Namespace prefixes commonly used in Tableau XML files _TABLEAU_NS_PREFIXES = [ "user", @@ -647,6 +762,27 @@ def _is_object_graph_tag(tag: str) -> bool: return tag.endswith("object-graph") and ("." in tag or ":" in tag) +def _get_attr_local(elem: ET.Element, local_name: str) -> str | None: + """Read an attribute by its local name, tolerating namespace prefixes. + + Tableau Semantics attributes (semantic-layer, is-legacy) may appear plain or + with a namespace prefix that _parse_tableau_xml rewrites to an underscored + form (e.g. "ns_is-legacy"). 
Match the plain name first, then any attribute + whose local part (after '}', ':' or '_') equals ``local_name``. + """ + value = elem.get(local_name) + if value is not None: + return value + for key, val in elem.attrib.items(): + candidate = key + for sep in ("}", ":"): + if sep in candidate: + candidate = candidate.rsplit(sep, 1)[-1] + if candidate == local_name or candidate.endswith("_" + local_name): + return val + return None + + def _find_relation_element(connection: ET.Element) -> ET.Element | None: """Find the element inside a connection, handling namespace prefixes. @@ -752,6 +888,12 @@ class _ObjectGraphInfo: relationships: list[Relationship] joins: list[_ObjectGraphJoin] + # Tableau Semantics-layer attributes read from the element: + # semantic_layer -> value of the "semantic-layer" attribute (e.g. "true") + # is_legacy -> value of the "is-legacy" attribute (e.g. "false") + # None when the attribute is absent (older / non-semantic datasources). + semantic_layer: str | None = None + is_legacy: str | None = None def _quote_sql_identifier(identifier: str) -> str: @@ -900,6 +1042,42 @@ def _parse_datasource(self, ds_elem: ET.Element) -> Model | None: elif rel_type == "collection": collection_info = self._parse_collection(relation) table = collection_info.base_table_qualified + elif rel_type in _SET_OPERATION_RELATIONS: + # union / batch-union: stack member relations with UNION ALL + union_sql = self._build_union_sql(relation) + if union_sql: + # Either a multi-member UNION ALL or a single member that + # resolved to a "SELECT * FROM " select. Both are + # derived SQL, not a bare table reference, so store them as + # model.sql; the generator wraps model.sql as "() AS t". + sql = union_sql + elif rel_type == "pivot": + # Pivot reshapes wide source columns into "Pivot Field Names"/ + # "Pivot Field Values" outputs. Synthesize UNPIVOT derived SQL + # so the model queries those output columns rather than the raw + # child table where they do not exist. 
+ pivot_sql = self._build_pivot_sql(relation) + if pivot_sql: + sql = pivot_sql + else: + # Source columns not declared: fall back to the wrapped + # child table (best effort for minimal datasources). + base_table, _ = self._parse_relation_tree(relation) + table = base_table + elif rel_type in _WRAPPER_RELATIONS: + # subquery / stored-proc / project / text-transform: + # derived relations that wrap a child relation or raw SQL. + base_table, joins = self._parse_relation_tree(relation) + if joins: + sql = self._build_join_sql(base_table, joins) + relationships = self._extract_relationships(joins) + elif base_table is not None and (base_table.startswith("(") or " " in base_table): + # Returned a subquery/derived expression -> use as SQL. + # Strip any outer "AS " so the generator's own + # "() AS t" wrapping does not double-alias it. + sql = _strip_derived_alias(base_table) + else: + table = base_table # Build metadata lookup from before object-graph parsing so # collection sources can build a projected joined SQL model. @@ -951,6 +1129,17 @@ def _parse_datasource(self, ds_elem: ET.Element) -> Model | None: ) primary_key = "__tableau_pk" + # Surface Tableau Semantics-layer attributes (semantic-layer / is-legacy) + # from the object-graph as model metadata so downstream consumers can + # distinguish modern semantic models from legacy object models. 
+ model_metadata: dict | None = None + if object_graph.semantic_layer is not None or object_graph.is_legacy is not None: + model_metadata = {} + if object_graph.semantic_layer is not None: + model_metadata["tableau_semantic_layer"] = object_graph.semantic_layer + if object_graph.is_legacy is not None: + model_metadata["tableau_is_legacy"] = object_graph.is_legacy + model = Model( name=name, table=table, @@ -960,6 +1149,7 @@ def _parse_datasource(self, ds_elem: ET.Element) -> Model | None: metrics=metrics, relationships=relationships, segments=segments, + metadata=model_metadata, ) return model @@ -1252,6 +1442,12 @@ def _parse_object_graph(self, ds_elem: ET.Element) -> _ObjectGraphInfo: if og_elem is None: return _ObjectGraphInfo(relationships=[], joins=[]) + # Read Tableau Semantics-layer attributes from the element. + # Newer "Tableau Semantics" datasources tag the object-graph with + # semantic-layer / is-legacy attributes describing the modeling layer. + semantic_layer = _get_attr_local(og_elem, "semantic-layer") + is_legacy = _get_attr_local(og_elem, "is-legacy") + # Build object-id -> table-name map from obj_map: dict[str, str] = {} objects_elem = og_elem.find("objects") @@ -1267,7 +1463,12 @@ def _parse_object_graph(self, ds_elem: ET.Element) -> _ObjectGraphInfo: joins: list[_ObjectGraphJoin] = [] rels_elem = og_elem.find("relationships") if rels_elem is None: - return _ObjectGraphInfo(relationships=[], joins=[]) + return _ObjectGraphInfo( + relationships=[], + joins=[], + semantic_layer=semantic_layer, + is_legacy=is_legacy, + ) for rel in rels_elem.findall("relationship"): # Extract join columns from expression @@ -1308,7 +1509,12 @@ def _parse_object_graph(self, ds_elem: ET.Element) -> _ObjectGraphInfo: ) ) - return _ObjectGraphInfo(relationships=relationships, joins=joins) + return _ObjectGraphInfo( + relationships=relationships, + joins=joins, + semantic_layer=semantic_layer, + is_legacy=is_legacy, + ) def _build_collection_field_sources(self, 
metadata_lookup: dict[str, dict]) -> dict[str, tuple[str, str]]: """Map semantic field names to logical table + physical column sources.""" @@ -1521,8 +1727,8 @@ def _parse_relation_tree(self, relation_elem: ET.Element) -> tuple[str | None, l table_name = _extract_table_name(relation_elem) return (table_name, []) - if rel_type == "text": - # Custom SQL: wrap as subquery with quoted alias + if rel_type in ("text", "subquery"): + # Custom SQL / subquery: wrap as a derived subquery with quoted alias. name = relation_elem.get("name", "") sql_body = (relation_elem.text or "").strip() if sql_body and name: @@ -1530,6 +1736,35 @@ def _parse_relation_tree(self, relation_elem: ET.Element) -> tuple[str | None, l return (f"({sql_body}) AS {quoted_name}", []) return (name or sql_body, []) + if rel_type == "stored-proc": + # Stored procedure: cannot be joined/unioned. Reference its actual + # name when available, otherwise fall back to the relation name. + sp_name = relation_elem.get("stored-proc") or relation_elem.get("name") + actual = relation_elem.find("actual-name") + if actual is not None and actual.text: + sp_name = actual.text + return (_strip_brackets(sp_name) if sp_name else None, []) + + if rel_type in _SET_OPERATION_RELATIONS: + # union / batch-union nested in a relation tree: build a subquery. + union_sql = self._build_union_sql(relation_elem) + if union_sql: + name = relation_elem.get("name", "") + quoted_name = f'"{name}"' if name and (" " in name or "(" in name) else name + alias = f" AS {quoted_name}" if quoted_name else "" + return (f"({union_sql}){alias}", []) + return (None, []) + + if rel_type in _WRAPPER_RELATIONS: + # pivot / project / text-transform wrap a single child relation. + # The transform reshapes columns but keeps a single table grain, so + # resolve to the wrapped child's base table/SQL. 
def _build_union_sql(self, relation_elem: ET.Element) -> str | None:
    """Assemble the stacked query for a union / batch-union relation.

    Each direct child relation is resolved through ``_parse_relation_tree``.
    A child contributes one ``SELECT * FROM <source>`` statement when it
    resolves to a non-empty source with no nested joins; any other child is
    skipped.

    Returns:
        * ``None`` when no child yields a usable source;
        * the lone ``SELECT * FROM <source>`` statement when exactly one
          child is usable (still a SELECT, not a bare table reference);
        * otherwise all statements joined with ``UNION ALL``, one statement
          per line.
    """
    statements: list[str] = []
    for child in _iter_child_relations(relation_elem):
        from_source, nested_joins = self._parse_relation_tree(child)
        # Keep only flat members: a resolved source and no join tree.
        if from_source and not nested_joins:
            statements.append(f"SELECT * FROM {from_source}")

    if not statements:
        return None
    if len(statements) == 1:
        return statements[0]
    return "\nUNION ALL\n".join(statements)
+ + Returns ``None`` when the wrapped child table or the wide source columns + cannot be determined, so the caller can fall back to its prior behavior. + """ + children = _iter_child_relations(relation_elem) + if not children: + return None + child_table, child_joins = self._parse_relation_tree(children[0]) + # UNPIVOT needs a single concrete FROM source, not a joined/derived tree. + if not child_table or child_joins or child_table.startswith("(") or " " in child_table: + return None + + source_columns = _extract_pivot_source_columns(relation_elem) + if not source_columns: + return None + + # Default to Tableau's standard pivot output column names; override from + # declarations when present so a renamed pivot still maps. + name_col = "Pivot Field Names" + value_col = "Pivot Field Values" + pivot_outputs = [ + _normalize_column_name(elem.get("name", "")) + for elem in relation_elem + if _local_tag(elem.tag) == "pivot-column" and elem.get("name") + ] + for output in pivot_outputs: + lowered = output.lower() + if "name" in lowered: + name_col = output + elif "value" in lowered: + value_col = output + + on_cols = ", ".join(_quote_sql_identifier(col) for col in source_columns) + return ( + f"SELECT * FROM (UNPIVOT {_quote_table_reference(child_table)} " + f"ON {on_cols} " + f"INTO NAME {_quote_sql_identifier(name_col)} VALUE {_quote_sql_identifier(value_col)})" + ) + def _extract_relationships(self, joins: list[_JoinInfo]) -> list[Relationship]: """Extract Relationship objects from parsed joins.""" relationships: list[Relationship] = [] diff --git a/tests/adapters/tableau/test_parsing.py b/tests/adapters/tableau/test_parsing.py index 3a5713df..9617738a 100644 --- a/tests/adapters/tableau/test_parsing.py +++ b/tests/adapters/tableau/test_parsing.py @@ -432,3 +432,142 @@ def test_empty_datasource(adapter, tmp_path): empty_tds.write_text("\n\n") graph = adapter.parse(empty_tds) assert len(graph.models) == 0 + + +# 
def _compiles_to_valid_duckdb_sql(model) -> str:
    """Compile the model's first metric and assert DuckDB can parse the SQL.

    The model is registered on a fresh ``SemanticLayer``, its first metric is
    compiled, and the result is run through ``EXPLAIN`` on a throwaway DuckDB
    connection. Errors whose first line mentions a missing object
    ("does not exist" / "Catalog Error") are tolerated; any other DuckDB error
    is re-raised as an ``AssertionError`` carrying the generated SQL.

    Args:
        model: Parsed model with at least one metric.

    Returns:
        The compiled SQL string, so callers can make further assertions on it.

    Raises:
        AssertionError: If DuckDB rejects the SQL for a reason other than a
            missing catalog object.
    """
    import duckdb

    sl = SemanticLayer()
    sl.add_model(model)
    measure = model.metrics[0].name
    sql = sl.compile(metrics=[f"{model.name}.{measure}"])

    con = duckdb.connect()
    try:
        con.execute("EXPLAIN " + sql)
    except duckdb.Error as exc:  # pragma: no cover - failure path
        # Guard against an empty error message instead of raising IndexError.
        lines = str(exc).splitlines()
        message = lines[0] if lines else ""
        # A missing table is fine (fixtures reference uncreated tables); only a
        # syntax/parser error means we generated invalid FROM-clause SQL.
        if "does not exist" not in message and "Catalog Error" not in message:
            raise AssertionError(f"DuckDB rejected generated SQL:\n{sql}\n--> {message}") from exc
    finally:
        # Always release the connection, including on the failure path.
        con.close()
    return sql
+ assert model.table is None + assert model.sql == "SELECT * FROM public.orders" + + sql = _compiles_to_valid_duckdb_sql(model) + assert "FROM (SELECT * FROM public.orders) AS t" in sql + + +def test_multi_member_union_builds_union_all(adapter, tmp_path): + """A union with multiple members stacks them with UNION ALL as derived SQL.""" + tds = tmp_path / "multi_union.tds" + tds.write_text( + f""" + + + + + + + + {_MEASURE_COL} +""" + ) + + graph = adapter.parse(tds) + model = graph.models["multi_union"] + + assert model.table is None + assert model.sql is not None + assert "UNION ALL" in model.sql + + _compiles_to_valid_duckdb_sql(model) + + +def test_top_level_subquery_relation_not_double_aliased(adapter, tmp_path): + """A top-level type='subquery' relation stores raw SELECT SQL; the generator's + own "() AS t" wrapping must not produce a double-aliased derived table.""" + tds = tmp_path / "subquery.tds" + tds.write_text( + f""" + + + SELECT * FROM users WHERE active = 1 + + {_MEASURE_COL} +""" + ) + + graph = adapter.parse(tds) + model = graph.models["subquery_ds"] + + assert model.table is None + assert model.sql is not None + # Outer "AS " must have been stripped so the generator can add "AS t". + assert not model.sql.rstrip().endswith('AS "Active Users"') + + sql = _compiles_to_valid_duckdb_sql(model) + assert 'AS "Active Users"' not in sql + + +def test_subquery_with_paren_in_string_literal_strips_alias(adapter, tmp_path): + """A subquery whose SELECT contains a ")" inside a SQL string literal must + still have its outer "AS " stripped. The alias-stripping paren scan + must skip quoted strings, otherwise the literal ")" ends the scan early and + leaves "(...) 
AS " double-aliased by the generator.""" + tds = tmp_path / "paren_literal.tds" + tds.write_text( + f""" + + + SELECT 1 AS amount, ')' AS marker + + {_MEASURE_COL} +""" + ) + + graph = adapter.parse(tds) + model = graph.models["paren_ds"] + + assert model.table is None + assert model.sql is not None + # The ")" literal must not have truncated alias stripping. + assert not model.sql.rstrip().endswith('AS "Weird"') + assert "')'" in model.sql + + sql = _compiles_to_valid_duckdb_sql(model) + assert 'AS "Weird"' not in sql diff --git a/tests/adapters/tableau/test_relation_types.py b/tests/adapters/tableau/test_relation_types.py new file mode 100644 index 00000000..9ecfc7f7 --- /dev/null +++ b/tests/adapters/tableau/test_relation_types.py @@ -0,0 +1,208 @@ +"""Tests for Tableau adapter handling of extended relation types and attributes. + +Covers physical-layer relation types beyond table/join/text/collection +(union, batch-union, pivot, subquery, stored-proc, project, text-transform), +the Tableau Semantics object-graph semantic-layer / is-legacy attributes, and +the spatial datatype mapping. 
+""" + +from pathlib import Path + +import pytest + +from sidemantic.adapters.tableau import TableauAdapter + +FIXTURES = Path(__file__).parent.parent.parent / "fixtures" / "tableau" + + +@pytest.fixture +def adapter(): + return TableauAdapter() + + +# ============================================================================= +# SET-OPERATION RELATIONS: union / batch-union +# ============================================================================= + + +def test_union_relation_builds_union_all_sql(adapter): + """union.tds: a union relation stacks members with UNION ALL.""" + graph = adapter.parse(FIXTURES / "union.tds") + + assert "union_sales" in graph.models + model = graph.models["union_sales"] + + assert model.table is None + assert model.sql is not None + assert "UNION ALL" in model.sql + assert "public.sales_2023" in model.sql + assert "public.sales_2024" in model.sql + + # Columns still imported + assert model.get_dimension("region") is not None + assert model.get_metric("amount") is not None + + +def test_batch_union_relation_builds_union_all_sql(adapter): + """batch_union.tds: a batch-union (wildcard) relation also unions members.""" + graph = adapter.parse(FIXTURES / "batch_union.tds") + + assert "monthly_logs" in graph.models + model = graph.models["monthly_logs"] + + assert model.table is None + assert model.sql is not None + # Three members -> two UNION ALL separators + assert model.sql.count("UNION ALL") == 2 + assert "jan.csv" in model.sql + assert "feb.csv" in model.sql + assert "mar.csv" in model.sql + + +# ============================================================================= +# WRAPPER RELATIONS: subquery / stored-proc / pivot / project / text-transform +# ============================================================================= + + +def test_subquery_relation_becomes_derived_sql(adapter): + """subquery.tds: a subquery relation wraps its SQL as a derived source.""" + graph = adapter.parse(FIXTURES / "subquery.tds") + + assert 
"active_users" in graph.models + model = graph.models["active_users"] + + assert model.sql is not None + assert "SELECT * FROM users WHERE active = true" in model.sql + assert model.get_dimension("user_id") is not None + + +def test_stored_proc_relation_resolves_actual_name(adapter): + """spatial_proc.tds: a stored-proc relation resolves to its actual name.""" + graph = adapter.parse(FIXTURES / "spatial_proc.tds") + + assert "store_locations" in graph.models + model = graph.models["store_locations"] + + # Stored procedures can't be joined/unioned; we resolve to the proc name. + assert model.table == "dbo.get_store_locations" + + +def test_pivot_relation_builds_unpivot_sql(adapter): + """pivot.tds: a pivot relation emits UNPIVOT derived SQL, not a bare reference + to the wide child table (where the pivot output columns do not exist).""" + graph = adapter.parse(FIXTURES / "pivot.tds") + + assert "pivoted_sales" in graph.models + model = graph.models["pivoted_sales"] + + # Must NOT resolve to the raw wide table: the imported fields are the pivot + # outputs, which only exist after the UNPIVOT. + assert model.table is None + assert model.sql is not None + assert "UNPIVOT" in model.sql + # Wide source columns are unpivoted into the standard output columns. 
def test_pivot_relation_sql_compiles_to_valid_duckdb(adapter):
    """The synthesized UNPIVOT SQL must compile to a query DuckDB can parse.

    Parses the ``pivot.tds`` fixture, compiles the "Pivot Field Values" metric
    of the ``pivoted_sales`` model and runs ``EXPLAIN`` on the result. Errors
    about a missing catalog object are tolerated; any other DuckDB error fails
    the test.
    """
    import duckdb

    from sidemantic import SemanticLayer

    graph = adapter.parse(FIXTURES / "pivot.tds")
    model = graph.models["pivoted_sales"]

    sl = SemanticLayer()
    sl.add_model(model)
    sql = sl.compile(metrics=["pivoted_sales.Pivot Field Values"])

    con = duckdb.connect()
    try:
        con.execute("EXPLAIN " + sql)
    except duckdb.Error as exc:  # pragma: no cover - failure path
        # Guard against an empty error message instead of raising IndexError.
        lines = str(exc).splitlines()
        message = lines[0] if lines else ""
        # A missing source table is fine (the fixture references an uncreated
        # table); only a syntax/parser error means we emitted invalid SQL.
        if "does not exist" not in message and "Catalog Error" not in message:
            raise AssertionError(f"DuckDB rejected generated UNPIVOT SQL:\n{sql}\n--> {message}") from exc
    finally:
        # Always release the connection, including on the failure path.
        con.close()
"true" + assert model.metadata.get("tableau_is_legacy") == "false" + + # Relationships still extracted from the object-graph. + assert len(model.relationships) == 1 + assert model.relationships[0].name == "Customers" + + +def test_legacy_object_graph_has_no_semantic_metadata(adapter): + """Object-graph without semantic attributes leaves model metadata unset.""" + # The false_object_graph inline fixture (see test_parsing) has no + # semantic-layer / is-legacy attributes; assert via a real_world fixture + # that lacks them too. + real_world = FIXTURES / "real_world" / "thoughtspot_sf_trial.tds" + if not real_world.exists(): + pytest.skip("real_world fixtures not present") + + graph = adapter.parse(real_world) + model = graph.models.get("SF Trial") + assert model is not None + # No semantic-layer / is-legacy attributes on this object-graph. + if model.metadata is not None: + assert "tableau_semantic_layer" not in model.metadata + assert "tableau_is_legacy" not in model.metadata diff --git a/tests/fixtures/tableau/batch_union.tds b/tests/fixtures/tableau/batch_union.tds new file mode 100644 index 00000000..ccecf680 --- /dev/null +++ b/tests/fixtures/tableau/batch_union.tds @@ -0,0 +1,19 @@ + + + + + + + + + + + [Event] + [Union of Logs] + event + string + + + + + diff --git a/tests/fixtures/tableau/pivot.tds b/tests/fixtures/tableau/pivot.tds new file mode 100644 index 00000000..6dda1ccb --- /dev/null +++ b/tests/fixtures/tableau/pivot.tds @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + [Pivot Field Names] + [Pivoted] + Pivot Field Names + string + + + [Pivot Field Values] + [Pivoted] + Pivot Field Values + real + Sum + + + + + + diff --git a/tests/fixtures/tableau/project.tds b/tests/fixtures/tableau/project.tds new file mode 100644 index 00000000..b91c632b --- /dev/null +++ b/tests/fixtures/tableau/project.tds @@ -0,0 +1,17 @@ + + + + + + + + + [Order ID] + [Projected] + order_id + integer + + + + + diff --git a/tests/fixtures/tableau/semantic_layer.tds 
b/tests/fixtures/tableau/semantic_layer.tds new file mode 100644 index 00000000..551cd0fb --- /dev/null +++ b/tests/fixtures/tableau/semantic_layer.tds @@ -0,0 +1,51 @@ + + + + + + + + + + [Order ID] + [Orders] + order_id + integer + + + [Customer ID] + [Orders] + customer_id + integer + + + [ID] + [Customers] + id + integer + + + [Customer Name] + [Customers] + customer_name + string + + + + + + + + + + + + + + + + + + + + diff --git a/tests/fixtures/tableau/spatial_proc.tds b/tests/fixtures/tableau/spatial_proc.tds new file mode 100644 index 00000000..c3ecb0c6 --- /dev/null +++ b/tests/fixtures/tableau/spatial_proc.tds @@ -0,0 +1,24 @@ + + + + + dbo.get_store_locations + + + + [Store ID] + [get_store_locations] + store_id + integer + + + [Geometry] + [get_store_locations] + geometry + spatial + + + + + + diff --git a/tests/fixtures/tableau/subquery.tds b/tests/fixtures/tableau/subquery.tds new file mode 100644 index 00000000..3d24f9a1 --- /dev/null +++ b/tests/fixtures/tableau/subquery.tds @@ -0,0 +1,15 @@ + + + + SELECT * FROM users WHERE active = true + + + [User ID] + [Active Users] + user_id + integer + + + + + diff --git a/tests/fixtures/tableau/text_transform.tds b/tests/fixtures/tableau/text_transform.tds new file mode 100644 index 00000000..d4548225 --- /dev/null +++ b/tests/fixtures/tableau/text_transform.tds @@ -0,0 +1,17 @@ + + + + + + + + + [Message] + [Parsed] + message + string + + + + + diff --git a/tests/fixtures/tableau/union.tds b/tests/fixtures/tableau/union.tds new file mode 100644 index 00000000..50d2f393 --- /dev/null +++ b/tests/fixtures/tableau/union.tds @@ -0,0 +1,28 @@ + + + + + + + + + + [Region] + [Union of Sales] + region + string + + + [Amount] + [Union of Sales] + amount + real + Sum + + + + + + + +