Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 312 additions & 4 deletions sidemantic/adapters/tableau.py

Large diffs are not rendered by default.

139 changes: 139 additions & 0 deletions tests/adapters/tableau/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,3 +432,142 @@ def test_empty_datasource(adapter, tmp_path):
empty_tds.write_text("<?xml version='1.0' encoding='utf-8' ?>\n<datasource version='18.1' />\n")
graph = adapter.parse(empty_tds)
assert len(graph.models) == 0


# =============================================================================
# DERIVED RELATION SOURCES (union / subquery)
# =============================================================================

# Single <column> element declaring a SUM-aggregated "Amount" measure. The
# inline datasources below interpolate it so each parsed model has one metric.
_MEASURE_COL = (
    '<column caption="Amount" name="[amount]" datatype="real" '
    'role="measure" type="quantitative" aggregation="Sum" />'
)


def _compiles_to_valid_duckdb_sql(model) -> str:
    """Compile a single measure for the model and assert DuckDB accepts the SQL.

    The model is registered on a fresh SemanticLayer, its first metric is
    compiled, and the resulting statement is run through ``EXPLAIN`` on an
    in-memory DuckDB connection.

    Args:
        model: A parsed model exposing ``name`` and a non-empty ``metrics`` list.

    Returns:
        The compiled SQL string, so callers can make further assertions on it.

    Raises:
        AssertionError: If the model has no metrics, or if DuckDB rejects the
            SQL for any reason other than a missing catalog object.
    """
    import duckdb

    if not model.metrics:
        raise AssertionError(f"Model {model.name!r} has no metrics to compile")

    sl = SemanticLayer()
    sl.add_model(model)
    measure = model.metrics[0].name
    sql = sl.compile(metrics=[f"{model.name}.{measure}"])

    con = duckdb.connect()
    try:
        con.execute("EXPLAIN " + sql)
    except duckdb.Error as exc:  # pragma: no cover - failure path
        # Only the first line of the error is inspected; guard against an
        # empty message so the real failure is reported, not an IndexError.
        error_lines = str(exc).splitlines()
        message = error_lines[0] if error_lines else ""
        # A missing table is fine (fixtures reference uncreated tables); only a
        # syntax/parser error means we generated invalid FROM-clause SQL.
        if "does not exist" not in message and "Catalog Error" not in message:
            raise AssertionError(f"DuckDB rejected generated SQL:\n{sql}\n--> {message}") from exc
    finally:
        # Always release the in-memory connection, even when the check fails.
        con.close()
    return sql


def test_single_member_union_is_derived_sql(adapter, tmp_path):
    """Check that a union with one usable member is imported as derived SQL.

    The "SELECT * FROM <source>" text has to land on model.sql; if it were
    stored on model.table the generator would emit "FROM SELECT * FROM ...".
    """
    datasource_xml = f"""<?xml version='1.0' encoding='utf-8' ?>
<datasource formatted-name='single_union' inline='true' version='18.1'>
<connection class='federated'>
<relation type='union' name='MyUnion'>
<relation type='table' name='t1' table='[public].[orders]' />
</relation>
</connection>
{_MEASURE_COL}
</datasource>"""
    source_path = tmp_path / "single_union.tds"
    source_path.write_text(datasource_xml)

    union_model = adapter.parse(source_path).models["single_union"]

    # The SELECT belongs on .sql; .table must stay empty.
    assert union_model.table is None
    assert union_model.sql == "SELECT * FROM public.orders"

    compiled = _compiles_to_valid_duckdb_sql(union_model)
    assert "FROM (SELECT * FROM public.orders) AS t" in compiled


def test_multi_member_union_builds_union_all(adapter, tmp_path):
    """Check that a union with two members becomes UNION ALL derived SQL."""
    datasource_xml = f"""<?xml version='1.0' encoding='utf-8' ?>
<datasource formatted-name='multi_union' inline='true' version='18.1'>
<connection class='federated'>
<relation type='union' name='MyUnion'>
<relation type='table' name='t1' table='[public].[orders_2020]' />
<relation type='table' name='t2' table='[public].[orders_2021]' />
</relation>
</connection>
{_MEASURE_COL}
</datasource>"""
    source_path = tmp_path / "multi_union.tds"
    source_path.write_text(datasource_xml)

    union_model = adapter.parse(source_path).models["multi_union"]

    assert union_model.table is None
    assert union_model.sql is not None
    assert "UNION ALL" in union_model.sql

    # The compiled statement must also be accepted by DuckDB.
    _compiles_to_valid_duckdb_sql(union_model)


def test_top_level_subquery_relation_not_double_aliased(adapter, tmp_path):
    """Check that a top-level type='subquery' relation is not aliased twice.

    The relation's raw SELECT is stored on the model, and the generator wraps
    it as "(<sql>) AS t" itself, so the relation's own alias must not survive.
    """
    relation_alias = 'AS "Active Users"'
    datasource_xml = f"""<?xml version='1.0' encoding='utf-8' ?>
<datasource formatted-name='subquery_ds' inline='true' version='18.1'>
<connection class='federated'>
<relation type='subquery' name='Active Users'>SELECT * FROM users WHERE active = 1</relation>
</connection>
{_MEASURE_COL}
</datasource>"""
    source_path = tmp_path / "subquery.tds"
    source_path.write_text(datasource_xml)

    subquery_model = adapter.parse(source_path).models["subquery_ds"]

    assert subquery_model.table is None
    assert subquery_model.sql is not None
    # The stored SQL must not end with the relation alias; the generator
    # appends its own "AS t".
    assert not subquery_model.sql.rstrip().endswith(relation_alias)

    compiled = _compiles_to_valid_duckdb_sql(subquery_model)
    assert relation_alias not in compiled


def test_subquery_with_paren_in_string_literal_strips_alias(adapter, tmp_path):
    """Check alias stripping when the SELECT has a ")" inside a string literal.

    The paren scan used for alias stripping has to skip quoted strings. If it
    stopped at the literal ")", the outer "AS <alias>" would be left in place
    and the generator would alias the derived table a second time.
    """
    relation_alias = 'AS "Weird"'
    datasource_xml = f"""<?xml version='1.0' encoding='utf-8' ?>
<datasource formatted-name='paren_ds' inline='true' version='18.1'>
<connection class='federated'>
<relation type='subquery' name='Weird'>SELECT 1 AS amount, ')' AS marker</relation>
</connection>
{_MEASURE_COL}
</datasource>"""
    source_path = tmp_path / "paren_literal.tds"
    source_path.write_text(datasource_xml)

    paren_model = adapter.parse(source_path).models["paren_ds"]

    assert paren_model.table is None
    assert paren_model.sql is not None
    # The alias is gone while the quoted ")" literal is still in the SQL.
    assert not paren_model.sql.rstrip().endswith(relation_alias)
    assert "')'" in paren_model.sql

    compiled = _compiles_to_valid_duckdb_sql(paren_model)
    assert relation_alias not in compiled
208 changes: 208 additions & 0 deletions tests/adapters/tableau/test_relation_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
"""Tests for Tableau adapter handling of extended relation types and attributes.

Covers physical-layer relation types beyond table/join/text/collection
(union, batch-union, pivot, subquery, stored-proc, project, text-transform),
the Tableau Semantics object-graph semantic-layer / is-legacy attributes, and
the spatial datatype mapping.
"""

from pathlib import Path

import pytest

from sidemantic.adapters.tableau import TableauAdapter

# Tableau fixture directory: three levels up from this file, then
# fixtures/tableau.
FIXTURES = Path(__file__).parent.parent.parent.joinpath("fixtures", "tableau")


@pytest.fixture
def adapter() -> TableauAdapter:
    """Return a newly constructed TableauAdapter for tests that request it."""
    return TableauAdapter()


# =============================================================================
# SET-OPERATION RELATIONS: union / batch-union
# =============================================================================


def test_union_relation_builds_union_all_sql(adapter):
    """union.tds: the union relation's members are stacked with UNION ALL."""
    parsed = adapter.parse(FIXTURES / "union.tds")

    assert "union_sales" in parsed.models
    union_model = parsed.models["union_sales"]

    assert union_model.table is None
    assert union_model.sql is not None
    for expected_fragment in ("UNION ALL", "public.sales_2023", "public.sales_2024"):
        assert expected_fragment in union_model.sql

    # The datasource's columns are imported alongside the derived SQL.
    assert union_model.get_dimension("region") is not None
    assert union_model.get_metric("amount") is not None


def test_batch_union_relation_builds_union_all_sql(adapter):
    """batch_union.tds: a batch-union (wildcard) relation unions its members too."""
    parsed = adapter.parse(FIXTURES / "batch_union.tds")

    assert "monthly_logs" in parsed.models
    logs_model = parsed.models["monthly_logs"]

    assert logs_model.table is None
    assert logs_model.sql is not None
    # Three members are joined by exactly two UNION ALL separators.
    assert logs_model.sql.count("UNION ALL") == 2
    for member_file in ("jan.csv", "feb.csv", "mar.csv"):
        assert member_file in logs_model.sql


# =============================================================================
# WRAPPER RELATIONS: subquery / stored-proc / pivot / project / text-transform
# =============================================================================


def test_subquery_relation_becomes_derived_sql(adapter):
    """subquery.tds: the subquery relation's SQL is kept as a derived source."""
    parsed = adapter.parse(FIXTURES / "subquery.tds")

    assert "active_users" in parsed.models
    users_model = parsed.models["active_users"]

    derived_sql = users_model.sql
    assert derived_sql is not None
    assert "SELECT * FROM users WHERE active = true" in derived_sql
    assert users_model.get_dimension("user_id") is not None


def test_stored_proc_relation_resolves_actual_name(adapter):
    """spatial_proc.tds: the stored-proc relation maps to the procedure's own name."""
    parsed = adapter.parse(FIXTURES / "spatial_proc.tds")

    assert "store_locations" in parsed.models
    locations_model = parsed.models["store_locations"]

    # A stored procedure cannot be joined or unioned, so the model's table is
    # the procedure name.
    assert locations_model.table == "dbo.get_store_locations"


def test_pivot_relation_builds_unpivot_sql(adapter):
    """pivot.tds: the pivot relation yields UNPIVOT derived SQL.

    A bare reference to the wide child table would be wrong, because the pivot
    output columns do not exist there.
    """
    parsed = adapter.parse(FIXTURES / "pivot.tds")

    assert "pivoted_sales" in parsed.models
    pivot_model = parsed.models["pivoted_sales"]

    # The imported fields are the pivot outputs, which exist only after the
    # UNPIVOT, so the model must not point at the raw wide table.
    assert pivot_model.table is None
    assert pivot_model.sql is not None
    assert "UNPIVOT" in pivot_model.sql
    # Wide source columns are unpivoted into the standard output columns.
    assert '"Q1"' in pivot_model.sql
    assert '"Q4"' in pivot_model.sql
    assert 'INTO NAME "Pivot Field Names" VALUE "Pivot Field Values"' in pivot_model.sql

    # The pivot output columns are imported as a dimension and a metric.
    assert pivot_model.get_dimension("Pivot Field Names") is not None
    assert pivot_model.get_metric("Pivot Field Values") is not None


def test_pivot_relation_sql_compiles_to_valid_duckdb(adapter):
    """The synthesized UNPIVOT SQL must form a valid DuckDB derived table when the
    generator wraps it as "(<sql>) AS t".

    The compiled query is checked with ``EXPLAIN`` on an in-memory DuckDB
    connection, which is closed whether or not the check passes.
    """
    import duckdb

    from sidemantic import SemanticLayer

    graph = adapter.parse(FIXTURES / "pivot.tds")
    model = graph.models["pivoted_sales"]

    sl = SemanticLayer()
    sl.add_model(model)
    sql = sl.compile(metrics=["pivoted_sales.Pivot Field Values"])

    con = duckdb.connect()
    try:
        con.execute("EXPLAIN " + sql)
    except duckdb.Error as exc:  # pragma: no cover - failure path
        # Only the first line of the error is inspected; guard against an
        # empty message so the real failure is reported, not an IndexError.
        error_lines = str(exc).splitlines()
        message = error_lines[0] if error_lines else ""
        # A missing source table is fine (the fixture references an uncreated
        # table); only a syntax/parser error means we emitted invalid SQL.
        if "does not exist" not in message and "Catalog Error" not in message:
            raise AssertionError(f"DuckDB rejected generated UNPIVOT SQL:\n{sql}\n--> {message}") from exc
    finally:
        # Always release the in-memory connection, even when the check fails.
        con.close()


def test_project_relation_resolves_to_child_table(adapter):
    """project.tds: the project relation maps to the child table it wraps."""
    parsed = adapter.parse(FIXTURES / "project.tds")

    assert "projected_orders" in parsed.models
    orders_model = parsed.models["projected_orders"]

    assert orders_model.table == "public.orders"


def test_text_transform_relation_resolves_to_child_table(adapter):
    """text_transform.tds: the text-transform relation maps to its child."""
    parsed = adapter.parse(FIXTURES / "text_transform.tds")

    assert "parsed_logs" in parsed.models
    logs_model = parsed.models["parsed_logs"]

    assert logs_model.table == "raw_logs.txt"


# =============================================================================
# SPATIAL DATATYPE
# =============================================================================


def test_spatial_datatype_maps_to_categorical(adapter):
    """spatial_proc.tds: a column with the spatial datatype becomes a categorical dimension."""
    locations_model = adapter.parse(FIXTURES / "spatial_proc.tds").models["store_locations"]

    geometry_dim = locations_model.get_dimension("geometry")
    assert geometry_dim is not None
    assert geometry_dim.type == "categorical"


# =============================================================================
# TABLEAU SEMANTICS: object-graph semantic-layer / is-legacy attributes
# =============================================================================


def test_object_graph_semantic_layer_attributes_captured(adapter):
    """semantic_layer.tds: semantic-layer / is-legacy attributes end up in model metadata."""
    parsed = adapter.parse(FIXTURES / "semantic_layer.tds")

    assert "semantic_model" in parsed.models
    semantic_model = parsed.models["semantic_model"]

    assert semantic_model.metadata is not None
    expected_metadata = (
        ("tableau_semantic_layer", "true"),
        ("tableau_is_legacy", "false"),
    )
    for key, expected_value in expected_metadata:
        assert semantic_model.metadata.get(key) == expected_value

    # The object-graph relationship is extracted as well.
    relationships = semantic_model.relationships
    assert len(relationships) == 1
    assert relationships[0].name == "Customers"


def test_legacy_object_graph_has_no_semantic_metadata(adapter):
    """An object-graph without semantic attributes adds no semantic metadata keys.

    The test is skipped when the real_world fixture file is not present.
    """
    # Uses a real_world fixture whose object-graph carries no semantic-layer /
    # is-legacy attributes (the inline false_object_graph fixture in
    # test_parsing lacks them as well).
    fixture_path = FIXTURES.joinpath("real_world", "thoughtspot_sf_trial.tds")
    if not fixture_path.exists():
        pytest.skip("real_world fixtures not present")

    trial_model = adapter.parse(fixture_path).models.get("SF Trial")
    assert trial_model is not None

    # Metadata may be unset entirely; if it is set, neither semantic key may
    # be present.
    if trial_model.metadata is None:
        return
    for semantic_key in ("tableau_semantic_layer", "tableau_is_legacy"):
        assert semantic_key not in trial_model.metadata
19 changes: 19 additions & 0 deletions tests/fixtures/tableau/batch_union.tds
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version='1.0' encoding='utf-8' ?>
<!-- Test fixture: a textscan datasource whose only top-level relation is a
     batch-union wrapping three table members (jan.csv, feb.csv, mar.csv),
     plus one string dimension column named [event]. -->
<datasource formatted-name='monthly_logs' inline='true' version='18.1'>
<connection class='textscan' directory='/data/logs'>
<relation name='Union of Logs' type='batch-union'>
<relation name='jan' type='table' table='[jan.csv]' />
<relation name='feb' type='table' table='[feb.csv]' />
<relation name='mar' type='table' table='[mar.csv]' />
</relation>
<metadata-records>
<metadata-record class='column'>
<local-name>[Event]</local-name>
<parent-name>[Union of Logs]</parent-name>
<remote-alias>event</remote-alias>
<local-type>string</local-type>
</metadata-record>
</metadata-records>
</connection>
<column caption='Event' datatype='string' name='[event]' role='dimension' type='nominal' />
</datasource>
Loading
Loading