From 3ad8bbcb83dbf3fc1eb932806f153ac7a707a4ba Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sat, 13 Jun 2026 11:14:27 -0700
Subject: [PATCH 01/17] feat(thoughtspot): add first-class TML Model object
 support

ThoughtSpot deprecated the Worksheet object (auto-migrated to Models and
removed in 10.12.0.cl). The adapter routed the top-level model: key into
the worksheet parser, which only reads worksheet keys, so current TML
Model files (export_schema_version v2) imported as empty/incorrect models.

- Add _parse_model for the TML Model object: reads model_tables: for
  tables, model-level columns: for fields, and joins nested inside each
  model_tables entry via with:/on:/type:/cardinality:.
- Map join cardinality (MANY_TO_ONE/ONE_TO_ONE/ONE_TO_MANY/MANY_TO_MANY)
  to sidemantic relationship types; resolve model_tables aliases used by
  role-playing dimensions.
- Keep _parse_worksheet for legacy worksheet content nested under model:.
- Extend loader auto-detection so a model + model_tables + columns
  .tml/.yaml/.yml file is recognized as ThoughtSpot.
- Add sales.model.tml and model_alias.model.tml fixtures plus parsing,
  alias-resolution, loader auto-detect, and legacy-fallback tests.
---
 sidemantic/adapters/thoughtspot.py            | 258 +++++++++++++++++-
 sidemantic/loaders.py                         |   7 +
 tests/adapters/thoughtspot/test_parsing.py    | 178 ++++++++++++
 .../thoughtspot/model_alias.model.tml         |  32 +++
 tests/fixtures/thoughtspot/sales.model.tml    | 102 +++++++
 5 files changed, 576 insertions(+), 1 deletion(-)
 create mode 100644 tests/fixtures/thoughtspot/model_alias.model.tml
 create mode 100644 tests/fixtures/thoughtspot/sales.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 56d40eb2..af7fa558 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -28,6 +28,13 @@
 _TIME_TYPES = {"DATE", "TIME", "DATETIME", "TIMESTAMP"}
 _BOOL_TYPES = {"BOOL", "BOOLEAN"}
 
+_CARDINALITY_MAP = {
+    "MANY_TO_ONE": "many_to_one",
+    "ONE_TO_ONE": "one_to_one",
+    "ONE_TO_MANY": "one_to_many",
+    "MANY_TO_MANY": "many_to_many",
+}
+
 _AGGREGATION_MAP = {
     "SUM": "sum",
     "COUNT": "count",
@@ -217,7 +224,14 @@ def _parse_file(self, file_path: Path) -> Model | None:
         if "worksheet" in data:
             return self._parse_worksheet(data.get("worksheet"), data)
         if "model" in data:
-            return self._parse_worksheet(data.get("model"), data)
+            model_def = data.get("model")
+            # TML Model objects (export_schema_version v2) use `model_tables:` and
+            # model-level `columns:`. Legacy Worksheet content nested under `model:`
+            # still uses `tables:`/`worksheet_columns:`, so fall back to the
+            # worksheet parser for back-compat.
+            if isinstance(model_def, dict) and ("model_tables" in model_def or "columns" in model_def):
+                return self._parse_model(model_def, data)
+            return self._parse_worksheet(model_def, data)
 
         return None
 
@@ -455,6 +469,248 @@ def _parse_worksheet(self, worksheet_def: dict[str, Any] | None, full_def: dict[
 
         return model
 
+    def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any]) -> Model | None:
+        """Parse a TML Model object (export_schema_version v2).
+
+        Model TML differs from the legacy Worksheet TML:
+        - tables live under `model_tables:` (vs worksheet `tables:`)
+        - joins are nested inside each `model_tables` entry under `joins:`,
+          using `with:`/`on:`/`type:`/`cardinality:` (vs top-level worksheet
+          `joins:` with `source`/`destination`/`is_one_to_one`)
+        - fields live under model-level `columns:` (vs `worksheet_columns:`)
+        """
+        if not model_def:
+            return None
+
+        name = model_def.get("name")
+        if not name:
+            return None
+
+        description = model_def.get("description")
+        model_tables = model_def.get("model_tables") or []
+
+        table_name_lookup = self._table_name_lookup(model_tables)
+
+        # Each model_tables entry may carry an `alias` used in column_id paths
+        # and join expressions; map alias -> table name so refs resolve.
+        alias_lookup: dict[str, str] = {}
+        for table in model_tables:
+            table_name = table.get("name") or table.get("id")
+            alias = table.get("alias")
+            if alias and table_name:
+                alias_lookup[alias] = table_name
+        # Build the path lookup used to resolve column_id/expression table refs.
+        path_lookup: dict[str, str] = dict(table_name_lookup)
+        path_lookup.update(alias_lookup)
+
+        # Flatten nested joins (one per model_tables entry) into the same shape
+        # the worksheet join helpers consume.
+        flat_joins: list[dict[str, Any]] = []
+        for table in model_tables:
+            source = table.get("name") or table.get("id")
+            for join_def in table.get("joins") or []:
+                destination = join_def.get("with")
+                if not source or not destination:
+                    continue
+                resolved_dest = alias_lookup.get(destination, table_name_lookup.get(destination, destination))
+                # PyYAML (YAML 1.1) parses the bare `on:` key as the boolean True.
+                on_value = join_def.get("on")
+                if on_value is None and True in join_def:
+                    on_value = join_def.get(True)
+                flat_joins.append(
+                    {
+                        "source": source,
+                        "destination": resolved_dest,
+                        "type": join_def.get("type"),
+                        "on": on_value,
+                        "cardinality": join_def.get("cardinality"),
+                    }
+                )
+
+        sql, base_table = self._build_join_sql(model_tables, flat_joins, path_lookup, table_name_lookup)
+        relationships = self._parse_model_relationships(flat_joins, path_lookup, table_name_lookup)
+
+        formulas = model_def.get("formulas") or []
+        formula_by_id = {f.get("id"): f for f in formulas if f.get("id")}
+        formula_by_name = {f.get("name"): f for f in formulas if f.get("name")}
+
+        dimensions: list[Dimension] = []
+        metrics: list[Metric] = []
+
+        for col_def in model_def.get("columns") or []:
+            col_name = col_def.get("name")
+            column_id = col_def.get("column_id")
+            formula_id = col_def.get("formula_id")
+            if not col_name:
+                if formula_id and formula_id in formula_by_id:
+                    col_name = formula_by_id[formula_id].get("name")
+                elif column_id:
+                    col_name = column_id.split("::")[-1]
+
+            if not col_name:
+                continue
+
+            properties = col_def.get("properties") or {}
+            column_type = _normalize(properties.get("column_type")) or "ATTRIBUTE"
+            bucket = _map_bucket(properties.get("default_date_bucket"))
+            label = col_def.get("custom_name") or col_def.get("display_name")
+            col_description = col_def.get("description")
+            format_pattern = properties.get("format_pattern")
+
+            sql_expr = None
+            if formula_id and formula_id in formula_by_id:
+                sql_expr = formula_by_id[formula_id].get("expr")
+            elif formula_id and formula_id in formula_by_name:
+                sql_expr = formula_by_name[formula_id].get("expr")
+            elif col_name in formula_by_name:
+                sql_expr = formula_by_name[col_name].get("expr")
+
+            if not sql_expr and column_id:
+                if "::" in column_id:
+                    path_id, col_ref = column_id.split("::", 1)
+                    table_name = path_lookup.get(path_id)
+                    if table_name:
+                        sql_expr = f"{table_name}.{col_ref}"
+                    else:
+                        sql_expr = col_ref
+                else:
+                    sql_expr = column_id
+
+            sql_expr = _convert_tml_expr(sql_expr, path_lookup)
+
+            if column_type == "MEASURE":
+                agg, unsupported_func = _map_aggregation(properties.get("aggregation"))
+                metric_sql = sql_expr
+                if agg:
+                    metric = Metric(
+                        name=col_name,
+                        agg=agg,
+                        sql=metric_sql,
+                        label=label,
+                        description=col_description,
+                        format=format_pattern,
+                    )
+                else:
+                    if unsupported_func:
+                        metric_sql = f"{unsupported_func}({metric_sql})" if metric_sql else unsupported_func
+                    metric = Metric(
+                        name=col_name,
+                        type="derived",
+                        sql=metric_sql,
+                        label=label,
+                        description=col_description,
+                        format=format_pattern,
+                    )
+                metrics.append(metric)
+            else:
+                data_type = col_def.get("data_type") or (col_def.get("db_column_properties") or {}).get("data_type")
+                dim_type, granularity = _map_dimension_type(data_type, bucket)
+                dim = Dimension(
+                    name=col_name,
+                    type=dim_type,
+                    sql=sql_expr,
+                    granularity=granularity,
+                    label=label,
+                    description=col_description,
+                    format=format_pattern,
+                )
+                dimensions.append(dim)
+
+        default_time_dimension = None
+        default_grain = None
+        for dim in dimensions:
+            if dim.type == "time":
+                default_time_dimension = dim.name
+                default_grain = dim.granularity
+                break
+
+        primary_key = "id"
+        if any(d.name.lower() == "id" for d in dimensions):
+            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
+
+        model = Model(
+            name=name,
+            table=base_table if not sql else None,
+            sql=sql,
+            description=description,
+            primary_key=primary_key,
+            dimensions=dimensions,
+            metrics=metrics,
+            relationships=relationships,
+            default_time_dimension=default_time_dimension,
+            default_grain=default_grain,
+        )
+
+        if base_table:
+            setattr(model, "_worksheet_base_table", base_table)
+        setattr(model, "_source_tml_type", "model")
+
+        return model
+
+    def _parse_model_relationships(
+        self,
+        joins: list[dict[str, Any]],
+        table_path_lookup: dict[str, str] | None = None,
+        table_name_lookup: dict[str, str] | None = None,
+    ) -> list[Relationship]:
+        """Build relationships from flattened model_tables joins.
+
+        Model joins carry `cardinality:` directly (MANY_TO_ONE/ONE_TO_ONE/
+        ONE_TO_MANY/MANY_TO_MANY), unlike worksheet joins which only flag
+        `is_one_to_one`.
+        """
+        relationships: list[Relationship] = []
+
+        for join_def in joins:
+            source = join_def.get("source")
+            destination = join_def.get("destination")
+            if not source or not destination:
+                continue
+
+            join_type = _normalize(join_def.get("type")) or "INNER"
+            on_value = join_def.get("on")
+            lookup = table_path_lookup or table_name_lookup
+            left, right = _extract_join_refs(on_value, lookup)
+
+            if table_name_lookup:
+                source = table_name_lookup.get(source, source)
+                destination = table_name_lookup.get(destination, destination)
+
+            foreign_key = None
+            primary_key = None
+
+            if left and right:
+                left_table, left_col = left
+                right_table, right_col = right
+
+                if left_table == source and right_table == destination:
+                    foreign_key = left_col
+                    primary_key = right_col
+                elif left_table == destination and right_table == source:
+                    foreign_key = right_col
+                    primary_key = left_col
+                else:
+                    if left_table == source:
+                        foreign_key = left_col
+                    if right_table == destination:
+                        primary_key = right_col
+
+            cardinality = _normalize(join_def.get("cardinality"))
+            rel_type = _CARDINALITY_MAP.get(cardinality or "", "many_to_one")
+            if join_type in {"RIGHT_OUTER", "FULL_OUTER", "OUTER"} and cardinality not in _CARDINALITY_MAP:
+                rel_type = "many_to_many"
+
+            relationships.append(
+                Relationship(
+                    name=destination,
+                    type=rel_type,
+                    foreign_key=foreign_key,
+                    primary_key=primary_key,
+                )
+            )
+
+        return relationships
+
     def _build_join_sql(
         self,
         tables: list[dict[str, Any]],
diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py
index 40c622b2..735cd0cb 100644
--- a/sidemantic/loaders.py
+++ b/sidemantic/loaders.py
@@ -184,6 +184,13 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path, *, strict
                 adapter = ThoughtSpotAdapter()
             elif _contains_yaml_key(yaml_data, "worksheet") and _contains_yaml_key(yaml_data, "worksheet_columns"):
                 adapter = ThoughtSpotAdapter()
+            elif (
+                _contains_yaml_key(yaml_data, "model")
+                and _contains_yaml_key(yaml_data, "model_tables")
+                and _contains_yaml_key(yaml_data, "columns")
+            ):
+                # ThoughtSpot TML Model object (export_schema_version v2)
+                adapter = ThoughtSpotAdapter()
             elif _yaml_has_top_level_key(yaml_data, "tables") and _contains_yaml_key(yaml_data, "base_table"):
                 # Snowflake Cortex Semantic Model format
                 adapter = SnowflakeAdapter()
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index b01068d6..1236cdd8 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -674,3 +674,181 @@ def test_import_tpch_liveboard_skipped():
     adapter = ThoughtSpotAdapter()
     graph = adapter.parse("tests/fixtures/thoughtspot/tpch_liveboard.liveboard.tml")
     assert len(graph.models) == 0
+
+
+# =============================================================================
+# TML MODEL OBJECT (export_schema_version v2)
+# =============================================================================
+
+
+def test_import_thoughtspot_model():
+    """Parse a first-class TML Model object (model_tables + columns + nested joins)."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/sales.model.tml")
+
+    assert "sales_model" in graph.models
+    model = graph.models["sales_model"]
+
+    # Nested model_tables joins produce a joined SQL and relationships.
+    assert model.sql is not None
+    assert "JOIN customers" in model.sql
+    assert "JOIN regions" in model.sql
+    # Composite ON predicate from the regions join is preserved.
+    assert "country_code" in model.sql
+
+    relationships = {rel.name: rel for rel in model.relationships}
+    # cardinality: MANY_TO_ONE
+    assert relationships["customers"].type == "many_to_one"
+    assert relationships["customers"].foreign_key == "customer_id"
+    assert relationships["customers"].primary_key == "id"
+    # cardinality: ONE_TO_ONE
+    assert relationships["regions"].type == "one_to_one"
+    assert relationships["regions"].foreign_key == "region_id"
+    assert relationships["regions"].primary_key == "id"
+
+    # Dimensions: column_id <table>::<column> resolves to table.column.
+    order_id = model.get_dimension("order_id")
+    assert order_id is not None
+    assert order_id.sql == "sales.id"
+    assert order_id.description == "Order identifier"
+
+    order_date = model.get_dimension("order_date")
+    assert order_date is not None
+    assert order_date.type == "time"
+    assert order_date.granularity == "day"
+    assert order_date.label == "Order Date"
+
+    is_active = model.get_dimension("is_active")
+    assert is_active is not None
+    assert is_active.type == "boolean"
+
+    # Column from a joined table resolves to that table's name.
+    customer_name = model.get_dimension("customer_name")
+    assert customer_name is not None
+    assert customer_name.sql == "customers.name"
+    assert customer_name.label == "Customer"
+
+    region_name = model.get_dimension("region_name")
+    assert region_name is not None
+    assert region_name.sql == "regions.name"
+
+    # Simple aggregated measure.
+    gross_revenue = model.get_metric("gross_revenue")
+    assert gross_revenue is not None
+    assert gross_revenue.agg == "sum"
+    assert gross_revenue.format == "$#,##0.00"
+    assert gross_revenue.sql == "sales.gross_revenue"
+
+    # Formula-backed measures.
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert net_revenue.agg == "sum"
+    assert "gross_revenue" in (net_revenue.sql or "")
+
+    avg_order_value = model.get_metric("avg_order_value")
+    assert avg_order_value is not None
+    assert avg_order_value.agg == "avg"
+    assert "/" in (avg_order_value.sql or "")
+
+    # Aggregation coverage.
+    assert model.get_metric("distinct_customers").agg == "count_distinct"
+    assert model.get_metric("order_count").agg == "count"
+    assert model.get_metric("min_order_value").agg == "min"
+    assert model.get_metric("max_order_value").agg == "max"
+    assert model.get_metric("median_order_value").agg == "median"
+
+    # Unsupported aggregation falls back to a derived metric.
+    revenue_stddev = model.get_metric("revenue_stddev")
+    assert revenue_stddev is not None
+    assert revenue_stddev.type == "derived"
+    assert "STDDEV" in (revenue_stddev.sql or "")
+
+
+def test_import_thoughtspot_model_alias():
+    """Parse a TML Model with id-based tables and an aliased (role-playing) join."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_alias.model.tml")
+
+    assert "orders_model" in graph.models
+    model = graph.models["orders_model"]
+
+    # The alias `ship_country` in column_id / join `with` resolves to `countries`.
+    assert model.sql is not None
+    assert "JOIN countries" in model.sql
+
+    relationships = {rel.name: rel for rel in model.relationships}
+    assert "countries" in relationships
+    assert relationships["countries"].type == "many_to_one"
+    assert relationships["countries"].foreign_key == "ship_country_id"
+    assert relationships["countries"].primary_key == "id"
+
+    ship_country = model.get_dimension("ship_country_name")
+    assert ship_country is not None
+    assert ship_country.sql == "countries.name"
+
+    amount = model.get_metric("amount")
+    assert amount is not None
+    assert amount.agg == "sum"
+
+
+def test_thoughtspot_model_auto_detect_loader():
+    """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tml_file = Path(tmpdir) / "sales.model.yaml"
+        tml_file.write_text(
+            """
+model:
+  name: sales_model
+  description: Sales model
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+"""
+        )
+
+        layer = SemanticLayer()
+        load_from_directory(layer, tmpdir)
+
+        assert "sales_model" in layer.graph.models
+        model = layer.graph.models["sales_model"]
+        assert model.get_metric("amount").agg == "sum"
+
+
+def test_thoughtspot_legacy_model_key_still_worksheet():
+    """Legacy worksheet content nested under `model:` still parses as a worksheet."""
+    adapter = ThoughtSpotAdapter()
+    tml_def = {
+        "model": {
+            "name": "legacy_sheet",
+            "tables": [{"name": "orders"}],
+            "worksheet_columns": [
+                {
+                    "name": "amount",
+                    "column_id": "orders::amount",
+                    "properties": {"column_type": "MEASURE", "aggregation": "SUM"},
+                },
+            ],
+        }
+    }
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".tml", delete=False) as f:
+        yaml.safe_dump(tml_def, f, sort_keys=False)
+        temp_path = Path(f.name)
+
+    try:
+        graph = adapter.parse(temp_path)
+        assert "legacy_sheet" in graph.models
+        model = graph.models["legacy_sheet"]
+        assert model.get_metric("amount").agg == "sum"
+    finally:
+        temp_path.unlink(missing_ok=True)
diff --git a/tests/fixtures/thoughtspot/model_alias.model.tml b/tests/fixtures/thoughtspot/model_alias.model.tml
new file mode 100644
index 00000000..e6e4d969
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_alias.model.tml
@@ -0,0 +1,32 @@
+guid: "model-alias"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: Model with id-based tables and an aliased role-playing join
+  model_tables:
+    - name: orders
+      id: orders_tbl
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: ship_country
+          on: "[orders::ship_country_id] = [ship_country::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: countries
+      id: countries_tbl
+      fqn: ANALYTICS.PUBLIC.countries
+      alias: ship_country
+  columns:
+    - name: order_id
+      column_id: orders::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: ship_country_name
+      column_id: ship_country::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/sales.model.tml b/tests/fixtures/thoughtspot/sales.model.tml
new file mode 100644
index 00000000..5a292e77
--- /dev/null
+++ b/tests/fixtures/thoughtspot/sales.model.tml
@@ -0,0 +1,102 @@
+guid: "model-001"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Sales model (TML v2 export)
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "[sales::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+        - with: regions
+          on: "[sales::region_id] = [regions::id] AND [sales::country_code] = [regions::country_code]"
+          type: INNER
+          cardinality: ONE_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+    - name: regions
+      fqn: ANALYTICS.PUBLIC.regions
+  formulas:
+    - name: net_revenue
+      expr: "[sales::gross_revenue] - [sales::discount]"
+      id: net_rev
+    - name: avg_order_value
+      expr: "[sales::gross_revenue] / [sales::order_count]"
+      id: avg_order
+  columns:
+    - name: order_id
+      description: Order identifier
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      column_id: sales::order_date
+      data_type: DATE
+      custom_name: Order Date
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+        format_pattern: "YYYY-MM-DD"
+    - name: is_active
+      column_id: sales::is_active
+      data_type: BOOLEAN
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      custom_name: Customer
+      properties:
+        column_type: ATTRIBUTE
+    - name: region_name
+      column_id: regions::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+        format_pattern: "$#,##0.00"
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: avg_order_value
+      formula_id: avg_order
+      properties:
+        column_type: MEASURE
+        aggregation: AVERAGE
+    - name: distinct_customers
+      column_id: sales::customer_id
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT_DISTINCT
+    - name: order_count
+      column_id: sales::id
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT
+    - name: min_order_value
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MIN
+    - name: max_order_value
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MAX
+    - name: median_order_value
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MEDIAN
+    - name: revenue_stddev
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: STD_DEVIATION

From ede3e1858b15b9338be7b6409302f2fa73a3b038 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sat, 13 Jun 2026 15:33:45 -0700
Subject: [PATCH 02/17] fix(thoughtspot): keep table qualifier for model
 column_id name refs

Model column_id paths reference a table by its name even when the table
also has an id (e.g. column_id: orders::amount for a table with
id: orders_tbl). _table_name_lookup only mapped id -> name, so those
refs missed the lookup and the qualifier was dropped, emitting bare
columns like amount/id that are ambiguous in joined model SQL.

Map name -> name in addition to id -> name (id mappings take
precedence) so column_id qualifiers resolve to table.column.
---
 sidemantic/adapters/thoughtspot.py         | 9 +++++++++
 tests/adapters/thoughtspot/test_parsing.py | 7 +++++++
 2 files changed, 16 insertions(+)

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index af7fa558..3751d04f 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -884,6 +884,15 @@ def _table_name_lookup(self, tables: list[dict[str, Any]]) -> dict[str, str]:
             table_name = table.get("name") or table.get("id")
             if table_id and table_name:
                 lookup[table_id] = table_name
+        # Model `column_id`s reference a table by its `name` even when an `id`
+        # is present (e.g. `column_id: orders::amount` for a table with
+        # `id: orders_tbl`). Map `name -> name` too so those qualifiers resolve
+        # instead of being dropped, which would emit ambiguous unqualified
+        # columns in joined models. `id -> name` mappings take precedence.
+        for table in tables:
+            table_name = table.get("name")
+            if table_name and table_name not in lookup:
+                lookup[table_name] = table_name
         return lookup
 
     def export(self, graph: SemanticGraph, output_path: str | Path) -> None:
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 1236cdd8..2c1d323e 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -786,9 +786,16 @@ def test_import_thoughtspot_model_alias():
     assert ship_country is not None
     assert ship_country.sql == "countries.name"
 
+    # `column_id` paths that use the table `name` (even when an `id` exists)
+    # keep their table qualifier instead of collapsing to a bare column.
+    order_id = model.get_dimension("order_id")
+    assert order_id is not None
+    assert order_id.sql == "orders.id"
+
     amount = model.get_metric("amount")
     assert amount is not None
     assert amount.agg == "sum"
+    assert amount.sql == "orders.amount"
 
 
 def test_thoughtspot_model_auto_detect_loader():

From 924af094202cc50bb1cb5ca4097782a6f92310e4 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 07:00:54 -0700
Subject: [PATCH 03/17] fix(thoughtspot): make joined models queryable and
 preserve role-playing aliases

Joined Model TMLs are exported as derived SQL (FROM (<sql>) AS t) but their
dimension/metric SQL kept inner table qualifiers like sales.gross_revenue,
which are out of scope in the wrapping subquery and made joined models
unqueryable. Rewrite the inner SELECT * into an explicit projection that
aliases each table.column to a stable unqualified output name, expose the
primary key, and rewrite column SQL to those aliases.

Also preserve with: aliases as role identifiers instead of resolving them to
the backing table, so role-playing joins (ship_country/bill_country both
backed by countries) stay distinct via JOIN countries AS <alias> rather than
collapsing into a single ambiguous countries relation.
---
 sidemantic/adapters/thoughtspot.py            | 127 ++++++++++++++++--
 tests/adapters/thoughtspot/test_parsing.py    | 122 ++++++++++++++---
 .../thoughtspot/role_playing.model.tml        |  41 ++++++
 3 files changed, 263 insertions(+), 27 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/role_playing.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 3751d04f..b3c949ac 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -187,6 +187,82 @@ def _simple_column(sql: str | None, fallback: str | None) -> str | None:
     return fallback
 
 
+def _expose_joined_columns(
+    sql: str | None,
+    tables: set[str],
+    dimensions: list[Dimension],
+    metrics: list[Metric],
+    base_table: str | None = None,
+    primary_key: str | None = None,
+) -> str | None:
+    """Rewrite a joined model's derived SQL so its columns are queryable.
+
+    A joined Model TML becomes a derived table (``FROM (<sql>) AS t``) whose
+    column expressions still carry inner table qualifiers like ``sales.amount``.
+    Those qualifiers are out of scope once the join is wrapped in a subquery, so
+    a normal query (e.g. ``SELECT sales.amount FROM (...) AS t``) fails with
+    "table sales not found". This replaces the ``SELECT *`` projection with an
+    explicit list that aliases each referenced ``table.column`` to a stable,
+    unqualified output name, then rewrites the dimension/metric SQL to use those
+    aliases so the outer query stays in scope.
+
+    The model's ``primary_key`` is also passed through by the SQL generator as a
+    bare column, so the base table's primary key is exposed under that name when
+    no model column already projects it.
+    """
+    if not sql or "SELECT * FROM " not in sql:
+        return sql
+
+    # Collect every distinct `table.column` referenced by the model's columns
+    # where `table` is one of the joined tables. Preserve first-seen order so
+    # the generated projection is deterministic.
+    projection: dict[tuple[str, str], str] = {}
+
+    def _collect(expr: str | None) -> None:
+        if not expr:
+            return
+        for table, column in _TML_DOT_REF.findall(expr):
+            if table in tables:
+                projection.setdefault((table, column), f"{table}__{column}")
+
+    for dim in dimensions:
+        _collect(dim.sql)
+    for metric in metrics:
+        _collect(metric.sql)
+
+    if not projection:
+        return sql
+
+    def _rewrite(expr: str | None) -> str | None:
+        if not expr:
+            return expr
+
+        def _replace(match: re.Match[str]) -> str:
+            table = match.group(1)
+            column = match.group(2)
+            alias = projection.get((table, column))
+            return alias if alias else match.group(0)
+
+        return _TML_DOT_REF.sub(_replace, expr)
+
+    for dim in dimensions:
+        dim.sql = _rewrite(dim.sql)
+    for metric in metrics:
+        metric.sql = _rewrite(metric.sql)
+
+    select_parts = [f"{table}.{column} AS {alias}" for (table, column), alias in projection.items()]
+
+    # The SQL generator passes through `model.primary_key` as a bare column when
+    # querying derived models. Expose `<base_table>.<primary_key>` under that
+    # name so the key resolves instead of referencing an out-of-scope column.
+    aliases = set(projection.values())
+    if base_table and primary_key and primary_key not in aliases:
+        select_parts.append(f"{base_table}.{primary_key} AS {primary_key}")
+
+    select_list = ", ".join(select_parts)
+    return sql.replace("SELECT * FROM ", f"SELECT {select_list} FROM ", 1)
+
+
 class ThoughtSpotAdapter(BaseAdapter):
     """Adapter for ThoughtSpot TML (YAML) tables and worksheets."""
 
@@ -492,16 +568,23 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
         table_name_lookup = self._table_name_lookup(model_tables)
 
         # Each model_tables entry may carry an `alias` used in column_id paths
-        # and join expressions; map alias -> table name so refs resolve.
-        alias_lookup: dict[str, str] = {}
+        # and join expressions. The alias is the role identifier (e.g.
+        # `ship_country`/`bill_country` both backed by `countries`), so keep the
+        # alias as the join/relationship/qualifier name and only track the
+        # underlying table for emitting `JOIN <table> AS <alias>`. Resolving the
+        # alias away here would collapse distinct role-playing joins into a
+        # single ambiguous `countries` relation.
+        alias_to_table: dict[str, str] = {}
         for table in model_tables:
             table_name = table.get("name") or table.get("id")
             alias = table.get("alias")
             if alias and table_name:
-                alias_lookup[alias] = table_name
+                alias_to_table[alias] = table_name
         # Build the path lookup used to resolve column_id/expression table refs.
+        # Aliases resolve to themselves so qualifiers stay role-scoped.
         path_lookup: dict[str, str] = dict(table_name_lookup)
-        path_lookup.update(alias_lookup)
+        for alias in alias_to_table:
+            path_lookup[alias] = alias
 
         # Flatten nested joins (one per model_tables entry) into the same shape
         # the worksheet join helpers consume.
@@ -512,7 +595,12 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 destination = join_def.get("with")
                 if not source or not destination:
                     continue
-                resolved_dest = alias_lookup.get(destination, table_name_lookup.get(destination, destination))
+                # Keep an aliased destination as-is (the role name); only resolve
+                # non-aliased ids to their table name.
+                if destination in alias_to_table:
+                    resolved_dest = destination
+                else:
+                    resolved_dest = table_name_lookup.get(destination, destination)
                 # PyYAML (YAML 1.1) parses the bare `on:` key as the boolean True.
                 on_value = join_def.get("on")
                 if on_value is None and True in join_def:
@@ -527,7 +615,7 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                     }
                 )
 
-        sql, base_table = self._build_join_sql(model_tables, flat_joins, path_lookup, table_name_lookup)
+        sql, base_table = self._build_join_sql(model_tables, flat_joins, path_lookup, table_name_lookup, alias_to_table)
         relationships = self._parse_model_relationships(flat_joins, path_lookup, table_name_lookup)
 
         formulas = model_def.get("formulas") or []
@@ -616,6 +704,21 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 )
                 dimensions.append(dim)
 
+        primary_key = "id"
+        if any(d.name.lower() == "id" for d in dimensions):
+            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
+
+        # A joined model is exported as derived SQL (FROM (<sql>) AS t); rewrite
+        # its `SELECT *` into explicit aliased columns and update the dimension/
+        # metric SQL so the inner table qualifiers stay in scope when queried.
+        if sql:
+            known_tables = set(table_name_lookup.values())
+            for join_def in flat_joins:
+                known_tables.add(join_def.get("source"))
+                known_tables.add(join_def.get("destination"))
+            known_tables.discard(None)
+            sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key)
+
         default_time_dimension = None
         default_grain = None
         for dim in dimensions:
@@ -624,10 +727,6 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 default_grain = dim.granularity
                 break
 
-        primary_key = "id"
-        if any(d.name.lower() == "id" for d in dimensions):
-            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
-
         model = Model(
             name=name,
             table=base_table if not sql else None,
@@ -717,6 +816,7 @@ def _build_join_sql(
         joins: list[dict[str, Any]],
         table_path_lookup: dict[str, str] | None = None,
         table_name_lookup: dict[str, str] | None = None,
+        alias_to_table: dict[str, str] | None = None,
     ) -> tuple[str | None, str | None]:
         base_table = None
         if tables:
@@ -766,7 +866,12 @@ def _build_join_sql(
                 "INNER": "INNER",
             }.get(join_type, "INNER")
 
-            clauses.append(f"{join_keyword} JOIN {right} ON {on_expr}")
+            # Role-playing joins keep the alias as the relation name; emit
+            # `<table> AS <alias>` so two roles backed by the same table do not
+            # produce an ambiguous, duplicated join.
+            backing = alias_to_table.get(right) if alias_to_table else None
+            right_clause = f"{backing} AS {right}" if backing and backing != right else right
+            clauses.append(f"{join_keyword} JOIN {right_clause} ON {on_expr}")
             joined.add(right)
 
         if not clauses:
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 2c1d323e..c0294cd6 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -706,10 +706,18 @@ def test_import_thoughtspot_model():
     assert relationships["regions"].foreign_key == "region_id"
     assert relationships["regions"].primary_key == "id"
 
-    # Dimensions: column_id <table>::<column> resolves to table.column.
+    # A joined model becomes a derived table. Its `SELECT *` is rewritten into an
+    # explicit projection that aliases each inner `table.column` to a stable,
+    # unqualified output name so the columns stay in scope when queried.
+    assert "SELECT *" not in model.sql
+    assert "sales.id AS sales__id" in model.sql
+    assert "customers.name AS customers__name" in model.sql
+
+    # Dimensions reference the unqualified aliased output names, not inner
+    # `table.column` qualifiers that are out of scope in the derived subquery.
     order_id = model.get_dimension("order_id")
     assert order_id is not None
-    assert order_id.sql == "sales.id"
+    assert order_id.sql == "sales__id"
     assert order_id.description == "Order identifier"
 
     order_date = model.get_dimension("order_date")
@@ -722,22 +730,22 @@ def test_import_thoughtspot_model():
     assert is_active is not None
     assert is_active.type == "boolean"
 
-    # Column from a joined table resolves to that table's name.
+    # Column from a joined table resolves to that table's aliased output name.
     customer_name = model.get_dimension("customer_name")
     assert customer_name is not None
-    assert customer_name.sql == "customers.name"
+    assert customer_name.sql == "customers__name"
     assert customer_name.label == "Customer"
 
     region_name = model.get_dimension("region_name")
     assert region_name is not None
-    assert region_name.sql == "regions.name"
+    assert region_name.sql == "regions__name"
 
     # Simple aggregated measure.
     gross_revenue = model.get_metric("gross_revenue")
     assert gross_revenue is not None
     assert gross_revenue.agg == "sum"
     assert gross_revenue.format == "$#,##0.00"
-    assert gross_revenue.sql == "sales.gross_revenue"
+    assert gross_revenue.sql == "sales__gross_revenue"
 
     # Formula-backed measures.
     net_revenue = model.get_metric("net_revenue")
@@ -772,30 +780,112 @@ def test_import_thoughtspot_model_alias():
     assert "orders_model" in graph.models
     model = graph.models["orders_model"]
 
-    # The alias `ship_country` in column_id / join `with` resolves to `countries`.
+    # The alias `ship_country` is the role identifier and is preserved as the
+    # join relation (`countries AS ship_country`) and relationship name, instead
+    # of being resolved away to the backing `countries` table.
     assert model.sql is not None
-    assert "JOIN countries" in model.sql
+    assert "JOIN countries AS ship_country" in model.sql
 
     relationships = {rel.name: rel for rel in model.relationships}
-    assert "countries" in relationships
-    assert relationships["countries"].type == "many_to_one"
-    assert relationships["countries"].foreign_key == "ship_country_id"
-    assert relationships["countries"].primary_key == "id"
+    assert "ship_country" in relationships
+    assert relationships["ship_country"].type == "many_to_one"
+    assert relationships["ship_country"].foreign_key == "ship_country_id"
+    assert relationships["ship_country"].primary_key == "id"
 
     ship_country = model.get_dimension("ship_country_name")
     assert ship_country is not None
-    assert ship_country.sql == "countries.name"
+    assert ship_country.sql == "ship_country__name"
 
     # `column_id` paths that use the table `name` (even when an `id` exists)
-    # keep their table qualifier instead of collapsing to a bare column.
+    # keep their table qualifier instead of collapsing to a bare column; the
+    # qualifier is carried through to the aliased output name.
     order_id = model.get_dimension("order_id")
     assert order_id is not None
-    assert order_id.sql == "orders.id"
+    assert order_id.sql == "orders__id"
 
     amount = model.get_metric("amount")
     assert amount is not None
     assert amount.agg == "sum"
-    assert amount.sql == "orders.amount"
+    assert amount.sql == "orders__amount"
+
+
+def test_thoughtspot_joined_model_is_queryable():
+    """A joined Model TML compiles to SQL that executes (columns stay in scope).
+
+    Regression: joined models were exported as `FROM (SELECT * FROM sales JOIN ...) AS t`
+    while dimensions/metrics kept inner qualifiers like `sales.gross_revenue`, so a
+    normal query produced `SELECT sales.gross_revenue FROM (...) AS t` and failed with
+    "table sales not found".
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/sales.model.tml")
+
+    layer = SemanticLayer()
+    for model in graph.models.values():
+        layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute(
+        "CREATE TABLE sales (id INT, order_date DATE, is_active BOOL, gross_revenue DOUBLE, "
+        "discount DOUBLE, order_count INT, customer_id INT, region_id INT, country_code VARCHAR)"
+    )
+    con.execute(
+        "INSERT INTO sales VALUES (1, DATE '2024-01-01', true, 100.0, 10.0, 2, 5, 7, 'US'), "
+        "(2, DATE '2024-01-01', true, 50.0, 5.0, 1, 5, 7, 'US')"
+    )
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+    con.execute("CREATE TABLE regions (id INT, country_code VARCHAR, name VARCHAR)")
+    con.execute("INSERT INTO regions VALUES (7, 'US', 'West')")
+
+    sql = layer.compile(
+        metrics=["sales_model.gross_revenue", "sales_model.net_revenue", "sales_model.order_count"],
+        dimensions=["sales_model.order_date", "sales_model.customer_name", "sales_model.region_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    assert rows == [(__import__("datetime").date(2024, 1, 1), "Acme", "West", 150.0, 135.0, 2)]
+
+
+def test_thoughtspot_role_playing_joins_stay_distinct():
+    """Two aliases backed by the same table become distinct role-playing joins.
+
+    Regression: resolving `with:` aliases to the backing table name collapsed
+    `ship_country` and `bill_country` (both backed by `countries`) into a single
+    ambiguous `countries` join/relationship.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/role_playing.model.tml")
+    model = graph.models["shipments_model"]
+
+    # Both aliases survive as distinct, aliased joins and relationships.
+    assert "JOIN countries AS ship_country" in model.sql
+    assert "JOIN countries AS bill_country" in model.sql
+    rel_names = {rel.name for rel in model.relationships}
+    assert {"ship_country", "bill_country"} <= rel_names
+    assert model.get_dimension("ship_country_name").sql == "ship_country__name"
+    assert model.get_dimension("bill_country_name").sql == "bill_country__name"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (id INT, amount DOUBLE, ship_country_id INT, bill_country_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 7, 8)")
+    con.execute("CREATE TABLE countries (id INT, name VARCHAR)")
+    con.execute("INSERT INTO countries VALUES (7, 'US'), (8, 'CA')")
+
+    sql = layer.compile(
+        metrics=["shipments_model.amount"],
+        dimensions=["shipments_model.ship_country_name", "shipments_model.bill_country_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    # The two roles resolve to different countries (US shipped, CA billed).
+    assert rows == [("US", "CA", 100.0)]
 
 
 def test_thoughtspot_model_auto_detect_loader():
diff --git a/tests/fixtures/thoughtspot/role_playing.model.tml b/tests/fixtures/thoughtspot/role_playing.model.tml
new file mode 100644
index 00000000..c52c6da9
--- /dev/null
+++ b/tests/fixtures/thoughtspot/role_playing.model.tml
@@ -0,0 +1,41 @@
+guid: "model-role-playing"
+export_schema_version: "2"
+model:
+  name: shipments_model
+  description: Role-playing model with two aliases backed by the same table
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: ship_country
+          on: "[orders::ship_country_id] = [ship_country::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+        - with: bill_country
+          on: "[orders::bill_country_id] = [bill_country::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: countries
+      fqn: ANALYTICS.PUBLIC.countries
+      alias: ship_country
+    - name: countries
+      fqn: ANALYTICS.PUBLIC.countries
+      alias: bill_country
+  columns:
+    - name: order_id
+      column_id: orders::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: ship_country_name
+      column_id: ship_country::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: bill_country_name
+      column_id: bill_country::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 8f9777cd1faa4b3a63930d12cf520fd12199d625 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 08:31:36 -0700
Subject: [PATCH 04/17] fix(thoughtspot): resolve unqualified formula refs and
 aliased base tables in joined models

---
 sidemantic/adapters/thoughtspot.py            | 65 ++++++++++++---
 tests/adapters/thoughtspot/test_parsing.py    | 80 +++++++++++++++++++
 .../thoughtspot/model_base_alias.model.tml    | 30 +++++++
 .../model_unqualified_formula.model.tml       | 38 +++++++++
 4 files changed, 201 insertions(+), 12 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_base_alias.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_unqualified_formula.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index b3c949ac..ceeef7a1 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -54,6 +54,9 @@
 _SIMPLE_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*$")
 _TML_REF = re.compile(r"\[([^\]]+)\]")
 _TML_DOT_REF = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)\b")
+# A bare identifier that is NOT part of a `table.column` qualifier and is NOT a
+# function call: not preceded by `.`/word char, not followed by `.` or `(`.
+_BARE_IDENTIFIER = re.compile(r"(?<![\w.])([A-Za-z_][A-Za-z0-9_]*)(?![\w.])(?!\s*\()")
 
 
 def _normalize(value: Any) -> str | None:
@@ -233,6 +236,21 @@ def _collect(expr: str | None) -> None:
     if not projection:
         return sql
 
+    # ThoughtSpot formulas also use unqualified references (e.g.
+    # `[gross_revenue] - [sales::discount]`), which convert to bare identifiers
+    # like `gross_revenue`. Map each such column name to its projected alias when
+    # exactly one joined table projects that column, so the rewritten expression
+    # uses the in-scope output alias instead of an out-of-scope bare column.
+    bare_to_alias: dict[str, str] = {}
+    ambiguous: set[str] = set()
+    for (_table, column), alias in projection.items():
+        if column in bare_to_alias and bare_to_alias[column] != alias:
+            ambiguous.add(column)
+        else:
+            bare_to_alias[column] = alias
+    for column in ambiguous:
+        bare_to_alias.pop(column, None)
+
     def _rewrite(expr: str | None) -> str | None:
         if not expr:
             return expr
@@ -243,7 +261,12 @@ def _replace(match: re.Match[str]) -> str:
             alias = projection.get((table, column))
             return alias if alias else match.group(0)
 
-        return _TML_DOT_REF.sub(_replace, expr)
+        rewritten = _TML_DOT_REF.sub(_replace, expr)
+
+        def _replace_bare(match: re.Match[str]) -> str:
+            return bare_to_alias.get(match.group(1), match.group(0))
+
+        return _BARE_IDENTIFIER.sub(_replace_bare, rewritten)
 
     for dim in dimensions:
         dim.sql = _rewrite(dim.sql)
@@ -587,10 +610,13 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
             path_lookup[alias] = alias
 
         # Flatten nested joins (one per model_tables entry) into the same shape
-        # the worksheet join helpers consume.
+        # the worksheet join helpers consume. When a table carries an `alias`,
+        # its `column_id`/`on` qualifiers use the alias (e.g. `o::id`), so the
+        # join `source` must be the alias too (not the backing table name) for
+        # the join-direction logic and SQL relation name to stay consistent.
         flat_joins: list[dict[str, Any]] = []
         for table in model_tables:
-            source = table.get("name") or table.get("id")
+            source = table.get("alias") or table.get("name") or table.get("id")
             for join_def in table.get("joins") or []:
                 destination = join_def.get("with")
                 if not source or not destination:
@@ -713,6 +739,10 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
         # metric SQL so the inner table qualifiers stay in scope when queried.
         if sql:
             known_tables = set(table_name_lookup.values())
+            # Aliases (role-playing or a base-table alias) are the in-scope
+            # relation names that qualify columns like `o.id`/`ship_country.name`,
+            # so they must be recognized when projecting the derived columns.
+            known_tables.update(alias_to_table.keys())
             for join_def in flat_joins:
                 known_tables.add(join_def.get("source"))
                 known_tables.add(join_def.get("destination"))
@@ -819,16 +849,24 @@ def _build_join_sql(
         alias_to_table: dict[str, str] | None = None,
     ) -> tuple[str | None, str | None]:
         base_table = None
+        base_alias = None
         if tables:
             base_table = tables[0].get("name") or tables[0].get("id")
+            base_alias = tables[0].get("alias")
+
+        # When the base table is aliased, its `column_id`/`on` qualifiers use the
+        # alias (e.g. `o::id`), so the relation in scope is the alias. Emit
+        # `FROM <table> AS <alias>` and key the join tracking on the alias so the
+        # alias resolves instead of failing with "table o not found".
+        base_relation = base_alias if (base_alias and base_alias != base_table) else base_table
 
         joined: set[str] = set()
-        if base_table:
-            joined.add(base_table)
+        if base_relation:
+            joined.add(base_relation)
 
         clauses: list[str] = []
-        if base_table:
-            clauses.append(base_table)
+        if base_relation:
+            clauses.append(f"{base_table} AS {base_alias}" if base_relation != base_table else base_table)
 
         for join_def in joins:
             source = join_def.get("source")
@@ -846,10 +884,11 @@ def _build_join_sql(
                 source = table_name_lookup.get(source, source)
                 destination = table_name_lookup.get(destination, destination)
 
-            if not base_table:
+            if not base_relation:
                 base_table = source
-                clauses.append(base_table)
-                joined.add(base_table)
+                base_relation = source
+                clauses.append(base_relation)
+                joined.add(base_relation)
 
             if source in joined and destination not in joined:
                 right = destination
@@ -878,13 +917,15 @@ def _build_join_sql(
             return None, None
 
         if len(clauses) == 1:
-            return None, clauses[0]
+            # Single base table, no joins: `model.table` is the real table name,
+            # not an alias (no subquery wraps it, so there is nothing to alias).
+            return None, base_table
 
         sql = "SELECT * FROM " + clauses[0]
         for clause in clauses[1:]:
             sql += f"\n{clause}"
 
-        return sql, base_table
+        return sql, base_relation
 
     def _parse_join_relationships(
         self,
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index c0294cd6..0b6004f0 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -888,6 +888,86 @@ def test_thoughtspot_role_playing_joins_stay_distinct():
     assert rows == [("US", "CA", 100.0)]
 
 
+def test_thoughtspot_unqualified_formula_ref_is_queryable():
+    """A joined-model formula using an unqualified column ref stays queryable.
+
+    Regression: ThoughtSpot formulas often use the unqualified reference form,
+    e.g. `[gross_revenue] - [sales::discount]`. The derived projection exposed
+    `sales.gross_revenue AS sales__gross_revenue`, but the metric SQL kept the
+    bare `gross_revenue`, so a normal query failed with
+    `Referenced column "gross_revenue" not found`. The bare ref must resolve to
+    the projected output alias.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_unqualified_formula.model.tml")
+    model = graph.models["sales_model"]
+
+    # The unqualified `gross_revenue` ref resolves to the projected output alias,
+    # and the qualified `sales::discount` ref to its alias.
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert net_revenue.sql == "sales__gross_revenue - sales__discount"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, gross_revenue DOUBLE, discount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0, 5), (2, 50.0, 5.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(
+        metrics=["sales_model.gross_revenue", "sales_model.net_revenue"],
+        dimensions=["sales_model.customer_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    # gross_revenue = 150, net_revenue = (100-10) + (50-5) = 135
+    assert rows == [("Acme", 150.0, 135.0)]
+
+
+def test_thoughtspot_aliased_base_table_is_queryable():
+    """A joined model whose base/source table is aliased stays queryable.
+
+    Regression: aliases were applied only to the joined `right` relation, so a
+    base table declared as `name: orders, alias: o` (with `column_id: o::amount`
+    and an `on` clause using `[o::id]`) emitted `FROM orders` while the columns
+    referenced `o.*`, failing with `Referenced table "o" not found`.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_base_alias.model.tml")
+    model = graph.models["orders_model"]
+
+    # The base table is aliased in the FROM clause so the `o` qualifier resolves.
+    assert model.sql is not None
+    assert "FROM orders AS o" in model.sql
+    # Base-table columns are projected under stable aliases keyed on the alias.
+    assert model.get_dimension("order_id").sql == "o__id"
+    assert model.get_metric("amount").sql == "o__amount"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (id INT, amount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 5), (2, 25.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(
+        metrics=["orders_model.amount"],
+        dimensions=["orders_model.customer_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    assert rows == [("Acme", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_base_alias.model.tml b/tests/fixtures/thoughtspot/model_base_alias.model.tml
new file mode 100644
index 00000000..c1478e78
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_base_alias.model.tml
@@ -0,0 +1,30 @@
+guid: "model-base-alias"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: Joined model whose base/source table carries an alias (orders AS o)
+  model_tables:
+    - name: orders
+      alias: o
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: customers
+          on: "[o::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: order_id
+      column_id: o::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: o::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml b/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml
new file mode 100644
index 00000000..bf0c4e45
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml
@@ -0,0 +1,38 @@
+guid: "model-unqualified-formula"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Joined model whose formula uses an unqualified column reference
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "[sales::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  formulas:
+    - name: net_revenue
+      expr: "[gross_revenue] - [sales::discount]"
+      id: net_rev
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 14a9a58219c8aef97dfaae5716fa7c7260a5efd6 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 09:10:52 -0700
Subject: [PATCH 05/17] fix(thoughtspot): preserve string literals and make
 single aliased tables queryable

---
 sidemantic/adapters/thoughtspot.py            | 36 +++++++++++++---
 tests/adapters/thoughtspot/test_parsing.py    | 42 ++++++++++++++++++-
 .../model_single_table_alias.model.tml        | 19 +++++++++
 .../model_unqualified_formula.model.tml       | 11 +++++
 4 files changed, 101 insertions(+), 7 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_single_table_alias.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index ceeef7a1..3b853ba0 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -57,6 +57,25 @@
 # A bare identifier that is NOT part of a `table.column` qualifier and is NOT a
 # function call: not preceded by `.`/word char, not followed by `.` or `(`.
 _BARE_IDENTIFIER = re.compile(r"(?<![\w.])([A-Za-z_][A-Za-z0-9_]*)(?![\w.])(?!\s*\()")
+# A quoted string literal (single or double quoted, with doubled-quote escapes).
+_STRING_LITERAL = re.compile(r"'(?:[^']|'')*'|\"(?:[^\"]|\"\")*\"")
+
+
+def _sub_outside_strings(pattern: re.Pattern[str], repl: Any, text: str) -> str:
+    """Apply ``pattern.sub(repl, ...)`` only to regions outside quoted literals.
+
+    Column-reference rewriting must not touch string literals, otherwise a
+    formula like ``[status] = 'status'`` would rewrite the literal too and change
+    the predicate's meaning.
+    """
+    result: list[str] = []
+    last = 0
+    for m in _STRING_LITERAL.finditer(text):
+        result.append(pattern.sub(repl, text[last : m.start()]))
+        result.append(m.group(0))
+        last = m.end()
+    result.append(pattern.sub(repl, text[last:]))
+    return "".join(result)
 
 
 def _normalize(value: Any) -> str | None:
@@ -261,12 +280,13 @@ def _replace(match: re.Match[str]) -> str:
             alias = projection.get((table, column))
             return alias if alias else match.group(0)
 
-        rewritten = _TML_DOT_REF.sub(_replace, expr)
-
         def _replace_bare(match: re.Match[str]) -> str:
             return bare_to_alias.get(match.group(1), match.group(0))
 
-        return _BARE_IDENTIFIER.sub(_replace_bare, rewritten)
+        # Rewrite column references only outside quoted string literals so a
+        # literal that happens to match a column name is left untouched.
+        rewritten = _sub_outside_strings(_TML_DOT_REF, _replace, expr)
+        return _sub_outside_strings(_BARE_IDENTIFIER, _replace_bare, rewritten)
 
     for dim in dimensions:
         dim.sql = _rewrite(dim.sql)
@@ -917,8 +937,14 @@ def _build_join_sql(
             return None, None
 
         if len(clauses) == 1:
-            # Single base table, no joins: `model.table` is the real table name,
-            # not an alias (no subquery wraps it, so there is nothing to alias).
+            if base_relation != base_table:
+                # Single but aliased base table (e.g. `name: orders, alias: o`
+                # with `column_id: o::amount`): the fields reference `o.*`, so the
+                # alias must be in scope. Emit `SELECT * FROM orders AS o`, which
+                # `_expose_joined_columns` rewrites into queryable output aliases.
+                return f"SELECT * FROM {clauses[0]}", base_relation
+            # Single, unaliased base table: `model.table` is the real table name
+            # (no subquery wraps it, so there is nothing to alias).
             return None, base_table
 
         sql = "SELECT * FROM " + clauses[0]
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 0b6004f0..1ba74048 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -910,13 +910,19 @@ def test_thoughtspot_unqualified_formula_ref_is_queryable():
     assert net_revenue is not None
     assert net_revenue.sql == "sales__gross_revenue - sales__discount"
 
+    # A string literal that matches a column name (`'status'`) must NOT be
+    # rewritten; only the bare column reference `status` is qualified.
+    is_open = model.get_dimension("is_open")
+    assert is_open is not None
+    assert is_open.sql == "sales__status = 'status'"
+
     layer = SemanticLayer()
     for m in graph.models.values():
         layer.add_model(m)
 
     con = duckdb.connect()
-    con.execute("CREATE TABLE sales (id INT, gross_revenue DOUBLE, discount DOUBLE, customer_id INT)")
-    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0, 5), (2, 50.0, 5.0, 5)")
+    con.execute("CREATE TABLE sales (id INT, gross_revenue DOUBLE, discount DOUBLE, customer_id INT, status VARCHAR)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0, 5, 'status'), (2, 50.0, 5.0, 5, 'closed')")
     con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
     con.execute("INSERT INTO customers VALUES (5, 'Acme')")
 
@@ -968,6 +974,38 @@ def test_thoughtspot_aliased_base_table_is_queryable():
     assert rows == [("Acme", 125.0)]
 
 
+def test_thoughtspot_single_aliased_table_is_queryable():
+    """A single (join-less) aliased table stays queryable.
+
+    Regression: `model_tables` with one entry `name: orders, alias: o` and
+    columns like `o::amount` converted fields to `o.amount` but emitted
+    `table=orders` with no SQL, so the compiled query selected `o.amount` from
+    `orders` with no `o` alias in scope. The base table must be aliased.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_single_table_alias.model.tml")
+    model = graph.models["orders_model"]
+
+    # The alias is in scope: SQL wraps the single table as `orders AS o`.
+    assert model.sql is not None
+    assert "FROM orders AS o" in model.sql
+    assert model.get_metric("amount").sql == "o__amount"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (id INT, amount DOUBLE)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0), (2, 25.0)")
+
+    sql = layer.compile(metrics=["orders_model.amount"], dimensions=["orders_model.order_id"])
+    rows = sorted(con.execute(sql).fetchall())
+    assert rows == [(1, 100.0), (2, 25.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_single_table_alias.model.tml b/tests/fixtures/thoughtspot/model_single_table_alias.model.tml
new file mode 100644
index 00000000..0eecd689
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_single_table_alias.model.tml
@@ -0,0 +1,19 @@
+guid: "model-single-table-alias"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: Single aliased table (orders AS o) with no joins
+  model_tables:
+    - name: orders
+      alias: o
+      fqn: ANALYTICS.PUBLIC.orders
+  columns:
+    - name: order_id
+      column_id: o::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: o::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml b/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml
index bf0c4e45..43d3f689 100644
--- a/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml
+++ b/tests/fixtures/thoughtspot/model_unqualified_formula.model.tml
@@ -17,6 +17,9 @@ model:
     - name: net_revenue
       expr: "[gross_revenue] - [sales::discount]"
       id: net_rev
+    - name: is_open
+      expr: "[status] = 'status'"
+      id: is_open_f
   columns:
     - name: order_id
       column_id: sales::id
@@ -26,6 +29,14 @@ model:
       column_id: customers::name
       properties:
         column_type: ATTRIBUTE
+    - name: status
+      column_id: sales::status
+      properties:
+        column_type: ATTRIBUTE
+    - name: is_open
+      formula_id: is_open_f
+      properties:
+        column_type: ATTRIBUTE
     - name: gross_revenue
       column_id: sales::gross_revenue
       properties:

From f1679a77bad4f8a9d44f095a38f9f33b7f181554 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 11:13:12 -0700
Subject: [PATCH 06/17] fix(thoughtspot): map bare formula refs by column name
 and project relationship keys

---
 sidemantic/adapters/thoughtspot.py            | 81 +++++++++++++++--
 tests/adapters/thoughtspot/test_parsing.py    | 89 +++++++++++++++++++
 .../model_relationship_fk.model.tml           | 29 ++++++
 .../model_renamed_formula.model.tml           | 39 ++++++++
 4 files changed, 229 insertions(+), 9 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_relationship_fk.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_renamed_formula.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 3b853ba0..2340e772 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -209,6 +209,23 @@ def _simple_column(sql: str | None, fallback: str | None) -> str | None:
     return fallback
 
 
+def _simple_table_column(sql: str | None, tables: set[str]) -> tuple[str, str] | None:
+    """Return the ``(table, column)`` pair if ``sql`` is exactly ``table.column``.
+
+    Only matches when ``table`` is one of the joined ``tables`` so the reference
+    corresponds to a column projected by the derived subquery.
+    """
+    if not sql:
+        return None
+    match = _TML_DOT_REF.fullmatch(sql.strip())
+    if not match:
+        return None
+    table, column = match.group(1), match.group(2)
+    if table in tables:
+        return (table, column)
+    return None
+
+
 def _expose_joined_columns(
     sql: str | None,
     tables: set[str],
@@ -216,6 +233,7 @@ def _expose_joined_columns(
     metrics: list[Metric],
     base_table: str | None = None,
     primary_key: str | None = None,
+    foreign_keys: dict[str, tuple[str, str]] | None = None,
 ) -> str | None:
     """Rewrite a joined model's derived SQL so its columns are queryable.
 
@@ -231,6 +249,12 @@ def _expose_joined_columns(
     The model's ``primary_key`` is also passed through by the SQL generator as a
     bare column, so the base table's primary key is exposed under that name when
     no model column already projects it.
+
+    ``foreign_keys`` maps each relationship join key (the bare column name the
+    SQL generator selects when this model participates in a cross-model join) to
+    the ``(table, column)`` that backs it. Each one is projected under its bare
+    name when no model column already exposes it, so the derived subquery stays
+    joinable.
     """
     if not sql or "SELECT * FROM " not in sql:
         return sql
@@ -262,13 +286,27 @@ def _collect(expr: str | None) -> None:
     # uses the in-scope output alias instead of an out-of-scope bare column.
     bare_to_alias: dict[str, str] = {}
     ambiguous: set[str] = set()
-    for (_table, column), alias in projection.items():
-        if column in bare_to_alias and bare_to_alias[column] != alias:
-            ambiguous.add(column)
+
+    def _record_bare(name: str, alias: str) -> None:
+        if name in bare_to_alias and bare_to_alias[name] != alias:
+            ambiguous.add(name)
         else:
-            bare_to_alias[column] = alias
-    for column in ambiguous:
-        bare_to_alias.pop(column, None)
+            bare_to_alias[name] = alias
+
+    for (_table, column), alias in projection.items():
+        _record_bare(column, alias)
+    # Formulas can also reference another TML column by its model name even when
+    # that name differs from the backing DB column (e.g. a column `gross_revenue`
+    # mapped to `column_id: sales::gross_amt`, with formula `[gross_revenue] -
+    # [discount]`). The projection aliases the DB column (`sales__gross_amt`), so
+    # also map the model column name to that alias; otherwise the bare model name
+    # stays out of scope and the query fails.
+    for field in (*dimensions, *metrics):
+        ref = _simple_table_column(field.sql, tables)
+        if ref:
+            _record_bare(field.name, projection[ref])
+    for name in ambiguous:
+        bare_to_alias.pop(name, None)
 
     def _rewrite(expr: str | None) -> str | None:
         if not expr:
@@ -295,12 +333,25 @@ def _replace_bare(match: re.Match[str]) -> str:
 
     select_parts = [f"{table}.{column} AS {alias}" for (table, column), alias in projection.items()]
 
+    # Track which bare output names already exist so pass-through keys are not
+    # projected twice.
+    exposed: set[str] = set(projection.values())
+
     # The SQL generator passes through `model.primary_key` as a bare column when
     # querying derived models. Expose `<base_table>.<primary_key>` under that
     # name so the key resolves instead of referencing an out-of-scope column.
-    aliases = set(projection.values())
-    if base_table and primary_key and primary_key not in aliases:
+    if base_table and primary_key and primary_key not in exposed:
         select_parts.append(f"{base_table}.{primary_key} AS {primary_key}")
+        exposed.add(primary_key)
+
+    # Relationship foreign keys are also passed through as bare columns when this
+    # model is joined to a separately loaded related model. A foreign key that is
+    # not already projected by a dimension/metric (or the primary key) would be
+    # missing from the subquery, so expose it from its backing table.
+    for fk, (fk_table, fk_column) in sorted((foreign_keys or {}).items()):
+        if fk and fk not in exposed and fk_table in tables:
+            select_parts.append(f"{fk_table}.{fk_column} AS {fk}")
+            exposed.add(fk)
 
     select_list = ", ".join(select_parts)
     return sql.replace("SELECT * FROM ", f"SELECT {select_list} FROM ", 1)
@@ -767,7 +818,19 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 known_tables.add(join_def.get("source"))
                 known_tables.add(join_def.get("destination"))
             known_tables.discard(None)
-            sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key)
+
+            # Resolve each relationship's foreign key to the `(table, column)` it
+            # comes from in the join `on` clauses, so the derived projection can
+            # expose those join keys for cross-model queries.
+            fk_refs: dict[str, tuple[str, str]] = {}
+            fk_names = {rel.foreign_key for rel in relationships if rel.foreign_key}
+            for join_def in flat_joins:
+                left, right = _extract_join_refs(join_def.get("on"), path_lookup)
+                for ref in (left, right):
+                    if ref and ref[1] in fk_names and ref[0] in known_tables:
+                        fk_refs.setdefault(ref[1], ref)
+
+            sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key, fk_refs)
 
         default_time_dimension = None
         default_grain = None
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 1ba74048..b78b22c9 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1006,6 +1006,95 @@ def test_thoughtspot_single_aliased_table_is_queryable():
     assert rows == [(1, 100.0), (2, 25.0)]
 
 
+def test_thoughtspot_renamed_column_formula_ref_is_queryable():
+    """A formula referencing a TML column whose backing DB column differs stays queryable.
+
+    Regression: a column `gross_revenue` mapped to `column_id: sales::gross_amt`
+    is projected as `sales.gross_amt AS sales__gross_amt`, but the bare-reference
+    map was keyed only on DB column names. A formula `[gross_revenue] - [discount]`
+    kept the bare model names `gross_revenue`/`discount`, which are out of scope in
+    the derived subquery, so a normal query failed. The bare model names must
+    resolve to their projected output aliases.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_renamed_formula.model.tml")
+    model = graph.models["sales_model"]
+
+    # The formula's bare model-name refs resolve to the projected DB-column aliases.
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert net_revenue.sql == "sales__gross_amt - sales__disc_amt"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, gross_amt DOUBLE, disc_amt DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0, 5), (2, 50.0, 5.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(
+        metrics=["sales_model.gross_revenue", "sales_model.net_revenue"],
+        dimensions=["sales_model.customer_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    # gross_revenue = 150, net_revenue = (100-10) + (50-5) = 135
+    assert rows == [("Acme", 150.0, 135.0)]
+
+
+def test_thoughtspot_relationship_fk_is_projected_for_cross_model_query():
+    """A relationship foreign key is exposed even when it is not also a column.
+
+    Regression: the derived projection only emitted columns referenced by
+    dimensions/metrics plus the primary key. When this model joined a separately
+    loaded related model, the SQL generator selected the relationship's foreign
+    key (e.g. `region_id`) as a bare column from the derived subquery, but it was
+    never projected, so the cross-model query failed with "Column region_id ...
+    cannot be referenced before it is defined".
+    """
+    import duckdb
+
+    from sidemantic.core.dimension import Dimension
+    from sidemantic.core.model import Model
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_relationship_fk.model.tml")
+    sales_model = graph.models["sales_model"]
+
+    # The foreign key is projected from the base table under its bare name even
+    # though no dimension/metric references it.
+    assert sales_model.sql is not None
+    assert "sales.region_id AS region_id" in sales_model.sql
+    rel = {r.name: r for r in sales_model.relationships}["regions"]
+    assert rel.foreign_key == "region_id"
+
+    # A separately loaded `regions` model joined via the parsed relationship.
+    regions = Model(
+        name="regions",
+        table="regions",
+        primary_key="id",
+        dimensions=[Dimension(name="name", type="categorical", sql="name")],
+    )
+
+    layer = SemanticLayer()
+    layer.add_model(sales_model)
+    layer.add_model(regions)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, region_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 7), (2, 25.0, 7)")
+    con.execute("CREATE TABLE regions (id INT, name VARCHAR)")
+    con.execute("INSERT INTO regions VALUES (7, 'West')")
+
+    sql = layer.compile(metrics=["sales_model.amount"], dimensions=["regions.name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("West", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_relationship_fk.model.tml b/tests/fixtures/thoughtspot/model_relationship_fk.model.tml
new file mode 100644
index 00000000..e30681e9
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_relationship_fk.model.tml
@@ -0,0 +1,29 @@
+guid: "model-relationship-fk"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Joined model whose relationship foreign key is not also a column
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: regions
+          on: "[sales::region_id] = [regions::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: regions
+      fqn: ANALYTICS.PUBLIC.regions
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: region_name
+      column_id: regions::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_renamed_formula.model.tml b/tests/fixtures/thoughtspot/model_renamed_formula.model.tml
new file mode 100644
index 00000000..ba3c3fcb
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_renamed_formula.model.tml
@@ -0,0 +1,39 @@
+guid: "model-renamed-formula"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Formula references a TML column whose backing DB column name differs
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "[sales::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  formulas:
+    - name: net_revenue
+      expr: "[gross_revenue] - [discount]"
+      id: net_rev
+  columns:
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::gross_amt
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: discount
+      column_id: sales::disc_amt
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From ae95a8f56fbfddd73e87aafb0d8f795213f31afa Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 11:32:36 -0700
Subject: [PATCH 07/17] fix(thoughtspot): resolve single-table formula refs and
 one-to-many join direction

---
 sidemantic/adapters/thoughtspot.py            | 48 ++++++++++++
 tests/adapters/thoughtspot/test_parsing.py    | 76 +++++++++++++++++++
 .../thoughtspot/model_one_to_many.model.tml   | 29 +++++++
 ...del_single_table_renamed_formula.model.tml | 32 ++++++++
 4 files changed, 185 insertions(+)
 create mode 100644 tests/fixtures/thoughtspot/model_one_to_many.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_single_table_renamed_formula.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 2340e772..becf1a83 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -357,6 +357,37 @@ def _replace_bare(match: re.Match[str]) -> str:
     return sql.replace("SELECT * FROM ", f"SELECT {select_list} FROM ", 1)
 
 
+def _resolve_bare_refs_to_db_columns(dimensions: list[Dimension], metrics: list[Metric]) -> None:
+    """Rewrite formula bare model-name refs to their backing DB columns in place.
+
+    For a join-less model the SQL generator queries the base table directly, so a
+    formula that references another TML column by its model name only works when
+    that name equals the backing DB column. Build a map of model name -> DB column
+    from the non-formula fields (whose SQL is a plain ``column`` or
+    ``table.column``) and rewrite each formula's bare references so they target
+    the real column. Only names that differ from their DB column are rewritten;
+    string literals are left untouched.
+    """
+    name_to_db_col: dict[str, str] = {}
+    for field in (*dimensions, *metrics):
+        sql = field.sql
+        if not sql:
+            continue
+        _table, column = _split_sql_identifier(sql)
+        if column and column != field.name:
+            name_to_db_col[field.name] = column
+
+    if not name_to_db_col:
+        return
+
+    def _replace_bare(match: re.Match[str]) -> str:
+        return name_to_db_col.get(match.group(1), match.group(0))
+
+    for field in (*dimensions, *metrics):
+        if field.sql:
+            field.sql = _sub_outside_strings(_BARE_IDENTIFIER, _replace_bare, field.sql)
+
+
 class ThoughtSpotAdapter(BaseAdapter):
     """Adapter for ThoughtSpot TML (YAML) tables and worksheets."""
 
@@ -831,6 +862,15 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                         fk_refs.setdefault(ref[1], ref)
 
             sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key, fk_refs)
+        else:
+            # Single-table (join-less) model: no derived subquery wraps it, so the
+            # `_expose_joined_columns` rewrite never runs. A formula that refers to
+            # another TML column by its model name (e.g. column `gross_revenue`
+            # mapped from `sales::gross_amt`, formula `[gross_revenue] -
+            # [discount]`) keeps the bare model name, which is not a real column on
+            # the base table. Rewrite those bare refs to the backing DB column so
+            # the query stays valid.
+            _resolve_bare_refs_to_db_columns(dimensions, metrics)
 
         default_time_dimension = None
         default_grain = None
@@ -912,6 +952,14 @@ def _parse_model_relationships(
             if join_type in {"RIGHT_OUTER", "FULL_OUTER", "OUTER"} and cardinality not in _CARDINALITY_MAP:
                 rel_type = "many_to_many"
 
+            # The keys above follow the `many_to_one` convention: `foreign_key` on
+            # the source (local) side, `primary_key` on the destination (related)
+            # side. For a `one_to_many` relationship the related model holds the
+            # foreign key and the local model holds the primary key, so swap them
+            # to match how Sidemantic interprets `one_to_many` keys.
+            if rel_type == "one_to_many":
+                foreign_key, primary_key = primary_key, foreign_key
+
             relationships.append(
                 Relationship(
                     name=destination,
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index b78b22c9..c2c03d34 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1095,6 +1095,82 @@ def test_thoughtspot_relationship_fk_is_projected_for_cross_model_query():
     assert rows == [("West", 125.0)]
 
 
+def test_thoughtspot_single_table_renamed_formula_ref_is_queryable():
+    """A join-less model's formula referencing a renamed column stays queryable.
+
+    Regression: the renamed-column rewrite only ran on the derived (joined) SQL
+    path. For a single-table model (`sql is None`), a formula like
+    `[gross_revenue] - [discount]` kept the bare model names, so the compiled
+    `SELECT gross_revenue - discount FROM sales` failed because the table only has
+    the backing columns `gross_amt`/`disc_amt`.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_single_table_renamed_formula.model.tml")
+    model = graph.models["sales_model"]
+
+    # No join, so the model queries the base table directly; the formula's bare
+    # model-name refs are rewritten to the backing DB columns.
+    assert model.sql is None
+    assert model.table == "sales"
+    assert model.get_metric("net_revenue").sql == "gross_amt - disc_amt"
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, gross_amt DOUBLE, disc_amt DOUBLE)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0), (2, 50.0, 5.0)")
+
+    sql = layer.compile(
+        metrics=["sales_model.gross_revenue", "sales_model.net_revenue"],
+        dimensions=["sales_model.order_id"],
+    )
+    rows = sorted(con.execute(sql).fetchall())
+    # net_revenue = gross_amt - disc_amt
+    assert rows == [(1, 100.0, 90.0), (2, 50.0, 45.0)]
+
+
+def test_thoughtspot_one_to_many_join_keys_match_direction():
+    """A `one_to_many` join maps the foreign key to the related (child) model.
+
+    Regression: keys were assigned with the `many_to_one` convention (foreign key
+    on the local/source side). For a `one_to_many` join `[customers::id] =
+    [orders::customer_id]`, Sidemantic expects `foreign_key` on the related model
+    (`orders.customer_id`) and `primary_key` on the local model (`customers.id`),
+    so cross-model queries joined the wrong columns before this fix.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_one_to_many.model.tml")
+    model = graph.models["customers_model"]
+
+    rel = {r.name: r for r in model.relationships}["orders"]
+    assert rel.type == "one_to_many"
+    # Related (child) model holds the FK; local model holds the PK.
+    assert rel.foreign_key == "customer_id"
+    assert rel.primary_key == "id"
+
+    layer = SemanticLayer()
+    for m in graph.models.values():
+        layer.add_model(m)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+    con.execute("CREATE TABLE orders (id INT, customer_id INT, amount DOUBLE)")
+    con.execute("INSERT INTO orders VALUES (1, 5, 100.0), (2, 5, 25.0)")
+
+    sql = layer.compile(
+        metrics=["customers_model.order_amount"],
+        dimensions=["customers_model.customer_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    assert rows == [("Acme", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_one_to_many.model.tml b/tests/fixtures/thoughtspot/model_one_to_many.model.tml
new file mode 100644
index 00000000..34d56ab9
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_one_to_many.model.tml
@@ -0,0 +1,29 @@
+guid: "model-one-to-many"
+export_schema_version: "2"
+model:
+  name: customers_model
+  description: One-to-many parent-to-child join (FK lives on the related model)
+  model_tables:
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+      joins:
+        - with: orders
+          on: "[customers::id] = [orders::customer_id]"
+          type: LEFT_OUTER
+          cardinality: ONE_TO_MANY
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+  columns:
+    - name: customer_id
+      column_id: customers::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_single_table_renamed_formula.model.tml b/tests/fixtures/thoughtspot/model_single_table_renamed_formula.model.tml
new file mode 100644
index 00000000..97467a0d
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_single_table_renamed_formula.model.tml
@@ -0,0 +1,32 @@
+guid: "model-single-table-renamed-formula"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Single-table model whose formula references renamed columns
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+  formulas:
+    - name: net_revenue
+      expr: "[gross_revenue] - [discount]"
+      id: net_rev
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::gross_amt
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: discount
+      column_id: sales::disc_amt
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 8829f2f22467daa28dd6403b43e32d7d3006790c Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 11:49:56 -0700
Subject: [PATCH 08/17] fix(thoughtspot): correct one-to-one join key direction
 and project relationship keys

---
 sidemantic/adapters/thoughtspot.py            | 26 ++++++---
 tests/adapters/thoughtspot/test_parsing.py    | 56 ++++++++++++++++++-
 .../thoughtspot/model_one_to_one.model.tml    | 29 ++++++++++
 3 files changed, 99 insertions(+), 12 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_one_to_one.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index becf1a83..b5cbba8f 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -850,15 +850,23 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 known_tables.add(join_def.get("destination"))
             known_tables.discard(None)
 
-            # Resolve each relationship's foreign key to the `(table, column)` it
-            # comes from in the join `on` clauses, so the derived projection can
-            # expose those join keys for cross-model queries.
+            # Resolve each relationship's join keys to the `(table, column)` they
+            # come from in the join `on` clauses, so the derived projection can
+            # expose them for cross-model queries. The SQL generator passes through
+            # the foreign key (many_to_one) or the local primary key (one_to_one/
+            # one_to_many) as bare columns from this derived subquery, so cover
+            # both sides.
             fk_refs: dict[str, tuple[str, str]] = {}
-            fk_names = {rel.foreign_key for rel in relationships if rel.foreign_key}
+            key_names: set[str] = set()
+            for rel in relationships:
+                if rel.foreign_key:
+                    key_names.add(rel.foreign_key)
+                if rel.primary_key:
+                    key_names.add(rel.primary_key)
             for join_def in flat_joins:
                 left, right = _extract_join_refs(join_def.get("on"), path_lookup)
                 for ref in (left, right):
-                    if ref and ref[1] in fk_names and ref[0] in known_tables:
+                    if ref and ref[1] in key_names and ref[0] in known_tables:
                         fk_refs.setdefault(ref[1], ref)
 
             sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key, fk_refs)
@@ -954,10 +962,10 @@ def _parse_model_relationships(
 
             # The keys above follow the `many_to_one` convention: `foreign_key` on
             # the source (local) side, `primary_key` on the destination (related)
-            # side. For a `one_to_many` relationship the related model holds the
-            # foreign key and the local model holds the primary key, so swap them
-            # to match how Sidemantic interprets `one_to_many` keys.
-            if rel_type == "one_to_many":
+            # side. Sidemantic treats `one_to_many` and `one_to_one` as edges
+            # where the related model owns the `foreign_key` and the local model
+            # owns the `primary_key`, so swap them to match that key direction.
+            if rel_type in ("one_to_many", "one_to_one"):
                 foreign_key, primary_key = primary_key, foreign_key
 
             relationships.append(
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index c2c03d34..a2f1133f 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -701,10 +701,12 @@ def test_import_thoughtspot_model():
     assert relationships["customers"].type == "many_to_one"
     assert relationships["customers"].foreign_key == "customer_id"
     assert relationships["customers"].primary_key == "id"
-    # cardinality: ONE_TO_ONE
+    # cardinality: ONE_TO_ONE. Sidemantic treats one_to_one (like one_to_many) as
+    # an edge where the related model owns the foreign key and the local model
+    # owns the primary key, so the parsed source-side FK/PK are stored swapped.
     assert relationships["regions"].type == "one_to_one"
-    assert relationships["regions"].foreign_key == "region_id"
-    assert relationships["regions"].primary_key == "id"
+    assert relationships["regions"].foreign_key == "id"
+    assert relationships["regions"].primary_key == "region_id"
 
     # A joined model becomes a derived table. Its `SELECT *` is rewritten into an
     # explicit projection that aliases each inner `table.column` to a stable,
@@ -1171,6 +1173,54 @@ def test_thoughtspot_one_to_many_join_keys_match_direction():
     assert rows == [("Acme", 125.0)]
 
 
+def test_thoughtspot_one_to_one_join_keys_match_direction():
+    """A `one_to_one` join with a source-side FK joins correctly cross-model.
+
+    Regression: `one_to_one` keys were stored in the `many_to_one` positions
+    (FK on the source/local side). Sidemantic reads `one_to_one` like a has-one
+    edge where the related model owns the foreign key, so a cross-model query with
+    a separately loaded `regions` model joined `regions.region_id` to
+    `sales_model.id` and failed instead of joining `regions.id` to
+    `sales_model.region_id`.
+    """
+    import duckdb
+
+    from sidemantic.core.dimension import Dimension
+    from sidemantic.core.model import Model
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_one_to_one.model.tml")
+    sales_model = graph.models["sales_model"]
+
+    rel = {r.name: r for r in sales_model.relationships}["regions"]
+    assert rel.type == "one_to_one"
+    # Related model owns the FK (regions.id), local model owns the PK (region_id).
+    assert rel.foreign_key == "id"
+    assert rel.primary_key == "region_id"
+
+    # A separately loaded `regions` model joined via the parsed relationship.
+    regions = Model(
+        name="regions",
+        table="regions",
+        primary_key="id",
+        dimensions=[Dimension(name="zone", type="categorical", sql="zone")],
+    )
+
+    layer = SemanticLayer()
+    layer.add_model(sales_model)
+    layer.add_model(regions)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, region_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 7), (2, 25.0, 7)")
+    con.execute("CREATE TABLE regions (id INT, name VARCHAR, zone VARCHAR)")
+    con.execute("INSERT INTO regions VALUES (7, 'West', 'PACIFIC')")
+
+    sql = layer.compile(metrics=["sales_model.amount"], dimensions=["regions.zone"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("PACIFIC", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_one_to_one.model.tml b/tests/fixtures/thoughtspot/model_one_to_one.model.tml
new file mode 100644
index 00000000..492fa89e
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_one_to_one.model.tml
@@ -0,0 +1,29 @@
+guid: "model-one-to-one"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: One-to-one join where the source side owns the foreign key
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: regions
+          on: "[sales::region_id] = [regions::id]"
+          type: INNER
+          cardinality: ONE_TO_ONE
+    - name: regions
+      fqn: ANALYTICS.PUBLIC.regions
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: region_name
+      column_id: regions::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From c270e69c115b610a314aba9d069117240e67050b Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 12:08:10 -0700
Subject: [PATCH 09/17] fix(thoughtspot): preserve composite join keys and
 infer non-id primary keys

---
 sidemantic/adapters/thoughtspot.py            | 101 ++++++++++++++----
 tests/adapters/thoughtspot/test_parsing.py    |  88 ++++++++++++++-
 .../thoughtspot/model_composite_key.model.tml |  29 +++++
 .../thoughtspot/model_non_id_key.model.tml    |  29 +++++
 4 files changed, 222 insertions(+), 25 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_composite_key.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_non_id_key.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index b5cbba8f..f5b145d6 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -164,6 +164,35 @@ def _extract_join_refs(
     return left, right
 
 
+def _extract_all_join_refs(
+    expr: str | None, table_path_lookup: dict[str, str] | None = None
+) -> list[tuple[tuple[str | None, str], tuple[str | None, str]]]:
+    """Extract every ``left = right`` key pair from a (possibly composite) join.
+
+    A composite ON clause like ``[a::x] = [b::y] AND [a::p] = [b::q]`` yields all
+    consecutive ref pairs, so composite-key relationships keep both columns
+    instead of silently dropping all but the first pair.
+    """
+    if not expr:
+        return []
+
+    tokens = _TML_REF.findall(expr)
+    if len(tokens) < 2:
+        tokens = [f"{t[0]}.{t[1]}" for t in _TML_DOT_REF.findall(expr)]
+
+    pairs: list[tuple[tuple[str | None, str], tuple[str | None, str]]] = []
+    for i in range(0, len(tokens) - 1, 2):
+        left = _parse_ref_token(tokens[i])
+        right = _parse_ref_token(tokens[i + 1])
+        if table_path_lookup:
+            if left[0] in table_path_lookup:
+                left = (table_path_lookup[left[0]], left[1])
+            if right[0] in table_path_lookup:
+                right = (table_path_lookup[right[0]], right[1])
+        pairs.append((left, right))
+    return pairs
+
+
 def _split_sql_identifier(sql: str | None) -> tuple[str | None, str | None]:
     if not sql or not _SIMPLE_IDENTIFIER.match(sql):
         return None, None
@@ -832,9 +861,12 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 )
                 dimensions.append(dim)
 
-        primary_key = "id"
-        if any(d.name.lower() == "id" for d in dimensions):
-            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
+        # The SQL generator always selects `model.primary_key` from derived models
+        # as a bare column, so it must name a column that exists on the base
+        # table. Prefer a dimension named `id`; otherwise infer the key from a
+        # base-table column so a model whose key is not literally `id` (e.g.
+        # `order_key`) does not project a non-existent `id` column.
+        primary_key = self._infer_model_primary_key(dimensions, base_table)
 
         # A joined model is exported as derived SQL (FROM (<sql>) AS t); rewrite
         # its `SELECT *` into explicit aliased columns and update the dimension/
@@ -860,14 +892,14 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
             key_names: set[str] = set()
             for rel in relationships:
                 if rel.foreign_key:
-                    key_names.add(rel.foreign_key)
+                    key_names.update(rel.foreign_key_columns)
                 if rel.primary_key:
-                    key_names.add(rel.primary_key)
+                    key_names.update(rel.primary_key_columns)
             for join_def in flat_joins:
-                left, right = _extract_join_refs(join_def.get("on"), path_lookup)
-                for ref in (left, right):
-                    if ref and ref[1] in key_names and ref[0] in known_tables:
-                        fk_refs.setdefault(ref[1], ref)
+                for left, right in _extract_all_join_refs(join_def.get("on"), path_lookup):
+                    for ref in (left, right):
+                        if ref and ref[1] in key_names and ref[0] in known_tables:
+                            fk_refs.setdefault(ref[1], ref)
 
             sql = _expose_joined_columns(sql, known_tables, dimensions, metrics, base_table, primary_key, fk_refs)
         else:
@@ -907,6 +939,31 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
 
         return model
 
+    def _infer_model_primary_key(self, dimensions: list[Dimension], base_table: str | None) -> str:
+        """Infer a queryable primary key column for a TML Model.
+
+        Prefer a dimension literally named ``id``. Otherwise, if the base table is
+        known, keep ``id`` when a base-table column actually resolves to ``id``;
+        failing that, use the first base-table column so the key references a real
+        column. Fall back to ``id`` only when no better candidate exists.
+        """
+        for dim in dimensions:
+            if dim.name.lower() == "id":
+                return dim.name
+
+        if base_table:
+            base_columns: list[str] = []
+            for dim in dimensions:
+                table, column = _split_sql_identifier(dim.sql)
+                if column and (table is None or table == base_table):
+                    base_columns.append(column)
+            if "id" in base_columns:
+                return "id"
+            if base_columns:
+                return base_columns[0]
+
+        return "id"
+
     def _parse_model_relationships(
         self,
         joins: list[dict[str, Any]],
@@ -930,30 +987,34 @@ def _parse_model_relationships(
             join_type = _normalize(join_def.get("type")) or "INNER"
             on_value = join_def.get("on")
             lookup = table_path_lookup or table_name_lookup
-            left, right = _extract_join_refs(on_value, lookup)
+            # Composite predicates carry more than one key pair; keep all of them
+            # so cross-model joins do not silently drop part of the key.
+            ref_pairs = _extract_all_join_refs(on_value, lookup)
 
             if table_name_lookup:
                 source = table_name_lookup.get(source, source)
                 destination = table_name_lookup.get(destination, destination)
 
-            foreign_key = None
-            primary_key = None
-
-            if left and right:
+            foreign_keys: list[str] = []
+            primary_keys: list[str] = []
+            for left, right in ref_pairs:
                 left_table, left_col = left
                 right_table, right_col = right
 
                 if left_table == source and right_table == destination:
-                    foreign_key = left_col
-                    primary_key = right_col
+                    foreign_keys.append(left_col)
+                    primary_keys.append(right_col)
                 elif left_table == destination and right_table == source:
-                    foreign_key = right_col
-                    primary_key = left_col
+                    foreign_keys.append(right_col)
+                    primary_keys.append(left_col)
                 else:
                     if left_table == source:
-                        foreign_key = left_col
+                        foreign_keys.append(left_col)
                     if right_table == destination:
-                        primary_key = right_col
+                        primary_keys.append(right_col)
+
+            foreign_key = foreign_keys[0] if len(foreign_keys) == 1 else (foreign_keys or None)
+            primary_key = primary_keys[0] if len(primary_keys) == 1 else (primary_keys or None)
 
             cardinality = _normalize(join_def.get("cardinality"))
             rel_type = _CARDINALITY_MAP.get(cardinality or "", "many_to_one")
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index a2f1133f..5e98649c 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -701,12 +701,14 @@ def test_import_thoughtspot_model():
     assert relationships["customers"].type == "many_to_one"
     assert relationships["customers"].foreign_key == "customer_id"
     assert relationships["customers"].primary_key == "id"
-    # cardinality: ONE_TO_ONE. Sidemantic treats one_to_one (like one_to_many) as
-    # an edge where the related model owns the foreign key and the local model
-    # owns the primary key, so the parsed source-side FK/PK are stored swapped.
+    # cardinality: ONE_TO_ONE with a composite ON predicate. Sidemantic treats
+    # one_to_one (like one_to_many) as an edge where the related model owns the
+    # foreign key and the local model owns the primary key, so the parsed
+    # source-side FK/PK are stored swapped. Both key pairs from the composite
+    # predicate are preserved.
     assert relationships["regions"].type == "one_to_one"
-    assert relationships["regions"].foreign_key == "id"
-    assert relationships["regions"].primary_key == "region_id"
+    assert relationships["regions"].foreign_key == ["id", "country_code"]
+    assert relationships["regions"].primary_key == ["region_id", "country_code"]
 
     # A joined model becomes a derived table. Its `SELECT *` is rewritten into an
     # explicit projection that aliases each inner `table.column` to a stable,
@@ -1221,6 +1223,82 @@ def test_thoughtspot_one_to_one_join_keys_match_direction():
     assert rows == [("PACIFIC", 125.0)]
 
 
+def test_thoughtspot_non_id_primary_key_is_queryable():
+    """A joined model whose key is not literally `id` stays queryable.
+
+    Regression: `primary_key` defaulted to `id`, and the derived projection
+    injected `orders.id AS id`. The SQL generator always selects the primary key
+    from derived models, so even a basic aggregate failed with
+    `Table "orders" does not have a column named "id"`. The key must be inferred
+    from a real base-table column (here `order_key`).
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_non_id_key.model.tml")
+    model = graph.models["orders_model"]
+
+    # The primary key is inferred from a real base-table column, not the default.
+    assert model.primary_key == "order_key"
+    assert model.sql is not None
+    assert "orders.order_key AS order_key" in model.sql
+    assert "orders.id AS id" not in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (order_key INT, amount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 5), (2, 25.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(metrics=["orders_model.amount"], dimensions=["orders_model.customer_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("Acme", 125.0)]
+
+
+def test_thoughtspot_composite_join_keys_are_preserved():
+    """A composite-key join keeps every key pair instead of dropping all but one.
+
+    Regression: only the first `left = right` pair of a composite ON predicate was
+    stored, so `[sales::region_id] = [regions::id] AND [sales::country_code] =
+    [regions::country_code]` became `region_id -> id` only. Cross-model joins then
+    used a truncated key and could mis-join rows.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_composite_key.model.tml")
+    model = graph.models["sales_model"]
+
+    rel = {r.name: r for r in model.relationships}["regions"]
+    assert rel.type == "many_to_one"
+    # Both key pairs are preserved as composite lists.
+    assert rel.foreign_key == ["region_id", "country_code"]
+    assert rel.primary_key == ["id", "country_code"]
+
+    # Both join columns are projected by the derived subquery so a composite
+    # cross-model join stays in scope.
+    assert model.sql is not None
+    assert "sales.region_id AS region_id" in model.sql
+    assert "sales.country_code AS country_code" in model.sql
+
+    # The model still compiles and runs standalone.
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, region_id INT, country_code VARCHAR)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 7, 'US'), (2, 25.0, 7, 'US')")
+    con.execute("CREATE TABLE regions (id INT, country_code VARCHAR, name VARCHAR)")
+    con.execute("INSERT INTO regions VALUES (7, 'US', 'West')")
+
+    sql = layer.compile(metrics=["sales_model.amount"], dimensions=["sales_model.region_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("West", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_composite_key.model.tml b/tests/fixtures/thoughtspot/model_composite_key.model.tml
new file mode 100644
index 00000000..3e643076
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_composite_key.model.tml
@@ -0,0 +1,29 @@
+guid: "model-composite-key"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Joined model with a composite-key join predicate
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: regions
+          on: "[sales::region_id] = [regions::id] AND [sales::country_code] = [regions::country_code]"
+          type: INNER
+          cardinality: MANY_TO_ONE
+    - name: regions
+      fqn: ANALYTICS.PUBLIC.regions
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: region_name
+      column_id: regions::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_non_id_key.model.tml b/tests/fixtures/thoughtspot/model_non_id_key.model.tml
new file mode 100644
index 00000000..a7f16d9e
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_non_id_key.model.tml
@@ -0,0 +1,29 @@
+guid: "model-non-id-key"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: Joined model whose base table key is not literally `id`
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: customers
+          on: "[orders::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: order_key
+      column_id: orders::order_key
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 9f33276de3df21d85ac41486bf1873541b3b8edf Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 12:24:02 -0700
Subject: [PATCH 10/17] fix(thoughtspot): inline nested formula references in
 model formulas

---
 sidemantic/adapters/thoughtspot.py            | 39 +++++++++++++++
 tests/adapters/thoughtspot/test_parsing.py    | 34 ++++++++++++++
 .../model_nested_formula.model.tml            | 47 +++++++++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 tests/fixtures/thoughtspot/model_nested_formula.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index f5b145d6..4b440b37 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -130,6 +130,33 @@ def _replace(match: re.Match[str]) -> str:
     return _TML_REF.sub(_replace, expr)
 
 
+def _inline_formula_refs(
+    expr: str | None,
+    formula_expr_by_name: dict[str, str],
+    _seen: frozenset[str] = frozenset(),
+) -> str | None:
+    """Recursively inline ``[formula_name]`` references in a TML formula.
+
+    A formula that references another formula by name (e.g. ``margin`` defined as
+    ``[net_revenue] / [gross_revenue]`` where ``net_revenue`` is itself a formula)
+    must have the nested formula expanded inline; otherwise the bare reference is
+    left unresolved and points at a column the derived subquery never projects.
+    Self/cyclic references are left untouched to avoid infinite recursion.
+    """
+    if not expr or not formula_expr_by_name:
+        return expr
+
+    def _replace(match: re.Match[str]) -> str:
+        token = match.group(1)
+        # Only inline unqualified references to a known formula name.
+        if "::" not in token and token in formula_expr_by_name and token not in _seen:
+            inner = _inline_formula_refs(formula_expr_by_name[token], formula_expr_by_name, _seen | {token})
+            return f"({inner})"
+        return match.group(0)
+
+    return _TML_REF.sub(_replace, expr)
+
+
 def _parse_ref_token(token: str) -> tuple[str | None, str]:
     if "::" in token:
         table, column = token.split("::", 1)
@@ -778,6 +805,9 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
         formulas = model_def.get("formulas") or []
         formula_by_id = {f.get("id"): f for f in formulas if f.get("id")}
         formula_by_name = {f.get("name"): f for f in formulas if f.get("name")}
+        # Map formula name -> expression so nested formula references can be
+        # inlined before the expression is converted/aliased.
+        formula_expr_by_name = {f.get("name"): f.get("expr") for f in formulas if f.get("name") and f.get("expr")}
 
         dimensions: list[Dimension] = []
         metrics: list[Metric] = []
@@ -803,12 +833,21 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
             format_pattern = properties.get("format_pattern")
 
             sql_expr = None
+            is_formula = False
             if formula_id and formula_id in formula_by_id:
                 sql_expr = formula_by_id[formula_id].get("expr")
+                is_formula = True
             elif formula_id and formula_id in formula_by_name:
                 sql_expr = formula_by_name[formula_id].get("expr")
+                is_formula = True
             elif col_name in formula_by_name:
                 sql_expr = formula_by_name[col_name].get("expr")
+                is_formula = True
+
+            # Inline references to other formulas so nested formula expressions
+            # resolve to physical columns instead of unprojected formula names.
+            if is_formula:
+                sql_expr = _inline_formula_refs(sql_expr, formula_expr_by_name)
 
             if not sql_expr and column_id:
                 if "::" in column_id:
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 5e98649c..8401bd38 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1299,6 +1299,40 @@ def test_thoughtspot_composite_join_keys_are_preserved():
     assert rows == [("West", 125.0)]
 
 
+def test_thoughtspot_nested_formula_ref_is_inlined_and_queryable():
+    """A formula referencing another formula inlines the nested expression.
+
+    Regression: a formula like `[net_revenue] / [gross_revenue]` (where
+    `net_revenue` is itself a formula) kept the bare `net_revenue` token, which the
+    derived subquery never projected, so the query failed with
+    `Referenced column "net_revenue" not found`. The nested formula must be
+    expanded inline so the expression resolves to physical columns.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_nested_formula.model.tml")
+    model = graph.models["sales_model"]
+
+    # The nested `net_revenue` formula is inlined into `margin`.
+    assert model.get_metric("net_revenue").sql == "sales__gross_revenue - sales__discount"
+    assert model.get_metric("margin").sql == "(sales__gross_revenue - sales__discount) / sales__gross_revenue"
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, gross_revenue DOUBLE, discount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 10.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(metrics=["sales_model.margin"], dimensions=["sales_model.customer_name"])
+    rows = con.execute(sql).fetchall()
+    # margin = (100 - 10) / 100 = 0.9
+    assert rows == [("Acme", 0.9)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_nested_formula.model.tml b/tests/fixtures/thoughtspot/model_nested_formula.model.tml
new file mode 100644
index 00000000..2b7a9dd3
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_nested_formula.model.tml
@@ -0,0 +1,47 @@
+guid: "model-nested-formula"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Joined model whose formula references another formula
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "[sales::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  formulas:
+    - name: net_revenue
+      expr: "[gross_revenue] - [discount]"
+      id: net_rev
+    - name: margin
+      expr: "[net_revenue] / [gross_revenue]"
+      id: margin_f
+  columns:
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: discount
+      column_id: sales::discount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: margin
+      formula_id: margin_f
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 5a590f767b8ea74033228a1bcacc220a3a194b26 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 12:38:50 -0700
Subject: [PATCH 11/17] fix(thoughtspot): prefer TML field names over colliding
 columns and resolve renamed id keys

---
 sidemantic/adapters/thoughtspot.py            | 51 ++++++++------
 tests/adapters/thoughtspot/test_parsing.py    | 68 +++++++++++++++++++
 .../model_name_collision.model.tml            | 39 +++++++++++
 .../model_renamed_id_key.model.tml            | 29 ++++++++
 4 files changed, 167 insertions(+), 20 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_name_collision.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_renamed_id_key.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 4b440b37..3569d139 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -340,29 +340,37 @@ def _collect(expr: str | None) -> None:
     # like `gross_revenue`. Map each such column name to its projected alias when
     # exactly one joined table projects that column, so the rewritten expression
     # uses the in-scope output alias instead of an out-of-scope bare column.
-    bare_to_alias: dict[str, str] = {}
-    ambiguous: set[str] = set()
+    def _build_map(pairs: list[tuple[str, str]]) -> dict[str, str]:
+        """Map name -> alias, dropping names that map to conflicting aliases."""
+        mapping: dict[str, str] = {}
+        ambiguous: set[str] = set()
+        for name, alias in pairs:
+            if name in mapping and mapping[name] != alias:
+                ambiguous.add(name)
+            else:
+                mapping[name] = alias
+        for name in ambiguous:
+            mapping.pop(name, None)
+        return mapping
 
-    def _record_bare(name: str, alias: str) -> None:
-        if name in bare_to_alias and bare_to_alias[name] != alias:
-            ambiguous.add(name)
-        else:
-            bare_to_alias[name] = alias
+    # Physical DB column names map to their projected aliases.
+    physical_map = _build_map([(column, alias) for (_table, column), alias in projection.items()])
 
-    for (_table, column), alias in projection.items():
-        _record_bare(column, alias)
     # Formulas can also reference another TML column by its model name even when
     # that name differs from the backing DB column (e.g. a column `gross_revenue`
     # mapped to `column_id: sales::gross_amt`, with formula `[gross_revenue] -
-    # [discount]`). The projection aliases the DB column (`sales__gross_amt`), so
-    # also map the model column name to that alias; otherwise the bare model name
-    # stays out of scope and the query fails.
-    for field in (*dimensions, *metrics):
-        ref = _simple_table_column(field.sql, tables)
-        if ref:
-            _record_bare(field.name, projection[ref])
-    for name in ambiguous:
-        bare_to_alias.pop(name, None)
+    # [discount]`). Build a separate field-name map and let it take precedence:
+    # a formula's `[amount]` refers to the TML field `amount`, which may resolve
+    # to a different physical column than one literally named `amount`.
+    field_map = _build_map(
+        [
+            (field.name, projection[ref])
+            for field in (*dimensions, *metrics)
+            if (ref := _simple_table_column(field.sql, tables))
+        ]
+    )
+
+    bare_to_alias = {**physical_map, **field_map}
 
     def _rewrite(expr: str | None) -> str | None:
         if not expr:
@@ -981,14 +989,17 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
     def _infer_model_primary_key(self, dimensions: list[Dimension], base_table: str | None) -> str:
         """Infer a queryable primary key column for a TML Model.
 
-        Prefer a dimension literally named ``id``. Otherwise, if the base table is
+        Prefer a dimension named ``id`` but resolve it to its backing physical
+        column (a column named ``id`` may map to a differently named DB column,
+        e.g. ``column_id: orders::order_key``). Otherwise, if the base table is
         known, keep ``id`` when a base-table column actually resolves to ``id``;
         failing that, use the first base-table column so the key references a real
         column. Fall back to ``id`` only when no better candidate exists.
         """
         for dim in dimensions:
             if dim.name.lower() == "id":
-                return dim.name
+                _table, column = _split_sql_identifier(dim.sql)
+                return column or dim.name
 
         if base_table:
             base_columns: list[str] = []
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 8401bd38..431fd79c 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1333,6 +1333,74 @@ def test_thoughtspot_nested_formula_ref_is_inlined_and_queryable():
     assert rows == [("Acme", 0.9)]
 
 
+def test_thoughtspot_formula_ref_prefers_tml_field_over_physical_name():
+    """A bare formula ref resolves to the TML field, not a colliding physical name.
+
+    Regression: physical column names were recorded first, so adding TML field
+    names could mark a valid reference ambiguous and drop it. With
+    `gross_revenue -> sales::amount` and `amount -> sales::cost`, the formula
+    `[amount] + [gross_revenue]` must resolve the bare `amount` to `sales__cost`
+    (the TML field), not be dropped because `amount` also names a physical column.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_name_collision.model.tml")
+    model = graph.models["sales_model"]
+
+    # The bare `amount` ref resolves to the TML field's backing column (cost),
+    # not the physical `amount` column.
+    assert model.get_metric("total").sql == "sales__cost + sales__amount"
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, cost DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 30.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(metrics=["sales_model.total"], dimensions=["sales_model.customer_name"])
+    rows = con.execute(sql).fetchall()
+    # total = cost (30) + amount (100) = 130
+    assert rows == [("Acme", 130.0)]
+
+
+def test_thoughtspot_renamed_id_key_uses_backing_column():
+    """A semantic column named `id` backed by another column resolves to it.
+
+    Regression: `_infer_model_primary_key` returned the semantic name `id` even
+    when the column mapped to a different physical column (`column_id:
+    orders::order_key`). The derived projection then injected `orders.id AS id`,
+    failing because the base table only has `order_key`.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_renamed_id_key.model.tml")
+    model = graph.models["orders_model"]
+
+    # The primary key resolves to the backing physical column, not the name `id`.
+    assert model.primary_key == "order_key"
+    assert model.sql is not None
+    assert "orders.order_key AS order_key" in model.sql
+    assert "orders.id AS id" not in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (order_key INT, amount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 5), (2, 25.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(metrics=["orders_model.amount"], dimensions=["orders_model.customer_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("Acme", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_name_collision.model.tml b/tests/fixtures/thoughtspot/model_name_collision.model.tml
new file mode 100644
index 00000000..7fcaa477
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_name_collision.model.tml
@@ -0,0 +1,39 @@
+guid: "model-name-collision"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: A TML field name collides with a different physical column name
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "[sales::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  formulas:
+    - name: total
+      expr: "[amount] + [gross_revenue]"
+      id: total_f
+  columns:
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: amount
+      column_id: sales::cost
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: total
+      formula_id: total_f
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_renamed_id_key.model.tml b/tests/fixtures/thoughtspot/model_renamed_id_key.model.tml
new file mode 100644
index 00000000..35d7581c
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_renamed_id_key.model.tml
@@ -0,0 +1,29 @@
+guid: "model-renamed-id-key"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: A semantic column named id is backed by a different physical column
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: customers
+          on: "[orders::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: id
+      column_id: orders::order_key
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From e4aaadecf4fae650293953e41e8b89961703ef44 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 12:52:40 -0700
Subject: [PATCH 12/17] fix(thoughtspot): skip joined-table id columns when
 inferring primary key

---
 sidemantic/adapters/thoughtspot.py            | 17 ++++++----
 tests/adapters/thoughtspot/test_parsing.py    | 34 +++++++++++++++++++
 .../thoughtspot/model_joined_id_key.model.tml | 29 ++++++++++++++++
 3 files changed, 74 insertions(+), 6 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_joined_id_key.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 3569d139..eb8c3f61 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -991,15 +991,20 @@ def _infer_model_primary_key(self, dimensions: list[Dimension], base_table: str
 
         Prefer a dimension named ``id`` but resolve it to its backing physical
         column (a column named ``id`` may map to a differently named DB column,
-        e.g. ``column_id: orders::order_key``). Otherwise, if the base table is
-        known, keep ``id`` when a base-table column actually resolves to ``id``;
-        failing that, use the first base-table column so the key references a real
-        column. Fall back to ``id`` only when no better candidate exists.
+        e.g. ``column_id: orders::order_key``). Only accept it when its SQL is
+        unqualified or belongs to the base table; a joined-table ``id`` (e.g.
+        ``customers::id``) is not the base model's key. Otherwise, if the base
+        table is known, keep ``id`` when a base-table column actually resolves to
+        ``id``; failing that, use the first base-table column so the key
+        references a real column. Fall back to ``id`` only when no better
+        candidate exists.
         """
         for dim in dimensions:
             if dim.name.lower() == "id":
-                _table, column = _split_sql_identifier(dim.sql)
-                return column or dim.name
+                table, column = _split_sql_identifier(dim.sql)
+                if table is None or base_table is None or table == base_table:
+                    return column or dim.name
+                break
 
         if base_table:
             base_columns: list[str] = []
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 431fd79c..ef2e69cc 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1401,6 +1401,40 @@ def test_thoughtspot_renamed_id_key_uses_backing_column():
     assert rows == [("Acme", 125.0)]
 
 
+def test_thoughtspot_joined_id_dimension_is_not_used_as_primary_key():
+    """A joined-table column named `id` is not treated as the base model's key.
+
+    Regression: `_infer_model_primary_key` returned the backing column of the
+    first dimension named `id` even when it came from a joined table
+    (`customers::id`). The derived projection then emitted `orders.id AS id`,
+    failing because the base `orders` table has no `id` (its key is `order_key`).
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_joined_id_key.model.tml")
+    model = graph.models["orders_model"]
+
+    # The joined-table id is skipped; the base-table column is used as the key.
+    assert model.primary_key == "order_key"
+    assert model.sql is not None
+    assert "orders.order_key AS order_key" in model.sql
+    assert "orders.id AS id" not in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (order_key INT, amount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 5), (2, 25.0, 5)")
+    con.execute("CREATE TABLE customers (id INT)")
+    con.execute("INSERT INTO customers VALUES (5)")
+
+    sql = layer.compile(metrics=["orders_model.amount"], dimensions=["orders_model.order_key"])
+    rows = sorted(con.execute(sql).fetchall())
+    assert rows == [(1, 100.0), (2, 25.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_joined_id_key.model.tml b/tests/fixtures/thoughtspot/model_joined_id_key.model.tml
new file mode 100644
index 00000000..22573a04
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_joined_id_key.model.tml
@@ -0,0 +1,29 @@
+guid: "model-joined-id-key"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: A joined-table id dimension precedes the base table key
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: customers
+          on: "[orders::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: id
+      column_id: customers::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_key
+      column_id: orders::order_key
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 608e374788db7010d684d2f9662c98c66eabccf0 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 13:07:13 -0700
Subject: [PATCH 13/17] fix(thoughtspot): only treat equality conjuncts as
 relationship join keys

---
 sidemantic/adapters/thoughtspot.py            | 31 +++++++++----
 tests/adapters/thoughtspot/test_parsing.py    | 43 +++++++++++++++++++
 .../thoughtspot/model_non_equi_join.model.tml | 29 +++++++++++++
 3 files changed, 94 insertions(+), 9 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_non_equi_join.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index eb8c3f61..00d452dd 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -59,6 +59,10 @@
 _BARE_IDENTIFIER = re.compile(r"(?<![\w.])([A-Za-z_][A-Za-z0-9_]*)(?![\w.])(?!\s*\()")
 # A quoted string literal (single or double quoted, with doubled-quote escapes).
 _STRING_LITERAL = re.compile(r"'(?:[^']|'')*'|\"(?:[^\"]|\"\")*\"")
+# Split a join predicate into conjuncts on a word-boundary `AND` (case-insensitive).
+_AND_SPLIT = re.compile(r"\bAND\b", re.IGNORECASE)
+# A plain `=` equality operator (not part of `<=`, `>=`, `!=`, `<>`, or `==`).
+_EQUALITY_OP = re.compile(r"(?<![<>=!])=(?![=])")
 
 
 def _sub_outside_strings(pattern: re.Pattern[str], repl: Any, text: str) -> str:
@@ -197,20 +201,29 @@ def _extract_all_join_refs(
     """Extract every ``left = right`` key pair from a (possibly composite) join.
 
     A composite ON clause like ``[a::x] = [b::y] AND [a::p] = [b::q]`` yields all
-    consecutive ref pairs, so composite-key relationships keep both columns
-    instead of silently dropping all but the first pair.
+    equality pairs, so composite-key relationships keep both columns instead of
+    silently dropping all but the first pair. Only pure equality conjuncts are
+    treated as key pairs; range/non-equi predicates (e.g.
+    ``[a::date] BETWEEN [b::start] AND [b::end]``) are skipped so they are not
+    mistaken for additional equality keys.
     """
     if not expr:
         return []
 
-    tokens = _TML_REF.findall(expr)
-    if len(tokens) < 2:
-        tokens = [f"{t[0]}.{t[1]}" for t in _TML_DOT_REF.findall(expr)]
-
     pairs: list[tuple[tuple[str | None, str], tuple[str | None, str]]] = []
-    for i in range(0, len(tokens) - 1, 2):
-        left = _parse_ref_token(tokens[i])
-        right = _parse_ref_token(tokens[i + 1])
+    for conjunct in _AND_SPLIT.split(expr):
+        tokens = _TML_REF.findall(conjunct)
+        if len(tokens) < 2:
+            tokens = [f"{t[0]}.{t[1]}" for t in _TML_DOT_REF.findall(conjunct)]
+
+        # Only an equality between exactly two refs is a join key pair. A `=` that
+        # is part of `<=`/`>=`/`!=` is not a plain equality.
+        equalities = _EQUALITY_OP.findall(conjunct)
+        if len(tokens) != 2 or len(equalities) != 1:
+            continue
+
+        left = _parse_ref_token(tokens[0])
+        right = _parse_ref_token(tokens[1])
         if table_path_lookup:
             if left[0] in table_path_lookup:
                 left = (table_path_lookup[left[0]], left[1])
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index ef2e69cc..7f813e01 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1435,6 +1435,49 @@ def test_thoughtspot_joined_id_dimension_is_not_used_as_primary_key():
     assert rows == [(1, 100.0), (2, 25.0)]
 
 
+def test_thoughtspot_non_equi_join_predicate_is_not_a_relationship_key():
+    """A range predicate in a join ON clause is not mistaken for an equality key.
+
+    Regression: the composite-key extractor paired every two consecutive refs, so
+    `[sales::region_id] = [regions::id] AND [sales::date] BETWEEN
+    [regions::start_date] AND [regions::end_date]` produced bogus extra keys
+    (`date -> start_date`). Only the real equality conjunct should become the
+    relationship key; the range predicate stays in the join SQL but is ignored as
+    a key.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_non_equi_join.model.tml")
+    model = graph.models["sales_model"]
+
+    rel = {r.name: r for r in model.relationships}["regions"]
+    # Only the equality key pair is captured; the BETWEEN refs are not keys.
+    assert rel.foreign_key == "region_id"
+    assert rel.primary_key == "id"
+
+    # The range predicate is preserved in the join SQL, but its columns are not
+    # projected as spurious key columns.
+    assert model.sql is not None
+    assert "BETWEEN regions.start_date AND regions.end_date" in model.sql
+    assert "AS start_date" not in model.sql
+    assert "AS date" not in model.sql
+
+    # The model still compiles and runs.
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, region_id INT, date DATE)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, 7, DATE '2024-06-15'), (2, 25.0, 7, DATE '2024-06-20')")
+    con.execute("CREATE TABLE regions (id INT, name VARCHAR, start_date DATE, end_date DATE)")
+    con.execute("INSERT INTO regions VALUES (7, 'West', DATE '2024-06-01', DATE '2024-06-30')")
+
+    sql = layer.compile(metrics=["sales_model.amount"], dimensions=["sales_model.region_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("West", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_non_equi_join.model.tml b/tests/fixtures/thoughtspot/model_non_equi_join.model.tml
new file mode 100644
index 00000000..1b60ad5a
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_non_equi_join.model.tml
@@ -0,0 +1,29 @@
+guid: "model-non-equi-join"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Join with an equality key plus a BETWEEN range predicate
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: regions
+          on: "[sales::region_id] = [regions::id] AND [sales::date] BETWEEN [regions::start_date] AND [regions::end_date]"
+          type: INNER
+          cardinality: MANY_TO_ONE
+    - name: regions
+      fqn: ANALYTICS.PUBLIC.regions
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: region_name
+      column_id: regions::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From f4e34cd41b1f9bf30de4915cebf87a148bbb0553 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 13:21:32 -0700
Subject: [PATCH 14/17] fix(thoughtspot): resolve aliased table id and name
 refs to the alias

---
 sidemantic/adapters/thoughtspot.py            | 13 +++++++
 tests/adapters/thoughtspot/test_parsing.py    | 37 +++++++++++++++++++
 .../model_aliased_table_id_ref.model.tml      | 31 ++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 tests/fixtures/thoughtspot/model_aliased_table_id_ref.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 00d452dd..edba394b 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -787,6 +787,19 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
         path_lookup: dict[str, str] = dict(table_name_lookup)
         for alias in alias_to_table:
             path_lookup[alias] = alias
+        # An aliased entry may still be referenced by its `id`/`name` in a
+        # `column_id` or `on` expression (e.g. `id: countries_tbl, alias:
+        # ship_country` with `column_id: countries_tbl::name`). The SQL relation in
+        # scope is the alias (`JOIN countries AS ship_country`), so map the entry's
+        # id/name tokens to the alias too; otherwise the qualifier resolves to the
+        # backing table name, which is not in scope.
+        for table in model_tables:
+            alias = table.get("alias")
+            if not alias:
+                continue
+            for token in (table.get("id"), table.get("name")):
+                if token and token != alias:
+                    path_lookup[token] = alias
 
         # Flatten nested joins (one per model_tables entry) into the same shape
         # the worksheet join helpers consume. When a table carries an `alias`,
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 7f813e01..1fb96542 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1478,6 +1478,43 @@ def test_thoughtspot_non_equi_join_predicate_is_not_a_relationship_key():
     assert rows == [("West", 125.0)]
 
 
+def test_thoughtspot_aliased_table_referenced_by_id_is_queryable():
+    """An aliased table referenced by its id resolves to the alias, not the table.
+
+    Regression: a `model_tables` entry `id: countries_tbl, alias: ship_country`
+    referenced by `column_id: countries_tbl::name` resolved to `countries.name`,
+    but the SQL emits `JOIN countries AS ship_country`, so `countries` is not in
+    scope. The id/name tokens of an aliased entry must resolve to the alias.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_aliased_table_id_ref.model.tml")
+    model = graph.models["shipments_model"]
+
+    # The id-qualified field resolves to the alias relation, which is in scope.
+    assert model.get_dimension("ship_country_name").sql == "ship_country__name"
+    assert model.sql is not None
+    assert "JOIN countries AS ship_country" in model.sql
+    assert "ship_country.name AS ship_country__name" in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (id INT, amount DOUBLE, ship_country_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 7), (2, 25.0, 7)")
+    con.execute("CREATE TABLE countries (id INT, name VARCHAR)")
+    con.execute("INSERT INTO countries VALUES (7, 'US')")
+
+    sql = layer.compile(
+        metrics=["shipments_model.amount"],
+        dimensions=["shipments_model.ship_country_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    assert rows == [("US", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_aliased_table_id_ref.model.tml b/tests/fixtures/thoughtspot/model_aliased_table_id_ref.model.tml
new file mode 100644
index 00000000..014ade24
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_aliased_table_id_ref.model.tml
@@ -0,0 +1,31 @@
+guid: "model-aliased-table-id-ref"
+export_schema_version: "2"
+model:
+  name: shipments_model
+  description: An aliased table is referenced by its id in a column_id
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: ship_country
+          on: "[orders::ship_country_id] = [ship_country::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: countries
+      id: countries_tbl
+      alias: ship_country
+      fqn: ANALYTICS.PUBLIC.countries
+  columns:
+    - name: order_id
+      column_id: orders::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: ship_country_name
+      column_id: countries_tbl::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 280e05bf8c07a60ac9f03328c3f38400d8264272 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 13:36:24 -0700
Subject: [PATCH 15/17] fix(thoughtspot): resolve aliased join targets and
 infer measure-backed keys

---
 sidemantic/adapters/thoughtspot.py            | 42 ++++++-----
 tests/adapters/thoughtspot/test_parsing.py    | 70 +++++++++++++++++++
 .../model_join_target_by_id.model.tml         | 31 ++++++++
 .../thoughtspot/model_measure_key.model.tml   | 30 ++++++++
 4 files changed, 157 insertions(+), 16 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_join_target_by_id.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_measure_key.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index edba394b..ad64fe4a 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -813,12 +813,15 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
                 destination = join_def.get("with")
                 if not source or not destination:
                     continue
-                # Keep an aliased destination as-is (the role name); only resolve
-                # non-aliased ids to their table name.
+                # Resolve the join target to the in-scope relation name. For an
+                # aliased entry that is the role alias (so a `with:` token that is
+                # the alias, or the table id/name of an aliased entry, becomes the
+                # alias); `path_lookup` already encodes both, falling back to the
+                # backing table name for non-aliased entries.
                 if destination in alias_to_table:
                     resolved_dest = destination
                 else:
-                    resolved_dest = table_name_lookup.get(destination, destination)
+                    resolved_dest = path_lookup.get(destination, table_name_lookup.get(destination, destination))
                 # PyYAML (YAML 1.1) parses the bare `on:` key as the boolean True.
                 on_value = join_def.get("on")
                 if on_value is None and True in join_def:
@@ -939,7 +942,7 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
         # table. Prefer a dimension named `id`; otherwise infer the key from a
         # base-table column so a model whose key is not literally `id` (e.g.
         # `order_key`) does not project a non-existent `id` column.
-        primary_key = self._infer_model_primary_key(dimensions, base_table)
+        primary_key = self._infer_model_primary_key(dimensions, metrics, base_table)
 
         # A joined model is exported as derived SQL (FROM (<sql>) AS t); rewrite
         # its `SELECT *` into explicit aliased columns and update the dimension/
@@ -1012,30 +1015,37 @@ def _parse_model(self, model_def: dict[str, Any] | None, full_def: dict[str, Any
 
         return model
 
-    def _infer_model_primary_key(self, dimensions: list[Dimension], base_table: str | None) -> str:
+    def _infer_model_primary_key(
+        self,
+        dimensions: list[Dimension],
+        metrics: list[Metric],
+        base_table: str | None,
+    ) -> str:
         """Infer a queryable primary key column for a TML Model.
 
-        Prefer a dimension named ``id`` but resolve it to its backing physical
-        column (a column named ``id`` may map to a differently named DB column,
-        e.g. ``column_id: orders::order_key``). Only accept it when its SQL is
+        Prefer a field named ``id`` but resolve it to its backing physical column
+        (a column named ``id`` may map to a differently named DB column, e.g.
+        ``column_id: orders::order_key``). Only accept it when its SQL is
         unqualified or belongs to the base table; a joined-table ``id`` (e.g.
         ``customers::id``) is not the base model's key. Otherwise, if the base
         table is known, keep ``id`` when a base-table column actually resolves to
         ``id``; failing that, use the first base-table column so the key
-        references a real column. Fall back to ``id`` only when no better
-        candidate exists.
+        references a real column. ThoughtSpot often exports numeric key columns as
+        measures, so dimensions are scanned first but metrics are considered too.
+        Fall back to ``id`` only when no better candidate exists.
         """
-        for dim in dimensions:
-            if dim.name.lower() == "id":
-                table, column = _split_sql_identifier(dim.sql)
+        fields: list[Dimension | Metric] = [*dimensions, *metrics]
+        for field in fields:
+            if field.name.lower() == "id":
+                table, column = _split_sql_identifier(field.sql)
                 if table is None or base_table is None or table == base_table:
-                    return column or dim.name
+                    return column or field.name
                 break
 
         if base_table:
             base_columns: list[str] = []
-            for dim in dimensions:
-                table, column = _split_sql_identifier(dim.sql)
+            for field in fields:
+                table, column = _split_sql_identifier(field.sql)
                 if column and (table is None or table == base_table):
                     base_columns.append(column)
             if "id" in base_columns:
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 1fb96542..f12f1d90 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1515,6 +1515,76 @@ def test_thoughtspot_aliased_table_referenced_by_id_is_queryable():
     assert rows == [("US", 125.0)]
 
 
+def test_thoughtspot_join_target_by_id_resolves_to_alias():
+    """A join whose `with:` target is an aliased table id resolves to the alias.
+
+    Regression: after id/name refs mapped to the alias, the join-flatten step
+    still resolved `with: countries_tbl` to the backing table name, so the SQL
+    emitted `JOIN countries ON ... ship_country.id` (alias not in scope) and the
+    relationship was named `countries`. The join target must resolve to the alias.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_join_target_by_id.model.tml")
+    model = graph.models["shipments_model"]
+
+    # The join uses the alias as the relation name, matching the ON clause.
+    assert model.sql is not None
+    assert "JOIN countries AS ship_country" in model.sql
+    assert {r.name for r in model.relationships} == {"ship_country"}
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (id INT, amount DOUBLE, ship_country_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 7), (2, 25.0, 7)")
+    con.execute("CREATE TABLE countries (id INT, name VARCHAR)")
+    con.execute("INSERT INTO countries VALUES (7, 'US')")
+
+    sql = layer.compile(
+        metrics=["shipments_model.amount"],
+        dimensions=["shipments_model.ship_country_name"],
+    )
+    rows = con.execute(sql).fetchall()
+    assert rows == [("US", 125.0)]
+
+
+def test_thoughtspot_measure_key_column_is_inferred_as_primary_key():
+    """A base-table key exported as a measure is still inferred as the primary key.
+
+    Regression: `_infer_model_primary_key` only scanned dimensions, but
+    ThoughtSpot often exports key columns as measures. With `order_key` as a
+    MEASURE and no physical `orders.id`, the primary key defaulted to `id` and the
+    derived projection injected a non-existent `orders.id AS id`, breaking even a
+    plain aggregate query.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_measure_key.model.tml")
+    model = graph.models["orders_model"]
+
+    # The measure-backed base-table column is used as the key.
+    assert model.primary_key == "order_key"
+    assert model.sql is not None
+    assert "orders.id AS id" not in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE orders (order_key INT, amount DOUBLE, customer_id INT)")
+    con.execute("INSERT INTO orders VALUES (1, 100.0, 5), (2, 25.0, 5)")
+    con.execute("CREATE TABLE customers (id INT, name VARCHAR)")
+    con.execute("INSERT INTO customers VALUES (5, 'Acme')")
+
+    sql = layer.compile(metrics=["orders_model.amount"], dimensions=["orders_model.customer_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("Acme", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_join_target_by_id.model.tml b/tests/fixtures/thoughtspot/model_join_target_by_id.model.tml
new file mode 100644
index 00000000..30a8c571
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_join_target_by_id.model.tml
@@ -0,0 +1,31 @@
+guid: "model-join-target-by-id"
+export_schema_version: "2"
+model:
+  name: shipments_model
+  description: A join identifies an aliased target table by its id
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: countries_tbl
+          on: "[orders::ship_country_id] = [ship_country::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: countries
+      id: countries_tbl
+      alias: ship_country
+      fqn: ANALYTICS.PUBLIC.countries
+  columns:
+    - name: order_id
+      column_id: orders::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: ship_country_name
+      column_id: ship_country::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_measure_key.model.tml b/tests/fixtures/thoughtspot/model_measure_key.model.tml
new file mode 100644
index 00000000..5f0c1858
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_measure_key.model.tml
@@ -0,0 +1,30 @@
+guid: "model-measure-key"
+export_schema_version: "2"
+model:
+  name: orders_model
+  description: The base table key is exported as a MEASURE and there is no id
+  model_tables:
+    - name: orders
+      fqn: ANALYTICS.PUBLIC.orders
+      joins:
+        - with: customers
+          on: "[orders::customer_id] = [customers::id]"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: order_key
+      column_id: orders::order_key
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: orders::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 8a23d0d55f9bbf204c0844ead814b17d7b797279 Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 13:50:58 -0700
Subject: [PATCH 16/17] fix(thoughtspot): skip key-less and function-wrapped
 joins as relationship keys

---
 sidemantic/adapters/thoughtspot.py            | 42 +++++++----
 tests/adapters/thoughtspot/test_parsing.py    | 69 +++++++++++++++++++
 .../model_range_only_join.model.tml           | 29 ++++++++
 .../model_wrapped_join_key.model.tml          | 29 ++++++++
 4 files changed, 156 insertions(+), 13 deletions(-)
 create mode 100644 tests/fixtures/thoughtspot/model_range_only_join.model.tml
 create mode 100644 tests/fixtures/thoughtspot/model_wrapped_join_key.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index ad64fe4a..18ef0d6a 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -202,28 +202,37 @@ def _extract_all_join_refs(
 
     A composite ON clause like ``[a::x] = [b::y] AND [a::p] = [b::q]`` yields all
     equality pairs, so composite-key relationships keep both columns instead of
-    silently dropping all but the first pair. Only pure equality conjuncts are
-    treated as key pairs; range/non-equi predicates (e.g.
-    ``[a::date] BETWEEN [b::start] AND [b::end]``) are skipped so they are not
-    mistaken for additional equality keys.
+    silently dropping all but the first pair. Only a plain equality between two
+    bare column references is a key pair; range/non-equi predicates (e.g.
+    ``[a::date] BETWEEN [b::start] AND [b::end]``) and wrapped expressions (e.g.
+    ``LOWER([a::email]) = LOWER([b::email])``) are skipped so they are not
+    mistaken for equality keys.
     """
     if not expr:
         return []
 
+    def _bare_ref(side: str) -> tuple[str | None, str] | None:
+        """Parse ``side`` only if it is exactly one bare ref, not a wrapped expr."""
+        side = side.strip()
+        bracketed = _TML_REF.fullmatch(side)
+        if bracketed:
+            return _parse_ref_token(bracketed.group(1))
+        if _TML_DOT_REF.fullmatch(side) or _SIMPLE_IDENTIFIER.match(side):
+            return _parse_ref_token(side)
+        return None
+
     pairs: list[tuple[tuple[str | None, str], tuple[str | None, str]]] = []
     for conjunct in _AND_SPLIT.split(expr):
-        tokens = _TML_REF.findall(conjunct)
-        if len(tokens) < 2:
-            tokens = [f"{t[0]}.{t[1]}" for t in _TML_DOT_REF.findall(conjunct)]
-
-        # Only an equality between exactly two refs is a join key pair. A `=` that
-        # is part of `<=`/`>=`/`!=` is not a plain equality.
+        # A plain equality has exactly one `=` (not part of `<=`/`>=`/`!=`).
         equalities = _EQUALITY_OP.findall(conjunct)
-        if len(tokens) != 2 or len(equalities) != 1:
+        if len(equalities) != 1:
+            continue
+        left_side, right_side = _EQUALITY_OP.split(conjunct, maxsplit=1)
+        left = _bare_ref(left_side)
+        right = _bare_ref(right_side)
+        if not left or not right:
             continue
 
-        left = _parse_ref_token(tokens[0])
-        right = _parse_ref_token(tokens[1])
         if table_path_lookup:
             if left[0] in table_path_lookup:
                 left = (table_path_lookup[left[0]], left[1])
@@ -1082,6 +1091,13 @@ def _parse_model_relationships(
             # so cross-model joins do not silently drop part of the key.
             ref_pairs = _extract_all_join_refs(on_value, lookup)
 
+            # A join with no equality keys (e.g. a pure range predicate) has no
+            # usable cross-model relationship key. Emitting a relationship with
+            # unset keys makes Sidemantic default to `<name>_id`/`id`, which do not
+            # exist; skip it. The predicate stays in the derived join SQL.
+            if not ref_pairs:
+                continue
+
             if table_name_lookup:
                 source = table_name_lookup.get(source, source)
                 destination = table_name_lookup.get(destination, destination)
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index f12f1d90..0d57c41e 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1585,6 +1585,75 @@ def test_thoughtspot_measure_key_column_is_inferred_as_primary_key():
     assert rows == [("Acme", 125.0)]
 
 
+def test_thoughtspot_range_only_join_emits_no_relationship():
+    """A join with only a range predicate emits no cross-model relationship.
+
+    Regression: a join with no equality key (e.g. `[sales::date] BETWEEN
+    [calendar::start_date] AND [calendar::end_date]`) still appended a relationship
+    with unset keys, which Sidemantic defaults to `calendar_id`/`id` (non-existent
+    columns). No relationship should be emitted; the predicate stays in the join.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_range_only_join.model.tml")
+    model = graph.models["sales_model"]
+
+    # No relationship is created from a key-less range join.
+    assert model.relationships == []
+    # The range predicate is preserved in the derived join SQL.
+    assert model.sql is not None
+    assert "BETWEEN calendar.start_date AND calendar.end_date" in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE sales (id INT, amount DOUBLE, date DATE)")
+    con.execute("INSERT INTO sales VALUES (1, 100.0, DATE '2024-06-15'), (2, 25.0, DATE '2024-06-20')")
+    con.execute("CREATE TABLE calendar (period_name VARCHAR, start_date DATE, end_date DATE)")
+    con.execute("INSERT INTO calendar VALUES ('June', DATE '2024-06-01', DATE '2024-06-30')")
+
+    sql = layer.compile(metrics=["sales_model.amount"], dimensions=["sales_model.period_name"])
+    rows = con.execute(sql).fetchall()
+    assert rows == [("June", 125.0)]
+
+
+def test_thoughtspot_wrapped_equality_is_not_a_relationship_key():
+    """A function-wrapped equality (e.g. LOWER()) is not stored as a join key.
+
+    Regression: `LOWER([users::email]) = LOWER([contacts::email])` matched the
+    "two refs, one `=`" heuristic and stored a plain `email -> email` key. A
+    cross-model query would then use a case-sensitive `email = email` join that
+    differs from the `LOWER()` predicate. Only bare-ref equalities are keys.
+    """
+    import duckdb
+
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_wrapped_join_key.model.tml")
+    model = graph.models["users_model"]
+
+    # The wrapped equality does not become a relationship key.
+    assert model.relationships == []
+    # The LOWER() predicate is preserved in the derived join SQL.
+    assert model.sql is not None
+    assert "LOWER(users.email) = LOWER(contacts.email)" in model.sql
+
+    layer = SemanticLayer()
+    layer.add_model(model)
+
+    con = duckdb.connect()
+    con.execute("CREATE TABLE users (id INT, amount DOUBLE, email VARCHAR)")
+    con.execute("INSERT INTO users VALUES (1, 100.0, 'A@X.COM'), (2, 25.0, 'A@X.COM')")
+    con.execute("CREATE TABLE contacts (name VARCHAR, email VARCHAR)")
+    con.execute("INSERT INTO contacts VALUES ('Acme', 'a@x.com')")
+
+    sql = layer.compile(metrics=["users_model.amount"], dimensions=["users_model.contact_name"])
+    rows = con.execute(sql).fetchall()
+    # The case-insensitive join still matches via the preserved LOWER() predicate.
+    assert rows == [("Acme", 125.0)]
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_range_only_join.model.tml b/tests/fixtures/thoughtspot/model_range_only_join.model.tml
new file mode 100644
index 00000000..6ff8b92a
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_range_only_join.model.tml
@@ -0,0 +1,29 @@
+guid: "model-range-only-join"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: Join with only a range predicate and no equality key
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: calendar
+          on: "[sales::date] BETWEEN [calendar::start_date] AND [calendar::end_date]"
+          type: INNER
+          cardinality: MANY_TO_ONE
+    - name: calendar
+      fqn: ANALYTICS.PUBLIC.calendar
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: period_name
+      column_id: calendar::period_name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/model_wrapped_join_key.model.tml b/tests/fixtures/thoughtspot/model_wrapped_join_key.model.tml
new file mode 100644
index 00000000..42659db8
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_wrapped_join_key.model.tml
@@ -0,0 +1,29 @@
+guid: "model-wrapped-join-key"
+export_schema_version: "2"
+model:
+  name: users_model
+  description: Join keys wrapped in LOWER() are not plain equality keys
+  model_tables:
+    - name: users
+      fqn: ANALYTICS.PUBLIC.users
+      joins:
+        - with: contacts
+          on: "LOWER([users::email]) = LOWER([contacts::email])"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: contacts
+      fqn: ANALYTICS.PUBLIC.contacts
+  columns:
+    - name: user_id
+      column_id: users::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: contact_name
+      column_id: contacts::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: users::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM

From 176fe8da03e4fd41709465f0e16275023ae2d2ca Mon Sep 17 00:00:00 2001
From: Nico Ritschel <ritschel@gmail.com>
Date: Sun, 14 Jun 2026 14:05:03 -0700
Subject: [PATCH 17/17] fix(thoughtspot): strip surrounding parentheses when
 matching join equality keys

---
 sidemantic/adapters/thoughtspot.py            |  5 +++-
 tests/adapters/thoughtspot/test_parsing.py    | 18 ++++++++++++
 .../model_parenthesized_join.model.tml        | 29 +++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 tests/fixtures/thoughtspot/model_parenthesized_join.model.tml

diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
index 18ef0d6a..42dd15ce 100644
--- a/sidemantic/adapters/thoughtspot.py
+++ b/sidemantic/adapters/thoughtspot.py
@@ -213,7 +213,10 @@ def _extract_all_join_refs(
 
     def _bare_ref(side: str) -> tuple[str | None, str] | None:
         """Parse ``side`` only if it is exactly one bare ref, not a wrapped expr."""
-        side = side.strip()
+        # Harmless surrounding parentheses/whitespace (e.g. `([a::x] = [b::y])`
+        # splits into `([a::x]` / `[b::y])`) are stripped; a bare ref never
+        # contains parentheses, so this does not admit function-wrapped exprs.
+        side = side.strip().strip("()").strip()
         bracketed = _TML_REF.fullmatch(side)
         if bracketed:
             return _parse_ref_token(bracketed.group(1))
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
index 0d57c41e..bb736851 100644
--- a/tests/adapters/thoughtspot/test_parsing.py
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -1654,6 +1654,24 @@ def test_thoughtspot_wrapped_equality_is_not_a_relationship_key():
     assert rows == [("Acme", 125.0)]
 
 
+def test_thoughtspot_parenthesized_equality_is_a_relationship_key():
+    """An equality wrapped in harmless parentheses still becomes a join key.
+
+    Regression: requiring each equality side to be a bare ref dropped valid keys
+    when the predicate was parenthesized (e.g. `([sales::customer_id] =
+    [customers::id])`), since the sides reached the matcher as `([sales::...]` /
+    `[customers::id])`. Surrounding parentheses must be stripped first.
+    """
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/model_parenthesized_join.model.tml")
+    model = graph.models["sales_model"]
+
+    rel = {r.name: r for r in model.relationships}["customers"]
+    assert rel.type == "many_to_one"
+    assert rel.foreign_key == "customer_id"
+    assert rel.primary_key == "id"
+
+
 def test_thoughtspot_model_auto_detect_loader():
     """A model + model_tables + columns YAML file is auto-detected as ThoughtSpot."""
     with tempfile.TemporaryDirectory() as tmpdir:
diff --git a/tests/fixtures/thoughtspot/model_parenthesized_join.model.tml b/tests/fixtures/thoughtspot/model_parenthesized_join.model.tml
new file mode 100644
index 00000000..f53385e8
--- /dev/null
+++ b/tests/fixtures/thoughtspot/model_parenthesized_join.model.tml
@@ -0,0 +1,29 @@
+guid: "model-parenthesized-join"
+export_schema_version: "2"
+model:
+  name: sales_model
+  description: An equality join predicate wrapped in harmless parentheses
+  model_tables:
+    - name: sales
+      fqn: ANALYTICS.PUBLIC.sales
+      joins:
+        - with: customers
+          on: "([sales::customer_id] = [customers::id])"
+          type: LEFT_OUTER
+          cardinality: MANY_TO_ONE
+    - name: customers
+      fqn: ANALYTICS.PUBLIC.customers
+  columns:
+    - name: order_id
+      column_id: sales::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      column_id: sales::amount
+      properties:
+        column_type: MEASURE
+        aggregation: SUM