
Commit fbf3014

fixes after the code review
1 parent 32519b4 commit fbf3014

13 files changed

Lines changed: 76 additions & 219 deletions

AGENTS.md

Lines changed: 2 additions & 4 deletions
@@ -14,7 +14,6 @@ This file contains important information about the sql-metadata repository for A
 **Technology Stack:**
 - Python 3.10+
 - sqlglot library for SQL parsing and AST construction
-- sqlparse used only for legacy tokenization fallback
 - Poetry for dependency management
 - pytest for testing
 - ruff for linting and formatting
@@ -33,8 +32,8 @@ sql-metadata/
 │ ├── nested_resolver.py # NestedResolver — CTE/subquery names, bodies, resolution
 │ ├── query_type_extractor.py # QueryTypeExtractor — query type detection
 │ ├── comments.py # Comment extraction/stripping (pure functions)
-│ ├── keywords_lists.py # QueryType/TokenType enums, keyword sets
-│ ├── utils.py # UniqueList, flatten_list, shared helpers
+│ ├── keywords_lists.py # QueryType enum
+│ ├── utils.py # UniqueList, last_segment, shared helpers
 │ ├── generalizator.py # Query anonymisation
 │ └── __init__.py # Exports: Parser, QueryType
 ├── test/ # Test suite (25 test files)
@@ -220,7 +219,6 @@ Co-Authored-By: Claude <noreply@anthropic.com>
 
 ### Production
 - **sqlglot** (^30.0.3): SQL parsing and AST construction
-- **sqlparse** (>=0.4.1, <0.6.0): Legacy tokenization
 
 ### Development
 - **pytest** (^9.0.2): Testing framework

ARCHITECTURE.md

Lines changed: 5 additions & 8 deletions
@@ -15,8 +15,8 @@ sql-metadata v3 is a Python library that parses SQL queries and extracts metadat
 | [`nested_resolver.py`](sql_metadata/nested_resolver.py) | CTE/subquery name and body extraction, nested column resolution | `NestedResolver` |
 | [`query_type_extractor.py`](sql_metadata/query_type_extractor.py) | Query type detection from AST root node | `QueryTypeExtractor` |
 | [`comments.py`](sql_metadata/comments.py) | Comment extraction/stripping via tokenizer gaps | `extract_comments`, `strip_comments` |
-| [`keywords_lists.py`](sql_metadata/keywords_lists.py) | Keyword sets, `QueryType` and `TokenType` enums ||
-| [`utils.py`](sql_metadata/utils.py) | `UniqueList` (deduplicating list), `flatten_list`, `_make_reverse_cte_map` ||
+| [`keywords_lists.py`](sql_metadata/keywords_lists.py) | `QueryType` enum ||
+| [`utils.py`](sql_metadata/utils.py) | `UniqueList` (deduplicating list), `last_segment`, `DOT_PLACEHOLDER` ||
 | [`generalizator.py`](sql_metadata/generalizator.py) | Query anonymisation for log aggregation | `Generalizator` |
 
 ---
@@ -427,16 +427,13 @@ A collection of pure stateless functions (no class). Exploits the fact that sqlg
 
 ### Supporting Modules
 
-**[`keywords_lists.py`](sql_metadata/keywords_lists.py)** — keyword sets used for token classification and query type mapping:
-- `KEYWORDS_BEFORE_COLUMNS` — keywords after which columns appear (`SELECT`, `WHERE`, `ON`, etc.)
-- `TABLE_ADJUSTMENT_KEYWORDS` — keywords after which tables appear (`FROM`, `JOIN`, `INTO`, etc.)
-- `COLUMNS_SECTIONS` — maps keywords to `columns_dict` section names
+**[`keywords_lists.py`](sql_metadata/keywords_lists.py):**
 - `QueryType` — string enum (`str, Enum`) for direct comparison (`parser.query_type == "SELECT"`)
 
 **[`utils.py`](sql_metadata/utils.py):**
 - `UniqueList` — deduplicating list with O(1) membership checks via internal `set`. Used everywhere to collect columns, tables, aliases.
-- `flatten_list` — recursively flattens nested lists from multi-column alias resolution.
-- `_make_reverse_cte_map` — builds reverse mapping from placeholder CTE names to originals, shared by `ColumnExtractor` and `NestedResolver`.
+- `last_segment` — returns the last dot-separated segment of a qualified name (e.g. `"schema.table.column"` → `"column"`).
+- `DOT_PLACEHOLDER` — encoding constant for qualified CTE names (`__DOT__`).
 
 **[`generalizator.py`](sql_metadata/generalizator.py)** — anonymises SQL for log aggregation: strips comments, replaces literals with `X`, numbers with `N`, collapses `IN(...)` lists to `(XYZ)`.
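The `utils.py` helpers named above can be sketched as follows. This is a minimal illustrative implementation based only on the descriptions in this diff, not code copied from the repository:

```python
# Sketch of the helpers described for utils.py. The names come from the
# diff; the bodies are assumptions consistent with the stated behaviour.

DOT_PLACEHOLDER = "__DOT__"  # encoding constant for qualified CTE names


def last_segment(name: str) -> str:
    """Return the last dot-separated segment of a qualified name."""
    return name.rsplit(".", 1)[-1]


print(last_segment("schema.table.column"))  # -> column
print(last_segment("column"))               # -> column (no dots: unchanged)
```

A single `rsplit` with `maxsplit=1` avoids splitting the whole string when only the tail is needed.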

README.md

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 [![Maintenance](https://img.shields.io/badge/maintained%3F-yes-green.svg)](https://github.com/macbre/sql-metadata/graphs/commit-activity)
 [![Downloads](https://pepy.tech/badge/sql-metadata/month)](https://pepy.tech/project/sql-metadata)
 
-Uses tokenized query returned by [`python-sqlparse`](https://github.com/andialbrecht/sqlparse) and generates query metadata.
+Uses [`sqlglot`](https://github.com/tobymao/sqlglot) to parse SQL queries and extract metadata.
 
 **Extracts column names and tables** used by the query.
 Automatically conduct **column alias resolution**, **sub queries aliases resolution** as well as **tables aliases resolving**.

poetry.lock

Lines changed: 1 addition & 17 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 3 deletions
@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "sql_metadata"
-version = "2.20.0"
+version = "3.0.0"
 license="MIT"
-description = "Uses tokenized query returned by python-sqlparse and generates query metadata"
+description = "Uses sqlglot to parse SQL queries and extract metadata"
 authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
 readme = "README.md"
 homepage = "https://github.com/macbre/sql-metadata"
@@ -14,7 +14,6 @@ packages = [
 
 [tool.poetry.dependencies]
 python = "^3.10"
-sqlparse = ">=0.4.1,<0.6.0"
 sqlglot = "^30.0.3"
 
 [tool.poetry.group.dev.dependencies]

sql_metadata/column_extractor.py

Lines changed: 2 additions & 4 deletions
@@ -154,7 +154,6 @@ class _Collector:
     """
 
     __slots__ = (
-        "ta",
         "columns",
         "columns_dict",
         "alias_names",
@@ -166,8 +165,7 @@ class _Collector:
         "output_columns",
     )
 
-    def __init__(self, table_aliases: dict[str, str]):
-        self.ta = table_aliases
+    def __init__(self) -> None:
         self.columns = UniqueList()
         self.columns_dict: dict[str, UniqueList] = {}
         self.alias_names = UniqueList()
@@ -252,7 +250,7 @@ def __init__(
         self._ast = ast
         self._table_aliases = table_aliases
         self._cte_name_map = cte_name_map or {}
-        self._collector = _Collector(table_aliases)
+        self._collector = _Collector()
         self._reverse_cte_map = self._cte_name_map
 
     # -------------------------------------------------------------------
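The unused `ta` attribute has to leave `__slots__` and `__init__` together: with `__slots__` defined, assigning an attribute that is not listed raises `AttributeError`. A toy class (not the repository's `_Collector`) illustrates the constraint:

```python
# Toy example showing why a slot and its assignment are removed as a pair.
class Collector:
    __slots__ = ("columns",)  # only "columns" may be assigned on instances

    def __init__(self) -> None:
        self.columns = []


c = Collector()
try:
    c.ta = {}  # "ta" is not declared in __slots__
except AttributeError as err:
    print("rejected:", err)
```

Conversely, keeping `"ta"` in `__slots__` without ever assigning it would waste a slot descriptor and mislead readers about the class's state.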

sql_metadata/keywords_lists.py

Lines changed: 3 additions & 138 deletions
@@ -1,89 +1,11 @@
-"""SQL keyword sets and enums used to classify tokens and query types.
+"""Query type enum for classifying SQL statements.
 
-Defines the canonical sets of normalised SQL keywords that the token-based
-parser (``token.py``) and the AST-based extractors use to decide when a
-token is relevant (e.g. precedes a column or table reference) and to map
-query prefixes to :class:`QueryType` values. Keyword values are stored
-**without spaces** (``INNERJOIN``, ``ORDERBY``) because the tokeniser
-strips whitespace before comparison.
+Defines the :class:`QueryType` enum used by :class:`QueryTypeExtractor`
+and exported from the ``sql_metadata`` package.
 """
 
 from enum import Enum
 
-#: Normalised keywords after which the next token(s) are column references.
-#: Used by the token-linked-list walker and by ``COLUMNS_SECTIONS`` to
-#: decide which ``columns_dict`` section a column belongs to.
-KEYWORDS_BEFORE_COLUMNS = {
-    "SELECT",
-    "WHERE",
-    "HAVING",
-    "ORDERBY",
-    "GROUPBY",
-    "ON",
-    "SET",
-    "USING",
-}
-
-#: Normalised keywords after which the next token is a **table** name.
-#: Includes all JOIN variants (whitespace-stripped) as well as INTO,
-#: UPDATE, TABLE, and the DDL guard ``IFNOTEXISTS``.
-TABLE_ADJUSTMENT_KEYWORDS = {
-    "FROM",
-    "JOIN",
-    "CROSSJOIN",
-    "INNERJOIN",
-    "FULLJOIN",
-    "FULLOUTERJOIN",
-    "LEFTJOIN",
-    "RIGHTJOIN",
-    "LEFTOUTERJOIN",
-    "RIGHTOUTERJOIN",
-    "NATURALJOIN",
-    "INTO",
-    "UPDATE",
-    "TABLE",
-    "IFNOTEXISTS",
-}
-
-#: Keywords that signal the end of a ``WITH`` (CTE) block and the start
-#: of the main statement body. Used by the legacy token-based WITH parser
-#: and referenced in ``_ast.py`` for malformed-query detection.
-WITH_ENDING_KEYWORDS = {"UPDATE", "SELECT", "DELETE", "REPLACE", "INSERT"}
-
-#: Keywords that can appear immediately before a parenthesised subquery
-#: in a FROM/JOIN position. A subset of ``TABLE_ADJUSTMENT_KEYWORDS``
-#: excluding DML-only entries (INTO, UPDATE, TABLE).
-SUBQUERY_PRECEDING_KEYWORDS = {
-    "FROM",
-    "JOIN",
-    "CROSSJOIN",
-    "INNERJOIN",
-    "FULLJOIN",
-    "FULLOUTERJOIN",
-    "LEFTJOIN",
-    "RIGHTJOIN",
-    "LEFTOUTERJOIN",
-    "RIGHTOUTERJOIN",
-    "NATURALJOIN",
-}
-
-#: Maps a normalised keyword to the ``columns_dict`` section name that
-#: columns following it belong to. For example, columns after ``SELECT``
-#: go into the ``"select"`` section, columns after ``ON``/``USING`` go
-#: into ``"join"``.
-COLUMNS_SECTIONS = {
-    "SELECT": "select",
-    "WHERE": "where",
-    "HAVING": "having",
-    "ORDERBY": "order_by",
-    "ON": "join",
-    "USING": "join",
-    "INTO": "insert",
-    "SET": "update",
-    "GROUPBY": "group_by",
-    "INNERJOIN": "inner_join",
-}
-
 
 class QueryType(str, Enum):
     """Enumeration of SQL statement types recognised by the parser.
@@ -103,60 +25,3 @@ class QueryType(str, Enum):
     DROP = "DROP TABLE"
     TRUNCATE = "TRUNCATE TABLE"
     MERGE = "MERGE"
-
-
-class TokenType(str, Enum):
-    """Semantic classification assigned to an :class:`SQLToken` during parsing.
-
-    These types are used by the legacy token-based extraction pipeline to
-    label each token after the keyword-driven classification pass. In the
-    v3 sqlglot-based pipeline they are still referenced for backward
-    compatibility in test assertions and token introspection.
-    """
-
-    COLUMN = "COLUMN"
-    TABLE = "TABLE"
-    COLUMN_ALIAS = "COLUMN_ALIAS"
-    TABLE_ALIAS = "TABLE_ALIAS"
-    WITH_NAME = "WITH_NAME"
-    SUB_QUERY_NAME = "SUB_QUERY_NAME"
-    PARENTHESIS = "PARENTHESIS"
-
-
-#: Maps normalised query-prefix strings to :class:`QueryType` values.
-#: Cannot be replaced by the enum alone because ``WITH`` maps to
-#: ``SELECT`` (a CTE followed by its main query) and composite prefixes
-#: like ``CREATETABLE`` need their own entries.
-SUPPORTED_QUERY_TYPES = {
-    "INSERT": QueryType.INSERT,
-    "REPLACE": QueryType.REPLACE,
-    "UPDATE": QueryType.UPDATE,
-    "SELECT": QueryType.SELECT,
-    "DELETE": QueryType.DELETE,
-    "WITH": QueryType.SELECT,
-    "CREATETABLE": QueryType.CREATE,
-    "CREATETEMPORARY": QueryType.CREATE,
-    "ALTERTABLE": QueryType.ALTER,
-    "DROPTABLE": QueryType.DROP,
-    "CREATEFUNCTION": QueryType.CREATE,
-    "TRUNCATETABLE": QueryType.TRUNCATE,
-}
-
-#: Union of all keyword sets the tokeniser cares about. Tokens whose
-#: normalised value falls outside this set are **not** tracked as the
-#: ``last_keyword`` on subsequent tokens, keeping the classification
-#: logic focused on structurally significant positions only.
-RELEVANT_KEYWORDS = {
-    *KEYWORDS_BEFORE_COLUMNS,
-    *TABLE_ADJUSTMENT_KEYWORDS,
-    *WITH_ENDING_KEYWORDS,
-    *SUBQUERY_PRECEDING_KEYWORDS,
-    "LIMIT",
-    "OFFSET",
-    "RETURNING",
-    "VALUES",
-    "INDEX",
-    "KEY",
-    "WITH",
-    "WINDOW",
-}
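The `QueryType(str, Enum)` pattern this commit keeps is what makes direct comparison against plain strings work (the `parser.query_type == "SELECT"` idiom mentioned in ARCHITECTURE.md). A minimal sketch with member values taken from the diff:

```python
from enum import Enum


# Minimal sketch of the (str, Enum) pattern: the str mixin makes enum
# members compare equal to ordinary strings. Values are from the diff.
class QueryType(str, Enum):
    SELECT = "SELECT"
    INSERT = "INSERT"
    DROP = "DROP TABLE"
    TRUNCATE = "TRUNCATE TABLE"
    MERGE = "MERGE"


print(QueryType.SELECT == "SELECT")    # True: str mixin comparison
print(QueryType.DROP == "DROP TABLE")  # True: the value, not the member name
```

Note that comparison uses the member's *value*, which is why `DROP` matches `"DROP TABLE"` rather than `"DROP"`.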

sql_metadata/nested_resolver.py

Lines changed: 2 additions & 5 deletions
@@ -8,7 +8,6 @@
 
 from __future__ import annotations
 
-import copy
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -125,8 +124,6 @@ def not_sql(self, expression: exp.Expression) -> str:
         return super().not_sql(expression)  # type: ignore[arg-type, no-any-return]
 
 
-_GENERATOR = _PreservingGenerator()
-
 
 # ---------------------------------------------------------------------------
 # Resolution helpers
@@ -669,10 +666,10 @@ def _body_sql(node: exp.Expression) -> str:
 
         Renders the CTE body as ``SELECT id FROM users`` (quotes stripped).
         """
-        body = copy.deepcopy(node)
+        body = node.copy()
         for ident in body.find_all(exp.Identifier):
             ident.set("quoted", False)
-        return _GENERATOR.generate(body)
+        return _PreservingGenerator().generate(body, copy=False)

sql_metadata/parser.py

Lines changed: 2 additions & 8 deletions
@@ -424,8 +424,6 @@ def limit_and_offset(self) -> tuple[int, int] | None:
         if self._limit_and_offset is not None:
             return self._limit_and_offset
 
-        from sqlglot import exp
-
         ast = self._ast_parser.ast
         if ast is None:
             return None
@@ -452,7 +450,7 @@ def values(self) -> list[Any]:
 
         :rtype: list[Any]
         """
-        if self._values:
+        if self._values is not None:
             return self._values
         self._values = self._extract_values()
         return self._values
@@ -468,7 +466,7 @@ def values_dict(self) -> dict[str, Any] | None:
         :rtype: dict[str, Any] | None
         """
         values = self.values
-        if self._values_dict or not values:
+        if self._values_dict is not None or not values:
             return self._values_dict
         columns = self.columns
 
@@ -516,8 +514,6 @@ def _extract_values(self) -> list[Any]:
         multi-row inserts, or an empty list when no VALUES clause exists.
         :rtype: list[Any]
         """
-        from sqlglot import exp
-
         try:
             ast = self._ast_parser.ast
         except ValueError:
@@ -547,8 +543,6 @@ def _convert_value(val: exp.Expression) -> int | float | str:
         :returns: The Python int, float, or str representation.
         :rtype: int | float | str
         """
-        from sqlglot import exp
-
         if isinstance(val, exp.Literal):
             if val.is_int:
                 return int(val.this)
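The `if self._values:` → `if self._values is not None:` fix matters because an empty list is falsy: with a truthiness check, a query with no VALUES clause would re-run extraction on every access instead of hitting the cache. A minimal illustrative class (names made up, not the repository's `Parser`):

```python
# Demonstrates the caching bug the commit fixes: a cached empty result is
# falsy, so `if self._values:` would never count as a cache hit.
class Cached:
    def __init__(self) -> None:
        self._values = None  # None means "not computed yet"
        self.compute_calls = 0

    @property
    def values(self):
        if self._values is not None:  # correct: [] is a valid cached result
            return self._values
        self.compute_calls += 1
        self._values = []  # pretend extraction found no VALUES clause
        return self._values


c = Cached()
c.values
c.values
print(c.compute_calls)  # -> 1; a truthiness check would give 2
```

The same reasoning applies to `values_dict`: `None` is the "unset" sentinel, while `{}` or `[]` are legitimate cached answers.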
