1- """SQL keyword sets and enums used to classify tokens and query types.
1+ """Query type enum for classifying SQL statements.
22
3- Defines the canonical sets of normalised SQL keywords that the token-based
4- parser (``token.py``) and the AST-based extractors use to decide when a
5- token is relevant (e.g. precedes a column or table reference) and to map
6- query prefixes to :class:`QueryType` values. Keyword values are stored
7- **without spaces** (``INNERJOIN``, ``ORDERBY``) because the tokeniser
8- strips whitespace before comparison.
3+ Defines the :class:`QueryType` enum used by :class:`QueryTypeExtractor`
4+ and exported from the ``sql_metadata`` package.
95"""
106
117from enum import Enum
128
#: Keywords whose following token(s) are treated as column references.
#: Consulted by the token-linked-list walker, and mirrored by
#: ``COLUMNS_SECTIONS`` when assigning columns to ``columns_dict``.
KEYWORDS_BEFORE_COLUMNS = {
    "SELECT",
    "WHERE",
    "GROUPBY",
    "HAVING",
    "ORDERBY",
    "ON",
    "USING",
    "SET",
}
26-
#: Keywords whose next token names a **table**: FROM, every
#: whitespace-stripped JOIN spelling, INTO/UPDATE/TABLE, and the DDL
#: guard ``IFNOTEXISTS``.
TABLE_ADJUSTMENT_KEYWORDS = {
    "FROM",
    "INTO",
    "UPDATE",
    "TABLE",
    "IFNOTEXISTS",
    "JOIN",
    "CROSSJOIN",
    "INNERJOIN",
    "LEFTJOIN",
    "LEFTOUTERJOIN",
    "RIGHTJOIN",
    "RIGHTOUTERJOIN",
    "FULLJOIN",
    "FULLOUTERJOIN",
    "NATURALJOIN",
}
47-
#: Statement-starting keywords that terminate a ``WITH`` (CTE) block and
#: begin the main statement body. Used by the legacy token-based WITH
#: parser and by ``_ast.py`` for malformed-query detection.
WITH_ENDING_KEYWORDS = {"SELECT", "INSERT", "UPDATE", "DELETE", "REPLACE"}
52-
#: Keywords that may directly precede a parenthesised subquery in a
#: FROM/JOIN position: the JOIN-family subset of
#: ``TABLE_ADJUSTMENT_KEYWORDS`` without the DML-only entries
#: (INTO, UPDATE, TABLE).
SUBQUERY_PRECEDING_KEYWORDS = {
    "FROM",
    "JOIN",
    "CROSSJOIN",
    "INNERJOIN",
    "LEFTJOIN",
    "LEFTOUTERJOIN",
    "RIGHTJOIN",
    "RIGHTOUTERJOIN",
    "FULLJOIN",
    "FULLOUTERJOIN",
    "NATURALJOIN",
}
69-
#: ``columns_dict`` section name keyed by the normalised keyword that
#: introduced the column: e.g. columns after ``SELECT`` land in
#: ``"select"``, while ``ON`` and ``USING`` both feed ``"join"``.
COLUMNS_SECTIONS = {
    "SELECT": "select",
    "WHERE": "where",
    "HAVING": "having",
    "ORDERBY": "order_by",
    "ON": "join",
    "USING": "join",
    "INTO": "insert",
    "SET": "update",
    "GROUPBY": "group_by",
    "INNERJOIN": "inner_join",
}
86-
879
8810class QueryType (str , Enum ):
8911 """Enumeration of SQL statement types recognised by the parser.
@@ -103,60 +25,3 @@ class QueryType(str, Enum):
10325 DROP = "DROP TABLE"
10426 TRUNCATE = "TRUNCATE TABLE"
10527 MERGE = "MERGE"
106-
107-
class TokenType(str, Enum):
    """Semantic label attached to an :class:`SQLToken` during parsing.

    The legacy token-based extraction pipeline assigns these after its
    keyword-driven classification pass; the v3 sqlglot-based pipeline
    keeps them around for backward-compatible test assertions and for
    token introspection.
    """

    COLUMN = "COLUMN"
    TABLE = "TABLE"
    COLUMN_ALIAS = "COLUMN_ALIAS"
    TABLE_ALIAS = "TABLE_ALIAS"
    WITH_NAME = "WITH_NAME"
    SUB_QUERY_NAME = "SUB_QUERY_NAME"
    PARENTHESIS = "PARENTHESIS"
124-
125-
#: Lookup from a normalised query-prefix string to its
#: :class:`QueryType`. The enum alone cannot express this mapping:
#: ``WITH`` resolves to ``SELECT`` (a CTE followed by its main query)
#: and composite prefixes such as ``CREATETABLE`` need dedicated entries.
SUPPORTED_QUERY_TYPES = {
    "INSERT": QueryType.INSERT,
    "REPLACE": QueryType.REPLACE,
    "UPDATE": QueryType.UPDATE,
    "SELECT": QueryType.SELECT,
    "DELETE": QueryType.DELETE,
    "WITH": QueryType.SELECT,
    "CREATETABLE": QueryType.CREATE,
    "CREATETEMPORARY": QueryType.CREATE,
    "ALTERTABLE": QueryType.ALTER,
    "DROPTABLE": QueryType.DROP,
    "CREATEFUNCTION": QueryType.CREATE,
    "TRUNCATETABLE": QueryType.TRUNCATE,
}
144-
#: Every keyword the tokeniser tracks as ``last_keyword``. Tokens whose
#: normalised value falls outside this union are **not** recorded,
#: keeping classification focused on structurally significant positions.
RELEVANT_KEYWORDS = (
    KEYWORDS_BEFORE_COLUMNS
    | TABLE_ADJUSTMENT_KEYWORDS
    | WITH_ENDING_KEYWORDS
    | SUBQUERY_PRECEDING_KEYWORDS
    | {
        "LIMIT",
        "OFFSET",
        "RETURNING",
        "VALUES",
        "INDEX",
        "KEY",
        "WITH",
        "WINDOW",
    }
)
0 commit comments