@@ -452,9 +452,8 @@ class IndexMeta:
452452
# Captures the value assigned to ``key_field`` (optionally wrapped in single
# and/or double quotes) anywhere in an index definition string.
_KEY_FIELD_RE = re.compile(r"key_field\s*=\s*'?\"?([^'\",)\s]+)\"?'?", re.IGNORECASE)
# Captures the bare identifier assigned to ``alias``.
_ALIAS_RE = re.compile(r"alias\s*=\s*([A-Za-z_][A-Za-z0-9_]*)", re.IGNORECASE)
# Captures the name that follows a ``::pdb.`` cast (e.g. ``simple`` in ``::pdb.simple``).
_TOKENIZER_NAME_RE = re.compile(r"::pdb\.([A-Za-z_][A-Za-z0-9_]*)", re.IGNORECASE)
# Matches a whole string that is a plain, unquoted identifier.
_IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
458457
459458
460459def _split_top_level_csv (expr : str ) -> list [str ]:
@@ -520,16 +519,58 @@ def _extract_bm25_field_list(indexdef: str) -> list[str]:
520519
521520
def _extract_field_name(field_expr: str) -> str | None:
    """Return the column name referenced by a single index field expression.

    The expression is reduced step by step:

    1. redundant wrapping parentheses are removed;
    2. everything from a ``::pdb.`` cast onward is dropped;
    3. everything from the first ``->`` operator onward is dropped;
    4. qualifiers before the last ``.`` are dropped.

    Steps 3 and 4 only consider operators that sit *outside* quotes, so a
    delimited identifier such as ``"my.col"`` is kept intact.

    Args:
        field_expr: Text of one field expression.

    Returns:
        The identifier with delimiting double quotes removed and doubled
        quotes (``""``) unescaped, or ``None`` when what remains is neither a
        single well-formed delimited identifier nor a plain identifier.
    """
    expr = _strip_outer_parens(field_expr.strip())

    # A ``::pdb.<name>`` cast and anything after it is not part of the column reference.
    cast_marker = re.search(r"::\s*pdb\.", expr, re.IGNORECASE)
    if cast_marker is not None:
        expr = _strip_outer_parens(expr[: cast_marker.start()].strip())

    # Keep only the operand to the left of the first unquoted ``->`` (this also covers ``->>``).
    arrow_at = _find_unquoted(expr, "->")
    if arrow_at != -1:
        expr = _strip_outer_parens(expr[:arrow_at].strip())

    # Strip schema/table qualifiers and keep the terminal identifier.
    dot_at = _find_unquoted(expr, ".", last=True)
    if dot_at != -1:
        expr = _strip_outer_parens(expr[dot_at + 1 :].strip())

    if len(expr) >= 2 and expr.startswith('"') and expr.endswith('"'):
        inner = expr[1:-1]
        # A lone quote inside means this is several tokens (e.g. ``"a" || "b"``),
        # not one delimited identifier.
        if '"' in inner.replace('""', ""):
            return None
        # ``""`` is a zero-length identifier, which names no column.
        return inner.replace('""', '"') or None
    if _IDENT_RE.match(expr):
        return expr
    return None


def _find_unquoted(text: str, needle: str, *, last: bool = False) -> int:
    """Return the index of ``needle`` in ``text`` outside quotes, or ``-1``.

    Both single-quoted and double-quoted regions are skipped. With
    ``last=True`` the final unquoted occurrence is returned instead of the
    first.
    """
    found = -1
    in_single = False
    in_double = False
    for index, ch in enumerate(text):
        if ch == "'" and not in_double:
            in_single = not in_single
        elif ch == '"' and not in_single:
            in_double = not in_double
        elif not (in_single or in_double) and text.startswith(needle, index):
            if not last:
                return index
            found = index
    return found
531539
532540
def _strip_outer_parens(value: str) -> str:
    """Peel off every pair of parentheses that wraps the whole of ``value``.

    A layer is removed only while the text starts with ``(``, ends with ``)``
    and ``_has_balanced_outer_parens`` confirms that those two characters
    belong to the same pair. Surrounding whitespace exposed by removing a
    layer is stripped before the next check.
    """
    stripped = value
    while True:
        wrapped = stripped.startswith("(") and stripped.endswith(")")
        if not wrapped or not _has_balanced_outer_parens(stripped):
            return stripped
        stripped = stripped[1:-1].strip()
546+
547+
def _has_balanced_outer_parens(value: str) -> bool:
    """Tell whether the first and last characters of ``value`` are one matching pair.

    Parentheses inside single-quoted or double-quoted regions are ignored.

    Args:
        value: Candidate text; expected to already be trimmed of whitespace.

    Returns:
        ``True`` only when ``value`` starts with ``(``, ends with ``)`` and the
        opening parenthesis is closed by that final character. ``(a)(b)``,
        text with no wrapping parentheses, and the empty string all yield
        ``False``.
    """
    # Without this guard, text containing no parentheses would be reported as "balanced".
    if len(value) < 2 or value[0] != "(" or value[-1] != ")":
        return False

    last_index = len(value) - 1
    depth = 0
    in_single = False
    in_double = False
    for index, ch in enumerate(value):
        if ch == "'" and not in_double:
            in_single = not in_single
        elif ch == '"' and not in_single:
            in_double = not in_double
        elif in_single or in_double:
            continue
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                # The opening parenthesis has just been closed: it wraps the
                # whole text only if this is the final character.
                return index == last_index
    # The opening parenthesis was never closed outside quotes.
    return False
572+
573+
533574def _extract_key_field (indexdef : str ) -> str | None :
534575 match = _KEY_FIELD_RE .search (indexdef )
535576 if match :
0 commit comments