diff --git a/LLDBPlugin/touchlab_kotlin_lldb/__init__.py b/LLDBPlugin/touchlab_kotlin_lldb/__init__.py
index 8196b49..58d67d1 100644
--- a/LLDBPlugin/touchlab_kotlin_lldb/__init__.py
+++ b/LLDBPlugin/touchlab_kotlin_lldb/__init__.py
@@ -1,5 +1,4 @@
 import os
-from typing import Optional
 
 import lldb
 
@@ -8,16 +7,13 @@
 from .util.log import log
 
 from .commands import FieldTypeCommand, SymbolByNameCommand, TypeByAddressCommand, GCCollectCommand
-from .types.summary import kotlin_object_type_summary, kotlin_objc_class_summary
-from .types.proxy import KonanProxyTypeProvider, KonanObjcProxyTypeProvider
+from .types.summary import kotlin_object_type_summary
+from .types.proxy import KonanProxyTypeProvider
 from .cache import LLDBCache
 
 os.environ['CLIENT_TYPE'] = 'Xcode'
 
 
-KONAN_INIT_PREFIX = '_Konan_init_'
-KONAN_INIT_MODULE_NAME = '[0-9a-zA-Z_]+'
-KONAN_INIT_SUFFIX = '_kexe'
 
 def __lldb_init_module(debugger: lldb.SBDebugger, _):
     log(lambda: "init start")
@@ -27,8 +23,6 @@ def __lldb_init_module(debugger: lldb.SBDebugger, _):
     register_commands(debugger)
     register_hooks(debugger)
 
-    configure_objc_types_init(debugger)
-
     log(lambda: "init end")
 
 
@@ -38,78 +32,6 @@ def reset_cache():
     LLDBCache.reset()
 
 
-def configure_objc_types_init(debugger: lldb.SBDebugger):
-    target = debugger.GetDummyTarget()
-    breakpoint = target.BreakpointCreateByRegex(
-        "^{}({})({})?$".format(KONAN_INIT_PREFIX, KONAN_INIT_MODULE_NAME, KONAN_INIT_SUFFIX)
-    )
-    breakpoint.SetOneShot(True)
-    breakpoint.SetAutoContinue(True)
-    breakpoint.SetScriptCallbackFunction('{}.{}'.format(__name__, configure_objc_types_breakpoint.__name__))
-
-
-def configure_objc_types_breakpoint(frame: lldb.SBFrame, bp_loc: lldb.SBBreakpointLocation, internal_dict):
-    process = frame.thread.process
-    target = process.target
-
-    symbols = target.FindSymbols('_OBJC_CLASS_RO_$_KotlinBase')
-
-    base_class_name: Optional[str] = None
-    for symbol_context in symbols:
-        error = lldb.SBError()
-        name_addr = process.ReadPointerFromMemory(symbol_context.symbol.addr.GetLoadAddress(target) + 6 * 4, error)
-        # TODO: Log error?
-        if not error.success:
-            continue
-        base_class_name = process.ReadCStringFromMemory(name_addr, 128, error)
-        # TODO: Log error?
-        if not error.success:
-            continue
-
-        break
-
-    module_name = frame.symbol.name.removeprefix(KONAN_INIT_PREFIX).removesuffix(KONAN_INIT_SUFFIX)
-    if module_name == "stdlib":
-        return False
-
-    specifiers_to_register = [
-        lldb.SBTypeNameSpecifier(
-            '^{}\\.'.format(module_name),
-            lldb.eMatchTypeRegex,
-        ),
-    ]
-
-    if base_class_name is not None:
-        objc_class_prefix = base_class_name.removesuffix("Base")
-        specifiers_to_register.append(
-            lldb.SBTypeNameSpecifier(
-                '^{}'.format(objc_class_prefix),
-                lldb.eMatchTypeRegex,
-            )
-        )
-
-    category = target.debugger.GetCategory(KOTLIN_CATEGORY)
-
-    for type_specifier in specifiers_to_register:
-        category.AddTypeSummary(
-            type_specifier,
-            lldb.SBTypeSummary.CreateWithFunctionName(
-                '{}.{}'.format(__name__, kotlin_objc_class_summary.__name__),
-                lldb.eTypeOptionHideValue,
-            )
-        )
-        category.AddTypeSynthetic(
-            type_specifier,
-            lldb.SBTypeSynthetic.CreateWithClassName(
-                '{}.{}'.format(__name__, KonanObjcProxyTypeProvider.__name__),
-            )
-        )
-
-    bp_loc.GetBreakpoint().SetEnabled(False)
-
-    return False
-
-
 def configure_types(debugger: lldb.SBDebugger):
     category = debugger.CreateCategory(KOTLIN_CATEGORY)
 
@@ -154,9 +76,4 @@ def register_hooks(debugger: lldb.SBDebugger):
     # Avoid Kotlin/Native runtime
     debugger.HandleCommand('settings set target.process.thread.step-avoid-regexp ^::Kotlin_')
 
-    hooks_to_register = [
-        KonanHook,
-    ]
-
-    for hook in hooks_to_register:
-        debugger.HandleCommand('target stop-hook add -P {}.{}'.format(__name__, hook.__name__))
+    debugger.HandleCommand('target stop-hook add -P {}.{}'.format(__name__, KonanHook.__name__))
diff --git a/LLDBPlugin/touchlab_kotlin_lldb/cache/__init__.py b/LLDBPlugin/touchlab_kotlin_lldb/cache/__init__.py
index f9b41a7..a9318c5 100644
--- a/LLDBPlugin/touchlab_kotlin_lldb/cache/__init__.py
+++ b/LLDBPlugin/touchlab_kotlin_lldb/cache/__init__.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Dict, Optional, Set
 
 import lldb
 
@@ -28,3 +28,13 @@ def __init__(self):
         self._array_header_type: Optional[lldb.SBType] = None
         self._runtime_type_size: Optional[lldb.value] = None
         self._runtime_type_alignment: Optional[lldb.value] = None
+        # Keys (UUID, or file path when UUID is empty) of modules fully handled
+        # for Kotlin type-formatter registration: either registered or ruled out.
+        self.registered_module_keys: Set[str] = set()
+        # Kotlin modules whose module-name formatters are registered but whose
+        # ObjC base-class prefix isn't readable yet; retried on later stops.
+        # key -> {'module', 'names', 'names_registered', 'attempts'}.
+        self.pending: Dict[str, dict] = {}
+        # Module count at the last scan; lets a stop with no new modules and no
+        # pending retries short-circuit to a single integer compare.
+        self.last_scanned_module_count: int = 0
diff --git a/LLDBPlugin/touchlab_kotlin_lldb/module_registration.py b/LLDBPlugin/touchlab_kotlin_lldb/module_registration.py
new file mode 100644
index 0000000..e1d215d
--- /dev/null
+++ b/LLDBPlugin/touchlab_kotlin_lldb/module_registration.py
@@ -0,0 +1,244 @@
+import re
+from typing import List, Optional
+
+import lldb
+
+from .types.base import KOTLIN_CATEGORY
+from .types.summary import kotlin_objc_class_summary
+from .types.proxy import KonanObjcProxyTypeProvider
+from .cache import LLDBCache
+from .util.log import log
+
+# Matches the per-module Kotlin/Native initializer symbol, e.g.
+# `_Konan_init_MyModule` (frameworks) or `_Konan_init_MyApp_kexe` (executables).
+# The captured group is the module name used to build the type-name specifier.
+_KONAN_INIT_RE = re.compile(r'^_Konan_init_([0-9a-zA-Z_]+?)(_kexe)?$')
+
+# A fixed Kotlin/Native runtime symbol present in every Kotlin binary. Used as a
+# cheap exact-name probe (hashed symbol lookup) to decide whether a module is a
+# Kotlin module before we iterate its full symbol table.
+_KOTLIN_RUNTIME_MARKER = 'Kotlin_initRuntimeIfNeeded'
+
+# ObjC RO-data symbol for the exported Kotlin base class. Its name field gives
+# the ObjC class-name prefix (e.g. `SharedBase` -> `Shared`) used to format
+# Kotlin objects bridged into Swift/ObjC.
+_KOTLIN_BASE_OBJC_SYMBOL = '_OBJC_CLASS_RO_$_KotlinBase'
+
+# Offset (in bytes) of the `name` pointer inside the ObjC class_ro_t struct.
+# Valid for the 64-bit layout; all current Apple targets are 64-bit.
+_OBJC_CLASS_RO_NAME_OFFSET = 6 * 4
+
+# Module file paths under these roots are system/runtime modules that can never
+# contain Kotlin/Native code (these also cover the dyld shared cache, whose
+# modules report these paths). Skipping them is what avoids the launch-time
+# slowdown: we never force LLDB to realize their symbol tables. Matched as a
+# substring so simulator/device runtime roots (e.g. `.../RuntimeRoot/usr/lib`)
+# are covered too. No trailing slash, so bare `/usr/lib` matches.
+_SYSTEM_PATH_MARKERS = ('/usr/lib', '/System/')
+
+# Max stops to keep retrying the ObjC base-class read before giving up. The read
+# can fail on the earliest stop(s), before dyld has rebased the class_ro_t name
+# pointer; it succeeds once the module is fully mapped.
+_MAX_REGISTER_ATTEMPTS = 16
+
+
+def _module_key(module: lldb.SBModule) -> str:
+    """Stable identity for a module across stops.
+
+    Prefers the content UUID; falls back to the file path when LLDB reports no
+    UUID, so two distinct no-UUID modules don't collide on a single key."""
+    return module.GetUUIDString() or module.GetFileSpec().fullpath or ''
+
+
+def _is_candidate_module(module: lldb.SBModule) -> bool:
+    """Cheap, symbol-table-free check: only app/embedded modules are candidates.
+
+    Reads just the module's file path (already known to LLDB at load time), so
+    it never triggers symbol-table realization for system frameworks."""
+    directory = module.GetFileSpec().GetDirectory() or ''
+    for marker in _SYSTEM_PATH_MARKERS:
+        if marker in directory:
+            return False
+    return True
+
+
+def _is_kotlin_module(module: lldb.SBModule) -> bool:
+    """Exact-name symbol probe (hashed lookup) for the Kotlin runtime marker."""
+    return len(module.FindSymbols(_KOTLIN_RUNTIME_MARKER)) > 0
+
+
+def _kotlin_module_names(module: lldb.SBModule) -> List[str]:
+    """Collect Kotlin module names from this module's `_Konan_init_*` symbols."""
+    names: List[str] = []
+    for symbol in module.symbols:
+        name = symbol.name
+        if name is None:
+            continue
+        match = _KONAN_INIT_RE.match(name)
+        if match is None:
+            continue
+        module_name = match.group(1)
+        if module_name == 'stdlib':
+            continue
+        names.append(module_name)
+    return names
+
+
+def _read_objc_class_prefix(
+    target: lldb.SBTarget,
+    process: lldb.SBProcess,
+    base_symbols: lldb.SBSymbolContextList,
+) -> Optional[str]:
+    """Read the exported Kotlin ObjC base class name and strip the `Base` suffix.
+
+    Returns None if the name pointer can't be read yet (e.g. not rebased on the
+    earliest stop) or if stripping leaves an empty prefix (which would produce a
+    `^` regex matching every type) — both signal the caller to retry/skip."""
+    for symbol_context in base_symbols:
+        error = lldb.SBError()
+        symbol_addr = symbol_context.symbol.addr.GetLoadAddress(target)
+        name_addr = process.ReadPointerFromMemory(symbol_addr + _OBJC_CLASS_RO_NAME_OFFSET, error)
+        if not error.success:
+            continue
+        base_class_name = process.ReadCStringFromMemory(name_addr, 128, error)
+        if not error.success or not base_class_name:
+            continue
+        prefix = base_class_name.removesuffix('Base')
+        return prefix or None
+    return None
+
+
+def _register_specifiers(target: lldb.SBTarget, specifiers: List[lldb.SBTypeNameSpecifier]):
+    category = target.debugger.GetCategory(KOTLIN_CATEGORY)
+    for type_specifier in specifiers:
+        category.AddTypeSummary(
+            type_specifier,
+            lldb.SBTypeSummary.CreateWithFunctionName(
+                '{}.{}'.format(kotlin_objc_class_summary.__module__, kotlin_objc_class_summary.__name__),
+                lldb.eTypeOptionHideValue,
+            ),
+        )
+        category.AddTypeSynthetic(
+            type_specifier,
+            lldb.SBTypeSynthetic.CreateWithClassName(
+                '{}.{}'.format(KonanObjcProxyTypeProvider.__module__, KonanObjcProxyTypeProvider.__name__),
+            ),
+        )
+
+
+def _finalize(cache: 'LLDBCache', key: str):
+    """Mark a module fully handled: stop scanning/retrying it."""
+    cache.pending.pop(key, None)
+    cache.registered_module_keys.add(key)
+
+
+def _classify_module(cache: 'LLDBCache', module: lldb.SBModule, key: str):
+    """Gate a newly-seen module, cheapest check first.
+
+    System modules and non-Kotlin frameworks are marked handled immediately
+    (no symbol-table realization for system modules). Genuine Kotlin modules are
+    placed in `cache.pending` for registration on this and later stops."""
+    if not _is_candidate_module(module):
+        cache.registered_module_keys.add(key)
+        return
+    if not _is_kotlin_module(module):
+        cache.registered_module_keys.add(key)
+        return
+
+    names = _kotlin_module_names(module)
+    if not names:
+        # Has the Kotlin runtime marker but exposes no module init symbols
+        # (e.g. only stdlib): nothing to register.
+        log(lambda: 'Kotlin marker present but no module names for {}; skipping.'.format(
+            module.GetFileSpec().GetFilename()))
+        cache.registered_module_keys.add(key)
+        return
+
+    cache.pending[key] = {
+        'module': module,
+        'names': names,
+        'names_registered': False,
+        'attempts': 0,
+    }
+
+
+def _attempt_register(target: lldb.SBTarget, process: lldb.SBProcess, cache: 'LLDBCache', key: str):
+    """One registration attempt for a pending Kotlin module.
+
+    Registers the `^\\.` specifiers once, then tries to read the ObjC
+    base-class prefix and register `^`. Retries the prefix on later
+    stops until it's readable or the attempt cap is hit."""
+    entry = cache.pending.get(key)
+    if entry is None:
+        return
+    entry['attempts'] += 1
+    module = entry['module']
+
+    if not entry['names_registered']:
+        _register_specifiers(target, [
+            lldb.SBTypeNameSpecifier('^{}\\.'.format(name), lldb.eMatchTypeRegex)
+            for name in entry['names']
+        ])
+        entry['names_registered'] = True
+
+    base_symbols = module.FindSymbols(_KOTLIN_BASE_OBJC_SYMBOL)
+    if base_symbols.GetSize() == 0:
+        # No exported ObjC base class here (size is checked explicitly: an empty
+        # SBSymbolContextList can still be truthy); module names are all we register.
+        _finalize(cache, key)
+        log(lambda: 'Registered Kotlin formatters for {} (modules={}, no ObjC base class).'.format(
+            module.GetFileSpec().GetFilename(), entry['names']))
+        return
+
+    objc_class_prefix = _read_objc_class_prefix(target, process, base_symbols)
+    if objc_class_prefix:
+        _register_specifiers(target, [
+            lldb.SBTypeNameSpecifier('^{}'.format(objc_class_prefix), lldb.eMatchTypeRegex)
+        ])
+        _finalize(cache, key)
+        log(lambda: 'Registered Kotlin formatters for {} (modules={}, objc_prefix={}).'.format(
+            module.GetFileSpec().GetFilename(), entry['names'], objc_class_prefix))
+        return
+
+    if entry['attempts'] >= _MAX_REGISTER_ATTEMPTS:
+        # Give up on the ObjC prefix; module-name formatters stay registered.
+        _finalize(cache, key)
+        log(lambda: 'Gave up reading ObjC prefix for {} after {} attempts; '
+                    '^ formatting unavailable.'.format(
+            module.GetFileSpec().GetFilename(), entry['attempts']))
+        return
+    # Otherwise leave it pending and retry on a later stop.
+
+
+def scan_and_register_modules(execution_context: lldb.SBExecutionContext):
+    """Lazily register Kotlin type formatters for any loaded Kotlin module.
+
+    Side effect only — never influences whether the process stops. Replaces the
+    old global `_Konan_init_*` regex breakpoint, which forced LLDB to realize the
+    symbol table of every loaded module at launch (the slow app launch under the
+    debugger). Steady-state cost is a single integer compare; new modules trigger
+    a full scan; pending modules get a cheap targeted retry."""
+    target = execution_context.target
+    if not target.IsValid():
+        return
+    process = target.GetProcess()
+    if not process.IsValid():
+        return
+
+    cache = LLDBCache.instance()
+    num_modules = target.GetNumModules()
+
+    if num_modules != cache.last_scanned_module_count:
+        cache.last_scanned_module_count = num_modules
+        for i in range(num_modules):
+            module = target.GetModuleAtIndex(i)
+            key = _module_key(module)
+            if key in cache.registered_module_keys or key in cache.pending:
+                continue
+            _classify_module(cache, module, key)
+
+    # Attempt (and retry) registration for everything pending. Iterating
+    # `pending` directly keeps the retry window O(pending), never O(all modules).
+    if cache.pending:
+        for key in list(cache.pending.keys()):
+            _attempt_register(target, process, cache, key)
diff --git a/LLDBPlugin/touchlab_kotlin_lldb/stepping/KonanHook.py b/LLDBPlugin/touchlab_kotlin_lldb/stepping/KonanHook.py
index 8bbb5e7..0db81f4 100644
--- a/LLDBPlugin/touchlab_kotlin_lldb/stepping/KonanHook.py
+++ b/LLDBPlugin/touchlab_kotlin_lldb/stepping/KonanHook.py
@@ -3,6 +3,8 @@
 from .KonanStepIn import KonanStepIn
 from .KonanStepOut import KonanStepOut
 from .KonanStepOver import KonanStepOver
+from ..module_registration import scan_and_register_modules
+from ..util.log import log
 
 KONAN_LLDB_DONT_SKIP_BRIDGING_FUNCTIONS = 'KONAN_LLDB_DONT_SKIP_BRIDGING_FUNCTIONS'
 MAX_SIZE_FOR_STOP_REASON = 20
@@ -18,6 +20,14 @@ def __init__(self, target: lldb.SBTarget, extra_args, _):
         pass
 
     def handle_stop(self, execution_context: lldb.SBExecutionContext, stream: lldb.SBStream) -> bool:
+        # Lazy Kotlin type-formatter registration runs as a side effect on every
+        # stop. Isolated in try/except so a registration failure can never alter
+        # this hook's stop/continue decision below.
+        try:
+            scan_and_register_modules(execution_context)
+        except Exception as e:
+            log(lambda: 'Kotlin module registration error: {}'.format(e))
+
         is_bridging_functions_skip_enabled = not execution_context.target.GetEnvironment().Get(
             KONAN_LLDB_DONT_SKIP_BRIDGING_FUNCTIONS
         )