diff --git a/tools/perf/README.md b/tools/perf/README.md new file mode 100644 index 0000000000..93ed4a9702 --- /dev/null +++ b/tools/perf/README.md @@ -0,0 +1,233 @@ +# iTerm2 Performance Testing Tools + +Scripts for stress testing and profiling iTerm2 builds with latency instrumentation. + +## Quick Start + +```bash +# Basic test (10 tabs, 20 seconds, normal mode) +./run_stress_test.sh /path/to/iTerm2.app + +# Compare behavior across tab counts +./run_stress_test.sh --tabs=1,3,10 /path/to/iTerm2.app + +# With title injection (exercises OSC 0 handling) +./run_stress_test.sh --title /path/to/iTerm2.app + +# With DTrace metrics (requires sudo) +./run_stress_test.sh --dtrace /path/to/iTerm2.app + +# With tmux wrapping (cleanup attempted) +./run_stress_test.sh --tmux /path/to/iTerm2.app + +# htop-style dashboard load +./run_stress_test.sh --mode=htop /path/to/iTerm2.app + +# Progress bars stress test +./run_stress_test.sh --mode=progress /path/to/iTerm2.app + +# Status grid with tmux wrapping +./run_stress_test.sh --tmux --mode=status /path/to/iTerm2.app +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `run_stress_test.sh` | Main test harness - opens iTerm2, creates tabs, runs stress load, profiles | +| `stress_load.py` | Unified load generator - terminal output stress and dashboard modes | +| `analyze_profile.py` | Analyzes `sample` profiler output for hotspots | +| `analyze_self_time.py` | Analyzes self-time profiler output, filters non-actionable symbols | +| `iterm_ux_metrics_v2.d` | DTrace script for frame rate and latency metrics | +| `iterm_self_time.d` | DTrace script for self-time (exclusive time) profiling | + +## Options + +``` +-t, --time=SEC Duration in seconds (default: 20) +--tabs=N,M,... Tab counts to test (runs separate test for each) +--title[=MS] Inject OSC 0 title changes (default: every 2000ms) +--fps=N Target frame rate for dashboard modes (default: 30, 0 = unthrottled) + Accepts decimals (e.g., 0.5). Ignored for stress modes. 
+--dtrace Enable DTrace UX metrics (requires sudo) +--self-time Enable self-time profiling (requires sudo, see below) +--inject Enable continuous responsiveness testing (see below) +--mode=MODES Stress mode(s), comma-separated (see Modes below) +--speed=SPEED Output speed: normal or slow +--tmux Wrap test in auto-cleanup tmux session +--load-script=PATH Use custom load generator (for non-built-in scripts) +--suite=NAME Use isolated UserDefaults suite (default: com.iterm2.defaults) + --suite=user uses normal iTerm2 preferences + --suite=none disables suite isolation +--forever Run indefinitely without profiling +``` + +## Modes + +The `--mode` flag selects the stress pattern. Multiple modes can be comma-separated +and will run sequentially, time-sliced within a single test. + +### Terminal Output Stress (unthrottled) + +| Mode | Description | +|------|-------------| +| `normal` | Mixed output patterns (ASCII, CJK, emoji, bidi), no screen clears (default) | +| `buffer` | Long lines (~600 chars), stresses line buffer handling | +| `clearcodes` | All patterns including screen clear/erase sequences | +| `flood` | Maximum throughput using `yes` command | + +### Dashboard/UI Stress (throttled by --fps, default 30) + +| Mode | Description | Code Paths Stressed | +|------|-------------|---------------------| +| `htop` | CPU meters + scrolling process list | Scroll regions, partial updates, color bars | +| `watch` | Full-screen clear + redraw | Burst rendering, screen clear, cursor home | +| `progress` | 20 progress bars updating in place | Cursor positioning, same-line overwrites | +| `table` | Fixed header + scroll region body | Scroll regions, selective scroll | +| `status` | Grid of color-coded service status cells | Frequent SGR changes, partial cell updates | + +### Special + +| Mode | Description | +|------|-------------| +| `all` | Runs all 8 modes sequentially within a single test | + +### Examples + +```bash +# All dashboard modes at 120fps +./run_stress_test.sh 
--mode=htop,watch,progress,table,status --fps=120 -t 50 /path/to/iTerm2.app + +# Mix stress and dashboard modes +./run_stress_test.sh --mode=normal,htop,buffer -t 30 /path/to/iTerm2.app + +# Dashboard unthrottled (as fast as possible) +./run_stress_test.sh --mode=htop --fps=0 /path/to/iTerm2.app +``` + +## Responsiveness Testing + +The `--inject` option enables continuous interaction injection to measure UI responsiveness +under load. This exercises the latency instrumentation code paths throughout the test: + +| Event Type | Interval | Purpose | +|------------|----------|---------| +| Keyboard input | 500ms | Single character input to measure key-to-screen latency | +| Scroll events | 2s | Page Up/Down alternating to test scroll responsiveness | +| Tab switches | 3s | Cycles through all tabs to test tab change latency | + +### Example + +```bash +# Run stress test with responsiveness injection +./run_stress_test.sh --inject /path/to/iTerm2.app + +# Combined with title injection for OSC handling +./run_stress_test.sh --inject --title /path/to/iTerm2.app + +# Full instrumentation with DTrace +sudo ./run_stress_test.sh --inject --dtrace /path/to/iTerm2.app +``` + +The injection summary is printed at test completion showing event counts. + +## Self-Time Profiling + +The `--self-time` option enables DTrace-based profiling that shows "self time" - the time +functions actually spend executing their own code, not time spent in functions they call. 
+ +This is more actionable than total/inclusive time because: +- Functions like `main()` appear at the top of every call stack but do no real work +- High-level callers (like event loops) dominate inclusive time +- Self-time shows which functions actually burn CPU + +### Example + +```bash +# Run stress test with self-time profiling +sudo ./run_stress_test.sh --self-time /path/to/iTerm2.app + +# Combined with other options +sudo ./run_stress_test.sh --self-time --dtrace --tabs=5,10 /path/to/iTerm2.app +``` + +### Output + +The analysis script filters results into categories: +- **Actionable iTerm2 functions** - Code you can optimize +- **System hotspots** - Runtime overhead (objc_msgSend, malloc) for awareness +- **Other code** - Libraries and frameworks + +High system overhead (>40%) suggests opportunities like: +- Batching operations to reduce objc_msgSend calls +- Object pooling to reduce malloc/free +- Caching to reduce repeated lookups + +### Scripts + +| Script | Purpose | +|--------|---------| +| `iterm_self_time.d` | DTrace script using profile provider at 997Hz | +| `analyze_self_time.py` | Parses output, filters non-actionable symbols | + +## Tmux Wrapping + +The `--tmux` option wraps the entire test in a tmux session that auto-cleans on exit: +- Session is killed on normal exit, Ctrl-C, or crash +- Prevents orphaned stress processes if the harness is killed +- Session name: `iterm2-perf--` +- Interactive: attaches to session for live viewing +- Non-interactive: waits for session to complete + +## Suite Presets + +The `suites/` directory contains plist files for reproducible test configurations. +These are used with the `--suite=` option which isolates preferences via NSUserDefaults suites. 
+ +### Available Suites + +| Suite | Purpose | +|-------|---------| +| `com.iterm2.defaults` | Empty suite for clean default behavior | +| `com.iterm2.fairness` | Enables `useFairnessScheduler` (requires PR #568) | + +### Installation + +Manually copy the desired suite to `~/Library/Preferences/`: + +```bash +cp tools/perf/suites/com.iterm2.fairness.plist ~/Library/Preferences/ +``` + +### Usage + +```bash +# Test with fairness scheduler enabled +./run_stress_test.sh --suite=com.iterm2.fairness /path/to/iTerm2.app + +# Test with clean defaults +./run_stress_test.sh --suite=com.iterm2.defaults /path/to/iTerm2.app + +# Default: com.iterm2.defaults (auto-created empty suite for isolation) +./run_stress_test.sh /path/to/iTerm2.app +``` + +Suite plists are stored separately from your normal iTerm2 preferences (`com.googlecode.iterm2.plist`), +so testing with different suites won't affect your personal settings. + +## Output + +The test produces: +- **Profile analysis** - CPU hotspots from `sample` profiler +- **Latency metrics** - KeyboardInput, TitleUpdate timings (from instrumented builds) +- **Timer analysis** - GCD/NSTimer efficiency, cadence stability +- **DTrace metrics** - Frame rates, adaptive mode, lock contention (if --dtrace) +- **Summary table** - Cross-run comparison when testing multiple tab counts + +## Requirements + +- macOS with `sample` profiler +- Python 3 +- For --tmux: tmux installed +- For --dtrace and --self-time: sudo access +- Instrumented iTerm2 build (for latency metrics) diff --git a/tools/perf/analyze_profile.py b/tools/perf/analyze_profile.py new file mode 100644 index 0000000000..6f2d071658 --- /dev/null +++ b/tools/perf/analyze_profile.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Analyze macOS sample profiler output for iTerm2. + +Usage: + python3 analyze_profile.py <profile_file> + +Reads a profile file generated by the macOS `sample` command and +prints a summary of iTerm2-relevant hotspots and patterns. 
+""" + +import os +import re +import sys + + +def analyze_profile(output_file): + """Analyze the profile output for hotspots and inefficiencies.""" + print(f"\nAnalyzing profile: {output_file}") + + if not os.path.exists(output_file): + print("Error: Profile output file not found") + return False + + with open(output_file, 'r') as f: + content = f.read() + + # Count occurrences of key patterns. + patterns = { + "boolForKey": r'\[iTermPreferences boolForKey:\]', + "intForKey": r'\[iTermPreferences intForKey:\]', + "objectForKey": r'\[iTermPreferences objectForKey:\]', + "updateConfigurationFields": r'updateConfigurationFields', + "NSUserDefaults": r'NSUserDefaults', + "@synchronized": r'@synchronized', + "os_unfair_lock": r'os_unfair_lock', + # StringToScreenChars and text processing + "StringToScreenChars": r'StringToScreenChars', + "ScreenCharArray": r'ScreenCharArray', + "bidi/Bidi": r'[Bb]idi', + "VT100Terminal": r'VT100Terminal', + "executeToken": r'executeToken', + # Metal rendering + "Metal": r'Metal|metal|MTL|CAMetalLayer', + "iTermTextRenderer": r'iTermTextRenderer', + } + + # Broader categories for spotting redundant work or churn. 
+ categories = { + "Allocations": r'\b(malloc|calloc|realloc|free|operator new|operator delete)\b', + "ObjC retain/release": r'objc_(retain|release|autoreleaseReturnValue|retainAutoreleasedReturnValue)', + "Autorelease pools": r'NSAutoreleasePool|autoreleasepool', + "Strings/Unicode": r'NSString|CFString|StringToScreenChars|ScreenCharArray', + "CoreText": r'CTLine|CTRun|CTFont|CoreText', + "CoreGraphics": r'CGContext|CGColor|CGPath|CGImage|CoreGraphics', + "AppKit geometry": r'NSRect|NSMakeRect|convertRect|bounds|frame', + "Locks/dispatch": r'os_unfair_lock|pthread_mutex|dispatch_semaphore|@synchronized', + "Terminal parsing": r'VT100Parser|VT100Terminal|VT100Screen|executeToken', + "Rendering": r'iTermTextRenderer|Metal|metal|MTL|CAMetalLayer', + "Process/cache": r'iTermProcessCache|TaskNotifier|deepestForegroundJob', + } + + print("\n" + "=" * 60) + print("Profile Summary") + print("=" * 60) + + for name, pattern in patterns.items(): + count = len(re.findall(pattern, content)) + print(f" {name}: {count} occurrences") + + print("=" * 60) + print("\n" + "=" * 60) + print("Category Summary") + print("=" * 60) + for name, pattern in categories.items(): + count = len(re.findall(pattern, content)) + print(f" {name}: {count} occurrences") + print("=" * 60) + + # Extract top iTerm2 symbols from the call graph. 
+ symbol_pattern = re.compile(r'^\s*[+!:|]*\s*(\d+)\s+(.+?)\s+\(in iTerm2(?:\.debug\.dylib)?\)') + counts = {} + for line in content.splitlines(): + match = symbol_pattern.match(line) + if not match: + continue + count = int(match.group(1)) + symbol = match.group(2).strip() + if count > counts.get(symbol, 0): + counts[symbol] = count + + if counts: + print("\n" + "=" * 60) + print("Top iTerm2 Symbols (by sample count)") + print("=" * 60) + for symbol, count in sorted(counts.items(), key=lambda item: item[1], reverse=True)[:15]: + print(f" {count} {symbol}") + print("=" * 60) + + print(f"\nFull profile saved to: {output_file}") + return True + + +def main(): + if len(sys.argv) != 2: + print("Usage: python3 analyze_profile.py ") + sys.exit(1) + + if not analyze_profile(sys.argv[1]): + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/perf/analyze_self_time.py b/tools/perf/analyze_self_time.py new file mode 100755 index 0000000000..b594bc4391 --- /dev/null +++ b/tools/perf/analyze_self_time.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +""" +Analyze self-time profiling output from iterm_self_time.d + +Parses DTrace output and: +- Filters out non-actionable system symbols (objc_msgSend, malloc, etc.) 
import re
import sys
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

# Symbols to filter from the "actionable" list (exact-name matches).
# These are runtime/system overhead, not application code.
SYSTEM_SYMBOLS = {
    # Objective-C runtime
    'objc_msgSend', 'objc_msgSendSuper', 'objc_msgSend_stret',
    'objc_msgSendSuper2', 'objc_msgSend_uncached',
    'objc_retain', 'objc_release', 'objc_autorelease',
    'objc_retainAutoreleasedReturnValue', 'objc_autoreleaseReturnValue',
    'objc_storeStrong', 'objc_destroyWeak', 'objc_loadWeakRetained',
    'objc_alloc', 'objc_alloc_init', 'objc_opt_new',
    '_objc_rootAllocWithZone', '_objc_rootDealloc',

    # Memory allocation
    'malloc', 'free', 'calloc', 'realloc', 'malloc_zone_malloc',
    'malloc_zone_free', 'malloc_zone_realloc', 'malloc_zone_calloc',
    'nanov2_malloc', 'nanov2_free', 'szone_malloc', 'szone_free',

    # Memory operations
    'memmove', 'memcpy', 'memset', 'bzero', '__bzero',
    'memcmp', 'strlen', 'strcmp', 'strncmp',

    # libdispatch
    '_dispatch_lane_invoke', '_dispatch_worker_thread2',
    '_dispatch_queue_override_invoke', '_dispatch_call_block_and_release',
    '_dispatch_workloop_worker_thread', '_dispatch_continuation_pop',
    '_dispatch_client_callout', '_dispatch_sync_f_slow',

    # Thread management
    'start_wqthread', 'thread_start', '_pthread_wqthread',
    '__psynch_cvwait', '__psynch_mutexwait', '__semwait_signal',
    'pthread_mutex_lock', 'pthread_mutex_unlock',

    # System calls / kernel
    'mach_msg_trap', 'mach_msg', '__ulock_wait', '__ulock_wake',
    'kevent_qos', 'kevent_id', '__select', '__pselect',

    # CoreFoundation / Foundation internals
    'CFRelease', 'CFRetain', '_CFRelease', '_CFRetain',
    'CFArrayGetCount', 'CFDictionaryGetValue',

    # Other system
    'dyld_stub_binder', '_dyld_start',
}

# Symbol-name prefixes treated as system code (runtime/kernel symbols only;
# NS/CF framework prefixes are handled by module-based classification).
# 'ImageLoaderMachO::' lives here because a glob such as
# 'ImageLoaderMachO::*' can never match inside the exact-name set above.
_SYSTEM_SYMBOL_PREFIXES = (
    'objc_', '_objc_', 'malloc_', 'szone_', 'nanov2_',
    '_dispatch_', '__pthread_', '_pthread_', 'pthread_',
    'mach_', '__mach_', 'dyld_', '_dyld_',
    '__ulock_', '__psynch_', '__semwait_',
    'ImageLoaderMachO::',
)

# Markers that indicate iTerm2/user code (case-insensitive substring match,
# so 'tab' also matches names such as 'table').
ITERM_MARKERS = [
    'iterm', 'pty', 'vt100', 'metal', 'terminal',
    'screen', 'session', 'tab', 'window', 'profile',
    'conductor', 'token', 'fairness', 'scheduler',
]

# Known system frameworks/libraries (module names after normalization)
SYSTEM_MODULES = {
    'CoreFoundation', 'Foundation', 'AppKit', 'CoreGraphics',
    'CoreText', 'QuartzCore', 'Metal', 'MetalPerformanceShaders',
    'IOKit', 'Security', 'SystemConfiguration',
}

# Section markers emitted by iterm_self_time.d, mapped to the parser state
# they switch to (None means "outside any section of interest").
_SECTION_MARKERS: Dict[str, Optional[str]] = {
    '===SELF_TIME===': 'self_time',
    '===END_SELF_TIME===': None,
    '===STACKS===': 'stacks',
    '===END_STACKS===': None,
}

# DTrace stack frame line, e.g. "  iTerm2`symbolname+0x123",
# "  iTerm2`symbolname+123" or "  libsystem_malloc.dylib`malloc+0x45".
# Objective-C symbols can contain spaces ("-[Foo bar:baz:]"); the offset may
# be hex (+0x...), decimal (+...), or absent.
_FRAME_PATTERN = re.compile(r'^\s+([^`]+)`(.+?)(?:\+(?:0x)?[0-9a-fA-F]+)?$')
# A sample count: a single number on an otherwise blank, indented line.
_COUNT_PATTERN = re.compile(r'^\s+(\d+)$')


def is_system_symbol(symbol: str) -> bool:
    """Return True if *symbol* is a non-actionable runtime/system symbol."""
    return symbol in SYSTEM_SYMBOLS or symbol.startswith(_SYSTEM_SYMBOL_PREFIXES)


def is_iterm_symbol(symbol: str) -> bool:
    """Return True if *symbol* contains any ITERM_MARKERS entry, ignoring case."""
    symbol_lower = symbol.lower()
    return any(marker in symbol_lower for marker in ITERM_MARKERS)


def normalize_module(module: str) -> str:
    """Normalize a module name to handle path and architecture variations.

    DTrace may report modules as:
    - "iTerm2"
    - "iTerm2.app/Contents/MacOS/iTerm2"
    - "iTerm2 (x86_64)"
    """
    # Strip path components
    module = module.split('/')[-1]
    # Strip architecture suffix like " (x86_64)"
    if ' (' in module:
        module = module.split(' (')[0]
    return module


def is_iterm2_module(module: str) -> bool:
    """Return True if *module* (normalized or not) is the iTerm2 binary."""
    return normalize_module(module) == 'iTerm2'


def is_system_module(module: str) -> bool:
    """Return True if *module* is a known system library/framework.

    Assumes *module* is already normalized.
    """
    return module.startswith('lib') or module in SYSTEM_MODULES


def parse_dtrace_output(lines: List[str]) -> Tuple[Dict[str, int], Dict[str, int], int]:
    """Parse iterm_self_time.d output.

    Args:
        lines: Raw output lines, with or without trailing newlines.

    Returns:
        A tuple ``(self_time_counts, iterm_attributed, unattributed_stacks)``:
        leaf-frame sample counts keyed by ``"module:symbol"``, sample counts
        attributed to the iTerm2 frame closest to the leaf of each stack
        (keyed by symbol), and the number of stack samples that contained no
        iTerm2 frame.
    """
    self_time_counts: Dict[str, int] = defaultdict(int)
    iterm_attributed: Dict[str, int] = defaultdict(int)
    unattributed_stacks = 0
    current_section: Optional[str] = None
    current_frames: List[Tuple[str, str]] = []

    for raw_line in lines:
        line = raw_line.rstrip()

        # Section markers switch state and discard any partial entry.
        if line in _SECTION_MARKERS:
            current_section = _SECTION_MARKERS[line]
            current_frames = []
            continue

        if current_section is None:
            continue

        frame_match = _FRAME_PATTERN.match(line)
        if frame_match:
            module = normalize_module(frame_match.group(1))
            current_frames.append((module, frame_match.group(2)))
            continue

        # A count line closes the entry whose frames were just collected.
        count_match = _COUNT_PATTERN.match(line)
        if count_match and current_frames:
            count = int(count_match.group(1))

            if current_section == 'self_time':
                # DTrace prints the leaf frame first.
                module, symbol = current_frames[0]
                self_time_counts[f"{module}:{symbol}"] += count
            else:
                # Leaf first, so the first iTerm2 frame is closest to the leaf.
                for module, symbol in current_frames:
                    if is_iterm2_module(module) and 'DYLD-STUB' not in symbol:
                        iterm_attributed[symbol] += count
                        break
                else:
                    unattributed_stacks += count

            current_frames = []
            continue

        # Empty line resets the frame accumulator.
        if not line.strip():
            current_frames = []

    return self_time_counts, iterm_attributed, unattributed_stacks


def _percent(part: int, whole: int) -> float:
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
    return 100 * part / whole if whole else 0.0


def _print_leaf_table(title: str, rows: List[Tuple[str, int]], total: int, limit: int) -> None:
    """Print up to *limit* rows of (symbol, count) under a titled header."""
    print("-" * 70)
    print(title)
    print("-" * 70)
    print(f"{'Samples':>10} {'Self%':>6} Function")
    print(f"{'-'*10} {'-'*6} {'-'*50}")
    for symbol, count in rows[:limit]:
        print(f"{count:>10,} {_percent(count, total):>5.1f}% {symbol}")


def analyze_and_report(self_time_counts: Dict[str, int], iterm_attributed: Dict[str, int], unattributed_stacks: int) -> None:
    """Categorize the parsed counts and print the report to stdout.

    Args:
        self_time_counts: Leaf-frame counts keyed by ``"module:symbol"``.
        iterm_attributed: Stack samples attributed to iTerm2 symbols.
        unattributed_stacks: Stack samples with no iTerm2 frame.

    Percentages are reported as 0.0 when their denominator is zero, so input
    that contains only a STACKS section still produces a report.
    """
    if not self_time_counts and not iterm_attributed:
        print("No self-time data found in input.")
        print("Make sure the input contains DTrace output from iterm_self_time.d")
        return

    total_samples = sum(self_time_counts.values())

    iterm_symbols: List[Tuple[str, int]] = []
    system_symbols: List[Tuple[str, int]] = []
    other_symbols: List[Tuple[str, int]] = []

    for key, count in self_time_counts.items():
        # Keys are "module:symbol"; split on the first colon only because
        # Objective-C selectors contain colons themselves.
        if ':' in key:
            module, symbol = key.split(':', 1)
        else:
            module, symbol = '', key

        # Categorize by module first (most reliable), then fall back to the
        # symbol-name heuristics for unknown modules.
        if is_iterm2_module(module):
            # DYLD stubs inside the iTerm2 module are calls into system code.
            bucket = system_symbols if 'DYLD-STUB' in symbol else iterm_symbols
        elif is_system_module(module) or is_system_symbol(symbol):
            bucket = system_symbols
        elif is_iterm_symbol(symbol):
            bucket = iterm_symbols
        else:
            bucket = other_symbols
        bucket.append((symbol, count))

    for bucket in (iterm_symbols, system_symbols, other_symbols):
        bucket.sort(key=lambda item: item[1], reverse=True)

    iterm_total = sum(c for _, c in iterm_symbols)
    system_total = sum(c for _, c in system_symbols)
    other_total = sum(c for _, c in other_symbols)

    print("=" * 70)
    print("Self-Time Analysis Report")
    print("=" * 70)
    print()

    # First show iTerm2-attributed samples (most actionable).
    if iterm_attributed or unattributed_stacks:
        attributed_total = sum(iterm_attributed.values())
        stacks_total = attributed_total + unattributed_stacks
        attributed_sorted = sorted(iterm_attributed.items(), key=lambda item: item[1], reverse=True)

        print("-" * 70)
        print("iTerm2 CALLERS (attributed from call stacks)")
        print("-" * 70)
        print("iTerm2 functions whose call stacks lead to CPU usage:")
        print()
        print(f"{'Samples':>10} {'%':>6} Function")
        print(f"{'-'*10} {'-'*6} {'-'*50}")

        for symbol, count in attributed_sorted[:25]:
            print(f"{count:>10,} {_percent(count, attributed_total):>5.1f}% {symbol}")

        print()
        print(f"Stack samples: {stacks_total:,} total")
        print(f" Attributed to iTerm2: {attributed_total:,} ({_percent(attributed_total, stacks_total):.1f}%)")
        print(f" No iTerm2 frame: {unattributed_stacks:,} ({_percent(unattributed_stacks, stacks_total):.1f}%)")
        print()

    print("-" * 70)
    print("RAW SELF-TIME (what's actually executing)")
    print("-" * 70)
    print()
    print(f"Total samples: {total_samples:,}")
    print(f" iTerm2 code: {iterm_total:>8,} ({_percent(iterm_total, total_samples):5.1f}%)")
    print(f" System code: {system_total:>8,} ({_percent(system_total, total_samples):5.1f}%)")
    print(f" Other code: {other_total:>8,} ({_percent(other_total, total_samples):5.1f}%)")
    print()

    _print_leaf_table("TOP ACTIONABLE FUNCTIONS (iTerm2 code - raw self-time)",
                      iterm_symbols, total_samples, 25)
    if not iterm_symbols:
        print(" (no iTerm2 symbols found)")

    print()
    _print_leaf_table("SYSTEM FUNCTIONS (exclusive leaf samples)",
                      system_symbols, total_samples, 15)

    print()
    _print_leaf_table("OTHER LEAF FUNCTIONS (libraries, frameworks)",
                      other_symbols, total_samples, 15)


def main():
    """Read DTrace output from the file named in argv[1], or stdin, and report."""
    if len(sys.argv) > 1:
        # Replace undecodable bytes so an odd symbol name cannot abort parsing.
        with open(sys.argv[1], 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()
    else:
        lines = sys.stdin.readlines()

    if not lines:
        print("Usage: python3 analyze_self_time.py [dtrace_output.txt]")
        print(" or: dtrace ... | python3 analyze_self_time.py")
        sys.exit(1)

    self_time_counts, iterm_attributed, unattributed_stacks = parse_dtrace_output(lines)
    analyze_and_report(self_time_counts, iterm_attributed, unattributed_stacks)


if __name__ == '__main__':
    main()
Ctrl-C to stop.\n"); + start = timestamp; +} + +objc$target:PTYSession:-updateDisplayBecause*:entry +{ + @updates = count(); +} + +objc$target:PTYSession:-refresh:entry +{ + @refreshes = count(); +} + +objc$target:PTYTextView:-refresh:entry +{ + @textview_refreshes = count(); +} + +objc$target:VT100Screen*:-sync*:entry +{ + @syncs = count(); +} + +objc$target:VT100ScreenMutableState:-performBlockWithJoinedThreads*:entry +{ + @joined_blocks = count(); +} + +/* FairnessScheduler probes - these fire only when FairnessScheduler is active */ +objc$target:iTermFairnessScheduler:-register*:entry +{ + @fairness_register = count(); +} + +objc$target:iTermFairnessScheduler:-sessionDidEnqueueWork*:entry +{ + @fairness_enqueue = count(); +} + +objc$target:iTermTokenExecutor:-executeTurnWithTokenBudget*:entry +{ + @fairness_execute_turn = count(); +} + +dtrace:::END +{ + duration_sec = (timestamp - start) / 1000000000; + printf("\n============================================================\n"); + printf("DTrace Performance Summary (duration: %d sec)\n", duration_sec); + printf("============================================================\n"); + + printa(" updateDisplayBecause: %@d calls\n", @updates); + printa(" PTYSession refresh: %@d calls\n", @refreshes); + printa(" PTYTextView refresh: %@d calls\n", @textview_refreshes); + printa(" VT100Screen sync: %@d calls\n", @syncs); + printa(" joinedThreads blocks: %@d calls\n", @joined_blocks); + + printf("\nFairnessScheduler (0 = legacy path):\n"); + printa(" register: %@d calls\n", @fairness_register); + printa(" sessionDidEnqueueWork: %@d calls\n", @fairness_enqueue); + printa(" executeTurn: %@d calls\n", @fairness_execute_turn); + + normalize(@updates, duration_sec); + normalize(@refreshes, duration_sec); + normalize(@textview_refreshes, duration_sec); + normalize(@syncs, duration_sec); + + printf("\nRates:\n"); + printa(" updateDisplay/sec: %@d\n", @updates); + printa(" PTYSession refresh/sec: %@d\n", @refreshes); + printa(" 
PTYTextView refresh/sec:%@d\n", @textview_refreshes); + printa(" sync/sec: %@d\n", @syncs); +} diff --git a/tools/perf/iterm_perf_csv.d b/tools/perf/iterm_perf_csv.d new file mode 100644 index 0000000000..63c8c42bf4 --- /dev/null +++ b/tools/perf/iterm_perf_csv.d @@ -0,0 +1,38 @@ +#!/usr/sbin/dtrace -s + +/* DTrace script for iTerm2 performance metrics - CSV output for parsing */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + start = timestamp; +} + +objc$target:PTYSession:-updateDisplayBecause*:entry +{ + @updates = count(); +} + +objc$target:PTYTextView:-refresh:entry +{ + @refreshes = count(); +} + +objc$target:VT100Screen*:-sync*:entry +{ + @syncs = count(); +} + +dtrace:::END +{ + duration_sec = (timestamp - start) / 1000000000; + + /* Output one CSV row: updates,refreshes,syncs (the duration follows on its own line below). + * The aggregations have no keys, so the value must be formatted with %@d, as iterm_perf.d does. */ + printa("%@d,", @updates); + printa("%@d,", @refreshes); + printa("%@d\n", @syncs); + + /* Then print the run duration as a key=value line; printf writes to the same output stream as the CSV row */ + printf("duration_sec=%d\n", duration_sec); +} diff --git a/tools/perf/iterm_self_time.d b/tools/perf/iterm_self_time.d new file mode 100755 index 0000000000..7d0f6406d0 --- /dev/null +++ b/tools/perf/iterm_self_time.d @@ -0,0 +1,86 @@ +#!/usr/sbin/dtrace -s +/* + * iTerm2 Self-Time Profiler + * + * Uses DTrace profile provider to sample the stack at fixed intervals. + * Aggregates by the bottom frame (actual executing function) to get self-time. + * This shows which functions actually burn CPU, not just which are high in + * the call stack. 
+ * + * Usage: sudo dtrace -p PID -s iterm_self_time.d [DURATION_SECONDS] + * Duration defaults to 0 (run until Ctrl-C) + * + * Output format (machine-parseable): + * Section markers: ===SECTION_NAME=== + * Each entry: stack frames (one per line), then count on its own line + * Frame format: module`symbol+offset + * Count format: whitespace + number + */ + +#pragma D option quiet +/* defaultargs: an omitted DURATION_SECONDS makes $1 evaluate to 0 instead of failing to compile */ +#pragma D option defaultargs +#pragma D option ustackframes=100 +#pragma D option bufsize=16m + +dtrace:::BEGIN +{ + start = timestamp; + seconds = 0; + duration = $1 > 0 ? $1 : 0; + printf("Sampling iTerm2 self-time at 997Hz...\n"); + /* Print the numeric value: macro arguments are not substituted inside string literals */ + printf("Duration: %d sec (0 = until Ctrl-C)\n", duration); + printf("Press Ctrl-C to stop and see results.\n\n"); +} + +tick-1sec +{ + seconds++; + printf("\r Elapsed: %d sec", seconds); +} + +tick-1sec +/duration > 0 && seconds >= duration/ +{ + exit(0); +} + +/* + * Profile at 997Hz (prime number to avoid aliasing with periodic events). + * Only sample when the target process is on-CPU. + * + * ustack(1) captures just the currently executing function - this is + * what gives us "self time" since it's the function actually on CPU + * when the sample fires. 
+ */ +profile-997 +/pid == $target/ +{ + /* Count by single bottom frame for self-time */ + @self_time[ustack(1)] = count(); + + /* Also collect deeper stacks for context (top 15 frames) */ + @stacks[ustack(15)] = count(); +} + +dtrace:::END +{ + duration_ns = timestamp - start; + duration_sec = duration_ns / 1000000000; + + printf("\n\n"); + printf("===HEADER===\n"); + printf("duration_sec=%d\n", duration_sec); + printf("sample_hz=997\n"); + printf("===END_HEADER===\n"); + + printf("\n===SELF_TIME===\n"); + /* Show top 50 self-time entries */ + trunc(@self_time, 50); + printa(@self_time); + printf("===END_SELF_TIME===\n"); + + printf("\n===STACKS===\n"); + /* Show top 20 full stacks */ + trunc(@stacks, 20); + printa(@stacks); + printf("===END_STACKS===\n"); +} diff --git a/tools/perf/iterm_ux_metrics.d b/tools/perf/iterm_ux_metrics.d new file mode 100755 index 0000000000..cebc9ac6ac --- /dev/null +++ b/tools/perf/iterm_ux_metrics.d @@ -0,0 +1,154 @@ +#!/usr/sbin/dtrace -s + +/* + * User-experience focused metrics for iTerm2: + * 1. Apparent frame rate - how often does the user see updated content? + * 2. Latency - time from data sync to frame display + * 3. Lock contention - time wasted waiting for locks + */ + +#pragma D option quiet + +dtrace:::BEGIN +{ + start = timestamp; + printf("Tracing iTerm2 UX metrics... Ctrl-C to stop.\n"); + + /* For latency tracking */ + last_sync_time = 0; + + /* For contention tracking */ + total_lock_wait_ns = 0; + lock_acquisitions = 0; +} + +/* ============================================================ + * 1. APPARENT FRAME RATE + * Count actual frames handed to GPU for display + * ============================================================ */ + +objc$target:iTermMetalFrameData:-willHandOffToGPU:entry +{ + @frames = count(); +} + +/* Also track drawRect for non-Metal path */ +objc$target:PTYTextView:-drawRect?:entry +{ + @drawrect_frames = count(); +} + +/* ============================================================ + * 2. 
LATENCY - Time from data arrival to display + * Track sync -> frame handoff timing + * ============================================================ */ + +/* Record when sync starts. NOTE(review): self->sync_start is never read in this script; latency below is measured from the sync return instead */ +objc$target:VT100Screen*:-sync*:entry +{ + self->sync_start = timestamp; +} + +/* last_sync_time is a global (not thread-local), so it holds the most recent sync return from any thread */ +objc$target:VT100Screen*:-sync*:return +{ + last_sync_time = timestamp; + @syncs = count(); +} + +/* Measure time from last sync to frame handoff */ +objc$target:iTermMetalFrameData:-willHandOffToGPU:entry +/last_sync_time > 0/ +{ + this->latency_ns = timestamp - last_sync_time; + @latency_avg = avg(this->latency_ns); + @latency_min = min(this->latency_ns); + @latency_max = max(this->latency_ns); + /* Histogram in milliseconds */ + @latency_hist = quantize(this->latency_ns / 1000000); +} + +/* ============================================================ + * 3. LOCK CONTENTION - Time wasted waiting for locks + * ============================================================ */ + +pid$target:libsystem_pthread.dylib:pthread_mutex_lock:entry +{ + self->lock_entry = timestamp; +} + +pid$target:libsystem_pthread.dylib:pthread_mutex_lock:return +/self->lock_entry/ +{ + this->wait_time = timestamp - self->lock_entry; + /* Every call is counted here: total time spent inside pthread_mutex_lock, with no minimum-wait filter applied */ + @lock_wait_total = sum(this->wait_time); + @lock_calls = count(); + /* Track waits > 100us as "significant" contention */ + @significant_waits = sum(this->wait_time > 100000 ? 1 : 0); + @significant_wait_time = sum(this->wait_time > 100000 ? 
this->wait_time : 0); + self->lock_entry = 0; +} + +/* ============================================================ + * Also track joined threads blocks (known contention point) + * ============================================================ */ + +objc$target:VT100ScreenMutableState:-performBlockWithJoinedThreads*:entry +{ + self->joined_entry = timestamp; + @joined_calls = count(); +} + +objc$target:VT100ScreenMutableState:-performBlockWithJoinedThreads*:return +/self->joined_entry/ +{ + this->joined_time = timestamp - self->joined_entry; + @joined_time_total = sum(this->joined_time); + @joined_time_avg = avg(this->joined_time); + self->joined_entry = 0; +} + +/* ============================================================ + * OUTPUT + * ============================================================ */ + +dtrace:::END +{ + duration_ns = timestamp - start; + duration_sec = duration_ns / 1000000000; + duration_ms = duration_ns / 1000000; + + printf("\n"); + printf("============================================================\n"); + printf("iTerm2 UX Metrics (duration: %d sec)\n", duration_sec); + printf("============================================================\n"); + + printf("\n--- APPARENT FRAME RATE ---\n"); + printa(" Metal frames: %@d\n", @frames); + printa(" drawRect frames: %@d\n", @drawrect_frames); + + printf("\n--- LATENCY (sync -> frame) ---\n"); + printa(" Syncs: %@d\n", @syncs); + printa(" Avg latency: %@d ns\n", @latency_avg); + printa(" Min latency: %@d ns\n", @latency_min); + printa(" Max latency: %@d ns\n", @latency_max); + printf("\n Latency distribution (ms):\n"); + printa("%@d\n", @latency_hist); + + printf("\n--- LOCK CONTENTION ---\n"); + printa(" Total lock calls: %@d\n", @lock_calls); + printa(" Total wait time: %@d ns\n", @lock_wait_total); + printa(" Significant waits: %@d (>100us)\n", @significant_waits); + printa(" Significant time: %@d ns\n", @significant_wait_time); + + printf("\n--- JOINED THREADS (sync contention) ---\n"); + 
printa(" Joined block calls: %@d\n", @joined_calls); + printa(" Total joined time: %@d ns\n", @joined_time_total); + printa(" Avg joined time: %@d ns\n", @joined_time_avg); + + printf("\n--- RATES ---\n"); + normalize(@frames, duration_sec); + normalize(@syncs, duration_sec); + printa(" Frames/sec: %@d\n", @frames); + printa(" Syncs/sec: %@d\n", @syncs); +} diff --git a/tools/perf/iterm_ux_metrics_v2.d b/tools/perf/iterm_ux_metrics_v2.d new file mode 100755 index 0000000000..df7c47f309 --- /dev/null +++ b/tools/perf/iterm_ux_metrics_v2.d @@ -0,0 +1,196 @@ +#!/usr/sbin/dtrace -s + +/* + * iTerm2 UX Metrics - User-focused performance measurement + * + * Measures: + * 1. Update cadence - UI refresh rate from cadence controller + * 2. Latency - time from PTY read to refresh (approximate) + * 3. Lock contention - time in performBlockWithJoinedThreads + * + * Usage: dtrace -s script.d DURATION -p PID + * Pass DURATION=0 for no auto-exit (Ctrl-C to stop). + */ + +#pragma D option quiet + +dtrace:::BEGIN { + start = timestamp; + seconds = 0; + printf("Tracing iTerm2 UX metrics... Ctrl-C to stop.\n"); +} + +tick-1sec { + seconds++; +} + +tick-1sec +/$1 > 0 && seconds >= $1/ { + exit(0); +} + +/* ============================================================ + * 1. UPDATE CADENCE + * Count content changes vs cadence-driven refreshes + * ============================================================ */ + +/* Content actually changed (lines marked dirty) */ +objc$target:PTYTextView:-setNeedsDisplayOnLine*:entry { + @content_frames = count(); +} + +/* Total refresh calls (cadence-driven) */ +objc$target:PTYTextView:-refresh:entry { + @refreshes = count(); +} + +/* Metal frames handed to GPU */ +objc$target:iTermMetalFrameData:-willHandOffToGPU:entry { + @metal_frames = count(); +} + +/* ============================================================ + * 2. 
ADAPTIVE FRAME RATE MODE
+ *    See which cadence path is being used
+ * ============================================================ */
+
+/* Each clause bumps one key of the anonymous aggregation printed under ADAPTIVE MODE */
+objc$target:iTermUpdateCadenceController:-fastAdaptiveInterval:entry {
+    @["60fps mode"] = count();
+}
+
+objc$target:iTermUpdateCadenceController:-slowAdaptiveInterval*:entry {
+    @["30fps mode"] = count();
+}
+
+objc$target:iTermUpdateCadenceController:-backgroundInterval:entry {
+    @["1fps mode"] = count();
+}
+
+/* ============================================================
+ * 3. LATENCY (approximate)
+ *    Time from PTY read to next refresh
+ * ============================================================ */
+
+/*
+ * last_read_time is a global (zero until the first read), not a thread-local:
+ * a self-> variable set here would only be visible to -refresh if both
+ * methods ran on the same thread.
+ * NOTE(review): presumably PTY reads happen off the main thread - confirm.
+ * Being global, the value is the most recent read on any thread, which is
+ * what "approximate" latency needs.
+ */
+objc$target:PTYTask:-readTask*:entry {
+    last_read_time = timestamp;
+}
+
+objc$target:PTYTextView:-refresh:return
+/last_read_time/ {
+    this->lat = timestamp - last_read_time;
+    @latency_avg = avg(this->lat);
+    @latency_min = min(this->lat);
+    @latency_max = max(this->lat);
+    /* Consume the timestamp so a refresh with no new read is not measured */
+    last_read_time = 0;
+}
+
+/* ============================================================
+ * 4.
LOCK CONTENTION - Joined threads
+ *    Time spent with mutation queue paused
+ * ============================================================ */
+
+/* VT100Screen has the joined threads methods */
+/* NOTE(review): iterm_ux_metrics.d probes VT100ScreenMutableState for the same selector - confirm which class implements it. */
+objc$target:VT100Screen:-performBlockWithJoinedThreads*:entry {
+    /* Thread-local start time; paired with the :return clause on the same thread */
+    self->join_start = timestamp;
+}
+
+objc$target:VT100Screen:-performBlockWithJoinedThreads*:return
+/self->join_start/ {
+    this->jt = timestamp - self->join_start;
+    @join_time_total = sum(this->jt);
+    @join_time_avg = avg(this->jt);
+    @join_calls = count();
+    /* Clear so a later :return without a matching :entry is skipped */
+    self->join_start = 0;
+}
+
+/* Also track lightweight variant */
+objc$target:VT100Screen:-performLightweightBlockWithJoinedThreads*:entry {
+    self->light_join_start = timestamp;
+}
+
+objc$target:VT100Screen:-performLightweightBlockWithJoinedThreads*:return
+/self->light_join_start/ {
+    this->ljt = timestamp - self->light_join_start;
+    @light_join_time = sum(this->ljt);
+    @light_join_calls = count();
+    self->light_join_start = 0;
+}
+
+/* ============================================================
+ * 5. SYNC OPERATIONS
+ * ============================================================ */
+
+/* Counts entries into any -synchronize* method on classes matching VT100Screen* */
+objc$target:VT100Screen*:-synchronize*:entry {
+    @syncs = count();
+}
+
+/* ============================================================
+ * 6.
FAIRNESS SCHEDULER
+ *    These only fire when FairnessScheduler is active (not legacy path)
+ *    Swift methods require pid provider, not objc provider
+ * ============================================================ */
+
+/* '?' and '*' are globs: the probes match on a fragment of the function name */
+pid$target:iTerm2:*FairnessScheduler?register*:entry {
+    @fairness_register = count();
+}
+
+pid$target:iTerm2:*FairnessScheduler?sessionDidEnqueueWork*:entry {
+    @fairness_enqueue = count();
+}
+
+pid$target:iTerm2:*TokenExecutor?executeTurn*:entry {
+    @fairness_execute_turn = count();
+}
+
+/* ============================================================
+ * OUTPUT
+ * ============================================================ */
+
+dtrace:::END {
+    duration_ns = timestamp - start;
+    duration_sec = duration_ns / 1000000000;
+
+    printf("\n");
+    printf("============================================================\n");
+    printf("iTerm2 UX Metrics (duration: %d sec)\n", duration_sec);
+    printf("============================================================\n");
+
+    printf("\n--- UPDATE CADENCE ---\n");
+    printa(" Content frames (setNeedsDisplay): %@d\n", @content_frames);
+    printa(" Total refreshes (cadence): %@d\n", @refreshes);
+    printa(" Metal frames (GPU): %@d\n", @metal_frames);
+
+    printf("\n--- ADAPTIVE MODE ---\n");
+    /* "@" is the anonymous aggregation keyed by cadence-mode name */
+    printa(" %s calls: %@d\n", @);
+
+    printf("\n--- LATENCY (PTY read -> refresh) ---\n");
+    printa(" Avg: %@d ns\n", @latency_avg);
+    printa(" Min: %@d ns\n", @latency_min);
+    printa(" Max: %@d ns\n", @latency_max);
+
+    printf("\n--- JOINED THREAD CONTENTION ---\n");
+    printa(" Full join calls: %@d\n", @join_calls);
+    printa(" Full join total time: %@d ns\n", @join_time_total);
+    printa(" Full join avg time: %@d ns\n", @join_time_avg);
+    printa(" Light join calls: %@d\n", @light_join_calls);
+    printa(" Light join time: %@d ns\n", @light_join_time);
+
+    printf("\n--- SYNC OPERATIONS ---\n");
+    printa(" Syncs: %@d\n", @syncs);
+
+    printf("\n--- FAIRNESS SCHEDULER (0 = legacy path) ---\n");
+    printa(" Register calls: %@d\n", @fairness_register);
+    printa(" SessionDidEnqueueWork: %@d\n", @fairness_enqueue);
+    printa(" ExecuteTurn calls: %@d\n", @fairness_execute_turn);
+
+    printf("\n--- RATES ---\n");
+    /*
+     * The divisor must be non-zero. An early Ctrl-C (under one second)
+     * leaves duration_sec == 0, so fall back to 1 and report raw counts.
+     */
+    rate_divisor = duration_sec > 0 ? duration_sec : 1;
+    normalize(@content_frames, rate_divisor);
+    normalize(@refreshes, rate_divisor);
+    normalize(@metal_frames, rate_divisor);
+    printa(" Content frames/sec: %@d\n", @content_frames);
+    printa(" Refreshes/sec: %@d\n", @refreshes);
+    printa(" Metal frames/sec: %@d\n", @metal_frames);
+
+    printf("\n--- EFFICIENCY ---\n");
+    /* Only the formula is printed; the ratio itself is left to the reader */
+    printf(" (Content frames / Refreshes = how often refresh found new content)\n");
+}
diff --git a/tools/perf/profile_stress_test.py b/tools/perf/profile_stress_test.py
new file mode 100755
index 0000000000..fbf4713c32
--- /dev/null
+++ b/tools/perf/profile_stress_test.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""
+Stress test and profile iTerm2 hot paths.
+
+Usage:
+    python3 profile_stress_test.py [output_prefix]
+
+This script:
+1. Finds the iTerm2 process
+2. Starts a 15-second sample profiler in the background
+3. Runs a 10-second stress test generating terminal output
+4. Waits for profiler to complete
+5.
# Final step of the module docstring that opens just above this chunk:
#   5. Summarizes results showing preference lookup frequency

import subprocess
import sys
import time
import os
import re
from pathlib import Path


def find_iterm_pid():
    """Return the PID (as a string) of the first iTerm2 process listed by ps.

    A process matches when its command path ends in '/iTerm2'. If none is
    found an error is printed and the interpreter exits with status 1.
    """
    result = subprocess.run(
        ["ps", "-axo", "pid,comm"],
        capture_output=True,
        text=True
    )
    for line in result.stdout.strip().split('\n'):
        if line.strip().endswith('/iTerm2'):
            return line.split()[0]

    print("Error: No iTerm2 process found")
    sys.exit(1)


def start_profiler(pid, duration, output_file):
    """Launch `sample` against *pid* for *duration* seconds in the background.

    The report is written to *output_file*; the profiler's own stdout/stderr
    are discarded. Returns the Popen handle so the caller can wait on it.
    """
    print(f"Starting profiler for {duration} seconds (PID: {pid})...")
    proc = subprocess.Popen(
        ["sample", pid, str(duration), "-f", output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL
    )
    return proc


def stress_test(duration):
    """Print varied terminal output for *duration* seconds.

    Cycles through the patterns below, one line per iteration, and returns
    the number of iterations performed.
    """
    print(f"Running stress test for {duration} seconds...")
    start = time.time()
    iteration = 0

    # Mix of different output patterns to exercise various code paths
    # Especially targeting StringToScreenChars and related hot paths
    patterns = [
        # Plain ASCII (baseline)
        lambda i: "x" * 200,

        # ANSI escape sequences (SGR attributes)
        lambda i: f"\033[{31 + (i % 7)}m\033[{40 + (i % 7)}mColored text iteration {i}\033[0m",

        # Wide characters (CJK) - exercises width calculation
        lambda i: "漢字テスト中文한글" * 10,

        # Mixed ASCII and wide chars
        lambda i: f"Line {i}: " + "日本語ABC中文DEF한글GHI" * 5,

        # RTL text (Arabic/Hebrew) - triggers bidi processing
        lambda i: f"LTR start مرحبا بالعالم שלום עולם end LTR {i}",

        # Mixed bidi with numbers
        lambda i: f"Price: ₪{i} or ${i}.99 - מחיר: {i} شيكل",

        # Emoji (variable width, variation selectors)
        lambda i: "👨‍👩‍👧‍👦🏳️‍🌈👍🏽🇺🇸🎉✨🔥💯" * 5,

        # Combining characters (diacritics)
        lambda i: "e\u0301a\u0300o\u0302u\u0308n\u0303" * 20,  # éàôüñ as combining

        # Control characters and cursor movement
        lambda i: f"\033[{1 + (i % 20)}C\033[{1 + (i % 5)}A\033[{1 + (i % 5)}B" + ">" * 30,

        # Tab characters (tab stop processing)
        lambda i: f"col1\tcol2\tcol3\tcol4\t{i}",

        # Rapid attribute changes
        lambda i: "".join(f"\033[{31 + (j % 7)}m{chr(65 + (j % 26))}" for j in range(80)) + "\033[0m",

        # Box drawing / special chars
        lambda i: "┌─┬─┐│├─┼─┤│└─┴─┘" * 8,

        # Zero-width joiners and other Unicode specials
        lambda i: "a\u200db\u200cc\uFEFFd" * 30,

        # Long line with mixed content (tests line wrapping with complex chars)
        lambda i: ("ABC日本語🎉مرحبا" * 20)[:200],
    ]

    while time.time() - start < duration:
        pattern = patterns[iteration % len(patterns)]
        try:
            print(pattern(iteration))
        except UnicodeEncodeError:
            # stdout cannot encode this pattern; note it and keep going
            print(f"[encoding error on iteration {iteration}]")
        iteration += 1
        # Small delay to not completely overwhelm
        if iteration % 100 == 0:
            time.sleep(0.001)

    print(f"\nStress test complete: {iteration} iterations")
    return iteration


# One call-graph line of `sample` output: an optional tree-drawing prefix made
# of '+', '!', ':' and '|' separated by spaces, then the sample count, the
# symbol, and the "(in iTerm2)" image tag. The prefix class includes
# whitespace so frames at any depth ("+   ! : | 42 ...") are recognised.
_ITERM_SYMBOL_RE = re.compile(r'^[\s+!:|]*(\d+)\s+(.+?)\s+\(in iTerm2\)')


def _top_iterm_symbols(content, limit=15):
    """Return up to *limit* (symbol, count) pairs, highest sample count first.

    A symbol that appears on several lines keeps its largest count.
    """
    counts = {}
    for line in content.splitlines():
        match = _ITERM_SYMBOL_RE.match(line)
        if not match:
            continue
        count = int(match.group(1))
        symbol = match.group(2).strip()
        if count > counts.get(symbol, 0):
            counts[symbol] = count
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]


def analyze_profile(output_file):
    """Print a summary of the `sample` report stored at *output_file*.

    Three sections are printed: occurrence counts for specific symbols,
    occurrence counts for broader categories, and the top iTerm2 symbols by
    sample count. Prints an error and returns if the file does not exist.
    """
    print(f"\nAnalyzing profile: {output_file}")

    if not os.path.exists(output_file):
        print("Error: Profile output file not found")
        return

    # Decode explicitly and tolerate stray bytes: the report is only scanned
    # with regexes, so one undecodable byte must not abort the whole analysis.
    with open(output_file, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # Count occurrences of key patterns.
    patterns = {
        "boolForKey": r'\[iTermPreferences boolForKey:\]',
        "intForKey": r'\[iTermPreferences intForKey:\]',
        "objectForKey": r'\[iTermPreferences objectForKey:\]',
        "updateConfigurationFields": r'updateConfigurationFields',
        "NSUserDefaults": r'NSUserDefaults',
        "@synchronized": r'@synchronized',
        "os_unfair_lock": r'os_unfair_lock',
        # StringToScreenChars and text processing
        "StringToScreenChars": r'StringToScreenChars',
        "ScreenCharArray": r'ScreenCharArray',
        "bidi/Bidi": r'[Bb]idi',
        "VT100Terminal": r'VT100Terminal',
        "executeToken": r'executeToken',
        # Metal rendering
        "Metal": r'Metal|metal|MTL|CAMetalLayer',
        "iTermTextRenderer": r'iTermTextRenderer',
    }

    # Broader categories for spotting redundant work or churn.
    categories = {
        "Allocations": r'\b(malloc|calloc|realloc|free|operator new|operator delete)\b',
        "ObjC retain/release": r'objc_(retain|release|autoreleaseReturnValue|retainAutoreleasedReturnValue)',
        "Autorelease pools": r'NSAutoreleasePool|autoreleasepool',
        "Strings/Unicode": r'NSString|CFString|StringToScreenChars|ScreenCharArray',
        "CoreText": r'CTLine|CTRun|CTFont|CoreText',
        "CoreGraphics": r'CGContext|CGColor|CGPath|CGImage|CoreGraphics',
        "AppKit geometry": r'NSRect|NSMakeRect|convertRect|bounds|frame',
        "Locks/dispatch": r'os_unfair_lock|pthread_mutex|dispatch_semaphore|@synchronized',
        "Terminal parsing": r'VT100Parser|VT100Terminal|VT100Screen|executeToken',
        "Rendering": r'iTermTextRenderer|Metal|metal|MTL|CAMetalLayer',
        "Process/cache": r'iTermProcessCache|TaskNotifier|deepestForegroundJob',
    }

    print("\n" + "=" * 60)
    print("Profile Summary")
    print("=" * 60)

    for name, pattern in patterns.items():
        count = len(re.findall(pattern, content))
        print(f" {name}: {count} occurrences")

    print("=" * 60)
    print("\n" + "=" * 60)
    print("Category Summary")
    print("=" * 60)
    for name, pattern in categories.items():
        count = len(re.findall(pattern, content))
        print(f" {name}: {count} occurrences")
    print("=" * 60)

    # Extract top iTerm2 symbols from the call graph.
    top_symbols = _top_iterm_symbols(content)
    if top_symbols:
        print("\n" + "=" * 60)
        print("Top iTerm2 Symbols (by sample count)")
        print("=" * 60)
        for symbol, count in top_symbols:
            print(f" {count} {symbol}")
        print("=" * 60)

    print(f"\nFull profile saved to: {output_file}")


def main():
    """Profile iTerm2 while a stress test runs, then summarise the report.

    The optional first command-line argument is the output file prefix.
    Returns the path of the saved profile.
    """
    prefix = sys.argv[1] if len(sys.argv) > 1 else "iterm_profile"
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    output_file = f"/tmp/{prefix}_{timestamp}.txt"

    pid = find_iterm_pid()
    print(f"Found iTerm2 PID: {pid}")

    # Start profiler (15 seconds) - longer than the stress test so the whole
    # run is covered
    profiler = start_profiler(pid, 15, output_file)

    # Give profiler a moment to attach
    time.sleep(0.5)

    # Run stress test (10 seconds); it prints its own iteration count
    stress_test(10)

    # Wait for profiler to complete
    print("\nWaiting for profiler to complete...")
    profiler.wait()

    # Analyze results
    analyze_profile(output_file)

    return output_file


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Remainder of this patch chunk: the header and first lines of the next file
# in the diff (tools/perf/run_stress_test.sh), kept verbatim as comments.
#
# diff --git a/tools/perf/run_stress_test.sh b/tools/perf/run_stress_test.sh
# new file mode 100755
# index 0000000000..7740ecfc9c
# --- /dev/null
# +++ b/tools/perf/run_stress_test.sh
# @@ -0,0 +1,1580 @@
# +#!/usr/bin/env bash
# +set -euo pipefail
# +
# +# Check that no iTerm2 instance is already running
# +if pgrep -x iTerm2 >/dev/null 2>&1; then
# +    echo "Error: iTerm2 is already running. Please close all iTerm2 instances."
>&2 + exit 1 +fi + +# Get script directory early (needed for relative paths) +script_dir_early="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +usage() { + cat <<'USAGE' +Usage: run_stress_test.sh [OPTIONS] /path/to/iTerm2.app + +Opens iTerm2, starts a single profiler, creates N tabs running a stress load +for the specified duration (default 20 seconds), waits for completion, then +analyzes the profile output. + +Options: + --synchronized-start=BOOL + Synchronize tab startup: all tabs signal ready, then start + generating load simultaneously. This separates tab creation + overhead from the actual stress test. + Values: true/false, yes/no, 1/0 (default: true) + + --inject Enable continuous interaction injection during stress test. + Exercises latency instrumentation with periodic events: + - Keyboard input every 500ms (single character) + - Scroll events every 2s (Page Up/Down alternating) + - Tab switches every 3s (cycles through all tabs) + Requires synchronized start (the default). + + --dtrace Enable enhanced DTrace UX metrics collection. Measures: + - Update cadence (UI refresh rate from cadence controller) + - Adaptive frame rate mode (60fps/30fps/1fps) + - Lock contention (joined thread time) + Requires root privileges. + + --mode=MODE Stress test mode. Available modes: + + Terminal output stress (stress_load.py): + normal - mixed output patterns, no screen clears (default) + buffer - long lines (~600 chars), stresses line buffers + clearcodes - all patterns including clear/erase sequences + flood - maximum throughput using 'yes' (no throttling) + all - runs normal, buffer, clearcodes in sequence + + Dashboard/UI stress (load_dashboard.py): + htop - CPU meters + scrolling process list + watch - full-screen clear + redraw every 100ms + progress - 20 progress bars updating in place + table - fixed header + scroll region body + status - grid of color-coded service status cells + + --title[=MS] Inject OSC 0 title changes every MS milliseconds (default 2000ms). 
+ Exercises TitleUpdate and TabTitleUpdate latency instrumentation. + + --tabs=COUNTS Comma-separated list of tab counts to test sequentially (1-24). + Example: --tabs=1,3,5,10 + Runs a separate test for each count and shows a summary table. + If not specified, defaults to 10 tabs. + + -t, --time=SEC Duration in seconds (default: 20). + + --fps=N Target frame rate for dashboard modes (default: 30). + Use 0 for unthrottled (as fast as possible). + Ignored for stress modes (normal, buffer, clearcodes, flood). + + --speed=SPEED Output speed: normal (default) or slow. + slow: adds 100ms delay after each output iteration. + + --forever Run stress loads indefinitely without profiling or data + collection. Useful for manual testing or external profiling. + Press Ctrl-C to stop. Ignores duration parameter. + + --tmux Run stress_load.py inside tmux sessions within iTerm2 tabs. + Each tab gets its own tmux session with a unique name. + Sessions auto-close when the stress load completes. + Cleanup is attempted on Ctrl-C or script exit. + + --load-script=PATH + Use a custom load generator script instead of the built-in ones. + The script must accept: duration label --sync-dir DIR [--mode=X] + + --suite=NAME Use a custom UserDefaults suite for isolated preferences. + Default: com.iterm2.defaults (reproducible test environment) + Use --suite=user to use your normal preferences (com.googlecode.iterm2) + Use --suite=none to explicitly disable suite isolation + Example: --suite=com.mytest creates fresh preferences + Note: Passed to iTerm2 as "-suite NAME" (single dash, space-separated) + + --video[=DIR] Record the screen during each test iteration. + Videos are saved as MOV files with timestamps. + Optional DIR specifies output directory (default: /tmp). + Uses macOS screencapture; screen recording permission required. + + --self-time Run self-time profiling using DTrace profile provider. + Shows which functions actually burn CPU (not just call count). + Requires sudo. 
Output is analyzed to filter non-actionable symbols. + Results show iTerm2 code hotspots for optimization. + +USAGE +} + +# Parse arguments +synchronized_start=true # Default to synchronized (better for profiling) +inject_mode=false +dtrace_mode=false +self_time_mode=false # self-time profiling with profile provider +forever_mode=false +tmux_mode=false +title_arg="" # empty = disabled, value = milliseconds +stress_mode="" +tabs_arg="" +speed_arg="normal" +fps_arg="" # empty = use default 30fps +load_script_arg="" # empty = use default stress_load.py +duration_arg="" # empty = use default 20s +suite_name="com.iterm2.defaults" # default suite for reproducible tests +video_mode=false +video_output_dir="/tmp" # default video output directory +positional_args=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --synchronized-start=*) + val="${1#--synchronized-start=}" + val_lower=$(echo "$val" | tr '[:upper:]' '[:lower:]') + case "$val_lower" in + false|no|0) synchronized_start=false ;; + true|yes|1) synchronized_start=true ;; + *) echo "Error: --synchronized-start requires true/false/yes/no/1/0, got '$val'" >&2; exit 1 ;; + esac + shift + ;; + --inject) + inject_mode=true + shift + ;; + --dtrace) + dtrace_mode=true + shift + ;; + --forever) + forever_mode=true + shift + ;; + --tmux) + tmux_mode=true + shift + ;; + --load-script=*) + load_script_arg="${1#--load-script=}" + shift + ;; + --suite=*) + suite_val="${1#--suite=}" + case "$suite_val" in + user) suite_name="com.googlecode.iterm2" ;; # Use normal iTerm2 prefs + none|"") suite_name="" ;; # No suite isolation + *) suite_name="$suite_val" ;; # Custom suite name + esac + shift + ;; + --video) + video_mode=true + shift + ;; + --video=*) + video_mode=true + video_output_dir="${1#--video=}" + shift + ;; + --self-time) + self_time_mode=true + shift + ;; + --title) + title_arg="2000" # default 2000ms + shift + ;; + --title=*) + title_arg="${1#--title=}" + if ! 
[[ "$title_arg" =~ ^[0-9]+$ ]]; then + echo "Error: --title requires a numeric millisecond value, got '$title_arg'" >&2 + exit 1 + fi + shift + ;; + --mode=*) + stress_mode="$1" + shift + ;; + --tabs=*) + tabs_arg="${1#--tabs=}" + shift + ;; + --speed=*) + speed_arg="${1#--speed=}" + if [[ "$speed_arg" != "normal" && "$speed_arg" != "slow" ]]; then + echo "Error: --speed requires 'normal' or 'slow', got '$speed_arg'" >&2 + exit 1 + fi + shift + ;; + --fps=*) + fps_arg="${1#--fps=}" + if ! [[ "$fps_arg" =~ ^[0-9]*\.?[0-9]+$ ]]; then + echo "Error: --fps must be a non-negative number, got '$fps_arg'" >&2 + exit 1 + fi + shift + ;; + --time=*) + duration_arg="${1#--time=}" + shift + ;; + -t) + if [[ $# -lt 2 ]]; then + echo "Error: -t requires a value" >&2 + exit 1 + fi + duration_arg="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + positional_args+=("$1") + shift + ;; + esac +done + +set -- "${positional_args[@]}" + +# Note: --tmux mode wraps stress_load.py in tmux sessions inside iTerm2 tabs, +# not the test harness itself. + +# Valid modes (all handled by stress_load.py) +valid_modes="normal buffer clearcodes flood htop watch progress table status all" + +# Validate mode(s) if specified +if [[ -n "$stress_mode" ]]; then + mode_value="${stress_mode#--mode=}" + IFS=',' read -ra mode_list <<< "$mode_value" + for m in "${mode_list[@]}"; do + if [[ ! 
" $valid_modes " =~ " $m " ]]; then + echo "Error: Unknown mode '$m'" >&2 + echo " Valid modes: $valid_modes" >&2 + exit 1 + fi + done +fi + +if [[ $# -lt 1 ]]; then + usage + exit 1 +fi + +app_path="$1" + +# Display suite configuration +if [[ -n "$suite_name" ]]; then + if [[ "$suite_name" == "com.iterm2.defaults" ]]; then + echo "Using test suite: $suite_name (isolated preferences for reproducible tests)" + elif [[ "$suite_name" == "com.googlecode.iterm2" ]]; then + echo "Using user's normal preferences (--suite=user)" + else + echo "Using custom suite: $suite_name" + fi +else + echo "Using user's normal preferences (no suite isolation)" +fi + +# Duration: prefer --time/-t, then positional arg, then default +if [[ -n "$duration_arg" ]]; then + duration="$duration_arg" +elif [[ $# -ge 2 ]]; then + duration="$2" +else + duration=20 +fi + +# Parse tab counts +tab_counts=() +if [[ -n "$tabs_arg" ]]; then + IFS=',' read -ra tab_counts <<< "$tabs_arg" + for tc in "${tab_counts[@]}"; do + if ! [[ "$tc" =~ ^[0-9]+$ ]] || [[ "$tc" -lt 1 ]] || [[ "$tc" -gt 24 ]]; then + echo "Error: tab count must be an integer between 1 and 24, got '$tc'" >&2 + exit 1 + fi + done +else + tab_counts=(10) # Default to 10 tabs +fi + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Determine load script: explicit --load-script or default stress_load.py +if [[ -n "$load_script_arg" ]]; then + if [[ "$load_script_arg" == /* ]]; then + load_script="$load_script_arg" + else + load_script="$script_dir/$load_script_arg" + fi +else + load_script="$script_dir/stress_load.py" +fi +analyze_script="$script_dir/analyze_profile.py" + +if [[ ! 
-d "$app_path" ]]; then + echo "Error: iTerm2 app not found at '$app_path'" >&2 + exit 1 +fi + +# Extract build metadata from app's Info.plist +app_plist="$app_path/Contents/Info.plist" +if [[ -f "$app_plist" ]]; then + app_version=$(/usr/libexec/PlistBuddy -c "Print :CFBundleShortVersionString" "$app_plist" 2>/dev/null || echo "unknown") + build_commit=$(/usr/libexec/PlistBuddy -c "Print :BuildCommit" "$app_plist" 2>/dev/null || echo "") + build_branch=$(/usr/libexec/PlistBuddy -c "Print :BuildBranch" "$app_plist" 2>/dev/null || echo "") + build_date=$(/usr/libexec/PlistBuddy -c "Print :BuildDate" "$app_plist" 2>/dev/null || echo "") + build_uncommitted=$(/usr/libexec/PlistBuddy -c "Print :BuildUncommittedChanges" "$app_plist" 2>/dev/null || echo "") +else + echo "Warning: Could not read Info.plist from app bundle" >&2 + app_version="unknown" + build_commit="" + build_branch="" + build_date="" + build_uncommitted="" +fi + +if ! [[ "$duration" =~ ^[0-9]+$ ]] || [[ "$duration" -lt 1 ]]; then + echo "Error: duration must be a positive integer" >&2 + exit 1 +fi + +if [[ ! -f "$load_script" ]]; then + echo "Error: stress load script not found at '$load_script'" >&2 + exit 1 +fi + +if [[ ! -f "$analyze_script" ]]; then + echo "Error: analyze script not found at '$analyze_script'" >&2 + exit 1 +fi + +# Video mode validation +if [[ "$video_mode" == true ]]; then + # Check for GetWindowID tool + if ! command -v GetWindowID &>/dev/null; then + echo "Error: --video requires GetWindowID (brew install smokris/getwindowid/getwindowid)" >&2 + exit 1 + fi + # Validate output directory + video_output_dir="${video_output_dir/#\~/$HOME}" + if [[ ! -d "$video_output_dir" ]]; then + echo "Error: Video output directory does not exist: $video_output_dir" >&2 + exit 1 + fi + echo "Video recording enabled (output: $video_output_dir)" +fi + +# DTrace requires root +if [[ "$dtrace_mode" == true ]]; then + dtrace_script="$script_dir/iterm_ux_metrics_v2.d" + if [[ ! 
-f "$dtrace_script" ]]; then + echo "Error: DTrace script not found at '$dtrace_script'" >&2 + exit 1 + fi + echo "DTrace mode enabled (using $dtrace_script)" + if [[ $EUID -ne 0 ]]; then + echo "Warning: DTrace requires root. If dtrace fails, re-run as:" >&2 + echo " sudo $0 $*" >&2 + echo "Note: Running scripts as root is generally not recommended." >&2 + echo " Review the script contents before running with elevated privileges." >&2 + fi +fi + +# Global arrays to collect results across runs +declare -a RESULT_TAB_COUNTS=() +declare -a RESULT_ITERATION_RATES=() +declare -a RESULT_TOTAL_ITERATIONS=() +# DTrace UX metrics (only populated when --dtrace is used) +declare -a RESULT_CONTENT_FRAMES=() +declare -a RESULT_REFRESHES=() +declare -a RESULT_METAL_FRAMES=() +declare -a RESULT_CONTENT_FPS=() +declare -a RESULT_REFRESH_FPS=() +declare -a RESULT_METAL_FPS=() +declare -a RESULT_JOIN_CALLS=() +declare -a RESULT_JOIN_TIME_US=() +declare -a RESULT_ADAPTIVE_MODE=() +# Latency metrics (from mtperf_latency_*.txt) +declare -a RESULT_KEYDOWN_MEAN_MS=() +declare -a RESULT_KEYDOWN_MAX_MS=() +declare -a RESULT_TITLE_MEAN_MS=() +declare -a RESULT_TITLE_MAX_MS=() +declare -a RESULT_GCD_TIMER_CREATE=() +declare -a RESULT_GCD_TIMER_FIRE=() +declare -a RESULT_GCD_FIRE_RATIO=() +declare -a RESULT_NS_TIMER_CREATE=() +declare -a RESULT_NS_TIMER_FIRE=() +declare -a RESULT_NS_FIRE_RATIO=() +declare -a RESULT_CADENCE_MISMATCH_PCT=() +# Derived metrics +declare -a RESULT_ITERS_PER_FRAME=() + +# Create AppleScript file once (reused across runs) +applescript_file=$(mktemp) +cat > "$applescript_file" <<'APPLESCRIPT' +on run argv + set tabCount to (item 1 of argv) as integer + set scriptPath to item 2 of argv + set duration to item 3 of argv + set syncDir to item 4 of argv + set stressMode to item 5 of argv + set titleArg to item 6 of argv + set speedArg to item 7 of argv + set fpsArg to item 8 of argv + set tmuxPrefix to item 9 of argv + + tell application "iTerm2" + activate + -- Use the 
existing window (opened by app launch) instead of creating a new one
+        set theWindow to current window
+
+        -- Set consistent window size and position for reproducible tests
+        set bounds of theWindow to {864, 34, 1728, 1117}
+
+        set sessionList to {}
+
+        repeat with i from 1 to tabCount
+            if i is not 1 then
+                tell theWindow to create tab with default profile
+            end if
+            delay 1
+            set theSession to current session of theWindow
+            set innerCmd to "python3 " & quoted form of scriptPath & " " & duration & " tab_" & i
+            if syncDir is not "" then
+                set innerCmd to innerCmd & " --sync-dir " & quoted form of syncDir
+            end if
+            if stressMode is not "" then
+                set innerCmd to innerCmd & " " & stressMode
+            end if
+            if titleArg is not "" then
+                set innerCmd to innerCmd & " --title=" & titleArg
+            end if
+            if speedArg is not "" and speedArg is not "normal" then
+                set innerCmd to innerCmd & " --speed=" & speedArg
+            end if
+            if fpsArg is not "" then
+                set innerCmd to innerCmd & " --fps=" & fpsArg
+            end if
+            -- Wrap in tmux if prefix provided, otherwise run directly
+            if tmuxPrefix is not "" then
+                set sessionName to tmuxPrefix & "-tab" & i
+                -- innerCmd already contains single-quoted paths, so it cannot be
+                -- wrapped in literal quotes; quoted form escapes them and hands
+                -- tmux the whole command as one argument.
+                set cmd to "tmux new-session -s " & quoted form of sessionName & " " & quoted form of (innerCmd & "; exit")
+            else
+                set cmd to innerCmd & "; exit"
+            end if
+            tell theSession to write text cmd
+            set end of sessionList to theSession
+        end repeat
+
+        -- Wait for initial startup
+        delay 5
+
+        -- Poll until all sessions complete
+        repeat
+            set doneCount to 0
+            repeat with s in sessionList
+                try
+                    if is at shell prompt of s then
+                        set doneCount to doneCount + 1
+                    end if
+                on error
+                    -- An error querying the session is counted as finished
+                    set doneCount to doneCount + 1
+                end try
+            end repeat
+            if doneCount = tabCount then exit repeat
+            delay 1
+        end repeat
+
+        -- Don't close window here; let main script handle shutdown order
+        -- (needed so dtrace can be signaled before iTerm2 exits)
+    end tell
+end run
+APPLESCRIPT
+
+# Create AppleScript for forever mode (no polling, no close)
+applescript_forever_file=$(mktemp)
+cat > "$applescript_forever_file" <<'APPLESCRIPT'
+on run argv
+    set tabCount to (item 1 of argv) as integer
+    set scriptPath to item 2 of argv
+    set stressMode to item 3 of argv
+    set titleArg to item 4 of argv
+    set speedArg to item 5 of argv
+    set fpsArg to item 6 of argv
+    set tmuxPrefix to item 7 of argv
+
+    tell application "iTerm2"
+        activate
+        set theWindow to current window
+
+        -- Set consistent window size and position for reproducible tests
+        set bounds of theWindow to {864, 34, 1728, 1117}
+
+        repeat with i from 1 to tabCount
+            if i is not 1 then
+                tell theWindow to create tab with default profile
+            end if
+            delay 1
+            set theSession to current session of theWindow
+            -- Run forever (no duration limit)
+            set innerCmd to "python3 " & quoted form of scriptPath & " 999999999 tab_" & i
+            if stressMode is not "" then
+                set innerCmd to innerCmd & " " & stressMode
+            end if
+            if titleArg is not "" then
+                set innerCmd to innerCmd & " --title=" & titleArg
+            end if
+            if speedArg is not "" and speedArg is not "normal" then
+                set innerCmd to innerCmd & " --speed=" & speedArg
+            end if
+            if fpsArg is not "" then
+                set innerCmd to innerCmd & " --fps=" & fpsArg
+            end if
+            -- Wrap in tmux if prefix provided, otherwise run directly
+            if tmuxPrefix is not "" then
+                set sessionName to tmuxPrefix & "-tab" & i
+                -- Same quoting rule as the timed script: let quoted form escape
+                -- the single quotes already inside innerCmd.
+                set cmd to "tmux new-session -s " & quoted form of sessionName & " " & quoted form of (innerCmd & "; exit")
+            else
+                set cmd to innerCmd
+            end if
+            tell theSession to write text cmd
+        end repeat
+    end tell
+end run
+APPLESCRIPT
+
+# Get power source from pmset
+# Prints the text between the quotes of "Now drawing from '...'" on the first line.
+get_power_source() {
+    pmset -g batt 2>/dev/null | head -1 | sed "s/.*Now drawing from '//;s/'.*//g" || echo "Unknown"
+}
+
+# Get power/energy mode from pmset (based on current power source)
+get_energy_mode() {
+    local power_source=$(get_power_source)
+    local section=""
+
+    # Determine which section to read from pmset -g custom
+    case "$power_source" in
+        "AC Power") section="AC Power:" ;;
+        "Battery Power") section="Battery 
# Print the current power source as reported on the first line of
# `pmset -g batt` (the text between the quotes after "Now drawing from").
# Prints "Unknown" when pmset fails or the line has an unexpected shape.
get_power_source() {
    local source_name
    # -n/p: emit only a successfully extracted name. The previous
    # `... | sed ... || echo "Unknown"` fallback depended on sed's exit status
    # only, so a failing pmset produced an empty string instead of "Unknown".
    source_name=$(pmset -g batt 2>/dev/null | head -1 |
        sed -n "s/.*Now drawing from '\([^']*\)'.*/\1/p") || true
    echo "${source_name:-Unknown}"
}

# Print the power/energy mode configured for the current power source:
# "Automatic", "Low Power", "High Power/High Performance" or "Unknown".
# The value is read from the `powermode` setting in the matching section of
# `pmset -g custom`.
get_energy_mode() {
    local power_source
    power_source=$(get_power_source)
    local section=""

    # Determine which section to read from pmset -g custom
    case "$power_source" in
        "AC Power")      section="AC Power:" ;;
        "Battery Power") section="Battery Power:" ;;
        "UPS Power")     section="UPS Power:" ;;
        *) echo "Unknown"; return ;;
    esac

    # Get powermode for the current power source. A section starts at its
    # header line and ends at the next line beginning with a capital letter.
    local powermode
    powermode=$(pmset -g custom 2>/dev/null | awk -v sect="$section" '
        $0 ~ sect { in_section=1; next }
        in_section && /^[A-Z]/ { in_section=0 }
        in_section && /powermode/ { print $NF; exit }
    ') || true

    case "$powermode" in
        0) echo "Automatic" ;;
        1) echo "Low Power" ;;
        2) echo "High Power/High Performance" ;;
        *) echo "Unknown" ;;
    esac
}

# Track tmux session prefix for cleanup (set per-run when --tmux is used)
tmux_session_prefix=""
tmux_tab_count=0

# Best-effort cleanup: kill the tmux sessions this script created, remove the
# temporary sync directory and AppleScript files, and ask iTerm2 to quit.
# Every step tolerates failure so the function is safe to run more than once.
cleanup() {
    # Clean up tmux sessions if we created any
    if [[ -n "$tmux_session_prefix" && "$tmux_tab_count" -gt 0 ]]; then
        local i
        for i in $(seq 1 "$tmux_tab_count"); do
            tmux kill-session -t "${tmux_session_prefix}-tab${i}" 2>/dev/null || true
        done
    fi
    # Clean up temp files
    if [[ -n "${sync_dir:-}" && -d "${sync_dir:-}" ]]; then
        rm -rf "$sync_dir"
    fi
    if [[ -n "${applescript_file:-}" && -f "${applescript_file:-}" ]]; then
        rm -f "$applescript_file"
    fi
    if [[ -n "${applescript_forever_file:-}" && -f "${applescript_forever_file:-}" ]]; then
        rm -f "$applescript_forever_file"
    fi
    # Try to quit iTerm2 if it's running (best effort)
    osascript -e 'tell application "iTerm2" to quit' 2>/dev/null || true
}

# Run cleanup exactly once, on exit. A signal handler that merely returns lets
# the script carry on after Ctrl-C, so INT/TERM exit explicitly (with the
# conventional 128+signal status) and rely on the EXIT trap for the cleanup.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Print the first all-digit field found on a line of FILE that matches the awk
# regex PATTERN (lines are scanned in order). Prints DEFAULT (0 when omitted)
# if no such field exists or the file cannot be read.
_perf_first_int() {
    local pattern=$1 file=$2 fallback=${3:-0}
    local value
    value=$(awk -v pat="$pattern" '
        $0 ~ pat { for (i = 1; i <= NF; i++) if ($i ~ /^[0-9]+$/) { print $i; exit } }
    ' "$file" 2>/dev/null) || true
    printf '%s\n' "${value:-$fallback}"
}

# Print field 2 of the "METRIC,..." row in the latency CSV FILE, or 0 when the
# row is absent (the old `grep | cut || echo 0` never reached its fallback
# unless pipefail was enabled, leaving the value empty).
_perf_latency_counter() {
    local value
    value=$(grep "^$1," "$2" 2>/dev/null | head -1 | cut -d, -f2) || true
    printf '%s\n' "${value:-0}"
}

# Print NS nanoseconds as milliseconds with two decimals.
_perf_ns_to_ms() {
    awk -v ns="$1" 'BEGIN {printf "%.2f", ns / 1000000}'
}

# Print SCALE * NUM / DEN with one decimal (SCALE defaults to 1).
# Callers must ensure DEN is non-zero.
_perf_ratio() {
    awk -v n="$1" -v d="$2" -v k="${3:-1}" 'BEGIN {printf "%.1f", k * n / d}'
}

# Start `sample` and, when enabled, the DTrace UX-metrics and self-time
# scripts against the running iTerm2. Reads iterm_pid, profiler_duration and
# the *_output paths from run_single_test's locals and stores the background
# PIDs in its profiler_pid / dtrace_pid / self_time_pid (bash dynamic scoping).
_perf_start_profilers() {
    echo "Starting profiler for ${profiler_duration} seconds..."
    sample "$iterm_pid" "$profiler_duration" -f "$profile_output" &>/dev/null &
    profiler_pid=$!
    # Start DTrace if enabled (with timeout matching profiler duration)
    if [[ "$dtrace_mode" == true ]]; then
        timeout "$((profiler_duration + 30))" dtrace -Z -p "$iterm_pid" -s "$dtrace_script" "$profiler_duration" > "$dtrace_output" 2>&1 &
        dtrace_pid=$!
        # Wait for dtrace to attach (it prints "Tracing" when ready)
        echo "Waiting for DTrace to attach..."
        for _ in {1..50}; do
            if grep -q "Tracing" "$dtrace_output" 2>/dev/null; then
                echo "DTrace attached."
                break
            fi
            sleep 0.2
        done
        if ! grep -q "Tracing" "$dtrace_output" 2>/dev/null; then
            echo "Warning: DTrace may not have attached properly"
        fi
    else
        sleep 1 # Brief pause for profiler to attach
    fi
    # Start self-time profiling if enabled
    if [[ "$self_time_mode" == true ]]; then
        local self_time_script="$script_dir/iterm_self_time.d"
        echo "Starting self-time profiler for ${profiler_duration} seconds..."
        timeout "$((profiler_duration + 30))" dtrace -Z -p "$iterm_pid" -s "$self_time_script" "$profiler_duration" > "$self_time_output" 2>&1 &
        self_time_pid=$!
        sleep 1 # Give dtrace time to attach
    fi
}

# Run one complete stress test and record its results.
#
# Arguments:
#   $1  tab_count  number of tabs to open
#   $2  run_index  zero-based index of this run (display only)
#
# Reads the option globals set by the caller (duration, app_path, suite_name,
# synchronized_start, video_mode, video_output_dir, dtrace_mode, dtrace_script,
# self_time_mode, inject_mode, tmux_mode, stress_mode, title_arg, speed_arg,
# fps_arg, load_script, analyze_script, script_dir, applescript_file).
# Appends one entry per run to the global RESULT_* arrays and, in --tmux mode,
# sets tmux_session_prefix / tmux_tab_count for cleanup.
#
# Returns 1 if the iTerm2 process cannot be found, or if (in synchronized
# mode) the tabs do not all signal ready within the start-up timeout.
run_single_test() {
    local tab_count=$1
    local run_index=$2

    echo ""
    echo "Run $((run_index + 1)): $tab_count tab(s), ${duration}s duration"

    # Set up sync directory if in sync mode
    # Use /tmp so it's accessible to all users (needed when running as root for dtrace)
    local sync_dir=""
    if [[ "$synchronized_start" == true ]]; then
        sync_dir=$(mktemp -d /tmp/iterm2-perf-sync.XXXXXX)
        chmod 777 "$sync_dir"
        echo "Synchronized start enabled (sync dir: $sync_dir)"
    fi

    # Open iTerm2 and wait for it to launch
    local run_start_epoch
    run_start_epoch=$(date +%s)

    # Launch iTerm2 with suite isolation
    if [[ -n "$suite_name" ]]; then
        open -a "$app_path" --args -suite "$suite_name"
    else
        open -a "$app_path"
    fi
    sleep 2

    # Find iTerm2 PID
    local iterm_pid=""
    for _ in {1..10}; do
        iterm_pid=$(pgrep -x iTerm2 || true)
        if [[ -n "$iterm_pid" ]]; then
            break
        fi
        sleep 0.5
    done

    if [[ -z "$iterm_pid" ]]; then
        echo "Error: Could not find iTerm2 process" >&2
        [[ -n "$sync_dir" && -d "$sync_dir" ]] && rm -rf "$sync_dir"
        return 1
    fi

    echo "Found iTerm2 PID: $iterm_pid"

    # Size and position window consistently (needed before video capture)
    osascript -e 'tell application "iTerm2" to set bounds of current window to {864, 34, 1728, 1117}'

    # Set up profile output file
    local timestamp
    timestamp=$(date +%Y%m%d_%H%M%S)
    local profile_output="/tmp/iterm2_multi_tab_profile_${timestamp}.txt"
    local dtrace_output="/tmp/iterm2_dtrace_${timestamp}.txt"
    local self_time_output="/tmp/iterm2_self_time_${timestamp}.txt"

    # Start video recording if enabled
    local video_pid=""
    local video_file=""
    if [[ "$video_mode" == true ]]; then
        video_file="${video_output_dir}/iterm2_perf_${tab_count}tabs_${timestamp}.mov"
        local video_duration=$((duration + 10)) # Add buffer for startup/shutdown
        # Get window ID (first non-empty title - safe since we ensure single instance)
        local window_id
        window_id=$(GetWindowID iTerm2 --list | grep -v '^""' | head -1 | sed 's/.*id=//')
        if [[ -z "$window_id" ]]; then
            echo "Warning: Could not get iTerm2 window ID, skipping video recording"
        else
            echo "Starting video recording (window $window_id): $video_file"
            screencapture -v -V "$video_duration" -l"$window_id" "$video_file" &
            video_pid=$!
            sleep 1 # Give screencapture time to initialize
        fi
    fi

    # Calculate profiler duration (load duration + buffer)
    local profiler_duration=$((duration + 5))

    # In sync mode, we start profiler AFTER tabs are ready (to exclude tab creation overhead)
    # In non-sync mode, we start profiler before launching tabs
    local profiler_pid=""
    local dtrace_pid=""
    local self_time_pid=""
    if [[ "$synchronized_start" != true ]]; then
        _perf_start_profilers
    fi

    echo "Launching $tab_count tabs, each running stress test for $duration seconds..."

    # Generate unique tmux session prefix if --tmux mode
    local tmux_prefix=""
    if [[ "$tmux_mode" == true ]]; then
        tmux_prefix="iterm2-perf-$$-$(date +%s)"
        tmux_session_prefix="$tmux_prefix" # Store globally for cleanup
        tmux_tab_count="$tab_count"
    fi

    # Launch tabs with load script (in background so we can coordinate sync)
    osascript "$applescript_file" "$tab_count" "$load_script" "$duration" "${sync_dir:-}" "$stress_mode" "$title_arg" "$speed_arg" "$fps_arg" "$tmux_prefix" &
    local applescript_pid=$!

    # If sync mode, wait for all ready signals, start profiler, then send go signal
    local inject_pid=""
    if [[ "$synchronized_start" == true ]]; then
        echo "Waiting for all tabs to signal ready..."

        # Wait for all ready files, but never forever: if a tab fails to start
        # (load script missing, tab creation failed, ...) its ready file never
        # appears and an unbounded loop would hang the whole run.
        local ready_count=0
        local ready_deadline=$((SECONDS + 60 + tab_count * 5))
        while [[ $ready_count -lt $tab_count ]]; do
            ready_count=$(find "$sync_dir" -name "ready_tab_*" 2>/dev/null | wc -l | tr -d ' ')
            if [[ $ready_count -lt $tab_count ]]; then
                if (( SECONDS >= ready_deadline )); then
                    echo "Error: only $ready_count of $tab_count tabs signaled ready; aborting run" >&2
                    kill "$applescript_pid" 2>/dev/null || true
                    if [[ -n "$video_pid" ]]; then
                        kill -INT "$video_pid" 2>/dev/null || true
                    fi
                    osascript -e 'tell application "iTerm2" to quit' 2>/dev/null || true
                    rm -rf "$sync_dir"
                    return 1
                fi
                sleep 0.2
            fi
        done

        echo "All $tab_count tabs ready."

        # Start profiler NOW (after tab creation, before load generation)
        _perf_start_profilers

        echo "Sending go signal..."
        touch "$sync_dir/go"

        # Start interaction injection if enabled (runs in background)
        if [[ "$inject_mode" == true ]]; then
            echo "Starting interaction injection (keyboard, scroll, tab switch)..."
            (
                sleep 2 # Let stress_load.py get going

                # Timing configuration (in 100ms ticks)
                local tick=0
                local keyboard_ticks=5  # every 500ms - keyboard input
                local scroll_ticks=20   # every 2s - scroll events
                local tab_ticks=30      # every 3s - tab switches
                local current_tab=1
                local scroll_direction=1 # 1 = down, -1 = up

                # Calculate end time (stop 2s before test ends)
                local inject_end=$((SECONDS + duration - 2))

                # Counters for summary
                local keyboard_count=0
                local scroll_count=0
                local tab_switch_count=0

                while [[ $SECONDS -lt $inject_end ]]; do
                    tick=$((tick + 1))

                    # Periodic keyboard input (every 500ms)
                    # Send a single character to exercise keyboard input latency path
                    if (( tick % keyboard_ticks == 0 )); then
                        osascript -e 'tell application "iTerm2" to tell current session of current window to write text "."' 2>/dev/null || true
                        keyboard_count=$((keyboard_count + 1))
                    fi

                    # Periodic scroll events (every 2s)
                    # Alternate between Page Down and Page Up to stay in view
                    if (( tick % scroll_ticks == 0 )); then
                        if (( scroll_direction == 1 )); then
                            # Page Down (key code 121)
                            osascript -e 'tell application "System Events" to tell process "iTerm2" to key code 121' 2>/dev/null || true
                        else
                            # Page Up (key code 116)
                            osascript -e 'tell application "System Events" to tell process "iTerm2" to key code 116' 2>/dev/null || true
                        fi
                        scroll_direction=$((scroll_direction * -1))
                        scroll_count=$((scroll_count + 1))
                    fi

                    # Periodic tab switches (every 3s)
                    if (( tick % tab_ticks == 0 )); then
                        current_tab=$(( (current_tab % tab_count) + 1 ))
                        osascript -e "tell application \"iTerm2\" to tell current window to select tab $current_tab" 2>/dev/null || true
                        tab_switch_count=$((tab_switch_count + 1))
                    fi

                    sleep 0.1
                done

                echo "Injection complete: keyboard=$keyboard_count scroll=$scroll_count tabs=$tab_switch_count"
            ) &
            inject_pid=$!
        fi
    fi

    # Wait for AppleScript to complete
    wait "$applescript_pid" || true

    # Wait for injection to complete if it was started
    if [[ "$inject_mode" == true && -n "$inject_pid" ]]; then
        wait "$inject_pid" 2>/dev/null || true
    fi

    echo "All tabs completed. Waiting for profiler..."
    if [[ -n "$profiler_pid" ]]; then
        wait "$profiler_pid" 2>/dev/null || true
    fi

    # Signal DTrace to output its END block summary before quitting iTerm2
    # (timeout is just a failsafe; we want a clean shutdown here)
    if [[ "$dtrace_mode" == true && -n "$dtrace_pid" ]]; then
        echo "Signaling DTrace to finish..."
        kill -INT "$dtrace_pid" 2>/dev/null || true
        # Give dtrace time to output its END block
        for _ in {1..20}; do
            if ! kill -0 "$dtrace_pid" 2>/dev/null; then
                break
            fi
            sleep 0.2
        done
        wait "$dtrace_pid" 2>/dev/null || true
    fi

    # Aggregate iteration stats from tabs (if sync mode)
    local total_iterations=0
    local iteration_rate=0
    if [[ "$synchronized_start" == true && -d "$sync_dir" ]]; then
        local stats_file iterations
        for stats_file in "$sync_dir"/stats_*; do
            if [[ -f "$stats_file" ]]; then
                iterations=$(head -1 "$stats_file")
                # Only add well-formed counts; anything else would abort the
                # arithmetic expansion. 10# avoids octal parsing of "08".
                if [[ "$iterations" =~ ^[0-9]+$ ]]; then
                    total_iterations=$((total_iterations + 10#$iterations))
                else
                    echo "Warning: ignoring malformed stats file: $stats_file" >&2
                fi
            fi
        done
        if (( duration > 0 )); then
            iteration_rate=$((total_iterations / duration))
        fi
        echo ""
        echo "============================================================"
        echo "Iteration Summary"
        echo "============================================================"
        echo " Total iterations: $total_iterations"
        echo " Tabs: $tab_count"
        echo " Duration: ${duration}s"
        echo " Rate: $iteration_rate iterations/sec"
        echo "============================================================"

        # Clean up sync dir
        rm -rf "$sync_dir"
    fi

    echo ""
    python3 "$analyze_script" "$profile_output"

    # Stop video recording if it was started
    if [[ -n "$video_pid" ]]; then
        echo "Stopping video recording..."
        kill -INT "$video_pid" 2>/dev/null || true
        wait "$video_pid" 2>/dev/null || true
        if [[ -f "$video_file" ]]; then
            echo "Video saved: $video_file"
        fi
    fi

    # Quit the test iTerm2 instance cleanly via AppleScript (Command-Q)
    echo ""
    echo "Shutting down test iTerm2..."
    osascript -e 'tell application "iTerm2" to quit' 2>/dev/null || true

    # Wait for app to fully terminate (needed for MTPerfWriteToFile to complete)
    for _ in {1..30}; do
        if ! pgrep -f "$app_path" >/dev/null 2>&1; then
            break
        fi
        sleep 0.2
    done

    # Parse and display DTrace UX metrics (only if --dtrace was specified)
    local content_frames="0" refreshes="0" metal_frames="0"
    local content_fps="0" refresh_fps="0" metal_fps="0"
    local join_calls="0" join_time_ns="0" join_time_us="0"
    local adaptive_mode="-"
    local dtrace_duration="$duration"

    if [[ "$dtrace_mode" == true && -f "$dtrace_output" ]]; then
        # Parse DTrace UX metrics output (missing values default to 0)
        dtrace_duration=$(_perf_first_int 'duration:' "$dtrace_output" "$duration")
        [[ "$dtrace_duration" == "0" ]] && dtrace_duration="$duration"

        # Parse frame counts
        content_frames=$(_perf_first_int 'Content frames.*setNeedsDisplay' "$dtrace_output")
        refreshes=$(_perf_first_int 'Total refreshes.*cadence' "$dtrace_output")
        metal_frames=$(_perf_first_int 'Metal frames.*GPU' "$dtrace_output")

        # Parse rates (from RATES section)
        content_fps=$(_perf_first_int 'Content frames/sec:' "$dtrace_output")
        refresh_fps=$(_perf_first_int 'Refreshes/sec:' "$dtrace_output")
        metal_fps=$(_perf_first_int 'Metal frames/sec:' "$dtrace_output")

        # Parse join contention
        join_calls=$(_perf_first_int 'Light join calls:' "$dtrace_output")
        join_time_ns=$(_perf_first_int 'Light join time:' "$dtrace_output")

        # Parse adaptive mode (count which mode was used most)
        local mode_60fps mode_30fps mode_1fps
        mode_60fps=$(_perf_first_int '60fps mode calls:' "$dtrace_output")
        mode_30fps=$(_perf_first_int '30fps mode calls:' "$dtrace_output")
        mode_1fps=$(_perf_first_int '1fps mode calls:' "$dtrace_output")

        # Parse FairnessScheduler metrics (0 = legacy path)
        local fairness_enqueue fairness_execute
        fairness_enqueue=$(_perf_first_int 'SessionDidEnqueueWork:' "$dtrace_output")
        fairness_execute=$(_perf_first_int 'ExecuteTurn calls:' "$dtrace_output")

        # Convert join time to microseconds
        if [[ "$join_time_ns" -gt 0 ]]; then
            join_time_us=$((join_time_ns / 1000))
        fi

        # Determine dominant adaptive mode
        if [[ "$mode_30fps" -gt "$mode_60fps" && "$mode_30fps" -gt "$mode_1fps" ]]; then
            adaptive_mode="30fps"
        elif [[ "$mode_60fps" -gt "$mode_1fps" ]]; then
            adaptive_mode="60fps"
        elif [[ "$mode_1fps" -gt 0 ]]; then
            adaptive_mode="1fps"
        fi

        if [[ "$refreshes" != "0" ]]; then
            echo ""
            echo "============================================================"
            echo "DTrace UX Metrics (duration: ${dtrace_duration}s)"
            echo "============================================================"
            printf " Adaptive mode: %s\n" "$adaptive_mode"
            printf " Content frames: %s (lines marked dirty)\n" "$content_frames"
            printf " Refreshes (cadence): %s\n" "$refreshes"
            printf " Metal frames (GPU): %s\n" "$metal_frames"
            echo " ---"
            printf " Update cadence: %s fps (refreshes/sec)\n" "$refresh_fps"
            printf " Metal frame rate: %s fps\n" "$metal_fps"
            echo " ---"
            printf " Join calls: %s\n" "$join_calls"
            printf " Join time: %s us (total)\n" "$join_time_us"
            echo " ---"
            if [[ "$fairness_execute" -gt 0 ]]; then
                printf " FairnessScheduler: ACTIVE\n"
                printf " Enqueue calls: %s\n" "$fairness_enqueue"
                printf " ExecuteTurn calls: %s\n" "$fairness_execute"
            else
                printf " FairnessScheduler: INACTIVE (legacy path)\n"
            fi
            echo "============================================================"
        fi
    fi

    # Display self-time analysis if enabled
    if [[ "$self_time_mode" == true && -f "$self_time_output" ]]; then
        echo ""
        echo "============================================================"
        echo "Self-Time Analysis (functions that burn CPU directly)"
        echo "============================================================"
        local self_time_analyze_script="$script_dir/analyze_self_time.py"
        if [[ -f "$self_time_analyze_script" ]]; then
            python3 "$self_time_analyze_script" "$self_time_output"
        else
            echo "Warning: analyze_self_time.py not found, showing raw output:"
            cat "$self_time_output"
        fi
        echo ""
        echo "Raw self-time output: $self_time_output"
    fi

    # Store iteration stats in result arrays
    RESULT_TAB_COUNTS+=("$tab_count")
    RESULT_ITERATION_RATES+=("$iteration_rate")
    RESULT_TOTAL_ITERATIONS+=("$total_iterations")

    # Store DTrace metrics in result arrays (if enabled)
    if [[ "$dtrace_mode" == true ]]; then
        RESULT_CONTENT_FRAMES+=("$content_frames")
        RESULT_REFRESHES+=("$refreshes")
        RESULT_METAL_FRAMES+=("$metal_frames")
        RESULT_CONTENT_FPS+=("$content_fps")
        RESULT_REFRESH_FPS+=("$refresh_fps")
        RESULT_METAL_FPS+=("$metal_fps")
        RESULT_JOIN_CALLS+=("$join_calls")
        RESULT_JOIN_TIME_US+=("$join_time_us")
        RESULT_ADAPTIVE_MODE+=("$adaptive_mode")
        # Derived: iterations per metal frame
        if [[ "$metal_frames" -gt 0 ]]; then
            RESULT_ITERS_PER_FRAME+=("$(_perf_ratio "$total_iterations" "$metal_frames")")
        else
            RESULT_ITERS_PER_FRAME+=("-")
        fi
    fi

    # Look for latency instrumentation file (from MTPerfMetrics): the newest
    # /tmp/mtperf_latency_*.txt, accepted only if written since this run began.
    local latency_file=""
    for _ in {1..30}; do
        latency_file="$(ls -t /tmp/mtperf_latency_*.txt 2>/dev/null | head -1 || true)"
        if [[ -n "$latency_file" && -f "$latency_file" ]]; then
            local latency_mtime
            latency_mtime=$(stat -f "%m" "$latency_file" 2>/dev/null || echo 0)
            if [[ "$latency_mtime" -ge "$run_start_epoch" ]]; then
                break
            fi
        fi
        latency_file=""
        sleep 0.2
    done

    # Parse and display latency metrics
    if [[ -n "$latency_file" && -f "$latency_file" ]]; then
        echo ""
        echo "============================================================"
        echo "Latency Instrumentation"
        echo "============================================================"

        local section="header" # header, context, latency, counters
        # Parse latency CSV with multiple sections:
        # - Context section: key,value pairs for settings/state
        # - Latency section: metric,count,mean_ns,min_ns,max_ns,stddev_ns
        # - Counters section: metric,count
        local field1 field2 field3 field4 field5 field6
        while IFS=',' read -r field1 field2 field3 field4 field5 field6; do
            # Track section changes
            case "$field1" in
                "# Context")
                    section="context"
                    echo " --- Context ---"
                    continue
                    ;;
                "# metric")
                    section="latency"
                    echo ""
                    continue
                    ;;
                "# Counters")
                    section="counters"
                    echo ""
                    echo " --- Counters ---"
                    continue
                    ;;
                \#*)
                    # Skip other comment lines
                    continue
                    ;;
            esac

            case "$section" in
                context)
                    # Context format: key,value (2 fields)
                    printf " %-40s %s\n" "$field1" "$field2"
                    ;;
                latency)
                    # Latency format: metric,count,mean_ns,min_ns,max_ns,stddev_ns (6 fields)
                    # Skip metrics with zero count
                    [[ "$field2" == "0" ]] && continue
                    # Convert nanoseconds to milliseconds for display
                    printf " %-20s count: %6s mean: %8s ms min: %8s ms max: %8s ms\n" \
                        "$field1" "$field2" \
                        "$(_perf_ns_to_ms "$field3")" "$(_perf_ns_to_ms "$field4")" "$(_perf_ns_to_ms "$field5")"
                    ;;
                counters)
                    # Counter format: metric,count (2 fields)
                    [[ "$field2" == "0" ]] && continue
                    printf " %-24s %s\n" "$field1" "$field2"
                    ;;
            esac
        done < "$latency_file"

        # Extract timer metrics for analysis (0 when a counter is absent)
        local gcd_timer_create gcd_timer_fire ns_timer_create ns_timer_fire
        local cadence_no_change cadence_mismatch
        gcd_timer_create=$(_perf_latency_counter GCDTimerCreate "$latency_file")
        gcd_timer_fire=$(_perf_latency_counter GCDTimerFire "$latency_file")
        ns_timer_create=$(_perf_latency_counter NSTimerCreate "$latency_file")
        ns_timer_fire=$(_perf_latency_counter NSTimerFire "$latency_file")
        cadence_no_change=$(_perf_latency_counter CadenceNoChange "$latency_file")
        cadence_mismatch=$(_perf_latency_counter CadenceMismatch "$latency_file")

        # Derived values, computed once and used for both display and storage
        local gcd_ratio="-" ns_ratio="-" mismatch_pct="-" total_checks=0
        if [[ "$gcd_timer_create" -gt 0 ]]; then
            gcd_ratio=$(_perf_ratio "$gcd_timer_fire" "$gcd_timer_create")
        fi
        if [[ "$ns_timer_create" -gt 0 ]]; then
            ns_ratio=$(_perf_ratio "$ns_timer_fire" "$ns_timer_create")
        fi
        if [[ "$cadence_mismatch" -gt 0 || "$cadence_no_change" -gt 0 ]]; then
            total_checks=$((cadence_mismatch + cadence_no_change))
            mismatch_pct=$(_perf_ratio "$cadence_mismatch" "$total_checks" 100)
        fi

        # Show timer efficiency analysis if we have timer data
        if [[ "$gcd_timer_create" -gt 0 || "$gcd_timer_fire" -gt 0 || "$ns_timer_create" -gt 0 || "$ns_timer_fire" -gt 0 ]]; then
            echo ""
            echo " --- Timer Analysis ---"
            if [[ "$gcd_ratio" != "-" ]]; then
                printf " GCD fires/create: %s (create: %s, fire: %s)\n" "$gcd_ratio" "$gcd_timer_create" "$gcd_timer_fire"
            fi
            if [[ "$ns_ratio" != "-" ]]; then
                printf " NSTimer fires/create: %s (create: %s, fire: %s)\n" "$ns_ratio" "$ns_timer_create" "$ns_timer_fire"
            fi
            if [[ "$mismatch_pct" != "-" ]]; then
                printf " Cadence mismatch rate: %s%% (%s of %s checks)\n" "$mismatch_pct" "$cadence_mismatch" "$total_checks"
            fi
        fi

        echo ""
        echo " Latency file: $latency_file"
        echo "============================================================"

        # Store latency metrics in result arrays for summary table
        # Extract KeyboardInput latency (mean and max in ms)
        local keydown_line
        keydown_line=$(grep "^KeyboardInput," "$latency_file" 2>/dev/null || true)
        if [[ -n "$keydown_line" ]]; then
            RESULT_KEYDOWN_MEAN_MS+=("$(_perf_ns_to_ms "$(echo "$keydown_line" | cut -d, -f3)")")
            RESULT_KEYDOWN_MAX_MS+=("$(_perf_ns_to_ms "$(echo "$keydown_line" | cut -d, -f5)")")
        else
            RESULT_KEYDOWN_MEAN_MS+=("-")
            RESULT_KEYDOWN_MAX_MS+=("-")
        fi

        # Extract TitleUpdate latency (if --title was used)
        local title_line
        title_line=$(grep "^TitleUpdate," "$latency_file" 2>/dev/null || true)
        if [[ -n "$title_line" ]]; then
            RESULT_TITLE_MEAN_MS+=("$(_perf_ns_to_ms "$(echo "$title_line" | cut -d, -f3)")")
            RESULT_TITLE_MAX_MS+=("$(_perf_ns_to_ms "$(echo "$title_line" | cut -d, -f5)")")
        else
            RESULT_TITLE_MEAN_MS+=("-")
            RESULT_TITLE_MAX_MS+=("-")
        fi

        # Store timer metrics (GCD and NS separately)
        RESULT_GCD_TIMER_CREATE+=("$gcd_timer_create")
        RESULT_GCD_TIMER_FIRE+=("$gcd_timer_fire")
        RESULT_GCD_FIRE_RATIO+=("$gcd_ratio")
        RESULT_NS_TIMER_CREATE+=("$ns_timer_create")
        RESULT_NS_TIMER_FIRE+=("$ns_timer_fire")
        RESULT_NS_FIRE_RATIO+=("$ns_ratio")

        # Store cadence mismatch percentage
        RESULT_CADENCE_MISMATCH_PCT+=("$mismatch_pct")
    else
        # No latency file - store placeholders
        RESULT_KEYDOWN_MEAN_MS+=("-")
        RESULT_KEYDOWN_MAX_MS+=("-")
        RESULT_TITLE_MEAN_MS+=("-")
        RESULT_TITLE_MAX_MS+=("-")
        RESULT_GCD_TIMER_CREATE+=("-")
        RESULT_GCD_TIMER_FIRE+=("-")
        RESULT_GCD_FIRE_RATIO+=("-")
        RESULT_NS_TIMER_CREATE+=("-")
        RESULT_NS_TIMER_FIRE+=("-")
        RESULT_NS_FIRE_RATIO+=("-")
        RESULT_CADENCE_MISMATCH_PCT+=("-")
    fi

    # Wait a bit before next run to ensure clean state
    sleep 2
}
# Print the cross-run summary as a Unicode box-drawing table followed by a
# legend. One column is printed per entry of RESULT_TAB_COUNTS; rows come from
# the other global RESULT_* arrays filled by run_single_test. The latency,
# timer and DTrace sections are printed only when they contain data (DTrace
# additionally requires dtrace_mode == true).
# Prints "No results to display" and returns when no run was recorded.
print_summary_table() {
    local num_runs=${#RESULT_TAB_COUNTS[@]}
    if [[ $num_runs -eq 0 ]]; then
        echo "No results to display"
        return
    fi

    # Column widths: every border and the header cell are derived from these,
    # so the frame always lines up with the rows printed by print_row.
    local label_width=24
    local col_width=10

    # A label cell is "│ <label padded to label_width> ", i.e. label_width + 2
    # characters between the vertical bars.
    local label_rule col_rule
    label_rule=$(printf '─%.0s' $(seq 1 $((label_width + 2))))
    col_rule=$(printf '─%.0s' $(seq 1 "$col_width"))

    # Build header row with tab counts (single-line box drawing)
    local header
    header=$(printf "│ %-${label_width}s " "Metric")
    local top_border="┌${label_rule}"
    local header_sep="├${label_rule}"
    local section_sep="├${label_rule}"
    local bottom_border="└${label_rule}"

    local tc
    for tc in "${RESULT_TAB_COUNTS[@]}"; do
        if [[ $tc -eq 1 ]]; then
            header+="│$(printf "%${col_width}s" "1 Tab")"
        else
            header+="│$(printf "%${col_width}s" "$tc Tabs")"
        fi
        top_border+="┬${col_rule}"
        header_sep+="┼${col_rule}"
        section_sep+="┼${col_rule}"
        bottom_border+="┴${col_rule}"
    done
    header+="│"
    top_border+="┐"
    header_sep+="┤"
    section_sep+="┤"
    bottom_border+="┘"

    echo "$top_border"
    echo "$header"
    echo "$header_sep"

    # Helper function to print a row: $1 is the label, the remaining
    # arguments are the per-run values (one per column).
    print_row() {
        local label=$1
        shift
        local values=("$@")
        printf "│ %-${label_width}s " "$label"
        local val
        for val in "${values[@]}"; do
            printf "│%${col_width}s" "$val"
        done
        echo "│"
    }

    # Iteration stats (stress_load.py throughput); "/s" is appended to each rate
    print_row "Iteration rate" "${RESULT_ITERATION_RATES[@]/%//s}"
    print_row "Total iterations" "${RESULT_TOTAL_ITERATIONS[@]}"

    # Latency metrics (from instrumentation)
    local has_latency=false has_title=false
    local val
    for val in "${RESULT_KEYDOWN_MEAN_MS[@]}"; do
        [[ "$val" != "-" ]] && has_latency=true && break
    done
    if [[ "$has_latency" == true ]]; then
        echo "$section_sep"
        print_row "KeyDown mean (ms)" "${RESULT_KEYDOWN_MEAN_MS[@]}"
        print_row "KeyDown max (ms)" "${RESULT_KEYDOWN_MAX_MS[@]}"
        # Only show title latency if we have data
        for val in "${RESULT_TITLE_MEAN_MS[@]}"; do
            [[ "$val" != "-" ]] && has_title=true && break
        done
        if [[ "$has_title" == true ]]; then
            print_row "TitleUpdate mean (ms)" "${RESULT_TITLE_MEAN_MS[@]}"
            print_row "TitleUpdate max (ms)" "${RESULT_TITLE_MAX_MS[@]}"
        fi
    fi

    # Timer efficiency metrics (GCD and NS separately)
    local has_gcd_timer=false has_ns_timer=false
    for val in "${RESULT_GCD_FIRE_RATIO[@]}"; do
        [[ "$val" != "-" && "$val" != "0" ]] && has_gcd_timer=true && break
    done
    for val in "${RESULT_NS_FIRE_RATIO[@]}"; do
        [[ "$val" != "-" && "$val" != "0" ]] && has_ns_timer=true && break
    done
    if [[ "$has_gcd_timer" == true || "$has_ns_timer" == true ]]; then
        echo "$section_sep"
        if [[ "$has_gcd_timer" == true ]]; then
            print_row "GCD fire/create" "${RESULT_GCD_FIRE_RATIO[@]}"
        fi
        if [[ "$has_ns_timer" == true ]]; then
            print_row "NSTimer fire/create" "${RESULT_NS_FIRE_RATIO[@]}"
        fi
        print_row "Cadence mismatch %" "${RESULT_CADENCE_MISMATCH_PCT[@]}"
    fi

    # DTrace UX metrics (only if --dtrace was used)
    if [[ "$dtrace_mode" == true && ${#RESULT_REFRESHES[@]} -gt 0 ]]; then
        echo "$section_sep"
        print_row "Adaptive mode" "${RESULT_ADAPTIVE_MODE[@]}"
        print_row "Refresh FPS" "${RESULT_REFRESH_FPS[@]}"
        print_row "Metal FPS" "${RESULT_METAL_FPS[@]}"
        print_row "Iters/frame" "${RESULT_ITERS_PER_FRAME[@]}"
        echo "$section_sep"
        print_row "Refreshes" "${RESULT_REFRESHES[@]}"
        print_row "Metal frames" "${RESULT_METAL_FRAMES[@]}"
        print_row "Content updates" "${RESULT_CONTENT_FRAMES[@]}"
        echo "$section_sep"
        print_row "Join calls" "${RESULT_JOIN_CALLS[@]}"
        print_row "Join time (us)" "${RESULT_JOIN_TIME_US[@]}"
    fi

    echo "$bottom_border"
    echo ""
    echo "Legend:"
    echo " Iteration rate - stress_load.py output lines/sec (terminal throughput)"
    if [[ "$has_latency" == true ]]; then
        echo " KeyDown - Latency from keypress to screen update (ms)"
        if [[ "$has_title" == true ]]; then
            echo " TitleUpdate - Latency for OSC 0 title change processing (ms)"
        fi
    fi
    if [[ "$has_gcd_timer" == true || "$has_ns_timer" == true ]]; then
        [[ "$has_gcd_timer" == true ]] && echo " GCD fire/create - GCD timer fires per create (higher = better reuse)"
        [[ "$has_ns_timer" == true ]] && echo " NSTimer fire/create - NSTimer fires per create (higher = better reuse)"
        echo " Cadence mismatch - % of cadence checks with timing drift"
    fi
    if [[ "$dtrace_mode" == true ]]; then
        echo " Adaptive mode - Frame rate mode (60fps=low load, 30fps=high load)"
        echo " Refresh FPS - Cadence-driven refresh rate"
        echo " Metal FPS - GPU frame submissions/sec"
        echo " Iters/frame - Stress iterations per Metal frame (throughput)"
        echo " Refreshes - Total cadence-driven refresh calls"
        echo " Metal frames - Total GPU frame submissions"
        echo " Content updates - Lines marked dirty (setNeedsDisplayOnLine calls)"
        echo " Join calls - performBlockWithJoinedThreads calls (thread sync)"
        echo " Join time - Total time in joined blocks (microseconds)"
    fi
}
# Forever mode: launch iTerm2, start the stress load in every tab and return
# without profiling or collecting any data. Only the first entry of the global
# tab_counts array is used. Exits the script with status 1 when the iTerm2
# process cannot be found.
run_forever() {
    local tab_count=${tab_counts[0]} # Use first tab count only

    echo ""
    echo "Forever mode: $tab_count tab(s), no profiling"

    # tmux sessions outlive this script unless the user interrupts it
    if [[ "$tmux_mode" == true ]]; then
        echo ""
        echo "WARNING: --tmux with --forever mode cannot guarantee session cleanup."
        echo " tmux sessions will run until manually stopped."
        echo " Press Ctrl-C to attempt cleanup of tmux sessions and iTerm2."
        echo ""
    fi

    # Recording needs a fixed duration, which forever mode does not have
    if [[ "$video_mode" == true ]]; then
        echo ""
        echo "WARNING: --video is not supported with --forever mode (requires fixed duration)."
        echo " Use macOS screen recording (Cmd-Shift-5) for manual recording."
        echo ""
    fi

    # Launch iTerm2, passing the suite only when one is configured
    local open_args=(-a "$app_path")
    if [[ -n "$suite_name" ]]; then
        open_args+=(--args -suite "$suite_name")
    fi
    open "${open_args[@]}"
    sleep 2

    # Poll for the iTerm2 PID: up to 10 attempts, 0.5s apart
    local iterm_pid=""
    local attempts_left=10
    while (( attempts_left > 0 )); do
        iterm_pid=$(pgrep -x iTerm2 || true)
        [[ -n "$iterm_pid" ]] && break
        sleep 0.5
        attempts_left=$((attempts_left - 1))
    done

    if [[ -z "$iterm_pid" ]]; then
        echo "Error: Could not find iTerm2 process" >&2
        exit 1
    fi

    echo "Found iTerm2 PID: $iterm_pid"
    echo "Launching $tab_count tabs with stress load..."

    # In --tmux mode, pick a unique session prefix and publish it (with the
    # tab count) in the globals that cleanup reads
    local tmux_prefix=""
    if [[ "$tmux_mode" == true ]]; then
        tmux_prefix="iterm2-perf-$$-$(date +%s)"
        tmux_session_prefix="$tmux_prefix"
        tmux_tab_count="$tab_count"
    fi

    # Launch the tabs in the background, then block until the AppleScript has
    # finished creating them; its exit status is deliberately ignored
    local launcher_args=(
        "$tab_count" "$load_script" "$stress_mode" "$title_arg"
        "$speed_arg" "$fps_arg" "$tmux_prefix"
    )
    osascript "$applescript_forever_file" "${launcher_args[@]}" &
    local applescript_pid=$!
    wait "$applescript_pid" || true

    echo ""
    echo "Stress load running."
}
# -----------------------------------------------------------------------------
# Script entry point: print the run configuration, warn about non-standard
# test conditions, then run either forever mode or one test per tab count.
# -----------------------------------------------------------------------------

# Capture power information at script start
power_source=$(get_power_source)
energy_mode=$(get_energy_mode)

# Main execution
echo "Multi-Tab Stress Test"
echo "====================="
echo "App: $app_path"
echo "Version: $app_version"
# Build metadata is printed only when a commit is known; each
# "[[ cond ]] && echo" line prints only when its condition holds.
if [[ -n "$build_commit" ]]; then
    # Only the first 12 characters of the commit are shown.
    echo "Commit: ${build_commit:0:12}"
    [[ -n "$build_branch" ]] && echo "Branch: $build_branch"
    [[ -n "$build_date" ]] && echo "Build date: $build_date"
    [[ "$build_uncommitted" == "true" ]] && echo "Uncommitted changes: yes"
fi
echo "Power Source: $power_source"
echo "Energy Mode: $energy_mode"

# Check if using isolated suite (not user's normal prefs)
# An empty suite name or "com.googlecode.iterm2" counts as NOT isolated.
using_isolated_suite=false
if [[ -n "$suite_name" && "$suite_name" != "com.googlecode.iterm2" ]]; then
    using_isolated_suite=true
fi

# Warn if not on AC power, not in high performance mode, or not using isolated suite
# (the same three conditions are re-checked for the closing warning below).
if [[ "$power_source" != "AC Power" || "$energy_mode" != "High Power/High Performance" || "$using_isolated_suite" == false ]]; then
    echo ""
    echo "WARNING: Performance testing should use standard conditions for reliable results!"
    echo ""
    echo " Current issues:"
    [[ "$power_source" != "AC Power" ]] && echo " - Running on $power_source (not AC Power)"
    [[ "$energy_mode" != "High Power/High Performance" ]] && echo " - Energy Mode: $energy_mode (not High Power/High Performance)"
    [[ "$using_isolated_suite" == false ]] && echo " - Not using isolated suite (user prefs may affect results)"
    echo ""
    echo " These conditions may produce unreliable or unexpected performance results."
    echo ""
    echo " Recommendations:"
    [[ "$power_source" != "AC Power" ]] && echo " - Connect to AC Power"
    [[ "$energy_mode" != "High Power/High Performance" ]] && echo " - Set Energy Mode to High Power/High Performance"
    [[ "$using_isolated_suite" == false ]] && echo " - Use isolated suite (default: --suite=com.iterm2.defaults)"
    echo ""
fi

# Echo the effective configuration. Note that the "Mode:" label is used twice:
# here for forever mode and further down for the stress mode.
if [[ "$forever_mode" == true ]]; then
    echo "Mode: forever (no profiling)"
else
    echo "Duration: ${duration}s"
fi
echo "Tab counts: ${tab_counts[*]}"
echo "Synchronized start: $synchronized_start"
echo "Inject interactions: $inject_mode"
echo "DTrace mode: $dtrace_mode"
echo "Self-time profiling: $self_time_mode"
echo "Tmux wrapped: $tmux_mode"
echo "Video recording: $video_mode"
[[ "$video_mode" == true ]] && echo "Video output dir: $video_output_dir"
echo "Speed: $speed_arg"
if [[ -n "$suite_name" ]]; then
    echo "Suite: $suite_name"
else
    echo "Suite: (none - using user preferences)"
fi
if [[ -n "$stress_mode" ]]; then
    # Strip a leading "--mode=" so only the mode list itself is shown.
    echo "Mode: ${stress_mode#--mode=}"
fi
echo "Load script: $(basename "$load_script")"
[[ -n "$title_arg" ]] && echo "Title injection: ${title_arg}ms"

# Run forever mode or normal mode
if [[ "$forever_mode" == true ]]; then
    run_forever
else
    # Run tests for each tab count
    # Iterates array indices so each run also receives its position ($i).
    for i in "${!tab_counts[@]}"; do
        run_single_test "${tab_counts[$i]}" "$i"
    done

    # Print summary table if multiple runs
    if [[ ${#tab_counts[@]} -gt 1 ]]; then
        echo ""
        echo "Summary Table"
        print_summary_table
    fi

    # Repeat warning at end if not on AC power, not in high performance mode, or not using isolated suite
    if [[ "$power_source" != "AC Power" || "$energy_mode" != "High Power/High Performance" || "$using_isolated_suite" == false ]]; then
        echo ""
        echo "============================================================"
        echo "WARNING: These results may not reflect optimal performance!"
        echo "============================================================"
        echo ""
        echo "Issues detected:"
        [[ "$power_source" != "AC Power" ]] && echo " - Power Source: $power_source (not AC Power)"
        [[ "$energy_mode" != "High Power/High Performance" ]] && echo " - Energy Mode: $energy_mode (not High Power/High Performance)"
        [[ "$using_isolated_suite" == false ]] && echo " - Not using isolated suite (user prefs may affect results)"
        echo ""
        echo "Performance testing should use standard conditions for reliable results."
        echo "Current configuration may produce unreliable or unexpected performance."
        echo ""
        echo "Recommendations:"
        [[ "$power_source" != "AC Power" ]] && echo " - Connect to AC Power"
        [[ "$energy_mode" != "High Power/High Performance" ]] && echo " - Set Energy Mode to High Power/High Performance"
        [[ "$using_isolated_suite" == false ]] && echo " - Use isolated suite (default: --suite=com.iterm2.defaults)"
        echo "============================================================"
    fi
fi
+ +============================================================ +Iteration Summary +============================================================ + Total iterations: 139100 + Tabs: 1 + Duration: 15s + Rate: 9273 iterations/sec +============================================================ + + +Analyzing profile: /tmp/iterm2_multi_tab_profile_20260127_165839.txt + +============================================================ +Profile Summary +============================================================ + boolForKey: 8 occurrences + intForKey: 9 occurrences + objectForKey: 23 occurrences + updateConfigurationFields: 22 occurrences + NSUserDefaults: 24 occurrences + @synchronized: 0 occurrences + os_unfair_lock: 30 occurrences + StringToScreenChars: 55 occurrences + ScreenCharArray: 91 occurrences + bidi/Bidi: 18 occurrences + VT100Terminal: 106 occurrences + executeToken: 19 occurrences + Metal: 874 occurrences + iTermTextRenderer: 95 occurrences +============================================================ + +============================================================ +Category Summary +============================================================ + Allocations: 27 occurrences + ObjC retain/release: 718 occurrences + Autorelease pools: 2 occurrences + Strings/Unicode: 802 occurrences + CoreText: 240 occurrences + CoreGraphics: 468 occurrences + AppKit geometry: 1059 occurrences + Locks/dispatch: 55 occurrences + Terminal parsing: 715 occurrences + Rendering: 969 occurrences + Process/cache: 48 occurrences +============================================================ + +============================================================ +Top iTerm2 Symbols (by sample count) +============================================================ + 17140 main + 17140 __40-[iTermSocket listenWithBacklog:accept:]_block_invoke + 16359 -[TaskNotifier run] + 85 DYLD-STUB$$objc_release + 52 DYLD-STUB$$objc_retainAutoreleasedReturnValue + 44 DYLD-STUB$$objc_retain + 28 __35-[iTermProcessCache 
updateIfNeeded]_block_invoke_2 + 22 -[iTermMetalFrameData measureTimeForStat:ofBlock:] + 21 -[iTermProcessCache reallyUpdate] + 14 DYLD-STUB$$objc_autoreleaseReturnValue + 13 iTerm2::Sampler::value_for_percentile(double const&) const + 10 -[PTYTask fd] + 10 -[iTermMultiServerJobManager fd] + 10 + 10 +[iTermPreferences objectForKey:] +============================================================ + +Full profile saved to: /tmp/iterm2_multi_tab_profile_20260127_165839.txt + +Shutting down test iTerm2... + +============================================================ +DTrace UX Metrics (duration: 18s) +============================================================ + Adaptive mode: 30fps + Content frames: 39947 (lines marked dirty) + Refreshes (cadence): 385 + Metal frames (GPU): 600 + --- + Apparent frame rate: 21 fps (refreshes/sec) + Metal frame rate: 33 fps + --- + Join calls: 380 + Join time: 1323 us (total) +============================================================ + +Run 2: 2 tab(s), 15s duration +Synchronized start enabled (sync dir: /tmp/iterm2-perf-sync.V73bGF) +Found iTerm2 PID: 21299 +Launching 2 tabs, each running stress test for 15 seconds... +Waiting for all tabs to signal ready... +All 2 tabs ready. +Starting profiler for 20 seconds... +Waiting for DTrace to attach... +DTrace attached. +Sending go signal... +All tabs completed. Waiting for profiler... +Signaling DTrace to finish... 
+ +============================================================ +Iteration Summary +============================================================ + Total iterations: 153503 + Tabs: 2 + Duration: 15s + Rate: 10233 iterations/sec +============================================================ + + +Analyzing profile: /tmp/iterm2_multi_tab_profile_20260127_165919.txt + +============================================================ +Profile Summary +============================================================ + boolForKey: 10 occurrences + intForKey: 3 occurrences + objectForKey: 16 occurrences + updateConfigurationFields: 30 occurrences + NSUserDefaults: 33 occurrences + @synchronized: 0 occurrences + os_unfair_lock: 48 occurrences + StringToScreenChars: 55 occurrences + ScreenCharArray: 88 occurrences + bidi/Bidi: 40 occurrences + VT100Terminal: 103 occurrences + executeToken: 24 occurrences + Metal: 986 occurrences + iTermTextRenderer: 96 occurrences +============================================================ + +============================================================ +Category Summary +============================================================ + Allocations: 28 occurrences + ObjC retain/release: 760 occurrences + Autorelease pools: 4 occurrences + Strings/Unicode: 900 occurrences + CoreText: 330 occurrences + CoreGraphics: 493 occurrences + AppKit geometry: 1067 occurrences + Locks/dispatch: 71 occurrences + Terminal parsing: 752 occurrences + Rendering: 1082 occurrences + Process/cache: 60 occurrences +============================================================ + +============================================================ +Top iTerm2 Symbols (by sample count) +============================================================ + 16942 main + 16942 __40-[iTermSocket listenWithBacklog:accept:]_block_invoke + 16040 -[TaskNotifier run] + 253 + 248 closure #3 in TwoTierTokenQueue.enumerateTokenArrayGroups(_:) + 145 -[iTermApplication sendEvent:] + 103 
DYLD-STUB$$objc_release + 48 DYLD-STUB$$objc_retainAutoreleasedReturnValue + 42 DYLD-STUB$$objc_retain + 22 __35-[iTermProcessCache updateIfNeeded]_block_invoke_2 + 21 -[iTermMetalFrameData measureTimeForStat:ofBlock:] + 16 -[iTermProcessCache reallyUpdate] + 14 DYLD-STUB$$objc_autoreleaseReturnValue + 14 iTerm2::Sampler::value_for_percentile(double const&) const + 13 +[iTermPreferences objectForKey:] +============================================================ + +Full profile saved to: /tmp/iterm2_multi_tab_profile_20260127_165919.txt + +Shutting down test iTerm2... + +============================================================ +DTrace UX Metrics (duration: 18s) +============================================================ + Adaptive mode: 30fps + Content frames: 40255 (lines marked dirty) + Refreshes (cadence): 404 + Metal frames (GPU): 638 + --- + Apparent frame rate: 22 fps (refreshes/sec) + Metal frame rate: 35 fps + --- + Join calls: 396 + Join time: 1541 us (total) +============================================================ + +Run 3: 5 tab(s), 15s duration +Synchronized start enabled (sync dir: /tmp/iterm2-perf-sync.4DNwfq) +Found iTerm2 PID: 28157 +Launching 5 tabs, each running stress test for 15 seconds... +Waiting for all tabs to signal ready... +All 5 tabs ready. +Starting profiler for 20 seconds... +Waiting for DTrace to attach... +DTrace attached. +Sending go signal... +All tabs completed. Waiting for profiler... +Signaling DTrace to finish... 
+ +============================================================ +Iteration Summary +============================================================ + Total iterations: 183299 + Tabs: 5 + Duration: 15s + Rate: 12219 iterations/sec +============================================================ + + +Analyzing profile: /tmp/iterm2_multi_tab_profile_20260127_170000.txt + +============================================================ +Profile Summary +============================================================ + boolForKey: 4 occurrences + intForKey: 13 occurrences + objectForKey: 20 occurrences + updateConfigurationFields: 18 occurrences + NSUserDefaults: 28 occurrences + @synchronized: 0 occurrences + os_unfair_lock: 43 occurrences + StringToScreenChars: 53 occurrences + ScreenCharArray: 87 occurrences + bidi/Bidi: 53 occurrences + VT100Terminal: 116 occurrences + executeToken: 26 occurrences + Metal: 1174 occurrences + iTermTextRenderer: 96 occurrences +============================================================ + +============================================================ +Category Summary +============================================================ + Allocations: 34 occurrences + ObjC retain/release: 781 occurrences + Autorelease pools: 4 occurrences + Strings/Unicode: 973 occurrences + CoreText: 636 occurrences + CoreGraphics: 888 occurrences + AppKit geometry: 1087 occurrences + Locks/dispatch: 78 occurrences + Terminal parsing: 853 occurrences + Rendering: 1270 occurrences + Process/cache: 75 occurrences +============================================================ + +============================================================ +Top iTerm2 Symbols (by sample count) +============================================================ + 16869 main + 16869 __40-[iTermSocket listenWithBacklog:accept:]_block_invoke + 15734 -[TaskNotifier run] + 222 + 218 closure #1 in closure #1 in implicit closure #2 in iTermMetalView.fetchDrawable(timeout:) + 100 DYLD-STUB$$objc_release + 
54 DYLD-STUB$$objc_retainAutoreleasedReturnValue + 50 DYLD-STUB$$objc_retain + 29 -[iTermMetalFrameData measureTimeForStat:ofBlock:] + 25 __35-[iTermProcessCache updateIfNeeded]_block_invoke_2 + 20 -[PTYTask fd] + 19 -[iTermMultiServerJobManager fd] + 19 -[iTermThread dispatchSync:] + 18 -[iTermProcessCache reallyUpdate] + 16 __28-[iTermThread dispatchSync:]_block_invoke +============================================================ + +Full profile saved to: /tmp/iterm2_multi_tab_profile_20260127_170000.txt + +Shutting down test iTerm2... + +============================================================ +DTrace UX Metrics (duration: 18s) +============================================================ + Adaptive mode: 30fps + Content frames: 44053 (lines marked dirty) + Refreshes (cadence): 461 + Metal frames (GPU): 617 + --- + Apparent frame rate: 25 fps (refreshes/sec) + Metal frame rate: 34 fps + --- + Join calls: 438 + Join time: 1446 us (total) +============================================================ + +Summary Table +┌──────────────────────────┬──────────┬──────────┬──────────┐ +│ Metric │ 1 Tab│ 2 Tabs│ 5 Tabs│ +├──────────────────────────┼──────────┼──────────┼──────────┤ +│ Iteration rate │ 9273/s│ 10233/s│ 12219/s│ +│ Total iterations │ 139100│ 153503│ 183299│ +├──────────────────────────┼──────────┼──────────┼──────────┤ +│ Adaptive mode │ 30fps│ 30fps│ 30fps│ +│ Refresh FPS │ 21│ 22│ 25│ +│ Metal FPS │ 33│ 35│ 34│ +│ Iters/frame │ 231.8│ 240.6│ 297.1│ +├──────────────────────────┼──────────┼──────────┼──────────┤ +│ Refreshes │ 385│ 404│ 461│ +│ Metal frames │ 600│ 638│ 617│ +│ Content updates │ 39947│ 40255│ 44053│ +├──────────────────────────┼──────────┼──────────┼──────────┤ +│ Join calls │ 380│ 396│ 438│ +│ Join time (us) │ 1323│ 1541│ 1446│ +└──────────────────────────┴──────────┴──────────┴──────────┘ + +Legend: + Iteration rate - stress_load.py output lines/sec (terminal throughput) + Adaptive mode - Frame rate mode (60fps=low load, 30fps=high 
load) + Refresh FPS - Cadence-driven refresh rate + Metal FPS - GPU frame submissions/sec + Iters/frame - Stress iterations per Metal frame (throughput) + Refreshes - Total cadence-driven refresh calls + Metal frames - Total GPU frame submissions + Content updates - Lines marked dirty (setNeedsDisplayOnLine calls) + Join calls - performBlockWithJoinedThreads calls (thread sync) + Join time - Total time in joined blocks (microseconds) diff --git a/tools/perf/stress_load.py b/tools/perf/stress_load.py new file mode 100644 index 0000000000..7fd73d25f4 --- /dev/null +++ b/tools/perf/stress_load.py @@ -0,0 +1,795 @@ +#!/usr/bin/env python3 +""" +Generate terminal load for stress testing iTerm2. + +Usage: + python3 stress_load.py [duration_seconds] [label] [--sync-dir DIR] [--mode=MODE] [--title] [--speed=SPEED] + +This script generates various types of terminal output to exercise +iTerm2's rendering and text processing code paths. It does NOT run +a profiler - use this with run_stress_test.sh for multi-tab +profiled stress testing, or profile_stress_test.py for single-tab use. + +With --sync-dir, the script signals readiness and waits for a "go" +signal before starting, allowing synchronized startup across tabs. + +Options: + --title[=MS] Inject OSC 0 title changes every MS milliseconds (default 2000ms) + --speed=SPEED Output speed: normal (default) or slow (100ms delay per iteration) + --fps=N Target frame rate for dashboard modes (default 30, 0 = unthrottled) + Accepts decimals (e.g., 0.5 for one frame per 2 seconds). + Ignored for stress modes which always run unthrottled. 
import os
import random
import shutil
import subprocess
import sys
import time
import threading
from pathlib import Path


# =============================================================================
# ANSI escape sequences for dashboard modes
# =============================================================================

ESC = "\033"
CSI = f"{ESC}["

# Colors
RESET = f"{CSI}0m"
BOLD = f"{CSI}1m"
DIM = f"{CSI}2m"
REVERSE = f"{CSI}7m"

# Foreground colors
FG_BLACK = f"{CSI}30m"
FG_RED = f"{CSI}31m"
FG_GREEN = f"{CSI}32m"
FG_YELLOW = f"{CSI}33m"
FG_BLUE = f"{CSI}34m"
FG_MAGENTA = f"{CSI}35m"
FG_CYAN = f"{CSI}36m"
FG_WHITE = f"{CSI}37m"

# Background colors
BG_BLACK = f"{CSI}40m"
BG_RED = f"{CSI}41m"
BG_GREEN = f"{CSI}42m"
BG_YELLOW = f"{CSI}43m"
BG_BLUE = f"{CSI}44m"
BG_MAGENTA = f"{CSI}45m"
BG_CYAN = f"{CSI}46m"
BG_WHITE = f"{CSI}47m"


def cursor_home():
    """Return the sequence that moves the cursor to the home position."""
    return f"{CSI}H"


def cursor_to(row, col):
    """Return the sequence that moves the cursor to (row, col)."""
    return f"{CSI}{row};{col}H"


def cursor_hide():
    """Return the sequence that hides the cursor."""
    return f"{CSI}?25l"


def cursor_show():
    """Return the sequence that shows the cursor."""
    return f"{CSI}?25h"


def clear_screen():
    """Return the sequence that clears the screen."""
    return f"{CSI}2J"


def clear_line():
    """Return the sequence that clears the current line."""
    return f"{CSI}2K"


def clear_to_eol():
    """Return the sequence that clears from the cursor to end of line."""
    return f"{CSI}K"


def set_scroll_region(top, bottom):
    """Return the sequence that sets the scroll region to rows top..bottom."""
    return f"{CSI}{top};{bottom}r"


def reset_scroll_region():
    """Return the sequence that resets the scroll region."""
    return f"{CSI}r"


def enter_alt_screen():
    """Return the sequence that switches to the alternate screen."""
    return f"{CSI}?1049h"


def exit_alt_screen():
    """Return the sequence that leaves the alternate screen."""
    return f"{CSI}?1049l"


def get_terminal_size():
    """Return the terminal size as (columns, lines), falling back to 80x24."""
    size = shutil.get_terminal_size(fallback=(80, 24))
    return size.columns, size.lines


def progress_bar(value, width, filled_char="█", empty_char="░", color_thresholds=None):
    """Return a bracketed, colored progress bar string.

    Args:
        value: Fill fraction. Values outside [0, 1] are clamped for drawing,
            so the bar never grows beyond ``width`` cells.
        width: Total bar width including the two brackets.
        filled_char: Character used for the filled part.
        empty_char: Character used for the unfilled part.
        color_thresholds: Optional iterable of ``(threshold, color)`` pairs.
            The color of the highest threshold that ``value`` reaches is used,
            whatever order the pairs are given in; FG_GREEN otherwise.

    Returns:
        ``[`` + color + filled cells + RESET + empty cells + ``]``.
    """
    inner_width = max(0, width - 2)
    # Clamp only the drawn fraction; the raw value still drives the color.
    fraction = min(1.0, max(0.0, value))
    filled = int(fraction * inner_width)
    empty = inner_width - filled

    color = FG_GREEN
    if color_thresholds:
        # Ascending order makes the last match the highest threshold reached.
        # The sort is stable, so equal thresholds keep "last one wins".
        for threshold, threshold_color in sorted(color_thresholds, key=lambda pair: pair[0]):
            if value >= threshold:
                color = threshold_color

    return f"[{color}{filled_char * filled}{RESET}{empty_char * empty}]"


def generate_fake_process():
    """Return a dict describing one random fake process row.

    Keys: ``pid``, ``user``, ``cpu``, ``mem``, ``command`` and ``cpu_color``
    (red above 80% CPU, yellow above 50%, green otherwise).
    """
    pid = random.randint(1, 99999)
    user = random.choice(["root", "admin", "www-data", "postgres", "_windowserver", "daemon"])
    cpu = random.uniform(0, 100)
    mem = random.uniform(0, 50)
    commands = [
        "python3 stress_load.py", "/usr/bin/sample iTerm2", "iTerm2 --server",
        "/System/Library/Metal", "WindowServer", "mds_stores", "kernel_task",
        "launchd", "sshd: admin", "vim /etc/hosts", "docker compose up",
        "node server.js", "postgres: writer",
    ]
    command = random.choice(commands)

    if cpu > 80:
        cpu_color = FG_RED
    elif cpu > 50:
        cpu_color = FG_YELLOW
    else:
        cpu_color = FG_GREEN

    return {"pid": pid, "user": user, "cpu": cpu, "mem": mem,
            "command": command, "cpu_color": cpu_color}


# =============================================================================
# Dashboard mode classes
# =============================================================================

class DashboardMode:
    """Base class for dashboard modes.

    Subclasses override ``setup`` (called once before the first frame),
    ``update`` (returns the text of one frame) and ``teardown`` (called once
    at the end). The terminal size is sampled once, at construction.
    """

    def __init__(self, label=""):
        self.label = label
        self.iteration = 0  # number of frames produced so far
        self.width, self.height = get_terminal_size()

    def setup(self):
        """Prepare the terminal; the base implementation does nothing."""
        pass

    def update(self):
        """Advance one frame and return its output (empty in the base class)."""
        self.iteration += 1
        return ""

    def teardown(self):
        """Restore the terminal; the base implementation does nothing."""
        pass
class HtopMode(DashboardMode):
    """Htop-style display with CPU meters and a CPU-sorted process list.

    Every frame repaints from the home position: one meter row per fake CPU,
    memory and swap meters, a reversed column header, the busiest processes,
    and a reversed status row on the last terminal line.
    """

    def __init__(self, label=""):
        super().__init__(label)
        # Clamp at zero: the raw expression is negative on terminals shorter
        # than 10 rows, which would skew header_lines below.
        self.cpu_count = max(0, min(8, (self.height - 10) // 2))
        self.cpu_values = [random.uniform(0, 1) for _ in range(self.cpu_count)]
        self.mem_value = random.uniform(0.3, 0.7)
        self.swap_value = random.uniform(0, 0.3)
        self.processes = [generate_fake_process() for _ in range(50)]
        self.header_lines = 3 + self.cpu_count + 2

    def setup(self):
        """Switch to the alternate screen, hide the cursor and clear it."""
        print(enter_alt_screen() + cursor_hide() + clear_screen(), end="", flush=True)

    def update(self):
        """Advance the simulated meters and return one full frame."""
        self.iteration += 1
        output = [cursor_home()]

        # Random-walk every meter, keeping each within its allowed range.
        for i in range(self.cpu_count):
            self.cpu_values[i] = max(0, min(1, self.cpu_values[i] + random.uniform(-0.1, 0.1)))

        self.mem_value = max(0.1, min(0.95, self.mem_value + random.uniform(-0.02, 0.02)))
        self.swap_value = max(0, min(0.5, self.swap_value + random.uniform(-0.01, 0.01)))

        bar_width = min(40, self.width - 20)

        for i in range(self.cpu_count):
            bar = progress_bar(self.cpu_values[i], bar_width,
                               color_thresholds=[(0.5, FG_YELLOW), (0.8, FG_RED)])
            output.append(f"{clear_line()}CPU{i}: {bar} {self.cpu_values[i]*100:5.1f}%\n")

        mem_bar = progress_bar(self.mem_value, bar_width, color_thresholds=[(0.7, FG_YELLOW), (0.9, FG_RED)])
        swap_bar = progress_bar(self.swap_value, bar_width, color_thresholds=[(0.5, FG_YELLOW), (0.8, FG_RED)])
        output.append(f"{clear_line()}Mem: {mem_bar} {self.mem_value*100:5.1f}%\n")
        output.append(f"{clear_line()}Swp: {swap_bar} {self.swap_value*100:5.1f}%\n")
        output.append(f"{clear_line()}{BOLD}{REVERSE} PID USER CPU% MEM% COMMAND{' ' * (self.width - 45)}{RESET}\n")

        # Replace five random entries each frame so the sorted order churns.
        process_lines = self.height - self.header_lines - 2
        for _ in range(5):
            self.processes[random.randint(0, len(self.processes) - 1)] = generate_fake_process()

        sorted_procs = sorted(self.processes, key=lambda p: p["cpu"], reverse=True)
        for i in range(min(process_lines, len(sorted_procs))):
            p = sorted_procs[i]
            line = f"{p['pid']:>5} {p['user']:<9} {p['cpu_color']}{p['cpu']:5.1f}{RESET} {p['mem']:4.1f} {p['command']}"
            output.append(f"{clear_line()}{line[:self.width-1]}\n")

        output.append(cursor_to(self.height, 1))
        counter = f"Iteration: {self.iteration}"
        rest = f" | {self.label} | Press Ctrl-C to stop "
        # RESET ends the bold counter but also clears reverse video, so
        # REVERSE is re-applied to keep the rest of the row drawn as a bar.
        # Padding is measured on visible text only (escape bytes take no
        # columns) and leaves the last column free, as the process rows do.
        visible_len = 1 + len(counter) + len(rest)
        padding = " " * max(0, self.width - 1 - visible_len)
        output.append(f"{REVERSE} {BOLD}{counter}{RESET}{REVERSE}{rest}{padding}{RESET}")

        return "".join(output)

    def teardown(self):
        """Reset the scroll region, show the cursor, leave the alt screen."""
        print(reset_scroll_region() + cursor_show() + exit_alt_screen(), end="", flush=True)
class WatchMode(DashboardMode):
    """Repaint the whole screen each frame, the way `watch` does."""

    def setup(self):
        """Enter the alternate screen with the cursor hidden."""
        print(f"{enter_alt_screen()}{cursor_hide()}", end="", flush=True)

    def update(self):
        """Clear the screen and return a freshly randomized summary frame."""
        self.iteration += 1

        now = time.strftime("%Y-%m-%d %H:%M:%S")
        # Random draws are made in the same order the frame displays them.
        load_1, load_5, load_15 = (random.uniform(0, 4) for _ in range(3))
        total_tasks = random.randint(100, 300)
        running_tasks = random.randint(1, 5)
        used_mb = random.randint(4000, 8000)

        frame = [
            clear_screen(),
            cursor_home(),
            f"{BOLD}Every 0.1s: dashboard watch mode{RESET} {now}\n\n",
            f"Load average: {load_1:.2f} {load_5:.2f} {load_15:.2f}\n",
            f"Tasks: {total_tasks} total, {running_tasks} running\n",
            f"Memory: {used_mb}M used / 16384M total\n\n",
            f"{BOLD}{'PID':>7} {'USER':<10} {'%CPU':>6} {'%MEM':>6} {'COMMAND':<30}{RESET}\n",
        ]

        row_count = min(15, self.height - 10)
        for _ in range(row_count):
            pid = random.randint(1, 99999)
            user = random.choice(["root", "admin", "www-data", "daemon"])
            cpu = random.uniform(0, 100)
            mem = random.uniform(0, 20)
            cmd = random.choice(["python3", "node", "docker", "iTerm2", "bash", "vim"])

            # Busy rows are tinted; quiet rows keep the default color.
            if cpu > 50:
                tint = FG_RED
            elif cpu > 20:
                tint = FG_YELLOW
            else:
                tint = ""
            frame.append(f"{tint}{pid:>7} {user:<10} {cpu:>6.1f} {mem:>6.1f} {cmd:<30}{RESET}\n")

        return "".join(frame)

    def teardown(self):
        """Show the cursor again and leave the alternate screen."""
        print(f"{cursor_show()}{exit_alt_screen()}", end="", flush=True)
class ProgressMode(DashboardMode):
    """Twenty progress bars that bounce between 0% and 100% in place."""

    def __init__(self, label=""):
        super().__init__(label)
        self.bar_count = 20
        self.progress = [random.uniform(0, 1) for _ in range(self.bar_count)]
        self.speeds = [random.uniform(0.01, 0.05) for _ in range(self.bar_count)]
        self.directions = [1] * self.bar_count

    def setup(self):
        """Draw the static labels once; update() only rewrites the values."""
        print(f"{enter_alt_screen()}{cursor_hide()}{clear_screen()}", end="", flush=True)
        print(cursor_home(), end="")
        print(f"{BOLD}Progress Bars Stress Test{RESET}")
        print("Iteration: 0")
        for task_number in range(1, self.bar_count + 1):
            print(f"Task {task_number:2}: ")

    def update(self):
        """Step every bar and return the cursor-addressed redraw sequence."""
        self.iteration += 1

        for idx in range(self.bar_count):
            position = self.progress[idx] + self.speeds[idx] * self.directions[idx]
            if position >= 1:
                # Hit the top: pin at full, reverse, and pick a new speed.
                position = 1
                self.directions[idx] = -1
                self.speeds[idx] = random.uniform(0.01, 0.05)
            elif position <= 0:
                # Hit the bottom: pin at empty, go forward, pick a new speed.
                position = 0
                self.directions[idx] = 1
                self.speeds[idx] = random.uniform(0.01, 0.05)
            self.progress[idx] = position

        # The iteration counter sits on row 2, after its label.
        pieces = [cursor_to(2, 12), f"{self.iteration}"]

        bar_width = min(50, self.width - 15)
        for idx, fraction in enumerate(self.progress):
            bar = progress_bar(fraction, bar_width,
                               color_thresholds=[(0.5, FG_YELLOW), (0.8, FG_RED)])
            pieces.append(cursor_to(3 + idx, 10))
            pieces.append(f"{bar} {fraction*100:5.1f}%{clear_to_eol()}")

        return "".join(pieces)

    def teardown(self):
        """Show the cursor again and leave the alternate screen."""
        print(f"{cursor_show()}{exit_alt_screen()}", end="", flush=True)
class TableMode(DashboardMode):
    """Fixed table header with a body that scrolls inside a scroll region."""

    def __init__(self, label=""):
        super().__init__(label)
        self.header_height = 4
        self.row_id = 0

    def setup(self):
        """Draw the four header rows, then confine scrolling to the body."""
        rule = "─" * (self.width - 1)
        print(f"{enter_alt_screen()}{cursor_hide()}{clear_screen()}", end="", flush=True)
        print(cursor_home(), end="")
        print(f"{BOLD}Table with Scroll Region - {self.label}{RESET}")
        print(f"{rule}")
        print(f"{REVERSE}{'ID':>6} {'Timestamp':<20} {'Value':>10} {'Status':<12} {'Message':<30}{RESET}")
        print(f"{rule}")
        # The region runs from the row below the header to the second-to-last row.
        print(set_scroll_region(self.header_height + 1, self.height - 1), end="", flush=True)

    def update(self):
        """Return one new row written at the bottom of the scroll region."""
        self.iteration += 1
        self.row_id += 1

        millis = int(time.time() * 1000) % 1000
        timestamp = time.strftime("%H:%M:%S.") + f"{millis:03d}"
        value = random.randint(0, 10000)
        # Each status is paired with its display color.
        status, status_color = random.choice([
            ("OK", FG_GREEN),
            ("WARN", FG_YELLOW),
            ("ERROR", FG_RED),
            ("PENDING", FG_CYAN),
        ])
        message = random.choice([
            "Processing request", "Data synchronized", "Cache miss", "Connection reset",
            "Timeout occurred", "Retry scheduled", "Batch complete",
        ])

        # The trailing newline on the region's last row scrolls the body up.
        row = f"{self.row_id:>6} {timestamp:<20} {value:>10} {status_color}{status:<12}{RESET} {message:<30}\n"
        return cursor_to(self.height - 1, 1) + row

    def teardown(self):
        """Reset the scroll region, show the cursor, leave the alt screen."""
        print(f"{reset_scroll_region()}{cursor_show()}{exit_alt_screen()}", end="", flush=True)
class StatusMode(DashboardMode):
    """Grid of service cells whose background color reflects their status."""

    def __init__(self, label=""):
        super().__init__(label)
        self.services = [
            "web-1", "web-2", "web-3", "api-1", "api-2", "db-master", "db-replica",
            "cache-1", "cache-2", "queue", "worker-1", "worker-2", "worker-3",
            "monitor", "logging", "auth", "storage", "cdn", "dns", "lb-1", "lb-2",
            "backup", "cron", "mailer"
        ]
        # Initial statuses are weighted towards "up" (four of six choices).
        self.statuses = {}
        for name in self.services:
            self.statuses[name] = random.choice(["up", "up", "up", "up", "degraded", "down"])
        # Iteration number at which each service last changed status.
        self.last_change = dict.fromkeys(self.services, 0)

    def setup(self):
        """Enter the alternate screen, hide the cursor and clear it."""
        print(f"{enter_alt_screen()}{cursor_hide()}{clear_screen()}", end="", flush=True)

    def update(self):
        """Re-roll up to three services and return the repainted grid."""
        self.iteration += 1
        rule = "─" * (self.width - 1)
        parts = [
            cursor_home(),
            f"{BOLD}Service Status Dashboard{RESET} - Iteration {self.iteration}\n",
            f"{rule}\n\n",
        ]

        # Pick up to three services and give each a fresh random status.
        picked = random.sample(self.services, min(3, len(self.services)))
        for name in picked:
            candidate = random.choice(["up", "up", "up", "degraded", "down"])
            if candidate == self.statuses[name]:
                continue
            self.statuses[name] = candidate
            self.last_change[name] = self.iteration

        cell_width = 14
        cols = max(1, (self.width - 2) // cell_width)
        palette = {
            "up": f"{BG_GREEN}{FG_BLACK}",
            "degraded": f"{BG_YELLOW}{FG_BLACK}",
        }
        fallback = f"{BG_RED}{FG_WHITE}"  # any other status is drawn as down

        for index, name in enumerate(self.services):
            # Start a new grid row (with a blank spacer line) every `cols` cells.
            if index and index % cols == 0:
                parts.append("\n\n")

            color = palette.get(self.statuses[name], fallback)
            changed_recently = self.iteration - self.last_change[name] < 10
            # Recently changed cells go bold on even iterations only.
            if changed_recently and self.iteration % 2 == 0:
                color = f"{BOLD}{color}"

            parts.append(f"{color} {name[:10]:<10} {RESET} ")

        parts.append(f"\n\n{rule}\n")
        parts.append(f"{BG_GREEN}{FG_BLACK} UP {RESET} ")
        parts.append(f"{BG_YELLOW}{FG_BLACK} DEGRADED {RESET} ")
        parts.append(f"{BG_RED}{FG_WHITE} DOWN {RESET}\n")

        return "".join(parts)

    def teardown(self):
        """Show the cursor again and leave the alternate screen."""
        print(f"{cursor_show()}{exit_alt_screen()}", end="", flush=True)


# =============================================================================
# Dashboard runner
# =============================================================================

# Maps each dashboard mode name to the class that implements it.
DASHBOARD_MODES = {
    "htop": HtopMode,
    "watch": WatchMode,
    "progress": ProgressMode,
    "table": TableMode,
    "status": StatusMode,
}
def _run_flood(duration, prefix):
    """Pipe ``yes`` to stdout for ``duration`` seconds, always reaping the child."""
    print(f"{prefix}Running flood mode for {duration} seconds (using 'yes')...")
    proc = None
    try:
        proc = subprocess.Popen(["yes"], stdout=sys.stdout, stderr=subprocess.DEVNULL)
        time.sleep(duration)
    except KeyboardInterrupt:
        # Ctrl-C only ends the flood early; cleanup happens in ``finally``.
        pass
    finally:
        # ``proc`` is None if Popen itself was interrupted or failed.
        if proc is not None:
            proc.terminate()
            proc.wait()
    print(f"{prefix}Flood mode complete")


def _build_stress_pattern_sets():
    """Return the mode-name -> list of ``f(iteration) -> str`` pattern table."""
    # Each entry is (pattern, is_clear_code). Entries flagged True emit
    # insert/delete-line, scroll-region, erase or alt-screen sequences and are
    # only used by the "clearcodes" mode.
    all_patterns = [
        (lambda i: "x" * 200, False),
        (lambda i: f"\033[{31 + (i % 7)}m\033[{40 + (i % 7)}mColored text iteration {i}\033[0m", False),
        (lambda i: "漢字テスト中文한글" * 10, False),
        (lambda i: f"Line {i}: " + "日本語ABC中文DEF한글GHI" * 5, False),
        (lambda i: f"LTR start مرحبا بالعالم שלום עולם end LTR {i}", False),
        (lambda i: f"Price: ₪{i} or ${i}.99 - מחיר: {i} شيكل", False),
        (lambda i: "👨‍👩‍👧‍👦🏳️‍🌈👍🏽🇺🇸🎉✨🔥💯" * 5, False),
        (lambda i: "e\u0301a\u0300o\u0302u\u0308n\u0303" * 20, False),
        (lambda i: f"\033[{1 + (i % 20)}C\033[{1 + (i % 5)}A\033[{1 + (i % 5)}B" + ">" * 30, False),
        (lambda i: f"col1\tcol2\tcol3\tcol4\t{i}", False),
        (lambda i: "".join(f"\033[{31 + (j % 7)}m{chr(65 + (j % 26))}" for j in range(80)) + "\033[0m", False),
        (lambda i: f"\033[5mBLINK {i}\033[0m", False),
        (lambda i: f"\033[7;4;3mSTYLE {i}\033[0m", False),
        (lambda i: f"\033[sSaved{i}\033[uRestored{i}", False),
        (lambda i: f"\033[2LInserted{i}\n\033[2MDeleted{i}", True),
        (lambda i: f"\033[5;20rRegion{i}\n\033[r", True),
        (lambda i: f"Erase{i}\033[2K\033[2J", True),
        (lambda i: f"\033[?1049hAlt{i}\n\033[?1049l", True),
        (lambda i: f"\033]8;;https://example.com/{i}\033\\link{i}\033]8;;\033\\", False),
        (lambda i: "┌─┬─┐│├─┼─┤│└─┴─┘" * 8, False),
        (lambda i: "a\u200db\u200cc\uFEFFd" * 30, False),
        (lambda i: ("ABC日本語🎉مرحبا" * 20)[:200], False),
    ]

    buffer_line = ("BUFFER:" + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" * 10)[:600]
    bidi_segment = "Text123مرحبا456שלום789עולם012العالم345"
    bidi_line = (bidi_segment * 15)[:600]
    buffer_patterns = [
        lambda i, line=buffer_line: f"{line}{i % 10000:04d}",
        lambda i: "日本語中文한글" * 60,
        lambda i, line=buffer_line: f"\033[32m{line}\033[0m",
        lambda i, line=bidi_line: f"{line}{i % 1000:03d}",
    ]

    return {
        "normal": [fn for fn, is_clear in all_patterns if not is_clear],
        "clearcodes": [fn for fn, _ in all_patterns],
        "buffer": buffer_patterns,
    }


def stress_test(duration, label="", modes=None, title_interval_ms=0, speed="normal"):
    """Generate lots of terminal output to stress test rendering.

    Args:
        duration: Run time in seconds.
        label: Optional tag; when non-empty, status lines are prefixed with
            ``[label] ``.
        modes: List of stress mode names ("normal", "buffer", "clearcodes"),
            or exactly ``["flood"]``. Defaults to ``["normal"]``. With more
            than one mode the duration is split into equal time slices. If
            any name is unknown, a warning is printed and the whole list is
            replaced by ``["normal"]``.
        title_interval_ms: When > 0 (and ``duration`` > 4), emit an OSC 0
            title change from a timer thread at this interval. Rescheduling
            stops once fewer than 4 seconds remain.
        speed: ``"slow"`` sleeps 100 ms after every line; any other value
            sleeps 1 ms after every 100 lines.

    Returns:
        ``(iterations, mode_iterations)``. ``mode_iterations`` maps mode name
        to line count when several modes were run and is ``None`` otherwise.
        Flood mode returns ``(0, None)``.
    """
    prefix = f"[{label}] " if label else ""
    modes = modes or ["normal"]

    # Flood mode: run 'yes' directly for maximum throughput
    if modes == ["flood"]:
        _run_flood(duration, prefix)
        return 0, None

    title_info = f", titles every {title_interval_ms}ms" if title_interval_ms > 0 else ""
    speed_info = ", slow mode (100ms/iter)" if speed == "slow" else ""
    print(f"{prefix}Running stress test for {duration} seconds (modes: {','.join(modes)}{title_info}{speed_info})...")
    start = time.time()
    iteration = 0

    pattern_sets = _build_stress_pattern_sets()

    # Validate modes: one bad name falls back to plain "normal".
    unknown = next((m for m in modes if m not in pattern_sets), None)
    if unknown is not None:
        print(f"{prefix}Warning: unknown mode '{unknown}', using 'normal'")
        modes = ["normal"]

    multi_mode = len(modes) > 1
    time_slice = duration / len(modes)
    mode_iterations = {mode: 0 for mode in modes} if multi_mode else None
    current_mode_idx = 0
    current_patterns = pattern_sets[modes[current_mode_idx]]

    # Title injection via timer thread
    title_count = 0
    title_timer = None
    title_interval_sec = title_interval_ms / 1000.0 if title_interval_ms > 0 else 0

    def schedule_title():
        nonlocal title_timer
        title_timer = threading.Timer(title_interval_sec, inject_title)
        title_timer.daemon = True
        title_timer.start()

    def inject_title():
        nonlocal title_count
        title_count += 1
        elapsed = time.time() - start
        print(f"\033]0;{prefix}Title {title_count} @ {elapsed:.1f}s\007", flush=True)
        remaining = duration - (time.time() - start)
        if remaining >= 4.0:
            schedule_title()

    def settle_current_mode():
        # The active mode's count is the running total minus every other mode.
        name = modes[current_mode_idx]
        others = sum(v for k, v in mode_iterations.items() if k != name)
        mode_iterations[name] = iteration - others

    if title_interval_ms > 0 and duration > 4:
        schedule_title()

    try:
        while time.time() - start < duration:
            if multi_mode:
                elapsed = time.time() - start
                expected_mode_idx = min(int(elapsed / time_slice), len(modes) - 1)
                if expected_mode_idx != current_mode_idx:
                    settle_current_mode()
                    current_mode_idx = expected_mode_idx
                    current_patterns = pattern_sets[modes[current_mode_idx]]
                    print(f"{prefix}Switching to mode: {modes[current_mode_idx]} (at {elapsed:.1f}s)")

            pattern = current_patterns[iteration % len(current_patterns)]
            try:
                print(pattern(iteration))
            except UnicodeEncodeError:
                print(f"[encoding error on iteration {iteration}]")
            iteration += 1

            if speed == "slow":
                time.sleep(0.1)
            elif iteration % 100 == 0:
                time.sleep(0.001)
    finally:
        # Cancel the pending timer even if the loop is interrupted, so no
        # title is injected after this function has been abandoned.
        if title_timer is not None:
            title_timer.cancel()

    if multi_mode:
        settle_current_mode()

    title_info = f", {title_count} title updates" if title_interval_ms > 0 else ""
    print(f"{prefix}Stress test complete: {iteration} iterations{title_info}")
    return iteration, mode_iterations
def _parse_args(args):
    """Parse the command-line argument list into a dict of run settings."""
    opts = {
        "duration": 10,
        "label": "",
        "sync_dir": None,
        "mode": None,
        "title_interval_ms": 0,
        "speed": "normal",
        "fps": 30.0,
    }
    # Track explicitly whether a duration was given. Comparing against the
    # default value would let a numeric label overwrite an explicit "10".
    duration_set = False

    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "--sync-dir":
            if i + 1 < len(args):
                opts["sync_dir"] = args[i + 1]
                i += 1  # consume the value as well
            else:
                print("Warning: --sync-dir requires a directory argument, ignoring")
        elif arg.startswith("--mode="):
            opts["mode"] = arg.split("=", 1)[1]
        elif arg == "--title":
            opts["title_interval_ms"] = 2000
        elif arg.startswith("--title="):
            value = arg.split("=", 1)[1]
            try:
                opts["title_interval_ms"] = int(value)
            except ValueError:
                print(f"Warning: invalid --title value '{value}', using 2000")
                opts["title_interval_ms"] = 2000
        elif arg.startswith("--speed="):
            speed = arg.split("=", 1)[1]
            if speed not in ("normal", "slow"):
                print(f"Warning: invalid --speed value '{speed}', using 'normal'")
                speed = "normal"
            opts["speed"] = speed
        elif arg.startswith("--fps="):
            value = arg.split("=", 1)[1]
            try:
                fps = float(value)
            except ValueError:
                print(f"Warning: invalid --fps value '{value}', using 30")
                fps = 30.0
            # "not >=" also rejects NaN, which compares False to everything.
            if not fps >= 0:
                print(f"Warning: --fps={fps} must be non-negative, using 30")
                fps = 30.0
            opts["fps"] = fps
        elif not duration_set and arg.isdecimal():
            # First all-digit positional argument is the duration in seconds.
            # isdecimal() only accepts strings that int() can parse.
            opts["duration"] = int(arg)
            duration_set = True
        elif not opts["label"]:
            # First remaining positional argument is the label.
            opts["label"] = arg
        # Anything else is ignored.
        i += 1

    return opts


def main():
    """Parse ``sys.argv``, optionally wait for the sync signal, run the
    selected mode(s) and write the stats file.

    Recognized arguments: ``--sync-dir DIR``, ``--mode=NAME[,NAME...]``,
    ``--title[=MS]``, ``--speed=normal|slow``, ``--fps=N``, plus a positional
    duration in whole seconds (default 10) and a positional label.
    """
    opts = _parse_args(sys.argv[1:])
    duration = opts["duration"]
    label = opts["label"]
    sync_dir = opts["sync_dir"]
    mode = opts["mode"]
    title_interval_ms = opts["title_interval_ms"]
    speed = opts["speed"]
    fps = opts["fps"]

    if sync_dir:
        wait_for_sync(sync_dir, label or "unknown")

    # Comma-separated list: drop empty segments so "htop," or "a,,b" do not
    # schedule a nameless mode.
    mode_list = None
    if mode and "," in mode:
        mode_list = [m.strip() for m in mode.split(",") if m.strip()]

    # Handle 'all' mode - cycles through all modes in a single run
    if mode == "all":
        iterations, mode_iterations = run_all_modes(duration, label, title_interval_ms, speed, fps)
    elif mode_list:
        # Comma-separated modes - run each in sequence
        iterations, mode_iterations = run_mode_list(mode_list, duration, label, title_interval_ms, speed, fps)
    elif mode in DASHBOARD_MODES:
        iterations = run_dashboard(duration, label, mode, fps)
        mode_iterations = None
    else:
        # Single stress mode or default. A list that was nothing but commas
        # falls back to the default as well.
        modes = [mode] if mode and mode_list is None else None
        iterations, mode_iterations = stress_test(duration, label, modes, title_interval_ms, speed)

    write_stats(sync_dir, label or "unknown", iterations, duration, mode_iterations)