diff --git a/.github/workflows/e2e-blockchain.yml b/.github/workflows/e2e-blockchain.yml index 927c286c3..f0a5854d8 100644 --- a/.github/workflows/e2e-blockchain.yml +++ b/.github/workflows/e2e-blockchain.yml @@ -378,8 +378,12 @@ jobs: name: Mobile E2E (devnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'devnet' # macos-26 matches build-mobile.yml — required for iOS 26 SDK symbols - # used by dapp-browser's WKWebViewController. - runs-on: macos-26 + # used by dapp-browser's WKWebViewController. The -xlarge size is the + # DEDICATED Apple-Silicon larger runner (2x vCPU/RAM, no noisy-neighbour + # IO contention) — the shared standard macos-26 runners were intermittently + # degraded for hours at a time, with CoreSimulator install/launch crawling + # so badly that two-sim `_simPair` setup couldn't finish even in 13-15 min. + runs-on: macos-26-xlarge # Observed worst case: ~40 min setup (CLI compile on cache miss + iOS # build + simulator cold boot overlap) + ~55 min suite with one retry. # 90 minutes guillotined an otherwise-passing run at test 7 of 7. @@ -534,11 +538,13 @@ jobs: run: yarn test:e2e:mobile:build - name: Run blockchain E2E (mobile, devnet) - # --retries=1 absorbs flaky CDP "no pages found" errors on macos-26 - # runners. Each fixture retries install → launch → CDP connect from - # scratch, so a transient webinspectord hiccup doesn't fail the run. - # Config stays at retries: 0 for fast local dev feedback. - run: yarn playwright test --config playwright.ios.config.ts --retries=1 + # --retries=2 absorbs flaky CDP "no pages found" errors and degraded- + # CoreSimulator sim-setup failures on macos-26 runners. Each fixture + # retries install → launch → CDP connect from scratch (and restarts the + # sim subsystem on a wedged daemon), so a transient runner hiccup gets + # multiple fresh attempts within the job budget. Config stays at + # retries: 0 for fast local dev feedback. 
+ run: yarn playwright test --config playwright.ios.config.ts --retries=2 - name: Upload artifacts if: always() @@ -552,7 +558,7 @@ jobs: mobile-testnet: name: Mobile E2E (testnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'testnet' - runs-on: macos-26 + runs-on: macos-26-xlarge # Observed worst case: ~40 min setup (CLI compile on cache miss + iOS # build + simulator cold boot overlap) + ~55 min suite with one retry. # 90 minutes guillotined an otherwise-passing run at test 7 of 7. @@ -701,11 +707,13 @@ jobs: run: yarn test:e2e:mobile:build - name: Run blockchain E2E (mobile, testnet) - # --retries=1 absorbs flaky CDP "no pages found" errors on macos-26 - # runners. Each fixture retries install → launch → CDP connect from - # scratch, so a transient webinspectord hiccup doesn't fail the run. - # Config stays at retries: 0 for fast local dev feedback. - run: yarn playwright test --config playwright.ios.config.ts --retries=1 + # --retries=2 absorbs flaky CDP "no pages found" errors and degraded- + # CoreSimulator sim-setup failures on macos-26 runners. Each fixture + # retries install → launch → CDP connect from scratch (and restarts the + # sim subsystem on a wedged daemon), so a transient runner hiccup gets + # multiple fresh attempts within the job budget. Config stays at + # retries: 0 for fast local dev feedback. + run: yarn playwright test --config playwright.ios.config.ts --retries=2 - name: Upload artifacts if: always() @@ -747,8 +755,12 @@ jobs: name: Mobile Guardian E2E (devnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'devnet' # macos-26 matches build-mobile.yml — required for iOS 26 SDK symbols - # used by dapp-browser's WKWebViewController. - runs-on: macos-26 + # used by dapp-browser's WKWebViewController. 
The -xlarge size is the + # DEDICATED Apple-Silicon larger runner (2x vCPU/RAM, no noisy-neighbour + # IO contention) — the shared standard macos-26 runners were intermittently + # degraded for hours at a time, with CoreSimulator install/launch crawling + # so badly that two-sim `_simPair` setup couldn't finish even in 13-15 min. + runs-on: macos-26-xlarge # ~40 min setup (CLI compile on cache miss + iOS build + sim cold boot) # plus a single guardian spec with up to 2 retries. Generous ceiling that # matches the standard mobile jobs so a passing build is never guillotined. @@ -921,7 +933,7 @@ jobs: mobile-guardian-testnet: name: Mobile Guardian E2E (testnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'testnet' - runs-on: macos-26 + runs-on: macos-26-xlarge # ~40 min setup (CLI compile on cache miss + iOS build + sim cold boot) # plus a single guardian spec with up to 2 retries. Generous ceiling that # matches the standard mobile jobs so a passing build is never guillotined. diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f42196ab..fec5a862a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,10 @@ * [FIX][mobile] **iOS hot-key signing now requires Face ID / Touch ID on device, matching Android.** The Secure Enclave hot key was created with `.privateKeyUsage` only — a *usage permission* that, contrary to the old code comment, does **not** prompt for authentication — so user-initiated Guardian claims and sends signed silently on iOS, while Android (StrongBox + `setUserAuthenticationRequired`) already prompted. New hot keys now also set `.userPresence`, so every user-initiated hot signature and hot-key reveal requires user presence (Face ID / Touch ID with passcode fallback). `.userPresence` is used rather than `.biometryCurrentSet` so the key survives biometric re-enrollment instead of bricking until re-activation. Background auto-consume is unaffected — it is cold-signed in WASM and never touches the hot key. 
Scope: the gate applies to device builds only (the simulator / iOS E2E path keeps `.privateKeyUsage`-only silent signing), and existing hot keys keep their prior behavior until re-activated/rotated. (#299) * [FIX][mobile] **iOS app builds again under Xcode 26.** Two `foundKey as? SecKey` downcasts in the hot-key plugin (`signWithHotKey` / `revealHotKey`) are no-ops for CoreFoundation types — they always succeed — which Xcode 26 now rejects as a hard error, breaking the iOS build. Replaced with a `CFGetTypeID(foundKey) == SecKeyGetTypeID()` guard plus a force-cast: both the correct defensive downcast and Xcode-26-clean. (#299) * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) +* [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpSession.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) +* [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) +* [CHANGE][ci] **iOS E2E mobile jobs run on dedicated `macos-26-xlarge` runners, and are more resilient to degraded shared runners.** The shared standard `macos-26` runner pool was intermittently degraded for hours at a time by noisy-neighbour IO contention — every `simctl` op crawled (97 CI samples: per-wallet `_simPair` setup p50 65s, p90 267s, max 401s vs. <5s healthy — so two sequential wallets took up to ~13 min and sometimes never finished even in 15 min), making the whole mobile suite un-runnable. 
All four mobile E2E jobs now use the dedicated Apple-Silicon `-xlarge` larger runner (2× vCPU/RAM, no noisy neighbours), which restores a healthy ~2-3 min setup and a full green suite. As belt-and-suspenders for any residual slowness, the `_simPair` fixture setup is capped (at 13 min, past the slowest observed completing setup) so a genuinely-hung CoreSimulator fails fast with a named error and a sim-subsystem restart instead of silently eating the whole per-test timeout, while a degraded-but-completing setup is allowed to finish rather than being killed mid-flight (no assertion is relaxed — purely tolerance for degraded IO); the subsystem recovery now runs `simctl shutdown all` to clear half-booted device state (the `SimError 405` signature); the per-test timeout is 25 min (from 15); and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite). (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). 
Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test reads it as a plain value with no WASM call at all. Finally, the iOS harness reads that stash over the SYNCHRONOUS `execute_script` atom (polled), not the async `execute_async_script` one: appium-remote-debugger's async atom delivers its completion callback in the `arguments[arguments.length-1]` slot as the boolean `true` on this iOS RWI bridge, so `cb(result)` threw `TypeError: cb is not a function`, the promise rejected unhandled, and every `evalAsync` hung to its timeout no matter how fast the script ran — which is why the auth read still timed out at 60s even with the stash already populated. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright.ios.config.ts b/playwright.ios.config.ts index 1adb58f71..c3428e380 100644 --- a/playwright.ios.config.ts +++ b/playwright.ios.config.ts @@ -12,7 +12,13 @@ export default defineConfig({ // Guardian specs run via playwright.ios.guardian.config.ts (dedicated run); // keep them out of the standard iOS suite. testIgnore: '**/guardian-*.ios.spec.ts', - timeout: 900_000, // 15 min per test — WASM prove on simulator is slow (~60-90s per consume) + // 25 min per test. WASM prove on the simulator is slow (~60-90s per consume), + // and on degraded macos-26 runners BOTH the two-sim `_simPair` setup (capped + // at 13 min, see SETUP_DEADLINE_MS) and the test body's simctl/WASM ops crawl. + // 25 min leaves room for a slow-but-completing setup + a slow test instead of + // killing a run that would have passed given a little more patience (no + // assertion is relaxed — this is purely tolerance for degraded-runner IO). 
+ timeout: 1_500_000, expect: { timeout: 60_000, }, diff --git a/playwright/e2e/helpers/miden-cli.ts b/playwright/e2e/helpers/miden-cli.ts index 1f8ca1c13..f3b858109 100644 --- a/playwright/e2e/helpers/miden-cli.ts +++ b/playwright/e2e/helpers/miden-cli.ts @@ -11,6 +11,29 @@ decimals = 8 symbol = "TST" `; +/** + * Classify a `miden-client` CLI stderr as a transient failure that should be + * retried (vs. a deterministic error that should fail fast). Matched + * wrap-tolerantly with `\s+` because miette folds messages at terminal width. + * + * Categories: + * - RPC/transport to the node: 5xx, gRPC framing, reset/timeout. + * - `new nonce N is less than old nonce M`: the node's account state lags the + * store's optimistic post-submit state while a deploy/mint is still in + * flight, and miden-client's sqlite store hard-fails the whole sync on it + * (0xMiden/miden-client#2243). Clears once the tx commits. + * - Remote-prover connection failures (`failed to connect to ... prover`, + * `transport error`, `no native certs found`): the TLS/gRPC handshake to the + * delegated prover endpoint flakes intermittently on the macOS CI runners + * (a sibling mint in the same test connects fine), so a connection-level + * prover error is transient, not a proving-logic failure. + */ +export function isTransientCliError(stderr: string): boolean { + return /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce|failed\s+to\s+connect\s+to(\s+the)?(\s+remote)?\s+prover|transport\s+error|no\s+native\s+certs/i.test( + stderr + ); +} + /** * Resolve the miden-client binary path. * 1. 
MIDEN_CLIENT_BIN env var @@ -188,17 +211,7 @@ export class MidenCli { break; } lastErr = createResult.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. - /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console @@ -266,17 +279,7 @@ export class MidenCli { return { txId, noteId }; } lastErr = result.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. 
- /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console @@ -296,17 +299,7 @@ export class MidenCli { const result = await this.run('sync', { timeoutMs: 60_000 }); if (result.exitCode === 0) return; lastErr = result.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. - /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console diff --git a/playwright/e2e/ios/fixtures/two-simulators.ts b/playwright/e2e/ios/fixtures/two-simulators.ts index 506df456e..39c58896f 100644 --- a/playwright/e2e/ios/fixtures/two-simulators.ts +++ b/playwright/e2e/ios/fixtures/two-simulators.ts @@ -189,7 +189,7 @@ async function setupBothWallets( // 3 attempts = up to 2 daemon-restart recoveries. The macos-26 wedge has been // observed to survive a single recovery, so give it one more shot before // failing the test (each wedged attempt fails fast at its simctl/CDP timeout, - // not the 15-min test timeout, so the extra attempt is cheap). 
+ // not the full per-test timeout, so the extra attempt is cheap). const MAX_ATTEMPTS = 3; for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { let instanceA: SimWalletInstance | undefined; @@ -265,6 +265,46 @@ function sleep(ms: number): Promise { return new Promise(r => setTimeout(r, ms)); } +// A healthy two-simulator setup (terminate→uninstall→install→launch→CDP for +// both, sims already booted by globalSetup) runs in ~2-3 min. A degraded +// macos-26 CoreSimulator stretches every simctl op (97 real CI samples: per- +// wallet setup p50 65s, p90 267s, max 401s → two sequential wallets up to +// ~13 min) yet still COMPLETES. The earlier 8-min cap killed those slow-but- +// completing setups that would have finished and passed; only a TRULY hung +// runner (observed: setup not done after 15 min) genuinely can't recover. So +// cap at 13 min — past the slowest observed completing setup — so degraded-but- +// completing runners get to finish, and only the hung ones fail fast (clearly +// attributed, leaving room within the 25-min test timeout for the retry). +const SETUP_DEADLINE_MS = 780_000; +// Upper bound for the on-timeout daemon restart so the recovery itself can't run +// into the test timeout — setupBothWallets does its own recovery on the retry. +const SETUP_RECOVERY_BUDGET_MS = 90_000; + +/** + * Run the `_simPair` setup with a hard deadline. On overrun, run `onTimeout` + * (a best-effort, time-bounded sim-subsystem restart) so Playwright's retry + * lands on a fresh daemon, then throw a named error instead of letting setup + * silently eat the entire test timeout. 
+ */ +async function withSetupDeadline(fn: () => Promise, deadlineMs: number, onTimeout: () => Promise): Promise { + let timer: ReturnType | undefined; + let timedOut = false; + const deadline = new Promise((_, reject) => { + timer = setTimeout(() => { + timedOut = true; + reject(new Error(`_simPair setup exceeded ${deadlineMs}ms (degraded CoreSimulator)`)); + }, deadlineMs); + }); + try { + return await Promise.race([fn(), deadline]); + } finally { + if (timer) clearTimeout(timer); + if (timedOut) { + await Promise.race([onTimeout(), sleep(SETUP_RECOVERY_BUDGET_MS)]).catch(() => undefined); + } + } +} + // ── Fixture ───────────────────────────────────────────────────────────────── let _devicePair: { udidA: string; udidB: string } | null = null; @@ -335,7 +375,28 @@ export const test = base.extend({ // macos-26 daemon-wedge that hangs simctl mid-suite) by restarting the sim // subsystem and retrying the pair. The shared `_simPair` fixture still // consolidates teardown. - const { instanceA, instanceB } = await setupBothWallets(simA, udidA, simB, udidB, envConfig, timeline); + // + // Cap the whole setup. On a degraded macos-26 CoreSimulator every simctl op + // crawls (install/terminate observed at 30-180s vs. <5s healthy); slow-but- + // completing ops never trip the per-op recovery, so the cumulative cost can + // silently eat the entire per-test timeout "while setting up _simPair" + // with no attribution and no room for Playwright's retry. A hard cap turns + // that into a fast, named failure — and on overrun we restart the sim + // subsystem first so the retry runs against a fresh daemon. 
+ const { instanceA, instanceB } = await withSetupDeadline( + () => setupBothWallets(simA, udidA, simB, udidB, envConfig, timeline), + SETUP_DEADLINE_MS, + async () => { + timeline.emit({ + category: 'test_lifecycle', + severity: 'warn', + message: + `[sim-setup] _simPair setup exceeded ${SETUP_DEADLINE_MS}ms (degraded CoreSimulator); ` + + `restarting the sim subsystem so the retry gets a fresh daemon`, + }); + await SimulatorControl.recoverSimSubsystem([udidA, udidB]).catch(() => undefined); + } + ); steps.registerSnapshotCaps('A', buildIosSnapshotCaps(instanceA.walletPage, '')); steps.registerSnapshotCaps('B', buildIosSnapshotCaps(instanceB.walletPage, '')); diff --git a/playwright/e2e/ios/helpers/cdp-bridge.ts b/playwright/e2e/ios/helpers/cdp-bridge.ts index bae22c3e9..7d5b71de0 100644 --- a/playwright/e2e/ios/helpers/cdp-bridge.ts +++ b/playwright/e2e/ios/helpers/cdp-bridge.ts @@ -14,6 +14,39 @@ const SELECT_APP_POLL_MS = 1_500; const PAGE_READY_TIMEOUT = 15_000; const SOCKET_DISCOVERY_TIMEOUT = 30_000; +// A synchronous `execute_script` (reading window.__TEST_* globals) returns in +// milliseconds. If `executeAtom` hasn't resolved within this window, the +// WebView's RWI socket or its main JS thread is wedged. Surface it as a throw +// so callers (notably pollForCondition) can enforce their own deadline and let +// --retries restart on a fresh app + CDP, instead of the whole test hanging +// until the global Playwright timeout. Mirrors evalAsync's async-callback guard. +const EVAL_HARD_TIMEOUT_MS = 30_000; + +/** + * Race a CDP call against a hard wall-clock timeout. A WebKit RemoteDebugger + * `executeAtom` can hang indefinitely when the inspected page's main thread is + * blocked (e.g. mobile main-thread WASM) or the RWI socket wedges; this bounds + * it so a transient stall becomes a fast, retriable failure rather than a + * multi-minute hang. 
+ */ +async function withHardTimeout(exec: Promise, timeoutMs: number, label: string): Promise { + // If the timeout wins the race, the abandoned `exec` may settle later; attach + // a no-op catch so a late rejection doesn't surface as an unhandledRejection. + exec.catch(() => {}); + let timer: NodeJS.Timeout | undefined; + const timeout = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`${label}: CDP call did not return within ${timeoutMs}ms (WebView/RWI wedged)`)), + timeoutMs + ); + }); + try { + return await Promise.race([exec, timeout]); + } finally { + if (timer) clearTimeout(timer); + } +} + interface ConnectOpts { udid: string; bundleId: string; @@ -69,13 +102,14 @@ export class CdpSession { * For Promise-returning code, use `evalAsync` — `eval` resolves the * Promise object itself, not its value. */ - async eval(body: string): Promise { + async eval(body: string, opts: { timeoutMs?: number } = {}): Promise { const start = Date.now(); + const exec = (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ + body, + [], + ]) as Promise; try { - return (await (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ - body, - [], - ])) as T; + return await withHardTimeout(exec, opts.timeoutMs ?? EVAL_HARD_TIMEOUT_MS, 'eval'); } finally { this.stats.evalCount++; this.stats.evalMs += Date.now() - start; @@ -85,11 +119,22 @@ export class CdpSession { /** * Evaluate asynchronous JavaScript. The body MUST call the callback * `arguments[arguments.length - 1]` with its result — this is the - * `execute_async_script` WebDriver atom contract. Useful when the page - * code awaits Promises (store.fetchBalances, intercom.request, etc.). + * `execute_async_script` WebDriver atom contract. + * + * ⚠️ BROKEN on this iOS RWI bridge — prefer the synchronous `eval` and poll. 
+ * appium-remote-debugger's `execute_async_script` atom delivers its + * completion callback in the `arguments[arguments.length - 1]` slot as the + * boolean `true` (not a function) here, so `cb(result)` throws + * `TypeError: cb is not a function`, the promise rejects unhandled, the + * callback never fires, and the call ALWAYS hangs to the timeout below — + * regardless of how fast the script itself completes. (See + * `getGuardianAuthInfo`, which used to use this and now reads its data over + * the reliable sync `eval` atom instead.) If you need to await page Promises, + * stash the resolved value on a global from the page's own code and poll it + * with `eval`, rather than relying on this callback. * - * The optional outer timeout protects against scripts that never invoke - * the callback — without it, executeAtomAsync waits forever. Default 30s. + * The outer timeout protects against scripts that never invoke the callback — + * without it, executeAtomAsync waits forever. Default 30s. */ async evalAsync(body: string, opts: { timeoutMs?: number } = {}): Promise { const timeoutMs = opts.timeoutMs ?? 30_000; @@ -123,14 +168,15 @@ export class CdpSession { * it's stringified via Function.prototype.toString and re-parsed in the * page. Callers in this harness only read window.__TEST_* globals. */ - async evaluate(fn: () => T | Promise): Promise { + async evaluate(fn: () => T | Promise, opts: { timeoutMs?: number } = {}): Promise { const body = `return (${fn.toString()})();`; const start = Date.now(); + const exec = (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ + body, + [], + ]) as Promise; try { - return (await (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ - body, - [], - ])) as T; + return await withHardTimeout(exec, opts.timeoutMs ?? 
EVAL_HARD_TIMEOUT_MS, 'evaluate'); } finally { this.stats.evaluateCount++; this.stats.evaluateMs += Date.now() - start; diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index c3de28376..72af9649f 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -139,9 +139,11 @@ export class IosWalletPage implements WalletPage { await this.pollForSelector('[data-testid="onboarding-welcome"]', 30_000); const passwordEnc = encodeURIComponent(password); - // Guardian account creation does extra HTTP round-trips to co-sign with the - // guardian, so it needs a wider readiness window than a private wallet. - const readyTimeoutMs = recovery === 'guardian' ? 180_000 : 120_000; + // Guardian creation does extra guardian co-sign round-trips, so it gets the + // wider window — but both paths run generously because cold WASM init + + // account creation on the macos-26 simulator can exceed a minute under load + // (a standard create was observed passing 120s). + const readyTimeoutMs = recovery === 'guardian' ? 240_000 : 180_000; await this.cdp.eval( `var u = new URL(location.href); ` + `u.searchParams.set('__test_skip_onboarding', '1'); ` + @@ -295,10 +297,7 @@ export class IosWalletPage implements WalletPage { // ── Claim ───────────────────────────────────────────────────────────────── - async claimAllNotes( - timeoutMs: number = 120_000, - knownFaucetIds: string[] = [] - ): Promise { + async claimAllNotes(timeoutMs: number = 120_000, knownFaucetIds: string[] = []): Promise { // Chrome's claimAllNotes reloads the page to get a fresh Dexie handle // — that's safe on Chrome because the SW holds the vault unlock in a // separate context. 
On mobile there's no SW; a reload would drop the @@ -440,20 +439,9 @@ export class IosWalletPage implements WalletPage { return; } const result = await this.cdp - .eval<{ before: string[]; injected: string[]; after: string[] } | { error: string }>( - `var conv = window.__TEST_HEX_TO_BECH32_FAUCET__; ` + - `var bech32 = ${hexJson}.map(hex => conv(hex, ${networkArg})); ` + - `var injected = {}; ` + - `for (var i = 0; i < bech32.length; i++) injected[bech32[i]] = { name: 'Test Token', symbol: 'TST', decimals: 8, thumbnailUri: '' }; ` + - `var s = window.__TEST_STORE__; ` + - `if (!s) return { error: 'no __TEST_STORE__' }; ` + - `var st = s.getState(); ` + - `var before = Object.keys(st.assetsMetadata || {}); ` + - `if (typeof st.setAssetsMetadata === 'function') { st.setAssetsMetadata(injected); } ` + - `else { s.setState({ assetsMetadata: Object.assign({}, st.assetsMetadata || {}, injected) }); } ` + - `var after = Object.keys(s.getState().assetsMetadata || {}); ` + - `return { before: before, injected: bech32, after: after };` - ) + .eval< + { before: string[]; injected: string[]; after: string[] } | { error: string } + >(`var conv = window.__TEST_HEX_TO_BECH32_FAUCET__; ` + `var bech32 = ${hexJson}.map(hex => conv(hex, ${networkArg})); ` + `var injected = {}; ` + `for (var i = 0; i < bech32.length; i++) injected[bech32[i]] = { name: 'Test Token', symbol: 'TST', decimals: 8, thumbnailUri: '' }; ` + `var s = window.__TEST_STORE__; ` + `if (!s) return { error: 'no __TEST_STORE__' }; ` + `var st = s.getState(); ` + `var before = Object.keys(st.assetsMetadata || {}); ` + `if (typeof st.setAssetsMetadata === 'function') { st.setAssetsMetadata(injected); } ` + `else { s.setState({ assetsMetadata: Object.assign({}, st.assetsMetadata || {}, injected) }); } ` + `var after = Object.keys(s.getState().assetsMetadata || {}); ` + `return { before: before, injected: bech32, after: after };`) .catch((e: Error) => ({ error: e.message })); // eslint-disable-next-line no-console 
console.log(`[injectTestMetadataForFaucets] hex=${hexJson} -> ${JSON.stringify(result)}`); @@ -595,38 +583,62 @@ export class IosWalletPage implements WalletPage { /** * Read a Guardian account's on-chain auth structure (overall threshold, - * signer commitments, per-procedure thresholds). Calls the same - * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP - * atom: the hook awaits getOrCreateMultisigService + a best-effort - * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. The hook itself caps its internal sync at 8s, so the - * 30s evalAsync budget is comfortable even when the background sync holds the - * WASM lock. + * signer commitments, per-procedure thresholds). + * + * iOS reads this from the `__TEST_GUARDIAN_AUTH_STRUCTURE__` stash that the + * wallet's own balance poll populates (`fetchBalances` → + * `captureGuardianAuthStructureForTest`, a pure `AccountInspector.fromAccount` + * parse) — NOT through the async `__TEST_GUARDIAN_AUTH__` hook. Two reasons, + * both proven against the CI timeline: + * + * 1. The stash is a plain JSON-serializable object, so it reads over the + * reliable SYNCHRONOUS `execute_script` atom. The async + * `execute_async_script` atom (appium-remote-debugger) hands the user + * script its completion callback as `arguments[arguments.length - 1]`, + * but on this iOS RWI bridge that slot arrives as the boolean `true`, so + * `cb(result)` throws `TypeError: cb is not a function`, the promise + * rejects unhandled, the callback never fires, and EVERY `evalAsync` + * hangs to its timeout. (Observed: `Unhandled Promise Rejection: + * TypeError: d is not a function ... 'd' is true` fired the instant the + * auth read ran, then a 60s timeout — even though the stash was already + * populated.) The sync atom returns its value directly, no callback. + * 2. A direct WASM read in the eval path gets starved on the single-threaded + * iOS WASM. 
The stash read touches no WASM at all. + * + * The auth structure is immutable (fixed at account creation), so a + * slightly-old captured copy is exactly correct for these assertions. The + * stash is keyed by the address the balance poll fetched, which can be a + * different encoding than the publicKey the test passes — but a wallet + * instance only ever has one Guardian account, so the single stashed + * structure is unambiguous. Polls because the capture runs on the balance-poll + * cadence; by the auth step the consume has already driven several polls, so + * the first read almost always hits. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { - return this.cdp.evalAsync( - `var cb = arguments[arguments.length - 1]; - var fn = globalThis.__TEST_GUARDIAN_AUTH__; - if (typeof fn !== 'function') { - cb({ - threshold: NaN, - signerCommitments: [], - procedureThresholds: {}, - error: '__TEST_GUARDIAN_AUTH__ unavailable (needs MIDEN_E2E_TEST build)' - }); - return; - } - Promise.resolve(fn(${JSON.stringify(accountPublicKey)})) - .then(function (r) { cb(r); }) - .catch(function (e) { - cb({ - threshold: NaN, - signerCommitments: [], - procedureThresholds: {}, - error: String(e && e.message ? e.message : e) - }); - });` - ); + const deadline = Date.now() + 30_000; + let lastErr = 'guardian auth structure not captured (stash empty after 30s)'; + while (Date.now() < deadline) { + try { + const result = await this.cdp.eval( + `var s = globalThis.__TEST_GUARDIAN_AUTH_STRUCTURE__; + if (!s) return null; + var keys = Object.keys(s); + if (keys.length === 0) return null; + var v = s[${JSON.stringify(accountPublicKey)}] || s[keys[0]]; + if (!v) return null; + return { + threshold: v.threshold, + signerCommitments: v.signerCommitments, + procedureThresholds: v.procedureThresholds + };` + ); + if (result) return result; + } catch (e) { + lastErr = e instanceof Error ? 
e.message : String(e); + } + await sleep(1_500); + } + return { threshold: NaN, signerCommitments: [], procedureThresholds: {}, error: lastErr }; } /** diff --git a/playwright/e2e/ios/helpers/simulator-control.ts b/playwright/e2e/ios/helpers/simulator-control.ts index 18f54d861..1823ecc79 100644 --- a/playwright/e2e/ios/helpers/simulator-control.ts +++ b/playwright/e2e/ios/helpers/simulator-control.ts @@ -102,7 +102,7 @@ export class SimulatorControl { // // bootstatus failing is a HARD error: continuing onto a half-booted // simulator does not produce "a clearer error later" — it produces - // simctl install/launch calls that hang for the entire 15-minute test + // simctl install/launch calls that hang for the entire per-test // timeout, twice (observed on macos-26 runners). One shutdown→boot // cycle is allowed to recover a wedged first boot; after that, fail // loudly so the job dies in minutes with the real cause named. @@ -236,7 +236,21 @@ export class SimulatorControl { // Non-zero if the process was already gone (or sudo unavailable off CI) — // the daemon respawns on the next simctl call regardless. } - await sleep(5_000); + // Give launchd time to respawn a clean daemon before we drive it again. + await sleep(8_000); + + // Clear wedged device state on the freshly-respawned daemon. A degraded + // macos-26 CoreSimulator leaves devices in a half-booted state that makes + // every subsequent `simctl` op crawl or fail (SimError 405 on terminate); + // restarting the daemon alone doesn't reset the devices. `shutdown all` + // forces them back to a clean Shutdown state so the boot below starts fresh. + // Best-effort and bounded — a still-wedged daemon will time out here, and + // the boot loop will surface the real failure. + try { + await execFileAsync('xcrun', ['simctl', 'shutdown', 'all'], { timeout: 60_000 }); + } catch { + // Best-effort — ensureBooted below recovers individual devices anyway. 
+ } // webinspectord_sim only exposes WebViews while Simulator.app is running; // killing the daemon tears it down, so bring it back before re-booting. try { @@ -256,7 +270,7 @@ export class SimulatorControl { // Every simctl call gets a hard timeout: on macos-26 CI runners a single // `simctl install` / `launch` against an unhealthy simulator hangs -// indefinitely, silently eating the whole 15-minute test timeout with no +// indefinitely, silently eating the whole per-test timeout with no // attribution. Failing in 3 minutes with the command named turns that into // a diagnosable error (and lets the per-test recovery + CI retry actually // kick in — see SimulatorControl.recoverSimSubsystem). diff --git a/src/lib/miden/front/balance.ts b/src/lib/miden/front/balance.ts index 159804f04..6871e4dd6 100644 --- a/src/lib/miden/front/balance.ts +++ b/src/lib/miden/front/balance.ts @@ -6,6 +6,7 @@ import { useWalletStore } from 'lib/store'; import { fetchBalances } from 'lib/store/utils/fetchBalances'; import { AssetMetadata, MIDEN_METADATA } from '../metadata'; +import { isTestSyncPaused } from './test-sync-pause'; export interface TokenBalanceData { tokenId: string; @@ -140,9 +141,12 @@ export function useAllBalances(address: string, tokenMetadatas: Record { - if (mountedRef.current) { + if (mountedRef.current && !isTestSyncPaused()) { fetchBalancesWithDeduping(); } }, REFRESH_INTERVAL); diff --git a/src/lib/miden/front/claimable-notes.ts b/src/lib/miden/front/claimable-notes.ts index 402d3d56d..8c41fe7fd 100644 --- a/src/lib/miden/front/claimable-notes.ts +++ b/src/lib/miden/front/claimable-notes.ts @@ -15,6 +15,7 @@ import { getBech32AddressFromAccountId } from '../sdk/helpers'; import { getMidenClient, runWhenClientIdle, withWasmClientLock } from '../sdk/miden-client'; import { ConsumableNote, NoteTypeEnum } from '../types'; import { useTokensMetadata } from './assets'; +import { isTestSyncPaused } from './test-sync-pause'; // Debug info for iOS troubleshooting 
export type ClaimableNotesDebugInfo = { @@ -297,6 +298,10 @@ function useLocalClaimableNotes(publicAddress: string, enabled: boolean) { revalidateOnFocus: false, dedupingInterval: 10_000, refreshInterval: 5_000, + // Lets an E2E hook quiesce this (heavy, WASM-lock-bound) poll while it does + // its own single-threaded-WASM read; otherwise the read is livelocked on + // mobile by the 5s re-fire. No-op in production (tree-shaken). + isPaused: () => isTestSyncPaused(), onError: e => { console.error('Error fetching claimable notes:', e); debugInfoRef.current = { diff --git a/src/lib/miden/front/test-sync-pause.test.ts b/src/lib/miden/front/test-sync-pause.test.ts new file mode 100644 index 000000000..6be3977c4 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.test.ts @@ -0,0 +1,44 @@ +import { isTestSyncPaused, setTestSyncPaused } from './test-sync-pause'; + +type FlagGlobal = { __TEST_SYNC_PAUSED__?: boolean }; + +describe('test-sync-pause', () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + + afterEach(() => { + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('isTestSyncPaused returns true only when the E2E build flag and the pause flag are both set', () => { + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(true); + }); + + it('isTestSyncPaused returns false when the pause flag is unset', () => { + process.env.MIDEN_E2E_TEST = 'true'; + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + expect(isTestSyncPaused()).toBe(false); + }); + + it('isTestSyncPaused returns false off the E2E build even if the pause flag is set', () => { + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(false); + }); + + it('setTestSyncPaused toggles the flag on the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'true'; + setTestSyncPaused(true); + 
expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(true); + setTestSyncPaused(false); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(false); + }); + + it('setTestSyncPaused is a no-op off the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'false'; + setTestSyncPaused(true); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBeUndefined(); + }); +}); diff --git a/src/lib/miden/front/test-sync-pause.ts b/src/lib/miden/front/test-sync-pause.ts new file mode 100644 index 000000000..4661aea38 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.ts @@ -0,0 +1,35 @@ +/** + * E2E-only background-sync pause. + * + * On mobile the Miden WASM client is single-threaded (main thread). A test that + * needs to make its own WASM-lock-bound read (currently `__TEST_GUARDIAN_AUTH__` + * reading a Guardian account's on-chain auth structure) is otherwise livelocked + * by the wallet's always-on frontend pollers, which each re-fire every few + * seconds and keep the single WASM thread saturated: + * - `useSyncTrigger` (3s chain sync) + * - the balance poll (`fetchBalances`, 5s) — which deliberately bypasses + * `withWasmClientLock`, so holding that lock gives the read zero protection + * - the claimable-notes SWR (`getConsumableNotes`, 5s) + * + * A test hook sets `__TEST_SYNC_PAUSED__` for the duration of its read; every + * one of those pollers checks `isTestSyncPaused()` and skips a cycle while it is + * set, so the read runs against an idle main thread and completes in seconds. + * + * Zero production impact: `MIDEN_E2E_TEST` is statically replaced with `'false'` + * in production builds, so these helpers short-circuit and the global lookup is + * dead-code-eliminated. This module is intentionally dependency-free so any + * layer (front hooks, store) can import it without a cycle. 
+ */ + +interface TestSyncPauseGlobal { + __TEST_SYNC_PAUSED__?: boolean; +} + +export function isTestSyncPaused(): boolean { + return process.env.MIDEN_E2E_TEST === 'true' && (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ === true; +} + +export function setTestSyncPaused(paused: boolean): void { + if (process.env.MIDEN_E2E_TEST !== 'true') return; + (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ = paused; +} diff --git a/src/lib/miden/front/useSyncTrigger.test.tsx b/src/lib/miden/front/useSyncTrigger.test.tsx index 6fe6d4f6e..2e12a71c0 100644 --- a/src/lib/miden/front/useSyncTrigger.test.tsx +++ b/src/lib/miden/front/useSyncTrigger.test.tsx @@ -188,6 +188,52 @@ describe('useSyncTrigger', () => { expect(mockSyncState).not.toHaveBeenCalled(); }); + it('extension: skips SyncRequest while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + mockIsExtension.mockReturnValue(true); + + const { unmount } = render(); + + await flush(); + expect(mockIntercomRequest).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('mobile/desktop: skips syncState while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + await flush(); + expect(mockSyncState).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('does not pause sync when __TEST_SYNC_PAUSED__ is set but MIDEN_E2E_TEST is off (production)', async () => { + const prevEnv = 
process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + // The flag is ignored off the E2E build, so the normal mobile sync still runs. + await waitFor(() => expect(mockSyncState).toHaveBeenCalled()); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + it('extension: clears the interval on unmount', async () => { jest.useFakeTimers(); mockIsExtension.mockReturnValue(true); diff --git a/src/lib/miden/front/useSyncTrigger.ts b/src/lib/miden/front/useSyncTrigger.ts index 5471a2131..3da3c68b0 100644 --- a/src/lib/miden/front/useSyncTrigger.ts +++ b/src/lib/miden/front/useSyncTrigger.ts @@ -9,11 +9,12 @@ import { getIntercom, useWalletStore } from 'lib/store'; import { WalletType } from 'screens/onboarding/types'; import { syncGuardianAccounts } from './guardian-sync'; +import { isTestSyncPaused } from './test-sync-pause'; const SYNC_INTERVAL_MS = 3_000; function triggerSync(intercom: ReturnType) { - if (isInsideSendFlow()) return; + if (isInsideSendFlow() || isTestSyncPaused()) return; intercom .request({ type: WalletMessageType.SyncRequest }) .then(() => { @@ -97,7 +98,7 @@ export function useSyncTrigger() { const mobileTxModalOpen = isMobile() && storeState.isTransactionModalOpen; const inSendFlow = isInsideSendFlow(); - if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow) { + if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow && !isTestSyncPaused()) { useWalletStore.getState().setSyncStatus(true); try { await withWasmClientLock(async () => { diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index d607df5bf..db61ef360 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -3,6 +3,7 @@ import { subscribeWithSelector } from 'zustand/middleware'; import { createIntercomClient, IIntercomClient } from 
'lib/intercom/client'; import { clearPersistedSeenNoteIds, persistSeenNoteIds } from 'lib/miden/back/note-checker-storage'; +import { setTestSyncPaused } from 'lib/miden/front/test-sync-pause'; import { fetchTokenMetadata } from 'lib/miden/metadata'; import { MidenMessageType, MidenState } from 'lib/miden/types'; import { isExtension } from 'lib/platform'; @@ -723,29 +724,71 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // Guardian on-chain auth structure (overall threshold + signer set + procedure // thresholds) for E2E assertions — the harness's balance checks can't see the - // 3-key shape. Reads the cached front-end MultisigService; dynamic imports - // avoid a static cycle (guardian-sync pulls in this store module). + // 3-key shape. Dynamic imports avoid a static cycle. (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { + // Fast path: the balance poll (`fetchBalances`, which reliably completes in + // the wallet's own flow) stashes this account's auth structure on + // `__TEST_GUARDIAN_AUTH_STRUCTURE__`. Serving it here is a plain object read + // with NO WASM call, so it can't be starved by other main-thread WASM + // activity on the single-threaded iOS WASM (the live read below otherwise + // times out: the auth eval was observed taking 60s with the WebView main + // thread saturated even after all the wallet's own pollers were paused). + const stashStore = ( + globalThis as { + __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< + string, + { threshold: number; signerCommitments: string[]; procedureThresholds: Record } + >; + } + ).__TEST_GUARDIAN_AUTH_STRUCTURE__; + // Prefer the exact-key match; fall back to the single stashed entry. The + // balance poll keys the stash by the address it's called with, which can be + // a different encoding of the same account than the publicKey the test + // passes here — and a wallet instance only ever has one Guardian account, so + // any stashed multisig structure on this page belongs to it. 
+ const stashed = stashStore?.[accountPublicKey] ?? (stashStore ? Object.values(stashStore)[0] : undefined); + if (stashed) { + return stashed; + } + + // Read the structure with a PURE storage parse (`AccountInspector.fromAccount`), + // not the transaction-oriented MultisigService. Going through + // `getOrCreateMultisigService` → `MultisigClient.load` drove a re-sign/realign + // loop (~48 `signWithHotKey` calls vs. 26 for a full consume) when loading + // against the post-consume state where the guardian's stored blob lags the + // on-chain account — on the single-threaded mobile WASM that loop hung the + // read past the eval budget. The inspector only reads the account's storage + // maps (signers, threshold_config, procedure_thresholds): no signing, no + // guardian HTTP, no load. A single `getAccount` (the same read the balance + // poll already does) plus the parse is cheap and correct — the structure is + // immutable. + // The read still needs one `getAccount`, and on the single-threaded mobile + // WASM even that lone call queues behind an in-flight background sync + // (`syncState` can hold the SDK's internal call-queue for tens of seconds). + // So quiesce the always-on frontend WASM pollers (`useSyncTrigger`, the + // balance poll — which bypasses the wallet mutex — and the claimable-notes + // SWR) via `__TEST_SYNC_PAUSED__` for the read, restored in `finally`. Gated + // on MIDEN_E2E_TEST, tree-shaken from production. + setTestSyncPaused(true); try { - const [{ getOrCreateMultisigService }, { zustandProvider }] = await Promise.all([ - import('lib/miden/front/guardian-manager'), - import('lib/miden/front/guardian-sync') + const [{ AccountInspector }, { getMidenClient }] = await Promise.all([ + import('@openzeppelin/miden-multisig-client'), + import('lib/miden/sdk/miden-client') ]); - const service = await getOrCreateMultisigService(accountPublicKey, zustandProvider); - try { - // Best-effort refresh of on-chain state before reading. 
service.sync() - // takes the global WASM lock; on mobile the background sync can hold it - // for tens of seconds, which would blow the 30s execute_async_script - // budget the iOS bridge runs this under. Cap the wait — the auth - // structure (signers + procedure thresholds) is immutable during this - // assertion, so a slightly stale local read is still correct. - await Promise.race([service.sync(), new Promise(resolve => setTimeout(resolve, 8_000))]); - } catch { - // best-effort — fall back to last-synced state + const account = await (await getMidenClient()).getAccount(accountPublicKey); + if (!account) { + return { error: `Guardian account ${accountPublicKey} not found in local client` }; } - return service.getAuthInfo(); + const config = AccountInspector.fromAccount(account); + return { + threshold: config.threshold, + signerCommitments: config.signerCommitments, + procedureThresholds: Object.fromEntries(config.procedureThresholds) + }; } catch (e) { return { error: e instanceof Error ? e.message : String(e) }; + } finally { + setTestSyncPaused(false); } }; } diff --git a/src/lib/store/utils/fetchBalances.ts b/src/lib/store/utils/fetchBalances.ts index 2432c7946..d6c8c1e47 100644 --- a/src/lib/store/utils/fetchBalances.ts +++ b/src/lib/store/utils/fetchBalances.ts @@ -18,6 +18,46 @@ export interface FetchBalancesOptions { tokenPrices?: TokenPrices; } +type SdkAccount = NonNullable>['getAccount']>>>; + +/** + * E2E-only: parse a Guardian account's on-chain auth structure (signer set + + * procedure thresholds) with `AccountInspector` — a pure storage read, no + * signing/load — and stash it on `globalThis.__TEST_GUARDIAN_AUTH_STRUCTURE__` + * keyed by address, so `__TEST_GUARDIAN_AUTH__` can serve it without any WASM + * call. No-op for non-multisig accounts. Tree-shaken from production. 
+ */ +async function captureGuardianAuthStructureForTest(address: string, account: SdkAccount): Promise { + try { + const { AccountInspector } = await import('@openzeppelin/miden-multisig-client'); + const config = AccountInspector.fromAccount(account); + if (!config.signerCommitments || config.signerCommitments.length === 0) { + // eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure: not a multisig account (0 signers), skipping', address); + return; + } + const holder = globalThis as { + __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< + string, + { threshold: number; signerCommitments: string[]; procedureThresholds: Record } + >; + }; + holder.__TEST_GUARDIAN_AUTH_STRUCTURE__ = { + ...(holder.__TEST_GUARDIAN_AUTH_STRUCTURE__ ?? {}), + [address]: { + threshold: config.threshold, + signerCommitments: config.signerCommitments, + procedureThresholds: Object.fromEntries(config.procedureThresholds) + } + }; + // eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure: stashed', address, 'signers=', config.signerCommitments.length); + } catch (e) { + // eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure failed:', e instanceof Error ? e.message : String(e)); + } +} + /** * Fetch all token balances for an account * @@ -54,6 +94,23 @@ export async function fetchBalances( // queued behind long-running writes like `syncState`. const midenClient = await getMidenClient(); const acc = await midenClient.getAccount(address); + + // E2E-only: capture a Guardian account's on-chain auth structure HERE, inside + // the wallet's own working balance poll (which reliably completes), so the + // `__TEST_GUARDIAN_AUTH__` test hook can read it as a plain value instead of + // doing its own blocking-eval WASM read — which on the single-threaded iOS + // WASM gets starved by other main-thread WASM activity and times out. The + // structure is immutable, so a slightly-old capture is correct. 
Best-effort + // (capture errors are caught and logged); gated on MIDEN_E2E_TEST and tree-shaken from production. + if (process.env.MIDEN_E2E_TEST === 'true' && acc) { + // Awaited (not fire-and-forget): tie the capture to this balance fetch so it + // is stashed before `verify_balance` passes and the auth step reads it — a + // fire-and-forget capture loses the race against the test on the contended + // iOS main thread. The `@openzeppelin/...` import is already warm (the + // guardian flow loaded it), so this adds negligible latency. + await captureGuardianAuthStructureForTest(address, acc); + } + let account: typeof acc | null = null; let assets: FungibleAsset[] = []; if (acc) {