From 3c27f16539f6853b29c2175da0b39c7c759c143f Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 14:55:34 +0200 Subject: [PATCH 01/20] fix(e2e): hard-timeout iOS CDP eval/evaluate so a wedged WebView fails fast instead of hanging the whole test --- playwright/e2e/ios/helpers/cdp-bridge.ts | 55 +++++++++++++++++++----- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/playwright/e2e/ios/helpers/cdp-bridge.ts b/playwright/e2e/ios/helpers/cdp-bridge.ts index bae22c3e9..7dc3af196 100644 --- a/playwright/e2e/ios/helpers/cdp-bridge.ts +++ b/playwright/e2e/ios/helpers/cdp-bridge.ts @@ -14,6 +14,39 @@ const SELECT_APP_POLL_MS = 1_500; const PAGE_READY_TIMEOUT = 15_000; const SOCKET_DISCOVERY_TIMEOUT = 30_000; +// A synchronous `execute_script` (reading window.__TEST_* globals) returns in +// milliseconds. If `executeAtom` hasn't resolved within this window, the +// WebView's RWI socket or its main JS thread is wedged. Surface it as a throw +// so callers (notably pollForCondition) can enforce their own deadline and let +// --retries restart on a fresh app + CDP, instead of the whole test hanging +// until the global Playwright timeout. Mirrors evalAsync's async-callback guard. +const EVAL_HARD_TIMEOUT_MS = 30_000; + +/** + * Race a CDP call against a hard wall-clock timeout. A WebKit RemoteDebugger + * `executeAtom` can hang indefinitely when the inspected page's main thread is + * blocked (e.g. mobile main-thread WASM) or the RWI socket wedges; this bounds + * it so a transient stall becomes a fast, retriable failure rather than a + * multi-minute hang. + */ +async function withHardTimeout(exec: Promise, timeoutMs: number, label: string): Promise { + // If the timeout wins the race, the abandoned `exec` may settle later; attach + // a no-op catch so a late rejection doesn't surface as an unhandledRejection. 
+ exec.catch(() => {}); + let timer: NodeJS.Timeout | undefined; + const timeout = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`${label}: CDP call did not return within ${timeoutMs}ms (WebView/RWI wedged)`)), + timeoutMs + ); + }); + try { + return await Promise.race([exec, timeout]); + } finally { + if (timer) clearTimeout(timer); + } +} + interface ConnectOpts { udid: string; bundleId: string; @@ -69,13 +102,14 @@ export class CdpSession { * For Promise-returning code, use `evalAsync` — `eval` resolves the * Promise object itself, not its value. */ - async eval(body: string): Promise { + async eval(body: string, opts: { timeoutMs?: number } = {}): Promise { const start = Date.now(); + const exec = (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ + body, + [], + ]) as Promise; try { - return (await (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ - body, - [], - ])) as T; + return await withHardTimeout(exec, opts.timeoutMs ?? EVAL_HARD_TIMEOUT_MS, 'eval'); } finally { this.stats.evalCount++; this.stats.evalMs += Date.now() - start; @@ -123,14 +157,15 @@ export class CdpSession { * it's stringified via Function.prototype.toString and re-parsed in the * page. Callers in this harness only read window.__TEST_* globals. */ - async evaluate(fn: () => T | Promise): Promise { + async evaluate(fn: () => T | Promise, opts: { timeoutMs?: number } = {}): Promise { const body = `return (${fn.toString()})();`; const start = Date.now(); + const exec = (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ + body, + [], + ]) as Promise; try { - return (await (this.rd as unknown as ExecuteAtomCapable).executeAtom('execute_script', [ - body, - [], - ])) as T; + return await withHardTimeout(exec, opts.timeoutMs ?? 
EVAL_HARD_TIMEOUT_MS, 'evaluate'); } finally { this.stats.evaluateCount++; this.stats.evaluateMs += Date.now() - start; From 4d70c1fdbdcd16d00e40d30e00681d899a7b2c60 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 14:57:27 +0200 Subject: [PATCH 02/20] docs(changelog): note iOS CDP eval hard-timeout --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f42196ab..2be0c2ee8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * [FIX][mobile] **iOS hot-key signing now requires Face ID / Touch ID on device, matching Android.** The Secure Enclave hot key was created with `.privateKeyUsage` only — a *usage permission* that, contrary to the old code comment, does **not** prompt for authentication — so user-initiated Guardian claims and sends signed silently on iOS, while Android (StrongBox + `setUserAuthenticationRequired`) already prompted. New hot keys now also set `.userPresence`, so every user-initiated hot signature and hot-key reveal requires user presence (Face ID / Touch ID with passcode fallback). `.userPresence` is used rather than `.biometryCurrentSet` so the key survives biometric re-enrollment instead of bricking until re-activation. Background auto-consume is unaffected — it is cold-signed in WASM and never touches the hot key. Scope: the gate applies to device builds only (the simulator / iOS E2E path keeps `.privateKeyUsage`-only silent signing), and existing hot keys keep their prior behavior until re-activated/rotated. (#299) * [FIX][mobile] **iOS app builds again under Xcode 26.** Two `foundKey as? SecKey` downcasts in the hot-key plugin (`signWithHotKey` / `revealHotKey`) are no-ops for CoreFoundation types — they always succeed — which Xcode 26 now rejects as a hard error, breaking the iOS build. Replaced with a `CFGetTypeID(foundKey) == SecKeyGetTypeID()` guard plus a force-cast: both the correct defensive downcast and Xcode-26-clean. 
(#299) * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) +* [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. 
(#302) ## 1.15.2 (2026-06-22) From 8b4021a082abc1e32a22c75051f1e331b333e083 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 16:18:48 +0200 Subject: [PATCH 03/20] fix(e2e): retry miden-client CLI on transient remote-prover connection errors --- CHANGELOG.md | 1 + playwright/e2e/helpers/miden-cli.ts | 59 +++++++++++++---------------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be0c2ee8..93350a4df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * [FIX][mobile] **iOS app builds again under Xcode 26.** Two `foundKey as? SecKey` downcasts in the hot-key plugin (`signWithHotKey` / `revealHotKey`) are no-ops for CoreFoundation types — they always succeed — which Xcode 26 now rejects as a hard error, breaking the iOS build. Replaced with a `CFGetTypeID(foundKey) == SecKeyGetTypeID()` guard plus a force-cast: both the correct defensive downcast and Xcode-26-clean. (#299) * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). 
The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) +* [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/helpers/miden-cli.ts b/playwright/e2e/helpers/miden-cli.ts index 1f8ca1c13..f3b858109 100644 --- a/playwright/e2e/helpers/miden-cli.ts +++ b/playwright/e2e/helpers/miden-cli.ts @@ -11,6 +11,29 @@ decimals = 8 symbol = "TST" `; +/** + * Classify a `miden-client` CLI stderr as a transient failure that should be + * retried (vs. a deterministic error that should fail fast). 
Matched + * wrap-tolerantly with `\s+` because miette folds messages at terminal width. + * + * Categories: + * - RPC/transport to the node: 5xx, gRPC framing, reset/timeout. + * - `new nonce N is less than old nonce M`: the node's account state lags the + * store's optimistic post-submit state while a deploy/mint is still in + * flight, and miden-client's sqlite store hard-fails the whole sync on it + * (0xMiden/miden-client#2243). Clears once the tx commits. + * - Remote-prover connection failures (`failed to connect to ... prover`, + * `transport error`, `no native certs found`): the TLS/gRPC handshake to the + * delegated prover endpoint flakes intermittently on the macOS CI runners + * (a sibling mint in the same test connects fine), so a connection-level + * prover error is transient, not a proving-logic failure. + */ +export function isTransientCliError(stderr: string): boolean { + return /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce|failed\s+to\s+connect\s+to(\s+the)?(\s+remote)?\s+prover|transport\s+error|no\s+native\s+certs/i.test( + stderr + ); +} + /** * Resolve the miden-client binary path. * 1. MIDEN_CLIENT_BIN env var @@ -188,17 +211,7 @@ export class MidenCli { break; } lastErr = createResult.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. 
- /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console @@ -266,17 +279,7 @@ export class MidenCli { return { txId, noteId }; } lastErr = result.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. - /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console @@ -296,17 +299,7 @@ export class MidenCli { const result = await this.run('sync', { timeoutMs: 60_000 }); if (result.exitCode === 0) return; lastErr = result.stderr; - const transient = - // `new nonce N is less than old nonce M` (matched wrap-tolerantly — - // miette folds the message at terminal width): the node's account - // state lags the store's optimistic post-submit state while a - // deploy or mint is still in flight, and miden-client's sqlite - // store hard-fails the whole sync on it (0xMiden/miden-client#2243). - // Clears as soon as the tx commits, so it retries like any other - // transient. 
- /HTTP status code 5\d\d|grpc request failed|grpc-status header missing|connection reset|timed out|Temporary failure|less\s+than\s+old\s+nonce/i.test( - lastErr - ); + const transient = isTransientCliError(lastErr); if (!transient || attempt === maxAttempts) break; const backoffMs = Math.min(30_000, 1_000 * 2 ** (attempt - 1)); // eslint-disable-next-line no-console From 4b20eb710d6290a9b8a3102b23e0e85e9b8c5a07 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 17:58:10 +0200 Subject: [PATCH 04/20] fix(e2e): give guardian iOS auth-structure read a 90s budget (slow runner + WASM-lock contention) --- CHANGELOG.md | 1 + playwright/e2e/ios/helpers/ios-wallet-page.ts | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93350a4df..378d0abd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). 
The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a 90s budget instead of 30s.** `getGuardianAuthInfo` runs `__TEST_GUARDIAN_AUTH__` (multisig-service build + Guardian co-sign round-trips + hot signature + a short sync) under the 30s `evalAsync` default. On the slow iOS simulator runners that work legitimately exceeds 30s — especially when it queues behind `useSyncTrigger`'s 30-60s WASM-lock hold — and was observed still progressing (hot signature returning) at the cutoff, i.e. 
slow, not wedged. The call now gets a 90s budget so the guardian auth read completes instead of tripping the eval timeout. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index c3de28376..f529d4bb1 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -599,9 +599,16 @@ export class IosWalletPage implements WalletPage { * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP * atom: the hook awaits getOrCreateMultisigService + a best-effort * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. The hook itself caps its internal sync at 8s, so the - * 30s evalAsync budget is comfortable even when the background sync holds the - * WASM lock. + * execute_async_script. + * + * Budget: 90s, not the 30s evalAsync default. Building the multisig service + * co-signs with the Guardian (HTTP round-trips) and a hot signature, and on + * the slow iOS simulator runners that work can also queue behind + * useSyncTrigger's WASM-lock hold (documented at 30-60s). The hook's internal + * sync is capped at 8s, but the service build itself is not — observed + * actively progressing (HotKey.signWithHotKey returning) right up to a 30s + * cutoff, i.e. it was slow, not wedged. 90s clears the lock-hold window with + * headroom; a genuine wedge still fails fast enough for --retries. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { return this.cdp.evalAsync( @@ -625,7 +632,8 @@ export class IosWalletPage implements WalletPage { procedureThresholds: {}, error: String(e && e.message ? 
e.message : e) }); - });` + });`, + { timeoutMs: 90_000 } ); } From 2a59246495eb747321849225e5d9f9a7182e8f1e Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 18:41:19 +0200 Subject: [PATCH 05/20] Revert "fix(e2e): give guardian iOS auth-structure read a 90s budget" 90s still times out (proven in CI run 28294249613): the guardian auth read is WASM-lock-starved by useSyncTrigger's 3s-cadence sync, not merely slow, so no fixed eval budget fixes it. Reverting to keep this PR to verified-working fixes. --- CHANGELOG.md | 1 - playwright/e2e/ios/helpers/ios-wallet-page.ts | 16 ++++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378d0abd7..93350a4df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,6 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a 90s budget instead of 30s.** `getGuardianAuthInfo` runs `__TEST_GUARDIAN_AUTH__` (multisig-service build + Guardian co-sign round-trips + hot signature + a short sync) under the 30s `evalAsync` default. On the slow iOS simulator runners that work legitimately exceeds 30s — especially when it queues behind `useSyncTrigger`'s 30-60s WASM-lock hold — and was observed still progressing (hot signature returning) at the cutoff, i.e. slow, not wedged. The call now gets a 90s budget so the guardian auth read completes instead of tripping the eval timeout. 
(#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index f529d4bb1..c3de28376 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -599,16 +599,9 @@ export class IosWalletPage implements WalletPage { * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP * atom: the hook awaits getOrCreateMultisigService + a best-effort * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. - * - * Budget: 90s, not the 30s evalAsync default. Building the multisig service - * co-signs with the Guardian (HTTP round-trips) and a hot signature, and on - * the slow iOS simulator runners that work can also queue behind - * useSyncTrigger's WASM-lock hold (documented at 30-60s). The hook's internal - * sync is capped at 8s, but the service build itself is not — observed - * actively progressing (HotKey.signWithHotKey returning) right up to a 30s - * cutoff, i.e. it was slow, not wedged. 90s clears the lock-hold window with - * headroom; a genuine wedge still fails fast enough for --retries. + * execute_async_script. The hook itself caps its internal sync at 8s, so the + * 30s evalAsync budget is comfortable even when the background sync holds the + * WASM lock. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { return this.cdp.evalAsync( @@ -632,8 +625,7 @@ export class IosWalletPage implements WalletPage { procedureThresholds: {}, error: String(e && e.message ? 
e.message : e) }); - });`, - { timeoutMs: 90_000 } + });` ); } From 3f5c9e7974dabfd56cb8a651e366b3d24fb41355 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 18:51:12 +0200 Subject: [PATCH 06/20] fix(e2e): pause background sync during guardian iOS auth read to break WASM-lock starvation --- CHANGELOG.md | 1 + playwright/e2e/ios/helpers/ios-wallet-page.ts | 15 ++++-- src/lib/miden/front/useSyncTrigger.test.tsx | 46 +++++++++++++++++++ src/lib/miden/front/useSyncTrigger.ts | 21 ++++++++- src/lib/store/index.ts | 19 ++++++-- 5 files changed, 91 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93350a4df..5d46e7e27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) +* [CHANGE][ci] **Guardian iOS E2E suspends the background sync while it reads the on-chain auth structure.** The `verify_guardian_auth_structure` assertion calls `__TEST_GUARDIAN_AUTH__`, which builds the multisig service and reads it under the single-threaded WASM lock. On mobile the 3s in-process auto-sync (`useSyncTrigger`) holds that same lock and kept re-acquiring it faster than the read could progress, starving the read so it never completed (a raised eval budget alone still timed out — it was lock contention, not slowness). 
The hook now sets a test-only `__TEST_SYNC_PAUSED__` flag that pauses `useSyncTrigger` for the duration of the read and always clears it afterward, so the read gets the lock and completes. Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index c3de28376..4319877ee 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -599,9 +599,15 @@ export class IosWalletPage implements WalletPage { * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP * atom: the hook awaits getOrCreateMultisigService + a best-effort * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. The hook itself caps its internal sync at 8s, so the - * 30s evalAsync budget is comfortable even when the background sync holds the - * WASM lock. + * execute_async_script. + * + * Budget: 90s, not the 30s evalAsync default. The hook pauses the background + * sync while it reads (so it's no longer starved — see __TEST_GUARDIAN_AUTH__), + * but a sync already in flight when the read starts still holds the WASM lock + * until it releases, and on the slow iOS runners that wait plus the service + * build/sign can exceed 30s. 90s comfortably covers waiting out one in-flight + * sync; with the pause in place the read completes well within it rather than + * hanging indefinitely. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { return this.cdp.evalAsync( @@ -625,7 +631,8 @@ export class IosWalletPage implements WalletPage { procedureThresholds: {}, error: String(e && e.message ? 
e.message : e) }); - });` + });`, + { timeoutMs: 90_000 } ); } diff --git a/src/lib/miden/front/useSyncTrigger.test.tsx b/src/lib/miden/front/useSyncTrigger.test.tsx index 6fe6d4f6e..2e12a71c0 100644 --- a/src/lib/miden/front/useSyncTrigger.test.tsx +++ b/src/lib/miden/front/useSyncTrigger.test.tsx @@ -188,6 +188,52 @@ describe('useSyncTrigger', () => { expect(mockSyncState).not.toHaveBeenCalled(); }); + it('extension: skips SyncRequest while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + mockIsExtension.mockReturnValue(true); + + const { unmount } = render(); + + await flush(); + expect(mockIntercomRequest).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('mobile/desktop: skips syncState while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + await flush(); + expect(mockSyncState).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('does not pause sync when __TEST_SYNC_PAUSED__ is set but MIDEN_E2E_TEST is off (production)', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + // The flag is ignored off the E2E build, so the normal mobile sync still runs. 
+ await waitFor(() => expect(mockSyncState).toHaveBeenCalled()); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + it('extension: clears the interval on unmount', async () => { jest.useFakeTimers(); mockIsExtension.mockReturnValue(true); diff --git a/src/lib/miden/front/useSyncTrigger.ts b/src/lib/miden/front/useSyncTrigger.ts index 5471a2131..b5614c009 100644 --- a/src/lib/miden/front/useSyncTrigger.ts +++ b/src/lib/miden/front/useSyncTrigger.ts @@ -12,8 +12,25 @@ import { syncGuardianAccounts } from './guardian-sync'; const SYNC_INTERVAL_MS = 3_000; +/** + * E2E-only: lets a test hook suspend the background sync while it performs a + * WASM-lock-bound read. On mobile the sync runs in-process and holds the + * single-threaded WASM lock; its 3s cadence re-acquires the lock faster than a + * contending read can make progress, starving the read indefinitely (no fixed + * eval budget survives it — see `__TEST_GUARDIAN_AUTH__`). The hook sets + * `__TEST_SYNC_PAUSED__` around its read to remove the contention. Tree-shaken + * out of production: `MIDEN_E2E_TEST` is statically `'false'` there, so this + * short-circuits and the global lookup is dead-code-eliminated. 
+ */ +function isTestSyncPaused(): boolean { + return ( + process.env.MIDEN_E2E_TEST === 'true' && + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ === true + ); +} + function triggerSync(intercom: ReturnType) { - if (isInsideSendFlow()) return; + if (isInsideSendFlow() || isTestSyncPaused()) return; intercom .request({ type: WalletMessageType.SyncRequest }) .then(() => { @@ -97,7 +114,7 @@ export function useSyncTrigger() { const mobileTxModalOpen = isMobile() && storeState.isTransactionModalOpen; const inSendFlow = isInsideSendFlow(); - if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow) { + if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow && !isTestSyncPaused()) { useWalletStore.getState().setSyncStatus(true); try { await withWasmClientLock(async () => { diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index d607df5bf..0fc508d94 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -726,6 +726,15 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // 3-key shape. Reads the cached front-end MultisigService; dynamic imports // avoid a static cycle (guardian-sync pulls in this store module). (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { + // Suspend the background sync (useSyncTrigger) for the duration of this + // read. getOrCreateMultisigService + service.sync() + getAuthInfo all take + // the single-threaded WASM lock; on mobile the 3s in-process auto-sync + // holds that same lock and keeps re-acquiring it, starving this read so it + // never completes (a 90s eval budget still timed out — it's contention, not + // slowness). Pausing removes the competing lock-holder; the auth structure + // is immutable during the assertion, so a slightly stale read is still + // correct. Always restored in `finally` so later sync-dependent steps work. 
+ (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; try { const [{ getOrCreateMultisigService }, { zustandProvider }] = await Promise.all([ import('lib/miden/front/guardian-manager'), @@ -734,11 +743,9 @@ if (process.env.MIDEN_E2E_TEST === 'true') { const service = await getOrCreateMultisigService(accountPublicKey, zustandProvider); try { // Best-effort refresh of on-chain state before reading. service.sync() - // takes the global WASM lock; on mobile the background sync can hold it - // for tens of seconds, which would blow the 30s execute_async_script - // budget the iOS bridge runs this under. Cap the wait — the auth - // structure (signers + procedure thresholds) is immutable during this - // assertion, so a slightly stale local read is still correct. + // takes the global WASM lock; with the background sync paused above it + // gets the lock cleanly. Still capped so a slow network can't stall the + // read past the eval budget — the structure is immutable here anyway. await Promise.race([service.sync(), new Promise(resolve => setTimeout(resolve, 8_000))]); } catch { // best-effort — fall back to last-synced state @@ -746,6 +753,8 @@ if (process.env.MIDEN_E2E_TEST === 'true') { return service.getAuthInfo(); } catch (e) { return { error: e instanceof Error ? 
e.message : String(e) }; + } finally { + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = false; } }; } From 1de78c97030ab0444f4e86e43c0c77028fd5da81 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 20:24:19 +0200 Subject: [PATCH 07/20] fix(e2e): quiesce all frontend WASM pollers during guardian iOS auth read (livelock fix) --- CHANGELOG.md | 2 +- src/lib/miden/front/balance.ts | 8 +++- src/lib/miden/front/claimable-notes.ts | 5 +++ src/lib/miden/front/test-sync-pause.test.ts | 44 +++++++++++++++++++++ src/lib/miden/front/test-sync-pause.ts | 35 ++++++++++++++++ src/lib/miden/front/useSyncTrigger.ts | 18 +-------- src/lib/store/index.ts | 22 ++++++----- 7 files changed, 104 insertions(+), 30 deletions(-) create mode 100644 src/lib/miden/front/test-sync-pause.test.ts create mode 100644 src/lib/miden/front/test-sync-pause.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d46e7e27..3449ef7cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. 
Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **Guardian iOS E2E suspends the background sync while it reads the on-chain auth structure.** The `verify_guardian_auth_structure` assertion calls `__TEST_GUARDIAN_AUTH__`, which builds the multisig service and reads it under the single-threaded WASM lock. 
On mobile the 3s in-process auto-sync (`useSyncTrigger`) holds that same lock and kept re-acquiring it faster than the read could progress, starving the read so it never completed (a raised eval budget alone still timed out — it was lock contention, not slowness). The hook now sets a test-only `__TEST_SYNC_PAUSED__` flag that pauses `useSyncTrigger` for the duration of the read and always clears it afterward, so the read gets the lock and completes. Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. (#302) +* [CHANGE][ci] **Guardian iOS E2E quiesces the always-on WASM pollers while it reads the on-chain auth structure.** The `verify_guardian_auth_structure` assertion calls `__TEST_GUARDIAN_AUTH__`, which builds the multisig service and reads it on the single-threaded mobile WASM. Three always-on frontend pollers — the 3s `useSyncTrigger` chain sync, the 5s balance poll (which deliberately bypasses the WASM lock, so holding the lock gave the read zero protection), and the 5s claimable-notes SWR — each re-fire every few seconds and *livelocked* the read so it never completed (neither a raised eval budget nor pausing `useSyncTrigger` alone helped — it was contention from all three, not slowness). A shared test-only `__TEST_SYNC_PAUSED__` flag (in `lib/miden/front/test-sync-pause`) now lets the hook pause all three pollers for the duration of the read and always clears it afterward, so the read runs against an idle main thread and completes in seconds. Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. 
(#302) ## 1.15.2 (2026-06-22) diff --git a/src/lib/miden/front/balance.ts b/src/lib/miden/front/balance.ts index 159804f04..6871e4dd6 100644 --- a/src/lib/miden/front/balance.ts +++ b/src/lib/miden/front/balance.ts @@ -6,6 +6,7 @@ import { useWalletStore } from 'lib/store'; import { fetchBalances } from 'lib/store/utils/fetchBalances'; import { AssetMetadata, MIDEN_METADATA } from '../metadata'; +import { isTestSyncPaused } from './test-sync-pause'; export interface TokenBalanceData { tokenId: string; @@ -140,9 +141,12 @@ export function useAllBalances(address: string, tokenMetadatas: Record { - if (mountedRef.current) { + if (mountedRef.current && !isTestSyncPaused()) { fetchBalancesWithDeduping(); } }, REFRESH_INTERVAL); diff --git a/src/lib/miden/front/claimable-notes.ts b/src/lib/miden/front/claimable-notes.ts index 402d3d56d..8c41fe7fd 100644 --- a/src/lib/miden/front/claimable-notes.ts +++ b/src/lib/miden/front/claimable-notes.ts @@ -15,6 +15,7 @@ import { getBech32AddressFromAccountId } from '../sdk/helpers'; import { getMidenClient, runWhenClientIdle, withWasmClientLock } from '../sdk/miden-client'; import { ConsumableNote, NoteTypeEnum } from '../types'; import { useTokensMetadata } from './assets'; +import { isTestSyncPaused } from './test-sync-pause'; // Debug info for iOS troubleshooting export type ClaimableNotesDebugInfo = { @@ -297,6 +298,10 @@ function useLocalClaimableNotes(publicAddress: string, enabled: boolean) { revalidateOnFocus: false, dedupingInterval: 10_000, refreshInterval: 5_000, + // Lets an E2E hook quiesce this (heavy, WASM-lock-bound) poll while it does + // its own single-threaded-WASM read; otherwise the read is livelocked on + // mobile by the 5s re-fire. No-op in production (tree-shaken). 
+ isPaused: () => isTestSyncPaused(), onError: e => { console.error('Error fetching claimable notes:', e); debugInfoRef.current = { diff --git a/src/lib/miden/front/test-sync-pause.test.ts b/src/lib/miden/front/test-sync-pause.test.ts new file mode 100644 index 000000000..6be3977c4 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.test.ts @@ -0,0 +1,44 @@ +import { isTestSyncPaused, setTestSyncPaused } from './test-sync-pause'; + +type FlagGlobal = { __TEST_SYNC_PAUSED__?: boolean }; + +describe('test-sync-pause', () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + + afterEach(() => { + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('isTestSyncPaused returns true only when the E2E build flag and the pause flag are both set', () => { + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(true); + }); + + it('isTestSyncPaused returns false when the pause flag is unset', () => { + process.env.MIDEN_E2E_TEST = 'true'; + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + expect(isTestSyncPaused()).toBe(false); + }); + + it('isTestSyncPaused returns false off the E2E build even if the pause flag is set', () => { + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(false); + }); + + it('setTestSyncPaused toggles the flag on the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'true'; + setTestSyncPaused(true); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(true); + setTestSyncPaused(false); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(false); + }); + + it('setTestSyncPaused is a no-op off the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'false'; + setTestSyncPaused(true); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBeUndefined(); + }); +}); diff --git 
a/src/lib/miden/front/test-sync-pause.ts b/src/lib/miden/front/test-sync-pause.ts new file mode 100644 index 000000000..4661aea38 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.ts @@ -0,0 +1,35 @@ +/** + * E2E-only background-sync pause. + * + * On mobile the Miden WASM client is single-threaded (main thread). A test that + * needs to make its own WASM-lock-bound read (currently `__TEST_GUARDIAN_AUTH__` + * reading a Guardian account's on-chain auth structure) is otherwise livelocked + * by the wallet's always-on frontend pollers, which each re-fire every few + * seconds and keep the single WASM thread saturated: + * - `useSyncTrigger` (3s chain sync) + * - the balance poll (`fetchBalances`, 5s) — which deliberately bypasses + * `withWasmClientLock`, so holding that lock gives the read zero protection + * - the claimable-notes SWR (`getConsumableNotes`, 5s) + * + * A test hook sets `__TEST_SYNC_PAUSED__` for the duration of its read; every + * one of those pollers checks `isTestSyncPaused()` and skips a cycle while it is + * set, so the read runs against an idle main thread and completes in seconds. + * + * Zero production impact: `MIDEN_E2E_TEST` is statically replaced with `'false'` + * in production builds, so these helpers short-circuit and the global lookup is + * dead-code-eliminated. This module is intentionally dependency-free so any + * layer (front hooks, store) can import it without a cycle. 
+ */ + +interface TestSyncPauseGlobal { + __TEST_SYNC_PAUSED__?: boolean; +} + +export function isTestSyncPaused(): boolean { + return process.env.MIDEN_E2E_TEST === 'true' && (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ === true; +} + +export function setTestSyncPaused(paused: boolean): void { + if (process.env.MIDEN_E2E_TEST !== 'true') return; + (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ = paused; +} diff --git a/src/lib/miden/front/useSyncTrigger.ts b/src/lib/miden/front/useSyncTrigger.ts index b5614c009..3da3c68b0 100644 --- a/src/lib/miden/front/useSyncTrigger.ts +++ b/src/lib/miden/front/useSyncTrigger.ts @@ -9,26 +9,10 @@ import { getIntercom, useWalletStore } from 'lib/store'; import { WalletType } from 'screens/onboarding/types'; import { syncGuardianAccounts } from './guardian-sync'; +import { isTestSyncPaused } from './test-sync-pause'; const SYNC_INTERVAL_MS = 3_000; -/** - * E2E-only: lets a test hook suspend the background sync while it performs a - * WASM-lock-bound read. On mobile the sync runs in-process and holds the - * single-threaded WASM lock; its 3s cadence re-acquires the lock faster than a - * contending read can make progress, starving the read indefinitely (no fixed - * eval budget survives it — see `__TEST_GUARDIAN_AUTH__`). The hook sets - * `__TEST_SYNC_PAUSED__` around its read to remove the contention. Tree-shaken - * out of production: `MIDEN_E2E_TEST` is statically `'false'` there, so this - * short-circuits and the global lookup is dead-code-eliminated. 
- */ -function isTestSyncPaused(): boolean { - return ( - process.env.MIDEN_E2E_TEST === 'true' && - (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ === true - ); -} - function triggerSync(intercom: ReturnType) { if (isInsideSendFlow() || isTestSyncPaused()) return; intercom diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 0fc508d94..0d383c4b0 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -3,6 +3,7 @@ import { subscribeWithSelector } from 'zustand/middleware'; import { createIntercomClient, IIntercomClient } from 'lib/intercom/client'; import { clearPersistedSeenNoteIds, persistSeenNoteIds } from 'lib/miden/back/note-checker-storage'; +import { setTestSyncPaused } from 'lib/miden/front/test-sync-pause'; import { fetchTokenMetadata } from 'lib/miden/metadata'; import { MidenMessageType, MidenState } from 'lib/miden/types'; import { isExtension } from 'lib/platform'; @@ -726,15 +727,16 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // 3-key shape. Reads the cached front-end MultisigService; dynamic imports // avoid a static cycle (guardian-sync pulls in this store module). (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { - // Suspend the background sync (useSyncTrigger) for the duration of this - // read. getOrCreateMultisigService + service.sync() + getAuthInfo all take - // the single-threaded WASM lock; on mobile the 3s in-process auto-sync - // holds that same lock and keeps re-acquiring it, starving this read so it - // never completes (a 90s eval budget still timed out — it's contention, not - // slowness). Pausing removes the competing lock-holder; the auth structure - // is immutable during the assertion, so a slightly stale read is still - // correct. Always restored in `finally` so later sync-dependent steps work. 
- (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + // Quiesce the always-on frontend WASM pollers for the duration of this read. + // getOrCreateMultisigService + service.sync() + getAuthInfo run on the + // single-threaded mobile WASM; the balance poll (which bypasses the WASM + // lock), the claimable-notes SWR, and useSyncTrigger each re-fire every few + // seconds and would livelock this read so it never completes (a 90s eval + // budget timed out — it's contention, not slowness). `setTestSyncPaused` + // flips the shared flag those pollers check; the auth structure is immutable + // during the assertion, so a slightly stale read is still correct. Always + // restored in `finally` so later sync-dependent steps resume. + setTestSyncPaused(true); try { const [{ getOrCreateMultisigService }, { zustandProvider }] = await Promise.all([ import('lib/miden/front/guardian-manager'), @@ -754,7 +756,7 @@ if (process.env.MIDEN_E2E_TEST === 'true') { } catch (e) { return { error: e instanceof Error ? e.message : String(e) }; } finally { - (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = false; + setTestSyncPaused(false); } }; } From 775c9f1336a89c65648c88903e58aedc32a083b9 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 21:41:33 +0200 Subject: [PATCH 08/20] fix(e2e): guardian iOS auth read skips service.sync() to avoid the realign signing-loop (root cause) --- CHANGELOG.md | 2 +- src/lib/store/index.ts | 34 ++++++++++++++++------------------ 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3449ef7cf..6a7a7dfae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) 
instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. 
(#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **Guardian iOS E2E quiesces the always-on WASM pollers while it reads the on-chain auth structure.** The `verify_guardian_auth_structure` assertion calls `__TEST_GUARDIAN_AUTH__`, which builds the multisig service and reads it on the single-threaded mobile WASM. Three always-on frontend pollers — the 3s `useSyncTrigger` chain sync, the 5s balance poll (which deliberately bypasses the WASM lock, so holding the lock gave the read zero protection), and the 5s claimable-notes SWR — each re-fire every few seconds and *livelocked* the read so it never completed (neither a raised eval budget nor pausing `useSyncTrigger` alone helped — it was contention from all three, not slowness). A shared test-only `__TEST_SYNC_PAUSED__` flag (in `lib/miden/front/test-sync-pause`) now lets the hook pause all three pollers for the duration of the read and always clears it afterward, so the read runs against an idle main thread and completes in seconds. Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. 
(#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure without driving a transaction sync.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to call `MultisigService.sync()` before reading the structure. That runs the transaction-oriented `runSync` loop, whose realign path re-registers on the guardian (signing + HTTP round-trips) whenever the guardian's stored blob lags the post-consume on-chain state — and the hook's 8s `Promise.race` cap doesn't *cancel* it, so on the single-threaded mobile WASM the loop kept churning and starved the read for >90s (one run logged 44 `signWithHotKey` calls for this read vs. 26 for a full consume). The auth structure (signer set + procedure thresholds) is immutable — set at account creation, unchanged by consume/send — so the hook now just loads the account (`getOrCreateMultisigService`, the same bounded init the passing consume step already does) and reads it synchronously (`getAuthInfo`), with no sync. As defense-in-depth it also quiesces the always-on frontend WASM pollers (balance poll — which bypasses the WASM lock — claimable-notes SWR, `useSyncTrigger`) via a shared test-only `__TEST_SYNC_PAUSED__` flag for the duration of the load. Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. (#302) ## 1.15.2 (2026-06-22) diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 0d383c4b0..0c3e7ae54 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -727,15 +727,22 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // 3-key shape. Reads the cached front-end MultisigService; dynamic imports // avoid a static cycle (guardian-sync pulls in this store module). (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { - // Quiesce the always-on frontend WASM pollers for the duration of this read. 
- // getOrCreateMultisigService + service.sync() + getAuthInfo run on the - // single-threaded mobile WASM; the balance poll (which bypasses the WASM - // lock), the claimable-notes SWR, and useSyncTrigger each re-fire every few - // seconds and would livelock this read so it never completes (a 90s eval - // budget timed out — it's contention, not slowness). `setTestSyncPaused` - // flips the shared flag those pollers check; the auth structure is immutable - // during the assertion, so a slightly stale read is still correct. Always - // restored in `finally` so later sync-dependent steps resume. + // Read-only inspection of the on-chain auth structure (signers + procedure + // thresholds). It must NOT drive `service.sync()`: that runs the + // transaction-oriented runSync loop, whose realign path re-registers on the + // guardian (signing + HTTP round-trips) when the guardian's blob lags the + // post-consume on-chain state. On the single-threaded mobile WASM that loop + // is uncancellable and starves this read for >90s (a prior run logged 44 + // `signWithHotKey` calls here vs. 26 for a full consume). The auth structure + // is immutable — set at account creation, unchanged by consume/send — so + // `getOrCreateMultisigService` (which loads it via `client.load`, the same + // bounded init the passing consume step already does) plus a synchronous + // `getAuthInfo()` read is both correct and sufficient. No sync needed. + // + // We still quiesce the always-on frontend WASM pollers (balance poll — + // which bypasses the WASM lock — claimable-notes SWR, useSyncTrigger) around + // the load so the single-threaded init runs unobstructed; always restored in + // `finally`. Gated on MIDEN_E2E_TEST, tree-shaken from production. 
setTestSyncPaused(true); try { const [{ getOrCreateMultisigService }, { zustandProvider }] = await Promise.all([ @@ -743,15 +750,6 @@ if (process.env.MIDEN_E2E_TEST === 'true') { import('lib/miden/front/guardian-sync') ]); const service = await getOrCreateMultisigService(accountPublicKey, zustandProvider); - try { - // Best-effort refresh of on-chain state before reading. service.sync() - // takes the global WASM lock; with the background sync paused above it - // gets the lock cleanly. Still capped so a slow network can't stall the - // read past the eval budget — the structure is immutable here anyway. - await Promise.race([service.sync(), new Promise(resolve => setTimeout(resolve, 8_000))]); - } catch { - // best-effort — fall back to last-synced state - } return service.getAuthInfo(); } catch (e) { return { error: e instanceof Error ? e.message : String(e) }; From 9d14e6f083791f2cb81a20933c71a02b0eaa4f70 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 27 Jun 2026 23:17:50 +0200 Subject: [PATCH 09/20] fix(e2e): read guardian auth structure via AccountInspector (pure storage parse) to avoid the OZ multisig load signing-loop --- CHANGELOG.md | 2 +- playwright/e2e/ios/helpers/ios-wallet-page.ts | 15 ++---- src/lib/miden/front/balance.ts | 8 +-- src/lib/miden/front/claimable-notes.ts | 5 -- src/lib/miden/front/test-sync-pause.test.ts | 44 ---------------- src/lib/miden/front/test-sync-pause.ts | 35 ------------- src/lib/miden/front/useSyncTrigger.test.tsx | 46 ---------------- src/lib/miden/front/useSyncTrigger.ts | 5 +- src/lib/store/index.ts | 52 +++++++++---------- 9 files changed, 34 insertions(+), 178 deletions(-) delete mode 100644 src/lib/miden/front/test-sync-pause.test.ts delete mode 100644 src/lib/miden/front/test-sync-pause.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a7a7dfae..6bbd0f1cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) 
instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. 
(#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure without driving a transaction sync.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to call `MultisigService.sync()` before reading the structure. That runs the transaction-oriented `runSync` loop, whose realign path re-registers on the guardian (signing + HTTP round-trips) whenever the guardian's stored blob lags the post-consume on-chain state — and the hook's 8s `Promise.race` cap doesn't *cancel* it, so on the single-threaded mobile WASM the loop kept churning and starved the read for >90s (one run logged 44 `signWithHotKey` calls for this read vs. 26 for a full consume). The auth structure (signer set + procedure thresholds) is immutable — set at account creation, unchanged by consume/send — so the hook now just loads the account (`getOrCreateMultisigService`, the same bounded init the passing consume step already does) and reads it synchronously (`getAuthInfo`), with no sync. As defense-in-depth it also quiesces the always-on frontend WASM pollers (balance poll — which bypasses the WASM lock — claimable-notes SWR, `useSyncTrigger`) via a shared test-only `__TEST_SYNC_PAUSED__` flag for the duration of the load. 
Production is unaffected — the flag is gated on `MIDEN_E2E_TEST` and tree-shaken out. (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Test-only, gated on `MIDEN_E2E_TEST`. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index 4319877ee..c3de28376 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -599,15 +599,9 @@ export class IosWalletPage implements WalletPage { * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP * atom: the hook awaits getOrCreateMultisigService + a best-effort * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. - * - * Budget: 90s, not the 30s evalAsync default. 
The hook pauses the background - * sync while it reads (so it's no longer starved — see __TEST_GUARDIAN_AUTH__), - * but a sync already in flight when the read starts still holds the WASM lock - * until it releases, and on the slow iOS runners that wait plus the service - * build/sign can exceed 30s. 90s comfortably covers waiting out one in-flight - * sync; with the pause in place the read completes well within it rather than - * hanging indefinitely. + * execute_async_script. The hook itself caps its internal sync at 8s, so the + * 30s evalAsync budget is comfortable even when the background sync holds the + * WASM lock. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { return this.cdp.evalAsync( @@ -631,8 +625,7 @@ export class IosWalletPage implements WalletPage { procedureThresholds: {}, error: String(e && e.message ? e.message : e) }); - });`, - { timeoutMs: 90_000 } + });` ); } diff --git a/src/lib/miden/front/balance.ts b/src/lib/miden/front/balance.ts index 6871e4dd6..159804f04 100644 --- a/src/lib/miden/front/balance.ts +++ b/src/lib/miden/front/balance.ts @@ -6,7 +6,6 @@ import { useWalletStore } from 'lib/store'; import { fetchBalances } from 'lib/store/utils/fetchBalances'; import { AssetMetadata, MIDEN_METADATA } from '../metadata'; -import { isTestSyncPaused } from './test-sync-pause'; export interface TokenBalanceData { tokenId: string; @@ -141,12 +140,9 @@ export function useAllBalances(address: string, tokenMetadatas: Record { - if (mountedRef.current && !isTestSyncPaused()) { + if (mountedRef.current) { fetchBalancesWithDeduping(); } }, REFRESH_INTERVAL); diff --git a/src/lib/miden/front/claimable-notes.ts b/src/lib/miden/front/claimable-notes.ts index 8c41fe7fd..402d3d56d 100644 --- a/src/lib/miden/front/claimable-notes.ts +++ b/src/lib/miden/front/claimable-notes.ts @@ -15,7 +15,6 @@ import { getBech32AddressFromAccountId } from '../sdk/helpers'; import { getMidenClient, runWhenClientIdle, withWasmClientLock } from 
'../sdk/miden-client'; import { ConsumableNote, NoteTypeEnum } from '../types'; import { useTokensMetadata } from './assets'; -import { isTestSyncPaused } from './test-sync-pause'; // Debug info for iOS troubleshooting export type ClaimableNotesDebugInfo = { @@ -298,10 +297,6 @@ function useLocalClaimableNotes(publicAddress: string, enabled: boolean) { revalidateOnFocus: false, dedupingInterval: 10_000, refreshInterval: 5_000, - // Lets an E2E hook quiesce this (heavy, WASM-lock-bound) poll while it does - // its own single-threaded-WASM read; otherwise the read is livelocked on - // mobile by the 5s re-fire. No-op in production (tree-shaken). - isPaused: () => isTestSyncPaused(), onError: e => { console.error('Error fetching claimable notes:', e); debugInfoRef.current = { diff --git a/src/lib/miden/front/test-sync-pause.test.ts b/src/lib/miden/front/test-sync-pause.test.ts deleted file mode 100644 index 6be3977c4..000000000 --- a/src/lib/miden/front/test-sync-pause.test.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { isTestSyncPaused, setTestSyncPaused } from './test-sync-pause'; - -type FlagGlobal = { __TEST_SYNC_PAUSED__?: boolean }; - -describe('test-sync-pause', () => { - const prevEnv = process.env.MIDEN_E2E_TEST; - - afterEach(() => { - delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; - process.env.MIDEN_E2E_TEST = prevEnv; - }); - - it('isTestSyncPaused returns true only when the E2E build flag and the pause flag are both set', () => { - process.env.MIDEN_E2E_TEST = 'true'; - (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; - expect(isTestSyncPaused()).toBe(true); - }); - - it('isTestSyncPaused returns false when the pause flag is unset', () => { - process.env.MIDEN_E2E_TEST = 'true'; - delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; - expect(isTestSyncPaused()).toBe(false); - }); - - it('isTestSyncPaused returns false off the E2E build even if the pause flag is set', () => { - process.env.MIDEN_E2E_TEST = 'false'; - (globalThis as 
FlagGlobal).__TEST_SYNC_PAUSED__ = true; - expect(isTestSyncPaused()).toBe(false); - }); - - it('setTestSyncPaused toggles the flag on the E2E build', () => { - process.env.MIDEN_E2E_TEST = 'true'; - setTestSyncPaused(true); - expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(true); - setTestSyncPaused(false); - expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(false); - }); - - it('setTestSyncPaused is a no-op off the E2E build', () => { - process.env.MIDEN_E2E_TEST = 'false'; - setTestSyncPaused(true); - expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBeUndefined(); - }); -}); diff --git a/src/lib/miden/front/test-sync-pause.ts b/src/lib/miden/front/test-sync-pause.ts deleted file mode 100644 index 4661aea38..000000000 --- a/src/lib/miden/front/test-sync-pause.ts +++ /dev/null @@ -1,35 +0,0 @@ -/** - * E2E-only background-sync pause. - * - * On mobile the Miden WASM client is single-threaded (main thread). A test that - * needs to make its own WASM-lock-bound read (currently `__TEST_GUARDIAN_AUTH__` - * reading a Guardian account's on-chain auth structure) is otherwise livelocked - * by the wallet's always-on frontend pollers, which each re-fire every few - * seconds and keep the single WASM thread saturated: - * - `useSyncTrigger` (3s chain sync) - * - the balance poll (`fetchBalances`, 5s) — which deliberately bypasses - * `withWasmClientLock`, so holding that lock gives the read zero protection - * - the claimable-notes SWR (`getConsumableNotes`, 5s) - * - * A test hook sets `__TEST_SYNC_PAUSED__` for the duration of its read; every - * one of those pollers checks `isTestSyncPaused()` and skips a cycle while it is - * set, so the read runs against an idle main thread and completes in seconds. - * - * Zero production impact: `MIDEN_E2E_TEST` is statically replaced with `'false'` - * in production builds, so these helpers short-circuit and the global lookup is - * dead-code-eliminated. 
This module is intentionally dependency-free so any - * layer (front hooks, store) can import it without a cycle. - */ - -interface TestSyncPauseGlobal { - __TEST_SYNC_PAUSED__?: boolean; -} - -export function isTestSyncPaused(): boolean { - return process.env.MIDEN_E2E_TEST === 'true' && (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ === true; -} - -export function setTestSyncPaused(paused: boolean): void { - if (process.env.MIDEN_E2E_TEST !== 'true') return; - (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ = paused; -} diff --git a/src/lib/miden/front/useSyncTrigger.test.tsx b/src/lib/miden/front/useSyncTrigger.test.tsx index 2e12a71c0..6fe6d4f6e 100644 --- a/src/lib/miden/front/useSyncTrigger.test.tsx +++ b/src/lib/miden/front/useSyncTrigger.test.tsx @@ -188,52 +188,6 @@ describe('useSyncTrigger', () => { expect(mockSyncState).not.toHaveBeenCalled(); }); - it('extension: skips SyncRequest while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { - const prevEnv = process.env.MIDEN_E2E_TEST; - process.env.MIDEN_E2E_TEST = 'true'; - (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; - mockIsExtension.mockReturnValue(true); - - const { unmount } = render(); - - await flush(); - expect(mockIntercomRequest).not.toHaveBeenCalled(); - - unmount(); - delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; - process.env.MIDEN_E2E_TEST = prevEnv; - }); - - it('mobile/desktop: skips syncState while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { - const prevEnv = process.env.MIDEN_E2E_TEST; - process.env.MIDEN_E2E_TEST = 'true'; - (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; - - const { unmount } = render(); - - await flush(); - expect(mockSyncState).not.toHaveBeenCalled(); - - unmount(); - delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; - process.env.MIDEN_E2E_TEST = prevEnv; - }); - - it('does not pause sync when 
__TEST_SYNC_PAUSED__ is set but MIDEN_E2E_TEST is off (production)', async () => { - const prevEnv = process.env.MIDEN_E2E_TEST; - process.env.MIDEN_E2E_TEST = 'false'; - (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; - - const { unmount } = render(); - - // The flag is ignored off the E2E build, so the normal mobile sync still runs. - await waitFor(() => expect(mockSyncState).toHaveBeenCalled()); - - unmount(); - delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; - process.env.MIDEN_E2E_TEST = prevEnv; - }); - it('extension: clears the interval on unmount', async () => { jest.useFakeTimers(); mockIsExtension.mockReturnValue(true); diff --git a/src/lib/miden/front/useSyncTrigger.ts b/src/lib/miden/front/useSyncTrigger.ts index 3da3c68b0..5471a2131 100644 --- a/src/lib/miden/front/useSyncTrigger.ts +++ b/src/lib/miden/front/useSyncTrigger.ts @@ -9,12 +9,11 @@ import { getIntercom, useWalletStore } from 'lib/store'; import { WalletType } from 'screens/onboarding/types'; import { syncGuardianAccounts } from './guardian-sync'; -import { isTestSyncPaused } from './test-sync-pause'; const SYNC_INTERVAL_MS = 3_000; function triggerSync(intercom: ReturnType) { - if (isInsideSendFlow() || isTestSyncPaused()) return; + if (isInsideSendFlow()) return; intercom .request({ type: WalletMessageType.SyncRequest }) .then(() => { @@ -98,7 +97,7 @@ export function useSyncTrigger() { const mobileTxModalOpen = isMobile() && storeState.isTransactionModalOpen; const inSendFlow = isInsideSendFlow(); - if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow && !isTestSyncPaused()) { + if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow) { useWalletStore.getState().setSyncStatus(true); try { await withWasmClientLock(async () => { diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 0c3e7ae54..4d6adad94 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -3,7 +3,6 @@ import { 
subscribeWithSelector } from 'zustand/middleware'; import { createIntercomClient, IIntercomClient } from 'lib/intercom/client'; import { clearPersistedSeenNoteIds, persistSeenNoteIds } from 'lib/miden/back/note-checker-storage'; -import { setTestSyncPaused } from 'lib/miden/front/test-sync-pause'; import { fetchTokenMetadata } from 'lib/miden/metadata'; import { MidenMessageType, MidenState } from 'lib/miden/types'; import { isExtension } from 'lib/platform'; @@ -724,37 +723,36 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // Guardian on-chain auth structure (overall threshold + signer set + procedure // thresholds) for E2E assertions — the harness's balance checks can't see the - // 3-key shape. Reads the cached front-end MultisigService; dynamic imports - // avoid a static cycle (guardian-sync pulls in this store module). + // 3-key shape. Dynamic imports avoid a static cycle. (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { - // Read-only inspection of the on-chain auth structure (signers + procedure - // thresholds). It must NOT drive `service.sync()`: that runs the - // transaction-oriented runSync loop, whose realign path re-registers on the - // guardian (signing + HTTP round-trips) when the guardian's blob lags the - // post-consume on-chain state. On the single-threaded mobile WASM that loop - // is uncancellable and starves this read for >90s (a prior run logged 44 - // `signWithHotKey` calls here vs. 26 for a full consume). The auth structure - // is immutable — set at account creation, unchanged by consume/send — so - // `getOrCreateMultisigService` (which loads it via `client.load`, the same - // bounded init the passing consume step already does) plus a synchronous - // `getAuthInfo()` read is both correct and sufficient. No sync needed. 
- // - // We still quiesce the always-on frontend WASM pollers (balance poll — - // which bypasses the WASM lock — claimable-notes SWR, useSyncTrigger) around - // the load so the single-threaded init runs unobstructed; always restored in - // `finally`. Gated on MIDEN_E2E_TEST, tree-shaken from production. - setTestSyncPaused(true); + // Read the structure with a PURE storage parse (`AccountInspector.fromAccount`), + // not the transaction-oriented MultisigService. Going through + // `getOrCreateMultisigService` → `MultisigClient.load` drove a re-sign/realign + // loop (~48 `signWithHotKey` calls vs. 26 for a full consume) when loading + // against the post-consume state where the guardian's stored blob lags the + // on-chain account — on the single-threaded mobile WASM that loop hung the + // read past the eval budget. The inspector only reads the account's storage + // maps (signers, threshold_config, procedure_thresholds): no signing, no + // guardian HTTP, no load. A single `getAccount` (the same read the balance + // poll already does) plus the parse is cheap and correct — the structure is + // immutable. 
try { - const [{ getOrCreateMultisigService }, { zustandProvider }] = await Promise.all([ - import('lib/miden/front/guardian-manager'), - import('lib/miden/front/guardian-sync') + const [{ AccountInspector }, { getMidenClient, withWasmClientLock }] = await Promise.all([ + import('@openzeppelin/miden-multisig-client'), + import('lib/miden/sdk/miden-client') ]); - const service = await getOrCreateMultisigService(accountPublicKey, zustandProvider); - return service.getAuthInfo(); + const account = await withWasmClientLock(async () => (await getMidenClient()).getAccount(accountPublicKey)); + if (!account) { + return { error: `Guardian account ${accountPublicKey} not found in local client` }; + } + const config = AccountInspector.fromAccount(account); + return { + threshold: config.threshold, + signerCommitments: config.signerCommitments, + procedureThresholds: Object.fromEntries(config.procedureThresholds) + }; } catch (e) { return { error: e instanceof Error ? e.message : String(e) }; - } finally { - setTestSyncPaused(false); } }; } From fd892ce68c92e5f9e10d21cc88bed07d674f56b4 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 00:33:15 +0200 Subject: [PATCH 10/20] fix(e2e): unblock guardian iOS auth read (skip wallet mutex on getAccount, 60s eval budget) + widen iOS wallet-create timeouts --- playwright/e2e/ios/helpers/ios-wallet-page.ts | 37 ++++++++----------- src/lib/store/index.ts | 9 ++++- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index c3de28376..80f1c353a 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -139,9 +139,11 @@ export class IosWalletPage implements WalletPage { await this.pollForSelector('[data-testid="onboarding-welcome"]', 30_000); const passwordEnc = encodeURIComponent(password); - // Guardian account creation does extra HTTP round-trips to co-sign 
with the - // guardian, so it needs a wider readiness window than a private wallet. - const readyTimeoutMs = recovery === 'guardian' ? 180_000 : 120_000; + // Guardian creation does extra guardian co-sign round-trips, so it gets the + // wider window — but both paths run generously because cold WASM init + + // account creation on the macos-26 simulator can exceed a minute under load + // (a standard create was observed passing 120s). + const readyTimeoutMs = recovery === 'guardian' ? 240_000 : 180_000; await this.cdp.eval( `var u = new URL(location.href); ` + `u.searchParams.set('__test_skip_onboarding', '1'); ` + @@ -295,10 +297,7 @@ export class IosWalletPage implements WalletPage { // ── Claim ───────────────────────────────────────────────────────────────── - async claimAllNotes( - timeoutMs: number = 120_000, - knownFaucetIds: string[] = [] - ): Promise { + async claimAllNotes(timeoutMs: number = 120_000, knownFaucetIds: string[] = []): Promise { // Chrome's claimAllNotes reloads the page to get a fresh Dexie handle // — that's safe on Chrome because the SW holds the vault unlock in a // separate context. 
On mobile there's no SW; a reload would drop the @@ -440,20 +439,9 @@ export class IosWalletPage implements WalletPage { return; } const result = await this.cdp - .eval<{ before: string[]; injected: string[]; after: string[] } | { error: string }>( - `var conv = window.__TEST_HEX_TO_BECH32_FAUCET__; ` + - `var bech32 = ${hexJson}.map(hex => conv(hex, ${networkArg})); ` + - `var injected = {}; ` + - `for (var i = 0; i < bech32.length; i++) injected[bech32[i]] = { name: 'Test Token', symbol: 'TST', decimals: 8, thumbnailUri: '' }; ` + - `var s = window.__TEST_STORE__; ` + - `if (!s) return { error: 'no __TEST_STORE__' }; ` + - `var st = s.getState(); ` + - `var before = Object.keys(st.assetsMetadata || {}); ` + - `if (typeof st.setAssetsMetadata === 'function') { st.setAssetsMetadata(injected); } ` + - `else { s.setState({ assetsMetadata: Object.assign({}, st.assetsMetadata || {}, injected) }); } ` + - `var after = Object.keys(s.getState().assetsMetadata || {}); ` + - `return { before: before, injected: bech32, after: after };` - ) + .eval< + { before: string[]; injected: string[]; after: string[] } | { error: string } + >(`var conv = window.__TEST_HEX_TO_BECH32_FAUCET__; ` + `var bech32 = ${hexJson}.map(hex => conv(hex, ${networkArg})); ` + `var injected = {}; ` + `for (var i = 0; i < bech32.length; i++) injected[bech32[i]] = { name: 'Test Token', symbol: 'TST', decimals: 8, thumbnailUri: '' }; ` + `var s = window.__TEST_STORE__; ` + `if (!s) return { error: 'no __TEST_STORE__' }; ` + `var st = s.getState(); ` + `var before = Object.keys(st.assetsMetadata || {}); ` + `if (typeof st.setAssetsMetadata === 'function') { st.setAssetsMetadata(injected); } ` + `else { s.setState({ assetsMetadata: Object.assign({}, st.assetsMetadata || {}, injected) }); } ` + `var after = Object.keys(s.getState().assetsMetadata || {}); ` + `return { before: before, injected: bech32, after: after };`) .catch((e: Error) => ({ error: e.message })); // eslint-disable-next-line no-console 
console.log(`[injectTestMetadataForFaucets] hex=${hexJson} -> ${JSON.stringify(result)}`); @@ -625,7 +613,12 @@ export class IosWalletPage implements WalletPage { procedureThresholds: {}, error: String(e && e.message ? e.message : e) }); - });` + });`, + // The read is a pure storage parse (no signing/load), but its one + // `getAccount` can still queue behind in-flight WASM work on the loaded + // single-threaded iOS runner — 60s clears that without the 30s default + // tripping. + { timeoutMs: 60_000 } ); } diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 4d6adad94..79dd55c19 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -737,11 +737,16 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // poll already does) plus the parse is cheap and correct — the structure is // immutable. try { - const [{ AccountInspector }, { getMidenClient, withWasmClientLock }] = await Promise.all([ + const [{ AccountInspector }, { getMidenClient }] = await Promise.all([ import('@openzeppelin/miden-multisig-client'), import('lib/miden/sdk/miden-client') ]); - const account = await withWasmClientLock(async () => (await getMidenClient()).getAccount(accountPublicKey)); + // `getAccount` is serialized internally by the SDK (`_serializeWasmCall`), + // so it's read-safe without the wallet mutex — and skipping the mutex (the + // same deliberate bypass the balance poll uses) keeps this read from + // waiting out a `useSyncTrigger` sync that's holding the lock for tens of + // seconds on the single-threaded mobile WASM. 
+ const account = await (await getMidenClient()).getAccount(accountPublicKey); if (!account) { return { error: `Guardian account ${accountPublicKey} not found in local client` }; } From a2e1e0cb6e2830078fdcfcae7ef65f345780cfe3 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 01:52:26 +0200 Subject: [PATCH 11/20] fix(e2e): quiesce frontend WASM pollers during guardian iOS auth read so the lone getAccount isn't queued behind a slow sync --- CHANGELOG.md | 2 +- src/lib/miden/front/balance.ts | 8 +++- src/lib/miden/front/claimable-notes.ts | 5 +++ src/lib/miden/front/test-sync-pause.test.ts | 44 ++++++++++++++++++++ src/lib/miden/front/test-sync-pause.ts | 35 ++++++++++++++++ src/lib/miden/front/useSyncTrigger.test.tsx | 46 +++++++++++++++++++++ src/lib/miden/front/useSyncTrigger.ts | 5 ++- src/lib/store/index.ts | 16 ++++--- 8 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 src/lib/miden/front/test-sync-pause.test.ts create mode 100644 src/lib/miden/front/test-sync-pause.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bbd0f1cf..68812c928 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). 
Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. 
(#302) -* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Test-only, gated on `MIDEN_E2E_TEST`. (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). 
Because even that lone `getAccount` can queue behind an in-flight background `syncState` on the single-threaded mobile WASM, the read also briefly quiesces the always-on frontend WASM pollers (`useSyncTrigger`, the balance poll, the claimable-notes SWR) via a shared `__TEST_SYNC_PAUSED__` flag. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/src/lib/miden/front/balance.ts b/src/lib/miden/front/balance.ts index 159804f04..6871e4dd6 100644 --- a/src/lib/miden/front/balance.ts +++ b/src/lib/miden/front/balance.ts @@ -6,6 +6,7 @@ import { useWalletStore } from 'lib/store'; import { fetchBalances } from 'lib/store/utils/fetchBalances'; import { AssetMetadata, MIDEN_METADATA } from '../metadata'; +import { isTestSyncPaused } from './test-sync-pause'; export interface TokenBalanceData { tokenId: string; @@ -140,9 +141,12 @@ export function useAllBalances(address: string, tokenMetadatas: Record { - if (mountedRef.current) { + if (mountedRef.current && !isTestSyncPaused()) { fetchBalancesWithDeduping(); } }, REFRESH_INTERVAL); diff --git a/src/lib/miden/front/claimable-notes.ts b/src/lib/miden/front/claimable-notes.ts index 402d3d56d..8c41fe7fd 100644 --- a/src/lib/miden/front/claimable-notes.ts +++ b/src/lib/miden/front/claimable-notes.ts @@ -15,6 +15,7 @@ import { getBech32AddressFromAccountId } from '../sdk/helpers'; import { getMidenClient, runWhenClientIdle, withWasmClientLock } from '../sdk/miden-client'; import { ConsumableNote, NoteTypeEnum } from '../types'; import { useTokensMetadata } from './assets'; +import { isTestSyncPaused } from './test-sync-pause'; // Debug info for iOS troubleshooting export type ClaimableNotesDebugInfo = { @@ -297,6 +298,10 @@ function useLocalClaimableNotes(publicAddress: string, enabled: boolean) { revalidateOnFocus: false, dedupingInterval: 10_000, refreshInterval: 5_000, + // Lets an E2E hook quiesce this (heavy, WASM-lock-bound) poll while it does + // its own 
single-threaded-WASM read; otherwise the read is livelocked on + // mobile by the 5s re-fire. No-op in production (tree-shaken). + isPaused: () => isTestSyncPaused(), onError: e => { console.error('Error fetching claimable notes:', e); debugInfoRef.current = { diff --git a/src/lib/miden/front/test-sync-pause.test.ts b/src/lib/miden/front/test-sync-pause.test.ts new file mode 100644 index 000000000..6be3977c4 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.test.ts @@ -0,0 +1,44 @@ +import { isTestSyncPaused, setTestSyncPaused } from './test-sync-pause'; + +type FlagGlobal = { __TEST_SYNC_PAUSED__?: boolean }; + +describe('test-sync-pause', () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + + afterEach(() => { + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('isTestSyncPaused returns true only when the E2E build flag and the pause flag are both set', () => { + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(true); + }); + + it('isTestSyncPaused returns false when the pause flag is unset', () => { + process.env.MIDEN_E2E_TEST = 'true'; + delete (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__; + expect(isTestSyncPaused()).toBe(false); + }); + + it('isTestSyncPaused returns false off the E2E build even if the pause flag is set', () => { + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as FlagGlobal).__TEST_SYNC_PAUSED__ = true; + expect(isTestSyncPaused()).toBe(false); + }); + + it('setTestSyncPaused toggles the flag on the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'true'; + setTestSyncPaused(true); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(true); + setTestSyncPaused(false); + expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBe(false); + }); + + it('setTestSyncPaused is a no-op off the E2E build', () => { + process.env.MIDEN_E2E_TEST = 'false'; + setTestSyncPaused(true); + 
expect((globalThis as FlagGlobal).__TEST_SYNC_PAUSED__).toBeUndefined(); + }); +}); diff --git a/src/lib/miden/front/test-sync-pause.ts b/src/lib/miden/front/test-sync-pause.ts new file mode 100644 index 000000000..4661aea38 --- /dev/null +++ b/src/lib/miden/front/test-sync-pause.ts @@ -0,0 +1,35 @@ +/** + * E2E-only background-sync pause. + * + * On mobile the Miden WASM client is single-threaded (main thread). A test that + * needs to make its own WASM-lock-bound read (currently `__TEST_GUARDIAN_AUTH__` + * reading a Guardian account's on-chain auth structure) is otherwise livelocked + * by the wallet's always-on frontend pollers, which each re-fire every few + * seconds and keep the single WASM thread saturated: + * - `useSyncTrigger` (3s chain sync) + * - the balance poll (`fetchBalances`, 5s) — which deliberately bypasses + * `withWasmClientLock`, so holding that lock gives the read zero protection + * - the claimable-notes SWR (`getConsumableNotes`, 5s) + * + * A test hook sets `__TEST_SYNC_PAUSED__` for the duration of its read; every + * one of those pollers checks `isTestSyncPaused()` and skips a cycle while it is + * set, so the read runs against an idle main thread and completes in seconds. + * + * Zero production impact: `MIDEN_E2E_TEST` is statically replaced with `'false'` + * in production builds, so these helpers short-circuit and the global lookup is + * dead-code-eliminated. This module is intentionally dependency-free so any + * layer (front hooks, store) can import it without a cycle. 
+ */ + +interface TestSyncPauseGlobal { + __TEST_SYNC_PAUSED__?: boolean; +} + +export function isTestSyncPaused(): boolean { + return process.env.MIDEN_E2E_TEST === 'true' && (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ === true; +} + +export function setTestSyncPaused(paused: boolean): void { + if (process.env.MIDEN_E2E_TEST !== 'true') return; + (globalThis as TestSyncPauseGlobal).__TEST_SYNC_PAUSED__ = paused; +} diff --git a/src/lib/miden/front/useSyncTrigger.test.tsx b/src/lib/miden/front/useSyncTrigger.test.tsx index 6fe6d4f6e..2e12a71c0 100644 --- a/src/lib/miden/front/useSyncTrigger.test.tsx +++ b/src/lib/miden/front/useSyncTrigger.test.tsx @@ -188,6 +188,52 @@ describe('useSyncTrigger', () => { expect(mockSyncState).not.toHaveBeenCalled(); }); + it('extension: skips SyncRequest while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + mockIsExtension.mockReturnValue(true); + + const { unmount } = render(); + + await flush(); + expect(mockIntercomRequest).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('mobile/desktop: skips syncState while a test pauses sync via __TEST_SYNC_PAUSED__', async () => { + const prevEnv = process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'true'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + await flush(); + expect(mockSyncState).not.toHaveBeenCalled(); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + + it('does not pause sync when __TEST_SYNC_PAUSED__ is set but MIDEN_E2E_TEST is off (production)', async () => { + const prevEnv = 
process.env.MIDEN_E2E_TEST; + process.env.MIDEN_E2E_TEST = 'false'; + (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__ = true; + + const { unmount } = render(); + + // The flag is ignored off the E2E build, so the normal mobile sync still runs. + await waitFor(() => expect(mockSyncState).toHaveBeenCalled()); + + unmount(); + delete (globalThis as { __TEST_SYNC_PAUSED__?: boolean }).__TEST_SYNC_PAUSED__; + process.env.MIDEN_E2E_TEST = prevEnv; + }); + it('extension: clears the interval on unmount', async () => { jest.useFakeTimers(); mockIsExtension.mockReturnValue(true); diff --git a/src/lib/miden/front/useSyncTrigger.ts b/src/lib/miden/front/useSyncTrigger.ts index 5471a2131..3da3c68b0 100644 --- a/src/lib/miden/front/useSyncTrigger.ts +++ b/src/lib/miden/front/useSyncTrigger.ts @@ -9,11 +9,12 @@ import { getIntercom, useWalletStore } from 'lib/store'; import { WalletType } from 'screens/onboarding/types'; import { syncGuardianAccounts } from './guardian-sync'; +import { isTestSyncPaused } from './test-sync-pause'; const SYNC_INTERVAL_MS = 3_000; function triggerSync(intercom: ReturnType) { - if (isInsideSendFlow()) return; + if (isInsideSendFlow() || isTestSyncPaused()) return; intercom .request({ type: WalletMessageType.SyncRequest }) .then(() => { @@ -97,7 +98,7 @@ export function useSyncTrigger() { const mobileTxModalOpen = isMobile() && storeState.isTransactionModalOpen; const inSendFlow = isInsideSendFlow(); - if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow) { + if (!onGeneratingTxPage && !mobileTxModalOpen && !inSendFlow && !isTestSyncPaused()) { useWalletStore.getState().setSyncStatus(true); try { await withWasmClientLock(async () => { diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 79dd55c19..856a1b156 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -3,6 +3,7 @@ import { subscribeWithSelector } from 'zustand/middleware'; import { createIntercomClient, IIntercomClient } from 
'lib/intercom/client'; import { clearPersistedSeenNoteIds, persistSeenNoteIds } from 'lib/miden/back/note-checker-storage'; +import { setTestSyncPaused } from 'lib/miden/front/test-sync-pause'; import { fetchTokenMetadata } from 'lib/miden/metadata'; import { MidenMessageType, MidenState } from 'lib/miden/types'; import { isExtension } from 'lib/platform'; @@ -736,16 +737,19 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // guardian HTTP, no load. A single `getAccount` (the same read the balance // poll already does) plus the parse is cheap and correct — the structure is // immutable. + // The read still needs one `getAccount`, and on the single-threaded mobile + // WASM even that lone call queues behind an in-flight background sync + // (`syncState` can hold the SDK's internal call-queue for tens of seconds). + // So quiesce the always-on frontend WASM pollers (`useSyncTrigger`, the + // balance poll — which bypasses the wallet mutex — and the claimable-notes + // SWR) via `__TEST_SYNC_PAUSED__` for the read, restored in `finally`. Gated + // on MIDEN_E2E_TEST, tree-shaken from production. + setTestSyncPaused(true); try { const [{ AccountInspector }, { getMidenClient }] = await Promise.all([ import('@openzeppelin/miden-multisig-client'), import('lib/miden/sdk/miden-client') ]); - // `getAccount` is serialized internally by the SDK (`_serializeWasmCall`), - // so it's read-safe without the wallet mutex — and skipping the mutex (the - // same deliberate bypass the balance poll uses) keeps this read from - // waiting out a `useSyncTrigger` sync that's holding the lock for tens of - // seconds on the single-threaded mobile WASM. const account = await (await getMidenClient()).getAccount(accountPublicKey); if (!account) { return { error: `Guardian account ${accountPublicKey} not found in local client` }; @@ -758,6 +762,8 @@ if (process.env.MIDEN_E2E_TEST === 'true') { }; } catch (e) { return { error: e instanceof Error ? 
e.message : String(e) }; + } finally { + setTestSyncPaused(false); } }; } From 8c903ca8deb624dcaade1e158d93dc39ab7af0da Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 03:21:33 +0200 Subject: [PATCH 12/20] fix(e2e): serve guardian iOS auth structure from a balance-poll-captured stash (no WASM call in the test eval path) --- CHANGELOG.md | 2 +- src/lib/store/index.ts | 19 ++++++++++++ src/lib/store/utils/fetchBalances.ts | 45 ++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68812c928..85b992263 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. 
The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Because even that lone `getAccount` can queue behind an in-flight background `syncState` on the single-threaded mobile WASM, the read also briefly quiesces the always-on frontend WASM pollers (`useSyncTrigger`, the balance poll, the claimable-notes SWR) via a shared `__TEST_SYNC_PAUSED__` flag. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). 
Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test hook serves it as a plain value with no WASM call at all. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 856a1b156..29a229b85 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -726,6 +726,25 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // thresholds) for E2E assertions — the harness's balance checks can't see the // 3-key shape. Dynamic imports avoid a static cycle. (globalThis as any).__TEST_GUARDIAN_AUTH__ = async (accountPublicKey: string) => { + // Fast path: the balance poll (`fetchBalances`, which reliably completes in + // the wallet's own flow) stashes this account's auth structure on + // `__TEST_GUARDIAN_AUTH_STRUCTURE__`. Serving it here is a plain object read + // with NO WASM call, so it can't be starved by other main-thread WASM + // activity on the single-threaded iOS WASM (the live read below otherwise + // times out: the auth eval was observed taking 60s with the WebView main + // thread saturated even after all the wallet's own pollers were paused). + const stashed = ( + globalThis as { + __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< + string, + { threshold: number; signerCommitments: string[]; procedureThresholds: Record } + >; + } + ).__TEST_GUARDIAN_AUTH_STRUCTURE__?.[accountPublicKey]; + if (stashed) { + return stashed; + } + // Read the structure with a PURE storage parse (`AccountInspector.fromAccount`), // not the transaction-oriented MultisigService. 
Going through // `getOrCreateMultisigService` → `MultisigClient.load` drove a re-sign/realign diff --git a/src/lib/store/utils/fetchBalances.ts b/src/lib/store/utils/fetchBalances.ts index 2432c7946..ff0d38ea2 100644 --- a/src/lib/store/utils/fetchBalances.ts +++ b/src/lib/store/utils/fetchBalances.ts @@ -18,6 +18,39 @@ export interface FetchBalancesOptions { tokenPrices?: TokenPrices; } +type SdkAccount = NonNullable>['getAccount']>>>; + +/** + * E2E-only: parse a Guardian account's on-chain auth structure (signer set + + * procedure thresholds) with `AccountInspector` — a pure storage read, no + * signing/load — and stash it on `globalThis.__TEST_GUARDIAN_AUTH_STRUCTURE__` + * keyed by address, so `__TEST_GUARDIAN_AUTH__` can serve it without any WASM + * call. No-op for non-multisig accounts. Tree-shaken from production. + */ +async function captureGuardianAuthStructureForTest(address: string, account: SdkAccount): Promise { + try { + const { AccountInspector } = await import('@openzeppelin/miden-multisig-client'); + const config = AccountInspector.fromAccount(account); + if (!config.signerCommitments || config.signerCommitments.length === 0) return; + const holder = globalThis as { + __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< + string, + { threshold: number; signerCommitments: string[]; procedureThresholds: Record } + >; + }; + holder.__TEST_GUARDIAN_AUTH_STRUCTURE__ = { + ...(holder.__TEST_GUARDIAN_AUTH_STRUCTURE__ ?? {}), + [address]: { + threshold: config.threshold, + signerCommitments: config.signerCommitments, + procedureThresholds: Object.fromEntries(config.procedureThresholds) + } + }; + } catch { + // best-effort — the test hook falls back to a live read if nothing is stashed + } +} + /** * Fetch all token balances for an account * @@ -54,6 +87,18 @@ export async function fetchBalances( // queued behind long-running writes like `syncState`. 
const midenClient = await getMidenClient(); const acc = await midenClient.getAccount(address); + + // E2E-only: capture a Guardian account's on-chain auth structure HERE, inside + // the wallet's own working balance poll (which reliably completes), so the + // `__TEST_GUARDIAN_AUTH__` test hook can read it as a plain value instead of + // doing its own blocking-eval WASM read — which on the single-threaded iOS + // WASM gets starved by other main-thread WASM activity and times out. The + // structure is immutable, so a slightly-old capture is correct. Best-effort, + // fire-and-forget; gated on MIDEN_E2E_TEST and tree-shaken from production. + if (process.env.MIDEN_E2E_TEST === 'true' && acc) { + void captureGuardianAuthStructureForTest(address, acc); + } + let account: typeof acc | null = null; let assets: FungibleAsset[] = []; if (acc) { From 697b497dbd8142ff91d88e4e7ba206bc3b1a1f9a Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 03:25:34 +0200 Subject: [PATCH 13/20] fix(e2e): cap _simPair setup at 8min + restart sim subsystem on overrun so a degraded CoreSimulator fails fast and the retry gets a fresh daemon --- playwright/e2e/ios/fixtures/two-simulators.ts | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/playwright/e2e/ios/fixtures/two-simulators.ts b/playwright/e2e/ios/fixtures/two-simulators.ts index 506df456e..0523c4ed4 100644 --- a/playwright/e2e/ios/fixtures/two-simulators.ts +++ b/playwright/e2e/ios/fixtures/two-simulators.ts @@ -265,6 +265,40 @@ function sleep(ms: number): Promise { return new Promise(r => setTimeout(r, ms)); } +// A healthy two-simulator setup (terminate→uninstall→install→launch→CDP for +// both, sims already booted by globalSetup) runs in ~2-3 min. A degraded +// macos-26 CoreSimulator stretches it past 8 min; cap there so the whole 15-min +// test timeout isn't consumed in fixture setup with no room for a retry. 
+const SETUP_DEADLINE_MS = 480_000; +// Upper bound for the on-timeout daemon restart so the recovery itself can't run +// into the test timeout — setupBothWallets does its own recovery on the retry. +const SETUP_RECOVERY_BUDGET_MS = 90_000; + +/** + * Run the `_simPair` setup with a hard deadline. On overrun, run `onTimeout` + * (a best-effort, time-bounded sim-subsystem restart) so Playwright's retry + * lands on a fresh daemon, then throw a named error instead of letting setup + * silently eat the entire test timeout. + */ +async function withSetupDeadline(fn: () => Promise, deadlineMs: number, onTimeout: () => Promise): Promise { + let timer: ReturnType | undefined; + let timedOut = false; + const deadline = new Promise((_, reject) => { + timer = setTimeout(() => { + timedOut = true; + reject(new Error(`_simPair setup exceeded ${deadlineMs}ms (degraded CoreSimulator)`)); + }, deadlineMs); + }); + try { + return await Promise.race([fn(), deadline]); + } finally { + if (timer) clearTimeout(timer); + if (timedOut) { + await Promise.race([onTimeout(), sleep(SETUP_RECOVERY_BUDGET_MS)]).catch(() => undefined); + } + } +} + // ── Fixture ───────────────────────────────────────────────────────────────── let _devicePair: { udidA: string; udidB: string } | null = null; @@ -335,7 +369,28 @@ export const test = base.extend({ // macos-26 daemon-wedge that hangs simctl mid-suite) by restarting the sim // subsystem and retrying the pair. The shared `_simPair` fixture still // consolidates teardown. - const { instanceA, instanceB } = await setupBothWallets(simA, udidA, simB, udidB, envConfig, timeline); + // + // Cap the whole setup. On a degraded macos-26 CoreSimulator every simctl op + // crawls (install/terminate observed at 30-180s vs. <5s healthy); slow-but- + // completing ops never trip the per-op recovery, so the cumulative cost can + // silently eat the entire 15-min test timeout "while setting up _simPair" + // with no attribution and no room for Playwright's retry. 
A hard cap turns + // that into a fast, named failure — and on overrun we restart the sim + // subsystem first so the retry runs against a fresh daemon. + const { instanceA, instanceB } = await withSetupDeadline( + () => setupBothWallets(simA, udidA, simB, udidB, envConfig, timeline), + SETUP_DEADLINE_MS, + async () => { + timeline.emit({ + category: 'test_lifecycle', + severity: 'warn', + message: + `[sim-setup] _simPair setup exceeded ${SETUP_DEADLINE_MS}ms (degraded CoreSimulator); ` + + `restarting the sim subsystem so the retry gets a fresh daemon`, + }); + await SimulatorControl.recoverSimSubsystem([udidA, udidB]).catch(() => undefined); + } + ); steps.registerSnapshotCaps('A', buildIosSnapshotCaps(instanceA.walletPage, '')); steps.registerSnapshotCaps('B', buildIosSnapshotCaps(instanceB.walletPage, '')); From 6f3eb4bd6f42d09bc10752ed133fd70531a8e89a Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 04:41:40 +0200 Subject: [PATCH 14/20] fix(e2e): harden macos-26 sim recovery (shutdown all wedged devices) + --retries=2 for non-guardian mobile --- .github/workflows/e2e-blockchain.yml | 24 +++++++++++-------- CHANGELOG.md | 1 + .../e2e/ios/helpers/simulator-control.ts | 16 ++++++++++++- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/.github/workflows/e2e-blockchain.yml b/.github/workflows/e2e-blockchain.yml index 927c286c3..7ee98da9e 100644 --- a/.github/workflows/e2e-blockchain.yml +++ b/.github/workflows/e2e-blockchain.yml @@ -534,11 +534,13 @@ jobs: run: yarn test:e2e:mobile:build - name: Run blockchain E2E (mobile, devnet) - # --retries=1 absorbs flaky CDP "no pages found" errors on macos-26 - # runners. Each fixture retries install → launch → CDP connect from - # scratch, so a transient webinspectord hiccup doesn't fail the run. - # Config stays at retries: 0 for fast local dev feedback. 
- run: yarn playwright test --config playwright.ios.config.ts --retries=1 + # --retries=2 absorbs flaky CDP "no pages found" errors and degraded- + # CoreSimulator sim-setup failures on macos-26 runners. Each fixture + # retries install → launch → CDP connect from scratch (and restarts the + # sim subsystem on a wedged daemon), so a transient runner hiccup gets + # multiple fresh attempts within the job budget. Config stays at + # retries: 0 for fast local dev feedback. + run: yarn playwright test --config playwright.ios.config.ts --retries=2 - name: Upload artifacts if: always() @@ -701,11 +703,13 @@ jobs: run: yarn test:e2e:mobile:build - name: Run blockchain E2E (mobile, testnet) - # --retries=1 absorbs flaky CDP "no pages found" errors on macos-26 - # runners. Each fixture retries install → launch → CDP connect from - # scratch, so a transient webinspectord hiccup doesn't fail the run. - # Config stays at retries: 0 for fast local dev feedback. - run: yarn playwright test --config playwright.ios.config.ts --retries=1 + # --retries=2 absorbs flaky CDP "no pages found" errors and degraded- + # CoreSimulator sim-setup failures on macos-26 runners. Each fixture + # retries install → launch → CDP connect from scratch (and restarts the + # sim subsystem on a wedged daemon), so a transient runner hiccup gets + # multiple fresh attempts within the job budget. Config stays at + # retries: 0 for fast local dev feedback. + run: yarn playwright test --config playwright.ios.config.ts --retries=2 - name: Upload artifacts if: always() diff --git a/CHANGELOG.md b/CHANGELOG.md index 85b992263..fe4c9681c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) 
instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. (#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. 
(#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) +* [CHANGE][ci] **iOS E2E is more resilient to degraded macos-26 CoreSimulator runners.** The `_simPair` fixture setup is now capped at 8 minutes (a healthy two-sim setup is ~2-3 min) so a wedged CoreSimulator — which makes every `simctl` op crawl (install/terminate observed at 30-180s vs. <5s) — fails fast with a named error and a sim-subsystem restart instead of silently eating the whole 15-min test timeout; the subsystem recovery also `simctl shutdown all`s to clear half-booted device state (the `SimError 405` signature); and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite) so a transient runner hiccup gets multiple fresh attempts. (#302) * [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 
26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test hook serves it as a plain value with no WASM call at all. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/simulator-control.ts b/playwright/e2e/ios/helpers/simulator-control.ts index 18f54d861..9fa4121f9 100644 --- a/playwright/e2e/ios/helpers/simulator-control.ts +++ b/playwright/e2e/ios/helpers/simulator-control.ts @@ -236,7 +236,21 @@ export class SimulatorControl { // Non-zero if the process was already gone (or sudo unavailable off CI) — // the daemon respawns on the next simctl call regardless. } - await sleep(5_000); + // Give launchd time to respawn a clean daemon before we drive it again. + await sleep(8_000); + + // Clear wedged device state on the freshly-respawned daemon. A degraded + // macos-26 CoreSimulator leaves devices in a half-booted state that makes + // every subsequent `simctl` op crawl or fail (SimError 405 on terminate); + // restarting the daemon alone doesn't reset the devices. `shutdown all` + // forces them back to a clean Shutdown state so the boot below starts fresh. 
+ // Best-effort and bounded — a still-wedged daemon will time out here, and + // the boot loop will surface the real failure. + try { + await execFileAsync('xcrun', ['simctl', 'shutdown', 'all'], { timeout: 60_000 }); + } catch { + // Best-effort — ensureBooted below recovers individual devices anyway. + } // webinspectord_sim only exposes WebViews while Simulator.app is running; // killing the daemon tears it down, so bring it back before re-booting. try { From 0d00355f55191f2192aeab29ec7d0acc8b5c2093 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 05:37:02 +0200 Subject: [PATCH 15/20] fix(e2e): await the guardian auth-structure capture + log it, so it stashes before the auth step reads it (was racing fire-and-forget on iOS) --- src/lib/store/utils/fetchBalances.ts | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/lib/store/utils/fetchBalances.ts b/src/lib/store/utils/fetchBalances.ts index ff0d38ea2..d6c8c1e47 100644 --- a/src/lib/store/utils/fetchBalances.ts +++ b/src/lib/store/utils/fetchBalances.ts @@ -31,7 +31,11 @@ async function captureGuardianAuthStructureForTest(address: string, account: Sdk try { const { AccountInspector } = await import('@openzeppelin/miden-multisig-client'); const config = AccountInspector.fromAccount(account); - if (!config.signerCommitments || config.signerCommitments.length === 0) return; + if (!config.signerCommitments || config.signerCommitments.length === 0) { + // eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure: not a multisig account (0 signers), skipping', address); + return; + } const holder = globalThis as { __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< string, @@ -46,8 +50,11 @@ async function captureGuardianAuthStructureForTest(address: string, account: Sdk procedureThresholds: Object.fromEntries(config.procedureThresholds) } }; - } catch { - // best-effort — the test hook falls back to a live read if nothing is stashed + 
// eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure: stashed', address, 'signers=', config.signerCommitments.length); + } catch (e) { + // eslint-disable-next-line no-console + console.log('[E2E] captureGuardianAuthStructure failed:', e instanceof Error ? e.message : String(e)); } } @@ -96,7 +103,12 @@ export async function fetchBalances( // structure is immutable, so a slightly-old capture is correct. Best-effort, // fire-and-forget; gated on MIDEN_E2E_TEST and tree-shaken from production. if (process.env.MIDEN_E2E_TEST === 'true' && acc) { - void captureGuardianAuthStructureForTest(address, acc); + // Awaited (not fire-and-forget): tie the capture to this balance fetch so it + // is stashed before `verify_balance` passes and the auth step reads it — a + // fire-and-forget capture loses the race against the test on the contended + // iOS main thread. The `@openzeppelin/...` import is already warm (the + // guardian flow loaded it), so this adds negligible latency. + await captureGuardianAuthStructureForTest(address, acc); } let account: typeof acc | null = null; From 695dfef063f044189a499762a47932bc762e49d1 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 06:38:23 +0200 Subject: [PATCH 16/20] fix(e2e): guardian auth hook falls back to the single stashed structure when the exact address key differs (stash was populated but keyed differently) --- src/lib/store/index.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lib/store/index.ts b/src/lib/store/index.ts index 29a229b85..db61ef360 100644 --- a/src/lib/store/index.ts +++ b/src/lib/store/index.ts @@ -733,14 +733,20 @@ if (process.env.MIDEN_E2E_TEST === 'true') { // activity on the single-threaded iOS WASM (the live read below otherwise // times out: the auth eval was observed taking 60s with the WebView main // thread saturated even after all the wallet's own pollers were paused). 
- const stashed = ( + const stashStore = ( globalThis as { __TEST_GUARDIAN_AUTH_STRUCTURE__?: Record< string, { threshold: number; signerCommitments: string[]; procedureThresholds: Record } >; } - ).__TEST_GUARDIAN_AUTH_STRUCTURE__?.[accountPublicKey]; + ).__TEST_GUARDIAN_AUTH_STRUCTURE__; + // Prefer the exact-key match; fall back to the single stashed entry. The + // balance poll keys the stash by the address it's called with, which can be + // a different encoding of the same account than the publicKey the test + // passes here — and a wallet instance only ever has one Guardian account, so + // any stashed multisig structure on this page belongs to it. + const stashed = stashStore?.[accountPublicKey] ?? (stashStore ? Object.values(stashStore)[0] : undefined); if (stashed) { return stashed; } From ae9767b6757cff2788624a1d2e1a83bafef0189b Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 07:46:51 +0200 Subject: [PATCH 17/20] =?UTF-8?q?fix(e2e):=20guardian=20iOS=20auth=20read?= =?UTF-8?q?=20over=20sync=20atom=20=E2=80=94=20async=20execute=5Fasync=5Fs?= =?UTF-8?q?cript=20callback=20arrives=20as=20boolean=20true,=20hangs=20eve?= =?UTF-8?q?ry=20evalAsync?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- playwright/e2e/ios/helpers/cdp-bridge.ts | 19 +++- playwright/e2e/ios/helpers/ios-wallet-page.ts | 89 +++++++++++-------- 3 files changed, 70 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe4c9681c..a8bee5700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. 
Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) * [CHANGE][ci] **iOS E2E is more resilient to degraded macos-26 CoreSimulator runners.** The `_simPair` fixture setup is now capped at 8 minutes (a healthy two-sim setup is ~2-3 min) so a wedged CoreSimulator — which makes every `simctl` op crawl (install/terminate observed at 30-180s vs. <5s) — fails fast with a named error and a sim-subsystem restart instead of silently eating the whole 15-min test timeout; the subsystem recovery also `simctl shutdown all`s to clear half-booted device state (the `SimError 405` signature); and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite) so a transient runner hiccup gets multiple fresh attempts. 
(#302) -* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test hook serves it as a plain value with no WASM call at all. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) +* [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 
26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test reads it as a plain value with no WASM call at all. Finally, the iOS harness reads that stash over the SYNCHRONOUS `execute_script` atom (polled), not the async `execute_async_script` one: appium-remote-debugger's async atom delivers its completion callback in the `arguments[arguments.length-1]` slot as the boolean `true` on this iOS RWI bridge, so `cb(result)` threw `TypeError: cb is not a function`, the promise rejected unhandled, and every `evalAsync` hung to its timeout no matter how fast the script ran — which is why the auth read still timed out at 60s even with the stash already populated. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright/e2e/ios/helpers/cdp-bridge.ts b/playwright/e2e/ios/helpers/cdp-bridge.ts index 7dc3af196..7d5b71de0 100644 --- a/playwright/e2e/ios/helpers/cdp-bridge.ts +++ b/playwright/e2e/ios/helpers/cdp-bridge.ts @@ -119,11 +119,22 @@ export class CdpSession { /** * Evaluate asynchronous JavaScript. The body MUST call the callback * `arguments[arguments.length - 1]` with its result — this is the - * `execute_async_script` WebDriver atom contract. 
Useful when the page - * code awaits Promises (store.fetchBalances, intercom.request, etc.). + * `execute_async_script` WebDriver atom contract. * - * The optional outer timeout protects against scripts that never invoke - * the callback — without it, executeAtomAsync waits forever. Default 30s. + * ⚠️ BROKEN on this iOS RWI bridge — prefer the synchronous `eval` and poll. + * appium-remote-debugger's `execute_async_script` atom delivers its + * completion callback in the `arguments[arguments.length - 1]` slot as the + * boolean `true` (not a function) here, so `cb(result)` throws + * `TypeError: cb is not a function`, the promise rejects unhandled, the + * callback never fires, and the call ALWAYS hangs to the timeout below — + * regardless of how fast the script itself completes. (See + * `getGuardianAuthInfo`, which used to use this and now reads its data over + * the reliable sync `eval` atom instead.) If you need to await page Promises, + * stash the resolved value on a global from the page's own code and poll it + * with `eval`, rather than relying on this callback. + * + * The outer timeout protects against scripts that never invoke the callback — + * without it, executeAtomAsync waits forever. Default 30s. */ async evalAsync(body: string, opts: { timeoutMs?: number } = {}): Promise { const timeoutMs = opts.timeoutMs ?? 30_000; diff --git a/playwright/e2e/ios/helpers/ios-wallet-page.ts b/playwright/e2e/ios/helpers/ios-wallet-page.ts index 80f1c353a..72af9649f 100644 --- a/playwright/e2e/ios/helpers/ios-wallet-page.ts +++ b/playwright/e2e/ios/helpers/ios-wallet-page.ts @@ -583,43 +583,62 @@ export class IosWalletPage implements WalletPage { /** * Read a Guardian account's on-chain auth structure (overall threshold, - * signer commitments, per-procedure thresholds). 
Calls the same - * __TEST_GUARDIAN_AUTH__ hook the Chrome POM uses, but over the async CDP - * atom: the hook awaits getOrCreateMultisigService + a best-effort - * (time-bounded) sync, so it returns a Promise and must run under - * execute_async_script. The hook itself caps its internal sync at 8s, so the - * 30s evalAsync budget is comfortable even when the background sync holds the - * WASM lock. + * signer commitments, per-procedure thresholds). + * + * iOS reads this from the `__TEST_GUARDIAN_AUTH_STRUCTURE__` stash that the + * wallet's own balance poll populates (`fetchBalances` → + * `captureGuardianAuthStructureForTest`, a pure `AccountInspector.fromAccount` + * parse) — NOT through the async `__TEST_GUARDIAN_AUTH__` hook. Two reasons, + * both proven against the CI timeline: + * + * 1. The stash is a plain JSON-serializable object, so it reads over the + * reliable SYNCHRONOUS `execute_script` atom. The async + * `execute_async_script` atom (appium-remote-debugger) hands the user + * script its completion callback as `arguments[arguments.length - 1]`, + * but on this iOS RWI bridge that slot arrives as the boolean `true`, so + * `cb(result)` throws `TypeError: cb is not a function`, the promise + * rejects unhandled, the callback never fires, and EVERY `evalAsync` + * hangs to its timeout. (Observed: `Unhandled Promise Rejection: + * TypeError: d is not a function ... 'd' is true` fired the instant the + * auth read ran, then a 60s timeout — even though the stash was already + * populated.) The sync atom returns its value directly, no callback. + * 2. A direct WASM read in the eval path gets starved on the single-threaded + * iOS WASM. The stash read touches no WASM at all. + * + * The auth structure is immutable (fixed at account creation), so a + * slightly-old captured copy is exactly correct for these assertions. 
The + * stash is keyed by the address the balance poll fetched, which can be a + * different encoding than the publicKey the test passes — but a wallet + * instance only ever has one Guardian account, so the single stashed + * structure is unambiguous. Polls because the capture runs on the balance-poll + * cadence; by the auth step the consume has already driven several polls, so + * the first read almost always hits. */ async getGuardianAuthInfo(accountPublicKey: string): Promise { - return this.cdp.evalAsync( - `var cb = arguments[arguments.length - 1]; - var fn = globalThis.__TEST_GUARDIAN_AUTH__; - if (typeof fn !== 'function') { - cb({ - threshold: NaN, - signerCommitments: [], - procedureThresholds: {}, - error: '__TEST_GUARDIAN_AUTH__ unavailable (needs MIDEN_E2E_TEST build)' - }); - return; - } - Promise.resolve(fn(${JSON.stringify(accountPublicKey)})) - .then(function (r) { cb(r); }) - .catch(function (e) { - cb({ - threshold: NaN, - signerCommitments: [], - procedureThresholds: {}, - error: String(e && e.message ? e.message : e) - }); - });`, - // The read is a pure storage parse (no signing/load), but its one - // `getAccount` can still queue behind in-flight WASM work on the loaded - // single-threaded iOS runner — 60s clears that without the 30s default - // tripping. - { timeoutMs: 60_000 } - ); + const deadline = Date.now() + 30_000; + let lastErr = 'guardian auth structure not captured (stash empty after 30s)'; + while (Date.now() < deadline) { + try { + const result = await this.cdp.eval( + `var s = globalThis.__TEST_GUARDIAN_AUTH_STRUCTURE__; + if (!s) return null; + var keys = Object.keys(s); + if (keys.length === 0) return null; + var v = s[${JSON.stringify(accountPublicKey)}] || s[keys[0]]; + if (!v) return null; + return { + threshold: v.threshold, + signerCommitments: v.signerCommitments, + procedureThresholds: v.procedureThresholds + };` + ); + if (result) return result; + } catch (e) { + lastErr = e instanceof Error ? 
e.message : String(e); + } + await sleep(1_500); + } + return { threshold: NaN, signerCommitments: [], procedureThresholds: {}, error: lastErr }; } /** From 028a3fa85d0c6c895d546b3ee1dd51047b376dc5 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 08:52:41 +0200 Subject: [PATCH 18/20] fix(e2e): let degraded-but-completing iOS sim setup finish (cap 8->13min, test timeout 15->25min) instead of killing runs that would pass --- CHANGELOG.md | 2 +- playwright.ios.config.ts | 8 +++++++- playwright/e2e/ios/fixtures/two-simulators.ts | 16 +++++++++++----- playwright/e2e/ios/helpers/simulator-control.ts | 4 ++-- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8bee5700..09159e60c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **iOS E2E is more resilient to degraded macos-26 CoreSimulator runners.** The `_simPair` fixture setup is now capped at 8 minutes (a healthy two-sim setup is ~2-3 min) so a wedged CoreSimulator — which makes every `simctl` op crawl (install/terminate observed at 30-180s vs. 
<5s) — fails fast with a named error and a sim-subsystem restart instead of silently eating the whole 15-min test timeout; the subsystem recovery also `simctl shutdown all`s to clear half-booted device state (the `SimError 405` signature); and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite) so a transient runner hiccup gets multiple fresh attempts. (#302) +* [CHANGE][ci] **iOS E2E is more resilient to degraded macos-26 CoreSimulator runners.** On a degraded runner every `simctl` op crawls (97 CI samples: per-wallet `_simPair` setup p50 65s, p90 267s, max 401s vs. <5s healthy — so two sequential wallets can take up to ~13 min and STILL complete). The `_simPair` fixture setup is capped (at 13 min, past the slowest observed completing setup) so a genuinely-hung CoreSimulator fails fast with a named error and a sim-subsystem restart instead of silently eating the whole per-test timeout, while a degraded-but-completing setup is allowed to finish rather than being killed mid-flight (no assertion is relaxed — purely tolerance for degraded-runner IO); the subsystem recovery `simctl shutdown all`s to clear half-booted device state (the `SimError 405` signature); the per-test timeout is 25 min (from 15) to leave room for a slow setup plus a slow test body; and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite) so a transient runner hiccup gets multiple fresh attempts. (#302) * [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 
26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does). Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test reads it as a plain value with no WASM call at all. Finally, the iOS harness reads that stash over the SYNCHRONOUS `execute_script` atom (polled), not the async `execute_async_script` one: appium-remote-debugger's async atom delivers its completion callback in the `arguments[arguments.length-1]` slot as the boolean `true` on this iOS RWI bridge, so `cb(result)` threw `TypeError: cb is not a function`, the promise rejected unhandled, and every `evalAsync` hung to its timeout no matter how fast the script ran — which is why the auth read still timed out at 60s even with the stash already populated. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22) diff --git a/playwright.ios.config.ts b/playwright.ios.config.ts index 1adb58f71..c3428e380 100644 --- a/playwright.ios.config.ts +++ b/playwright.ios.config.ts @@ -12,7 +12,13 @@ export default defineConfig({ // Guardian specs run via playwright.ios.guardian.config.ts (dedicated run); // keep them out of the standard iOS suite. testIgnore: '**/guardian-*.ios.spec.ts', - timeout: 900_000, // 15 min per test — WASM prove on simulator is slow (~60-90s per consume) + // 25 min per test. 
WASM prove on the simulator is slow (~60-90s per consume), + // and on degraded macos-26 runners BOTH the two-sim `_simPair` setup (capped + // at 13 min, see SETUP_DEADLINE_MS) and the test body's simctl/WASM ops crawl. + // 25 min leaves room for a slow-but-completing setup + a slow test instead of + // killing a run that would have passed given a little more patience (no + // assertion is relaxed — this is purely tolerance for degraded-runner IO). + timeout: 1_500_000, expect: { timeout: 60_000, }, diff --git a/playwright/e2e/ios/fixtures/two-simulators.ts b/playwright/e2e/ios/fixtures/two-simulators.ts index 0523c4ed4..39c58896f 100644 --- a/playwright/e2e/ios/fixtures/two-simulators.ts +++ b/playwright/e2e/ios/fixtures/two-simulators.ts @@ -189,7 +189,7 @@ async function setupBothWallets( // 3 attempts = up to 2 daemon-restart recoveries. The macos-26 wedge has been // observed to survive a single recovery, so give it one more shot before // failing the test (each wedged attempt fails fast at its simctl/CDP timeout, - // not the 15-min test timeout, so the extra attempt is cheap). + // not the full per-test timeout, so the extra attempt is cheap). const MAX_ATTEMPTS = 3; for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { let instanceA: SimWalletInstance | undefined; @@ -267,9 +267,15 @@ function sleep(ms: number): Promise { // A healthy two-simulator setup (terminate→uninstall→install→launch→CDP for // both, sims already booted by globalSetup) runs in ~2-3 min. A degraded -// macos-26 CoreSimulator stretches it past 8 min; cap there so the whole 15-min -// test timeout isn't consumed in fixture setup with no room for a retry. -const SETUP_DEADLINE_MS = 480_000; +// macos-26 CoreSimulator stretches every simctl op (97 real CI samples: per- +// wallet setup p50 65s, p90 267s, max 401s → two sequential wallets up to +// ~13 min) yet still COMPLETES. 
The earlier 8-min cap killed those slow-but- +// completing setups that would have finished and passed; only a TRULY hung +// runner (observed: setup not done after 15 min) genuinely can't recover. So +// cap at 13 min — past the slowest observed completing setup — so degraded-but- +// completing runners get to finish, and only the hung ones fail fast (clearly +// attributed, leaving room within the 25-min test timeout for the retry). +const SETUP_DEADLINE_MS = 780_000; // Upper bound for the on-timeout daemon restart so the recovery itself can't run // into the test timeout — setupBothWallets does its own recovery on the retry. const SETUP_RECOVERY_BUDGET_MS = 90_000; @@ -373,7 +379,7 @@ export const test = base.extend({ // Cap the whole setup. On a degraded macos-26 CoreSimulator every simctl op // crawls (install/terminate observed at 30-180s vs. <5s healthy); slow-but- // completing ops never trip the per-op recovery, so the cumulative cost can - // silently eat the entire 15-min test timeout "while setting up _simPair" + // silently eat the entire per-test timeout "while setting up _simPair" // with no attribution and no room for Playwright's retry. A hard cap turns // that into a fast, named failure — and on overrun we restart the sim // subsystem first so the retry runs against a fresh daemon. diff --git a/playwright/e2e/ios/helpers/simulator-control.ts b/playwright/e2e/ios/helpers/simulator-control.ts index 9fa4121f9..1823ecc79 100644 --- a/playwright/e2e/ios/helpers/simulator-control.ts +++ b/playwright/e2e/ios/helpers/simulator-control.ts @@ -102,7 +102,7 @@ export class SimulatorControl { // // bootstatus failing is a HARD error: continuing onto a half-booted // simulator does not produce "a clearer error later" — it produces - // simctl install/launch calls that hang for the entire 15-minute test + // simctl install/launch calls that hang for the entire per-test // timeout, twice (observed on macos-26 runners). 
One shutdown→boot // cycle is allowed to recover a wedged first boot; after that, fail // loudly so the job dies in minutes with the real cause named. @@ -270,7 +270,7 @@ export class SimulatorControl { // Every simctl call gets a hard timeout: on macos-26 CI runners a single // `simctl install` / `launch` against an unhealthy simulator hangs -// indefinitely, silently eating the whole 15-minute test timeout with no +// indefinitely, silently eating the whole per-test timeout with no // attribution. Failing in 3 minutes with the command named turns that into // a diagnosable error (and lets the per-test recovery + CI retry actually // kick in — see SimulatorControl.recoverSimSubsystem). From d5c0c45391f4fbb2375051a7f9cf84c4473bd917 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 10:45:47 +0200 Subject: [PATCH 19/20] ci(e2e): run iOS mobile E2E jobs on dedicated macos-26-xlarge runners (shared macos-26 pool degraded for hours, _simPair setup couldn't finish) --- .github/workflows/e2e-blockchain.yml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/e2e-blockchain.yml b/.github/workflows/e2e-blockchain.yml index 7ee98da9e..f0a5854d8 100644 --- a/.github/workflows/e2e-blockchain.yml +++ b/.github/workflows/e2e-blockchain.yml @@ -378,8 +378,12 @@ jobs: name: Mobile E2E (devnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'devnet' # macos-26 matches build-mobile.yml — required for iOS 26 SDK symbols - # used by dapp-browser's WKWebViewController. - runs-on: macos-26 + # used by dapp-browser's WKWebViewController. The -xlarge size is the + # DEDICATED Apple-Silicon larger runner (2x vCPU/RAM, no noisy-neighbour + # IO contention) — the shared standard macos-26 runners were intermittently + # degraded for hours at a time, with CoreSimulator install/launch crawling + # so badly that two-sim `_simPair` setup couldn't finish even in 13-15 min. 
+ runs-on: macos-26-xlarge # Observed worst case: ~40 min setup (CLI compile on cache miss + iOS # build + simulator cold boot overlap) + ~55 min suite with one retry. # 90 minutes guillotined an otherwise-passing run at test 7 of 7. @@ -554,7 +558,7 @@ jobs: mobile-testnet: name: Mobile E2E (testnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'testnet' - runs-on: macos-26 + runs-on: macos-26-xlarge # Observed worst case: ~40 min setup (CLI compile on cache miss + iOS # build + simulator cold boot overlap) + ~55 min suite with one retry. # 90 minutes guillotined an otherwise-passing run at test 7 of 7. @@ -751,8 +755,12 @@ jobs: name: Mobile Guardian E2E (devnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'devnet' # macos-26 matches build-mobile.yml — required for iOS 26 SDK symbols - # used by dapp-browser's WKWebViewController. - runs-on: macos-26 + # used by dapp-browser's WKWebViewController. The -xlarge size is the + # DEDICATED Apple-Silicon larger runner (2x vCPU/RAM, no noisy-neighbour + # IO contention) — the shared standard macos-26 runners were intermittently + # degraded for hours at a time, with CoreSimulator install/launch crawling + # so badly that two-sim `_simPair` setup couldn't finish even in 13-15 min. + runs-on: macos-26-xlarge # ~40 min setup (CLI compile on cache miss + iOS build + sim cold boot) # plus a single guardian spec with up to 2 retries. Generous ceiling that # matches the standard mobile jobs so a passing build is never guillotined. @@ -925,7 +933,7 @@ jobs: mobile-guardian-testnet: name: Mobile Guardian E2E (testnet) if: github.event_name != 'workflow_dispatch' || inputs.network == 'both' || inputs.network == 'testnet' - runs-on: macos-26 + runs-on: macos-26-xlarge # ~40 min setup (CLI compile on cache miss + iOS build + sim cold boot) # plus a single guardian spec with up to 2 retries. 
Generous ceiling that # matches the standard mobile jobs so a passing build is never guillotined. From 22d8d9d35f51b3a130f1356f690e2b4617ff0b5e Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sun, 28 Jun 2026 11:14:29 +0200 Subject: [PATCH 20/20] docs(changelog): record macos-26-xlarge runner move for iOS mobile E2E --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09159e60c..fec5a862a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ * [FIX][all] **Guardian accounts can now connect to dApps (faucet, etc.) instead of failing with "Connection Failed" / `NOT_GRANTED`.** A Guardian account's auth component is built by `@openzeppelin/miden-multisig-client` and its procedures live in the `openzeppelin::auth::*` MASM namespace, so they don't MAST-match any bundled `miden-standards` template. The SDK's `AccountInterface` therefore classifies the component as `Custom` and `Account.getPublicKeyCommitments()` returns `[]`; the wallet's connect flow read that as "no public key" and rejected the connection (surfaced to the dApp as `NOT_GRANTED`). Public-key resolution now falls back, for accounts the SDK can't classify, to reading the hot signer's commitment directly from the account's `openzeppelin::multisig::signer_public_keys` storage map — the key the wallet actually signs with — so Guardian accounts resolve a usable session key. Plain single-key accounts are unaffected (their `AuthSingleSig` component is recognized as before). The same resolution covers the reveal-private-key and advanced-settings public-key views, which broke identically for Guardian accounts. 
(#300) * [CHANGE][ci] **iOS E2E no longer hangs the full timeout when the simulator's CDP bridge wedges.** `CdpBridge.eval`/`evaluate` now race the WebKit `executeAtom` call against a 30s hard timeout (matching `evalAsync`), so a wedged RWI socket or a momentarily-blocked WebView main thread surfaces as a fast throw instead of an indefinite await. Previously `pollForCondition` could only check its deadline *between* iterations, so a single hung `eval` stalled the whole test until Playwright's 15-minute kill (and the rest of the serial suite then skipped); now the poll enforces its own budget and `--retries` restarts on a fresh app + CDP. (#302) * [CHANGE][ci] **Blockchain E2E retries the `miden-client` harness CLI on transient remote-prover connection failures.** The CLI deploy/mint/sync retry loop classified only node-RPC and nonce-lag errors as transient; an intermittent TLS/gRPC handshake failure to the delegated prover endpoint on the macOS runners (`failed to connect to the remote prover` / `transport error` / `no native certs found`) was treated as fatal, so a mint failed outright even though a sibling mint in the same test connected fine. These connection-level prover errors are now recognized as transient and retried with backoff, and the three duplicated classifiers were unified into one `isTransientCliError` helper. (#302) -* [CHANGE][ci] **iOS E2E is more resilient to degraded macos-26 CoreSimulator runners.** On a degraded runner every `simctl` op crawls (97 CI samples: per-wallet `_simPair` setup p50 65s, p90 267s, max 401s vs. <5s healthy — so two sequential wallets can take up to ~13 min and STILL complete). 
The `_simPair` fixture setup is capped (at 13 min, past the slowest observed completing setup) so a genuinely-hung CoreSimulator fails fast with a named error and a sim-subsystem restart instead of silently eating the whole per-test timeout, while a degraded-but-completing setup is allowed to finish rather than being killed mid-flight (no assertion is relaxed — purely tolerance for degraded-runner IO); the subsystem recovery `simctl shutdown all`s to clear half-booted device state (the `SimError 405` signature); the per-test timeout is 25 min (from 15) to leave room for a slow setup plus a slow test body; and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite) so a transient runner hiccup gets multiple fresh attempts. (#302) +* [CHANGE][ci] **iOS E2E mobile jobs run on dedicated `macos-26-xlarge` runners, and are more resilient to degraded shared runners.** The shared standard `macos-26` runner pool was intermittently degraded for hours at a time by noisy-neighbour IO contention — every `simctl` op crawled (97 CI samples: per-wallet `_simPair` setup p50 65s, p90 267s, max 401s vs. <5s healthy — so two sequential wallets took up to ~13 min and sometimes never finished even in 15 min), making the whole mobile suite un-runnable. All four mobile E2E jobs now use the dedicated Apple-Silicon `-xlarge` larger runner (2× vCPU/RAM, no noisy neighbours), which restores a healthy ~2-3 min setup and a full green suite. 
As belt-and-suspenders for any residual slowness, the `_simPair` fixture setup is capped (at 13 min, past the slowest observed completing setup) so a genuinely-hung CoreSimulator fails fast with a named error and a sim-subsystem restart instead of silently eating the whole per-test timeout, while a degraded-but-completing setup is allowed to finish rather than being killed mid-flight (no assertion is relaxed — purely tolerance for degraded IO); the subsystem recovery runs `simctl shutdown all` to clear half-booted device state (the `SimError 405` signature); the per-test timeout is 25 min (from 15); and the non-guardian mobile suite runs with `--retries=2` (matching the guardian suite). (#302) * [CHANGE][ci] **Guardian iOS E2E reads the on-chain auth structure with a pure storage parse instead of loading the multisig client.** The `verify_guardian_auth_structure` assertion's `__TEST_GUARDIAN_AUTH__` hook used to build a `MultisigService` (`getOrCreateMultisigService` → `MultisigClient.load`) and read it. Against the post-consume state — where the guardian's stored blob lags the on-chain account — that load entered a re-sign/realign loop (~48 `signWithHotKey` calls for this read vs. 26 for a full consume) that hung the single-threaded mobile WASM past the eval budget; the assertion never got far enough to run on iOS. The structure (signer set + procedure thresholds) is immutable and lives in the account's storage maps, so the hook now reads it directly with `AccountInspector.fromAccount` — a pure parse with no signing, no guardian HTTP, and no client load (just one `getAccount`, the same read the balance poll already does).
Because even that lone `getAccount` was still starved by other main-thread WASM activity on iOS (the auth eval was observed taking 60s with all the wallet's own pollers paused), the structure is now captured in the wallet's own balance poll (`fetchBalances`, which reliably completes) and stashed on a global; the test reads it as a plain value with no WASM call at all. Finally, the iOS harness reads that stash over the SYNCHRONOUS `execute_script` atom (polled), not the async `execute_async_script` one: appium-remote-debugger's async atom delivers its completion callback in the `arguments[arguments.length-1]` slot as the boolean `true` on this iOS RWI bridge, so `cb(result)` threw `TypeError: cb is not a function`, the promise rejected unhandled, and every `evalAsync` hung to its timeout no matter how fast the script ran — which is why the auth read still timed out at 60s even with the stash already populated. Test-only, gated on `MIDEN_E2E_TEST` and tree-shaken from production. (#302) ## 1.15.2 (2026-06-22)