diff --git a/cspell.config.yaml b/cspell.config.yaml index 52a5a629..be1e8021 100644 --- a/cspell.config.yaml +++ b/cspell.config.yaml @@ -39,8 +39,9 @@ words: - dllexport - dlopen - downloadables - - EDQUOT - EACCES + - EDQUOT + - embedder - embedders - EOCD - endtemplate @@ -51,6 +52,7 @@ words: - gclient - hdpi - ifdef + - keypair - libapp - libc - libflutter @@ -68,7 +70,9 @@ words: - repr - reqwest - rlib + - roundtrips - rsplit + - rollforward - rollouts - RTLD - rustflags @@ -82,9 +86,11 @@ words: - subosito - swiftpm - symbolication + - tombstoned - ureq - unbootable - unbooted + - unparseable - unreviewable - unwritable - usize diff --git a/library/src/c_api/mod.rs b/library/src/c_api/mod.rs index 0c052431..96319fbd 100644 --- a/library/src/c_api/mod.rs +++ b/library/src/c_api/mod.rs @@ -876,6 +876,105 @@ mod test { assert_eq!(shorebird_current_boot_patch_number(), 0); } + /// Server rolls back patch 1, then later rolls it forward again with + /// the same number and same hash. The pre-lifecycle code added the + /// rolled-back number to `known_bad_patches` permanently for the + /// release, so the rollforward was silently dropped on the device. + /// The lifecycle's `cleanup` is state-aware: a server-driven + /// rollback on a non-`Bad` patch forgets the patch entirely (no + /// tombstone), leaving the number free to be reinstalled. + /// shorebirdtech/shorebird#3728. + #[serial] + #[test] + fn rollforward_after_server_rollback_reinstalls_patch() { + testing_reset_config(); + let tmp_dir = TempDir::new().unwrap(); + + let apk_path = tmp_dir.path().join("base.apk"); + write_fake_apk( + apk_path.to_str().unwrap(), + HELLO_TESTS_PATCH.base.as_bytes(), + ); + let fake_libapp_path = tmp_dir.path().join("lib/arch/ignored.so"); + let c_params = parameters(&tmp_dir, fake_libapp_path.to_str().unwrap()); + let c_yaml = c_string("app_id: foo"); + assert!(shorebird_init(&c_params, FileCallbacks::new(), c_yaml)); + free_c_string(c_yaml); + free_parameters(c_params); + + // Phase 1: install patch 1 and report a successful launch. + testing_set_network_hooks( + |_url, _request| { + Ok(PatchCheckResponse { + patch_available: true, + patch: Some(crate::Patch { + number: 1, + hash: HELLO_TESTS_PATCH.hash.to_owned(), + download_url: "ignored".to_owned(), + hash_signature: None, + }), + rolled_back_patch_numbers: None, + }) + }, + |_url, dest: &Path, _resume_from: u64| HELLO_TESTS_PATCH.write_to(dest), + |_url, _event| Ok(()), + ); + assert!(shorebird_check_for_downloadable_update(std::ptr::null())); + run_update_expecting(std::ptr::null(), SHOREBIRD_UPDATE_INSTALLED); + shorebird_report_launch_start(); + shorebird_report_launch_success(); + assert_eq!(shorebird_current_boot_patch_number(), 1); + + // Phase 2: server rolls patch 1 back with no replacement. + // Phase-1 spawned threads (PatchDownload, PatchInstallSuccess) + // hold a clone of the config from when they were spawned, so they + // hit the old report hook above. Nothing in phase 2 should report + // or download — only a patch-check happens. 
+ testing_set_network_hooks( + |_url, _request| { + Ok(PatchCheckResponse { + patch_available: false, + patch: None, + rolled_back_patch_numbers: Some(vec![1]), + }) + }, + UNEXPECTED_DOWNLOAD, + UNEXPECTED_REPORT, + ); + assert!(!shorebird_check_for_downloadable_update(std::ptr::null())); + assert_eq!(shorebird_next_boot_patch_number(), 0); + + // Phase 3: server rolls patch 1 forward — same number, same hash, + // empty rolled_back list (the row's `is_rolled_back` flipped back + // to false on the server). The device must accept this as a normal + // "patch available" response and reinstall. + testing_set_network_hooks( + |_url, _request| { + Ok(PatchCheckResponse { + patch_available: true, + patch: Some(crate::Patch { + number: 1, + hash: HELLO_TESTS_PATCH.hash.to_owned(), + download_url: "ignored".to_owned(), + hash_signature: None, + }), + rolled_back_patch_numbers: Some(vec![]), + }) + }, + |_url, dest: &Path, _resume_from: u64| HELLO_TESTS_PATCH.write_to(dest), + |_url, _event| Ok(()), + ); + + // Pre-lifecycle: returns false because patch 1 sat in + // `known_bad_patches` from phase 2's `remove_patch` call. + // Post-lifecycle: phase 2's cleanup forgot patch 1 entirely + // (no Bad tombstone for server-driven rollbacks), so this + // installs cleanly. + assert!(shorebird_check_for_downloadable_update(std::ptr::null())); + run_update_expecting(std::ptr::null(), SHOREBIRD_UPDATE_INSTALLED); + assert_eq!(shorebird_next_boot_patch_number(), 1); + } + /// Patch-to-patch rollback: device on patch 2, server rolls back to /// patch 1 (sends rollback signal AND a downloadable replacement). /// `check_for_downloadable_update` returns true (replacement available), diff --git a/library/src/cache/lifecycle.rs b/library/src/cache/lifecycle.rs new file mode 100644 index 00000000..6268e237 --- /dev/null +++ b/library/src/cache/lifecycle.rs @@ -0,0 +1,2178 @@ +//! Per-patch lifecycle state machine. +//! +//! Replaces the scattered storage of patch state across `download_state.rs` +//! sidecars, the bare files in `downloads/`, and the `next_boot_patch` / +//! `last_booted_patch` / `known_bad_patches` fields of `PatchesState`. +//! +//! On-disk layout (per release): +//! {state_root}/ +//! pointers.json # ReleasePointers +//! patches/ +//! {N}/ +//! state.json # PatchState +//! dlc.vmcode # installed artifact (Installed only) +//! {download_root}/ +//! {N} # compressed bytes (Downloading/Downloaded) +//! +//! `state_root` is the persistent app-storage directory; `download_root` +//! is the OS-managed cache directory (e.g. iOS `NSCachesDirectory`). +//! Putting the compressed download bytes in cache lets the OS evict +//! them under storage pressure — `decide_start` falls back to `Fresh` +//! when the file is gone, costing a redownload but no data loss. +//! +//! state.json is the source of truth for "what state is patch N in?" and +//! survives within a release as a tombstone for `Bad` patches even after +//! their artifact files are removed. Everything under `patches/` is wiped +//! on release-version change; `download_root` is also wiped at that +//! point (see `UpdaterState::create_new_and_save`). +//! +//! Mutations are exposed as two operations on top of the raw read/write: +//! - `mark_bad(n, reason)` writes a Bad tombstone and deletes artifact +//! files (sugar over `write_state` + `cleanup`). +//! - `cleanup(n)` is state-aware: keeps the tombstone if the patch is +//! already Bad, otherwise removes the patch directory entirely. +//! +//! 
Callers never pick between "delete tombstone" and "preserve tombstone";
+//! the state on disk decides. See the design notes that led here in
+//! shorebirdtech/shorebird#3737.
+//!
+//! The session-scoped "what patch is the engine currently executing"
+//! signal lives in `config.rs` as `running_patch`, not here — it's not
+//! persisted across launches and is set from `report_launch_start`
+//! before any lifecycle transitions happen. `shorebird_current_boot_patch_number`
+//! reads that, not anything in this module.
+
+use std::path::{Path, PathBuf};
+
+use anyhow::{bail, Context, Result};
+use serde::{Deserialize, Serialize};
+
+use super::{disk_io, signing};
+use crate::yaml::PatchVerificationMode;
+
+const PATCHES_DIR: &str = "patches";
+const PATCH_STATE_FILE: &str = "state.json";
+const POINTERS_FILE: &str = "pointers.json";
+
+/// Per-patch lifecycle state. Persisted at `{root}/patches/{N}/state.json`.
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "kind")]
+pub enum PatchState {
+    /// Compressed bytes are partially on disk. The current bytes-on-disk
+    /// count is read from the `download` file at resume time — the state
+    /// itself just records "we're mid-download for this url+hash."
+    ///
+    /// `hash`/`signature` here are *comparators*, not trusted values.
+    /// `decide_start` checks them against the server's freshly-delivered
+    /// hash for this number; a mismatch (e.g. a server-side reupload
+    /// under the same patch number) discards the prior bytes and
+    /// restarts. A tampered on-disk hash just causes a redownload.
+    Downloading {
+        url: String,
+        hash: String,
+        signature: Option<String>,
+    },
+    /// Compressed bytes are fully on disk and the size matches what we
+    /// recorded after the download completed. Bytes are untrusted until
+    /// install validates them (inflate + check_hash).
+    ///
+    /// As with `Downloading`, `hash`/`signature` are comparators only.
+    /// `record_install_complete` carries `signature` forward into
+    /// `Installed`, where Strict-mode boot validation re-verifies it
+    /// against the on-disk artifact's freshly-recomputed hash.
+    Downloaded {
+        url: String,
+        hash: String,
+        signature: Option<String>,
+        size: u64,
+    },
+    /// `dlc.vmcode` is present; the patch is bootable.
+    ///
+    /// `hash` is intentionally absent. Install-time validation
+    /// (`check_hash` against the server-fresh hash held in memory)
+    /// has already happened, and we don't trust a hash we'd have to
+    /// re-read from disk to redo that check. Strict-mode boot
+    /// validation recomputes the artifact's hash from bytes and feeds
+    /// it to `check_signature` — `signature` is enough.
+    Installed {
+        signature: Option<String>,
+        size: u64,
+    },
+    /// Tombstone. The patch will not be re-attempted within this release.
+    /// Optional fields preserve what we knew about the patch for diagnostics
+    /// and for the `PatchInstallFailure` event we queue.
+    Bad {
+        reason: BadReason,
+        hash: Option<String>,
+        signature: Option<String>,
+        size: Option<u64>,
+    },
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
+pub enum BadReason {
+    /// Boot started but never recorded success — process crashed during boot.
+    BootCrash,
+    /// `inflate` failed (zstd magic / decompression error).
+    InvalidPatchBytes,
+    /// Inflated bytes' hash didn't match the server-claimed hash.
+    InstallHashMismatch,
+    /// `validate_patch_is_bootable` failed at boot time (size mismatch
+    /// vs Installed.size, or signature failed in Strict mode).
+    ValidationFailed,
+}
+
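+// For orientation, a sketch of how a caller might drive the happy path
+// through these states (illustrative only — the real call sites live in
+// `update_internal` and the boot-reporting paths; local names such as
+// `patch`, `sig`, `bytes_on_disk`, and `installed_size` are hypothetical):
+//
+//     match lifecycle.decide_start(n, &patch.download_url, &patch.hash) {
+//         DownloadAction::Fresh => {
+//             lifecycle.record_download_started(n, &patch.download_url, &patch.hash, sig)?;
+//             // ... stream bytes to lifecycle.download_artifact_path(n) ...
+//             lifecycle.record_download_complete(n, bytes_on_disk)?;
+//         }
+//         DownloadAction::Resume { offset } => { /* Range GET from `offset` */ }
+//         DownloadAction::Complete => { /* skip the network; go straight to install */ }
+//         DownloadAction::Skip(_) => { /* nothing to do */ }
+//     }
+//     // inflate + check_hash against the server-fresh hash, then:
+//     lifecycle.record_install_complete(n, installed_size)?;
+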
+/// Per-release pointers. Single document at `{root}/pointers.json`.
+/// References patch numbers — the metadata for each lives in that patch's
+/// `state.json`.
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub struct ReleasePointers {
+    /// Boot target on next launch. Must reference a patch in `Installed`.
+    /// `None` means base release.
+    pub next_boot_patch: Option<usize>,
+
+    /// Most recent patch that successfully booted on a prior run. Used as
+    /// a fallback target when `next_boot_patch` becomes invalid.
+    pub last_booted_patch: Option<usize>,
+
+    /// Boot-in-progress breadcrumb. Set at `record_boot_start`, cleared
+    /// at `record_boot_success` / `record_boot_failure`. If still set on
+    /// next init, treat as a crashed boot.
+    pub currently_booting_patch: Option<usize>,
+
+    /// Unix timestamp (seconds) when `currently_booting_patch` was set.
+    pub boot_started_at: Option<u64>,
+}
+
+/// Per-release patch lifecycle and storage. Owns
+/// `{state_root}/patches/`, `{state_root}/pointers.json`, and
+/// `{download_root}/{N}` files for in-flight compressed downloads.
+#[derive(Debug)]
+pub struct PatchLifecycle {
+    /// Persistent app-storage root. Holds `pointers.json` and
+    /// `patches/{N}/{state.json,dlc.vmcode}`.
+    state_root: PathBuf,
+    /// OS-managed cache root. Holds `{N}` files for in-flight or
+    /// completed compressed download bytes. The OS may evict these
+    /// under storage pressure; `decide_start` recovers by falling
+    /// back to `Fresh`.
+    download_root: PathBuf,
+    pointers: ReleasePointers,
+}
+
+impl PatchLifecycle {
+    /// Loads the lifecycle from disk. `state_root` is the persistent
+    /// app-storage dir; `download_root` is the OS-managed cache dir
+    /// (typically `{code_cache_dir}/downloads`). Missing or
+    /// unparseable `pointers.json` falls back to defaults; per-patch
+    /// state files are read lazily.
+    pub fn load_or_default(state_root: PathBuf, download_root: PathBuf) -> Self {
+        let pointers_path = state_root.join(POINTERS_FILE);
+        let pointers = if pointers_path.exists() {
+            match disk_io::read(&pointers_path) {
+                Ok(p) => p,
+                Err(e) => {
+                    shorebird_error!(
+                        "Failed to read pointers from {:?}: {:?}; using defaults",
+                        pointers_path,
+                        e
+                    );
+                    ReleasePointers::default()
+                }
+            }
+        } else {
+            ReleasePointers::default()
+        };
+        Self {
+            state_root,
+            download_root,
+            pointers,
+        }
+    }
+
+    pub fn pointers(&self) -> &ReleasePointers {
+        &self.pointers
+    }
+
+    /// Returns the on-disk state for patch `n`, or `None` if the patch has
+    /// no record on disk (i.e. is in the conceptual "Unknown" state).
+    pub fn read_state(&self, n: usize) -> Option<PatchState> {
+        let path = self.state_path(n);
+        if !path.exists() {
+            return None;
+        }
+        match disk_io::read(&path) {
+            Ok(state) => Some(state),
+            Err(e) => {
+                shorebird_error!("Failed to read state for patch {}: {:?}", n, e);
+                None
+            }
+        }
+    }
+
+    /// Persists `state` for patch `n`. Creates the patch directory if
+    /// needed. Atomic via `disk_io::write`.
+    pub fn write_state(&self, n: usize, state: &PatchState) -> Result<()> {
+        disk_io::write(state, &self.state_path(n))
+    }
+
+    /// Persists the current pointers.
+    pub fn save_pointers(&self) -> Result<()> {
+        disk_io::write(&self.pointers, &self.pointers_path())
+    }
+
+    /// Transitions patch `n` to `Bad{reason}`, preserving any prior
+    /// hash/signature/size info as best-effort diagnostics. Then deletes
+    /// the patch's artifact files (state.json stays as the tombstone).
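+    ///
+    /// For illustration, a hypothetical failure path (error handling and
+    /// surrounding flow elided):
+    ///
+    /// ```ignore
+    /// // Inflation failed — tombstone the patch and drop its bytes.
+    /// lifecycle.mark_bad(n, BadReason::InvalidPatchBytes)?;
+    /// // Later offers of the same number now short-circuit:
+    /// assert!(matches!(
+    ///     lifecycle.decide_start(n, url, hash),
+    ///     DownloadAction::Skip(SkipReason::KnownBad)
+    /// ));
+    /// ```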
+ /// + /// Write-then-cleanup ordering means a crash between the two leaves a + /// tombstone with stale-but-unused artifact bytes — sweeping picks + /// them up on the next `cleanup` call. + /// + /// Marking an already-Bad patch overwrites the `reason` field with + /// the new one (the old hash/signature/size are preserved). In + /// practice we don't double-fail patches — this just makes the + /// behavior obvious if it ever happens. + pub fn mark_bad(&self, n: usize, reason: BadReason) -> Result<()> { + let (hash, signature, size) = match self.read_state(n) { + Some(PatchState::Downloading { + hash, signature, .. + }) => { + let size = self + .download_artifact_path(n) + .metadata() + .ok() + .map(|m| m.len()); + (Some(hash), signature, size) + } + Some(PatchState::Downloaded { + hash, + signature, + size, + .. + }) => (Some(hash), signature, Some(size)), + Some(PatchState::Installed { signature, size }) => (None, signature, Some(size)), + Some(PatchState::Bad { + hash, + signature, + size, + .. + }) => (hash, signature, size), + None => (None, None, None), + }; + self.write_state( + n, + &PatchState::Bad { + reason, + hash, + signature, + size, + }, + )?; + self.cleanup(n) + } + + /// State-aware retirement. If patch `n` is in `Bad`, the tombstone is + /// preserved and only artifact files are removed. Otherwise the entire + /// patch directory is removed. Idempotent — safe to call on patches + /// that don't exist. + pub fn cleanup(&self, n: usize) -> Result<()> { + match self.read_state(n) { + Some(PatchState::Bad { .. }) => self.delete_artifact_files(n), + Some(_) | None => self.forget_dir(n), + } + } + + /// Removes the artifact files for patch `n` while preserving its + /// `state.json` tombstone: everything under + /// `{state_root}/patches/{N}/` except `state.json`, plus the + /// compressed download at `{download_root}/{N}` if any. + fn delete_artifact_files(&self, n: usize) -> Result<()> { + let dir = self.patch_dir(n); + if let Ok(entries) = std::fs::read_dir(&dir) { + for entry in entries.flatten() { + if entry.file_name() == PATCH_STATE_FILE { + continue; + } + let path = entry.path(); + let result = if path.is_dir() { + std::fs::remove_dir_all(&path) + } else { + std::fs::remove_file(&path) + }; + if let Err(e) = result { + shorebird_error!("Failed to remove {:?}: {:?}", path, e); + } + } + } + // Compressed download lives in the cache root, not the patch dir. + let download = self.download_artifact_path(n); + if download.exists() { + if let Err(e) = std::fs::remove_file(&download) { + shorebird_error!("Failed to remove {:?}: {:?}", download, e); + } + } + Ok(()) + } + + /// Removes everything for patch `n`: `{state_root}/patches/{N}/` + /// (including `state.json`) and `{download_root}/{N}`. + fn forget_dir(&self, n: usize) -> Result<()> { + let dir = self.patch_dir(n); + if dir.exists() { + std::fs::remove_dir_all(&dir)?; + } + let download = self.download_artifact_path(n); + if download.exists() { + if let Err(e) = std::fs::remove_file(&download) { + shorebird_error!("Failed to remove {:?}: {:?}", download, e); + } + } + Ok(()) + } + + /// Path the caller streams compressed download bytes to. Lives at + /// `{download_root}/{N}` — flat layout under the OS-managed cache + /// dir. Convenience wrapper around the free + /// [`download_artifact_path`]. + pub fn download_artifact_path(&self, n: usize) -> PathBuf { + download_artifact_path(&self.download_root, n) + } + + /// Path of the installed (inflated) artifact. Lives at + /// `{state_root}/patches/{N}/dlc.vmcode`. 
Convenience wrapper + /// around the free [`installed_artifact_path`]. + pub fn installed_artifact_path(&self, n: usize) -> PathBuf { + installed_artifact_path(&self.state_root, n) + } + + fn patches_root(&self) -> PathBuf { + self.state_root.join(PATCHES_DIR) + } + + fn patch_dir(&self, n: usize) -> PathBuf { + self.patches_root().join(n.to_string()) + } + + fn state_path(&self, n: usize) -> PathBuf { + self.patch_dir(n).join(PATCH_STATE_FILE) + } + + fn pointers_path(&self) -> PathBuf { + self.state_root.join(POINTERS_FILE) + } +} + +/// Path the caller streams compressed download bytes to. Lives at +/// `{download_root}/{N}` (flat under the OS-managed cache dir). Free +/// function so callers in `update_internal` can compute it without +/// holding a lifecycle handle. +pub fn download_artifact_path(download_root: &Path, n: usize) -> PathBuf { + download_root.join(n.to_string()) +} + +/// Path of the installed (inflated) artifact. Lives at +/// `{state_root}/patches/{N}/dlc.vmcode`. See [`download_artifact_path`] +/// for why this is a free function rather than only a method. +pub fn installed_artifact_path(state_root: &Path, n: usize) -> PathBuf { + state_root + .join(PATCHES_DIR) + .join(n.to_string()) + .join("dlc.vmcode") +} + +/// What `update_internal` should do when starting work on a patch. +/// +/// Returned by [`PatchLifecycle::decide_start`] after inspecting the +/// patch's current on-disk state. The caller uses this to decide whether +/// to send a fresh GET, a Range GET, skip the network entirely, or bail. +#[derive(Debug, Clone, PartialEq)] +pub enum DownloadAction { + /// No usable prior bytes — start a fresh download. The caller should + /// `record_download_started(...)` and issue a GET without a Range + /// header. + Fresh, + /// Partial bytes from a matching prior attempt are on disk. The + /// caller resumes from `offset` (the existing partial file size) + /// and issues a GET with `Range: bytes={offset}-`. + Resume { offset: u64 }, + /// Bytes for this exact url+hash are fully on disk. Skip the network + /// request entirely and proceed to install. + Complete, + /// The patch is in a terminal state and shouldn't be re-fetched. + Skip(SkipReason), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum SkipReason { + /// Already installed; `should_install_patch` returns NoUpdate to + /// avoid downloading the patch we're already running. + AlreadyInstalled, + /// Tombstoned in this release. Subsequent attempts short-circuit. + KnownBad, +} + +impl PatchLifecycle { + /// Decide what to do when the server offers patch `n`. Reads the + /// on-disk state and matches it against the server's `url` + `hash`. + /// A mismatch on either field discards the prior state — the patch + /// was deleted and re-uploaded with the same number, or routed + /// through a different CDN URL, and the prior bytes can't be + /// trusted. + pub fn decide_start(&self, n: usize, url: &str, hash: &str) -> DownloadAction { + // For Downloading/Downloaded, the on-disk file is the source + // of truth for "how many bytes do we have." The state itself + // just records the url/hash/signature so we can detect a + // server change since the prior attempt. + let download_path = self.download_artifact_path(n); + match self.read_state(n) { + None => DownloadAction::Fresh, + Some(PatchState::Downloading { + url: prior_url, + hash: prior_hash, + .. 
+ }) if prior_url == url && prior_hash == hash => { + match std::fs::metadata(&download_path) { + Ok(meta) => DownloadAction::Resume { offset: meta.len() }, + Err(_) => DownloadAction::Fresh, + } + } + Some(PatchState::Downloading { .. }) => DownloadAction::Fresh, + Some(PatchState::Downloaded { + url: prior_url, + hash: prior_hash, + .. + }) if prior_url == url && prior_hash == hash => { + if download_path.exists() { + DownloadAction::Complete + } else { + DownloadAction::Fresh + } + } + Some(PatchState::Downloaded { .. }) => DownloadAction::Fresh, + Some(PatchState::Installed { .. }) => { + DownloadAction::Skip(SkipReason::AlreadyInstalled) + } + Some(PatchState::Bad { .. }) => DownloadAction::Skip(SkipReason::KnownBad), + } + } + + /// Records that a download is starting (or restarting). The actual + /// bytes-on-disk count comes from the `download` file at resume + /// time; this just persists the url/hash/signature so a subsequent + /// `decide_start` can match against the server's current offer. + pub fn record_download_started( + &self, + n: usize, + url: &str, + hash: &str, + signature: Option<&str>, + ) -> Result<()> { + self.write_state( + n, + &PatchState::Downloading { + url: url.to_string(), + hash: hash.to_string(), + signature: signature.map(String::from), + }, + ) + } + + /// Transitions `n` from `Downloading` to `Downloaded` after the + /// download completes. `size` is the actual on-disk size of the + /// compressed bytes. + pub fn record_download_complete(&self, n: usize, size: u64) -> Result<()> { + let (url, hash, signature) = match self.read_state(n) { + Some(PatchState::Downloading { + url, + hash, + signature, + .. + }) => (url, hash, signature), + // Idempotent: a second "complete" call on an already-Downloaded + // patch is a no-op (e.g. process restarted just before this). + Some(PatchState::Downloaded { + url, + hash, + signature, + .. + }) => (url, hash, signature), + other => { + anyhow::bail!( + "record_download_complete called on patch {n} in unexpected state: {other:?}" + ); + } + }; + self.write_state( + n, + &PatchState::Downloaded { + url, + hash, + signature, + size, + }, + ) + } + + /// Records that this process is starting to boot patch `n`. The + /// breadcrumb in `pointers.currently_booting_patch` survives a process + /// crash, which is how we detect boot-time crashes on the next init + /// (see [`detect_boot_crash_on_init`]). + /// + /// Sanity-checks that `n` matches `next_boot_patch` — guards against + /// the engine reporting that it's booting some patch other than the + /// one we said to boot. Carries forward the same defensive check the + /// prior `PatchManager::record_boot_start_for_patch` had. + pub fn record_boot_start(&mut self, n: usize) -> Result<()> { + match self.read_state(n) { + Some(PatchState::Installed { .. }) => {} + other => { + bail!("record_boot_start({n}) expected Installed, got {other:?}"); + } + } + match self.pointers.next_boot_patch { + Some(next) if next == n => {} + Some(next) => bail!("record_boot_start({n}) but next_boot_patch is {next}"), + None => bail!("record_boot_start({n}) but next_boot_patch is unset"), + } + self.pointers.currently_booting_patch = Some(n); + self.pointers.boot_started_at = Some(crate::time::unix_timestamp()); + self.save_pointers() + } + + /// Records a successful boot. Promotes `currently_booting_patch` to + /// `last_booted_patch` and runs cleanup on older patches (per-patch + /// state-aware: Bad tombstones survive, others are forgotten). 
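+    ///
+    /// A sketch of the boot-reporting sequence this participates in
+    /// (caller shown is illustrative; the real entry points are the
+    /// `shorebird_report_launch_*` C APIs):
+    ///
+    /// ```ignore
+    /// lifecycle.record_boot_start(n)?;  // engine is about to execute patch n
+    /// // ... the patch runs ...
+    /// lifecycle.record_boot_success()?; // or record_boot_failure(n); a crash here
+    ///                                   // is caught by detect_boot_crash_on_init
+    ///                                   // on the next launch
+    /// ```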
+    pub fn record_boot_success(&mut self) -> Result<()> {
+        let n = self
+            .pointers
+            .currently_booting_patch
+            .context("record_boot_success without currently_booting_patch")?;
+        self.pointers.last_booted_patch = Some(n);
+        self.pointers.currently_booting_patch = None;
+        self.pointers.boot_started_at = None;
+        self.save_pointers()?;
+        self.cleanup_older_than(n);
+        Ok(())
+    }
+
+    /// Records that patch `n` failed to boot. Marks the patch
+    /// `Bad{BootCrash}`, clears the boot breadcrumb, and recomputes
+    /// `next_boot_patch`.
+    ///
+    /// The patch number is passed in (rather than read from
+    /// `currently_booting_patch`) to match the prior PatchManager API
+    /// shape — most call sites already have the number in hand. The
+    /// breadcrumb is cleared regardless of whether it matched.
+    ///
+    /// `mark_bad` runs *before* clearing the breadcrumb so a crash
+    /// between the two leaves a still-set `currently_booting_patch`
+    /// pointing at an already-Bad patch. Next init's
+    /// `detect_boot_crash_on_init` re-runs this path; `mark_bad` is
+    /// idempotent on Bad → Bad. Reverse the order and a crash strands
+    /// an `Installed` patch with no breadcrumb — the next boot retries
+    /// it silently.
+    pub fn record_boot_failure(&mut self, n: usize) -> Result<()> {
+        self.mark_bad(n, BadReason::BootCrash)?;
+        self.pointers.currently_booting_patch = None;
+        self.pointers.boot_started_at = None;
+        self.save_pointers()?;
+        self.recompute_next_boot()
+    }
+
+    /// Called at init. If `currently_booting_patch` is still set from a
+    /// prior process, that boot crashed without recording success or
+    /// failure — transition the patch to `Bad{BootCrash}` and recompute
+    /// `next_boot_patch`. Returns the patch number that was recovered,
+    /// if any.
+    pub fn detect_boot_crash_on_init(&mut self) -> Result<Option<usize>> {
+        let Some(n) = self.pointers.currently_booting_patch else {
+            return Ok(None);
+        };
+        self.record_boot_failure(n)?;
+        Ok(Some(n))
+    }
+
+    /// Validates that `next_boot_patch` is bootable (its on-disk size
+    /// matches `Installed.size`, and in `Strict` mode its signature
+    /// verifies against `public_key`). On failure, marks the patch
+    /// `Bad{ValidationFailed}` and recomputes `next_boot_patch`.
+    pub fn validate_next_boot_patch(
+        &mut self,
+        public_key: Option<&str>,
+        mode: PatchVerificationMode,
+    ) -> Result<()> {
+        let Some(n) = self.pointers.next_boot_patch else {
+            return Ok(());
+        };
+        if let Err(e) = self.validate_installed_patch(n, public_key, mode) {
+            shorebird_error!("Patch {} failed validation: {:?}", n, e);
+            self.mark_bad(n, BadReason::ValidationFailed)?;
+            self.recompute_next_boot()?;
+            return Err(e);
+        }
+        Ok(())
+    }
+
+    /// Ensures `next_boot_patch` points at a usable Installed patch.
+    /// If it already does, no-op. Otherwise (None, Bad, or Unknown) it
+    /// falls back to `last_booted_patch` if that patch is currently
+    /// Installed; otherwise None (boot the base release).
+    ///
+    /// Crucially, this does not stomp a valid `next_boot_patch` —
+    /// otherwise a check that processes server rollbacks would clobber
+    /// a freshly installed newer patch by promoting the older
+    /// `last_booted_patch` back into `next_boot_patch`.
+    ///
+    /// Also clears `last_booted_patch` if its on-disk record is gone
+    /// (Unknown), so `pointers.json` doesn't accumulate references to
+    /// nothing. A `last_booted_patch` whose state is `Bad` is left
+    /// alone — that's a useful historical breadcrumb and recompute
+    /// will simply not promote it.
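+    ///
+    /// A concrete sketch of the fallback (patch numbers hypothetical,
+    /// mirroring the `record_boot_failure_marks_bad_and_recomputes_next_boot`
+    /// test below):
+    ///
+    /// ```ignore
+    /// // Patch 2 just went Bad; patch 1 was the last successful boot
+    /// // and is still Installed.
+    /// lifecycle.record_boot_failure(2)?;
+    /// assert_eq!(lifecycle.pointers().next_boot_patch, Some(1));
+    /// ```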
+ /// + /// We deliberately don't scan `patches/` for arbitrary Installed + /// patches — within a release there are at most a couple of patches + /// active at once, and the last successfully booted patch is the + /// only one we have evidence works on this device. + /// + /// Concretely: this is a fall-back-to-`last_booted_patch`-only + /// policy, not a fall-back-to-anything-bootable scan. If + /// `last_booted_patch` is `None` (e.g. fresh install of a release + /// where the freshly Installed `next_boot_patch` then fails + /// validation), we go to base release even when an older Installed + /// patch may be sitting in `patches/`. + pub fn recompute_next_boot(&mut self) -> Result<()> { + let mut dirty = false; + if let Some(lb) = self.pointers.last_booted_patch { + if self.read_state(lb).is_none() { + self.pointers.last_booted_patch = None; + dirty = true; + } + } + let already_valid = self + .pointers + .next_boot_patch + .is_some_and(|n| matches!(self.read_state(n), Some(PatchState::Installed { .. }))); + if !already_valid { + let new_target = self + .pointers + .last_booted_patch + .filter(|&lb| matches!(self.read_state(lb), Some(PatchState::Installed { .. }))); + if self.pointers.next_boot_patch != new_target { + self.pointers.next_boot_patch = new_target; + dirty = true; + } + } + if dirty { + self.save_pointers()?; + } + Ok(()) + } + + /// Sets `next_boot_patch` to a freshly Installed patch. Replaces any + /// prior `next_boot_patch` that was Installed-but-never-booted (those + /// are forgotten via [`cleanup`]); a Bad tombstone in that slot is + /// preserved. + pub fn promote_to_next_boot(&mut self, n: usize) -> Result<()> { + if !matches!(self.read_state(n), Some(PatchState::Installed { .. })) { + bail!("promote_to_next_boot({n}) requires Installed state"); + } + // If we're replacing an Installed-but-never-booted previous + // next_boot, retire it. cleanup handles tombstones correctly. + let last_booted = self.pointers.last_booted_patch; + if let Some(prev) = self.pointers.next_boot_patch { + if prev != n && Some(prev) != last_booted { + self.cleanup(prev)?; + } + } + self.pointers.next_boot_patch = Some(n); + self.save_pointers() + } + + /// Validates a specific Installed patch against its on-disk artifact. + fn validate_installed_patch( + &self, + n: usize, + public_key: Option<&str>, + mode: PatchVerificationMode, + ) -> Result<()> { + let (expected_size, signature) = match self.read_state(n) { + Some(PatchState::Installed { size, signature }) => (size, signature), + other => bail!("Patch {n} is not Installed: {other:?}"), + }; + let path = self.installed_artifact_path(n); + if !path.exists() { + bail!("Patch {n} artifact missing at {}", path.display()); + } + let actual_size = std::fs::metadata(&path)?.len(); + if actual_size != expected_size { + bail!( + "Patch {n} size {} on disk, expected {}", + actual_size, + expected_size + ); + } + if mode == PatchVerificationMode::Strict { + if let Some(public_key) = public_key { + let signature = signature.context("Patch signature is missing")?; + let actual_hash = signing::hash_file(&path)?; + signing::check_signature(&actual_hash, &signature, public_key)?; + } else { + shorebird_info!("No public key configured; skipping signature verification"); + } + } + Ok(()) + } + + /// Walks `state_root/patches/` and runs [`cleanup`] on every patch + /// with number < `n`. State-aware per-patch: Bad tombstones + /// survive, everything else is forgotten. 
Anything in `patches/`
+    /// whose name doesn't parse as a patch number is unrecognized
+    /// garbage (we own the directory) and gets removed wholesale.
+    ///
+    /// Then sweeps `download_root/` via [`cleanup_orphan_downloads`]:
+    /// every file in there should correspond to a patch in
+    /// `Downloading` or `Downloaded` state; anything else is orphan
+    /// or stale and gets removed. We own `download_root/` and don't
+    /// rely on OS cache eviction to clean up after us.
+    ///
+    /// Best-effort — errors are logged so a single bad entry can't
+    /// block the cleanup of others.
+    fn cleanup_older_than(&self, n: usize) {
+        if let Ok(entries) = std::fs::read_dir(self.patches_root()) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                let name_string = entry.file_name().to_string_lossy().into_owned();
+                match name_string.parse::<usize>() {
+                    Ok(num) if num < n => {
+                        if let Err(e) = self.cleanup(num) {
+                            shorebird_error!("cleanup({}) failed: {:?}", num, e);
+                        }
+                    }
+                    Ok(_) => {} // current or newer; leave alone.
+                    Err(_) => {
+                        // Anything in patches/ whose name isn't a patch
+                        // number is corruption / leftover from prior
+                        // versions / debug residue and is safe to remove.
+                        let result = if path.is_dir() {
+                            std::fs::remove_dir_all(&path)
+                        } else {
+                            std::fs::remove_file(&path)
+                        };
+                        if let Err(e) = result {
+                            shorebird_error!(
+                                "Failed to remove unrecognized entry {:?}: {:?}",
+                                path,
+                                e
+                            );
+                        }
+                    }
+                }
+            }
+        }
+        self.cleanup_orphan_downloads();
+    }
+
+    /// Sweeps `download_root/` for files that don't correspond to a
+    /// live download. A download file is "live" only when its patch
+    /// is in `Downloading` or `Downloaded` state — any other
+    /// situation (no state.json, state is `Installed` or `Bad`, name
+    /// isn't a patch number) is an orphan we should clean up. The
+    /// `Installed` and `Bad` cases shouldn't happen in normal flow
+    /// (`record_install_complete` and `mark_bad` already remove the
+    /// download), but the safety net costs nothing.
+    fn cleanup_orphan_downloads(&self) {
+        let Ok(entries) = std::fs::read_dir(&self.download_root) else {
+            return;
+        };
+        for entry in entries.flatten() {
+            let path = entry.path();
+            let name = entry.file_name().to_string_lossy().into_owned();
+            let keep = match name.parse::<usize>() {
+                Ok(num) => matches!(
+                    self.read_state(num),
+                    Some(PatchState::Downloading { .. } | PatchState::Downloaded { .. })
+                ),
+                Err(_) => false,
+            };
+            if keep {
+                continue;
+            }
+            let result = if path.is_dir() {
+                std::fs::remove_dir_all(&path)
+            } else {
+                std::fs::remove_file(&path)
+            };
+            if let Err(e) = result {
+                shorebird_error!("Failed to remove orphan download {:?}: {:?}", path, e);
+            }
+        }
+    }
+
+    /// Transitions `n` from `Downloaded` to `Installed`. `installed_size`
+    /// is the on-disk size of the inflated artifact (what
+    /// `validate_installed_patch` will check against on next boot).
+    /// Also removes the now-unneeded compressed `download` file.
+    pub fn record_install_complete(&self, n: usize, installed_size: u64) -> Result<()> {
+        let signature = match self.read_state(n) {
+            Some(PatchState::Downloaded { signature, .. }) => signature,
+            other => {
+                anyhow::bail!(
+                    "record_install_complete called on patch {n} in unexpected state: {other:?}"
+                );
+            }
+        };
+        self.write_state(
+            n,
+            &PatchState::Installed {
+                signature,
+                size: installed_size,
+            },
+        )?;
+        // The compressed bytes in the cache dir are no longer needed;
+        // the dlc.vmcode is the canonical artifact going forward.
+ let download = self.download_artifact_path(n); + if download.exists() { + if let Err(e) = std::fs::remove_file(&download) { + shorebird_error!("Failed to remove download file for patch {}: {:?}", n, e); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn fixture() -> (TempDir, PatchLifecycle) { + let tmp = TempDir::new().unwrap(); + let state_root = tmp.path().to_path_buf(); + let download_root = tmp.path().join("downloads"); + let lifecycle = PatchLifecycle::load_or_default(state_root, download_root); + (tmp, lifecycle) + } + + /// Two-process lifecycle helper: rebuilds a `PatchLifecycle` + /// against the same on-disk roots a prior `fixture()` set up. + fn reload_at(tmp_path: &Path) -> PatchLifecycle { + PatchLifecycle::load_or_default(tmp_path.to_path_buf(), tmp_path.join("downloads")) + } + + #[test] + fn read_state_returns_none_when_patch_unknown() { + let (_tmp, lifecycle) = fixture(); + assert!(lifecycle.read_state(1).is_none()); + } + + #[test] + fn write_then_read_roundtrips() { + let (_tmp, lifecycle) = fixture(); + let state = PatchState::Downloaded { + url: "https://example.com/p1".into(), + hash: "abc".into(), + signature: Some("sig".into()), + size: 1234, + }; + lifecycle.write_state(1, &state).unwrap(); + assert_eq!(lifecycle.read_state(1), Some(state)); + } + + #[test] + fn read_state_is_none_for_corrupt_state_json() { + let (_tmp, lifecycle) = fixture(); + let path = lifecycle.state_path(1); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, "not json").unwrap(); + // Corrupt JSON returns None — caller treats as Unknown and starts fresh. + assert!(lifecycle.read_state(1).is_none()); + } + + #[test] + fn mark_bad_preserves_metadata_from_installed() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: Some("s".into()), + size: 999, + }, + ) + .unwrap(); + lifecycle.mark_bad(1, BadReason::BootCrash).unwrap(); + match lifecycle.read_state(1).unwrap() { + PatchState::Bad { + reason, + hash, + signature, + size, + } => { + assert_eq!(reason, BadReason::BootCrash); + // Installed has no hash field; Bad.hash is None for + // Installed→Bad transitions. + assert_eq!(hash, None); + assert_eq!(signature, Some("s".into())); + assert_eq!(size, Some(999)); + } + other => panic!("expected Bad, got {other:?}"), + } + } + + #[test] + fn mark_bad_on_unknown_patch_records_no_metadata() { + let (_tmp, lifecycle) = fixture(); + lifecycle.mark_bad(1, BadReason::ValidationFailed).unwrap(); + match lifecycle.read_state(1).unwrap() { + PatchState::Bad { + reason, + hash, + signature, + size, + } => { + assert_eq!(reason, BadReason::ValidationFailed); + assert!(hash.is_none()); + assert!(signature.is_none()); + assert!(size.is_none()); + } + other => panic!("expected Bad, got {other:?}"), + } + } + + #[test] + fn mark_bad_from_downloading_records_partial_file_size() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloading { + url: "u".into(), + hash: "h".into(), + signature: Some("s".into()), + }, + ) + .unwrap(); + // File size on disk is what gets recorded as the patch's "size" + // — there's no recorded count in Downloading anymore. 
+ let path = lifecycle.download_artifact_path(1); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, vec![0u8; 250]).unwrap(); + + lifecycle.mark_bad(1, BadReason::InvalidPatchBytes).unwrap(); + + match lifecycle.read_state(1).unwrap() { + PatchState::Bad { + reason, + hash, + signature, + size, + } => { + assert_eq!(reason, BadReason::InvalidPatchBytes); + assert_eq!(hash, Some("h".into())); + assert_eq!(signature, Some("s".into())); + assert_eq!(size, Some(250)); + } + other => panic!("expected Bad, got {other:?}"), + } + } + + #[test] + fn mark_bad_overwrites_reason_when_already_bad() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Bad { + reason: BadReason::BootCrash, + hash: Some("h".into()), + signature: Some("s".into()), + size: Some(99), + }, + ) + .unwrap(); + lifecycle.mark_bad(1, BadReason::ValidationFailed).unwrap(); + // Reason changed, other fields preserved. + match lifecycle.read_state(1).unwrap() { + PatchState::Bad { + reason, + hash, + signature, + size, + } => { + assert_eq!(reason, BadReason::ValidationFailed); + assert_eq!(hash, Some("h".into())); + assert_eq!(signature, Some("s".into())); + assert_eq!(size, Some(99)); + } + other => panic!("expected Bad, got {other:?}"), + } + } + + #[test] + // Ports the artifact-deletion half of + // `patch_manager.rs::record_boot_failure_for_patch_tests::deletes_failed_patch_artifacts`. + // The state-machine equivalent is "mark_bad records the tombstone + // and deletes the artifact"; tested separately from the + // `record_boot_failure` flow because the same path is used by + // multiple Bad transitions (BootCrash, ValidationFailed, etc.). + fn mark_bad_deletes_artifact_files_but_keeps_tombstone() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: None, + size: 100, + }, + ) + .unwrap(); + // Drop fake artifact files alongside state.json. + let dir = lifecycle.patch_dir(1); + std::fs::write(dir.join("download"), b"compressed bytes").unwrap(); + std::fs::write(dir.join("dlc.vmcode"), b"installed bytes").unwrap(); + + lifecycle.mark_bad(1, BadReason::InvalidPatchBytes).unwrap(); + + assert!(lifecycle.state_path(1).exists(), "tombstone preserved"); + assert!(!dir.join("download").exists(), "artifact gone"); + assert!(!dir.join("dlc.vmcode").exists(), "artifact gone"); + } + + #[test] + fn cleanup_on_bad_patch_keeps_tombstone() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Bad { + reason: BadReason::BootCrash, + hash: Some("h".into()), + signature: None, + size: Some(50), + }, + ) + .unwrap(); + // Stale artifact bytes left around (e.g. from a crash between + // mark_bad's state write and its cleanup) should be swept up. 
+ let dir = lifecycle.patch_dir(1); + std::fs::write(dir.join("download"), b"stale").unwrap(); + + lifecycle.cleanup(1).unwrap(); + + assert!(lifecycle.state_path(1).exists()); + assert!(!dir.join("download").exists()); + } + + #[test] + fn cleanup_on_non_bad_patch_forgets_entirely() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: None, + size: 100, + }, + ) + .unwrap(); + std::fs::write(lifecycle.patch_dir(1).join("dlc.vmcode"), b"x").unwrap(); + + lifecycle.cleanup(1).unwrap(); + + assert!(!lifecycle.patch_dir(1).exists()); + assert!(lifecycle.read_state(1).is_none()); + } + + #[test] + fn cleanup_on_unknown_patch_is_noop() { + let (_tmp, lifecycle) = fixture(); + lifecycle.cleanup(99).unwrap(); // Should not error. + } + + #[test] + fn cleanup_is_idempotent() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: None, + size: 1, + }, + ) + .unwrap(); + lifecycle.cleanup(1).unwrap(); + lifecycle.cleanup(1).unwrap(); // No-op the second time. + } + + #[test] + fn pointers_load_default_when_missing() { + let (_tmp, lifecycle) = fixture(); + assert_eq!(lifecycle.pointers(), &ReleasePointers::default()); + } + + #[test] + fn pointers_save_and_reload_roundtrip() { + let tmp = TempDir::new().unwrap(); + { + let mut lifecycle = reload_at(tmp.path()); + lifecycle.pointers = ReleasePointers { + next_boot_patch: Some(3), + last_booted_patch: Some(2), + currently_booting_patch: None, + boot_started_at: None, + }; + lifecycle.save_pointers().unwrap(); + } + let reloaded = reload_at(tmp.path()); + assert_eq!(reloaded.pointers().next_boot_patch, Some(3)); + assert_eq!(reloaded.pointers().last_booted_patch, Some(2)); + } + + #[test] + fn pointers_load_default_on_corrupt_file() { + let tmp = TempDir::new().unwrap(); + std::fs::write(tmp.path().join(POINTERS_FILE), "not json").unwrap(); + let lifecycle = reload_at(tmp.path()); + assert_eq!(lifecycle.pointers(), &ReleasePointers::default()); + } + + #[test] + fn artifact_path_helpers_live_in_their_respective_roots() { + // The installed artifact lives under state_root/patches/{N}/. + // The compressed download lives under download_root/{N} (flat). + // They're in DIFFERENT roots so the OS can evict downloads + // independently of installed patches. + let (_tmp, lifecycle) = fixture(); + let download = lifecycle.download_artifact_path(7); + let installed = lifecycle.installed_artifact_path(7); + assert_eq!(installed.parent().unwrap(), lifecycle.patch_dir(7)); + // Download path is `{download_root}/{N}` — its parent is the + // download root, distinct from patch_dir. 
+ assert_ne!(download.parent().unwrap(), lifecycle.patch_dir(7)); + assert_eq!(download.file_name().unwrap(), "7"); + } + + #[test] + fn decide_start_unknown_patch_is_fresh() { + let (_tmp, lifecycle) = fixture(); + assert_eq!( + lifecycle.decide_start(1, "https://example/p", "h"), + DownloadAction::Fresh + ); + } + + #[test] + fn decide_start_resumes_matching_downloading() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloading { + url: "https://example/p".into(), + hash: "h".into(), + signature: None, + }, + ) + .unwrap(); + let dl = lifecycle.download_artifact_path(1); + std::fs::create_dir_all(dl.parent().unwrap()).unwrap(); + std::fs::write(&dl, vec![0u8; 250]).unwrap(); + assert_eq!( + lifecycle.decide_start(1, "https://example/p", "h"), + DownloadAction::Resume { offset: 250 } + ); + } + + #[test] + fn decide_start_downloading_with_missing_file_starts_fresh() { + let (_tmp, lifecycle) = fixture(); + // State says we were 250 bytes in, but the file is gone (e.g. + // OS evicted it from the code cache). + lifecycle + .write_state( + 1, + &PatchState::Downloading { + url: "https://example/p".into(), + hash: "h".into(), + signature: None, + }, + ) + .unwrap(); + assert_eq!( + lifecycle.decide_start(1, "https://example/p", "h"), + DownloadAction::Fresh + ); + } + + #[test] + fn decide_start_url_mismatch_starts_fresh() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloading { + url: "https://old.example/p".into(), + hash: "h".into(), + signature: None, + }, + ) + .unwrap(); + assert_eq!( + lifecycle.decide_start(1, "https://new.example/p", "h"), + DownloadAction::Fresh + ); + } + + #[test] + fn decide_start_hash_mismatch_starts_fresh() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "old".into(), + signature: None, + size: 1000, + }, + ) + .unwrap(); + assert_eq!(lifecycle.decide_start(1, "u", "new"), DownloadAction::Fresh); + } + + #[test] + fn decide_start_complete_skips_fetch() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: None, + size: 1000, + }, + ) + .unwrap(); + let dl = lifecycle.download_artifact_path(1); + std::fs::create_dir_all(dl.parent().unwrap()).unwrap(); + std::fs::write(&dl, vec![0u8; 1000]).unwrap(); + assert_eq!( + lifecycle.decide_start(1, "u", "h"), + DownloadAction::Complete + ); + } + + #[test] + fn decide_start_downloaded_with_missing_file_starts_fresh() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: None, + size: 1000, + }, + ) + .unwrap(); + assert_eq!(lifecycle.decide_start(1, "u", "h"), DownloadAction::Fresh); + } + + #[test] + fn decide_start_skips_installed() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: None, + size: 1000, + }, + ) + .unwrap(); + assert_eq!( + lifecycle.decide_start(1, "u", "h"), + DownloadAction::Skip(SkipReason::AlreadyInstalled) + ); + } + + #[test] + fn decide_start_skips_bad() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Bad { + reason: BadReason::BootCrash, + hash: None, + signature: None, + size: None, + }, + ) + .unwrap(); + assert_eq!( + lifecycle.decide_start(1, "u", "h"), + DownloadAction::Skip(SkipReason::KnownBad) + ); + } + + #[test] + fn 
record_download_started_writes_downloading_state() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .record_download_started(1, "u", "h", Some("s")) + .unwrap(); + assert_eq!( + lifecycle.read_state(1).unwrap(), + PatchState::Downloading { + url: "u".into(), + hash: "h".into(), + signature: Some("s".into()), + } + ); + } + + #[test] + fn record_download_complete_transitions_downloading_to_downloaded() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .record_download_started(1, "u", "h", None) + .unwrap(); + lifecycle.record_download_complete(1, 1234).unwrap(); + assert_eq!( + lifecycle.read_state(1).unwrap(), + PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: None, + size: 1234, + } + ); + } + + #[test] + fn record_download_complete_is_idempotent_on_downloaded() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: None, + size: 1234, + }, + ) + .unwrap(); + // Second call doesn't error; size update reflects new value (e.g. + // a server that retried with a different chunked-encoding total). + lifecycle.record_download_complete(1, 5678).unwrap(); + match lifecycle.read_state(1).unwrap() { + PatchState::Downloaded { size, .. } => assert_eq!(size, 5678), + _ => panic!("expected Downloaded"), + } + } + + #[test] + fn record_download_complete_errors_on_invalid_state() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: None, + size: 100, + }, + ) + .unwrap(); + assert!(lifecycle.record_download_complete(1, 1234).is_err()); + } + + #[test] + fn record_install_complete_transitions_to_installed_and_removes_download() { + let (_tmp, lifecycle) = fixture(); + lifecycle + .write_state( + 1, + &PatchState::Downloaded { + url: "u".into(), + hash: "h".into(), + signature: Some("s".into()), + size: 1234, + }, + ) + .unwrap(); + let download_path = lifecycle.download_artifact_path(1); + std::fs::create_dir_all(download_path.parent().unwrap()).unwrap(); + std::fs::write(&download_path, b"compressed").unwrap(); + + lifecycle.record_install_complete(1, 9999).unwrap(); + + assert_eq!( + lifecycle.read_state(1).unwrap(), + PatchState::Installed { + signature: Some("s".into()), + size: 9999, + } + ); + assert!( + !download_path.exists(), + "download file should be removed after install" + ); + } + + #[test] + fn record_install_complete_errors_on_invalid_state() { + let (_tmp, lifecycle) = fixture(); + // No prior state: not Downloaded. + assert!(lifecycle.record_install_complete(1, 1234).is_err()); + } + + /// Test helper: writes `Installed` state and the artifact file for + /// patch `n` with the given size. Does *not* touch pointers — + /// tests that exercise pointer management set them explicitly. 
+ fn install_state(lifecycle: &PatchLifecycle, n: usize, size: u64) { + let path = lifecycle.installed_artifact_path(n); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, vec![0u8; size as usize]).unwrap(); + lifecycle + .write_state( + n, + &PatchState::Installed { + signature: None, + size, + }, + ) + .unwrap(); + } + + #[test] + fn record_boot_start_requires_installed() { + let (_tmp, mut lifecycle) = fixture(); + assert!(lifecycle.record_boot_start(1).is_err()); + + install_state(&lifecycle, 1, 100); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(1).unwrap(); + assert_eq!(lifecycle.pointers().currently_booting_patch, Some(1)); + assert!(lifecycle.pointers().boot_started_at.is_some()); + } + + #[test] + fn record_boot_start_errs_when_no_next_boot_patch() { + // Ports `patch_manager.rs::record_boot_success_for_patch_tests::errs_if_no_next_boot_patch`. + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + // pointers.next_boot_patch is unset; engine claiming to boot + // patch 1 is a sanity violation. + assert!(lifecycle.record_boot_start(1).is_err()); + } + + #[test] + fn record_boot_start_errs_on_patch_number_mismatch() { + // Ports `patch_manager.rs::record_boot_success_for_patch_tests::errs_if_patch_number_does_not_match_next_patch`. + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + lifecycle.pointers.next_boot_patch = Some(2); + lifecycle.save_pointers().unwrap(); + // Engine claims to boot 1 but our pointer says 2. + assert!(lifecycle.record_boot_start(1).is_err()); + } + + #[test] + // Ports `patch_manager.rs::record_boot_success_for_patch_tests::deletes_other_patch_artifacts`. + fn record_boot_success_promotes_and_cleans_older() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + install_state(&lifecycle, 3, 300); + // Drop a stale compressed download alongside an older patch's + // state to verify cleanup walks both roots through the cleanup() + // → forget_dir() chain. + let dl1 = lifecycle.download_artifact_path(1); + std::fs::create_dir_all(dl1.parent().unwrap()).unwrap(); + std::fs::write(&dl1, b"stale older download").unwrap(); + // Pretend patch 3 is what we're booting. + lifecycle.pointers.next_boot_patch = Some(3); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(3).unwrap(); + lifecycle.record_boot_success().unwrap(); + + assert_eq!(lifecycle.pointers().last_booted_patch, Some(3)); + assert!(lifecycle.pointers().currently_booting_patch.is_none()); + // Older patches removed entirely by record_boot_success — both + // the persistent patch dir and the cache-rooted download file. + assert!(!lifecycle.patch_dir(1).exists()); + assert!(!lifecycle.patch_dir(2).exists()); + assert!(!dl1.exists(), "stale download for older patch removed"); + // Booted patch survives. + assert!(lifecycle.patch_dir(3).exists()); + } + + #[test] + fn record_boot_success_keeps_bad_tombstones_for_older() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + install_state(&lifecycle, 3, 300); + lifecycle.pointers.next_boot_patch = Some(3); + lifecycle.save_pointers().unwrap(); + + // Patch 2 went bad some time ago. 
+ lifecycle.mark_bad(2, BadReason::BootCrash).unwrap(); + + lifecycle.record_boot_start(3).unwrap(); + lifecycle.record_boot_success().unwrap(); + + assert!(!lifecycle.patch_dir(1).exists(), "1 forgotten"); + // Patch 2's tombstone survives older-than cleanup. + assert!(matches!( + lifecycle.read_state(2), + Some(PatchState::Bad { .. }) + )); + assert!(lifecycle.patch_dir(3).exists()); + } + + #[test] + // Cleanup-on-boot-success runs `cleanup_orphan_downloads`, which + // walks `download_root/` and removes anything that doesn't + // correspond to a `Downloading`/`Downloaded` patch. This test + // exercises every "remove" branch in one shot: an orphan (no + // state.json), a stale download (state is `Installed`), and a + // non-numeric file. Live downloads (`Downloading` / + // `Downloaded`) are kept. + fn record_boot_success_sweeps_orphan_and_stale_downloads() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 5, 500); + lifecycle.pointers.next_boot_patch = Some(5); + lifecycle.save_pointers().unwrap(); + + // Drop four files into download_root with varying states: + std::fs::create_dir_all(&lifecycle.download_root).unwrap(); + // 1) Orphan: numeric name with no state.json on disk. + std::fs::write(lifecycle.download_artifact_path(2), b"orphan bytes").unwrap(); + // 2) Stale: numeric name with state.json saying Installed + // (record_install_complete should have removed this; this + // is the safety net). + install_state(&lifecycle, 3, 300); + std::fs::write(lifecycle.download_artifact_path(3), b"stale bytes").unwrap(); + // 3) Non-numeric: garbage in our directory. + std::fs::write(lifecycle.download_root.join("not_a_number"), b"junk").unwrap(); + // 4) Live: a Downloading patch that should survive. + lifecycle + .write_state( + 7, + &PatchState::Downloading { + url: "u".into(), + hash: "h".into(), + signature: None, + }, + ) + .unwrap(); + std::fs::write(lifecycle.download_artifact_path(7), b"in flight").unwrap(); + + lifecycle.record_boot_start(5).unwrap(); + lifecycle.record_boot_success().unwrap(); + + assert!( + !lifecycle.download_artifact_path(2).exists(), + "orphan removed" + ); + assert!( + !lifecycle.download_artifact_path(3).exists(), + "stale Installed download removed" + ); + assert!( + !lifecycle.download_root.join("not_a_number").exists(), + "non-numeric garbage removed" + ); + assert!( + lifecycle.download_artifact_path(7).exists(), + "live Downloading patch's bytes preserved" + ); + } + + #[test] + // Ports + // `patch_manager.rs::record_boot_success_for_patch_tests::deletes_unrecognized_directories_in_patches_dir`. + // We own `patches/` — anything in it whose name isn't a patch + // number is corruption / debug residue / leftover-from-prior-code, + // and the older-than walk takes the opportunity to sweep it up. + fn record_boot_success_deletes_unrecognized_directories_in_patches_dir() { + let (_tmp, mut lifecycle) = fixture(); + // Drop a junk directory and a stray file in patches/ before any + // installs. 
+ std::fs::create_dir_all(lifecycle.patches_root().join("junk_dir")).unwrap(); + std::fs::write(lifecycle.patches_root().join("not_a_number.txt"), b"x").unwrap(); + + install_state(&lifecycle, 3, 300); + lifecycle.pointers.next_boot_patch = Some(3); + lifecycle.save_pointers().unwrap(); + lifecycle.record_boot_start(3).unwrap(); + lifecycle.record_boot_success().unwrap(); + + assert!(!lifecycle.patches_root().join("junk_dir").exists()); + assert!(!lifecycle.patches_root().join("not_a_number.txt").exists()); + assert!(lifecycle.patch_dir(3).exists()); + } + + #[test] + // Ports + // `patch_manager.rs::fall_back_tests::succeeds_if_deleting_artifacts_fails`. + // The patch dirs were already deleted out from under us — every + // delete in mark_bad's cleanup path is graceful so the operation + // still succeeds and the pointer state is recomputed correctly. + fn record_boot_failure_succeeds_if_artifact_dirs_are_already_gone() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + lifecycle.pointers.last_booted_patch = Some(1); + lifecycle.pointers.next_boot_patch = Some(2); + lifecycle.save_pointers().unwrap(); + + // Wipe both patch dirs (state.json and artifact alike) before + // recording the failure — simulates filesystem-level corruption. + std::fs::remove_dir_all(lifecycle.patch_dir(1)).unwrap(); + std::fs::remove_dir_all(lifecycle.patch_dir(2)).unwrap(); + + // Need to set next_boot=2 manually since record_boot_start + // requires the matching state, but for this test we skip that + // and call record_boot_failure directly. + // record_boot_failure doesn't require currently_booting; clears it. + lifecycle.record_boot_failure(2).unwrap(); + + // Patch 2 transitioned to Bad{BootCrash}; mark_bad recreated + // its directory just for the tombstone state.json. + assert!(matches!( + lifecycle.read_state(2), + Some(PatchState::Bad { .. }) + )); + // Patch 1's state file is gone, so recompute can't promote it. + assert_eq!(lifecycle.pointers().next_boot_patch, None); + assert_eq!( + lifecycle.pointers().last_booted_patch, + None, + "stale last_booted pointer cleared by recompute" + ); + } + + #[test] + // Ports + // `patch_manager.rs::next_boot_patch_tests::falls_back_to_last_booted_patch_if_still_bootable` + // and `patch_manager.rs::next_boot_patch_tests::returns_last_booted_patch_if_next_patch_failed_to_boot` + // and `patch_manager.rs::fall_back_tests::sets_next_patch_to_latest_patch_if_both_are_present`. + fn record_boot_failure_marks_bad_and_recomputes_next_boot() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + // Pretend 1 was the last-booted, 2 is queued for next boot. + lifecycle.pointers.last_booted_patch = Some(1); + lifecycle.pointers.next_boot_patch = Some(2); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(2).unwrap(); + lifecycle.record_boot_failure(2).unwrap(); + assert!(matches!( + lifecycle.read_state(2), + Some(PatchState::Bad { .. }) + )); + // Last-booted promoted as the new next-boot. + assert_eq!(lifecycle.pointers().next_boot_patch, Some(1)); + assert!(lifecycle.pointers().currently_booting_patch.is_none()); + } + + #[test] + // Ports + // `patch_manager.rs::fall_back_tests::clears_next_and_last_patches_if_both_fail_validation`, + // adapted for the new pointer-vs-state separation: `last_booted`'s + // pointer is *kept* (as a Bad breadcrumb) where the old code cleared + // it. 
The functional outcome — `next_boot` becomes None when both + // candidates are unusable — is the same. + fn record_boot_failure_clears_next_boot_when_last_booted_is_also_bad() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + lifecycle.mark_bad(1, BadReason::BootCrash).unwrap(); + lifecycle.pointers.last_booted_patch = Some(1); + lifecycle.pointers.next_boot_patch = Some(2); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(2).unwrap(); + lifecycle.record_boot_failure(2).unwrap(); + + // Both candidates are Bad; no fallback target → boot base. + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn recompute_next_boot_clears_stale_last_booted() { + let (_tmp, mut lifecycle) = fixture(); + // last_booted points at a patch we've forgotten — e.g. an older + // release version was wiped and we're carrying a stale pointer. + lifecycle.pointers.last_booted_patch = Some(7); + lifecycle.save_pointers().unwrap(); + + lifecycle.recompute_next_boot().unwrap(); + + assert_eq!(lifecycle.pointers().last_booted_patch, None); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn recompute_next_boot_keeps_bad_last_booted_pointer() { + // A `Bad` patch in last_booted is a useful breadcrumb — recompute + // shouldn't promote it (next_boot stays None) but shouldn't clear + // the historical pointer either. + let (_tmp, mut lifecycle) = fixture(); + lifecycle + .write_state( + 3, + &PatchState::Bad { + reason: BadReason::BootCrash, + hash: None, + signature: None, + size: None, + }, + ) + .unwrap(); + lifecycle.pointers.last_booted_patch = Some(3); + lifecycle.save_pointers().unwrap(); + + lifecycle.recompute_next_boot().unwrap(); + + assert_eq!(lifecycle.pointers().last_booted_patch, Some(3)); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn detect_boot_crash_on_init_recovers_when_breadcrumb_set() { + let tmp = TempDir::new().unwrap(); + // First "process": records boot start, then "crashes" without + // recording success or failure. + { + let mut lifecycle = reload_at(tmp.path()); + install_state(&lifecycle, 1, 100); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + lifecycle.record_boot_start(1).unwrap(); + // Drop without record_boot_success/failure. + } + // Second "process": init detects the breadcrumb and marks Bad. + let mut lifecycle = reload_at(tmp.path()); + let recovered = lifecycle.detect_boot_crash_on_init().unwrap(); + assert_eq!(recovered, Some(1)); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { .. }) + )); + assert!(lifecycle.pointers().currently_booting_patch.is_none()); + } + + #[test] + fn detect_boot_crash_on_init_is_noop_when_no_breadcrumb() { + let (_tmp, mut lifecycle) = fixture(); + assert_eq!(lifecycle.detect_boot_crash_on_init().unwrap(), None); + } + + #[test] + // Ports + // `patch_manager.rs::validate_next_boot_patch_tests::clears_next_boot_patch_if_it_is_not_bootable` + // and the size-mismatch half of + // `patch_manager.rs::validate_next_boot_patch_tests::strict_mode_detects_tampered_patch_at_boot_time`. + fn validate_next_boot_patch_marks_bad_on_size_mismatch() { + let (_tmp, mut lifecycle) = fixture(); + // Install patch 1 with a state.json claiming size=100. + install_state(&lifecycle, 1, 100); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + // Truncate the artifact so it no longer matches. 
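+        // (install_state above recorded size=100 in state.json; the 5-byte
+        // file written here can no longer match that recorded size.)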
+ std::fs::write(lifecycle.installed_artifact_path(1), b"short").unwrap(); + + let result = lifecycle.validate_next_boot_patch(None, PatchVerificationMode::default()); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + // Ports + // `patch_manager.rs::validate_next_boot_patch_tests::does_nothing_if_no_next_boot_patch` + // and `patch_manager.rs::fall_back_tests::does_nothing_if_no_patch_exists`. + fn validate_next_boot_patch_is_noop_when_unset() { + let (_tmp, mut lifecycle) = fixture(); + assert!(lifecycle + .validate_next_boot_patch(None, PatchVerificationMode::default()) + .is_ok()); + } + + #[test] + fn validate_next_boot_patch_marks_bad_when_artifact_missing() { + let (_tmp, mut lifecycle) = fixture(); + // State says Installed but the dlc.vmcode file is gone (e.g. + // the user cleared app data). Validation should mark Bad. + lifecycle + .write_state( + 1, + &PatchState::Installed { + signature: None, + size: 100, + }, + ) + .unwrap(); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + let result = lifecycle.validate_next_boot_patch(None, PatchVerificationMode::default()); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn validate_next_boot_patch_marks_bad_when_pointer_targets_non_installed() { + let (_tmp, mut lifecycle) = fixture(); + // Pointer says boot patch 1, but patch 1 is in Downloading + // (shouldn't happen in normal flow, but pointers and state can + // diverge through corruption). Validation should mark Bad. + lifecycle + .write_state( + 1, + &PatchState::Downloading { + url: "u".into(), + hash: "h".into(), + signature: None, + }, + ) + .unwrap(); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + let result = lifecycle.validate_next_boot_patch(None, PatchVerificationMode::default()); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + // base64-encoded RSA public key from the prior `signing.rs` / + // `patch_manager.rs` test fixtures. Reused here so the Strict-mode + // happy path can verify a real signature without committing a + // private key to the repo. + const TEST_PUBLIC_KEY: &str = "MIIBCgKCAQEA2wdpEGbuvlPsb9i0qYrfMefJnEw1BHTi8SYZTKrXOvJWmEpPE1hWfbkvYzXu5a96gV1yocF3DMwn04VmRlKhC4AhsD0NL0UNhYhotbKG91Kwi1vAXpHhCdz5gQEBw0K1uB4Jz+zK6WK+31PryYpwLwbyXNqXoY8IAAUQ4STsHYV5w+BMSi8pepWMRd7DR9RHcbNOZlJvdBQ5NxvB4JN4dRMq8cC73ez1P9d7Dfwv3TWY+he9EmuXLT2UivZSlHIrGBa7MFfqyUe2ro0F7Te/B0si12itBbWIqycvqcXjeOPNn6WEpqN7IWjb9LUh162JyYaz5Lb/VeeJX8LKtElccwIDAQAB"; + + /// Real signature of SHA256(`b"1"`) produced with the private key + /// matching `TEST_PUBLIC_KEY`. Carried over verbatim from the prior + /// `patch_manager.rs::SIGNATURE` constant. 
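+    /// (SHA256(`b"1"`) is the same digest the old fixtures called
+    /// `INFLATED_PATCH_HASH`, so the signature bytes are unchanged.)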
+ const INFLATED_PATCH_SIGNATURE: &str = "ZGccldv01XqHQ76bXuKV/9EQnNK0Q+reQ9bJHVnGfLldF+BLRx0divgPfKP5Df9BJPA3dw1Z1VortfepmMGebP3kS593l5zoktu9MIepxvRAFWNKE5PDTIIvCL/ddTPEHt6NNCeD6HLOMLzbEX3cFZa+lq3UymGi0aqA5DlXirJBGtopojc9nOXZ22n/qHNZIHEkGcqKbSMSK9oC55whKHnlJTbCXdmSyDc65B4PcgseqJom1riVK3XGW1YMrSpuMAU+CDT7HhdESmI1UtH1bYeBITfRhQztdDTfti2vJTf2Y+lYC99CFiISgD7f1m0KUcC+VnEAMZSYtgxSk6AX2A=="; + + /// Test helper: writes an Installed state with a 100-byte artifact + /// whose hash *won't* match the recorded hash. Suitable for the + /// failure-path Strict-mode tests — those only need the + /// signature-verification call to fail somehow (missing / + /// invalid / bad pub key) and don't exercise the file-hash leg. + fn install_signed(lifecycle: &PatchLifecycle, n: usize, signature: Option<&str>) { + let path = lifecycle.installed_artifact_path(n); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, vec![0u8; 100]).unwrap(); + lifecycle + .write_state( + n, + &PatchState::Installed { + signature: signature.map(String::from), + size: 100, + }, + ) + .unwrap(); + } + + /// Test helper for the Strict-mode happy path. Writes the 1-byte + /// artifact `b"1"` and an `Installed` state whose signature matches + /// SHA256(`b"1"`). Strict validation rehashes the file then verifies + /// the signature against that hash + the public key. + fn install_with_valid_signature(lifecycle: &PatchLifecycle, n: usize) { + let path = lifecycle.installed_artifact_path(n); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, b"1").unwrap(); + lifecycle + .write_state( + n, + &PatchState::Installed { + signature: Some(INFLATED_PATCH_SIGNATURE.to_string()), + size: 1, + }, + ) + .unwrap(); + } + + #[test] + // Ports + // `patch_manager.rs::validate_next_boot_patch_tests::strict_mode_succeeds_with_valid_signature_at_boot_time`. + fn validate_next_boot_strict_mode_succeeds_with_valid_signature() { + let (_tmp, mut lifecycle) = fixture(); + install_with_valid_signature(&lifecycle, 1); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + lifecycle + .validate_next_boot_patch(Some(TEST_PUBLIC_KEY), PatchVerificationMode::Strict) + .unwrap(); + // Patch survived — still Installed, still next_boot. + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Installed { .. }) + )); + assert_eq!(lifecycle.pointers().next_boot_patch, Some(1)); + } + + #[test] + fn validate_next_boot_strict_mode_succeeds_with_no_public_key() { + // Ports `patch_manager.rs::validate_next_boot_patch_tests::succeeds_with_arbitrary_signature_if_no_public_key`. + // No public_key configured — Strict mode skips signature + // verification entirely and just validates size. + let (_tmp, mut lifecycle) = fixture(); + install_signed(&lifecycle, 1, Some("ignored")); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + lifecycle + .validate_next_boot_patch(None, PatchVerificationMode::Strict) + .unwrap(); + // Patch is still good. + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Installed { .. }) + )); + } + + #[test] + fn validate_next_boot_strict_mode_marks_bad_when_signature_missing() { + // Ports `patch_manager.rs::validate_next_boot_patch_tests::strict_mode_fails_boot_validation_if_signature_missing`. + // Strict + public_key configured + Installed state has no + // signature → ValidationFailed. 
+ let (_tmp, mut lifecycle) = fixture(); + install_signed(&lifecycle, 1, None); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + let result = lifecycle + .validate_next_boot_patch(Some(TEST_PUBLIC_KEY), PatchVerificationMode::Strict); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + } + + #[test] + fn validate_next_boot_strict_mode_marks_bad_when_signature_invalid() { + // Ports `patch_manager.rs::validate_next_boot_patch_tests::strict_mode_fails_boot_validation_if_signature_invalid`. + let (_tmp, mut lifecycle) = fixture(); + install_signed(&lifecycle, 1, Some("not_a_valid_signature")); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + let result = lifecycle + .validate_next_boot_patch(Some(TEST_PUBLIC_KEY), PatchVerificationMode::Strict); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + } + + #[test] + // Ports + // `patch_manager.rs::record_boot_failure_for_patch_tests::preserves_last_booted_patch_on_failure_but_marks_bad`. + // + // Scenario: patch 1 was successfully booted (last_booted=1, + // next_boot=1). Then patch 1 fails to boot. The new behavior + // *keeps* the last_booted pointer pointing at 1 — the patch is + // now Bad{BootCrash} but the historical "this is what last + // booted" remains as a useful breadcrumb. is_known_bad_patch + // returns true. next_boot is None (recompute can't promote a + // Bad patch). + fn record_boot_failure_keeps_last_booted_pointer_when_failed_patch_was_last_booted() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + lifecycle.pointers.last_booted_patch = Some(1); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(1).unwrap(); + lifecycle.record_boot_failure(1).unwrap(); + + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::BootCrash, + .. + }) + )); + assert_eq!( + lifecycle.pointers().last_booted_patch, + Some(1), + "last_booted breadcrumb preserved even though the patch is now Bad" + ); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn rolled_back_patch_not_resurrected_when_replacement_fails() { + // Ports + // `patch_manager.rs::fall_back_tests::rollback_then_failed_replacement_does_not_resurrect_rolled_back_patch`. + // + // Scenario: patch 2 was successfully booted (last_booted=2), + // server then rolled it back (cleanup forgets it), patch 3 was + // installed and we're about to boot it. Patch 3 fails to boot. + // Recompute must not promote 2 — its state.json is gone, so + // last_booted's stale pointer should be cleared instead. + let (_tmp, mut lifecycle) = fixture(); + // Pretend patch 2 was booted then rolled back: forget it. + // (Equivalent to receiving a server rollback for 2.) + install_state(&lifecycle, 3, 300); + lifecycle.pointers.last_booted_patch = Some(2); // stale + lifecycle.pointers.next_boot_patch = Some(3); + lifecycle.save_pointers().unwrap(); + + lifecycle.record_boot_start(3).unwrap(); + lifecycle.record_boot_failure(3).unwrap(); + + // Patch 2 is not promoted (its state is gone); next_boot ends + // up None because there's no usable fallback. 
+ assert_eq!(lifecycle.pointers().next_boot_patch, None); + assert_eq!( + lifecycle.pointers().last_booted_patch, + None, + "stale last_booted pointer cleared by recompute" + ); + } + + #[test] + fn validate_then_promote_catches_corrupted_last_booted() { + // Ports + // `patch_manager.rs::validate_next_boot_patch_tests::does_not_fall_back_to_last_booted_patch_if_corrupted`. + // + // Scenario: next_boot fails boot, recompute promotes + // last_booted, but last_booted's artifact is corrupt (size + // mismatch). On the next boot attempt, validate catches it + // and marks Bad — boot falls through to base. + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + lifecycle.pointers.last_booted_patch = Some(1); + lifecycle.pointers.next_boot_patch = Some(2); + lifecycle.save_pointers().unwrap(); + + // Truncate patch 1's artifact so it'll fail validation later. + std::fs::write(lifecycle.installed_artifact_path(1), b"short").unwrap(); + + // Patch 2 fails to boot. + lifecycle.record_boot_start(2).unwrap(); + lifecycle.record_boot_failure(2).unwrap(); + // Recompute promoted 1 (state still says Installed), but... + assert_eq!(lifecycle.pointers().next_boot_patch, Some(1)); + + // ...the next boot's validation pass catches the corruption. + let result = lifecycle.validate_next_boot_patch(None, PatchVerificationMode::default()); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { + reason: BadReason::ValidationFailed, + .. + }) + )); + assert_eq!(lifecycle.pointers().next_boot_patch, None); + } + + #[test] + fn validate_next_boot_strict_mode_marks_bad_when_public_key_invalid() { + // Ports `patch_manager.rs::validate_next_boot_patch_tests::strict_mode_fails_boot_validation_if_public_key_invalid`. + let (_tmp, mut lifecycle) = fixture(); + install_signed(&lifecycle, 1, Some(INFLATED_PATCH_SIGNATURE)); + lifecycle.pointers.next_boot_patch = Some(1); + lifecycle.save_pointers().unwrap(); + + // public_key won't decode as base64 RSA → check_signature + // errors → validate_installed_patch errors → mark Bad. + let result = + lifecycle.validate_next_boot_patch(Some("not base64"), PatchVerificationMode::Strict); + assert!(result.is_err()); + assert!(matches!( + lifecycle.read_state(1), + Some(PatchState::Bad { .. }) + )); + } + + #[test] + // Ports the unbooted-deletion half of + // `patch_manager.rs::next_boot_patch_tests::adding_patch_deletes_unbooted_patch_not_last_booted`. + fn promote_to_next_boot_replaces_unbooted_predecessor() { + let (_tmp, mut lifecycle) = fixture(); + install_state(&lifecycle, 1, 100); + install_state(&lifecycle, 2, 200); + lifecycle.promote_to_next_boot(1).unwrap(); + // Now install 2 and promote it; 1 was never booted (last_booted is + // None) and should be forgotten. + lifecycle.promote_to_next_boot(2).unwrap(); + assert_eq!(lifecycle.pointers().next_boot_patch, Some(2)); + assert!(!lifecycle.patch_dir(1).exists(), "unbooted 1 forgotten"); + } + + #[test] + // Ports the preservation half of + // `patch_manager.rs::next_boot_patch_tests::adding_patch_deletes_unbooted_patch_not_last_booted` + // and the running-patch protection in + // `patch_manager.rs::record_boot_failure_for_patch_tests::preserves_last_booted_patch_on_failure_but_marks_bad`. 
+    fn promote_to_next_boot_preserves_last_booted_patch() {
+        let (_tmp, mut lifecycle) = fixture();
+        install_state(&lifecycle, 1, 100);
+        install_state(&lifecycle, 2, 200);
+        lifecycle.pointers.last_booted_patch = Some(1);
+        lifecycle.pointers.next_boot_patch = Some(1);
+        lifecycle.save_pointers().unwrap();
+        // Now install 2 and promote it; 1 is last_booted so survives.
+        lifecycle.promote_to_next_boot(2).unwrap();
+        assert_eq!(lifecycle.pointers().next_boot_patch, Some(2));
+        assert!(lifecycle.patch_dir(1).exists(), "last_booted 1 preserved");
+    }
+
+    #[test]
+    fn promote_to_next_boot_requires_installed() {
+        let (_tmp, mut lifecycle) = fixture();
+        assert!(lifecycle.promote_to_next_boot(1).is_err());
+    }
+}
diff --git a/library/src/cache/mod.rs b/library/src/cache/mod.rs
index 4bf5e330..4d8e0334 100644
--- a/library/src/cache/mod.rs
+++ b/library/src/cache/mod.rs
@@ -1,5 +1,5 @@
-mod disk_io;
-mod patch_manager;
+pub(crate) mod disk_io;
+pub mod lifecycle;
 mod signing;
 pub mod updater_state;
diff --git a/library/src/cache/patch_manager.rs b/library/src/cache/patch_manager.rs
deleted file mode 100644
index 6fc8b2e7..00000000
--- a/library/src/cache/patch_manager.rs
+++ /dev/null
@@ -1,1615 +0,0 @@
-use super::{disk_io, signing, PatchInfo};
-use crate::file_errors::{FileOperation, IoResultExt};
-use crate::yaml::PatchVerificationMode;
-use anyhow::{bail, Context, Result};
-use core::fmt::Debug;
-use serde::{Deserialize, Serialize};
-use std::{
-    collections::HashSet,
-    path::{Path, PathBuf},
-};
-
-#[cfg(test)]
-use mockall::automock;
-#[cfg(test)]
-use tempfile::TempDir;
-
-const PATCHES_DIR_NAME: &str = "patches";
-const PATCHES_STATE_FILE_NAME: &str = "patches_state.json";
-const PATCH_ARTIFACT_FILENAME: &str = "dlc.vmcode";
-
-/// Information about a patch that is persisted to disk.
-#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Hash)]
-struct PatchMetadata {
-    /// The number of the patch.
-    number: usize,
-
-    /// The size of the patch artifact on disk.
-    size: u64,
-
-    /// The hash of the patch artifact on disk.
-    hash: String,
-
-    /// The signature of `hash`.
-    signature: Option<String>,
-}
-
-/// What gets serialized to disk
-#[derive(Debug, Default, Deserialize, Serialize)]
-struct PatchesState {
-    /// Historical record of the patch that most recently completed
-    /// `record_boot_success` on any run. Used as a fallback target by
-    /// `try_fall_back_from_patch` when the next-boot patch is invalid.
-    /// Updated only by `record_boot_success` — boot failures and
-    /// server-driven rollbacks of the running patch don't erase the
-    /// historical fact that the patch booted. Not the same thing as the
-    /// patch this process is running; see `running_patch` for that.
-    last_booted_patch: Option<PatchMetadata>,
-
-    /// The patch that will be run on the next app boot, if any. This may be the same
-    /// as the last booted patch patch if no new patch has been downloaded.
-    next_boot_patch: Option<PatchMetadata>,
-
-    /// This is given a value when we start booting a patch (record_boot_start_for_patch) and is
-    /// cleared when:
-    /// - the patch boots successfully (record_boot_success)
-    /// - the patch fails to boot (record_boot_failure_for_patch)
-    /// - the system initializes (on_init, we take this to mean the patch failed to boot)
-    currently_booting_patch: Option<PatchMetadata>,
-
-    /// Unix timestamp (seconds) when the current boot attempt started.
-    /// Used to compute elapsed time in crash recovery diagnostics.
-    /// Added in 1.6.93; older state files will deserialize this as None.
-    boot_started_at: Option<u64>,
-
-    /// A list of patch numbers that we have tried and failed to install.
-    /// We should never attempt to download or install these again for the
-    /// current release.
-    known_bad_patches: HashSet<usize>,
-}
-
-/// Abstracts the storage of patches on disk.
-///
-/// The implementation of this (PatchManager) should only be responsible for
-/// translating what is on disk into a form that is useful for the updater and
-/// vice versa. Some business logic has crept in in the form of validation, and
-/// we should consider moving that into a separate module.
-#[cfg_attr(test, automock)]
-pub trait ManagePatches {
-    /// Copies the patch file at file_path to the manager's directory structure
-    /// sets this patch as the next patch to boot.
-    ///
-    /// The explicit lifetime is required for automock to work with Options.
-    /// See https://github.com/asomers/mockall/issues/61.
-    #[allow(clippy::needless_lifetimes)]
-    fn add_patch<'a>(
-        &mut self,
-        number: usize,
-        file_path: &Path,
-        hash: &str,
-        signature: Option<&'a str>,
-    ) -> Result<()>;
-
-    /// The patch most recently known to have successfully booted on a prior
-    /// run, or None if no patch is installed. Used as a fallback target by
-    /// `try_fall_back_from_patch` when the next-boot patch becomes invalid.
-    /// Not the same thing as the patch this process is running; for that, see
-    /// [`running_patch`].
-    fn last_successfully_booted_patch(&self) -> Option<PatchInfo>;
-
-    /// The patch this process is using, set at `report_launch_start` from
-    /// whatever `next_boot_patch` was at that moment. `None` means the
-    /// process is running the base release. Survives server-driven
-    /// rollbacks of that patch (the process is still using it). Backed by
-    /// a session-scoped global, not by `PatchesState` on disk: a fresh
-    /// process starts with `None` until the next `report_launch_start`,
-    /// which flutter_engine calls before `dart:ffi` is available, so the
-    /// `None` window is not observable from Dart. Distinct from
-    /// `currently_booting_patch`, which is the persisted "boot in progress"
-    /// breadcrumb used for cross-restart crash detection.
-    fn running_patch(&self) -> Option<PatchInfo>;
-
-    /// Sets the patch this process is using. Called from
-    /// `report_launch_start` with `Some(n)` when launching a patch, or
-    /// `None` when launching the base release.
-    fn set_running_patch(&mut self, patch_number: Option<usize>);
-
-    /// The patch we are currently booting, if any. This will only have a value:
-    /// 1. Between record_boot_start_for_patch and record_boot_success or record_boot_failure_for_patch
-    /// 2. On init if we attempted to boot a patch but never recorded a successful boot (e.g., because
-    ///    the system crashed).
-    fn currently_booting_patch(&self) -> Option<PatchInfo>;
-
-    /// Unix timestamp (seconds) when the current boot attempt started, if known.
-    fn boot_started_at(&self) -> Option<u64>;
-
-    /// Returns the next patch to boot, or None if:
-    /// - no patches have been downloaded
-    /// - we cannot boot from the patch(es) on disk
-    fn next_boot_patch(&self) -> Option<PatchInfo>;
-
-    /// Performs integrity checks on the next boot patch and updates the state accordingly. Returns
-    /// an error if the patch exists but is not bootable.
-    fn validate_next_boot_patch(&mut self) -> anyhow::Result<()>;
-
-    /// Record that we're booting. If we have a next path, updates the last
-    /// attempted patch to be the next boot patch.
- fn record_boot_start_for_patch(&mut self, patch_number: usize) -> Result<()>; - - /// Marks last_attempted_patch as "good", updates last_booted_patch to be the same, - /// and deletes all patch artifacts older than the last_booted_patch. - fn record_boot_success(&mut self) -> Result<()>; - - /// Records that the patch with number patch_number failed to boot, and ensures - /// that it will never be returned as the next boot or last booted patch. - fn record_boot_failure_for_patch(&mut self, patch_number: usize) -> Result<()>; - - /// Whether we have failed to boot from the patch with `patch_number`. - fn is_known_bad_patch(&self, patch_number: usize) -> bool; - - /// Deletes artifacts for the provided patch_number if they exist. - /// If the patch is the next_boot_patch, it is cleared. - fn remove_patch(&mut self, patch_number: usize) -> Result<()>; - - /// Resets the patch manager to its initial state, removing all patches. This is - /// intended to be used when a new release version is installed. - fn reset(&mut self) -> Result<()>; -} - -// This allows us to use the Debug trait on dyn ManagePatches, which is -// required to have it as a property of UpdaterState. -impl Debug for dyn ManagePatches { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "ManagePatches") - } -} - -#[derive(Debug)] -pub struct PatchManager { - /// The base directory used to store patch artifacts and state. - /// The directory structure created within this directory is: - /// patches_state.json - /// patches/ - /// / - /// dlc.vmcode - /// / - /// dlc.vmcode - root_dir: PathBuf, - - /// Metadata about the patches we have downloaded that is persisted to disk. - patches_state: PatchesState, - - /// The key used to sign patch hashes for the current release, if any. If this is - /// not None, all patches must have a signature that can be verified with this key. - patch_public_key: Option, - - /// Controls when signature verification occurs: at boot time (strict) or only - /// at install time (install_only). - verification_mode: PatchVerificationMode, -} - -impl PatchManager { - /// Creates a new PatchManager with the given root directory. This directory is - /// assumed to exist. The PatchManager will use this directory to store its - /// state and patch binaries. - pub fn new( - root_dir: PathBuf, - patch_public_key: Option<&str>, - verification_mode: PatchVerificationMode, - ) -> Self { - let patches_state = Self::load_patches_state(&root_dir).unwrap_or_default(); - - Self { - root_dir, - patches_state, - patch_public_key: patch_public_key.map(|s| s.to_owned()), - verification_mode, - } - } - - fn load_patches_state(root_dir: &Path) -> Option { - let path = root_dir.join(PATCHES_STATE_FILE_NAME); - match disk_io::read(&path) { - Ok(maybe_state) => maybe_state, - Err(e) => { - shorebird_debug!( - "Failed to load patches state from {}: {}", - path.display(), - e - ); - None - } - } - } - - fn save_patches_state(&self) -> Result<()> { - let path = self.root_dir.join(PATCHES_STATE_FILE_NAME); - disk_io::write(&self.patches_state, &path) - } - - /// The directory where all patch artifacts are stored. - fn patches_dir(&self) -> PathBuf { - self.root_dir.join(PATCHES_DIR_NAME) - } - - /// The directory where artifacts for the patch with the given number are stored. - fn patch_dir(&self, patch_number: usize) -> PathBuf { - self.patches_dir().join(patch_number.to_string()) - } - - /// The path to the runnable patch artifact with the given number. 
Runnable patch artifact files are - /// named .vmcode - fn patch_artifact_path(&self, patch_number: usize) -> PathBuf { - self.patch_dir(patch_number).join(PATCH_ARTIFACT_FILENAME) - } - - fn patch_info_for_number(&self, patch_number: usize) -> PatchInfo { - PatchInfo { - path: self.patch_artifact_path(patch_number), - number: patch_number, - } - } - - /// Checks that the patch with the given number: - /// - Has an artifact on disk - /// - That artifact on disk is the same size it was when it was installed - /// - In Strict mode: verifies the signature against the hash - /// - /// Returns Ok if the patch is bootable, or an error if it is not. - fn validate_patch_is_bootable(&self, patch: &PatchMetadata) -> Result<()> { - let artifact_path = self.patch_artifact_path(patch.number); - if !Path::exists(&artifact_path) { - bail!( - "Patch {} does not exist at {}", - patch.number, - artifact_path.display() - ); - } - - let artifact_size_on_disk = std::fs::metadata(&artifact_path) - .with_file_context(FileOperation::GetMetadata, &artifact_path)? - .len(); - if artifact_size_on_disk != patch.size { - bail!( - "Patch {} has size {} on disk, but expected size {}", - patch.number, - artifact_size_on_disk, - patch.size - ); - } - - // In Strict mode, verify the signature at boot time. - // This ensures the patch file hasn't been tampered with since installation. - if self.verification_mode == PatchVerificationMode::Strict { - if let Some(public_key) = &self.patch_public_key { - let signature = patch - .signature - .clone() - .context("Patch signature is missing")?; - - // Compute the hash of the patch file on disk and verify it matches. - let patch_hash = signing::hash_file(&artifact_path)?; - signing::check_signature(&patch_hash, &signature, public_key)?; - } else { - shorebird_info!("No public key provided, skipping signature verification"); - } - } - - Ok(()) - } - - fn delete_patch_artifacts(&mut self, patch_number: usize) -> Result<()> { - let patch_dir = self.patch_dir(patch_number); - if !patch_dir.exists() { - shorebird_debug!("Patch {} not installed, nothing to delete", patch_number); - return Ok(()); - } - - shorebird_info!("Deleting patch artifacts for patch {}", patch_number); - - std::fs::remove_dir_all(&patch_dir) - .map_err(|e| { - shorebird_error!("Failed to delete patch dir {}: {}", patch_dir.display(), e); - e - }) - .with_file_context(FileOperation::DeleteDir, &patch_dir) - } - - /// Deletes artifacts for the provided bad_patch_number and attempts to set the next_boot_patch to the last - /// successfully booted patch. If the last successfully booted patch is not bootable or has the same number - /// as the patch we're falling back from, we clear it as well. - fn try_fall_back_from_patch(&mut self, bad_patch_number: usize) -> Result<()> { - // Continue even if we fail to delete the patch artifacts. It's more important to not try to - // boot from a bad patch than to delete its artifacts. - // No need to log failure – delete_patch_artifacts logs for us. 
- let _ = self.delete_patch_artifacts(bad_patch_number); - - let is_bad_patch_last_booted_patch = self - .patches_state - .last_booted_patch - .clone() - .map(|patch| patch.number == bad_patch_number) - .unwrap_or(false); - let is_bad_patch_next_boot_patch = self - .patches_state - .next_boot_patch - .clone() - .map(|patch| patch.number == bad_patch_number) - .unwrap_or(false); - - if is_bad_patch_last_booted_patch && is_bad_patch_next_boot_patch { - // The bad patch is both the last successfully-booted patch and - // the queued next-boot patch. Clear `next_boot_patch` so we boot - // the base release on next launch, but leave `last_booted_patch` - // alone — the patch *did* successfully boot, and the running - // process is still using it. Erasing that historical record - // while the field was being read by FFI as "what's running" - // caused the patch-to-release rollback bug in - // shorebirdtech/shorebird#3728. - // - // The else-if fallback path consults `is_known_bad_patch` - // before promoting `last_booted_patch` back to - // `next_boot_patch`. For server-driven rollbacks, `remove_patch` - // adds the patch to `known_bad_patches` first, so a later - // fallback can't accidentally re-promote a rolled-back patch - // even if its artifact deletion above silently failed. - shorebird_info!( - "Clearing next boot patch (rollback target was both last booted and next boot)" - ); - self.patches_state.next_boot_patch = None; - } else if is_bad_patch_next_boot_patch { - shorebird_info!("Clearing next boot patch"); - self.patches_state.next_boot_patch = None; - - if let Some(last_boot_patch) = self.patches_state.last_booted_patch.clone() { - let is_known_bad = self - .patches_state - .known_bad_patches - .contains(&last_boot_patch.number); - if !is_known_bad && self.validate_patch_is_bootable(&last_boot_patch).is_ok() { - shorebird_info!( - "Setting last booted patch {} as next boot patch", - last_boot_patch.number - ); - self.patches_state.next_boot_patch = Some(last_boot_patch); - } else { - shorebird_info!( - "Last booted patch {} is not a valid fallback, deleting artifacts", - last_boot_patch.number - ); - self.patches_state.last_booted_patch = None; - // No need to log failure – delete_patch_artifacts logs for us. - let _ = self.delete_patch_artifacts(last_boot_patch.number); - } - } - } - - self.save_patches_state() - } - - /// Deletes all patch artifacts with numbers less than patch_number. - /// We intentionally only delete older patch artifacts. Consider the case: - /// - /// 1. We start booting patch 2 - /// 2. While booting (i.e., in between boot start and boot success), we download and inflate patch 3 - /// 3. We finish booting patch 2 - /// - /// Deleting all other patch artifacts would delete patch 3, and because we've "seen" patch 3, - /// we would never try to download it again (it would be considered "bad"). - fn delete_patch_artifacts_older_than(&mut self, patch_number: usize) -> Result<()> { - shorebird_info!("Deleting patch artifacts older than {}", patch_number); - for entry in std::fs::read_dir(self.patches_dir())? { - let entry = entry?; - match entry.file_name().to_string_lossy().parse::() { - Ok(number) if number < patch_number => { - // delete_patch_artifacts logs for us, no need to log here. 
- let _ = self.delete_patch_artifacts(number); - } - Ok(_) => {} - Err(e) => { - shorebird_error!( - "Failed to parse patch number from patches directory entry, deleting: {}", - e - ); - // Attempt to delete the unrecognized directory, but don't stop - // the artifact deletion process if it fails. - let _ = std::fs::remove_dir_all(entry.path()); - } - } - } - - Ok(()) - } -} - -impl ManagePatches for PatchManager { - // The explicit lifetime is required for automock to work with Options. - // See https://github.com/asomers/mockall/issues/61. - #[allow(clippy::needless_lifetimes)] - fn add_patch<'a>( - &mut self, - patch_number: usize, - file_path: &Path, - hash: &str, - signature: Option<&'a str>, - ) -> Result<()> { - if !file_path.exists() { - bail!("Patch file {} does not exist", file_path.display()); - } - - // In InstallOnly mode, verify signature at install time. - // In Strict mode, signature verification happens at boot time instead. - if self.verification_mode == PatchVerificationMode::InstallOnly { - if let Some(public_key) = &self.patch_public_key { - let sig = signature.context("Patch signature is missing")?; - signing::check_signature(hash, sig, public_key)?; - } - } - - let patch_path = self.patch_artifact_path(patch_number); - - let patch_dir = self.patch_dir(patch_number); - std::fs::create_dir_all(&patch_dir) - .with_file_context(FileOperation::CreateDir, &patch_dir)?; - - std::fs::rename(file_path, &patch_path) - .with_file_context(FileOperation::RenameFile, file_path)?; - - let new_patch = PatchMetadata { - number: patch_number, - size: std::fs::metadata(&patch_path) - .with_file_context(FileOperation::GetMetadata, &patch_path)? - .len(), - hash: hash.to_owned(), - signature: signature.map(|s| s.to_owned()), - }; - - // If a patch was never booted (next_boot_patch != last_booted_patch), we should delete - // it here before setting next_boot_patch to the new patch. 
- if let (Some(last_boot_patch), Some(next_boot_patch)) = ( - self.patches_state.last_booted_patch.clone(), - self.patches_state.next_boot_patch.clone(), - ) { - if last_boot_patch.number != next_boot_patch.number { - shorebird_info!( - "Patch {} was installed but never booted never booted, deleting artifacts", - next_boot_patch.number - ); - let _ = self.delete_patch_artifacts(next_boot_patch.number); - } - } - - self.patches_state.next_boot_patch = Some(new_patch); - self.save_patches_state() - } - - fn last_successfully_booted_patch(&self) -> Option { - self.patches_state - .last_booted_patch - .as_ref() - .map(|patch| self.patch_info_for_number(patch.number)) - } - - fn running_patch(&self) -> Option { - crate::config::running_patch_number().map(|number| self.patch_info_for_number(number)) - } - - fn set_running_patch(&mut self, patch_number: Option) { - crate::config::set_running_patch_number(patch_number); - } - - fn currently_booting_patch(&self) -> Option { - self.patches_state - .currently_booting_patch - .as_ref() - .map(|patch| self.patch_info_for_number(patch.number)) - } - - fn boot_started_at(&self) -> Option { - self.patches_state.boot_started_at - } - - fn validate_next_boot_patch(&mut self) -> anyhow::Result<()> { - let next_boot_patch = match self.patches_state.next_boot_patch.clone() { - Some(patch) => patch, - None => return anyhow::Ok(()), - }; - - shorebird_info!("Validating patch {}", next_boot_patch.number); - - if let Err(e) = self.validate_patch_is_bootable(&next_boot_patch) { - shorebird_error!("Patch {} is not bootable: {}", next_boot_patch.number, e); - - if let Err(e) = self.try_fall_back_from_patch(next_boot_patch.number) { - shorebird_error!( - "Failed to fall back from next_boot_patch {}: {}", - next_boot_patch.number, - e - ); - } - - return Err(e); - } - - anyhow::Ok(()) - } - - fn next_boot_patch(&self) -> Option { - self.patches_state - .next_boot_patch - .as_ref() - .map(|patch| self.patch_info_for_number(patch.number)) - } - - fn record_boot_start_for_patch(&mut self, patch_number: usize) -> Result<()> { - let next_boot_patch = self - .patches_state - .next_boot_patch - .clone() - .context("No next_boot_patch")?; - - if next_boot_patch.number != patch_number { - bail!( - "Attempted to record boot success for patch {} but next_boot_patch is {}", - patch_number, - next_boot_patch.number - ); - } - - self.patches_state.currently_booting_patch = Some(next_boot_patch.clone()); - self.patches_state.boot_started_at = Some(crate::time::unix_timestamp()); - self.save_patches_state() - } - - fn record_boot_success(&mut self) -> Result<()> { - let boot_patch = self - .patches_state - .currently_booting_patch - .clone() - .context("No currently_booting_patch")?; - - self.patches_state.currently_booting_patch = None; - self.patches_state.boot_started_at = None; - self.patches_state.last_booted_patch = Some(boot_patch.clone()); - if let Err(e) = self.delete_patch_artifacts_older_than(boot_patch.number) { - shorebird_error!( - "Failed to delete patch artifacts older than {}: {}", - boot_patch.number, - e - ); - } - self.save_patches_state() - } - - fn record_boot_failure_for_patch(&mut self, patch_number: usize) -> Result<()> { - self.patches_state.currently_booting_patch = None; - self.patches_state.boot_started_at = None; - self.patches_state.known_bad_patches.insert(patch_number); - self.try_fall_back_from_patch(patch_number) - } - - fn is_known_bad_patch(&self, patch_number: usize) -> bool { - self.patches_state.known_bad_patches.contains(&patch_number) - } - - 
fn remove_patch(&mut self, patch_number: usize) -> Result<()> { - // Server-driven rollback: mark known-bad so a later fallback path - // (e.g. record_boot_failure_for_patch on a *different* patch) can't - // promote `last_booted_patch` back to `next_boot_patch` if its - // artifact deletion in try_fall_back_from_patch silently fails. - self.patches_state.known_bad_patches.insert(patch_number); - self.try_fall_back_from_patch(patch_number) - } - - fn reset(&mut self) -> Result<()> { - self.patches_state = PatchesState::default(); - self.save_patches_state()?; - let patches_dir = self.patches_dir(); - std::fs::remove_dir_all(&patches_dir) - .with_file_context(FileOperation::DeleteDir, &patches_dir) - } -} - -#[cfg(test)] -impl PatchManager { - pub fn manager_for_test(temp_dir: &TempDir) -> PatchManager { - PatchManager::new( - temp_dir.path().to_owned(), - None, - PatchVerificationMode::default(), - ) - } - - pub fn add_patch_for_test(&mut self, temp_dir: &TempDir, patch_number: usize) -> Result<()> { - self.add_signed_patch_for_test(temp_dir, patch_number, "hash", None) - } - - pub fn add_signed_patch_for_test( - &mut self, - temp_dir: &TempDir, - patch_number: usize, - hash: &str, - signature: Option<&str>, - ) -> Result<()> { - let file_path = &temp_dir - .path() - .join(format!("patch{}.vmcode", patch_number)); - std::fs::write(file_path, patch_number.to_string().repeat(patch_number)).unwrap(); - shorebird_info!( - "Adding patch {} with contents {} hash {} at {}", - patch_number, - patch_number.to_string().repeat(patch_number), - hash, - file_path.display() - ); - self.add_patch(patch_number, file_path, hash, signature) - } -} - -#[cfg(test)] -mod debug_tests { - use tempfile::TempDir; - - use super::PatchManager; - use crate::yaml::PatchVerificationMode; - - #[test] - fn manage_patches_is_debug() { - let temp_dir = TempDir::new().unwrap(); - let patch_manager: Box = - Box::new(PatchManager::manager_for_test(&temp_dir)); - assert_eq!(format!("{:?}", patch_manager), "ManagePatches"); - } - - #[test] - fn patch_manager_is_debug() { - let temp_dir = TempDir::new().unwrap(); - let patch_manager = PatchManager::new( - temp_dir.path().to_owned(), - Some("public_key"), - PatchVerificationMode::default(), - ); - let actual = format!("{:?}", patch_manager); - assert!(actual.contains(r#"patches_state: PatchesState { last_booted_patch: None, next_boot_patch: None, currently_booting_patch: None, boot_started_at: None, known_bad_patches: {} }, patch_public_key: Some("public_key")"#)); - } -} - -#[cfg(test)] -mod add_patch_tests { - use super::*; - use std::path::Path; - use tempfile::TempDir; - - #[test] - fn errs_if_file_path_does_not_exist() { - let mut manager = PatchManager::manager_for_test(&TempDir::new().unwrap()); - assert!(manager - .add_patch( - 1, - Path::new("/path/to/file/that/does/not/exist"), - "hash", - None, - ) - .is_err()); - } - - #[test] - fn adds_patch_successfully() { - let patch_number = 1; - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::manager_for_test(&temp_dir); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents).unwrap(); - - assert!(manager - .add_patch( - patch_number, - Path::new(file_path), - "hash", - Some("my_signature") - ) - .is_ok()); - - assert_eq!( - manager.patches_state.next_boot_patch, - Some(PatchMetadata { - number: patch_number, - size: patch_file_contents.len() as u64, - hash: "hash".to_string(), - signature: 
Some("my_signature".to_owned()) - }) - ); - assert!(!file_path.exists()); - } - - // InstallOnly mode signature verification tests - these verify that signature - // checking happens at install time when using PatchVerificationMode::InstallOnly. - - // The constant values below were generated by taking an arbitrary sha256 hash (INFLATED_PATCH_HASH) - // and using openssl to sign it with the private key corresponding to `PUBLIC_KEY`. - - // The base64-encoded public key in a DER format. This is required by ring to verify signatures. - // See https://docs.rs/ring/latest/ring/signature/index.html#signing-and-verifying-with-rsa-pkcs1-15-padding - const PUBLIC_KEY: &str = "MIIBCgKCAQEA2wdpEGbuvlPsb9i0qYrfMefJnEw1BHTi8SYZTKrXOvJWmEpPE1hWfbkvYzXu5a96gV1yocF3DMwn04VmRlKhC4AhsD0NL0UNhYhotbKG91Kwi1vAXpHhCdz5gQEBw0K1uB4Jz+zK6WK+31PryYpwLwbyXNqXoY8IAAUQ4STsHYV5w+BMSi8pepWMRd7DR9RHcbNOZlJvdBQ5NxvB4JN4dRMq8cC73ez1P9d7Dfwv3TWY+he9EmuXLT2UivZSlHIrGBa7MFfqyUe2ro0F7Te/B0si12itBbWIqycvqcXjeOPNn6WEpqN7IWjb9LUh162JyYaz5Lb/VeeJX8LKtElccwIDAQAB"; - - // The message that was signed. In practice, this will be the sha256 hash of an inflated patch artifact. - const INFLATED_PATCH_HASH: &str = - "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b"; - - // The base64-encoded signature of `INFLATED_PATCH_HASH` created using the private key corresponding - // to `PUBLIC_KEY`. - const SIGNATURE: &str = "ZGccldv01XqHQ76bXuKV/9EQnNK0Q+reQ9bJHVnGfLldF+BLRx0divgPfKP5Df9BJPA3dw1Z1VortfepmMGebP3kS593l5zoktu9MIepxvRAFWNKE5PDTIIvCL/ddTPEHt6NNCeD6HLOMLzbEX3cFZa+lq3UymGi0aqA5DlXirJBGtopojc9nOXZ22n/qHNZIHEkGcqKbSMSK9oC55whKHnlJTbCXdmSyDc65B4PcgseqJom1riVK3XGW1YMrSpuMAU+CDT7HhdESmI1UtH1bYeBITfRhQztdDTfti2vJTf2Y+lYC99CFiISgD7f1m0KUcC+VnEAMZSYtgxSk6AX2A=="; - - #[test] - fn install_only_errs_if_public_key_is_invalid() { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some("not a valid key"), - PatchVerificationMode::InstallOnly, - ); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch contents").unwrap(); - - // In InstallOnly mode, fails at install time because the public key is invalid - let result = manager.add_patch(1, file_path, INFLATED_PATCH_HASH, Some(SIGNATURE)); - assert!(result.is_err()); - assert!(manager.next_boot_patch().is_none()); - } - - #[test] - fn install_only_errs_if_signature_is_missing_when_public_key_configured() { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::InstallOnly, - ); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch contents").unwrap(); - - // In InstallOnly mode, fails at install time because signature is missing - let result = manager.add_patch(1, file_path, INFLATED_PATCH_HASH, None); - assert!(result.is_err()); - assert!(manager.next_boot_patch().is_none()); - } - - #[test] - fn install_only_errs_if_signature_is_invalid() { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::InstallOnly, - ); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch contents").unwrap(); - - // Using INFLATED_PATCH_HASH as a signature because it is valid base64, but not a valid signature. - // In InstallOnly mode, this fails immediately at install time. 
- let result = - manager.add_patch(1, file_path, INFLATED_PATCH_HASH, Some(INFLATED_PATCH_HASH)); - assert!(result.is_err()); - assert!(manager.next_boot_patch().is_none()); - } - - #[test] - fn install_only_succeeds_with_valid_signature() { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::InstallOnly, - ); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch contents").unwrap(); - - // In InstallOnly mode, signature is verified at install time - let result = manager.add_patch(1, file_path, INFLATED_PATCH_HASH, Some(SIGNATURE)); - assert!(result.is_ok()); - assert!(manager.next_boot_patch().is_some()); - } - - #[test] - fn install_only_succeeds_with_any_signature_if_no_public_key() { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - None, // No public key configured - PatchVerificationMode::InstallOnly, - ); - - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch contents").unwrap(); - - // Without a public key, signature verification is skipped even in InstallOnly mode - let result = manager.add_patch(1, file_path, "hash", Some("not a valid signature")); - assert!(result.is_ok()); - assert!(manager.next_boot_patch().is_some()); - } -} - -#[cfg(test)] -mod last_successfully_booted_patch_tests { - use super::*; - use tempfile::TempDir; - - #[test] - fn returns_none_if_no_patch_has_been_booted() -> Result<()> { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - assert!(manager.last_successfully_booted_patch().is_none()); - - Ok(()) - } - - #[test] - fn returns_value_from_patches_state() -> Result<()> { - let temp_dir = TempDir::new().unwrap(); - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - - let expected = PatchInfo { - path: manager.patch_artifact_path(1), - number: 1, - }; - manager.patches_state.last_booted_patch = manager.patches_state.next_boot_patch.clone(); - assert_eq!(manager.last_successfully_booted_patch(), Some(expected)); - - Ok(()) - } -} - -#[cfg(test)] -mod next_boot_patch_tests { - use super::*; - use tempfile::TempDir; - - #[test] - fn returns_none_if_no_next_boot_patch() { - let temp_dir = TempDir::new().unwrap(); - let manager = PatchManager::manager_for_test(&temp_dir); - assert!(manager.next_boot_patch().is_none()); - } - - #[test] - fn returns_none_patch_if_first_patch_failed_to_boot() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Add a first patch and pretend it failed to boot. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_failure_for_patch(1)?; - - // Because there is no previous patch, we should not attempt to boot any patch. - assert!(manager.next_boot_patch().is_none()); - assert!(manager.is_known_bad_patch(1)); - - Ok(()) - } - - #[test] - fn falls_back_to_last_booted_patch_if_still_bootable() -> Result<()> { - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - - // Add patch 1, pretend it booted successfully. 
- assert!(manager.add_patch(1, file_path, "hash", None).is_ok()); - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_success().is_ok()); - assert!(!manager.is_known_bad_patch(1)); - - // Add patch 2, pretend it failed to boot. - let file_path = &temp_dir.path().join("patch2.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - assert!(manager.add_patch(2, file_path, "hash", None).is_ok()); - assert!(manager.record_boot_start_for_patch(2).is_ok()); - assert!(manager.record_boot_failure_for_patch(2).is_ok()); - assert!(manager.is_known_bad_patch(2)); - - // Verify that we will next attempt to boot from patch 1. - assert_eq!(manager.next_boot_patch().unwrap().number, 1); - - Ok(()) - } - - #[test] - fn adding_patch_deletes_unbooted_patch_not_last_booted() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Add patch 1 and boot it successfully. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - - // Add patch 2 (not booted yet). - manager.add_patch_for_test(&temp_dir, 2)?; - - let patch_1_artifact = manager.patch_artifact_path(1); - let patch_2_artifact = manager.patch_artifact_path(2); - assert!(patch_1_artifact.exists()); - assert!(patch_2_artifact.exists()); - - // Add patch 3 — should delete patch 2 (unbooted), NOT patch 1 (last booted). - manager.add_patch_for_test(&temp_dir, 3)?; - - assert!( - patch_1_artifact.exists(), - "Last booted patch 1 artifacts should NOT be deleted" - ); - assert!( - !patch_2_artifact.exists(), - "Unbooted patch 2 artifacts should be deleted" - ); - - Ok(()) - } - - #[test] - fn returns_last_booted_patch_if_next_patch_failed_to_boot() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Add a first patch and pretend it booted successfully. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - - // Add a second patch and pretend it failed to boot. - manager.add_patch_for_test(&temp_dir, 2)?; - manager.record_boot_start_for_patch(2)?; - manager.record_boot_failure_for_patch(2)?; - - // Verify that we will next attempt to boot from patch 1. - assert_eq!(manager.next_boot_patch().unwrap().number, 1); - assert!(!manager.is_known_bad_patch(1)); - assert!(manager.is_known_bad_patch(2)); - - Ok(()) - } -} - -#[cfg(test)] -mod validate_next_boot_patch_tests { - use super::*; - use anyhow::Result; - use tempfile::TempDir; - - #[test] - fn does_nothing_if_no_next_boot_patch() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - assert!(manager.validate_next_boot_patch().is_ok()); - Ok(()) - } - - #[test] - fn clears_next_boot_patch_if_it_is_not_bootable() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - - // Write junk to the artifact, this should render the patch unbootable in the eyes - // of the PatchManager. - let artifact_path = manager.patch_artifact_path(1); - std::fs::write(&artifact_path, "junk")?; - - assert!(manager.next_boot_patch().is_some()); - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - // Ensure the internal state is cleared. 
- assert!(manager.patches_state.next_boot_patch.is_none()); - - // The artifact should have been deleted. - assert!(!&artifact_path.exists()); - - Ok(()) - } - - #[test] - fn clears_current_and_next_on_boot_failure_if_they_are_the_same() -> Result<()> { - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - assert!(manager.add_patch(1, file_path, "hash", None).is_ok()); - - // Write junk to the artifact, this should render the patch unbootable in the eyes - // of the PatchManager. - let artifact_path = manager.patch_artifact_path(1); - std::fs::write(&artifact_path, "junk")?; - - assert!(manager.next_boot_patch().is_some()); - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - // Ensure the internal state is cleared. - assert!(manager.patches_state.next_boot_patch.is_none()); - assert!(manager.patches_state.last_booted_patch.is_none()); - - // The artifact should have been deleted. - assert!(!&artifact_path.exists()); - - Ok(()) - } - - #[test] - fn does_not_fall_back_to_last_booted_patch_if_corrupted() -> Result<()> { - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - - // Add patch 1, pretend it booted successfully. - assert!(manager.add_patch(1, file_path, "hash", None).is_ok()); - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_success().is_ok()); - - // Add patch 2, pretend it failed to boot. - let file_path = &temp_dir.path().join("patch2.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - assert!(manager.add_patch(2, file_path, "hash", None).is_ok()); - assert!(manager.record_boot_start_for_patch(2).is_ok()); - assert!(manager.record_boot_failure_for_patch(2).is_ok()); - - // Write junk to patch 1's artifact. This should prevent us from falling back to it. - let patch_1_artifact_path = manager.patch_artifact_path(1); - std::fs::write(patch_1_artifact_path, "junk")?; - - assert!(manager.next_boot_patch().is_some()); - assert!(manager.validate_next_boot_patch().is_err()); - - // Verify that we will not attempt to boot from either patch. - assert!(manager.next_boot_patch().is_none()); - - // Patch 1 should *not* be considered bad, as we successfully booted from it and it only - // became corrupted after that. Downloading it a second time might resolve the issue. - assert!(!manager.is_known_bad_patch(1)); - - // Patch 2 failed to boot, so it should be considered bad. - assert!(manager.is_known_bad_patch(2)); - - Ok(()) - } - - // The constant values below were generated by taking an arbitrary sha256 hash (INFLATED_PATCH_HASH) - // and using openssl to sign it with the private key corresponding to `PUBLIC_KEY`. - - // The base64-encoded public key in a DER format. This is required by ring to verify signatures. 
- // See https://docs.rs/ring/latest/ring/signature/index.html#signing-and-verifying-with-rsa-pkcs1-15-padding - const PUBLIC_KEY: &str = "MIIBCgKCAQEA2wdpEGbuvlPsb9i0qYrfMefJnEw1BHTi8SYZTKrXOvJWmEpPE1hWfbkvYzXu5a96gV1yocF3DMwn04VmRlKhC4AhsD0NL0UNhYhotbKG91Kwi1vAXpHhCdz5gQEBw0K1uB4Jz+zK6WK+31PryYpwLwbyXNqXoY8IAAUQ4STsHYV5w+BMSi8pepWMRd7DR9RHcbNOZlJvdBQ5NxvB4JN4dRMq8cC73ez1P9d7Dfwv3TWY+he9EmuXLT2UivZSlHIrGBa7MFfqyUe2ro0F7Te/B0si12itBbWIqycvqcXjeOPNn6WEpqN7IWjb9LUh162JyYaz5Lb/VeeJX8LKtElccwIDAQAB"; - - // The message that was signed. In practice, this will be the sha256 hash of an inflated patch artifact. - const INFLATED_PATCH_HASH: &str = - "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b"; - - // The base64-encoded signature of `INFLATED_PATCH_HASH` created using the private key corresponding - // to `PUBLIC_KEY`. - const SIGNATURE: &str = "ZGccldv01XqHQ76bXuKV/9EQnNK0Q+reQ9bJHVnGfLldF+BLRx0divgPfKP5Df9BJPA3dw1Z1VortfepmMGebP3kS593l5zoktu9MIepxvRAFWNKE5PDTIIvCL/ddTPEHt6NNCeD6HLOMLzbEX3cFZa+lq3UymGi0aqA5DlXirJBGtopojc9nOXZ22n/qHNZIHEkGcqKbSMSK9oC55whKHnlJTbCXdmSyDc65B4PcgseqJom1riVK3XGW1YMrSpuMAU+CDT7HhdESmI1UtH1bYeBITfRhQztdDTfti2vJTf2Y+lYC99CFiISgD7f1m0KUcC+VnEAMZSYtgxSk6AX2A=="; - - // Strict mode boot-time signature verification tests. - // In Strict mode, signature verification happens at boot time (validate_next_boot_patch), - // not at install time. This provides protection against post-install tampering. - - #[test] - fn strict_mode_succeeds_with_valid_signature_at_boot_time() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::Strict, - ); - - // In Strict mode, add_patch does NOT verify signature (that happens at boot time) - manager.add_signed_patch_for_test(&temp_dir, 1, INFLATED_PATCH_HASH, Some(SIGNATURE))?; - - // Boot-time validation verifies the signature by computing hash and checking signature - assert!(manager.next_boot_patch().is_some()); - assert!(manager.validate_next_boot_patch().is_ok()); - assert!(manager.next_boot_patch().is_some()); - - let patch = manager.next_boot_patch().unwrap(); - assert_eq!(patch.number, 1); - - Ok(()) - } - - #[test] - fn succeeds_with_arbitrary_signature_if_no_public_key() -> Result<()> { - let temp_dir = TempDir::new()?; - // Create a PatchManager without a public key - signature verification is skipped. 
- let mut manager = PatchManager::manager_for_test(&temp_dir); - - manager.add_signed_patch_for_test( - &temp_dir, - 1, - INFLATED_PATCH_HASH, - Some("not a valid signature"), - )?; - - // Without a public key, boot-time validation only checks file existence and size - assert!(manager.next_boot_patch().is_some()); - assert!(manager.validate_next_boot_patch().is_ok()); - assert!(manager.next_boot_patch().is_some()); - let patch = manager.next_boot_patch().unwrap(); - assert_eq!(patch.number, 1); - - Ok(()) - } - - #[test] - fn strict_mode_fails_boot_validation_if_signature_missing() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::Strict, - ); - - // In Strict mode, add_patch succeeds without signature (no install-time check) - manager.add_signed_patch_for_test(&temp_dir, 1, INFLATED_PATCH_HASH, None)?; - - assert!(manager.next_boot_patch().is_some()); - // But boot-time validation fails because signature is required - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - Ok(()) - } - - #[test] - fn strict_mode_fails_boot_validation_if_signature_invalid() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::Strict, - ); - - // In Strict mode, add_patch succeeds with invalid signature (no install-time check) - manager.add_signed_patch_for_test( - &temp_dir, - 1, - INFLATED_PATCH_HASH, - Some(INFLATED_PATCH_HASH), // Using hash as signature, which is invalid - )?; - - assert!(manager.next_boot_patch().is_some()); - // But boot-time validation fails because signature doesn't verify - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - Ok(()) - } - - #[test] - fn strict_mode_fails_boot_validation_if_public_key_invalid() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some("not a valid key"), - PatchVerificationMode::Strict, - ); - - // In Strict mode, add_patch succeeds (no install-time check) - manager.add_signed_patch_for_test(&temp_dir, 1, INFLATED_PATCH_HASH, Some(SIGNATURE))?; - - assert!(manager.next_boot_patch().is_some()); - // But boot-time validation fails because public key can't be used - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - Ok(()) - } - - #[test] - fn strict_mode_detects_tampered_patch_at_boot_time() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::new( - temp_dir.path().to_path_buf(), - Some(PUBLIC_KEY), - PatchVerificationMode::Strict, - ); - - // Install a valid patch - manager.add_signed_patch_for_test(&temp_dir, 1, INFLATED_PATCH_HASH, Some(SIGNATURE))?; - - // Tamper with the patch file after installation - let patch_path = manager.patch_artifact_path(1); - std::fs::write(&patch_path, "tampered content")?; - - assert!(manager.next_boot_patch().is_some()); - // Boot-time validation detects tampering: computed hash doesn't match signature - assert!(manager.validate_next_boot_patch().is_err()); - assert!(manager.next_boot_patch().is_none()); - - Ok(()) - } -} - -#[cfg(test)] -mod fall_back_tests { - use super::*; - - #[test] - fn does_nothing_if_no_patch_exists() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = 
PatchManager::manager_for_test(&temp_dir); - - assert!(manager.patches_state.last_booted_patch.is_none()); - assert!(manager.patches_state.next_boot_patch.is_none()); - - manager.try_fall_back_from_patch(1)?; - - assert!(manager.patches_state.last_booted_patch.is_none()); - assert!(manager.patches_state.next_boot_patch.is_none()); - - Ok(()) - } - - #[test] - fn sets_next_patch_to_latest_patch_if_both_are_present() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Download and successfully boot from patch 1 - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - - // Download and fall back from patch 2 - manager.add_patch_for_test(&temp_dir, 2)?; - - manager.try_fall_back_from_patch(2)?; - - assert_eq!(manager.patches_state.last_booted_patch.unwrap().number, 1); - assert_eq!(manager.patches_state.next_boot_patch.unwrap().number, 1); - - Ok(()) - } - - #[test] - fn clears_next_and_last_patches_if_both_fail_validation() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Download and successfully boot from patch 1, and then corrupt it on disk. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - let patch_1_path = manager.patch_artifact_path(1); - std::fs::write(patch_1_path, "junk junk junk")?; - - // Download and fall back from patch 2 - manager.add_patch_for_test(&temp_dir, 2)?; - - manager.try_fall_back_from_patch(2)?; - - // Neither patch should exist. - assert!(manager.patches_state.last_booted_patch.is_none()); - assert!(manager.patches_state.next_boot_patch.is_none()); - - Ok(()) - } - - #[test] - fn does_not_clear_next_patch_if_changed_since_boot_start() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Simulate a situation where we download both patches 1 and 2. - manager.add_patch_for_test(&temp_dir, 1)?; - - // Start booting from patch 1. - manager.record_boot_start_for_patch(1)?; - - // Download patch 2 before patch 1 finishes booting. - manager.add_patch_for_test(&temp_dir, 2)?; - - manager.record_boot_failure_for_patch(1)?; - - manager.try_fall_back_from_patch(1)?; - - assert!(manager.patches_state.last_booted_patch.is_none()); - assert_eq!( - manager - .patches_state - .next_boot_patch - .clone() - .unwrap() - .number, - 2 - ); - assert!(manager.is_known_bad_patch(1)); - - Ok(()) - } - - /// Server rolls back patch 1, then patch 2 arrives and fails to boot. - /// The else-if fallback path must not promote patch 1 back into - /// `next_boot_patch` even though `last_booted_patch` still points at - /// patch 1 — the server told us not to use it. `remove_patch` records - /// patch 1 as known-bad so this can't happen. - #[test] - fn rollback_then_failed_replacement_does_not_resurrect_rolled_back_patch() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Patch 1: install + successful boot. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - - // Server rolls back patch 1. 
- manager.remove_patch(1)?; - assert!(manager.is_known_bad_patch(1)); - assert!(manager.patches_state.next_boot_patch.is_none()); - // last_booted_patch is intentionally preserved; the running - // process is still using patch 1 until it restarts. - assert_eq!( - manager - .patches_state - .last_booted_patch - .as_ref() - .unwrap() - .number, - 1 - ); - - // Re-create patch 1's artifact to simulate a silent - // delete_patch_artifacts failure (e.g. transient FS issue). - let patch_1_path = manager.patch_artifact_path(1); - std::fs::create_dir_all(patch_1_path.parent().unwrap())?; - std::fs::write(&patch_1_path, "patch contents")?; - - // Patch 2 arrives and then fails to boot. - manager.add_patch_for_test(&temp_dir, 2)?; - manager.record_boot_start_for_patch(2)?; - manager.record_boot_failure_for_patch(2)?; - - // Patch 1 must NOT be promoted back to next_boot_patch. - assert!(manager.patches_state.next_boot_patch.is_none()); - assert!(manager.is_known_bad_patch(1)); - assert!(manager.is_known_bad_patch(2)); - - Ok(()) - } - - #[test] - fn succeeds_if_deleting_artifacts_fails() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Download and successfully boot from patch 1, and then corrupt it on disk. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.record_boot_start_for_patch(1)?; - manager.record_boot_success()?; - - // Download patch 2. - manager.add_patch_for_test(&temp_dir, 2)?; - - // Remove all data for both patches. - let patch_dir = manager.patch_dir(1); - std::fs::remove_dir_all(patch_dir)?; - let patch_dir = manager.patch_dir(2); - std::fs::remove_dir_all(patch_dir)?; - - manager.try_fall_back_from_patch(2)?; - - assert!(manager.patches_state.last_booted_patch.is_none()); - assert!(manager.patches_state.next_boot_patch.is_none()); - - Ok(()) - } -} - -#[cfg(test)] -mod record_boot_success_for_patch_tests { - use super::*; - use anyhow::{Ok, Result}; - use tempfile::TempDir; - - #[test] - fn errs_if_no_next_boot_patch() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // This should fail because no patches have been added. 
- assert!(manager.record_boot_success().is_err()); - - Ok(()) - } - - #[test] - fn errs_if_patch_number_does_not_match_next_patch() -> Result<()> { - let patch_number = 1; - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - assert!(manager - .add_patch(patch_number, file_path, "hash", None) - .is_ok()); - assert!(manager.record_boot_success().is_err()); - - Ok(()) - } - - #[test] - fn succeeds_when_provided_next_boot_patch_number() -> Result<()> { - let patch_number = 1; - let patch_file_contents = "patch contents"; - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - let file_path = &temp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, patch_file_contents)?; - assert!(manager - .add_patch(patch_number, file_path, "hash", None) - .is_ok()); - - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_success().is_ok()); - - Ok(()) - } - - #[test] - fn deletes_other_patch_artifacts() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Download patches 1, 2, and 3 before we start booting from patch 2. - manager.add_patch_for_test(&temp_dir, 1)?; - manager.add_patch_for_test(&temp_dir, 2)?; - manager.add_patch_for_test(&temp_dir, 3)?; - - // Start booting from our latest patch. - manager.record_boot_start_for_patch(3)?; - - // Download patch 4 while we're booting from patch 3. - manager.add_patch_for_test(&temp_dir, 4)?; - - // Record success for patch 3, make sure the artifact still exists. - manager.record_boot_success()?; - - // Make sure that recording success for patch 2 deleted artifacts for prior - // patches but not for subsequent patches. - let mut patch_dir_names = std::fs::read_dir(manager.patches_dir())? - .map(|res| res.map(|e| e.path())) - .map(|e| e.unwrap()) - .map(|e| e.file_name().unwrap().to_owned()) - .collect::>(); - patch_dir_names.sort(); - assert_eq!(patch_dir_names, vec!["3", "4"]); - - Ok(()) - } - - #[test] - fn deletes_unrecognized_directories_in_patches_dir() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - - // Add a junk directory to the patches directory. 
- let junk_dir = manager.patches_dir().join("junk"); - std::fs::create_dir_all(&junk_dir)?; - - manager.add_patch_for_test(&temp_dir, 1)?; - manager.add_patch_for_test(&temp_dir, 2)?; - manager.record_boot_start_for_patch(2)?; - manager.record_boot_success()?; - - assert!(!junk_dir.exists()); - assert!(!manager.patch_dir(1).exists()); - - Ok(()) - } -} - -#[cfg(test)] -mod record_boot_failure_for_patch_tests { - use super::*; - use anyhow::{Ok, Result}; - use tempfile::TempDir; - - #[test] - fn deletes_failed_patch_artifacts() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_success().is_ok()); - assert!(!manager.is_known_bad_patch(1)); - let succeeded_patch_artifact_path = manager.patch_artifact_path(1); - - manager.add_patch_for_test(&temp_dir, 2)?; - let failed_patch_artifact_path = manager.patch_artifact_path(2); - - // Make sure patch artifacts exist - assert!(failed_patch_artifact_path.exists()); - assert!(succeeded_patch_artifact_path.exists()); - - assert!(manager.record_boot_start_for_patch(2).is_ok()); - assert!(manager.record_boot_failure_for_patch(2).is_ok()); - assert!(!failed_patch_artifact_path.exists()); - assert!(manager.is_known_bad_patch(2)); - - Ok(()) - } - - /// Patch 1 successfully booted on a prior run; on this run boot fails. - /// `last_successfully_booted_patch` keeps reporting patch 1 — it *did* - /// successfully boot once, that's a historical fact. The operational - /// "don't try this patch again" intent is captured by - /// `is_known_bad_patch` and by deleting the artifacts; nobody falls - /// back to a patch with missing artifacts. - #[test] - fn preserves_last_booted_patch_on_failure_but_marks_bad() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - let patch_artifact_path = manager.patch_artifact_path(1); - - // Pretend we booted from this patch - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_success().is_ok()); - assert_eq!(manager.last_successfully_booted_patch().unwrap().number, 1); - assert_eq!(manager.next_boot_patch().unwrap().number, 1); - assert!(patch_artifact_path.exists()); - assert!(!manager.is_known_bad_patch(1)); - - // Now pretend it failed to boot - assert!(manager.record_boot_start_for_patch(1).is_ok()); - assert!(manager.record_boot_failure_for_patch(1).is_ok()); - // Historical fact preserved. - assert_eq!(manager.last_successfully_booted_patch().unwrap().number, 1); - // Operational state: don't try this patch again. 
- assert!(manager.next_boot_patch().is_none()); - assert!(manager.is_known_bad_patch(1)); - assert!(!patch_artifact_path.exists()); - - Ok(()) - } -} - -#[cfg(test)] -mod reset_tests { - use super::*; - use anyhow::{Ok, Result}; - use tempfile::TempDir; - - #[test] - fn deletes_patches_dir_and_resets_patches_state() -> Result<()> { - let temp_dir = TempDir::new()?; - let mut manager = PatchManager::manager_for_test(&temp_dir); - manager.add_patch_for_test(&temp_dir, 1)?; - let path_artifacts_dir = manager.patches_dir(); - - // Make sure the directory and artifact files were created - assert!(path_artifacts_dir.exists()); - assert_eq!(std::fs::read_dir(&path_artifacts_dir).unwrap().count(), 1); - - assert!(manager.reset().is_ok()); - - // Make sure the directory and artifact files were deleted - assert!(!path_artifacts_dir.exists()); - - Ok(()) - } -} diff --git a/library/src/cache/updater_state.rs b/library/src/cache/updater_state.rs index e8ca3b04..ab8fb1d0 100644 --- a/library/src/cache/updater_state.rs +++ b/library/src/cache/updater_state.rs @@ -1,63 +1,85 @@ // This file deals with the cache / state management for the updater. -// This code is very confused and uses "patch number" sometimes -// and "slot index" others. The public interface should be -// consistent and use patch number everywhere. -// PatchInfo can probably go away. - use std::path::{Path, PathBuf}; use anyhow::Result; +#[cfg(test)] +use anyhow::{bail, Context}; use serde::{Deserialize, Serialize}; use crate::events::PatchEvent; use crate::yaml::PatchVerificationMode; -use super::patch_manager::{ManagePatches, PatchManager}; +use super::lifecycle::{PatchLifecycle, PatchState}; +#[cfg(test)] +use super::signing; use super::{disk_io, PatchInfo}; -/// Where the updater state is stored on disk. const STATE_FILE_NAME: &str = "state.json"; -/// Records the updater's "state of the world" - which patches we know to be -/// good or bad, which patches we have downloaded, which patch we're currently -/// booted from, events that need to be reported to the server, etc. +/// Files and directories under `cache_dir` that shorebird has ever +/// written. On a release-version change or unparseable state we wipe +/// these and keep going. `state.json` is intentionally absent — it's +/// rewritten in place with the preserved `client_id` (so removing it +/// would lose the only thing we want to carry forward). +/// +/// `patches_state.json` is the legacy file from the prior `PatchManager` +/// implementation; carrying it forward would orphan ~few-KB of stale +/// state on every device upgrading through this PR. +// TODO(eseidel): Drop `patches_state.json` from this list two minor +// versions after the release that ships this PR. By that point the +// in-flight devices upgrading from a pre-PR build have all wiped it +// once on their first release-version change, and nothing on disk +// references it anymore. +const SHOREBIRD_OWNED_PATHS: &[&str] = &["patches", "pointers.json", "patches_state.json"]; + +/// Records the updater's "state of the world": which patches we have +/// downloaded or installed, which patch booted last, events that need to +/// be reported to the server, etc. /// -// This struct is public, as callers can have a handle to it, but modifying -// anything inside should be done via the functions below. -// TODO(eseidel): Split the per-release state from the per-device state. -// That way per-release state is reset when the release version changes. -// but per-device state is not. 
+/// Per-patch state lives inside [`PatchLifecycle`] (one document per +/// patch number under `{cache}/patches/{N}/state.json`). UpdaterState +/// itself only owns the per-device `client_id` and the per-release event +/// queue; all other patch-related fields are pointers managed by the +/// lifecycle. +// TODO(eseidel): Split the per-release state from the per-device state +// so per-device state isn't reset on release-version change. #[derive(Debug)] pub struct UpdaterState { - // Per-device state: - /// Where this writes to disk. Don't serialize this field, as it can change - /// between runs of the app. + /// Persistent app-storage root. Holds `state.json`, `pointers.json`, + /// and `patches/{N}/` (state.json + dlc.vmcode). The compressed + /// download bytes live separately under the OS-managed cache dir + /// (passed to `load_or_new_on_error`); the lifecycle owns both + /// roots and we don't store the download dir here directly. cache_dir: PathBuf, - - patch_manager: Box<dyn ManagePatches>, - + lifecycle: PatchLifecycle, + patch_public_key: Option<String>, + verification_mode: PatchVerificationMode, serialized_state: SerializedState, } -/// UpdaterState fields that are serialized to disk. +/// UpdaterState fields that are serialized to disk at `{cache}/state.json`. /// -/// Written out to disk as a json file at STATE_FILE_NAME. +/// Every per-release field on disk (this struct, `pointers.json`, and the +/// per-patch `state.json` files under `patches/`) is wiped when +/// `release_version` changes. The release version effectively names a +/// unique build of the engine + updater, since the updater ships +/// embedded in the engine — there's no "version range" of updater code +/// that's mutually compatible. On a release-version mismatch, anything +/// we read from disk could have been written by code we don't +/// recognize, so we discard it. #[derive(Debug, Deserialize, Serialize)] struct SerializedState { - /// The client ID for this device. This is assigned on the first launch of this app and persists - /// between release versions. This is only reset when the app is uninstalled. - /// Shorebird uses these per-install ids in order to provide you, the customer, - /// install-count analytics for your apps. Storage or use of this, and any other, - /// information is covered in our privacy policy: https://shorebird.dev/privacy/ + /// Stable per-install ID. Survives release-version changes; only + /// reset when the app is uninstalled. Used for analytics. + /// <https://shorebird.dev/privacy/> client_id: String, - // Per-release state: - /// The release version this cache corresponds to. - /// If this does not match the release version we're booting from we will - /// clear the cache. + /// The release version this cache corresponds to. Mismatch with the + /// app's reported release version triggers a wipe of all per-release + /// state. release_version: String, - /// Events that have not yet been sent to the server. - /// Format could change between releases, so this is per-release state. + /// Events that have not yet been sent to the server. Format may + /// change between releases, so this is per-release state. queued_events: Vec<PatchEvent>, } @@ -74,30 +96,26 @@ fn is_file_not_found(error: &anyhow::Error) -> bool { false } -/// Serialized updater state impl UpdaterState { pub fn client_id(&self) -> String { self.serialized_state.client_id.clone() } } -/// Lifecycle methods for the updater state. impl UpdaterState { - /// Creates a new `UpdaterState`. 
fn new( cache_dir: PathBuf, + download_dir: PathBuf, release_version: String, patch_public_key: Option<&str>, verification_mode: PatchVerificationMode, client_id: String, ) -> Self { Self { - cache_dir: cache_dir.clone(), - patch_manager: Box::new(PatchManager::new( - cache_dir.clone(), - patch_public_key, - verification_mode, - )), + lifecycle: PatchLifecycle::load_or_default(cache_dir.clone(), download_dir), + cache_dir, + patch_public_key: patch_public_key.map(|s| s.to_owned()), + verification_mode, serialized_state: SerializedState { client_id, release_version, @@ -106,35 +124,70 @@ impl UpdaterState { } } - /// Loads UpdaterState from disk fn load( cache_dir: &Path, + download_dir: &Path, patch_public_key: Option<&str>, verification_mode: PatchVerificationMode, - ) -> anyhow::Result<Self> { + ) -> Result<Self> { let path = cache_dir.join(STATE_FILE_NAME); let serialized_state = disk_io::read(&path)?; - Ok(UpdaterState { + Ok(Self { cache_dir: cache_dir.to_path_buf(), - patch_manager: Box::new(PatchManager::new( + lifecycle: PatchLifecycle::load_or_default( cache_dir.to_path_buf(), - patch_public_key, - verification_mode, - )), + download_dir.to_path_buf(), + ), + patch_public_key: patch_public_key.map(|s| s.to_owned()), + verification_mode, serialized_state, }) } - /// Initializes a new UpdaterState and saves it to disk. + /// Initializes a new UpdaterState and saves it to disk. Wipes the + /// shorebird-managed files in `cache_dir` and the entire + /// `download_dir` — used when the release version changes or when + /// the on-disk state was unparseable. + /// + /// We *don't* blanket-wipe `cache_dir` because the embedder may + /// configure it to be shared with non-shorebird files (the test + /// suite does this; production engines typically hand us a + /// dedicated subdir but the API doesn't enforce that). Instead we + /// enumerate the set of files we've ever written there. Add new + /// entries to `SHOREBIRD_OWNED_PATHS` when introducing new files + /// or directories under `cache_dir`. fn create_new_and_save( - storage_dir: &Path, + cache_dir: &Path, + download_dir: &Path, release_version: &str, patch_public_key: Option<&str>, verification_mode: PatchVerificationMode, client_id: String, ) -> Self { + for relative in SHOREBIRD_OWNED_PATHS { + let path = cache_dir.join(relative); + if !path.exists() { + continue; + } + let result = if path.is_dir() { + std::fs::remove_dir_all(&path) + } else { + std::fs::remove_file(&path) + }; + if let Err(e) = result { + shorebird_error!("Failed to wipe {:?} on reset: {:?}", path, e); + } + } + // The download dir is fully shorebird-owned — wipe it whole. + if download_dir.exists() { + if let Err(e) = std::fs::remove_dir_all(download_dir) { + shorebird_error!("Failed to wipe download dir on reset: {:?}", e); + } + } + let mut state = Self::new( - storage_dir.to_owned(), + cache_dir.to_owned(), + download_dir.to_owned(), release_version.to_owned(), patch_public_key, verification_mode, @@ -143,19 +196,19 @@ impl UpdaterState { if let Err(e) = state.save() { shorebird_warn!("Error saving state {:?}, ignoring.", e); } - // Ensure we clear any patch data if we're creating a new state. 
- let _ = state.patch_manager.reset(); + state.lifecycle = + PatchLifecycle::load_or_default(cache_dir.to_path_buf(), download_dir.to_path_buf()); state } pub fn load_or_new_on_error( - storage_dir: &Path, + cache_dir: &Path, + download_dir: &Path, release_version: &str, patch_public_key: Option<&str>, verification_mode: PatchVerificationMode, ) -> Self { - let load_result = Self::load(storage_dir, patch_public_key, verification_mode); - match load_result { + match Self::load(cache_dir, download_dir, patch_public_key, verification_mode) { Ok(loaded) => { if loaded.serialized_state.release_version != release_version { shorebird_info!( @@ -164,7 +217,8 @@ impl UpdaterState { release_version ); return Self::create_new_and_save( - storage_dir, + cache_dir, + download_dir, release_version, patch_public_key, verification_mode, @@ -178,7 +232,8 @@ impl UpdaterState { shorebird_info!("No existing state file found: {:#}, creating new state.", e); } Self::create_new_and_save( - storage_dir, + cache_dir, + download_dir, release_version, patch_public_key, verification_mode, @@ -188,116 +243,185 @@ impl UpdaterState { } } - /// Saves the updater state to disk. - pub fn save(&self) -> anyhow::Result<()> { - let path = Path::new(&self.cache_dir).join(STATE_FILE_NAME); - disk_io::write(&self.serialized_state, &path) + /// Saves the top-level (non-patch) state to disk. + pub fn save(&self) -> Result<()> { + disk_io::write( + &self.serialized_state, + &self.cache_dir.join(STATE_FILE_NAME), + ) } } -/// Patch management. All patch management is done via the patch manager. +/// Patch lifecycle accessors — UpdaterState delegates to [`PatchLifecycle`]. impl UpdaterState { - /// Records that we are attempting to boot the patch with patch_number. + /// Direct access to the lifecycle. Wrapping every transition in a + /// forwarding method on UpdaterState would be churn for no reader + /// benefit, so callers are expected to reach in for transitions + /// (`decide_start`, `record_download_*`, `mark_bad`, etc). The + /// boot-lifecycle / install / boot-failure helpers below are kept + /// as wrappers because they have invariants (e.g. patch number + /// argument validation, breadcrumb clearing) that a direct caller + /// would have to know about. + pub fn lifecycle(&self) -> &PatchLifecycle { + &self.lifecycle + } + + /// See [`lifecycle`]. + pub fn lifecycle_mut(&mut self) -> &mut PatchLifecycle { + &mut self.lifecycle + } + + /// Records that we are attempting to boot the patch with `patch_number`. pub fn record_boot_start_for_patch(&mut self, patch_number: usize) -> Result<()> { - self.patch_manager.record_boot_start_for_patch(patch_number) + self.lifecycle.record_boot_start(patch_number) } - /// Records that the patch with patch_number failed to boot, uninstalls the patch. + /// Records that patch `patch_number` failed to boot. Marks it + /// `Bad{BootCrash}` and recomputes `next_boot_patch`. Clears the + /// boot breadcrumb regardless of whether it matched. pub fn record_boot_failure_for_patch(&mut self, patch_number: usize) -> Result<()> { - self.patch_manager - .record_boot_failure_for_patch(patch_number) + self.lifecycle.record_boot_failure(patch_number) } - /// Records that the patch with patch_number was successfully booted, marks the patch as "good". + /// Records that the in-flight boot succeeded. pub fn record_boot_success(&mut self) -> Result<()> { - self.patch_manager.record_boot_success() + self.lifecycle.record_boot_success() } - /// The patch that is currently in the process of booting. 
That is, we've recorded a boot start - /// but not yet a boot success or failure. pub fn currently_booting_patch(&self) -> Option<PatchInfo> { - self.patch_manager.currently_booting_patch() + self.lifecycle + .pointers() + .currently_booting_patch + .map(|n| self.patch_info(n)) } - /// Unix timestamp (seconds) when the current boot attempt started, if known. pub fn boot_started_at(&self) -> Option<u64> { - self.patch_manager.boot_started_at() + self.lifecycle.pointers().boot_started_at } - /// The last patch that was successfully booted (e.g., for which we record_boot_success was - /// called). - /// Will be None if: - /// - There was no good patch at time of boot. - /// - The updater has been initialized but no boot recorded yet. pub fn last_successfully_booted_patch(&self) -> Option<PatchInfo> { - self.patch_manager.last_successfully_booted_patch() + self.lifecycle + .pointers() + .last_booted_patch + .map(|n| self.patch_info(n)) } - /// The patch this process is using, set at `report_launch_start` and - /// kept until the next launch. `None` means the process is running the - /// base release. Survives server-driven rollbacks of that patch — the - /// running process is still using it. + /// The patch this process is using. Backed by the session-scoped + /// global in `config.rs` — survives server-driven rollback (the + /// running process is still using the patch) and resets on every + /// fresh process start. pub fn running_patch(&self) -> Option<PatchInfo> { - self.patch_manager.running_patch() + crate::config::running_patch_number().map(|n| self.patch_info(n)) } - /// Records which patch this process is using. Called from - /// `report_launch_start` with `Some(n)` when launching a patch, or - /// `None` when launching the base release. pub fn set_running_patch(&mut self, patch_number: Option<usize>) { - self.patch_manager.set_running_patch(patch_number); + crate::config::set_running_patch_number(patch_number); } - /// This is the patch that will be used for the next boot. - /// Will be None if: - /// - There has never been a patch selected. - /// - There was a patch selected but it was later marked as bad. pub fn next_boot_patch(&mut self) -> Option<PatchInfo> { - self.patch_manager.next_boot_patch() + self.lifecycle + .pointers() + .next_boot_patch + .map(|n| self.patch_info(n)) } - /// Performs integrity checks on the next boot patch. If the patch fails these checks, the patch - /// will be deleted and the next boot patch will be set to the last successfully booted patch or - /// the base release if there is no last successfully booted patch. - /// - /// Returns an error if the patch fails integrity checks. - pub fn validate_next_boot_patch(&mut self) -> anyhow::Result<()> { - self.patch_manager.validate_next_boot_patch() + /// Validates that `next_boot_patch` is bootable. On failure, marks + /// the patch `Bad{ValidationFailed}` and recomputes `next_boot_patch`. + pub fn validate_next_boot_patch(&mut self) -> Result<()> { + self.lifecycle + .validate_next_boot_patch(self.patch_public_key.as_deref(), self.verification_mode) } - /// Copies the patch file at file_path to the manager's directory structure sets - /// this patch as the next patch to boot. + /// Moves the inflated artifact at `patch.path` into the lifecycle's + /// installed location, validates the signature in `InstallOnly` + /// mode, transitions the patch to `Installed`, and promotes it to + /// `next_boot_patch`. + /// + /// Test-only entry point. 
The production update flow inflates + /// directly into the lifecycle's installed location and transitions + /// `Downloaded → Installed` via `lifecycle::record_install_complete`, + /// so no production caller goes through this function. Gated to + /// `#[cfg(test)]` so a future refactor can't accidentally + /// reintroduce the divergence — direct lifecycle calls are the + /// canonical path. Used by `test_utils::install_fake_patch` and + /// the tests below. + #[cfg(test)] pub fn install_patch( &mut self, patch: &PatchInfo, hash: &str, signature: Option<&str>, - ) -> anyhow::Result<()> { - self.patch_manager - .add_patch(patch.number, &patch.path, hash, signature) + ) -> Result<()> { + if !patch.path.exists() { + bail!("Patch file {} does not exist", patch.path.display()); + } + // InstallOnly mode verifies the signature here; Strict mode + // verifies it again at boot time via validate_next_boot_patch. + if self.verification_mode == PatchVerificationMode::InstallOnly { + if let Some(public_key) = &self.patch_public_key { + let sig = signature.context("Patch signature is missing")?; + signing::check_signature(hash, sig, public_key)?; + } + } + let installed_path = self.lifecycle.installed_artifact_path(patch.number); + if let Some(parent) = installed_path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::rename(&patch.path, &installed_path)?; + // Mirror `record_install_complete`'s cleanup of the now-stale + // compressed download bytes if any are sitting in the patch dir. + let download = self.lifecycle.download_artifact_path(patch.number); + if download.exists() { + if let Err(e) = std::fs::remove_file(&download) { + shorebird_error!( + "Failed to remove stale download for patch {}: {:?}", + patch.number, + e + ); + } + } + let installed_size = std::fs::metadata(&installed_path)?.len(); + self.lifecycle.write_state( + patch.number, + &PatchState::Installed { + signature: signature.map(String::from), + size: installed_size, + }, + )?; + self.lifecycle.promote_to_next_boot(patch.number) } - /// Removes the artifacts for patch `patch_number` from disk and updates state to ensure the - /// uninstalled patch is not booted in the future. + /// Removes the artifacts for `patch_number` and recomputes pointers. + /// Used today for server-driven rollbacks. pub fn uninstall_patch(&mut self, patch_number: usize) -> Result<()> { - self.patch_manager.remove_patch(patch_number) + self.lifecycle.cleanup(patch_number)?; + self.lifecycle.recompute_next_boot() } - /// Returns true if we have previously failed to boot from patch `patch_number`. + /// True if `patch_number` is currently in `Bad` state — we tried it + /// and it failed, and shouldn't be retried within this release. pub fn is_known_bad_patch(&self, patch_number: usize) -> bool { - self.patch_manager.is_known_bad_patch(patch_number) + matches!( + self.lifecycle.read_state(patch_number), + Some(PatchState::Bad { .. }) + ) + } + + fn patch_info(&self, n: usize) -> PatchInfo { + PatchInfo { + path: self.lifecycle.installed_artifact_path(n), + number: n, + } } } -/// PatchEvent management +/// PatchEvent management. impl UpdaterState { - /// Adds an event to the queue to be sent to the server. pub fn queue_event(&mut self, event: PatchEvent) -> Result<()> { self.serialized_state.queued_events.push(event); self.save() } - /// Returns up to `limit` events from the reporting queue. 
pub fn copy_events(&self, limit: usize) -> Vec { self.serialized_state .queued_events @@ -307,7 +431,6 @@ impl UpdaterState { .collect() } - /// Removes all events from the reporting queue. pub fn clear_events(&mut self) -> Result<()> { self.serialized_state.queued_events.clear(); self.save() @@ -316,277 +439,314 @@ impl UpdaterState { #[cfg(test)] mod tests { - use tempfile::TempDir; - - use crate::cache::patch_manager::MockManagePatches; - - use mockall::predicate::eq; - use super::*; + use crate::cache::lifecycle::BadReason; + use tempfile::TempDir; - fn test_state(tmp_dir: &TempDir, patch_manager: MP) -> UpdaterState - where - MP: ManagePatches + 'static, - { - UpdaterState { - cache_dir: tmp_dir.path().to_path_buf(), - patch_manager: Box::new(patch_manager), - serialized_state: SerializedState { - client_id: "123".to_string(), - release_version: "1.0.0+1".to_string(), - queued_events: Vec::new(), - }, - } - } - - fn fake_patch(tmp_dir: &TempDir, number: usize) -> super::PatchInfo { - let path = tmp_dir.path().join(format!("patch_{}", number)); - std::fs::write(&path, "fake patch").unwrap(); + fn fake_artifact(tmp: &TempDir, number: usize) -> PatchInfo { + let path = tmp.path().join(format!("patch{}.full", number)); + std::fs::write(&path, format!("patch_{}_bytes", number)).unwrap(); PatchInfo { number, path } } - #[test] - fn release_version_changed_resets_patches() { - let tmp_dir = TempDir::new().unwrap(); - let mut patch_manager = PatchManager::manager_for_test(&tmp_dir); - let file_path = &tmp_dir.path().join("patch1.vmcode"); - std::fs::write(file_path, "patch file contents").unwrap(); - assert!(patch_manager.add_patch(1, file_path, "hash", None).is_ok()); - - let state = test_state(&tmp_dir, patch_manager); - let release_version = state.serialized_state.release_version.clone(); - assert!(state.save().is_ok()); - - let mut state = UpdaterState::load_or_new_on_error( - &state.cache_dir, - &release_version, + fn load(tmp: &TempDir, release_version: &str) -> UpdaterState { + UpdaterState::load_or_new_on_error( + tmp.path(), + &tmp.path().join("downloads"), + release_version, None, PatchVerificationMode::default(), - ); - assert_eq!(state.next_boot_patch().unwrap().number, 1); - - let mut next_version_state = UpdaterState::load_or_new_on_error( - &state.cache_dir, - "1.0.0+2", - None, - PatchVerificationMode::default(), - ); - assert!(next_version_state.next_boot_patch().is_none()); + ) } #[test] - fn is_file_not_found_test() { - use anyhow::Context; - assert!(!super::is_file_not_found(&anyhow::anyhow!(""))); - let tmp_dir = TempDir::new().unwrap(); - let path = tmp_dir.path().join("does_not_exist"); - let result = std::fs::File::open(path).context("foo"); - assert!(result.is_err()); - assert!(super::is_file_not_found(&result.unwrap_err())); + fn release_version_change_wipes_patch_state() { + // Ports `patch_manager.rs::reset_tests::deletes_patches_dir_and_resets_patches_state`. 
+ let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + let p = fake_artifact(&tmp, 1); + state.install_patch(&p, "hash", None).unwrap(); + state.save().unwrap(); + assert_eq!(state.next_boot_patch().map(|p| p.number), Some(1)); + + let mut next = load(&tmp, "1.0.0+2"); + assert!(next.next_boot_patch().is_none()); } #[test] - fn creates_updater_state_with_client_id() { - let tmp_dir = TempDir::new().unwrap(); - let state = UpdaterState::load_or_new_on_error( - tmp_dir.path(), - "1.0.0+1", - None, - PatchVerificationMode::default(), - ); - let saved_state = UpdaterState::load_or_new_on_error( - tmp_dir.path(), - "1.0.0+1", - None, - PatchVerificationMode::default(), - ); - assert_eq!( - state.serialized_state.client_id, - saved_state.serialized_state.client_id + fn release_version_change_wipes_legacy_patches_state_json() { + // Devices upgrading from the prior `PatchManager` will have a + // `patches_state.json` left behind in cache_dir from the old + // code. The new code never reads or writes it, but leaving it + // on disk would orphan a few KB on every release upgrade. + // Belongs to the SHOREBIRD_OWNED_PATHS wipe list. + let tmp = TempDir::new().unwrap(); + let _state = load(&tmp, "1.0.0+1"); + std::fs::write( + tmp.path().join("patches_state.json"), + br#"{"legacy":"junk"}"#, + ) + .unwrap(); + assert!(tmp.path().join("patches_state.json").exists()); + + let _next = load(&tmp, "1.0.0+2"); + assert!( + !tmp.path().join("patches_state.json").exists(), + "legacy patches_state.json should be wiped on release-version change" ); } - // A new UpdaterState is created when the release version is changed, but - // the client_id should remain the same. #[test] - fn client_id_does_not_change_if_release_version_changes() { - let tmp_dir = TempDir::new().unwrap(); - - let state = test_state(&tmp_dir, PatchManager::manager_for_test(&tmp_dir)); - let original_loaded = UpdaterState::load_or_new_on_error( - &state.cache_dir, - &state.serialized_state.release_version, - None, - PatchVerificationMode::default(), + fn release_version_change_wipes_download_dir() { + // The cache-rooted download dir is per-release just like the + // persistent patches/ tree. A release-version mismatch must + // wipe both — otherwise an in-flight partial download from + // the prior release could be confused for a current one. + let tmp = TempDir::new().unwrap(); + let downloads_dir = tmp.path().join("downloads"); + std::fs::create_dir_all(&downloads_dir).unwrap(); + std::fs::write(downloads_dir.join("1"), b"stale prior-release bytes").unwrap(); + std::fs::write(downloads_dir.join("orphan"), b"junk").unwrap(); + + // Release-version change triggers a full wipe. 
+ let _next = load(&tmp, "1.0.0+2"); + assert!( + !downloads_dir.join("1").exists(), + "stale prior-release download should be wiped" ); - - let new_loaded = UpdaterState::load_or_new_on_error( - &state.cache_dir, - "1.0.0+2", - None, - PatchVerificationMode::default(), + assert!( + !downloads_dir.join("orphan").exists(), + "junk in downloads/ should be wiped" ); + } - assert_eq!( - original_loaded.serialized_state.client_id, - new_loaded.serialized_state.client_id - ); + #[test] + fn client_id_persists_across_release_changes() { + let tmp = TempDir::new().unwrap(); + let original = load(&tmp, "1.0.0+1"); + let original_client_id = original.client_id(); + let next = load(&tmp, "1.0.0+2"); + assert_eq!(next.client_id(), original_client_id); } #[test] - fn does_not_save_cache_dir() { - let original_tmp_dir = TempDir::new().unwrap(); - let original_state = UpdaterState { - cache_dir: original_tmp_dir.path().to_path_buf(), - patch_manager: Box::new(PatchManager::manager_for_test(&original_tmp_dir)), - serialized_state: SerializedState { - client_id: "123".to_string(), - release_version: "1.0.0+1".to_string(), - queued_events: Vec::new(), - }, - }; - original_state.save().unwrap(); + fn corrupt_state_file_creates_new_state() { + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + let p = fake_artifact(&tmp, 1); + state.install_patch(&p, "hash", None).unwrap(); + state.save().unwrap(); - let new_tmp_dir = TempDir::new().unwrap(); - let original_state_path = original_tmp_dir.path().join(STATE_FILE_NAME); - let new_state_path = new_tmp_dir.path().join(STATE_FILE_NAME); - std::fs::rename(original_state_path, new_state_path).unwrap(); + std::fs::write(tmp.path().join(STATE_FILE_NAME), "garbage").unwrap(); - let new_state = - UpdaterState::load(new_tmp_dir.path(), None, PatchVerificationMode::default()).unwrap(); - assert_eq!(new_state.cache_dir, new_tmp_dir.path()); + let mut reloaded = load(&tmp, "1.0.0+2"); + assert!(reloaded.next_boot_patch().is_none()); } #[test] - fn record_boot_failure_for_patch_forwards_to_patch_manager() { - let patch_number = 1; - let tmp_dir = TempDir::new().unwrap(); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_record_boot_failure_for_patch() - .with(eq(patch_number)) - .returning(|_| Ok(())); - let mut state = test_state(&tmp_dir, mock_manage_patches); - assert!(state.record_boot_failure_for_patch(patch_number).is_ok()); + fn install_patch_renames_into_lifecycle_dir_and_sets_next_boot() { + // Ports `patch_manager.rs::add_patch_tests::adds_patch_successfully`. + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + let p = fake_artifact(&tmp, 1); + state.install_patch(&p, "hash", None).unwrap(); + let next = state.next_boot_patch().unwrap(); + assert_eq!(next.number, 1); + assert!(next.path.exists()); + assert!(!tmp.path().join("patch1.full").exists(), "source moved"); } #[test] - fn record_boot_success_for_patch_forwards_to_patch_manager() { - let tmp_dir = TempDir::new().unwrap(); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_record_boot_success() - .returning(|| Ok(())); - let mut state = test_state(&tmp_dir, mock_manage_patches); - - assert!(state.record_boot_success().is_ok()); + fn install_patch_replaces_unbooted_predecessor() { + // Ports + // `patch_manager.rs::next_boot_patch_tests::adding_patch_deletes_unbooted_patch_not_last_booted`. 
+ let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h1", None) + .unwrap(); + state + .install_patch(&fake_artifact(&tmp, 2), "h2", None) + .unwrap(); + assert_eq!(state.next_boot_patch().map(|p| p.number), Some(2)); + assert!(!state.lifecycle.installed_artifact_path(1).exists()); } #[test] - fn last_successfully_booted_patch_forwards_from_patch_manager() { - let tmp_dir = TempDir::new().unwrap(); - let patch = fake_patch(&tmp_dir, 1); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_last_successfully_booted_patch() - .return_const(Some(patch.clone())); - let state = test_state(&tmp_dir, mock_manage_patches); - assert_eq!(state.last_successfully_booted_patch(), Some(patch)); + fn install_patch_errors_when_file_missing() { + // Ports `patch_manager.rs::add_patch_tests::errs_if_file_path_does_not_exist`. + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + let bogus = PatchInfo { + number: 1, + path: tmp.path().join("nope"), + }; + assert!(state.install_patch(&bogus, "h", None).is_err()); + } + + // The base64-encoded RSA key + matching signature were generated for + // signing.rs's tests; reused here to exercise the InstallOnly path + // without standing up our own keypair fixture. + const TEST_PUBLIC_KEY: &str = "MIIBCgKCAQEA2wdpEGbuvlPsb9i0qYrfMefJnEw1BHTi8SYZTKrXOvJWmEpPE1hWfbkvYzXu5a96gV1yocF3DMwn04VmRlKhC4AhsD0NL0UNhYhotbKG91Kwi1vAXpHhCdz5gQEBw0K1uB4Jz+zK6WK+31PryYpwLwbyXNqXoY8IAAUQ4STsHYV5w+BMSi8pepWMRd7DR9RHcbNOZlJvdBQ5NxvB4JN4dRMq8cC73ez1P9d7Dfwv3TWY+he9EmuXLT2UivZSlHIrGBa7MFfqyUe2ro0F7Te/B0si12itBbWIqycvqcXjeOPNn6WEpqN7IWjb9LUh162JyYaz5Lb/VeeJX8LKtElccwIDAQAB"; + const TEST_HASH: &str = "404e5caa5b906f6d03c97657e8c4d604d759f9cfba1a8bba9d5b49a5ebc174f9"; + const TEST_SIGNATURE: &str = "2ixSo5LpaWUSLg2GJEV+D+uyLeLjp0c3vNXnl0yb1iJjAdpn10BFlbcwCcjaJW9PNky2HU2hKOBe62PkFHOU8DDYOfxf2LGg/ToLGPHin85WrwFAceAUYDs7JpQr43dRTbrXcT8k5tuCQOTwXecGwuWcOFFvh0GbXFnyAmi7fLfN9CtTsG2GIOle/LyYLwoviTrXn/fZTZEYrqxD/wZ4QzoWOWLWNvrPbILhqWELkBLhdZeK0+nC2CIxFRYd3bUeOi1AGtPyHKBfdwuf4VO3+HbwJVaAEiD7HU2Bj+Zp1xeSdbznmYgBV86oizrLFd23D+lBfTlmDGgdfNE9J4Z2/g=="; + + fn load_with_verification( + tmp: &TempDir, + public_key: Option<&str>, + mode: PatchVerificationMode, + ) -> UpdaterState { + UpdaterState::load_or_new_on_error( + tmp.path(), + &tmp.path().join("downloads"), + "1.0.0+1", + public_key, + mode, + ) } #[test] - fn next_boot_patch_forwards_from_patch_manager() { - let patch_number = 1; - let tmp_dir = TempDir::new().unwrap(); - let patch = fake_patch(&tmp_dir, patch_number); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_next_boot_patch() - .return_const(Some(patch.clone())); - let mut state = test_state(&tmp_dir, mock_manage_patches); - assert_eq!(state.next_boot_patch(), Some(patch)); + fn install_patch_install_only_accepts_valid_signature() { + // Ports `patch_manager.rs::add_patch_tests::install_only_succeeds_with_valid_signature`. 
+ let tmp = TempDir::new().unwrap(); + let mut state = load_with_verification( + &tmp, + Some(TEST_PUBLIC_KEY), + PatchVerificationMode::InstallOnly, + ); + let p = fake_artifact(&tmp, 1); + state + .install_patch(&p, TEST_HASH, Some(TEST_SIGNATURE)) + .unwrap(); + assert_eq!(state.next_boot_patch().map(|p| p.number), Some(1)); } #[test] - fn validate_next_boot_patch_forwards_to_patch_manager() { - let tmp_dir = TempDir::new().unwrap(); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_validate_next_boot_patch() - .returning(|| Ok(())); - let mut state = test_state(&tmp_dir, mock_manage_patches); - assert!(state.validate_next_boot_patch().is_ok()); + fn install_patch_install_only_rejects_missing_signature() { + // Ports + // `patch_manager.rs::add_patch_tests::install_only_errs_if_signature_is_missing_when_public_key_configured`. + let tmp = TempDir::new().unwrap(); + let mut state = load_with_verification( + &tmp, + Some(TEST_PUBLIC_KEY), + PatchVerificationMode::InstallOnly, + ); + let p = fake_artifact(&tmp, 1); + assert!(state.install_patch(&p, TEST_HASH, None).is_err()); + // Failure leaves no Installed state. + assert!(state.next_boot_patch().is_none()); } #[test] - fn install_patch_forwards_to_patch_manager() { - let patch_number = 1; - let tmp_dir = TempDir::new().unwrap(); - let patch = fake_patch(&tmp_dir, patch_number); - let mut mock_manage_patches = MockManagePatches::new(); - let cloned_patch = patch.clone(); - mock_manage_patches - .expect_add_patch() - .withf(move |number, path, hash, signature| { - number == &cloned_patch.number - && path == cloned_patch.path - && hash == "hash" - && signature == &Some("signature") - }) - .returning(|_, __, ___, ____| Ok(())); - let mut state = test_state(&tmp_dir, mock_manage_patches); - + fn install_patch_install_only_rejects_bad_signature() { + // Ports + // `patch_manager.rs::add_patch_tests::install_only_errs_if_signature_is_invalid` + // and (same code path) + // `patch_manager.rs::add_patch_tests::install_only_errs_if_public_key_is_invalid`. + let tmp = TempDir::new().unwrap(); + let mut state = load_with_verification( + &tmp, + Some(TEST_PUBLIC_KEY), + PatchVerificationMode::InstallOnly, + ); + let p = fake_artifact(&tmp, 1); assert!(state - .install_patch(&patch, "hash", Some("signature")) - .is_ok()); + .install_patch(&p, TEST_HASH, Some("not_a_real_signature")) + .is_err()); } #[test] - fn is_known_bad_patch_returns_value_from_patch_manager() { - let tmp_dir = TempDir::new().unwrap(); - let mut mock_manage_patches = MockManagePatches::new(); - mock_manage_patches - .expect_is_known_bad_patch() - .with(eq(1)) - .return_const(true); - mock_manage_patches - .expect_is_known_bad_patch() - .with(eq(2)) - .return_const(false); - let state = test_state(&tmp_dir, mock_manage_patches); + fn boot_lifecycle_tracks_state() { + // Ports + // `patch_manager.rs::last_successfully_booted_patch_tests::returns_value_from_patches_state` + // and the happy path of + // `patch_manager.rs::record_boot_success_for_patch_tests::succeeds_when_provided_next_boot_patch_number`. 
+ let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h", None) + .unwrap(); + state.record_boot_start_for_patch(1).unwrap(); + assert_eq!(state.currently_booting_patch().map(|p| p.number), Some(1)); + state.record_boot_success().unwrap(); + assert!(state.currently_booting_patch().is_none()); + assert_eq!( + state.last_successfully_booted_patch().map(|p| p.number), + Some(1) + ); + } + + #[test] + fn record_boot_failure_marks_bad_and_clears_next_boot() { + // Ports + // `patch_manager.rs::next_boot_patch_tests::returns_none_patch_if_first_patch_failed_to_boot` + // and + // `patch_manager.rs::record_boot_failure_for_patch_tests::deletes_failed_patch_artifacts`. + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h", None) + .unwrap(); + state.record_boot_start_for_patch(1).unwrap(); + state.record_boot_failure_for_patch(1).unwrap(); assert!(state.is_known_bad_patch(1)); - assert!(!state.is_known_bad_patch(2)); + assert!(state.next_boot_patch().is_none()); } #[test] - fn load_or_new_on_error_clears_patch_state_on_error() -> Result<()> { - let tmp_dir = TempDir::new()?; + fn record_boot_failure_works_without_active_boot() { + // Matches the prior PatchManager semantics: the call doesn't + // require currently_booting_patch to be set; it just marks the + // patch bad and recomputes pointers. + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h", None) + .unwrap(); + state.record_boot_failure_for_patch(1).unwrap(); + assert!(state.is_known_bad_patch(1)); + assert!(state.next_boot_patch().is_none()); + } - // Create a new state, add a patch, and save it. - let mut state = UpdaterState::load_or_new_on_error( - tmp_dir.path(), - "1.0.0+1", - None, - PatchVerificationMode::default(), - ); - let patch = fake_patch(&tmp_dir, 1); - state.install_patch(&patch, "hash", None)?; - state.save()?; - assert_eq!(state.next_boot_patch().unwrap().number, 1); - - // Corrupt the state file. - let state_file = tmp_dir.path().join(STATE_FILE_NAME); - std::fs::write(&state_file, "corrupt json")?; - - // Ensure that, by corrupting the file, we've reset the patches state. - let mut state = UpdaterState::load_or_new_on_error( - tmp_dir.path(), - "1.0.0+2", - None, - PatchVerificationMode::default(), - ); + #[test] + fn uninstall_patch_clears_artifacts_and_recomputes_pointers() { + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h", None) + .unwrap(); + assert_eq!(state.next_boot_patch().map(|p| p.number), Some(1)); + state.uninstall_patch(1).unwrap(); assert!(state.next_boot_patch().is_none()); + assert!(!state.lifecycle.installed_artifact_path(1).exists()); + } + + #[test] + fn is_known_bad_patch_after_mark_bad() { + let tmp = TempDir::new().unwrap(); + let mut state = load(&tmp, "1.0.0+1"); + state + .install_patch(&fake_artifact(&tmp, 1), "h", None) + .unwrap(); + state + .lifecycle + .mark_bad(1, BadReason::InstallHashMismatch) + .unwrap(); + assert!(state.is_known_bad_patch(1)); + } - Ok(()) + #[test] + fn install_patch_install_only_skips_verification_when_no_public_key() { + // Ports + // `patch_manager.rs::add_patch_tests::install_only_succeeds_with_any_signature_if_no_public_key`. + // InstallOnly + no public_key configured → signature is never + // checked, so any value (including garbage) is accepted. 
+ let tmp = TempDir::new().unwrap(); + let mut state = load_with_verification(&tmp, None, PatchVerificationMode::InstallOnly); + let p = fake_artifact(&tmp, 1); + state + .install_patch(&p, "any-hash", Some("garbage-signature")) + .unwrap(); + assert_eq!(state.next_boot_patch().map(|p| p.number), Some(1)); } } diff --git a/library/src/download_state.rs b/library/src/download_state.rs deleted file mode 100644 index 4abf8b14..00000000 --- a/library/src/download_state.rs +++ /dev/null @@ -1,126 +0,0 @@ -/// Tracks metadata about an in-progress patch download so that it can be -/// resumed after a failure or app restart. -/// -/// Stored as a sidecar JSON file alongside the partial download: -/// {download_dir}/{patch_number}.download.json -use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; - -use crate::file_errors::{FileOperation, IoResultExt}; - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub struct DownloadState { - /// The URL this download was started from. Used to decide whether a - /// partial file on disk matches the current server response — if the URL - /// changed, we discard and start fresh. - pub url: String, - /// The patch number being downloaded. - pub patch_number: usize, - /// Expected total file size from Content-Length/Content-Range (if known - /// from a prior download attempt). Used for post-download validation. - pub expected_size: Option, - /// Hash of the inflated patch from the server response. Checked on resume - /// to catch the case where a patch is deleted and re-added with the same - /// number but different content (URL may stay the same but hash changes). - pub expected_hash: String, -} - -/// Returns the sidecar path for a given download path. -/// e.g. "{download_dir}/1" -> "{download_dir}/1.download.json" -pub fn sidecar_path(download_path: &Path) -> PathBuf { - let mut p = download_path.as_os_str().to_owned(); - p.push(".download.json"); - PathBuf::from(p) -} - -/// Write a DownloadState to its sidecar file. -pub fn write_download_state(download_path: &Path, state: &DownloadState) -> anyhow::Result<()> { - let path = sidecar_path(download_path); - let json = serde_json::to_string(state)?; - std::fs::write(&path, json).with_file_context(FileOperation::WriteFile, &path)?; - Ok(()) -} - -/// Read a DownloadState from its sidecar file, if it exists. -pub fn read_download_state(download_path: &Path) -> anyhow::Result> { - let path = sidecar_path(download_path); - if !path.exists() { - return Ok(None); - } - let json = std::fs::read_to_string(&path).with_file_context(FileOperation::ReadFile, &path)?; - let state: DownloadState = serde_json::from_str(&json)?; - Ok(Some(state)) -} - -/// Delete the sidecar file for a download, if it exists. 
-pub fn delete_download_state(download_path: &Path) -> anyhow::Result<()> { - let path = sidecar_path(download_path); - if path.exists() { - std::fs::remove_file(&path).with_file_context(FileOperation::DeleteFile, &path)?; - } - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::TempDir; - - #[test] - fn round_trip_download_state() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("1"); - - let state = DownloadState { - url: "https://example.com/patch/1".to_string(), - patch_number: 1, - expected_size: Some(12345), - expected_hash: "abc123".to_string(), - }; - - write_download_state(&download_path, &state).unwrap(); - let loaded = read_download_state(&download_path).unwrap(); - assert_eq!(loaded, Some(state)); - } - - #[test] - fn read_missing_returns_none() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("99"); - let loaded = read_download_state(&download_path).unwrap(); - assert_eq!(loaded, None); - } - - #[test] - fn delete_removes_sidecar() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("1"); - - let state = DownloadState { - url: "https://example.com/patch/1".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "abc".to_string(), - }; - - write_download_state(&download_path, &state).unwrap(); - assert!(sidecar_path(&download_path).exists()); - - delete_download_state(&download_path).unwrap(); - assert!(!sidecar_path(&download_path).exists()); - } - - #[test] - fn delete_missing_is_ok() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("99"); - // Should not error. - delete_download_state(&download_path).unwrap(); - } - - #[test] - fn sidecar_path_is_correct() { - let p = sidecar_path(Path::new("/cache/downloads/1")); - assert_eq!(p, PathBuf::from("/cache/downloads/1.download.json")); - } -} diff --git a/library/src/file_errors.rs b/library/src/file_errors.rs index 828f58c9..3bb1cd40 100644 --- a/library/src/file_errors.rs +++ b/library/src/file_errors.rs @@ -13,6 +13,7 @@ pub enum FileOperation { ReadFile, #[allow(dead_code)] // Included for completeness; not yet used outside tests. DeleteFile, + #[allow(dead_code)] // Included for completeness; not yet used outside tests. DeleteDir, RenameFile, GetMetadata, diff --git a/library/src/lib.rs b/library/src/lib.rs index fbe31057..63d83ebc 100644 --- a/library/src/lib.rs +++ b/library/src/lib.rs @@ -11,7 +11,6 @@ pub mod c_api; // Declare other .rs file/module exists, but make them private. mod cache; mod config; -mod download_state; mod events; mod file_errors; mod logging; diff --git a/library/src/test_utils.rs b/library/src/test_utils.rs index 6415c3f7..48cb4abc 100644 --- a/library/src/test_utils.rs +++ b/library/src/test_utils.rs @@ -16,6 +16,7 @@ pub fn install_fake_patch(patch_number: usize) -> anyhow::Result<()> { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, diff --git a/library/src/updater.rs b/library/src/updater.rs index 908e570c..f330f13f 100644 --- a/library/src/updater.rs +++ b/library/src/updater.rs @@ -3,20 +3,22 @@ use std::fmt::{Debug, Display, Formatter}; use std::fs::{self}; use std::io::{Cursor, Read, Seek}; -use std::path::{Path, PathBuf}; +use std::path::Path; +// PathBuf is only used by `libapp_path_from_settings`, which is itself +// gated to non-android, non-test builds. 
+#[cfg(not(any(target_os = "android", test)))] +use std::path::PathBuf; use crate::file_errors::{FileOperation, IoResultExt}; use anyhow::{bail, Context, Result}; use dyn_clone::DynClone; +use crate::cache::lifecycle::{self, BadReason, DownloadAction, SkipReason}; use crate::cache::{PatchInfo, UpdaterState}; use crate::config::{set_config, with_config, UpdateConfig}; -use crate::download_state::{self, DownloadState}; use crate::events::{EventType, PatchEvent}; use crate::logging::init_logging; -use crate::network::{ - download_to_path, patches_check_url, DownloadResult, NetworkHooks, PatchCheckRequest, -}; +use crate::network::{download_to_path, patches_check_url, NetworkHooks, PatchCheckRequest}; use crate::updater_lock::{with_updater_thread_lock, UpdaterLockState}; use crate::yaml::YamlConfig; @@ -157,6 +159,7 @@ where with_config(|config| { let state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -172,6 +175,7 @@ where with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -222,6 +226,7 @@ pub fn handle_prior_boot_failure_if_necessary() -> Result<(), InitError> { with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -303,11 +308,15 @@ pub fn check_for_downloadable_update(channel: Option<&str>) -> anyhow::Result Ok(true), - ShouldInstallPatchCheckResult::PatchKnownBad => Ok(false), - ShouldInstallPatchCheckResult::PatchAlreadyInstalled => Ok(false), - } + let action = with_state(|state| { + Ok(state + .lifecycle() + .decide_start(patch.number, &patch.download_url, &patch.hash)) + })?; + Ok(matches!( + action, + DownloadAction::Fresh | DownloadAction::Resume { .. } | DownloadAction::Complete + )) } else { Ok(false) } @@ -427,10 +436,21 @@ fn update_internal(_: &UpdaterLockState, channel: Option<&str>) -> anyhow::Resul let patch = response.patch.ok_or(UpdateError::BadServerResponse)?; - match should_install_patch(patch.number)? 
{ - ShouldInstallPatchCheckResult::PatchOkToInstall => {} - ShouldInstallPatchCheckResult::PatchKnownBad => return Ok(UpdateStatus::UpdateIsBadPatch), - ShouldInstallPatchCheckResult::PatchAlreadyInstalled => return Ok(UpdateStatus::NoUpdate), + let action = with_state(|state| { + Ok(state + .lifecycle() + .decide_start(patch.number, &patch.download_url, &patch.hash)) + })?; + match action { + DownloadAction::Skip(SkipReason::KnownBad) => { + shorebird_info!("Patch {} is known bad, skipping.", patch.number); + return Ok(UpdateStatus::UpdateIsBadPatch); + } + DownloadAction::Skip(SkipReason::AlreadyInstalled) => { + shorebird_info!("Patch {} is already installed, skipping.", patch.number); + return Ok(UpdateStatus::NoUpdate); + } + _ => {} } shorebird_info!( @@ -439,180 +459,158 @@ fn update_internal(_: &UpdaterLockState, channel: Option<&str>) -> anyhow::Resul config.app_id, config.release_version ); - let download_dir = PathBuf::from(&config.download_dir); - let download_path = download_dir.join(patch.number.to_string()); - let start_state = determine_download_start_state( - &download_path, - &patch.download_url, - patch.number, - &patch.hash, - ); + let download_path = + lifecycle::download_artifact_path(Path::new(&config.download_dir), patch.number); - // Ensure the download directory exists. - std::fs::create_dir_all(&download_dir) - .with_file_context(FileOperation::CreateDir, &download_dir)?; - - // Clean up any orphaned files in the download directory. We own this - // directory entirely, so anything that isn't for the current patch is - // stale (e.g. from a prior patch number, a crashed inflate, or a - // partial download for a patch that's since been replaced). - clean_download_dir(&download_dir, patch.number); - - let dl_result = match start_state { - DownloadStartState::Complete(size) => { - // Bytes from a prior attempt are already on disk and the sidecar - // says they're the expected size. Skip the network request and - // let the install path validate them — re-downloading would just - // waste bandwidth in cases like the app being killed between the - // download finishing and install starting. - shorebird_info!( - "Prior download already complete ({} bytes); skipping fetch.", - size - ); - DownloadResult { - total_bytes: size, - content_length: Some(size), + if !matches!(action, DownloadAction::Complete) { + let resume_from = match action { + DownloadAction::Resume { offset } => { + shorebird_info!("Resuming download from byte {}", offset); + offset } - } - DownloadStartState::Fresh | DownloadStartState::Resume(_) => { - // Write sidecar *before* downloading so we can resume on crash. - let dl_state = DownloadState { - url: patch.download_url.clone(), - patch_number: patch.number, - expected_size: None, - expected_hash: patch.hash.clone(), - }; - download_state::write_download_state(&download_path, &dl_state)?; - - let resume_from = match start_state { - DownloadStartState::Resume(offset) => { - shorebird_info!("Resuming download from byte {}", offset); - offset - } - _ => 0, - }; + _ => 0, + }; - // Consider supporting allowing the system to download for us (e.g. iOS). - let dl_result = download_to_path( - &config.network_hooks, + // Record `Downloading` *before* the network call so we can detect + // partial files on the next attempt and resume. + with_mut_state(|state| { + state.lifecycle_mut().record_download_started( + patch.number, &patch.download_url, - &download_path, - resume_from, - )?; - - // Record the actual on-disk size, not the server's Content-Length. 
- // For chunked transfer encoding the server doesn't send - // Content-Length, so dl_result.content_length would be None — and - // a subsequent crash-before-install attempt would then see - // expected_size: None plus a full-size file, fall through to - // Resume(file_size), and re-create the same HTTP 416 this PR - // fixes. Using total_bytes works for both chunked and - // Content-Length responses (they're equal in the latter case). - // - // KNOWN GAP: if the process dies between download_to_path - // returning above and this sidecar write succeeding, expected_size - // stays None from the pre-download write and we hit the 416 loop - // anyway. The window is microseconds; closing it requires - // restructuring (e.g. download_to_path writing the sidecar - // itself, or a single atomic per-patch state document) — tracked - // in shorebirdtech/shorebird#3737. - let dl_state = DownloadState { - expected_size: Some(dl_result.total_bytes), - ..dl_state - }; - download_state::write_download_state(&download_path, &dl_state)?; + &patch.hash, + patch.hash_signature.as_deref(), + ) + })?; - dl_result + if let Some(parent) = download_path.parent() { + std::fs::create_dir_all(parent).with_file_context(FileOperation::CreateDir, parent)?; } - }; - let output_path = download_dir.join(format!("{}.full", patch.number)); - // Once the download bytes are on disk, they will either be consumed - // (install success) or rejected (any failure). Either way the artifacts - // have served their purpose — clean them up unconditionally so a stuck - // partial file can't make a future Range request go past the end of the - // file and yield HTTP 416. - let result = install_downloaded_patch(&config, &patch, &dl_result, &download_path, output_path); - cleanup_download_artifacts(&download_path); - result + // Consider supporting allowing the system to download for us (e.g. iOS). + let dl_result = download_to_path( + &config.network_hooks, + &patch.download_url, + &download_path, + resume_from, + )?; + + // Server-side bug check: when the server told us Content-Length + // ahead of time but delivered a different number of bytes, + // surface the contract violation directly instead of letting it + // surface obliquely through inflate failure. + if let Some(expected) = dl_result.content_length { + if dl_result.total_bytes != expected { + let _ = with_mut_state(|state| state.uninstall_patch(patch.number)); + bail!( + "Download size mismatch: expected {} bytes, got {}", + expected, + dl_result.total_bytes + ); + } + } + + // Transition to `Downloaded`. The recorded size is the actual + // on-disk byte count, not the server's Content-Length — chunked + // responses don't send the latter and the lifecycle relies on + // the state itself (not size comparisons) to distinguish + // complete from partial. + with_mut_state(|state| { + state + .lifecycle_mut() + .record_download_complete(patch.number, dl_result.total_bytes) + })?; + } else { + shorebird_info!("Prior download already complete; skipping fetch."); + } + + install_downloaded_patch(&config, &patch, &download_path) } -/// Consumes the bytes at `download_path`: validates the size, inflates them, -/// hash-checks the result, and installs the patch. Returns `UpdateInstalled` -/// on success. -/// -/// Caller is responsible for cleaning up `download_path` artifacts after this -/// returns, regardless of outcome. 
+/// Inflates the compressed bytes at `download_path` into the lifecycle's +/// installed location, hash-checks the result, and transitions the patch +/// from `Downloaded` to `Installed`. Marks the patch `Bad` (with the +/// appropriate reason) on inflate or hash-check failure so the next +/// update cycle short-circuits via `decide_start`. fn install_downloaded_patch( config: &UpdateConfig, patch: &crate::network::Patch, - dl_result: &DownloadResult, download_path: &Path, - output_path: PathBuf, ) -> anyhow::Result { - // Validate download size if Content-Length was provided. - if let Some(expected) = dl_result.content_length { - if dl_result.total_bytes != expected { - bail!( - "Download size mismatch: expected {} bytes, got {}", - expected, - dl_result.total_bytes - ); - } - } + let installed_path = + lifecycle::installed_artifact_path(Path::new(&config.storage_dir), patch.number); let patch_base_rs = patch_base(config)?; - inflate(download_path, patch_base_rs, &output_path)?; + if let Err(e) = inflate(download_path, patch_base_rs, &installed_path) { + // Inflate failed — bytes won't decompress. Deterministically bad. + mark_patch_bad(patch.number, BadReason::InvalidPatchBytes); + return Err(e); + } - // Check the hash before moving into place. - check_hash(&output_path, &patch.hash).with_context(|| { + if let Err(e) = check_hash(&installed_path, &patch.hash).with_context(|| { format!( "This app reports version {}, but the binary is different from \ the version {} that was submitted to Shorebird.", config.release_version, config.release_version ) - })?; + }) { + // Hash mismatch — bytes decompressed but produced the wrong + // result. Most often a customer build/release mismatch on their + // side; deterministic on this device. + mark_patch_bad(patch.number, BadReason::InstallHashMismatch); + return Err(e); + } + + let installed_size = std::fs::metadata(&installed_path)?.len(); - // We're abusing the config lock as a UpdateState lock for now. - // This makes it so we never try to write to the UpdateState file from - // two threads at once. We could give UpdateState its own lock instead. with_mut_state(|state| { - let patch_info = PatchInfo { - path: output_path, - number: patch.number, - }; - // Move/state update should be "atomic" (it isn't today). - state.install_patch(&patch_info, &patch.hash, patch.hash_signature.as_deref())?; + state + .lifecycle_mut() + .record_install_complete(patch.number, installed_size)?; + state.lifecycle_mut().promote_to_next_boot(patch.number)?; shorebird_info!( "Patch {} successfully downloaded. It will be launched when the app next restarts.", patch.number ); let client_id = state.client_id(); - let config = config.clone(); + let config_clone = config.clone(); let patch_number = patch.number; std::thread::spawn(move || { let event = PatchEvent::new( - &config, + &config_clone, EventType::PatchDownload, patch_number, client_id, None, ); - let report_result = crate::network::send_patch_event(event, &config); - if let Err(err) = report_result { + if let Err(err) = crate::network::send_patch_event(event, &config_clone) { shorebird_error!("Failed to report patch download: {:?}", err); } }); - // Should set some state to say the status is "update required" and that - // we now have a different "next" version of the app from the current - // booted version (patched or not). Ok(UpdateStatus::UpdateInstalled) }) } +/// Marks `patch_number` Bad{reason} and recomputes `next_boot_patch`. 
+/// Errors are logged but not propagated — the caller is already
+/// returning the underlying install error and the state-machine
+/// bookkeeping is best-effort relative to that.
+fn mark_patch_bad(patch_number: usize, reason: BadReason) {
+    let result = with_mut_state(|state| {
+        state.lifecycle_mut().mark_bad(patch_number, reason)?;
+        state.lifecycle_mut().recompute_next_boot()
+    });
+    if let Err(e) = result {
+        shorebird_error!(
+            "Failed to mark patch {} bad after install failure: {:?}",
+            patch_number,
+            e
+        );
+    }
+}
+
 fn roll_back_patches_if_needed(patch_numbers: Vec<usize>) -> anyhow::Result<()> {
     with_mut_state(|state| {
         for patch_number in patch_numbers {
@@ -622,141 +620,6 @@ fn roll_back_patches_if_needed(patch_numbers: Vec<usize>) -> anyhow::Result<()>
-fn should_install_patch(patch_number: usize) -> Result<ShouldInstallPatchCheckResult> {
-    // Don't install a patch if it has previously failed to boot.
-    let is_known_bad_patch = with_state(|state| Ok(state.is_known_bad_patch(patch_number)))?;
-    if is_known_bad_patch {
-        shorebird_info!(
-            "Patch {} has previously failed to boot, skipping.",
-            patch_number
-        );
-        return Ok(ShouldInstallPatchCheckResult::PatchKnownBad);
-    }
-
-    // If we already have the latest available patch downloaded, we don't need to download it again.
-    let next_boot_patch = with_mut_state(|state| Ok(state.next_boot_patch()))?;
-    if let Some(next_boot_patch) = next_boot_patch {
-        if next_boot_patch.number == patch_number {
-            shorebird_info!("Patch {} is already installed, skipping.", patch_number);
-            return Ok(ShouldInstallPatchCheckResult::PatchAlreadyInstalled);
-        }
-    }
-
-    Ok(ShouldInstallPatchCheckResult::PatchOkToInstall)
-}
-
-/// What the prior attempt left on disk, and what to do about it.
-#[derive(Debug, PartialEq)]
-enum DownloadStartState {
-    /// No usable prior artifacts — request the full file.
-    Fresh,
-    /// Partial file from an interrupted prior attempt. Send a Range header
-    /// from this byte offset to ask the server for the remainder.
-    Resume(u64),
-    /// Prior attempt finished downloading but didn't finish installing
-    /// (e.g. the process was killed between download and inflate, or
-    /// inflate/install failed). Skip the network request and let the
-    /// install path validate the bytes already on disk.
-    Complete(u64),
-}
-
-/// Inspects the sidecar and partial file to decide how the next download
-/// attempt should begin. Read-only — does not modify on-disk state.
-fn determine_download_start_state(
-    download_path: &Path,
-    url: &str,
-    patch_number: usize,
-    expected_hash: &str,
-) -> DownloadStartState {
-    let prior_state = match download_state::read_download_state(download_path) {
-        Ok(Some(state)) => state,
-        _ => return DownloadStartState::Fresh,
-    };
-
-    // The hash check catches the case where a patch is deleted and re-added
-    // with the same number — the URL might stay the same but the content
-    // differs.
-    if prior_state.url != url
-        || prior_state.patch_number != patch_number
-        || prior_state.expected_hash != expected_hash
-    {
-        shorebird_info!("Download state mismatch, starting fresh.");
-        return DownloadStartState::Fresh;
-    }
-
-    let file_size = match std::fs::metadata(download_path) {
-        Ok(meta) if meta.len() > 0 => meta.len(),
-        _ => return DownloadStartState::Fresh,
-    };
-
-    // If the prior attempt's bytes look complete, skip the network request.
-    // The install path will validate them; if they're bad it will fail and
-    // the unconditional cleanup at the end of `update_internal` will wipe
-    // them so the next attempt re-downloads.
- if let Some(expected) = prior_state.expected_size { - if file_size >= expected { - return DownloadStartState::Complete(file_size); - } - } - - DownloadStartState::Resume(file_size) -} - -/// Removes everything in `download_dir` except files belonging to -/// `current_patch_number`. We own this directory entirely, so anything -/// unrecognized or from a different patch number is safe to delete. -fn clean_download_dir(download_dir: &Path, current_patch_number: usize) { - let entries = match fs::read_dir(download_dir) { - Ok(entries) => entries, - Err(_) => return, // Directory may not exist yet. - }; - - let current_prefix = current_patch_number.to_string(); - for entry in entries.flatten() { - let file_name = entry.file_name(); - let name = file_name.to_string_lossy(); - - // Keep files that belong to the current patch: - // "{number}", "{number}.full", "{number}.download.json" - if name == current_prefix - || name == format!("{current_prefix}.full") - || name == format!("{current_prefix}.download.json") - { - continue; - } - - // Everything else is an orphan — delete it. - let path = entry.path(); - if path.is_file() { - if let Err(e) = fs::remove_file(&path) { - shorebird_error!("Failed to clean up orphaned file {:?}: {:?}", path, e); - } else { - shorebird_info!("Cleaned up orphaned download file: {:?}", path); - } - } - } -} - -/// Removes the compressed download file and its sidecar. -/// -/// KNOWN GAP: delete errors are logged but not propagated. If a delete -/// silently fails (e.g. transient filesystem error), the stale artifacts -/// persist into the next cycle and could re-create the same 416 loop this -/// PR fixes — `determine_download_start_state` would still see a sidecar -/// with `expected_size` matching the file and try to resume against a -/// resource that's no longer the right one. Disk delete failures are rare -/// in practice; the proper fix lives in shorebirdtech/shorebird#3737. -fn cleanup_download_artifacts(download_path: &Path) { - if let Err(e) = download_state::delete_download_state(download_path) { - shorebird_error!("Failed to delete download sidecar: {:?}", e); - } - if download_path.exists() { - if let Err(e) = std::fs::remove_file(download_path) { - shorebird_error!("Failed to delete download file: {:?}", e); - } - } -} - /// Synchronously checks for an update and downloads and installs it if available. pub fn update(channel: Option<&str>) -> anyhow::Result { match with_updater_thread_lock(|lock_state| update_internal(lock_state, channel)) { @@ -936,6 +799,7 @@ pub fn report_launch_failure() -> anyhow::Result<()> { with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -973,6 +837,7 @@ pub fn report_launch_success() -> anyhow::Result<()> { // and make that the "current" patch. 
let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1186,6 +1051,7 @@ mod tests { with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1483,6 +1349,7 @@ patch_verification: bogus_mode with_config(|config| { let state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1514,6 +1381,7 @@ patch_verification: bogus_mode with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1702,6 +1570,7 @@ patch_verification: bogus_mode let mut updater_state = with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1787,6 +1656,7 @@ patch_verification: bogus_mode with_config(|config| { let mut state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1819,6 +1689,7 @@ patch_verification: bogus_mode with_config(|config| { let state = UpdaterState::load_or_new_on_error( &config.storage_dir, + &config.download_dir, &config.release_version, config.patch_public_key.as_deref(), config.patch_verification, @@ -1830,285 +1701,6 @@ patch_verification: bogus_mode .unwrap(); } - use super::DownloadStartState; - - fn write_state(download_path: &Path, expected_size: Option) { - crate::download_state::write_download_state( - download_path, - &crate::download_state::DownloadState { - url: "http://example.com/patch".to_string(), - patch_number: 1, - expected_size, - expected_hash: "abc123".to_string(), - }, - ) - .unwrap(); - } - - #[test] - fn download_start_state_no_sidecar() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "abc123" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_matching_sidecar_resumes() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 500]).unwrap(); - write_state(&download_path, Some(1000)); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "abc123" - ), - DownloadStartState::Resume(500) - ); - } - - #[test] - fn download_start_state_mismatched_url_starts_fresh() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 500]).unwrap(); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { - url: "http://example.com/old-patch".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "abc123".to_string(), - }, 
- ) - .unwrap(); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/new-patch", - 1, - "abc123" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_mismatched_hash_starts_fresh() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 500]).unwrap(); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { - url: "http://example.com/patch".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "hash_old".to_string(), - }, - ) - .unwrap(); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "hash_new" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_mismatched_patch_number_starts_fresh() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 500]).unwrap(); - write_state(&download_path, None); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 2, - "abc123" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_corrupt_sidecar_starts_fresh() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 500]).unwrap(); - let sidecar = crate::download_state::sidecar_path(&download_path); - fs::write(&sidecar, "not valid json").unwrap(); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "abc123" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_empty_file_starts_fresh() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, []).unwrap(); - write_state(&download_path, None); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "abc123" - ), - DownloadStartState::Fresh - ); - } - - #[test] - fn download_start_state_full_file_reports_complete() { - // Reproduces the customer's stuck state: a prior attempt finished - // downloading (file size == expected_size) but install failed (or the - // app was killed before install). The function should report Complete - // so the caller skips the network request and validates the bytes. - // It must NOT delete anything — that's the caller's job after install. - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, vec![0u8; 1000]).unwrap(); - write_state(&download_path, Some(1000)); - - assert_eq!( - super::determine_download_start_state( - &download_path, - "http://example.com/patch", - 1, - "abc123" - ), - DownloadStartState::Complete(1000) - ); - - // determine_download_start_state must be read-only. 
- assert!(download_path.exists()); - assert!(crate::download_state::sidecar_path(&download_path).exists()); - } - - #[test] - fn cleanup_download_artifacts_removes_file_and_sidecar() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - fs::create_dir_all(download_path.parent().unwrap()).unwrap(); - - fs::write(&download_path, b"partial data").unwrap(); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { - url: "http://example.com/patch".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "abc123".to_string(), - }, - ) - .unwrap(); - - let sidecar = crate::download_state::sidecar_path(&download_path); - assert!(download_path.exists()); - assert!(sidecar.exists()); - - super::cleanup_download_artifacts(&download_path); - - assert!(!download_path.exists()); - assert!(!sidecar.exists()); - } - - #[test] - fn cleanup_download_artifacts_noop_when_missing() { - let tmp = TempDir::new().unwrap(); - let download_path = tmp.path().join("downloads/1"); - // Should not panic when files don't exist. - super::cleanup_download_artifacts(&download_path); - } - - #[test] - fn clean_download_dir_removes_orphans_keeps_current() { - let tmp = TempDir::new().unwrap(); - let download_dir = tmp.path().join("downloads"); - fs::create_dir_all(&download_dir).unwrap(); - - // Files for current patch (number 3) — should be kept. - fs::write(download_dir.join("3"), b"compressed").unwrap(); - fs::write(download_dir.join("3.full"), b"inflated").unwrap(); - fs::write(download_dir.join("3.download.json"), b"{}").unwrap(); - - // Files for old patches — should be deleted. - fs::write(download_dir.join("1"), b"old compressed").unwrap(); - fs::write(download_dir.join("1.full"), b"old inflated").unwrap(); - fs::write(download_dir.join("1.download.json"), b"{}").unwrap(); - fs::write(download_dir.join("2"), b"old compressed").unwrap(); - - // Unrecognized file — should be deleted. - fs::write(download_dir.join("garbage.tmp"), b"junk").unwrap(); - - super::clean_download_dir(&download_dir, 3); - - // Current patch files preserved. - assert!(download_dir.join("3").exists()); - assert!(download_dir.join("3.full").exists()); - assert!(download_dir.join("3.download.json").exists()); - - // Old and unrecognized files removed. - assert!(!download_dir.join("1").exists()); - assert!(!download_dir.join("1.full").exists()); - assert!(!download_dir.join("1.download.json").exists()); - assert!(!download_dir.join("2").exists()); - assert!(!download_dir.join("garbage.tmp").exists()); - } - - #[test] - fn clean_download_dir_noop_when_dir_missing() { - let tmp = TempDir::new().unwrap(); - let download_dir = tmp.path().join("nonexistent"); - // Should not panic. - super::clean_download_dir(&download_dir, 1); - } - #[serial] #[test] fn successful_update_cleans_up_download_artifacts() -> anyhow::Result<()> { @@ -2156,14 +1748,18 @@ patch_verification: bogus_mode let result = super::update(None)?; assert_eq!(result, crate::UpdateStatus::UpdateInstalled); - // After successful install, compressed download and sidecar should be cleaned up. 
- let download_path = tmp_dir.path().join("downloads/1"); - let sidecar_path = crate::download_state::sidecar_path(&download_path); - assert!( - !download_path.exists(), - "compressed download should be deleted" - ); - assert!(!sidecar_path.exists(), "sidecar should be deleted"); + // After successful install, the compressed download bytes are + // gone (record_install_complete deletes them) and the patch's + // state.json reads `Installed`. + let patch_dir = tmp_dir.path().join("patches/1"); + assert!(!tmp_dir.path().join("downloads/1").exists()); + assert!(patch_dir.join("dlc.vmcode").exists()); + let state: crate::cache::lifecycle::PatchState = + crate::cache::disk_io::read(&patch_dir.join("state.json")).unwrap(); + assert!(matches!( + state, + crate::cache::lifecycle::PatchState::Installed { .. } + )); Ok(()) } @@ -2211,11 +1807,10 @@ patch_verification: bogus_mode "Expected size mismatch error, got: {err}" ); - // Verify artifacts were cleaned up after the mismatch. - let download_path = tmp_dir.path().join("downloads/1"); - let sidecar_path = crate::download_state::sidecar_path(&download_path); - assert!(!download_path.exists(), "download should be cleaned up"); - assert!(!sidecar_path.exists(), "sidecar should be cleaned up"); + // Server contract violation: the in-progress download was + // forgotten entirely (uninstall_patch). The next attempt starts + // fresh. + assert!(!tmp_dir.path().join("patches/1").exists()); Ok(()) } @@ -2334,20 +1929,23 @@ patch_verification: bogus_mode let apk_path = tmp_dir.path().join("base.apk"); write_fake_apk(apk_path.to_str().unwrap(), base.as_bytes()); - // Simulate a prior partial download: write the first 10 bytes + sidecar. - let download_dir = tmp_dir.path().join("downloads"); - fs::create_dir_all(&download_dir).unwrap(); - let download_path = download_dir.join("1"); - fs::write(&download_path, first_part).unwrap(); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { + // Simulate a prior partial download: write the first 10 bytes + // (in the cache-rooted download dir) and a Downloading + // state.json (in the persistent state-rooted patch dir) that + // points at the same URL/hash. + let patch_dir = tmp_dir.path().join("patches/1"); + fs::create_dir_all(&patch_dir).unwrap(); + let downloads_dir = tmp_dir.path().join("downloads"); + fs::create_dir_all(&downloads_dir).unwrap(); + fs::write(downloads_dir.join("1"), first_part).unwrap(); + crate::cache::disk_io::write( + &crate::cache::lifecycle::PatchState::Downloading { url: download_url.to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "bb8f1d041a5cdc259055afe9617136799543e0a7a86f86db82f8c1fadbd8cc45" + hash: "bb8f1d041a5cdc259055afe9617136799543e0a7a86f86db82f8c1fadbd8cc45" .to_string(), + signature: None, }, + &patch_dir.join("state.json"), ) .unwrap(); @@ -2410,18 +2008,18 @@ patch_verification: bogus_mode write_fake_apk(apk_path.to_str().unwrap(), base.as_bytes()); // Simulate prior partial download with a DIFFERENT URL. 
- let download_dir = tmp_dir.path().join("downloads"); - fs::create_dir_all(&download_dir).unwrap(); - let download_path = download_dir.join("1"); - fs::write(&download_path, b"stale data from old url").unwrap(); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { + let patch_dir = tmp_dir.path().join("patches/1"); + fs::create_dir_all(&patch_dir).unwrap(); + let downloads_dir = tmp_dir.path().join("downloads"); + fs::create_dir_all(&downloads_dir).unwrap(); + fs::write(downloads_dir.join("1"), b"stale data from old url").unwrap(); + crate::cache::disk_io::write( + &crate::cache::lifecycle::PatchState::Downloading { url: "http://old-cdn.example.com/patch/1".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: "hash_old".to_string(), + hash: "hash_old".to_string(), + signature: None, }, + &patch_dir.join("state.json"), ) .unwrap(); @@ -2901,19 +2499,20 @@ mod state_recovery_tests { Ok(()) })?; - // Corrupt the patches_state.json file. - let patches_state_path = tmp_dir.path().join("patches_state.json"); + // Corrupt the pointers.json file. + let pointers_path = tmp_dir.path().join("pointers.json"); assert!( - patches_state_path.exists(), - "patches_state.json should exist before we corrupt it" + pointers_path.exists(), + "pointers.json should exist before we corrupt it" ); - std::fs::write(&patches_state_path, "{{{{not json at all")?; + std::fs::write(&pointers_path, "{{{{not json at all")?; // Reinitialize — should recover gracefully. init_for_testing(&tmp_dir, None); with_mut_state(|state| { - // Corrupt state means we lose knowledge of the patch. + // Corrupt pointers means we lose knowledge of which patch + // to boot. assert!( state.next_boot_patch().is_none(), "Expected no next_boot_patch after state corruption" @@ -2924,7 +2523,7 @@ mod state_recovery_tests { Ok(()) } - /// When patches_state.json is deleted but patch artifacts remain on disk, + /// When pointers.json is deleted but patch artifacts remain on disk, /// the updater should not try to boot from orphaned artifacts. #[serial] #[test] @@ -2936,15 +2535,15 @@ mod state_recovery_tests { report_launch_start()?; report_launch_success()?; - // Delete patches_state.json but leave artifacts. - let patches_state_path = tmp_dir.path().join("patches_state.json"); - std::fs::remove_file(&patches_state_path)?; + // Delete pointers.json but leave per-patch artifacts. + let pointers_path = tmp_dir.path().join("pointers.json"); + std::fs::remove_file(&pointers_path)?; // Reinitialize. init_for_testing(&tmp_dir, None); with_mut_state(|state| { - // Without state, we shouldn't boot from any patch. + // Without pointers, we shouldn't boot from any patch. assert!(state.next_boot_patch().is_none()); assert!(state.last_successfully_booted_patch().is_none()); Ok(()) @@ -3117,16 +2716,13 @@ mod state_recovery_tests { install_fake_patch(1)?; report_launch_start()?; - // Manually clear boot_started_at from the state file to simulate - // an old state format that doesn't have this field. - let state_path = tmp_dir.path().join("patches_state.json"); - let state_json = std::fs::read_to_string(&state_path)?; - let mut state_value: serde_json::Value = serde_json::from_str(&state_json)?; - state_value - .as_object_mut() - .unwrap() - .remove("boot_started_at"); - std::fs::write(&state_path, serde_json::to_string(&state_value)?)?; + // Manually clear boot_started_at from the pointers file to + // simulate an old state format that doesn't have this field. 
+ let pointers_path = tmp_dir.path().join("pointers.json"); + let json = std::fs::read_to_string(&pointers_path)?; + let mut value: serde_json::Value = serde_json::from_str(&json)?; + value.as_object_mut().unwrap().remove("boot_started_at"); + std::fs::write(&pointers_path, serde_json::to_string(&value)?)?; // Reinitialize — triggers crash recovery. init_for_testing(&tmp_dir, None); @@ -3193,13 +2789,18 @@ mod state_recovery_tests { let original_client_id = with_state(|state| Ok(state.client_id()))?; - // Corrupt patches_state.json only. - let patches_state_path = tmp_dir.path().join("patches_state.json"); + // Install a patch so pointers.json gets written (it's lazy + // otherwise — load_or_default doesn't write a file until a + // pointer is set). + install_fake_patch(1)?; + + // Corrupt pointers.json only. + let pointers_path = tmp_dir.path().join("pointers.json"); assert!( - patches_state_path.exists(), - "patches_state.json should exist before we corrupt it" + pointers_path.exists(), + "pointers.json should exist before we corrupt it" ); - std::fs::write(&patches_state_path, "corrupt")?; + std::fs::write(&pointers_path, "corrupt")?; // Reinitialize — state.json is fine, so client_id should survive. init_for_testing(&tmp_dir, None); @@ -3589,20 +3190,20 @@ mod resume_edge_case_tests { let apk_path = tmp_dir.path().join("base.apk"); write_fake_apk(apk_path.to_str().unwrap(), base.as_bytes()); - // Create sidecar without corresponding partial file. - let download_dir = tmp_dir.path().join("downloads"); - fs::create_dir_all(&download_dir)?; - let download_path = download_dir.join("1"); - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { + // Create the lifecycle state.json without a corresponding + // partial file (the bytes were deleted out from under us). + let patch_dir = tmp_dir.path().join("patches/1"); + fs::create_dir_all(&patch_dir)?; + crate::cache::disk_io::write( + &crate::cache::lifecycle::PatchState::Downloaded { url: "http://example.com/patch/1".to_string(), - patch_number: 1, - expected_size: Some(31), - expected_hash: PATCH_HASH.to_string(), + hash: PATCH_HASH.to_string(), + signature: None, + size: 31, }, + &patch_dir.join("state.json"), )?; - // Note: download_path itself does NOT exist — file was deleted. + // Note: patches/1/download itself does NOT exist — file was deleted. setup_hooks_with_download(|_url, dest: &Path, resume_from: u64| { // Should start fresh since partial file is missing. @@ -3637,19 +3238,20 @@ mod resume_edge_case_tests { let apk_path = tmp_dir.path().join("base.apk"); write_fake_apk(apk_path.to_str().unwrap(), base.as_bytes()); - // Pre-create a partial download and sidecar. - let download_dir = tmp_dir.path().join("downloads"); - fs::create_dir_all(&download_dir)?; - let download_path = download_dir.join("1"); - fs::write(&download_path, &PATCH_BYTES[..10])?; - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { + // Pre-create a partial download (in cache-rooted downloads/) + // and Downloading state.json (in state-rooted patches/). 
+ let patch_dir = tmp_dir.path().join("patches/1"); + fs::create_dir_all(&patch_dir)?; + let downloads_dir = tmp_dir.path().join("downloads"); + fs::create_dir_all(&downloads_dir)?; + fs::write(downloads_dir.join("1"), &PATCH_BYTES[..10])?; + crate::cache::disk_io::write( + &crate::cache::lifecycle::PatchState::Downloading { url: "http://example.com/patch/1".to_string(), - patch_number: 1, - expected_size: None, - expected_hash: PATCH_HASH.to_string(), + hash: PATCH_HASH.to_string(), + signature: None, }, + &patch_dir.join("state.json"), )?; setup_hooks_with_download(|_url, dest: &Path, _resume_from: u64| { @@ -3694,17 +3296,17 @@ mod resume_edge_case_tests { let result = crate::update(None); assert!(result.is_err()); - // The sidecar should have been written before the download started. - let download_path = tmp_dir.path().join("downloads/1"); - let sidecar_path = crate::download_state::sidecar_path(&download_path); + // The lifecycle state.json was written before the download + // started; both it (state-rooted) and the partial file + // (cache-rooted) survive the network error so the next attempt + // can resume. + let patch_dir = tmp_dir.path().join("patches/1"); assert!( - sidecar_path.exists(), - "Sidecar should survive a download failure for retry" + patch_dir.join("state.json").exists(), + "state.json should survive a download failure for retry" ); - - // The partial file should still exist. assert!( - download_path.exists(), + tmp_dir.path().join("downloads/1").exists(), "Partial download should survive for resume" ); @@ -3782,16 +3384,25 @@ mod resume_edge_case_tests { let result = crate::update(None); assert!(result.is_err()); - let download_path = tmp_dir.path().join("downloads/1"); - let sidecar_path = crate::download_state::sidecar_path(&download_path); + // Inflate failure marks the patch Bad{InvalidPatchBytes}: the + // tombstone state.json survives, but artifact files are gone so + // the next attempt either short-circuits (Skip(KnownBad)) or + // can re-download from a clean slate. + let patch_dir = tmp_dir.path().join("patches/1"); + assert!(patch_dir.join("state.json").exists(), "tombstone preserved"); assert!( - !download_path.exists(), - "download file should be cleaned up after inflate failure" - ); - assert!( - !sidecar_path.exists(), - "sidecar should be cleaned up after inflate failure" + !tmp_dir.path().join("downloads/1").exists(), + "download artifact removed on inflate failure" ); + let state: crate::cache::lifecycle::PatchState = + crate::cache::disk_io::read(&patch_dir.join("state.json")).unwrap(); + assert!(matches!( + state, + crate::cache::lifecycle::PatchState::Bad { + reason: crate::cache::lifecycle::BadReason::InvalidPatchBytes, + .. + } + )); Ok(()) } @@ -3817,19 +3428,22 @@ mod resume_edge_case_tests { let apk_path = tmp_dir.path().join("base.apk"); write_fake_apk(apk_path.to_str().unwrap(), base.as_bytes()); - // Simulate the stuck state: full-size partial file plus matching sidecar. - let download_dir = tmp_dir.path().join("downloads"); - fs::create_dir_all(&download_dir)?; - let download_path = download_dir.join("1"); - fs::write(&download_path, PATCH_BYTES)?; - crate::download_state::write_download_state( - &download_path, - &crate::download_state::DownloadState { + // Simulate the post-download / pre-install state: full-size + // download file (cache-rooted) plus a `Downloaded` state.json + // (state-rooted). 
+ let patch_dir = tmp_dir.path().join("patches/1"); + fs::create_dir_all(&patch_dir)?; + let downloads_dir = tmp_dir.path().join("downloads"); + fs::create_dir_all(&downloads_dir)?; + fs::write(downloads_dir.join("1"), PATCH_BYTES)?; + crate::cache::disk_io::write( + &crate::cache::lifecycle::PatchState::Downloaded { url: "http://example.com/patch/1".to_string(), - patch_number: 1, - expected_size: Some(PATCH_BYTES.len() as u64), - expected_hash: PATCH_HASH.to_string(), + hash: PATCH_HASH.to_string(), + signature: None, + size: PATCH_BYTES.len() as u64, }, + &patch_dir.join("state.json"), )?; // The download hook must NOT be called — bytes are already on disk.