Skip to content

Commit 4e93c50

Browse files
committed
Verify cached digest against file
And in case of failure, remove the cached assets; they'll be re-downloaded if needed. Currently re-fetching any kind of file truncates the pre-existing file on disk, so this should be equivalent.
1 parent d2c037e commit 4e93c50

File tree

3 files changed

+56
-16
lines changed

3 files changed

+56
-16
lines changed

data/src/cache.rs

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,29 @@ use std::path::{Path, PathBuf};
55
use chrono::Utc;
66
use derive_more::AsRef;
77
use serde::{Deserialize, Serialize, de::DeserializeOwned};
8+
use sha2::{Digest, Sha256};
89
use tokio::fs;
10+
use tokio_stream::StreamExt;
11+
use tokio_util::io::ReaderStream;
912
use url::Url;
1013

1114
pub use trim::TrimConfig;
1215

1316
/// SHA256 digest of cache content.
1417
#[derive(Debug, Clone, Serialize, Deserialize, AsRef)]
15-
pub struct Digest(String);
18+
pub struct HexDigest(String);
1619

17-
impl Digest {
20+
impl HexDigest {
1821
pub fn new(data: &[u8]) -> Self {
1922
Self(hex::encode(data))
2023
}
2124
}
2225

26+
#[derive(Debug, Clone, Copy)]
27+
pub struct Asset<'a>(pub &'a Path, pub &'a HexDigest);
28+
2329
pub trait CachedAsset {
24-
fn paths(&self) -> Vec<&Path>;
30+
fn assets(&self) -> Vec<Asset<'_>>;
2531
}
2632

2733
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -51,9 +57,9 @@ impl FileCache {
5157
let state: CacheState<T> = serde_json::from_slice(&bytes).ok()?;
5258

5359
if let CacheState::Ok(ref asset) = state {
54-
let any_missing = asset.paths().iter().any(|p| !p.exists());
55-
if any_missing {
56-
// If any of the asset's files are missing, treat the cache as invalid.
60+
let assets = asset.assets();
61+
if verify_assets(&assets).await.is_none() {
62+
remove_assets(&assets).await;
5763
return None;
5864
}
5965
}
@@ -90,7 +96,7 @@ impl FileCache {
9096
.join(format!("{hash}.json"))
9197
}
9298

93-
pub fn blob_path(&self, digest: &Digest, ext: &str) -> PathBuf {
99+
pub fn blob_path(&self, digest: &HexDigest, ext: &str) -> PathBuf {
94100
let hash = digest.as_ref();
95101

96102
blob_dir_from_root(&self.root)
@@ -113,3 +119,36 @@ impl FileCache {
113119
pub fn blob_dir_from_root(root: &Path) -> PathBuf {
114120
root.join("blobs")
115121
}
122+
123+
async fn hash_file(file: fs::File) -> Option<HexDigest> {
124+
let mut stream = ReaderStream::new(file);
125+
let mut hasher = Sha256::new();
126+
127+
while let Some(item) = stream.next().await {
128+
let chunk = item.ok()?;
129+
hasher.update(&chunk);
130+
}
131+
132+
Some(HexDigest::new(&hasher.finalize()))
133+
}
134+
135+
async fn verify_assets(assets: &[Asset<'_>]) -> Option<()> {
136+
for Asset(path, digest) in assets {
137+
// Check if the file actually exists
138+
let file = fs::File::open(path).await.ok()?;
139+
140+
// Check if the file content matches the expected digest
141+
let actual_digest = hash_file(file).await?;
142+
if actual_digest.as_ref() != digest.as_ref() {
143+
return None;
144+
}
145+
}
146+
147+
Some(())
148+
}
149+
150+
async fn remove_assets(assets: &[Asset<'_>]) {
151+
for Asset(path, _) in assets {
152+
let _ = fs::remove_file(path).await;
153+
}
154+
}

data/src/preview.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use std::collections::HashMap;
22
use std::io;
3-
use std::path::Path;
43
use std::sync::{Arc, LazyLock, OnceLock};
54
use std::time::Duration;
65

@@ -19,7 +18,7 @@ use url::Url;
1918

2019
pub use self::card::Card;
2120
pub use self::image::Image;
22-
use crate::cache::{self, CacheState, CachedAsset, FileCache};
21+
use crate::cache::{self, Asset, CacheState, CachedAsset, FileCache};
2322
use crate::message::Source;
2423
use crate::server::Server;
2524
use crate::target::{self, TargetRef};
@@ -125,10 +124,12 @@ impl Preview {
125124
}
126125

127126
impl CachedAsset for Preview {
128-
fn paths(&self) -> Vec<&Path> {
127+
fn assets(&self) -> Vec<Asset<'_>> {
129128
match self {
130-
Preview::Card(c) => vec![c.image.path.as_path()],
131-
Preview::Image(i) => vec![i.path.as_path()],
129+
Preview::Card(c) => {
130+
vec![Asset(c.image.path.as_path(), &c.image.digest)]
131+
}
132+
Preview::Image(i) => vec![Asset(i.path.as_path(), &i.digest)],
132133
}
133134
}
134135
}
@@ -321,7 +322,7 @@ async fn fetch(
321322
written += chunk.len();
322323
}
323324

324-
let digest = cache::Digest::new(&hasher.finalize());
325+
let digest = cache::HexDigest::new(&hasher.finalize());
325326
let image_path =
326327
cache.blob_path(&digest, format.extensions_str()[0]);
327328

data/src/preview/image.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::path::PathBuf;
33
use serde::{Deserialize, Serialize};
44
use url::Url;
55

6-
use crate::cache::Digest;
6+
use crate::cache::HexDigest;
77

88
pub type Format = image::ImageFormat;
99
pub type Error = image::ImageError;
@@ -13,15 +13,15 @@ pub struct Image {
1313
#[serde(with = "serde_format")]
1414
pub format: Format,
1515
pub url: Url,
16-
pub digest: Digest,
16+
pub digest: HexDigest,
1717
pub path: PathBuf,
1818
}
1919

2020
impl Image {
2121
pub fn new(
2222
format: Format,
2323
url: Url,
24-
digest: Digest,
24+
digest: HexDigest,
2525
path: PathBuf,
2626
) -> Self {
2727
Self {

0 commit comments

Comments
 (0)