From 71de7f8dce7b3d08bfaacc14f9b05054a3d0b6d0 Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Mon, 11 May 2026 11:34:04 +0200 Subject: [PATCH 1/7] chore: ensure there are no missing backward compatibilityt tests This is a simple check to ensure that all the versioned types that are present in the source file also exist in one of the .ron files. It does not check that all versions for all types are tested because the version number is not stored in the .ron files. Closes zama-ai/kms-internal#3026 --- Cargo.lock | 3 + Cargo.toml | 1 + core/service/Cargo.toml | 4 + core/service/tests/versioned_enum_coverage.rs | 266 ++++++++++++++++++ 4 files changed, 274 insertions(+) create mode 100644 core/service/tests/versioned_enum_coverage.rs diff --git a/Cargo.lock b/Cargo.lock index 12aa83646d..500220ff18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4279,6 +4279,7 @@ dependencies = [ "signature 2.2.0", "strum", "strum_macros", + "syn 2.0.114", "sysinfo", "tempfile", "test-utils", @@ -4296,6 +4297,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.2", "tokio-util", + "toml", "tonic", "tonic-health", "tonic-tls", @@ -4306,6 +4308,7 @@ dependencies = [ "trait-variant", "url", "validator", + "walkdir", "wasm-bindgen", "x509-parser", "zeroize", diff --git a/Cargo.toml b/Cargo.toml index 00e4acae49..47e216cd81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -220,6 +220,7 @@ typed-builder = "=0.21.0" # Builder pattern macro - HIGH RISK: Individual maint url = { version = "=2.5.8", features = ["serde"] } # URL parsing and manipulation - LOW RISK: servo team, uuid = { version = "=1.19.0", features = ["v4", "fast-rng"] } # UUID generation - LOW RISK: uuid-rs team validator = { version = "=0.20.0", features = ["derive"] } # Struct validation - HIGH RISK: Individual maintainer (Keats), less active maintenance +walkdir = "=2.5.0" # Recursive directory walking - LOW RISK: BurntSushi, 250M+ downloads, test-only dependency wasm-bindgen = { version = "=0.2.114", features = ["serde-serialize"] } # WASM bindings - LOW RISK: rustwasm team x509-parser = { version = "=0.18.0", features = ["verify-aws"] } # X.509 certificate parsing - LOW RISK: rusticata team zeroize = { version = "=1.8.2", features = ["zeroize_derive"] } # Secure memory wiping - LOW RISK: RustCrypto org, critical for key management diff --git a/core/service/Cargo.toml b/core/service/Cargo.toml index 8c201e7c76..220bd96e03 100644 --- a/core/service/Cargo.toml +++ b/core/service/Cargo.toml @@ -142,10 +142,14 @@ threshold-execution = { workspace = true, features = ["testing", "malicious_stra kms = { workspace = true, features = ["insecure", "testing"] } proptest.workspace = true rstest.workspace = true +serde = { workspace = true, features = ["derive"] } +syn = { workspace = true } sysinfo.workspace = true tempfile.workspace = true test-utils-service = { path = "./test-utils-service" } +toml = { workspace = true } tracing-subscriber = { workspace = true, features = ["fmt", "std"] } +walkdir = { workspace = true } [features] diff --git a/core/service/tests/versioned_enum_coverage.rs b/core/service/tests/versioned_enum_coverage.rs new file mode 100644 index 0000000000..ddd1b6f737 --- /dev/null +++ b/core/service/tests/versioned_enum_coverage.rs @@ -0,0 +1,266 @@ +//! Backward-compatibility coverage gate for `VersionsDispatch` enums. +//! +//! Adding a new versionable type requires (a) a `*Versioned` dispatch enum +//! with contiguous `V0..Vn` variants, and (b) at least one `.ron` fixture +//! entry under `backward-compatibility/data/`. + +use std::collections::BTreeSet; +use std::path::{Path, PathBuf}; + +use serde::Deserialize; +use syn::{Attribute, Item, Path as SynPath}; + +#[derive(Deserialize)] +struct WorkspaceManifest { + workspace: Workspace, +} + +#[derive(Deserialize)] +struct Workspace { + members: Vec, +} + +/// One `#[derive(VersionsDispatch)]` enum discovered during the workspace scan. +/// +/// Carries just enough information to drive the two coverage checks: +/// `enum_name` (e.g. `FooVersioned`) is stripped of its `Versioned` suffix to +/// match against `.ron` fixture entries; `variants` is inspected for the +/// `V0..Vn` contiguity rule; `file` is reported back in error messages so the +/// offending source location is obvious. +#[derive(Debug)] +struct Dispatch { + enum_name: String, + variants: Vec, + file: PathBuf, +} + +/// Types we deliberately don't require a direct `.ron` fixture for, because +/// they are transitively exercised as fields of a root type that *does* have +/// a fixture. +/// +/// TODO(zama-ai/kms-internal#3028): this explicit list should go away after +/// we have a proper way to identify which structs need to be tested. +const ALLOW_UNCOVERED: &[&str] = &[ + // Encryption: enc keys and scheme tags are embedded in + // UnifiedSigncryption{,Key,UnsigncryptionKey}OwnedTest / UnifiedCipherTest. + "UnifiedPublicEncKey", + "UnifiedPrivateEncKey", + "PkeSchemeType", + // Signing scheme tag is embedded in Private/PublicSigKeyTest. + "SigningSchemeType", + // Backup: Inner / Material / Payload types are fields of OperatorBackupOutputTest, + // RecoveryValidationMaterialTest, and InternalCustodianSetupMessageTest. + "InnerOperatorBackupOutput", + "BackupMaterial", + "RecoveryValidationMaterialPayload", + "CustodianSetupMessagePayload", + // *Inner metadata structs are fields of CrsGenMetadataTest / KeyGenMetadataTest. + "KeyGenMetadataInner", + "CrsGenMetadataInner", + // Role is serialized inside root fixtures such as PRSSSetup, Share, + // ContextInfo, and custodian/operator backup types. + "Role", + // Threshold-FHE inner key shares: covered transitively through + // ThresholdFheKeysTest / PrivateKeySetTest. + "PublicKeyMaterial", + "ResiduePoly", + "LweSecretKeyShare", + "LweSecretKeyShareEnum", + "GlweSecretKeyShare", + "GlweSecretKeyShareEnum", + "CompressionPrivateKeyShares", + "CompressionPrivateKeySharesEnum", +]; + +const RON_FILES: &[&str] = &[ + "backward-compatibility/data/kms.ron", + "backward-compatibility/data/kms-grpc.ron", + "backward-compatibility/data/threshold-fhe.ron", +]; + +fn repo_root() -> PathBuf { + // CARGO_MANIFEST_DIR is core/service; go up two levels to the workspace root. + Path::new(env!("CARGO_MANIFEST_DIR")) + .ancestors() + .nth(2) + .expect("CARGO_MANIFEST_DIR has fewer than 2 ancestors") + .to_path_buf() +} + +// The repo currently uses plain paths (no globs), so direct TOML parsing is +// sufficient; if globs are introduced later, switch to +// `cargo metadata --format-version 1`. +fn workspace_members() -> Vec { + let root = repo_root(); + let text = std::fs::read_to_string(root.join("Cargo.toml")) + .expect("failed to read workspace Cargo.toml"); + let manifest: WorkspaceManifest = + toml::from_str(&text).expect("failed to parse workspace Cargo.toml"); + manifest + .workspace + .members + .into_iter() + .map(|m| root.join(m)) + .filter(|p| p.is_dir()) + .collect() +} + +fn path_ends_with_ident(path: &SynPath, ident: &str) -> bool { + path.segments + .last() + .is_some_and(|segment| segment.ident == ident) +} + +/// Returns true if `attrs` contains a `#[derive(..., VersionsDispatch, ...)]`. +/// +/// Matches on the trailing path segment so both `VersionsDispatch` and +/// fully-qualified forms like `tfhe_versionable::VersionsDispatch` are detected. +fn derives_versions_dispatch(attrs: &[Attribute]) -> bool { + attrs.iter().any(|a| { + if !a.path().is_ident("derive") { + return false; + } + let mut hit = false; + let _ = a.parse_nested_meta(|m| { + if path_ends_with_ident(&m.path, "VersionsDispatch") { + hit = true; + } + Ok(()) + }); + hit + }) +} + +/// Walks every workspace member and returns one [`Dispatch`] per enum that +/// derives `VersionsDispatch`. +/// +/// `target/`, `tests/`, and hidden directories are skipped: build artifacts +/// would slow the scan, and integration-test files (including this one) may +/// contain dispatch-like fixtures that aren't part of the production surface. +/// +/// Files that fail to read or parse are logged and skipped rather than +/// failing the test, so a single malformed file doesn't mask real coverage +/// gaps in the rest of the workspace. +fn collect_dispatches() -> Vec { + let mut out = Vec::new(); + for crate_dir in workspace_members() { + for entry in walkdir::WalkDir::new(&crate_dir) + .into_iter() + .filter_entry(|e| { + let n = e.file_name().to_string_lossy(); + // Skip build artifacts, integration-test dirs, hidden dirs. + n != "target" && n != "tests" && !n.starts_with('.') + }) + .filter_map(Result::ok) + .filter(|e| e.path().extension().is_some_and(|x| x == "rs")) + { + let src = match std::fs::read_to_string(entry.path()) { + Ok(s) => s, + Err(err) => { + eprintln!("warning: skipping {}: {err}", entry.path().display()); + continue; + } + }; + let parsed = match syn::parse_file(&src) { + Ok(f) => f, + Err(err) => { + eprintln!("warning: skipping {}: {err}", entry.path().display()); + continue; + } + }; + for item in parsed.items { + if let Item::Enum(e) = item { + if !derives_versions_dispatch(&e.attrs) { + continue; + } + out.push(Dispatch { + enum_name: e.ident.to_string(), + variants: e.variants.iter().map(|v| v.ident.to_string()).collect(), + file: entry.path().to_path_buf(), + }); + } + } + } + } + out +} + +#[test] +fn versioned_enums_have_contiguous_variants() { + let dispatches = collect_dispatches(); + assert!( + !dispatches.is_empty(), + "found no VersionsDispatch enums — scan paths likely wrong" + ); + + let mut violations = Vec::new(); + for d in &dispatches { + for (i, v) in d.variants.iter().enumerate() { + let expected = format!("V{i}"); + if v != &expected { + violations.push(format!( + "{}::{}: variant #{i} is `{v}`, expected `{expected}`", + d.file.display(), + d.enum_name, + )); + } + } + } + assert!( + violations.is_empty(), + "non-contiguous Vn variants:\n{}", + violations.join("\n") + ); +} + +#[test] +fn versioned_enums_are_covered_by_ron_metadata() { + let dispatches = collect_dispatches(); + let root = repo_root(); + + // Compare names case-insensitively: the `.ron` files store variant names + // chosen by the test author (e.g. `PrssSetupCombined`) which sometimes + // smooth the casing of the underlying Rust type (`PRSSSetupCombined`). + let mut covered: BTreeSet = BTreeSet::new(); + for f in RON_FILES { + let path = root.join(f); + let text = std::fs::read_to_string(&path) + .unwrap_or_else(|err| panic!("failed to read {}: {err}", path.display())); + for line in text.lines() { + let t = line.trim(); + if let Some(rest) = t.strip_prefix("metadata: ") + && let Some(name) = rest.split('(').next() + { + covered.insert(name.to_lowercase()); + } + } + } + + let mut missing: Vec<(String, PathBuf)> = Vec::new(); + let mut seen: BTreeSet = BTreeSet::new(); + for d in &dispatches { + let underlying = d.enum_name.trim_end_matches("Versioned").to_string(); + if !seen.insert(underlying.clone()) { + continue; + } + if ALLOW_UNCOVERED.contains(&underlying.as_str()) { + continue; + } + if !covered.contains(&underlying.to_lowercase()) { + missing.push((underlying, d.file.clone())); + } + } + + if !missing.is_empty() { + let mut msg = String::from( + "VersionsDispatch enums with no backward-compatibility coverage.\n\ + Add a metadata entry under backward-compatibility/data/*.ron\n\ + (and a generator entry in backward-compatibility/generate-vX.Y.Z/),\n\ + or add the type name to ALLOW_UNCOVERED with justification.\n\n", + ); + for (name, file) in &missing { + msg.push_str(&format!(" - {name} at {}\n", file.display())); + } + panic!("{msg}"); + } +} From 599f4b4691312cebd2a9c6f1ecd3c09575c654f7 Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Tue, 12 May 2026 10:19:29 +0200 Subject: [PATCH 2/7] chore: improve ron parsing --- core/service/tests/versioned_enum_coverage.rs | 51 +++++++++++-------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/core/service/tests/versioned_enum_coverage.rs b/core/service/tests/versioned_enum_coverage.rs index ddd1b6f737..ded4e5ebc8 100644 --- a/core/service/tests/versioned_enum_coverage.rs +++ b/core/service/tests/versioned_enum_coverage.rs @@ -7,7 +7,10 @@ use std::collections::BTreeSet; use std::path::{Path, PathBuf}; -use serde::Deserialize; +use backward_compatibility::{ + TestMetadataDD, TestMetadataKMS, TestMetadataKmsGrpc, load::load_tests_metadata, +}; +use serde::{Deserialize, de::DeserializeOwned}; use syn::{Attribute, Item, Path as SynPath}; #[derive(Deserialize)] @@ -72,12 +75,6 @@ const ALLOW_UNCOVERED: &[&str] = &[ "CompressionPrivateKeySharesEnum", ]; -const RON_FILES: &[&str] = &[ - "backward-compatibility/data/kms.ron", - "backward-compatibility/data/kms-grpc.ron", - "backward-compatibility/data/threshold-fhe.ron", -]; - fn repo_root() -> PathBuf { // CARGO_MANIFEST_DIR is core/service; go up two levels to the workspace root. Path::new(env!("CARGO_MANIFEST_DIR")) @@ -213,6 +210,20 @@ fn versioned_enums_have_contiguous_variants() { ); } +/// Loads a `.ron` fixture file and returns the lowercased `metadata` field of +/// each testcase. +fn load_covered_names(path: &Path) -> Vec +where + M: DeserializeOwned + std::fmt::Display, +{ + let testcases: Vec> = load_tests_metadata(path) + .unwrap_or_else(|err| panic!("failed to parse {}: {err}", path.display())); + testcases + .iter() + .map(|tc| tc.metadata.to_string().to_lowercase()) + .collect() +} + #[test] fn versioned_enums_are_covered_by_ron_metadata() { let dispatches = collect_dispatches(); @@ -221,20 +232,20 @@ fn versioned_enums_are_covered_by_ron_metadata() { // Compare names case-insensitively: the `.ron` files store variant names // chosen by the test author (e.g. `PrssSetupCombined`) which sometimes // smooth the casing of the underlying Rust type (`PRSSSetupCombined`). + // `TestMetadata*` derive `strum::Display`, which renders an enum value as + // just its variant name — exactly what we need to match against + // `*Versioned` dispatch enums. + let data_dir = root.join("backward-compatibility/data"); let mut covered: BTreeSet = BTreeSet::new(); - for f in RON_FILES { - let path = root.join(f); - let text = std::fs::read_to_string(&path) - .unwrap_or_else(|err| panic!("failed to read {}: {err}", path.display())); - for line in text.lines() { - let t = line.trim(); - if let Some(rest) = t.strip_prefix("metadata: ") - && let Some(name) = rest.split('(').next() - { - covered.insert(name.to_lowercase()); - } - } - } + covered.extend(load_covered_names::( + &data_dir.join("kms.ron"), + )); + covered.extend(load_covered_names::( + &data_dir.join("kms-grpc.ron"), + )); + covered.extend(load_covered_names::( + &data_dir.join("threshold-fhe.ron"), + )); let mut missing: Vec<(String, PathBuf)> = Vec::new(); let mut seen: BTreeSet = BTreeSet::new(); From f20af15b54860e882adca87e63060fdb9f685c2e Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Wed, 13 May 2026 15:09:36 +0200 Subject: [PATCH 3/7] chore: clarify ALLOW_UNCOVERED list --- core/service/tests/versioned_enum_coverage.rs | 68 ++++++++++++++++--- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/core/service/tests/versioned_enum_coverage.rs b/core/service/tests/versioned_enum_coverage.rs index ded4e5ebc8..dbd41eb21a 100644 --- a/core/service/tests/versioned_enum_coverage.rs +++ b/core/service/tests/versioned_enum_coverage.rs @@ -44,34 +44,82 @@ struct Dispatch { /// TODO(zama-ai/kms-internal#3028): this explicit list should go away after /// we have a proper way to identify which structs need to be tested. const ALLOW_UNCOVERED: &[&str] = &[ - // Encryption: enc keys and scheme tags are embedded in - // UnifiedSigncryption{,Key,UnsigncryptionKey}OwnedTest / UnifiedCipherTest. + // Field of UnifiedSigncryptionKeyOwned, UnifiedUnsigncryptionKeyOwned, + // CustodianSetupMessagePayload, InternalCustodianSetupMessage, and + // InternalCustodianContext. Covered via UnifiedSigncryptionKeyTest, + // UnifiedUnsigncryptionKeyTest, InternalCustodianSetupMessageTest, and + // InternalCustodianContextTest. "UnifiedPublicEncKey", + // Field of UnifiedUnsigncryptionKeyOwned. + // Covered via UnifiedUnsigncryptionKeyTest. "UnifiedPrivateEncKey", + // Field of UnifiedCipher and UnifiedSigncryption. + // Covered via UnifiedCipherTest and UnifiedSigncryptionTest. "PkeSchemeType", - // Signing scheme tag is embedded in Private/PublicSigKeyTest. + // Field of UnifiedSigncryption. (PrivateSigKey / PublicSigKey expose it + // only via the HasSigningScheme trait method, not as a struct field.) + // Covered via UnifiedSigncryptionTest. "SigningSchemeType", - // Backup: Inner / Material / Payload types are fields of OperatorBackupOutputTest, - // RecoveryValidationMaterialTest, and InternalCustodianSetupMessageTest. + // Map value in RecoveryValidationMaterialPayload.cts and the actual + // serialized fixture body produced by the OperatorBackupOutputTest + // generator. Covered via OperatorBackupOutputTest and + // RecoveryValidationMaterialTest. "InnerOperatorBackupOutput", + // Plaintext that Operator::secret_share_and_signcrypt signcrypts into the + // UnifiedSigncryption inside InnerOperatorBackupOutput; not a struct + // field. Covered (in serialized-and-signcrypted form) via + // OperatorBackupOutputTest. "BackupMaterial", + // Field of RecoveryValidationMaterial.payload. + // Covered via RecoveryValidationMaterialTest. "RecoveryValidationMaterialPayload", + // safe_serialize'd into the protobuf CustodianSetupMessage.payload (then + // unpacked into InternalCustodianSetupMessage on load). + // Covered via InternalCustodianSetupMessageTest. "CustodianSetupMessagePayload", - // *Inner metadata structs are fields of CrsGenMetadataTest / KeyGenMetadataTest. + // Variant payload of KeyGenMetadata::Current. + // Covered via KeyGenMetadataTest and KeyGenMetadataWithExtraDataTest. "KeyGenMetadataInner", + // Variant payload of CrsGenMetadata::Current. + // Covered via CrsGenMetadataTest and CrsGenMetadataWithExtraDataTest. "CrsGenMetadataInner", - // Role is serialized inside root fixtures such as PRSSSetup, Share, - // ContextInfo, and custodian/operator backup types. + // Field of Share, BackupMaterial, InternalCustodianSetupMessage, and + // InternalCustodianRecoveryOutput; map key in + // RecoveryValidationMaterialPayload.{cts,commitments} and + // InternalCustodianContext.custodian_nodes; transitively inside PRSSSetup + // via PrssSet.parties. Covered via ShareTest, PRSSSetupTest, PrssSetTest, + // PrssSetupCombinedTest, OperatorBackupOutputTest, + // RecoveryValidationMaterialTest, InternalCustodianSetupMessageTest, + // InternalCustodianContextTest, and InternalCustodianRecoveryOutputTest. "Role", - // Threshold-FHE inner key shares: covered transitively through - // ThresholdFheKeysTest / PrivateKeySetTest. + // Field of ThresholdFheKeys.public_material. + // Covered via ThresholdFheKeysTest. "PublicKeyMaterial", + // Element type inside the Share> collections used by all + // secret-key shares (PrivateKeySet) and inside LweCiphertextShare.body. + // Covered via ShareTest, PrivateKeySetTest, and ThresholdFheKeysTest. "ResiduePoly", + // Variant payload of LweSecretKeyShareEnum, and direct field of + // PrivateKeySet.{glwe_secret_key_share_sns_as_lwe, + // glwe_sns_compression_key_as_lwe} (plus older PrivateKeySetV{1,2} + // fields). Covered via PrivateKeySetTest and ThresholdFheKeysTest. "LweSecretKeyShare", + // Field of PrivateKeySet.{lwe_encryption,lwe_compute,oprf}_secret_key_share. + // Covered via PrivateKeySetTest and ThresholdFheKeysTest. "LweSecretKeyShareEnum", + // Field of CompressionPrivateKeyShares.post_packing_ks_key and + // SnsCompressionPrivateKeyShares.post_packing_ks_key, plus variant + // payload of GlweSecretKeyShareEnum. Covered via PrivateKeySetTest and + // ThresholdFheKeysTest. "GlweSecretKeyShare", + // Field of PrivateKeySet.glwe_secret_key_share. + // Covered via PrivateKeySetTest and ThresholdFheKeysTest. "GlweSecretKeyShareEnum", + // Variant payload of CompressionPrivateKeySharesEnum. + // Covered via PrivateKeySetTest and ThresholdFheKeysTest. "CompressionPrivateKeyShares", + // Field of PrivateKeySet.glwe_secret_key_share_compression. + // Covered via PrivateKeySetTest and ThresholdFheKeysTest. "CompressionPrivateKeySharesEnum", ]; From 6b6d26f46b001f7a0d6c48aac6dc531a4dbfa245 Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Wed, 13 May 2026 16:14:56 +0200 Subject: [PATCH 4/7] chore: address comments --- Cargo.lock | 39 +++- Cargo.toml | 1 + core/service/Cargo.toml | 2 +- core/service/tests/versioned_enum_coverage.rs | 221 ++++++++++++++---- docs/developer/backward_compatibility.md | 34 +++ 5 files changed, 248 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 500220ff18..af84794bff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2083,6 +2083,39 @@ dependencies = [ "serde", ] +[[package]] +name = "camino" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" +dependencies = [ + "serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd0061da739915fae12ea00e16397555ed4371a6bb285431aab930f61b0aa4ba" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "cargo_metadata" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" +dependencies = [ + "camino", + "cargo-platform", + "semver 1.0.27", + "serde", + "serde_json", + "thiserror 2.0.12", +] + [[package]] name = "cast" version = "0.3.0" @@ -4243,6 +4276,7 @@ dependencies = [ "bc2wrap", "bincode 2.0.1", "bip39", + "cargo_metadata", "cbc", "cfg-if", "ciborium", @@ -4297,7 +4331,6 @@ dependencies = [ "tokio", "tokio-rustls 0.26.2", "tokio-util", - "toml", "tonic", "tonic-health", "tonic-tls", @@ -6743,6 +6776,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "semver-parser" diff --git a/Cargo.toml b/Cargo.toml index 47e216cd81..e608a50ab6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,6 +124,7 @@ backoff = "=0.4.0" # Retry with exponential backoff - HIGH RISK: Individual mai bincode = { version = "=2.0.1", features = ["serde"] } # Binary serialization - CRITICAL RISK: bincode-org, 147M+ downloads bip39 = { version = "=2.2.2", features = ["alloc"] } # BIP39 mnemonic seed phrases - LOW RISK: rust-bitcoin org (reputable), CI enabled bytes = "=1.11.1" # Byte buffer utilities - LOW RISK: tokio team, 100M+ downloads +cargo_metadata = "=0.23.1" # Workspace metadata extraction - LOW RISK: oli-obk (rust-lang member) + community, 60M+ downloads, test-only dependency cbc = { version = "=0.1.2", features = ["alloc"] } # CBC block cipher mode - LOW RISK: RustCrypto org, standard cipher mode cfg-if = "=1.0.4" # Conditional compilation - LOW RISK: Rust-lang org owned clap = { version = "=4.5.47", features = ["derive"] } # CLI argument parsing - LOW RISK: clap-rs team diff --git a/core/service/Cargo.toml b/core/service/Cargo.toml index 220bd96e03..c2e7f8a5f2 100644 --- a/core/service/Cargo.toml +++ b/core/service/Cargo.toml @@ -137,6 +137,7 @@ zeroize.workspace = true # Maintain alphabetical order assert_cmd.workspace = true backward-compatibility = { workspace = true, features = ["load", "tests"] } +cargo_metadata = { workspace = true } threshold-execution = { workspace = true, features = ["testing", "malicious_strategies"] } # Self-import for integration tests kms = { workspace = true, features = ["insecure", "testing"] } @@ -147,7 +148,6 @@ syn = { workspace = true } sysinfo.workspace = true tempfile.workspace = true test-utils-service = { path = "./test-utils-service" } -toml = { workspace = true } tracing-subscriber = { workspace = true, features = ["fmt", "std"] } walkdir = { workspace = true } diff --git a/core/service/tests/versioned_enum_coverage.rs b/core/service/tests/versioned_enum_coverage.rs index dbd41eb21a..8fef68b287 100644 --- a/core/service/tests/versioned_enum_coverage.rs +++ b/core/service/tests/versioned_enum_coverage.rs @@ -1,4 +1,6 @@ -//! Backward-compatibility coverage gate for `VersionsDispatch` enums. +//! Backward-compatibility coverage gate for `VersionsDispatch` enums. This is a +//! best-effort solution, not a guarantee there are no coverage gaps if the +//! tests here pass! //! //! Adding a new versionable type requires (a) a `*Versioned` dispatch enum //! with contiguous `V0..Vn` variants, and (b) at least one `.ron` fixture @@ -10,19 +12,10 @@ use std::path::{Path, PathBuf}; use backward_compatibility::{ TestMetadataDD, TestMetadataKMS, TestMetadataKmsGrpc, load::load_tests_metadata, }; -use serde::{Deserialize, de::DeserializeOwned}; +use cargo_metadata::{Metadata, MetadataCommand}; +use serde::de::DeserializeOwned; use syn::{Attribute, Item, Path as SynPath}; -#[derive(Deserialize)] -struct WorkspaceManifest { - workspace: Workspace, -} - -#[derive(Deserialize)] -struct Workspace { - members: Vec, -} - /// One `#[derive(VersionsDispatch)]` enum discovered during the workspace scan. /// /// Carries just enough information to drive the two coverage checks: @@ -123,30 +116,27 @@ const ALLOW_UNCOVERED: &[&str] = &[ "CompressionPrivateKeySharesEnum", ]; -fn repo_root() -> PathBuf { - // CARGO_MANIFEST_DIR is core/service; go up two levels to the workspace root. - Path::new(env!("CARGO_MANIFEST_DIR")) - .ancestors() - .nth(2) - .expect("CARGO_MANIFEST_DIR has fewer than 2 ancestors") - .to_path_buf() +fn cargo_metadata() -> Metadata { + MetadataCommand::new() + .exec() + .expect("failed to run `cargo metadata`") +} + +fn repo_root(metadata: &Metadata) -> PathBuf { + metadata.workspace_root.clone().into_std_path_buf() } -// The repo currently uses plain paths (no globs), so direct TOML parsing is -// sufficient; if globs are introduced later, switch to -// `cargo metadata --format-version 1`. -fn workspace_members() -> Vec { - let root = repo_root(); - let text = std::fs::read_to_string(root.join("Cargo.toml")) - .expect("failed to read workspace Cargo.toml"); - let manifest: WorkspaceManifest = - toml::from_str(&text).expect("failed to parse workspace Cargo.toml"); - manifest - .workspace - .members +fn workspace_members(metadata: &Metadata) -> Vec { + metadata + .workspace_packages() .into_iter() - .map(|m| root.join(m)) - .filter(|p| p.is_dir()) + .map(|pkg| { + pkg.manifest_path + .parent() + .expect("package manifest has a parent directory") + .as_std_path() + .to_path_buf() + }) .collect() } @@ -186,9 +176,9 @@ fn derives_versions_dispatch(attrs: &[Attribute]) -> bool { /// Files that fail to read or parse are logged and skipped rather than /// failing the test, so a single malformed file doesn't mask real coverage /// gaps in the rest of the workspace. -fn collect_dispatches() -> Vec { +fn collect_dispatches(metadata: &Metadata) -> Vec { let mut out = Vec::new(); - for crate_dir in workspace_members() { + for crate_dir in workspace_members(metadata) { for entry in walkdir::WalkDir::new(&crate_dir) .into_iter() .filter_entry(|e| { @@ -230,9 +220,36 @@ fn collect_dispatches() -> Vec { out } +#[derive(Debug, PartialEq, Eq)] +struct VariantViolation { + index: usize, + actual: String, + expected: String, +} + +/// Reports each variant whose name does not match its positional expectation +/// `V{i}`. An empty result means the variants form a contiguous `V0..Vn` +/// sequence. +fn check_variant_contiguity>(variants: &[S]) -> Vec { + variants + .iter() + .enumerate() + .filter_map(|(i, v)| { + let expected = format!("V{i}"); + let actual = v.as_ref(); + (actual != expected).then(|| VariantViolation { + index: i, + actual: actual.to_string(), + expected, + }) + }) + .collect() +} + #[test] fn versioned_enums_have_contiguous_variants() { - let dispatches = collect_dispatches(); + let metadata = cargo_metadata(); + let dispatches = collect_dispatches(&metadata); assert!( !dispatches.is_empty(), "found no VersionsDispatch enums — scan paths likely wrong" @@ -240,15 +257,15 @@ fn versioned_enums_have_contiguous_variants() { let mut violations = Vec::new(); for d in &dispatches { - for (i, v) in d.variants.iter().enumerate() { - let expected = format!("V{i}"); - if v != &expected { - violations.push(format!( - "{}::{}: variant #{i} is `{v}`, expected `{expected}`", - d.file.display(), - d.enum_name, - )); - } + for v in check_variant_contiguity(&d.variants) { + violations.push(format!( + "{}::{}: variant #{} is `{}`, expected `{}`", + d.file.display(), + d.enum_name, + v.index, + v.actual, + v.expected, + )); } } assert!( @@ -274,8 +291,9 @@ where #[test] fn versioned_enums_are_covered_by_ron_metadata() { - let dispatches = collect_dispatches(); - let root = repo_root(); + let metadata = cargo_metadata(); + let dispatches = collect_dispatches(&metadata); + let root = repo_root(&metadata); // Compare names case-insensitively: the `.ron` files store variant names // chosen by the test author (e.g. `PrssSetupCombined`) which sometimes @@ -323,3 +341,112 @@ fn versioned_enums_are_covered_by_ron_metadata() { panic!("{msg}"); } } + +#[test] +fn check_variant_contiguity_corner_cases() { + // Canonical V0..Vn sequence — no violations. + assert!(check_variant_contiguity(&["V0", "V1", "V2"]).is_empty()); + + // Single V0 variant — valid. + assert!(check_variant_contiguity(&["V0"]).is_empty()); + + // Empty variant list — locks in current behaviour (no violations). + assert!(check_variant_contiguity::<&str>(&[]).is_empty()); + + // Gap with a high number: V11 at index 2 should have been V2. + assert_eq!( + check_variant_contiguity(&["V0", "V1", "V11"]), + vec![VariantViolation { + index: 2, + actual: "V11".into(), + expected: "V2".into(), + }], + ); + + // Internal gap: index 1 expected V1 but got V2. + assert_eq!( + check_variant_contiguity(&["V0", "V2"]), + vec![VariantViolation { + index: 1, + actual: "V2".into(), + expected: "V1".into(), + }], + ); + + // Out of order: both positions wrong, both reported. + assert_eq!( + check_variant_contiguity(&["V1", "V0"]), + vec![ + VariantViolation { + index: 0, + actual: "V1".into(), + expected: "V0".into(), + }, + VariantViolation { + index: 1, + actual: "V0".into(), + expected: "V1".into(), + }, + ], + ); + + // Non-numeric suffix: Vfoo at index 0 — flagged because it isn't `V0`. + assert_eq!( + check_variant_contiguity(&["Vfoo"]), + vec![VariantViolation { + index: 0, + actual: "Vfoo".into(), + expected: "V0".into(), + }], + ); + + // Leading zero: V01 is not V0 — comparison is string equality, not + // numeric normalization. + assert_eq!( + check_variant_contiguity(&["V01"]), + vec![VariantViolation { + index: 0, + actual: "V01".into(), + expected: "V0".into(), + }], + ); + + // Duplicate V0 at index 1 — second occurrence is wrong because index 1 + // should be V1. + assert_eq!( + check_variant_contiguity(&["V0", "V0"]), + vec![VariantViolation { + index: 1, + actual: "V0".into(), + expected: "V1".into(), + }], + ); +} + +#[test] +fn derives_versions_dispatch_corner_cases() { + // Bare ident matches. + let attr: Attribute = syn::parse_quote!(#[derive(VersionsDispatch)]); + assert!(derives_versions_dispatch(&[attr])); + + // Fully-qualified path also matches — only the trailing segment is + // compared. + let attr: Attribute = syn::parse_quote!(#[derive(tfhe_versionable::VersionsDispatch)]); + assert!(derives_versions_dispatch(&[attr])); + + // Matched when mixed in with other derives. + let attr: Attribute = syn::parse_quote!(#[derive(Clone, Debug, VersionsDispatch, Serialize)]); + assert!(derives_versions_dispatch(&[attr])); + + // Different ident that merely starts with "VersionsDispatch" must not + // match (guards against accidental substring/prefix matching). + let attr: Attribute = syn::parse_quote!(#[derive(VersionsDispatchOther)]); + assert!(!derives_versions_dispatch(&[attr])); + + // Non-derive attributes are ignored entirely. + let attr: Attribute = syn::parse_quote!(#[versionize(FooVersioned)]); + assert!(!derives_versions_dispatch(&[attr])); + + // No attributes at all. + assert!(!derives_versions_dispatch(&[])); +} diff --git a/docs/developer/backward_compatibility.md b/docs/developer/backward_compatibility.md index 0de00ecf41..52a5bcd26e 100644 --- a/docs/developer/backward_compatibility.md +++ b/docs/developer/backward_compatibility.md @@ -316,6 +316,40 @@ impl TestedModule for KMS { ``` +## Coverage gate for `VersionsDispatch` enums + +A workspace-level test at [`core/service/tests/versioned_enum_coverage.rs`](../../core/service/tests/versioned_enum_coverage.rs) statically scans every `#[derive(VersionsDispatch)]` enum in the workspace and enforces two invariants: + +1. **Contiguous variants** — variants must be named `V0`, `V1`, `V2`, … in order. +2. **Fixture coverage** — every dispatch type (with its `Versioned` suffix stripped) must appear as a variant of one of the `TestMetadata*` enums backing `backward-compatibility/data/{kms,kms-grpc,threshold-fhe}.ron`, *or* be explicitly listed in the `ALLOW_UNCOVERED` constant in the same test file. + +The default expectation when you add a new `#[derive(VersionsDispatch)]` enum is to add a direct `.ron` fixture for it, as described in [Adding a test for a new type](#adding-a-test-for-a-new-type). `ALLOW_UNCOVERED` is for inner types that are only reachable as fields of a parent type that already has a fixture. + +### Updating `ALLOW_UNCOVERED` + +Each entry must carry a comment that records: + +- **Who uses it** — the parent struct/enum that contains the type as a field, or the precise way the type is otherwise reached (e.g. as a signcrypted payload). +- **Who covers it** — the `*Test` fixture(s) in `backward-compatibility/src/lib.rs` whose serialized output transitively exercises the type. + +Example: + +```rust +// Field of ThresholdFheKeys.public_material. +// Covered via ThresholdFheKeysTest. +"PublicKeyMaterial", +``` + +⚠️ **The list is manually curated and not self-verifying.** If a developer later starts serializing one of these types directly — or removes the parent field that used to carry it — the coverage gate keeps passing (the type is still on the allowlist) while no `.ron` fixture actually exercises it. The result is a silent backward-compatibility gap. + +**Whenever you touch a type listed in `ALLOW_UNCOVERED`, re-audit its entry:** + +- If the type is now serialized at a top level (e.g. as a stored object, a gRPC field body, or the root of a new fixture), **remove it from `ALLOW_UNCOVERED` and add a direct fixture** following [Adding a test for a new type](#adding-a-test-for-a-new-type). +- If the parent named in the comment no longer carries the type, rewrite the comment to point at the current parent — or, if no parent remains, remove the entry. +- If the type is deleted, remove the entry: stale allowlist names go unmatched and silently rot. + +Tracking ticket for replacing this manual list with structural reachability checks: [zama-ai/kms-internal#3028](https://github.com/zama-ai/kms-internal/issues/3028). + ## Adding a new kms-core release ⚠️ **Important**: Before adding a new version, check for dependency compatibility. See [`backward-compatibility/ADDING_NEW_VERSIONS.md`](../../backward-compatibility/ADDING_NEW_VERSIONS.md) for detailed instructions. From cbfcf89c5d3825617e9cbb0df5669636480548db Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Fri, 15 May 2026 11:46:17 +0200 Subject: [PATCH 5/7] chore: dylint impl for autodetect top level serialization types --- ai-docs/COMMANDS.md | 2 +- dylint.toml | 2 +- tools/kms-lints/.cargo/config.toml | 5 + tools/kms-lints/.gitignore | 2 + tools/kms-lints/Cargo.toml | 8 + tools/kms-lints/README.md | 42 + tools/kms-lints/common/Cargo.toml | 16 + tools/kms-lints/common/src/lib.rs | 139 ++++ tools/kms-lints/lints/Cargo.toml | 32 + .../lints/src/bc2wrap_type_inventory.rs | 764 ++++++++++++++++++ tools/kms-lints/lints/src/lib.rs | 20 + .../tests/bc2wrap_type_inventory/main.rs | 212 +++++ .../tests/bc2wrap_type_inventory/main.stderr | 20 + tools/kms-lints/rust-toolchain | 4 + 14 files changed, 1266 insertions(+), 2 deletions(-) create mode 100644 tools/kms-lints/.cargo/config.toml create mode 100644 tools/kms-lints/.gitignore create mode 100644 tools/kms-lints/Cargo.toml create mode 100644 tools/kms-lints/README.md create mode 100644 tools/kms-lints/common/Cargo.toml create mode 100644 tools/kms-lints/common/src/lib.rs create mode 100644 tools/kms-lints/lints/Cargo.toml create mode 100644 tools/kms-lints/lints/src/bc2wrap_type_inventory.rs create mode 100644 tools/kms-lints/lints/src/lib.rs create mode 100644 tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs create mode 100644 tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr create mode 100644 tools/kms-lints/rust-toolchain diff --git a/ai-docs/COMMANDS.md b/ai-docs/COMMANDS.md index 2206faa4e2..932068e1b3 100644 --- a/ai-docs/COMMANDS.md +++ b/ai-docs/COMMANDS.md @@ -2,7 +2,7 @@ Commands that come up during ordinary work in this repo. All commands are expected to be run from the repository root unless noted otherwise. -Toolchain is pinned to Rust `1.95.0` via [`rust-toolchain.toml`](../rust-toolchain.toml). System prerequisites: `protoc`, `pkgconfig`, `openssl`, Docker. +Toolchain is pinned to Rust `1.94.0` via [`rust-toolchain.toml`](../rust-toolchain.toml). System prerequisites: `protoc`, `pkgconfig`, `openssl`, Docker. ## Build and lint diff --git a/dylint.toml b/dylint.toml index 0297b0ecd4..17027fa49c 100644 --- a/dylint.toml +++ b/dylint.toml @@ -1,8 +1,8 @@ [workspace.metadata.dylint] libraries = [ + { path = "tools/kms-lints", pattern = "lints" }, { git = "https://github.com/trailofbits/dylint", rev = "2c20d164e06c446de2cecadff0c55e16a877c006", pattern = "examples/general/non_local_effect_before_error_return" }, ] [non_local_effect_before_error_return] work_limit = 100000000 - diff --git a/tools/kms-lints/.cargo/config.toml b/tools/kms-lints/.cargo/config.toml new file mode 100644 index 0000000000..19ae6c631c --- /dev/null +++ b/tools/kms-lints/.cargo/config.toml @@ -0,0 +1,5 @@ +[build] +target-dir = "../../target/kms-lints" + +[target.'cfg(all())'] +linker = "dylint-link" diff --git a/tools/kms-lints/.gitignore b/tools/kms-lints/.gitignore new file mode 100644 index 0000000000..06c5d5bf8b --- /dev/null +++ b/tools/kms-lints/.gitignore @@ -0,0 +1,2 @@ +# Cargo creates a lockfile for this nested Dylint workspace during local runs. +Cargo.lock diff --git a/tools/kms-lints/Cargo.toml b/tools/kms-lints/Cargo.toml new file mode 100644 index 0000000000..dfd3027cf2 --- /dev/null +++ b/tools/kms-lints/Cargo.toml @@ -0,0 +1,8 @@ +[workspace] +members = ["common", "lints"] +resolver = "3" + +[workspace.dependencies] +dylint_linting = "5.0.0" # Dylint helper crate; required for custom lint libraries and follows the existing tfhe-lints convention. +serde = { version = "1.0", features = ["derive"] } # Serialization for generated JSON inventory; maintained by dtolnay and already used across KMS. +serde_json = "1.0" # JSON output for generated inventories; maintained by dtolnay and already used across KMS. diff --git a/tools/kms-lints/README.md b/tools/kms-lints/README.md new file mode 100644 index 0000000000..d77fc9b27d --- /dev/null +++ b/tools/kms-lints/README.md @@ -0,0 +1,42 @@ +# Project specific lints for KMS + +This tool is based on [Dylint](https://github.com/trailofbits/dylint). + +## Usage + +From the KMS repository root: + +```sh +cargo dylint --all --workspace +``` + +Do not pass `--all-targets` when generating the inventory, because the lint is +intended to describe production lib/bin targets, not tests, benches, or +examples. + +## Lints + +### `bc2wrap_type_inventory` + +Collects the root workspace-local types used at curated serialization sink call +sites, including: + +- `bc2wrap::serialize` +- `bc2wrap::serialize_into` +- `bc2wrap::deserialize_safe` +- `bc2wrap::deserialize_unsafe` +- `store_versioned_at_request_id` +- `store_versioned_at_request_and_epoch_id` +- `read_versioned_at_request_id` +- `read_versioned_at_request_and_epoch_id` +- the `write_all` storage wrapper method + +The lint writes one JSON file per checked crate target under +`target/kms-lints/bc2wrap-type-inventory` by default. Set +`KMS_BC2WRAP_INVENTORY_DIR` to override the output directory. + +The inventory records only root types. It does not recursively expand fields. +Each file contains full `calls` plus a `types` summary categorized as `local`, +`foreign`, `generic`, `compound`, `primitive`, or `unknown` for auditability. +Call records include `sink_path` (the rustc def path of the matched sink), +and type summaries aggregate every distinct sink path each root flowed through. diff --git a/tools/kms-lints/common/Cargo.toml b/tools/kms-lints/common/Cargo.toml new file mode 100644 index 0000000000..e5ba160069 --- /dev/null +++ b/tools/kms-lints/common/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "kms-lints-common" +version = "0.1.0" +description = "Shared utilities for KMS lints" +edition = "2024" +publish = false + +[lib] +test = false +doctest = false + +[dependencies] +serde_json = { workspace = true } + +[package.metadata.rust-analyzer] +rustc_private = true diff --git a/tools/kms-lints/common/src/lib.rs b/tools/kms-lints/common/src/lib.rs new file mode 100644 index 0000000000..a3f9e6146c --- /dev/null +++ b/tools/kms-lints/common/src/lib.rs @@ -0,0 +1,139 @@ +#![feature(rustc_private)] +#![warn(unused_extern_crates)] + +extern crate rustc_hir; +extern crate rustc_lint; +extern crate rustc_middle; +extern crate rustc_span; + +use std::collections::BTreeSet; +use std::path::PathBuf; +use std::process::Command; +use std::sync::OnceLock; + +use rustc_hir::def_id::DefId; +use rustc_lint::LateContext; +use rustc_middle::ty::{Ty, TyKind}; +use rustc_span::Span; + +/// Name of the environment variable used to override the inventory output directory. +pub const INVENTORY_DIR_ENV: &str = "KMS_BC2WRAP_INVENTORY_DIR"; + +/// Source position of a linted expression. +#[derive(Clone, Debug)] +pub struct SourcePosition { + /// Source file name as reported by rustc. + pub file: String, + /// One-based source line. + pub line: usize, + /// One-based source column. + pub column: usize, +} + +/// Returns a stable display position for a span. +pub fn source_position(cx: &LateContext<'_>, span: Span) -> SourcePosition { + let source_map = cx.tcx.sess.source_map(); + let location = source_map.lookup_char_pos(span.lo()); + + SourcePosition { + file: location + .file + .name + .prefer_local_unconditionally() + .to_string(), + line: location.line, + column: location.col_display + 1, + } +} + +/// Gets the [`DefId`] of a type when the root is a named definition. +pub fn get_def_id_from_ty(ty: Ty<'_>) -> Option { + match ty.kind() { + TyKind::Adt(adt_def, _) => Some(adt_def.did()), + TyKind::Alias(_, alias_ty) => Some(alias_ty.def_id), + TyKind::Dynamic(predicates, ..) => predicates.principal_def_id(), + TyKind::FnDef(def_id, _) + | TyKind::Foreign(def_id) + | TyKind::Closure(def_id, ..) + | TyKind::CoroutineClosure(def_id, _) + | TyKind::Coroutine(def_id, _) + | TyKind::CoroutineWitness(def_id, _) => Some(*def_id), + _ => None, + } +} + +/// Peels references from a type until the root value type is reached. +pub fn peel_references(mut ty: Ty<'_>) -> Ty<'_> { + while let TyKind::Ref(_, inner, _) = ty.kind() { + ty = *inner; + } + ty +} + +/// Returns true if the `DefId` belongs to one of the workspace crate names. +pub fn is_workspace_def( + cx: &LateContext<'_>, + def_id: DefId, + workspace_crates: &BTreeSet, +) -> bool { + if def_id.is_local() { + return true; + } + + workspace_crates.contains(&cx.tcx.crate_name(def_id.krate).to_string()) +} + +/// Returns the rustc crate name for a definition. +pub fn def_crate_name(cx: &LateContext<'_>, def_id: DefId) -> String { + cx.tcx.crate_name(def_id.krate).to_string() +} + +/// Returns the output directory for generated inventory files. +pub fn inventory_dir() -> PathBuf { + std::env::var(INVENTORY_DIR_ENV) + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from("target/kms-lints/bc2wrap-type-inventory")) +} + +/// Returns the workspace crate names, normalized to rustc crate-name spelling. +pub fn workspace_crates() -> &'static BTreeSet { + static WORKSPACE_CRATES: OnceLock> = OnceLock::new(); + WORKSPACE_CRATES.get_or_init(load_workspace_crates) +} + +fn load_workspace_crates() -> BTreeSet { + let metadata = Command::new("cargo") + .args(["metadata", "--no-deps", "--format-version", "1"]) + .output(); + + let Ok(metadata) = metadata else { + return BTreeSet::new(); + }; + + if !metadata.status.success() { + return BTreeSet::new(); + } + + let Ok(json) = serde_json::from_slice::(&metadata.stdout) else { + return BTreeSet::new(); + }; + + let mut crates = BTreeSet::new(); + if let Some(packages) = json.get("packages").and_then(serde_json::Value::as_array) { + for package in packages { + if let Some(targets) = package.get("targets").and_then(serde_json::Value::as_array) { + for target in targets { + if let Some(name) = target.get("name").and_then(serde_json::Value::as_str) { + crates.insert(normalize_crate_name(name)); + } + } + } + } + } + + crates +} + +fn normalize_crate_name(name: &str) -> String { + name.replace('-', "_") +} diff --git a/tools/kms-lints/lints/Cargo.toml b/tools/kms-lints/lints/Cargo.toml new file mode 100644 index 0000000000..73c660600d --- /dev/null +++ b/tools/kms-lints/lints/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "kms-lints" +version = "0.1.0" +description = "Project specific lints for KMS" +edition = "2024" +publish = false +autotests = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +dylint_linting = { workspace = true } +kms-lints-common = { path = "../common" } +serde = { workspace = true } +serde_json = { workspace = true } + +[dev-dependencies] +dylint_testing = "5.0.0" # Dylint UI test helper matching dylint_linting; test-only lint tooling. +serde_json = { workspace = true } + +[package.metadata.rust-analyzer] +rustc_private = true + +[[example]] +name = "bc2wrap_type_inventory" +path = "tests/bc2wrap_type_inventory/main.rs" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = [ + 'cfg(dylint_lib, values(any()))', +] } diff --git a/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs b/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs new file mode 100644 index 0000000000..4e9b5c7f92 --- /dev/null +++ b/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs @@ -0,0 +1,764 @@ +//! Compile-time inventory of types passed to serialization / storage sinks. +//! +//! ## What this is +//! +//! A Dylint lint plugin (a Rust compiler plugin loaded by Dylint). Despite the +//! word "lint", its primary job is **not** to flag bad code: it observes every +//! call to a curated list of serialization and storage functions in the crate +//! being compiled, figures out which *root type* flows through each call, and +//! writes a JSON report to +//! `target/kms-lints/bc2wrap-type-inventory/.json`. Downstream tooling +//! reads that report to check that every persisted type is covered by +//! backward-compatibility tests. +//! +//! Why a compiler plugin? Because we need **post-typecheck** information. +//! Plain text-search can't tell you that a generic helper is monomorphized to +//! `MyKey` at one call site and `Vec` at another. Hooking into rustc as a +//! `LateLintPass` gives us fully resolved types after type inference, for free. +//! +//! ## Vocabulary +//! +//! - **Sink** — a function whose call sites we care about. Sinks are listed in +//! `SINK_SPECS`. The current set covers the raw encode / decode helpers in +//! the `bc2wrap` crate and the vault read / write helpers in this workspace. +//! - **Root type** — the top of the type tree handed to a sink. Fields are +//! *not* recursed into: if a sink receives `&MyStruct { … }`, the root is +//! `MyStruct`, full stop. +//! - **Extractor** — a `RootExtractor` tells the lint *where* in a call to +//! look for the root. A sink may declare more than one (e.g. `write_all` +//! records both its public-data and private-data roots). +//! +//! ## Categories +//! +//! Each recorded root is bucketed into a `RootCategory`: +//! +//! - `Local` — defined in a workspace crate. **These are the rows that matter +//! for backward compatibility.** +//! - `Foreign` — defined in an external dependency. Recorded for visibility. +//! - `Generic` / `Unknown` — the concrete type isn't resolvable at this call +//! site (e.g. a generic helper that hasn't been specialized yet). Every +//! such occurrence emits a compile-time warning so the author can refactor +//! the call to expose the concrete type. +//! - `Compound` / `Primitive` — tuples, arrays, ints, etc. Recorded but +//! normally filtered out downstream. +//! +//! ## Pipeline (per crate) +//! +//! 1. `check_expr` runs on every expression in the crate. `sink_for_expr` +//! tries to resolve it to a `(MatchedSink, args)` pair by matching the +//! callee's `DefId` against `SINK_SPECS`. If nothing matches, we return. +//! 2. For each `RootExtractor` on the matched spec, `extract_root_type` +//! returns one of: +//! - `Root(ty)` — the concrete root. We classify it via `classify_root` +//! and push a `CallRecord`. +//! - `NoRoot` — the position is legitimately empty (e.g. a `None` +//! argument). Skipped silently. +//! - `Unresolved` — we expected a root and couldn't determine one. We +//! push a synthetic `` record and emit a warning. +//! 3. Once the whole crate has been linted, `check_crate_post` writes the +//! JSON file (one per crate target). The file contains the full `calls` +//! array plus a deduped `types` summary built by `summarize_types`. +//! +//! ### Running example +//! +//! Take this call site (lifted from the UI fixture): +//! +//! ```ignore +//! let payload = LocalPayload { _value: 7 }; +//! let _ = bc2wrap::serialize(&payload); +//! ``` +//! +//! 1. rustc visits the `Call` expression and invokes `check_expr`. +//! 2. `sink_for_expr` resolves the callee's `DefId` to `bc2wrap::serialize` +//! and looks it up in `SINK_SPECS`, finding the entry whose extractors +//! are `&[RootExtractor::Arg(0)]`. +//! 3. `extract_root_type` runs `Arg(0)`: it reads arg 0's type +//! (`&LocalPayload`), peels the reference, and returns +//! `Root(LocalPayload)`. +//! 4. `classify_root` sees an ADT defined in a workspace crate and returns +//! `RootCategory::Local`. +//! 5. `record` pushes a `CallRecord` with `function = "serialize"`, +//! `sink_path = "bc2wrap::serialize"`, `type_display = "LocalPayload"`, +//! `category = Local`, and the source file / line / column of the call. +//! 6. After every other expression in the crate has been visited, +//! `check_crate_post` collapses duplicate calls into the `types` summary +//! and writes the JSON file. +//! +//! Replace `&payload` with a generic `value: &T` inside `fn f(value: &T)` +//! and only step 4 changes: `classify_root` sees `ty.has_param()`, returns +//! `RootCategory::Generic`, and `record` additionally emits a compile-time +//! warning so the author can refactor the call site to expose a concrete +//! type. The call is still recorded, just under the generic bucket. +//! +//! ## How to add a new sink +//! +//! 1. Append an entry to `SINK_SPECS`: the function name, a `path_contains` +//! substring that disambiguates same-named functions, and one or more +//! extractors. The sink only matches functions defined in a workspace +//! crate (via `is_workspace_def`) — there is no separate "owner" knob. +//! 2. If none of the existing `RootExtractor` variants captures where the +//! root lives in your signature, add a variant and handle it in +//! `extract_root_type`. +//! 3. Extend the UI fixture in `tests/bc2wrap_type_inventory/main.rs` and add +//! an assertion in the `ui` test at the bottom of this file so the new +//! sink is exercised by `cargo test`. +//! +//! ## Configuration +//! +//! Set `KMS_BC2WRAP_INVENTORY_DIR` to override the output directory. The +//! workspace crate set is always auto-detected via `cargo metadata`. + +use std::collections::{BTreeMap, BTreeSet}; + +use kms_lints_common::{ + def_crate_name, get_def_id_from_ty, inventory_dir, is_workspace_def, peel_references, + source_position, workspace_crates, +}; +use rustc_hir::{Expr, ExprKind}; +use rustc_lint::{LateContext, LateLintPass, LintContext}; +use rustc_middle::ty::{GenericArgsRef, Ty, TyKind, TypeVisitableExt}; +use rustc_session::{declare_lint, impl_lint_pass}; +use rustc_span::def_id::{DefId, LOCAL_CRATE}; +use serde::Serialize; + +const SINK_SPECS: &[SinkSpec] = &[ + SinkSpec { + function: "serialize", + path_contains: Some("bc2wrap"), + extractors: &[RootExtractor::Arg(0)], + }, + SinkSpec { + function: "serialize_into", + path_contains: Some("bc2wrap"), + extractors: &[RootExtractor::Arg(0)], + }, + SinkSpec { + function: "deserialize_safe", + path_contains: Some("bc2wrap"), + extractors: &[RootExtractor::ReturnResultOk], + }, + SinkSpec { + function: "deserialize_unsafe", + path_contains: Some("bc2wrap"), + extractors: &[RootExtractor::ReturnResultOk], + }, + SinkSpec { + function: "store_versioned_at_request_id", + path_contains: Some("vault::storage"), + extractors: &[RootExtractor::Arg(2)], + }, + SinkSpec { + function: "store_versioned_at_request_and_epoch_id", + path_contains: Some("vault::storage"), + extractors: &[RootExtractor::Arg(3)], + }, + SinkSpec { + function: "read_versioned_at_request_id", + path_contains: Some("vault::storage"), + extractors: &[RootExtractor::FnGenericArg(1)], + }, + SinkSpec { + function: "read_versioned_at_request_and_epoch_id", + path_contains: Some("vault::storage"), + extractors: &[RootExtractor::FnGenericArg(1)], + }, + SinkSpec { + function: "write_all", + path_contains: Some("vault::storage::crypto_material::base"), + extractors: &[ + RootExtractor::OptionTupleRefFirst(2), + RootExtractor::OptionTupleRefFirst(3), + ], + }, +]; + +#[derive(Clone, Debug, Serialize)] +struct SourceLocation { + file: String, + line: usize, + column: usize, +} + +#[derive(Clone, Debug, Serialize)] +struct CallRecord { + function: String, + sink_path: String, + type_display: String, + def_path: Option, + owner_crate: Option, + category: RootCategory, + source: SourceLocation, +} + +#[derive(Clone, Debug, Serialize)] +struct TypeSummary { + type_display: String, + def_path: Option, + owner_crate: Option, + category: RootCategory, + sink_paths: Vec, + call_count: usize, +} + +#[derive(Clone, Debug, Serialize)] +struct Inventory { + crate_name: String, + target_name: String, + calls: Vec, + types: Vec, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize)] +#[serde(rename_all = "snake_case")] +enum RootCategory { + Local, + Foreign, + Generic, + Compound, + Primitive, + Unknown, +} + +#[derive(Clone, Debug)] +struct RootInfo { + type_display: String, + def_path: Option, + owner_crate: Option, + category: RootCategory, +} + +type SummaryKey = (RootCategory, String, Option); + +#[derive(Clone, Debug)] +struct SummaryEntry { + summary: TypeSummary, + sink_paths: BTreeSet, +} + +#[derive(Clone, Copy, Debug)] +struct SinkSpec { + function: &'static str, + path_contains: Option<&'static str>, + extractors: &'static [RootExtractor], +} + +#[derive(Clone, Copy, Debug)] +enum RootExtractor { + Arg(usize), + OptionTupleRefFirst(usize), + ReturnResultOk, + FnGenericArg(usize), +} + +#[derive(Clone, Debug)] +struct MatchedSink<'tcx> { + spec: &'static SinkSpec, + sink_path: String, + generic_args: Option>, +} + +#[derive(Clone, Copy, Debug)] +enum ExtractedRoot<'tcx> { + Root(Ty<'tcx>), + NoRoot, + Unresolved, +} + +/// Late pass lint collecting bc2wrap root type inventory data. +#[derive(Default)] +pub struct Bc2wrapTypeInventory { + calls: Vec, +} + +declare_lint! { + /// ### What it does + /// Collects an inventory of workspace-local root types used by `bc2wrap` call sites. + /// + /// ### Why is this useful? + /// Types serialized through `bc2wrap` are compatibility-sensitive. A compiler-aware + /// inventory makes these roots visible without requiring marker traits on foreign types. + pub BC2WRAP_TYPE_INVENTORY, + Warn, + "Collects root types used by bc2wrap serialization and deserialization" +} + +impl_lint_pass!(Bc2wrapTypeInventory => [BC2WRAP_TYPE_INVENTORY]); + +impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { + // Record each resolved sink call as it is visited. The final JSON is emitted once the crate + // has been fully checked so the summaries can be built from all call records. + fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { + let Some((sink, args)) = sink_for_expr(cx, expr) else { + return; + }; + + for extractor in sink.spec.extractors { + let root_info = match extract_root_type(*extractor, cx, expr, args, sink.generic_args) { + ExtractedRoot::Root(root_ty) => classify_root(cx, root_ty), + ExtractedRoot::NoRoot => continue, + ExtractedRoot::Unresolved => RootInfo { + type_display: "".to_string(), + def_path: None, + owner_crate: None, + category: RootCategory::Unknown, + }, + }; + self.record(cx, expr, &sink, root_info); + } + } + + // Write one inventory file per crate target. Dylint also checks build scripts, but those do + // not represent production bc2wrap call sites for this inventory. + fn check_crate_post(&mut self, cx: &LateContext<'tcx>) { + let crate_name = cx.tcx.crate_name(LOCAL_CRATE).to_string(); + if crate_name == "build_script_build" { + return; + } + + let target_name = crate_name.clone(); + let calls = std::mem::take(&mut self.calls); + + let inventory = build_inventory(crate_name.clone(), target_name, calls); + let dir = inventory_dir(); + if let Err(error) = std::fs::create_dir_all(&dir) { + eprintln!( + "bc2wrap_type_inventory: failed to create {}: {error}", + dir.display() + ); + return; + } + + let path = dir.join(format!("{crate_name}.json")); + match serde_json::to_string_pretty(&inventory) { + Ok(json) => { + if let Err(error) = std::fs::write(&path, json) { + eprintln!( + "bc2wrap_type_inventory: failed to write {}: {error}", + path.display() + ); + return; + } + } + Err(error) => { + eprintln!( + "bc2wrap_type_inventory: failed to encode JSON for {crate_name}: {error}" + ); + return; + } + } + + if !inventory.calls.is_empty() { + let local_types = count_types(&inventory.types, |category| { + *category == RootCategory::Local + }); + let generic_sinks = count_types(&inventory.types, |category| { + matches!(*category, RootCategory::Generic | RootCategory::Unknown) + }); + let skipped_roots = count_types(&inventory.types, |category| { + !matches!( + *category, + RootCategory::Local | RootCategory::Generic | RootCategory::Unknown + ) + }); + + eprintln!( + "bc2wrap_type_inventory: wrote {} calls, {} local types, {} generic sinks, {} skipped roots for {}", + inventory.calls.len(), + local_types, + generic_sinks, + skipped_roots, + crate_name + ); + } + } +} + +impl Bc2wrapTypeInventory { + fn record<'tcx>( + &mut self, + cx: &LateContext<'tcx>, + expr: &'tcx Expr<'_>, + sink: &MatchedSink<'tcx>, + root_info: RootInfo, + ) { + if matches!( + root_info.category, + RootCategory::Generic | RootCategory::Unknown + ) { + cx.span_lint(BC2WRAP_TYPE_INVENTORY, expr.span, |diag| { + diag.primary_message(format!( + "generic or unresolved root type `{}` used by `{}`", + root_info.type_display, sink.sink_path, + )); + diag.note("The concrete type cannot be inventoried from this generic sink"); + }); + } + + self.calls.push(CallRecord { + function: sink.spec.function.to_string(), + sink_path: sink.sink_path.clone(), + type_display: root_info.type_display, + def_path: root_info.def_path, + owner_crate: root_info.owner_crate, + category: root_info.category, + source: source_location(cx, expr), + }); + } +} + +/// Resolve a call or method-call expression to one of the configured serialization sinks. +fn sink_for_expr<'tcx>( + cx: &LateContext<'tcx>, + expr: &'tcx Expr<'tcx>, +) -> Option<(MatchedSink<'tcx>, &'tcx [Expr<'tcx>])> { + match expr.kind { + ExprKind::Call(callee, args) => { + let (def_id, generic_args) = resolved_call_def(cx, callee)?; + let spec = sink_spec_for_def(cx, def_id)?; + Some((MatchedSink::new(cx, spec, def_id, Some(generic_args)), args)) + } + ExprKind::MethodCall(_, _, args, _) => { + let def_id = cx.typeck_results().type_dependent_def_id(expr.hir_id)?; + let spec = sink_spec_for_def(cx, def_id)?; + Some((MatchedSink::new(cx, spec, def_id, None), args)) + } + _ => None, + } +} + +/// Resolve the `DefId` and inferred generic arguments of a function call. +fn resolved_call_def<'tcx>( + cx: &LateContext<'tcx>, + callee: &'tcx Expr<'_>, +) -> Option<(DefId, GenericArgsRef<'tcx>)> { + let callee_ty = cx.typeck_results().expr_ty(callee); + let TyKind::FnDef(def_id, generic_args) = callee_ty.kind() else { + return None; + }; + Some((*def_id, generic_args)) +} + +/// Find the sink spec for a resolved function or method definition. +fn sink_spec_for_def(cx: &LateContext<'_>, def_id: DefId) -> Option<&'static SinkSpec> { + let item_name = cx.tcx.item_name(def_id); + let item_name = item_name.as_str(); + + SINK_SPECS + .iter() + .find(|spec| spec.matches(cx, def_id, item_name)) +} + +impl SinkSpec { + fn matches(&self, cx: &LateContext<'_>, def_id: DefId, item_name: &str) -> bool { + if item_name != self.function { + return false; + } + + if let Some(path_contains) = self.path_contains + && !cx.tcx.def_path_str(def_id).contains(path_contains) + { + return false; + } + + is_workspace_def(cx, def_id, workspace_crates()) + } +} + +impl<'tcx> MatchedSink<'tcx> { + fn new( + cx: &LateContext<'tcx>, + spec: &'static SinkSpec, + def_id: DefId, + generic_args: Option>, + ) -> Self { + Self { + spec, + sink_path: cx.tcx.def_path_str(def_id), + generic_args, + } + } +} + +/// Extract the root type that a configured sink serializes or deserializes at a call site. +fn extract_root_type<'tcx>( + extractor: RootExtractor, + cx: &LateContext<'tcx>, + expr: &'tcx Expr<'_>, + args: &'tcx [Expr<'tcx>], + generic_args: Option>, +) -> ExtractedRoot<'tcx> { + match extractor { + RootExtractor::Arg(index) => extract_arg(cx, args, index), + RootExtractor::OptionTupleRefFirst(index) => option_tuple_ref_first(cx, args, index), + RootExtractor::ReturnResultOk => decode_ok_type(cx.typeck_results().expr_ty(expr)) + .map(ExtractedRoot::Root) + .unwrap_or(ExtractedRoot::Unresolved), + RootExtractor::FnGenericArg(index) => generic_args + .and_then(|args| args.types().nth(index)) + .map(ExtractedRoot::Root) + .unwrap_or(ExtractedRoot::Unresolved), + } +} + +fn extract_arg<'tcx>( + cx: &LateContext<'tcx>, + args: &'tcx [Expr<'tcx>], + index: usize, +) -> ExtractedRoot<'tcx> { + args.get(index) + .map(|arg| ExtractedRoot::Root(peel_references(cx.typeck_results().expr_ty(arg)))) + .unwrap_or(ExtractedRoot::Unresolved) +} + +fn option_tuple_ref_first<'tcx>( + cx: &LateContext<'tcx>, + args: &'tcx [Expr<'tcx>], + index: usize, +) -> ExtractedRoot<'tcx> { + let Some(arg) = args.get(index) else { + return ExtractedRoot::Unresolved; + }; + + let ExprKind::Call(_, option_args) = arg.kind else { + return ExtractedRoot::NoRoot; + }; + let Some(tuple_expr) = option_args.first() else { + return ExtractedRoot::NoRoot; + }; + let ExprKind::Tup(tuple_fields) = tuple_expr.kind else { + return ExtractedRoot::Unresolved; + }; + let Some(first_field) = tuple_fields.first() else { + return ExtractedRoot::Unresolved; + }; + + ExtractedRoot::Root(peel_references(cx.typeck_results().expr_ty(first_field))) +} + +/// Return the first type argument of an enum return type, which is `T` for `Result`. +/// +/// This intentionally stays structural instead of checking for the exact `Result` definition: +/// bc2wrap decode helpers already identify the function, and the first enum type argument is the +/// value type we need from their return signature. +fn decode_ok_type<'tcx>(ty: Ty<'tcx>) -> Option> { + let TyKind::Adt(adt_def, args) = ty.kind() else { + return None; + }; + + if !adt_def.is_enum() { + return None; + } + args.types().next() +} + +/// Classify a root type into the inventory categories used by the JSON output. +/// +/// Generic roots are kept separate because they identify call sites where the concrete serialized +/// type depends on monomorphization and cannot be represented by one stable root definition. +fn classify_root(cx: &LateContext<'_>, ty: Ty<'_>) -> RootInfo { + let type_display = ty.to_string(); + + if ty.has_param() || ty.has_infer() { + return RootInfo { + type_display, + def_path: None, + owner_crate: None, + category: RootCategory::Generic, + }; + } + + match ty.kind() { + TyKind::Adt(_, _) | TyKind::Alias(..) => classify_named_root(cx, ty), + TyKind::Tuple(..) | TyKind::Array(..) | TyKind::Slice(..) => RootInfo { + type_display, + def_path: None, + owner_crate: None, + category: RootCategory::Compound, + }, + TyKind::Bool + | TyKind::Char + | TyKind::Int(_) + | TyKind::Uint(_) + | TyKind::Float(_) + | TyKind::Str + | TyKind::Never => RootInfo { + type_display, + def_path: None, + owner_crate: None, + category: RootCategory::Primitive, + }, + _ => RootInfo { + type_display, + def_path: None, + owner_crate: None, + category: RootCategory::Unknown, + }, + } +} + +/// Classify a named root type as local or foreign by comparing its defining crate to the Cargo +/// workspace package set. +fn classify_named_root(cx: &LateContext<'_>, ty: Ty<'_>) -> RootInfo { + let type_display = ty.to_string(); + let Some(def_id) = get_def_id_from_ty(ty) else { + return RootInfo { + type_display, + def_path: None, + owner_crate: None, + category: RootCategory::Unknown, + }; + }; + + let owner_crate = def_crate_name(cx, def_id); + let def_path = cx.tcx.def_path_str(def_id); + let category = if is_workspace_def(cx, def_id, workspace_crates()) { + RootCategory::Local + } else { + RootCategory::Foreign + }; + + RootInfo { + type_display, + def_path: Some(def_path), + owner_crate: Some(owner_crate), + category, + } +} + +/// Convert a rustc span into a stable, JSON-friendly source location. +fn source_location(cx: &LateContext<'_>, expr: &Expr<'_>) -> SourceLocation { + let position = source_position(cx, expr.span); + SourceLocation { + file: position.file, + line: position.line, + column: position.column, + } +} + +/// Build the final per-target inventory from the complete set of call records. +fn build_inventory(crate_name: String, target_name: String, calls: Vec) -> Inventory { + let types = summarize_types(&calls); + + Inventory { + crate_name, + target_name, + calls, + types, + } +} + +/// Collapse raw call records into one categorized summary per distinct root type. +/// +/// The full `calls` array remains the source of detailed locations. This summary exists for quick +/// auditing by root category, sink path, and call count. +fn summarize_types(calls: &[CallRecord]) -> Vec { + let mut summaries: BTreeMap = BTreeMap::new(); + + for call in calls { + let key = ( + call.category, + call.type_display.clone(), + call.def_path.clone(), + ); + let entry = summaries.entry(key).or_insert_with(|| SummaryEntry { + summary: TypeSummary { + type_display: call.type_display.clone(), + def_path: call.def_path.clone(), + owner_crate: call.owner_crate.clone(), + category: call.category, + sink_paths: Vec::new(), + call_count: 0, + }, + sink_paths: BTreeSet::new(), + }); + entry.summary.call_count += 1; + entry.sink_paths.insert(call.sink_path.clone()); + } + + summaries + .into_values() + .map(|mut entry| { + entry.summary.sink_paths = entry.sink_paths.into_iter().collect(); + entry.summary + }) + .collect() +} + +/// Count summary roots matching a category predicate for the stderr status line. +fn count_types(summaries: &[TypeSummary], predicate: impl Fn(&RootCategory) -> bool) -> usize { + summaries + .iter() + .filter(|summary| predicate(&summary.category)) + .count() +} + +/// Run the Dylint UI fixture and assert that its JSON inventory keeps the intended categories. +#[test] +fn ui() { + let output_dir = std::env::temp_dir().join(format!( + "kms_lints_bc2wrap_type_inventory_ui_{}", + std::process::id() + )); + let _ = std::fs::remove_dir_all(&output_dir); + + // SAFETY: The UI test is single-threaded with respect to this lint invocation, and the + // environment variables are set before the child cargo process is spawned. + unsafe { + std::env::set_var(kms_lints_common::INVENTORY_DIR_ENV, output_dir.as_os_str()); + } + + dylint_testing::ui_test_example(env!("CARGO_PKG_NAME"), "bc2wrap_type_inventory"); + + let json_path = output_dir.join("main.json"); + let json = std::fs::read_to_string(&json_path) + .unwrap_or_else(|error| panic!("expected inventory at {}: {error}", json_path.display())); + let inventory: serde_json::Value = + serde_json::from_str(&json).expect("inventory should be valid JSON"); + + let types = inventory["types"] + .as_array() + .expect("types should be an array"); + assert!( + types.iter().any( + |entry| entry["type_display"].as_str() == Some("LocalPayload") + && entry["category"].as_str() == Some("local") + ), + "LocalPayload should be inventoried as a local root" + ); + + assert!( + types.iter().any( + |entry| entry["type_display"].as_str() == Some("std::string::String") + && entry["category"].as_str() == Some("foreign") + ), + "String should be recorded as a skipped foreign root" + ); + + assert!( + types + .iter() + .any(|entry| entry["type_display"].as_str() == Some("T") + && entry["category"].as_str() == Some("generic") + && sink_paths_contains(entry, "bc2wrap") + && sink_paths_contains(entry, "vault::storage")), + "generic T should be recorded by both a bc2wrap and a versioned-storage sink" + ); + + assert!( + types.iter().any( + |entry| entry["type_display"].as_str() == Some("LocalStoragePayload") + && entry["category"].as_str() == Some("local") + && sink_paths_contains(entry, "vault::storage") + ), + "LocalStoragePayload should be inventoried as a local versioned storage root" + ); +} + +#[cfg(test)] +fn sink_paths_contains(entry: &serde_json::Value, needle: &str) -> bool { + entry["sink_paths"] + .as_array() + .expect("sink_paths should be an array") + .iter() + .any(|path| path.as_str().is_some_and(|p| p.contains(needle))) +} diff --git a/tools/kms-lints/lints/src/lib.rs b/tools/kms-lints/lints/src/lib.rs new file mode 100644 index 0000000000..c52dd92a14 --- /dev/null +++ b/tools/kms-lints/lints/src/lib.rs @@ -0,0 +1,20 @@ +#![feature(rustc_private)] +#![warn(unused_extern_crates)] + +extern crate rustc_hir; +extern crate rustc_lint; +extern crate rustc_middle; +extern crate rustc_session; +extern crate rustc_span; + +mod bc2wrap_type_inventory; + +dylint_linting::dylint_library!(); + +#[allow(clippy::no_mangle_with_rust_abi)] +#[unsafe(no_mangle)] +pub fn register_lints(_sess: &rustc_session::Session, lint_store: &mut rustc_lint::LintStore) { + lint_store.register_lints(&[bc2wrap_type_inventory::BC2WRAP_TYPE_INVENTORY]); + lint_store + .register_late_pass(|_| Box::new(bc2wrap_type_inventory::Bc2wrapTypeInventory::default())); +} diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs new file mode 100644 index 0000000000..135bd2893f --- /dev/null +++ b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs @@ -0,0 +1,212 @@ +#![allow(dead_code, unused_variables)] + +mod bc2wrap { + pub mod error { + #[derive(Debug)] + pub struct EncodeError; + + #[derive(Debug)] + pub struct DecodeError; + } + + pub fn serialize(_value: &T) -> Result, error::EncodeError> { + Ok(Vec::new()) + } + + pub fn serialize_into( + _value: &T, + _writer: &mut W, + ) -> Result { + Ok(0) + } + + pub fn deserialize_safe(_bytes: &[u8]) -> Result { + panic!("test fixture does not execute") + } + + pub fn deserialize_unsafe(_bytes: &[u8]) -> Result { + panic!("test fixture does not execute") + } +} + +mod vault { + pub mod storage { + #[derive(Clone, Copy)] + pub enum PubDataType { + PublicKey, + } + + #[derive(Clone, Copy)] + pub enum PrivDataType { + FheKeyInfo, + } + + pub struct Storage; + + pub async fn store_versioned_at_request_id( + _storage: &mut S, + _request_id: &u64, + _data: &T, + _data_type: &str, + ) -> Result<(), ()> { + Ok(()) + } + + pub async fn store_versioned_at_request_and_epoch_id( + _storage: &mut S, + _request_id: &u64, + _epoch_id: &u64, + _data: &T, + _data_type: &str, + ) -> Result<(), ()> { + Ok(()) + } + + pub async fn read_versioned_at_request_id( + _storage: &S, + _request_id: &u64, + _data_type: &str, + ) -> Result { + panic!("test fixture does not execute") + } + + pub async fn read_versioned_at_request_and_epoch_id( + _storage: &S, + _request_id: &u64, + _epoch_id: &u64, + _data_type: &str, + ) -> Result { + panic!("test fixture does not execute") + } + + pub mod crypto_material { + pub mod base { + use super::super::{PrivDataType, PubDataType}; + + pub struct StorageRoot; + + impl StorageRoot { + pub async fn write_all( + &self, + _request_id: &u64, + _epoch_id: Option<&u64>, + _pub_data: Option<(&PubData, PubDataType)>, + _priv_data: Option<(&PrivData, PrivDataType)>, + _update_backup: bool, + _op_metric_tag: &'static str, + ) -> Result<(), ()> { + Ok(()) + } + } + } + } + } +} + +struct LocalPayload { + _value: u64, +} + +struct LocalStoragePayload { + _value: u64, +} + +struct LocalPublicMaterial { + _value: u64, +} + +struct LocalPrivateMaterial { + _value: u64, +} + +trait RoundTrip: Sized { + fn decode(bytes: &[u8]) -> Result; +} + +impl RoundTrip for LocalPayload { + fn decode(bytes: &[u8]) -> Result { + bc2wrap::deserialize_safe(bytes) + } +} + +fn generic_sink(value: &T) { + let _ = bc2wrap::serialize(value); +} + +async fn generic_storage_sink(storage: &mut vault::storage::Storage, value: &T) { + let request_id = 7; + let _ = + vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic").await; +} + +async fn storage_sinks() { + use vault::storage::crypto_material::base::StorageRoot; + use vault::storage::{ + PrivDataType, PubDataType, Storage, read_versioned_at_request_and_epoch_id, + read_versioned_at_request_id, store_versioned_at_request_and_epoch_id, + store_versioned_at_request_id, + }; + + let mut storage = Storage; + let storage_root = StorageRoot; + let request_id = 7; + let epoch_id = 11; + let payload = LocalStoragePayload { _value: 13 }; + let public_material = LocalPublicMaterial { _value: 17 }; + let private_material = LocalPrivateMaterial { _value: 19 }; + + let _ = store_versioned_at_request_id(&mut storage, &request_id, &payload, "payload").await; + let _ = store_versioned_at_request_and_epoch_id( + &mut storage, + &request_id, + &epoch_id, + &payload, + "payload", + ) + .await; + + let _: LocalPayload = read_versioned_at_request_id(&storage, &request_id, "payload") + .await + .unwrap(); + let _: LocalStoragePayload = + read_versioned_at_request_and_epoch_id(&storage, &request_id, &epoch_id, "payload") + .await + .unwrap(); + + let _ = storage_root + .write_all::( + &request_id, + Some(&epoch_id), + Some((&public_material, PubDataType::PublicKey)), + Some((&private_material, PrivDataType::FheKeyInfo)), + false, + "fixture", + ) + .await; + + generic_storage_sink(&mut storage, &payload).await; +} + +fn main() { + let payload = LocalPayload { _value: 7 }; + let _ = bc2wrap::serialize(&payload); + + use bc2wrap::serialize as encode; + let _ = encode(&payload); + + let mut writer: Vec = Vec::new(); + let _ = bc2wrap::serialize_into(&payload, &mut writer); + + let string = String::from("foreign"); + let _ = bc2wrap::serialize(&string); + + let tuple = (1_u64, 2_u64); + let _ = bc2wrap::serialize(&tuple); + + let bytes = []; + let _: LocalPayload = bc2wrap::deserialize_safe(&bytes).unwrap(); + let _: LocalPayload = bc2wrap::deserialize_unsafe(&bytes).unwrap(); + let _ = LocalPayload::decode(&bytes); + + generic_sink(&payload); +} diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr new file mode 100644 index 0000000000..dfc3606f70 --- /dev/null +++ b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr @@ -0,0 +1,20 @@ +warning: generic or unresolved root type `T` used by `bc2wrap::serialize` + --> $DIR/main.rs:133:13 + | +LL | let _ = bc2wrap::serialize(value); + | ^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: The concrete type cannot be inventoried from this generic sink + = note: `#[warn(bc2wrap_type_inventory)]` on by default + +warning: generic or unresolved root type `T` used by `vault::storage::store_versioned_at_request_id` + --> $DIR/main.rs:139:9 + | +LL | vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic").await; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: The concrete type cannot be inventoried from this generic sink + +bc2wrap_type_inventory: wrote 16 calls, 4 local types, 1 generic sinks, 2 skipped roots for main +warning: 2 warnings emitted + diff --git a/tools/kms-lints/rust-toolchain b/tools/kms-lints/rust-toolchain new file mode 100644 index 0000000000..1ab63d7a70 --- /dev/null +++ b/tools/kms-lints/rust-toolchain @@ -0,0 +1,4 @@ +[toolchain] +channel = "nightly-2026-01-22" +components = ["llvm-tools-preview", "rustc-dev"] +profile = "default" From 86f5d7e6f8903650e8cb6719a654931ffef7e5d1 Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Mon, 18 May 2026 16:20:15 +0200 Subject: [PATCH 6/7] chore: post-monomorphization analysis --- tools/kms-lints/Cargo.toml | 1 - tools/kms-lints/README.md | 14 +- tools/kms-lints/common/src/lib.rs | 2 +- tools/kms-lints/lints/Cargo.toml | 3 +- .../lints/src/bc2wrap_type_inventory.rs | 542 ++++++++++++------ tools/kms-lints/lints/src/lib.rs | 11 +- .../tests/bc2wrap_type_inventory/main.rs | 210 ++++--- .../tests/bc2wrap_type_inventory/main.stderr | 21 +- tools/kms-lints/rust-toolchain | 2 +- 9 files changed, 496 insertions(+), 310 deletions(-) diff --git a/tools/kms-lints/Cargo.toml b/tools/kms-lints/Cargo.toml index dfd3027cf2..6a2c46dea2 100644 --- a/tools/kms-lints/Cargo.toml +++ b/tools/kms-lints/Cargo.toml @@ -3,6 +3,5 @@ members = ["common", "lints"] resolver = "3" [workspace.dependencies] -dylint_linting = "5.0.0" # Dylint helper crate; required for custom lint libraries and follows the existing tfhe-lints convention. serde = { version = "1.0", features = ["derive"] } # Serialization for generated JSON inventory; maintained by dtolnay and already used across KMS. serde_json = "1.0" # JSON output for generated inventories; maintained by dtolnay and already used across KMS. diff --git a/tools/kms-lints/README.md b/tools/kms-lints/README.md index d77fc9b27d..944b54e80a 100644 --- a/tools/kms-lints/README.md +++ b/tools/kms-lints/README.md @@ -19,17 +19,16 @@ examples. ### `bc2wrap_type_inventory` Collects the root workspace-local types used at curated serialization sink call -sites, including: +sites in codegen-reachable monomorphized MIR, including: - `bc2wrap::serialize` - `bc2wrap::serialize_into` - `bc2wrap::deserialize_safe` - `bc2wrap::deserialize_unsafe` -- `store_versioned_at_request_id` -- `store_versioned_at_request_and_epoch_id` -- `read_versioned_at_request_id` -- `read_versioned_at_request_and_epoch_id` -- the `write_all` storage wrapper method +- `Storage::store_data` +- `StorageExt::store_data_at_epoch` +- `StorageReader::read_data` +- `StorageReaderExt::read_data_at_epoch` The lint writes one JSON file per checked crate target under `target/kms-lints/bc2wrap-type-inventory` by default. Set @@ -40,3 +39,6 @@ Each file contains full `calls` plus a `types` summary categorized as `local`, `foreign`, `generic`, `compound`, `primitive`, or `unknown` for auditability. Call records include `sink_path` (the rustc def path of the matched sink), and type summaries aggregate every distinct sink path each root flowed through. +Storage wrapper functions are intentionally not listed as sinks; their concrete +types are recovered from the canonical storage trait calls reached after +monomorphization. diff --git a/tools/kms-lints/common/src/lib.rs b/tools/kms-lints/common/src/lib.rs index a3f9e6146c..82a9e08a94 100644 --- a/tools/kms-lints/common/src/lib.rs +++ b/tools/kms-lints/common/src/lib.rs @@ -50,7 +50,7 @@ pub fn source_position(cx: &LateContext<'_>, span: Span) -> SourcePosition { pub fn get_def_id_from_ty(ty: Ty<'_>) -> Option { match ty.kind() { TyKind::Adt(adt_def, _) => Some(adt_def.did()), - TyKind::Alias(_, alias_ty) => Some(alias_ty.def_id), + TyKind::Alias(alias_ty) => Some(alias_ty.kind.def_id()), TyKind::Dynamic(predicates, ..) => predicates.principal_def_id(), TyKind::FnDef(def_id, _) | TyKind::Foreign(def_id) diff --git a/tools/kms-lints/lints/Cargo.toml b/tools/kms-lints/lints/Cargo.toml index 73c660600d..5723dad9e1 100644 --- a/tools/kms-lints/lints/Cargo.toml +++ b/tools/kms-lints/lints/Cargo.toml @@ -10,13 +10,12 @@ autotests = false crate-type = ["cdylib"] [dependencies] -dylint_linting = { workspace = true } kms-lints-common = { path = "../common" } serde = { workspace = true } serde_json = { workspace = true } [dev-dependencies] -dylint_testing = "5.0.0" # Dylint UI test helper matching dylint_linting; test-only lint tooling. +dylint_testing = "6.0.0" # Dylint UI test helper; test-only lint tooling. serde_json = { workspace = true } [package.metadata.rust-analyzer] diff --git a/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs b/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs index 4e9b5c7f92..30842bae2e 100644 --- a/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs +++ b/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs @@ -11,22 +11,23 @@ //! reads that report to check that every persisted type is covered by //! backward-compatibility tests. //! -//! Why a compiler plugin? Because we need **post-typecheck** information. -//! Plain text-search can't tell you that a generic helper is monomorphized to -//! `MyKey` at one call site and `Vec` at another. Hooking into rustc as a -//! `LateLintPass` gives us fully resolved types after type inference, for free. +//! Why a compiler plugin? Because we need **post-monomorphization** information. +//! Plain text-search and HIR linting can't tell you that a generic helper is +//! monomorphized to `MyKey` at one call site and `Vec` at another. Hooking +//! into rustc as a `LateLintPass` lets us ask rustc for the concrete instances +//! it will codegen, then inspect those instances' MIR. //! //! ## Vocabulary //! //! - **Sink** — a function whose call sites we care about. Sinks are listed in //! `SINK_SPECS`. The current set covers the raw encode / decode helpers in -//! the `bc2wrap` crate and the vault read / write helpers in this workspace. +//! the `bc2wrap` crate and the canonical vault storage reader / writer trait +//! methods in this workspace. //! - **Root type** — the top of the type tree handed to a sink. Fields are //! *not* recursed into: if a sink receives `&MyStruct { … }`, the root is //! `MyStruct`, full stop. -//! - **Extractor** — a `RootExtractor` tells the lint *where* in a call to -//! look for the root. A sink may declare more than one (e.g. `write_all` -//! records both its public-data and private-data roots). +//! - **Extractor** — a `RootExtractor` tells the lint *where* in a +//! monomorphized call to look for the root. //! //! ## Categories //! @@ -35,29 +36,31 @@ //! - `Local` — defined in a workspace crate. **These are the rows that matter //! for backward compatibility.** //! - `Foreign` — defined in an external dependency. Recorded for visibility. -//! - `Generic` / `Unknown` — the concrete type isn't resolvable at this call -//! site (e.g. a generic helper that hasn't been specialized yet). Every -//! such occurrence emits a compile-time warning so the author can refactor -//! the call to expose the concrete type. +//! - `Generic` / `Unknown` — the concrete type still isn't resolvable after +//! monomorphization. Every such occurrence emits a compile-time warning so +//! the author can refactor the call to expose the concrete type. //! - `Compound` / `Primitive` — tuples, arrays, ints, etc. Recorded but //! normally filtered out downstream. //! //! ## Pipeline (per crate) //! -//! 1. `check_expr` runs on every expression in the crate. `sink_for_expr` -//! tries to resolve it to a `(MatchedSink, args)` pair by matching the -//! callee's `DefId` against `SINK_SPECS`. If nothing matches, we return. -//! 2. For each `RootExtractor` on the matched spec, `extract_root_type` -//! returns one of: +//! 1. `check_crate_post` seeds a local worklist with codegen-reachable +//! non-generic MIR roots, then follows local monomorphized calls and +//! closure / coroutine bodies. +//! 2. The lint deduplicates local function instances and scans each instance's +//! MIR call terminators. For every call, it substitutes the caller +//! instance's concrete args into the callee and argument types. +//! 3. `sink_for_mono_call` canonicalizes impl-method calls back to their trait +//! item when applicable, then matches the callee against `SINK_SPECS`. +//! 4. For each `RootExtractor` on the matched spec, `extract_mono_root_type` +//! returns either: //! - `Root(ty)` — the concrete root. We classify it via `classify_root` //! and push a `CallRecord`. -//! - `NoRoot` — the position is legitimately empty (e.g. a `None` -//! argument). Skipped silently. -//! - `Unresolved` — we expected a root and couldn't determine one. We -//! push a synthetic `` record and emit a warning. -//! 3. Once the whole crate has been linted, `check_crate_post` writes the -//! JSON file (one per crate target). The file contains the full `calls` -//! array plus a deduped `types` summary built by `summarize_types`. +//! - `Unresolved` — we expected a root and couldn't determine one. We push +//! a synthetic `` record and emit a warning. +//! 5. `check_crate_post` writes the JSON file (one per crate target). The file +//! contains the full `calls` array plus a deduped `types` summary built by +//! `summarize_types`. //! //! ### Running example //! @@ -68,37 +71,35 @@ //! let _ = bc2wrap::serialize(&payload); //! ``` //! -//! 1. rustc visits the `Call` expression and invokes `check_expr`. -//! 2. `sink_for_expr` resolves the callee's `DefId` to `bc2wrap::serialize` -//! and looks it up in `SINK_SPECS`, finding the entry whose extractors -//! are `&[RootExtractor::Arg(0)]`. -//! 3. `extract_root_type` runs `Arg(0)`: it reads arg 0's type -//! (`&LocalPayload`), peels the reference, and returns +//! 1. rustc monomorphizes the caller containing the call. +//! 2. `collect_mono_calls` scans that caller's MIR and sees a call terminator +//! whose callee resolves to `bc2wrap::serialize`. +//! 3. `extract_mono_root_type` runs `Arg(0)`: it reads arg 0's monomorphized +//! type (`&LocalPayload`), peels the reference, and returns //! `Root(LocalPayload)`. //! 4. `classify_root` sees an ADT defined in a workspace crate and returns //! `RootCategory::Local`. //! 5. `record` pushes a `CallRecord` with `function = "serialize"`, //! `sink_path = "bc2wrap::serialize"`, `type_display = "LocalPayload"`, -//! `category = Local`, and the source file / line / column of the call. -//! 6. After every other expression in the crate has been visited, -//! `check_crate_post` collapses duplicate calls into the `types` summary -//! and writes the JSON file. +//! `category = Local`, and the source file / line / column of the matched +//! lower-level sink call. +//! 6. After every mono item has been scanned, `check_crate_post` collapses +//! duplicate calls into the `types` summary and writes the JSON file. //! //! Replace `&payload` with a generic `value: &T` inside `fn f(value: &T)` -//! and only step 4 changes: `classify_root` sees `ty.has_param()`, returns -//! `RootCategory::Generic`, and `record` additionally emits a compile-time -//! warning so the author can refactor the call site to expose a concrete -//! type. The call is still recorded, just under the generic bucket. +//! and call `f(&payload)`: the monomorphized MIR for `f::` +//! still records `LocalPayload`, not `T`. A `Generic` or `Unknown` warning is +//! emitted only if the root remains unresolved after monomorphization. //! //! ## How to add a new sink //! //! 1. Append an entry to `SINK_SPECS`: the function name, a `path_contains` -//! substring that disambiguates same-named functions, and one or more -//! extractors. The sink only matches functions defined in a workspace +//! substring that disambiguates same-named functions or traits, and one or +//! more extractors. The sink only matches functions defined in a workspace //! crate (via `is_workspace_def`) — there is no separate "owner" knob. //! 2. If none of the existing `RootExtractor` variants captures where the //! root lives in your signature, add a variant and handle it in -//! `extract_root_type`. +//! `extract_mono_root_type`. //! 3. Extend the UI fixture in `tests/bc2wrap_type_inventory/main.rs` and add //! an assertion in the `ui` test at the bottom of this file so the new //! sink is exercised by `cargo test`. @@ -108,17 +109,22 @@ //! Set `KMS_BC2WRAP_INVENTORY_DIR` to override the output directory. The //! workspace crate set is always auto-detected via `cargo metadata`. -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque}; use kms_lints_common::{ def_crate_name, get_def_id_from_ty, inventory_dir, is_workspace_def, peel_references, source_position, workspace_crates, }; -use rustc_hir::{Expr, ExprKind}; use rustc_lint::{LateContext, LateLintPass, LintContext}; -use rustc_middle::ty::{GenericArgsRef, Ty, TyKind, TypeVisitableExt}; +use rustc_middle::mir::{ + AggregateKind, Body, Operand, Rvalue, StatementKind, Terminator, TerminatorKind, +}; +use rustc_middle::ty::{ + self, EarlyBinder, GenericArgsRef, Instance, Ty, TyKind, TypeVisitableExt, Unnormalized, +}; use rustc_session::{declare_lint, impl_lint_pass}; use rustc_span::def_id::{DefId, LOCAL_CRATE}; +use rustc_span::{Span, Spanned}; use serde::Serialize; const SINK_SPECS: &[SinkSpec] = &[ @@ -135,40 +141,32 @@ const SINK_SPECS: &[SinkSpec] = &[ SinkSpec { function: "deserialize_safe", path_contains: Some("bc2wrap"), - extractors: &[RootExtractor::ReturnResultOk], + extractors: &[RootExtractor::LastFnGenericArg], }, SinkSpec { function: "deserialize_unsafe", path_contains: Some("bc2wrap"), - extractors: &[RootExtractor::ReturnResultOk], - }, - SinkSpec { - function: "store_versioned_at_request_id", - path_contains: Some("vault::storage"), - extractors: &[RootExtractor::Arg(2)], + extractors: &[RootExtractor::LastFnGenericArg], }, SinkSpec { - function: "store_versioned_at_request_and_epoch_id", - path_contains: Some("vault::storage"), - extractors: &[RootExtractor::Arg(3)], + function: "store_data", + path_contains: Some("vault::storage::Storage"), + extractors: &[RootExtractor::Arg(1)], }, SinkSpec { - function: "read_versioned_at_request_id", - path_contains: Some("vault::storage"), - extractors: &[RootExtractor::FnGenericArg(1)], + function: "store_data_at_epoch", + path_contains: Some("vault::storage::StorageExt"), + extractors: &[RootExtractor::Arg(1)], }, SinkSpec { - function: "read_versioned_at_request_and_epoch_id", - path_contains: Some("vault::storage"), - extractors: &[RootExtractor::FnGenericArg(1)], + function: "read_data", + path_contains: Some("vault::storage::StorageReader"), + extractors: &[RootExtractor::LastFnGenericArg], }, SinkSpec { - function: "write_all", - path_contains: Some("vault::storage::crypto_material::base"), - extractors: &[ - RootExtractor::OptionTupleRefFirst(2), - RootExtractor::OptionTupleRefFirst(3), - ], + function: "read_data_at_epoch", + path_contains: Some("vault::storage::StorageReaderExt"), + extractors: &[RootExtractor::LastFnGenericArg], }, ]; @@ -242,25 +240,50 @@ struct SinkSpec { extractors: &'static [RootExtractor], } +/// Where in a monomorphized sink call the root type lives. +/// +/// The convention for `LastFnGenericArg` is that the inventoried type is always the *last* type +/// generic of the callee. For trait methods this works because the implicit `Self` precedes the +/// method's own type params, so `T` in `read_data(&self, ...)` is the last entry in the +/// monomorphized `GenericArgsRef`. For free functions with a single type param (`deserialize_safe`) +/// "last" is equivalently "only". #[derive(Clone, Copy, Debug)] enum RootExtractor { + /// Read the type of the call's positional argument at `index` (after monomorphization and + /// reference peeling). Arg 0 is the receiver for trait methods. Arg(usize), - OptionTupleRefFirst(usize), - ReturnResultOk, - FnGenericArg(usize), + /// Read the last type generic of the callee. Used for sinks that take the root by `T` in the + /// signature (e.g. `deserialize_safe(bytes: &[u8]) -> Result`, + /// `StorageReader::read_data`). + LastFnGenericArg, } #[derive(Clone, Debug)] struct MatchedSink<'tcx> { spec: &'static SinkSpec, sink_path: String, - generic_args: Option>, + generic_args: GenericArgsRef<'tcx>, +} + +#[derive(Clone, Copy, Debug)] +struct MonoCallee<'tcx> { + canonical_def_id: DefId, + generic_args: GenericArgsRef<'tcx>, + instance: Option>, +} + +#[derive(Clone, Copy, Debug)] +struct MonoCall<'a, 'tcx> { + caller: Instance<'tcx>, + body: &'a Body<'tcx>, + func: &'a Operand<'tcx>, + args: &'a [Spanned>], + span: Span, } #[derive(Clone, Copy, Debug)] enum ExtractedRoot<'tcx> { Root(Ty<'tcx>), - NoRoot, Unresolved, } @@ -285,28 +308,6 @@ declare_lint! { impl_lint_pass!(Bc2wrapTypeInventory => [BC2WRAP_TYPE_INVENTORY]); impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { - // Record each resolved sink call as it is visited. The final JSON is emitted once the crate - // has been fully checked so the summaries can be built from all call records. - fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { - let Some((sink, args)) = sink_for_expr(cx, expr) else { - return; - }; - - for extractor in sink.spec.extractors { - let root_info = match extract_root_type(*extractor, cx, expr, args, sink.generic_args) { - ExtractedRoot::Root(root_ty) => classify_root(cx, root_ty), - ExtractedRoot::NoRoot => continue, - ExtractedRoot::Unresolved => RootInfo { - type_display: "".to_string(), - def_path: None, - owner_crate: None, - category: RootCategory::Unknown, - }, - }; - self.record(cx, expr, &sink, root_info); - } - } - // Write one inventory file per crate target. Dylint also checks build scripts, but those do // not represent production bc2wrap call sites for this inventory. fn check_crate_post(&mut self, cx: &LateContext<'tcx>) { @@ -316,6 +317,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { } let target_name = crate_name.clone(); + self.collect_mono_calls(cx); let calls = std::mem::take(&mut self.calls); let inventory = build_inventory(crate_name.clone(), target_name, calls); @@ -374,10 +376,98 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { } impl Bc2wrapTypeInventory { + fn collect_mono_calls<'tcx>(&mut self, cx: &LateContext<'tcx>) { + let tcx = cx.tcx; + let mut pending = VecDeque::new(); + let mut seen_instances = HashSet::new(); + + for &def_id in tcx.mir_keys(()).iter() { + if tcx.generics_of(def_id).requires_monomorphization(tcx) { + continue; + } + let def_id = def_id.to_def_id(); + let is_root = tcx.entry_fn(()).is_some_and(|(entry, _)| entry == def_id) + || tcx.is_reachable_non_generic(def_id); + if is_root { + pending.push_back(Instance::mono(tcx, def_id)); + } + } + + while let Some(instance) = pending.pop_front() { + if !seen_instances.insert(instance) { + continue; + } + + let body = tcx.instance_mir(instance.def); + for basic_block in body.basic_blocks.iter() { + for statement in &basic_block.statements { + if let StatementKind::Assign(assign) = &statement.kind { + let (_, rvalue) = &**assign; + enqueue_local_aggregate_instance(cx, instance, rvalue, &mut pending); + } + } + + if let Some((func, args, fn_span)) = + call_terminator_fields(basic_block.terminator()) + { + self.record_mono_call( + cx, + MonoCall { + caller: instance, + body, + func, + args, + span: fn_span, + }, + &mut pending, + ); + } + } + } + } + + fn record_mono_call<'tcx>( + &mut self, + cx: &LateContext<'tcx>, + call: MonoCall<'_, 'tcx>, + pending: &mut VecDeque>, + ) { + let Some(callee) = resolve_mono_callee(cx, call.caller, call.body, call.func) else { + return; + }; + if let Some(instance) = callee.instance { + enqueue_local_instance(cx, instance, pending); + } + + let Some(sink) = sink_for_mono_callee(cx, callee) else { + return; + }; + + for extractor in sink.spec.extractors { + let root_info = match extract_mono_root_type( + *extractor, + cx, + call.caller, + call.body, + call.args, + &sink, + ) { + ExtractedRoot::Root(root_ty) => classify_root(cx, root_ty), + ExtractedRoot::Unresolved => RootInfo { + type_display: "".to_string(), + def_path: None, + owner_crate: None, + category: RootCategory::Unknown, + }, + }; + self.record(cx, call.span, &sink, root_info); + } + } + fn record<'tcx>( &mut self, cx: &LateContext<'tcx>, - expr: &'tcx Expr<'_>, + span: Span, sink: &MatchedSink<'tcx>, root_info: RootInfo, ) { @@ -385,13 +475,17 @@ impl Bc2wrapTypeInventory { root_info.category, RootCategory::Generic | RootCategory::Unknown ) { - cx.span_lint(BC2WRAP_TYPE_INVENTORY, expr.span, |diag| { - diag.primary_message(format!( - "generic or unresolved root type `{}` used by `{}`", - root_info.type_display, sink.sink_path, - )); - diag.note("The concrete type cannot be inventoried from this generic sink"); - }); + cx.emit_span_lint( + BC2WRAP_TYPE_INVENTORY, + span, + rustc_errors::DiagDecorator(|diag| { + diag.primary_message(format!( + "generic or unresolved root type `{}` used by `{}`", + root_info.type_display, sink.sink_path, + )); + diag.note("The concrete type could not be inventoried after monomorphization"); + }), + ); } self.calls.push(CallRecord { @@ -401,41 +495,63 @@ impl Bc2wrapTypeInventory { def_path: root_info.def_path, owner_crate: root_info.owner_crate, category: root_info.category, - source: source_location(cx, expr), + source: source_location(cx, span), }); } } -/// Resolve a call or method-call expression to one of the configured serialization sinks. -fn sink_for_expr<'tcx>( - cx: &LateContext<'tcx>, - expr: &'tcx Expr<'tcx>, -) -> Option<(MatchedSink<'tcx>, &'tcx [Expr<'tcx>])> { - match expr.kind { - ExprKind::Call(callee, args) => { - let (def_id, generic_args) = resolved_call_def(cx, callee)?; - let spec = sink_spec_for_def(cx, def_id)?; - Some((MatchedSink::new(cx, spec, def_id, Some(generic_args)), args)) - } - ExprKind::MethodCall(_, _, args, _) => { - let def_id = cx.typeck_results().type_dependent_def_id(expr.hir_id)?; - let spec = sink_spec_for_def(cx, def_id)?; - Some((MatchedSink::new(cx, spec, def_id, None), args)) - } - _ => None, - } -} - -/// Resolve the `DefId` and inferred generic arguments of a function call. -fn resolved_call_def<'tcx>( +/// Resolve a monomorphized MIR call to its concrete instance when rustc can do so. +fn resolve_mono_callee<'tcx>( cx: &LateContext<'tcx>, - callee: &'tcx Expr<'_>, -) -> Option<(DefId, GenericArgsRef<'tcx>)> { - let callee_ty = cx.typeck_results().expr_ty(callee); + caller: Instance<'tcx>, + body: &Body<'tcx>, + func: &Operand<'tcx>, +) -> Option> { + let callee_ty = monomorphize_ty(cx, caller, func.ty(body, cx.tcx)); let TyKind::FnDef(def_id, generic_args) = callee_ty.kind() else { return None; }; - Some((*def_id, generic_args)) + + let instance = Instance::try_resolve( + cx.tcx, + ty::TypingEnv::fully_monomorphized(), + *def_id, + generic_args, + ) + .ok() + .flatten(); + let resolved_def_id = instance + .map(|instance| instance.def_id()) + .unwrap_or(*def_id); + // Canonicalize impl-method calls back to the trait item so the sink lookup matches the trait + // path (e.g. `::store_data` → `Storage::store_data`). We try the + // resolved (impl) DefId first, then fall back to the unresolved one — the latter fires when + // `Instance::try_resolve` failed and the original call is already against a trait item. + let canonical_def_id = cx + .tcx + .trait_item_of(resolved_def_id) + .or_else(|| cx.tcx.trait_item_of(*def_id)) + .unwrap_or(*def_id); + + Some(MonoCallee { + canonical_def_id, + generic_args, + instance, + }) +} + +/// Match a resolved monomorphized MIR call against the configured serialization sinks. +fn sink_for_mono_callee<'tcx>( + cx: &LateContext<'tcx>, + callee: MonoCallee<'tcx>, +) -> Option> { + let spec = sink_spec_for_def(cx, callee.canonical_def_id)?; + Some(MatchedSink::new( + cx, + spec, + callee.canonical_def_id, + callee.generic_args, + )) } /// Find the sink spec for a resolved function or method definition. @@ -469,7 +585,7 @@ impl<'tcx> MatchedSink<'tcx> { cx: &LateContext<'tcx>, spec: &'static SinkSpec, def_id: DefId, - generic_args: Option>, + generic_args: GenericArgsRef<'tcx>, ) -> Self { Self { spec, @@ -480,75 +596,108 @@ impl<'tcx> MatchedSink<'tcx> { } /// Extract the root type that a configured sink serializes or deserializes at a call site. -fn extract_root_type<'tcx>( +fn extract_mono_root_type<'tcx>( extractor: RootExtractor, cx: &LateContext<'tcx>, - expr: &'tcx Expr<'_>, - args: &'tcx [Expr<'tcx>], - generic_args: Option>, + caller: Instance<'tcx>, + body: &Body<'tcx>, + args: &[Spanned>], + sink: &MatchedSink<'tcx>, ) -> ExtractedRoot<'tcx> { match extractor { - RootExtractor::Arg(index) => extract_arg(cx, args, index), - RootExtractor::OptionTupleRefFirst(index) => option_tuple_ref_first(cx, args, index), - RootExtractor::ReturnResultOk => decode_ok_type(cx.typeck_results().expr_ty(expr)) - .map(ExtractedRoot::Root) - .unwrap_or(ExtractedRoot::Unresolved), - RootExtractor::FnGenericArg(index) => generic_args - .and_then(|args| args.types().nth(index)) + RootExtractor::Arg(index) => extract_mono_arg(cx, caller, body, args, index), + RootExtractor::LastFnGenericArg => sink + .generic_args + .types() + .last() .map(ExtractedRoot::Root) .unwrap_or(ExtractedRoot::Unresolved), } } -fn extract_arg<'tcx>( +fn extract_mono_arg<'tcx>( cx: &LateContext<'tcx>, - args: &'tcx [Expr<'tcx>], + caller: Instance<'tcx>, + body: &Body<'tcx>, + args: &[Spanned>], index: usize, ) -> ExtractedRoot<'tcx> { args.get(index) - .map(|arg| ExtractedRoot::Root(peel_references(cx.typeck_results().expr_ty(arg)))) + .map(|arg| { + let ty = arg.node.ty(body, cx.tcx); + let ty = monomorphize_ty(cx, caller, ty); + ExtractedRoot::Root(peel_references(ty)) + }) .unwrap_or(ExtractedRoot::Unresolved) } -fn option_tuple_ref_first<'tcx>( - cx: &LateContext<'tcx>, - args: &'tcx [Expr<'tcx>], - index: usize, -) -> ExtractedRoot<'tcx> { - let Some(arg) = args.get(index) else { - return ExtractedRoot::Unresolved; - }; +/// Extract `(func, args, fn_span)` from any call-shaped terminator. Returns `None` for any other +/// terminator kind. +fn call_terminator_fields<'a, 'tcx>( + terminator: &'a Terminator<'tcx>, +) -> Option<(&'a Operand<'tcx>, &'a [Spanned>], Span)> { + match &terminator.kind { + TerminatorKind::Call { + func, + args, + fn_span, + .. + } + | TerminatorKind::TailCall { + func, + args, + fn_span, + } => Some((func, &**args, *fn_span)), + _ => None, + } +} - let ExprKind::Call(_, option_args) = arg.kind else { - return ExtractedRoot::NoRoot; - }; - let Some(tuple_expr) = option_args.first() else { - return ExtractedRoot::NoRoot; - }; - let ExprKind::Tup(tuple_fields) = tuple_expr.kind else { - return ExtractedRoot::Unresolved; +fn monomorphize_ty<'tcx>(cx: &LateContext<'tcx>, caller: Instance<'tcx>, ty: Ty<'tcx>) -> Ty<'tcx> { + caller.instantiate_mir_and_normalize_erasing_regions( + cx.tcx, + ty::TypingEnv::fully_monomorphized(), + EarlyBinder::bind(ty), + ) +} + +fn enqueue_local_aggregate_instance<'tcx>( + cx: &LateContext<'tcx>, + caller: Instance<'tcx>, + rvalue: &Rvalue<'tcx>, + pending: &mut VecDeque>, +) { + let Rvalue::Aggregate(kind, _) = rvalue else { + return; }; - let Some(first_field) = tuple_fields.first() else { - return ExtractedRoot::Unresolved; + let (AggregateKind::Closure(def_id, args) + | AggregateKind::Coroutine(def_id, args) + | AggregateKind::CoroutineClosure(def_id, args)) = &**kind + else { + return; }; - ExtractedRoot::Root(peel_references(cx.typeck_results().expr_ty(first_field))) + let args = caller.instantiate_mir_and_normalize_erasing_regions( + cx.tcx, + ty::TypingEnv::fully_monomorphized(), + EarlyBinder::bind(*args), + ); + let instance = Instance::new_raw(*def_id, args); + if let Ok(instance) = cx.tcx.try_normalize_erasing_regions( + ty::TypingEnv::fully_monomorphized(), + Unnormalized::new_wip(instance), + ) { + enqueue_local_instance(cx, instance, pending); + } } -/// Return the first type argument of an enum return type, which is `T` for `Result`. -/// -/// This intentionally stays structural instead of checking for the exact `Result` definition: -/// bc2wrap decode helpers already identify the function, and the first enum type argument is the -/// value type we need from their return signature. -fn decode_ok_type<'tcx>(ty: Ty<'tcx>) -> Option> { - let TyKind::Adt(adt_def, args) = ty.kind() else { - return None; - }; - - if !adt_def.is_enum() { - return None; +fn enqueue_local_instance<'tcx>( + cx: &LateContext<'tcx>, + instance: Instance<'tcx>, + pending: &mut VecDeque>, +) { + if instance.def_id().is_local() && cx.tcx.is_mir_available(instance.def_id()) { + pending.push_back(instance); } - args.types().next() } /// Classify a root type into the inventory categories used by the JSON output. @@ -626,8 +775,8 @@ fn classify_named_root(cx: &LateContext<'_>, ty: Ty<'_>) -> RootInfo { } /// Convert a rustc span into a stable, JSON-friendly source location. -fn source_location(cx: &LateContext<'_>, expr: &Expr<'_>) -> SourceLocation { - let position = source_position(cx, expr.span); +fn source_location(cx: &LateContext<'_>, span: Span) -> SourceLocation { + let position = source_position(cx, span); SourceLocation { file: position.file, line: position.line, @@ -737,20 +886,45 @@ fn ui() { assert!( types .iter() - .any(|entry| entry["type_display"].as_str() == Some("T") - && entry["category"].as_str() == Some("generic") - && sink_paths_contains(entry, "bc2wrap") - && sink_paths_contains(entry, "vault::storage")), - "generic T should be recorded by both a bc2wrap and a versioned-storage sink" + .all(|entry| entry["category"].as_str() != Some("generic")), + "generic roots should be resolved by post-monomorphization analysis" ); assert!( types.iter().any( |entry| entry["type_display"].as_str() == Some("LocalStoragePayload") && entry["category"].as_str() == Some("local") - && sink_paths_contains(entry, "vault::storage") + && sink_paths_contains(entry, "vault::storage::Storage::store_data") + && sink_paths_contains(entry, "vault::storage::StorageExt::store_data_at_epoch") + && sink_paths_contains( + entry, + "vault::storage::StorageReaderExt::read_data_at_epoch" + ) ), - "LocalStoragePayload should be inventoried as a local versioned storage root" + "LocalStoragePayload should be inventoried through canonical storage trait sinks" + ); + + assert!( + types.iter().any( + |entry| entry["type_display"].as_str() == Some("LocalPayload") + && entry["category"].as_str() == Some("local") + && sink_paths_contains(entry, "bc2wrap::serialize") + && sink_paths_contains(entry, "vault::storage::StorageReader::read_data") + ), + "LocalPayload should include concrete generic bc2wrap and storage-reader roots" + ); + + let calls = inventory["calls"] + .as_array() + .expect("calls should be an array"); + assert!( + calls.iter().all(|entry| { + let sink_path = entry["sink_path"].as_str().unwrap_or_default(); + !sink_path.contains("store_versioned") + && !sink_path.contains("read_versioned") + && !sink_path.contains("write_all") + }), + "wrapper functions should not be matched as storage sinks" ); } diff --git a/tools/kms-lints/lints/src/lib.rs b/tools/kms-lints/lints/src/lib.rs index c52dd92a14..5981f2dc05 100644 --- a/tools/kms-lints/lints/src/lib.rs +++ b/tools/kms-lints/lints/src/lib.rs @@ -1,7 +1,7 @@ #![feature(rustc_private)] #![warn(unused_extern_crates)] -extern crate rustc_hir; +extern crate rustc_errors; extern crate rustc_lint; extern crate rustc_middle; extern crate rustc_session; @@ -9,7 +9,14 @@ extern crate rustc_span; mod bc2wrap_type_inventory; -dylint_linting::dylint_library!(); +#[allow(unused_extern_crates)] +extern crate rustc_driver; + +#[doc(hidden)] +#[unsafe(no_mangle)] +pub extern "C" fn dylint_version() -> *mut std::os::raw::c_char { + std::ffi::CString::from(c"0.1.0").into_raw() +} #[allow(clippy::no_mangle_with_rust_abi)] #[unsafe(no_mangle)] diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs index 135bd2893f..1c15263f11 100644 --- a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs +++ b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs @@ -31,75 +31,127 @@ mod bc2wrap { mod vault { pub mod storage { - #[derive(Clone, Copy)] - pub enum PubDataType { - PublicKey, + pub trait Storage { + fn store_data( + &mut self, + _data: &T, + _request_id: &u64, + _data_type: &str, + ) -> Result<(), ()>; } - #[derive(Clone, Copy)] - pub enum PrivDataType { - FheKeyInfo, + pub trait StorageExt: Storage { + fn store_data_at_epoch( + &mut self, + _data: &T, + _request_id: &u64, + _epoch_id: &u64, + _data_type: &str, + ) -> Result<(), ()>; } - pub struct Storage; + pub trait StorageReader { + fn read_data(&self, _request_id: &u64, _data_type: &str) -> Result; + } - pub async fn store_versioned_at_request_id( - _storage: &mut S, - _request_id: &u64, - _data: &T, - _data_type: &str, - ) -> Result<(), ()> { - Ok(()) + pub trait StorageReaderExt: StorageReader { + fn read_data_at_epoch( + &self, + _request_id: &u64, + _epoch_id: &u64, + _data_type: &str, + ) -> Result; } - pub async fn store_versioned_at_request_and_epoch_id( - _storage: &mut S, - _request_id: &u64, - _epoch_id: &u64, - _data: &T, - _data_type: &str, - ) -> Result<(), ()> { - Ok(()) + pub struct MemoryStorage; + + impl Storage for MemoryStorage { + fn store_data( + &mut self, + _data: &T, + _request_id: &u64, + _data_type: &str, + ) -> Result<(), ()> { + Ok(()) + } } - pub async fn read_versioned_at_request_id( - _storage: &S, - _request_id: &u64, - _data_type: &str, - ) -> Result { - panic!("test fixture does not execute") + impl StorageExt for MemoryStorage { + fn store_data_at_epoch( + &mut self, + _data: &T, + _request_id: &u64, + _epoch_id: &u64, + _data_type: &str, + ) -> Result<(), ()> { + Ok(()) + } } - pub async fn read_versioned_at_request_and_epoch_id( - _storage: &S, - _request_id: &u64, - _epoch_id: &u64, - _data_type: &str, - ) -> Result { - panic!("test fixture does not execute") + impl StorageReader for MemoryStorage { + fn read_data(&self, _request_id: &u64, _data_type: &str) -> Result { + panic!("test fixture does not execute") + } } - pub mod crypto_material { - pub mod base { - use super::super::{PrivDataType, PubDataType}; - - pub struct StorageRoot; - - impl StorageRoot { - pub async fn write_all( - &self, - _request_id: &u64, - _epoch_id: Option<&u64>, - _pub_data: Option<(&PubData, PubDataType)>, - _priv_data: Option<(&PrivData, PrivDataType)>, - _update_backup: bool, - _op_metric_tag: &'static str, - ) -> Result<(), ()> { - Ok(()) - } - } + impl StorageReaderExt for MemoryStorage { + fn read_data_at_epoch( + &self, + _request_id: &u64, + _epoch_id: &u64, + _data_type: &str, + ) -> Result { + panic!("test fixture does not execute") } } + + pub fn store_versioned_at_request_id( + storage: &mut S, + request_id: &u64, + data: &T, + data_type: &str, + ) -> Result<(), ()> + where + S: Storage, + { + storage.store_data(data, request_id, data_type) + } + + pub fn store_versioned_at_request_and_epoch_id( + storage: &mut S, + request_id: &u64, + epoch_id: &u64, + data: &T, + data_type: &str, + ) -> Result<(), ()> + where + S: StorageExt, + { + storage.store_data_at_epoch(data, request_id, epoch_id, data_type) + } + + pub fn read_versioned_at_request_id( + storage: &S, + request_id: &u64, + data_type: &str, + ) -> Result + where + S: StorageReader, + { + storage.read_data(request_id, data_type) + } + + pub fn read_versioned_at_request_and_epoch_id( + storage: &S, + request_id: &u64, + epoch_id: &u64, + data_type: &str, + ) -> Result + where + S: StorageReaderExt, + { + storage.read_data_at_epoch(request_id, epoch_id, data_type) + } } } @@ -111,14 +163,6 @@ struct LocalStoragePayload { _value: u64, } -struct LocalPublicMaterial { - _value: u64, -} - -struct LocalPrivateMaterial { - _value: u64, -} - trait RoundTrip: Sized { fn decode(bytes: &[u8]) -> Result; } @@ -133,58 +177,37 @@ fn generic_sink(value: &T) { let _ = bc2wrap::serialize(value); } -async fn generic_storage_sink(storage: &mut vault::storage::Storage, value: &T) { +fn generic_storage_sink(storage: &mut vault::storage::MemoryStorage, value: &T) { let request_id = 7; - let _ = - vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic").await; + let _ = vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic"); } -async fn storage_sinks() { - use vault::storage::crypto_material::base::StorageRoot; +fn storage_sinks() { use vault::storage::{ - PrivDataType, PubDataType, Storage, read_versioned_at_request_and_epoch_id, - read_versioned_at_request_id, store_versioned_at_request_and_epoch_id, - store_versioned_at_request_id, + MemoryStorage, read_versioned_at_request_and_epoch_id, read_versioned_at_request_id, + store_versioned_at_request_and_epoch_id, store_versioned_at_request_id, }; - let mut storage = Storage; - let storage_root = StorageRoot; + let mut storage = MemoryStorage; let request_id = 7; let epoch_id = 11; let payload = LocalStoragePayload { _value: 13 }; - let public_material = LocalPublicMaterial { _value: 17 }; - let private_material = LocalPrivateMaterial { _value: 19 }; - let _ = store_versioned_at_request_id(&mut storage, &request_id, &payload, "payload").await; + let _ = store_versioned_at_request_id(&mut storage, &request_id, &payload, "payload"); let _ = store_versioned_at_request_and_epoch_id( &mut storage, &request_id, &epoch_id, &payload, "payload", - ) - .await; + ); - let _: LocalPayload = read_versioned_at_request_id(&storage, &request_id, "payload") - .await - .unwrap(); + let _: LocalPayload = read_versioned_at_request_id(&storage, &request_id, "payload").unwrap(); let _: LocalStoragePayload = read_versioned_at_request_and_epoch_id(&storage, &request_id, &epoch_id, "payload") - .await .unwrap(); - let _ = storage_root - .write_all::( - &request_id, - Some(&epoch_id), - Some((&public_material, PubDataType::PublicKey)), - Some((&private_material, PrivDataType::FheKeyInfo)), - false, - "fixture", - ) - .await; - - generic_storage_sink(&mut storage, &payload).await; + generic_storage_sink(&mut storage, &payload); } fn main() { @@ -209,4 +232,5 @@ fn main() { let _ = LocalPayload::decode(&bytes); generic_sink(&payload); + storage_sinks(); } diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr index dfc3606f70..bad79a90fc 100644 --- a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr +++ b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr @@ -1,20 +1 @@ -warning: generic or unresolved root type `T` used by `bc2wrap::serialize` - --> $DIR/main.rs:133:13 - | -LL | let _ = bc2wrap::serialize(value); - | ^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: The concrete type cannot be inventoried from this generic sink - = note: `#[warn(bc2wrap_type_inventory)]` on by default - -warning: generic or unresolved root type `T` used by `vault::storage::store_versioned_at_request_id` - --> $DIR/main.rs:139:9 - | -LL | vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic").await; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - | - = note: The concrete type cannot be inventoried from this generic sink - -bc2wrap_type_inventory: wrote 16 calls, 4 local types, 1 generic sinks, 2 skipped roots for main -warning: 2 warnings emitted - +bc2wrap_type_inventory: wrote 13 calls, 2 local types, 0 generic sinks, 2 skipped roots for main diff --git a/tools/kms-lints/rust-toolchain b/tools/kms-lints/rust-toolchain index 1ab63d7a70..64328638fa 100644 --- a/tools/kms-lints/rust-toolchain +++ b/tools/kms-lints/rust-toolchain @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2026-01-22" +channel = "nightly-2026-05-17" components = ["llvm-tools-preview", "rustc-dev"] profile = "default" From e0954a332b7ed86fa50efeb9f439356109329919 Mon Sep 17 00:00:00 2001 From: Kelong Cong Date: Tue, 19 May 2026 10:16:32 +0200 Subject: [PATCH 7/7] chore: use tfhe-rs fns as sink --- tools/kms-lints/README.md | 27 +- tools/kms-lints/common/src/lib.rs | 4 +- tools/kms-lints/lints/Cargo.toml | 5 +- tools/kms-lints/lints/src/lib.rs | 9 +- ...entory.rs => versioned_codec_inventory.rs} | 228 ++++++++--------- .../tests/bc2wrap_type_inventory/main.rs | 236 ------------------ .../tests/bc2wrap_type_inventory/main.stderr | 1 - .../tests/tfhe-safe-serialize/Cargo.toml | 9 + .../tests/tfhe-safe-serialize/src/lib.rs | 35 +++ .../tests/versioned_codec_inventory/main.rs | 61 +++++ .../versioned_codec_inventory/main.stderr | 1 + 11 files changed, 226 insertions(+), 390 deletions(-) rename tools/kms-lints/lints/src/{bc2wrap_type_inventory.rs => versioned_codec_inventory.rs} (78%) delete mode 100644 tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs delete mode 100644 tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr create mode 100644 tools/kms-lints/lints/tests/tfhe-safe-serialize/Cargo.toml create mode 100644 tools/kms-lints/lints/tests/tfhe-safe-serialize/src/lib.rs create mode 100644 tools/kms-lints/lints/tests/versioned_codec_inventory/main.rs create mode 100644 tools/kms-lints/lints/tests/versioned_codec_inventory/main.stderr diff --git a/tools/kms-lints/README.md b/tools/kms-lints/README.md index 944b54e80a..a5620a441e 100644 --- a/tools/kms-lints/README.md +++ b/tools/kms-lints/README.md @@ -16,29 +16,24 @@ examples. ## Lints -### `bc2wrap_type_inventory` +### `versioned_codec_inventory` -Collects the root workspace-local types used at curated serialization sink call -sites in codegen-reachable monomorphized MIR, including: +Collects the root workspace-local types used at TFHE safe-serialization +encode/decode sink call sites in checked local MIR, including: -- `bc2wrap::serialize` -- `bc2wrap::serialize_into` -- `bc2wrap::deserialize_safe` -- `bc2wrap::deserialize_unsafe` -- `Storage::store_data` -- `StorageExt::store_data_at_epoch` -- `StorageReader::read_data` -- `StorageReaderExt::read_data_at_epoch` +- `tfhe_safe_serialize::safe_serialize` +- `tfhe_safe_serialize::safe_deserialize` +- `tfhe_safe_serialize::safe_deserialize_conformant` The lint writes one JSON file per checked crate target under -`target/kms-lints/bc2wrap-type-inventory` by default. Set -`KMS_BC2WRAP_INVENTORY_DIR` to override the output directory. +`target/kms-lints/versioned-codec-inventory` by default. Set +`KMS_VERSIONED_CODEC_INVENTORY_DIR` to override the output directory. The inventory records only root types. It does not recursively expand fields. Each file contains full `calls` plus a `types` summary categorized as `local`, `foreign`, `generic`, `compound`, `primitive`, or `unknown` for auditability. Call records include `sink_path` (the rustc def path of the matched sink), and type summaries aggregate every distinct sink path each root flowed through. -Storage wrapper functions are intentionally not listed as sinks; their concrete -types are recovered from the canonical storage trait calls reached after -monomorphization. +Storage wrapper and trait functions are intentionally not listed as sinks; a +storage-backed value is recorded only when the scanner reaches the concrete +TFHE safe-serialization encode/decode call. diff --git a/tools/kms-lints/common/src/lib.rs b/tools/kms-lints/common/src/lib.rs index 82a9e08a94..9329273a05 100644 --- a/tools/kms-lints/common/src/lib.rs +++ b/tools/kms-lints/common/src/lib.rs @@ -17,7 +17,7 @@ use rustc_middle::ty::{Ty, TyKind}; use rustc_span::Span; /// Name of the environment variable used to override the inventory output directory. -pub const INVENTORY_DIR_ENV: &str = "KMS_BC2WRAP_INVENTORY_DIR"; +pub const INVENTORY_DIR_ENV: &str = "KMS_VERSIONED_CODEC_INVENTORY_DIR"; /// Source position of a linted expression. #[derive(Clone, Debug)] @@ -92,7 +92,7 @@ pub fn def_crate_name(cx: &LateContext<'_>, def_id: DefId) -> String { pub fn inventory_dir() -> PathBuf { std::env::var(INVENTORY_DIR_ENV) .map(PathBuf::from) - .unwrap_or_else(|_| PathBuf::from("target/kms-lints/bc2wrap-type-inventory")) + .unwrap_or_else(|_| PathBuf::from("target/kms-lints/versioned-codec-inventory")) } /// Returns the workspace crate names, normalized to rustc crate-name spelling. diff --git a/tools/kms-lints/lints/Cargo.toml b/tools/kms-lints/lints/Cargo.toml index 5723dad9e1..d4962f75a1 100644 --- a/tools/kms-lints/lints/Cargo.toml +++ b/tools/kms-lints/lints/Cargo.toml @@ -17,13 +17,14 @@ serde_json = { workspace = true } [dev-dependencies] dylint_testing = "6.0.0" # Dylint UI test helper; test-only lint tooling. serde_json = { workspace = true } +tfhe-safe-serialize = { path = "tests/tfhe-safe-serialize" } [package.metadata.rust-analyzer] rustc_private = true [[example]] -name = "bc2wrap_type_inventory" -path = "tests/bc2wrap_type_inventory/main.rs" +name = "versioned_codec_inventory" +path = "tests/versioned_codec_inventory/main.rs" [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ diff --git a/tools/kms-lints/lints/src/lib.rs b/tools/kms-lints/lints/src/lib.rs index 5981f2dc05..a7d5d4ad88 100644 --- a/tools/kms-lints/lints/src/lib.rs +++ b/tools/kms-lints/lints/src/lib.rs @@ -7,7 +7,7 @@ extern crate rustc_middle; extern crate rustc_session; extern crate rustc_span; -mod bc2wrap_type_inventory; +mod versioned_codec_inventory; #[allow(unused_extern_crates)] extern crate rustc_driver; @@ -21,7 +21,8 @@ pub extern "C" fn dylint_version() -> *mut std::os::raw::c_char { #[allow(clippy::no_mangle_with_rust_abi)] #[unsafe(no_mangle)] pub fn register_lints(_sess: &rustc_session::Session, lint_store: &mut rustc_lint::LintStore) { - lint_store.register_lints(&[bc2wrap_type_inventory::BC2WRAP_TYPE_INVENTORY]); - lint_store - .register_late_pass(|_| Box::new(bc2wrap_type_inventory::Bc2wrapTypeInventory::default())); + lint_store.register_lints(&[versioned_codec_inventory::VERSIONED_CODEC_INVENTORY]); + lint_store.register_late_pass(|_| { + Box::new(versioned_codec_inventory::VersionedCodecInventory::default()) + }); } diff --git a/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs b/tools/kms-lints/lints/src/versioned_codec_inventory.rs similarity index 78% rename from tools/kms-lints/lints/src/bc2wrap_type_inventory.rs rename to tools/kms-lints/lints/src/versioned_codec_inventory.rs index 30842bae2e..3be00b67c9 100644 --- a/tools/kms-lints/lints/src/bc2wrap_type_inventory.rs +++ b/tools/kms-lints/lints/src/versioned_codec_inventory.rs @@ -1,28 +1,28 @@ -//! Compile-time inventory of types passed to serialization / storage sinks. +//! Compile-time inventory of types passed to versioned safe-serialization sinks. //! //! ## What this is //! //! A Dylint lint plugin (a Rust compiler plugin loaded by Dylint). Despite the //! word "lint", its primary job is **not** to flag bad code: it observes every -//! call to a curated list of serialization and storage functions in the crate +//! call to a curated list of TFHE safe-serialization functions in the crate //! being compiled, figures out which *root type* flows through each call, and //! writes a JSON report to -//! `target/kms-lints/bc2wrap-type-inventory/.json`. Downstream tooling -//! reads that report to check that every persisted type is covered by +//! `target/kms-lints/versioned-codec-inventory/.json`. Downstream tooling +//! reads that report to check that every versioned codec root is covered by //! backward-compatibility tests. //! //! Why a compiler plugin? Because we need **post-monomorphization** information. //! Plain text-search and HIR linting can't tell you that a generic helper is //! monomorphized to `MyKey` at one call site and `Vec` at another. Hooking -//! into rustc as a `LateLintPass` lets us ask rustc for the concrete instances -//! it will codegen, then inspect those instances' MIR. +//! into rustc as a `LateLintPass` lets us inspect local MIR while substituting +//! concrete caller instances into generic helper calls. //! //! ## Vocabulary //! //! - **Sink** — a function whose call sites we care about. Sinks are listed in -//! `SINK_SPECS`. The current set covers the raw encode / decode helpers in -//! the `bc2wrap` crate and the canonical vault storage reader / writer trait -//! methods in this workspace. +//! `SINK_SPECS`. The current set covers the TFHE safe-serialization encode / +//! decode helpers re-exported by `tfhe::safe_serialization` and defined in +//! `tfhe_safe_serialize`. //! - **Root type** — the top of the type tree handed to a sink. Fields are //! *not* recursed into: if a sink receives `&MyStruct { … }`, the root is //! `MyStruct`, full stop. @@ -44,9 +44,9 @@ //! //! ## Pipeline (per crate) //! -//! 1. `check_crate_post` seeds a local worklist with codegen-reachable -//! non-generic MIR roots, then follows local monomorphized calls and -//! closure / coroutine bodies. +//! 1. `check_crate_post` seeds a local worklist with every local non-generic +//! MIR root, then follows local monomorphized calls and closure / coroutine +//! bodies. //! 2. The lint deduplicates local function instances and scans each instance's //! MIR call terminators. For every call, it substitutes the caller //! instance's concrete args into the callee and argument types. @@ -68,19 +68,19 @@ //! //! ```ignore //! let payload = LocalPayload { _value: 7 }; -//! let _ = bc2wrap::serialize(&payload); +//! let _ = tfhe_safe_serialize::safe_serialize(&payload, Vec::::new(), 1024); //! ``` //! //! 1. rustc monomorphizes the caller containing the call. //! 2. `collect_mono_calls` scans that caller's MIR and sees a call terminator -//! whose callee resolves to `bc2wrap::serialize`. +//! whose callee resolves to `tfhe_safe_serialize::safe_serialize`. //! 3. `extract_mono_root_type` runs `Arg(0)`: it reads arg 0's monomorphized //! type (`&LocalPayload`), peels the reference, and returns //! `Root(LocalPayload)`. //! 4. `classify_root` sees an ADT defined in a workspace crate and returns //! `RootCategory::Local`. -//! 5. `record` pushes a `CallRecord` with `function = "serialize"`, -//! `sink_path = "bc2wrap::serialize"`, `type_display = "LocalPayload"`, +//! 5. `record` pushes a `CallRecord` with `function = "safe_serialize"`, +//! `sink_path = "tfhe_safe_serialize::safe_serialize"`, `type_display = "LocalPayload"`, //! `category = Local`, and the source file / line / column of the matched //! lower-level sink call. //! 6. After every mono item has been scanned, `check_crate_post` collapses @@ -93,20 +93,18 @@ //! //! ## How to add a new sink //! -//! 1. Append an entry to `SINK_SPECS`: the function name, a `path_contains` -//! substring that disambiguates same-named functions or traits, and one or -//! more extractors. The sink only matches functions defined in a workspace -//! crate (via `is_workspace_def`) — there is no separate "owner" knob. +//! 1. Append an entry to `SINK_SPECS`: the function name, the trusted defining +//! crate, and one or more extractors. //! 2. If none of the existing `RootExtractor` variants captures where the //! root lives in your signature, add a variant and handle it in //! `extract_mono_root_type`. -//! 3. Extend the UI fixture in `tests/bc2wrap_type_inventory/main.rs` and add +//! 3. Extend the UI fixture in `tests/versioned_codec_inventory/main.rs` and add //! an assertion in the `ui` test at the bottom of this file so the new //! sink is exercised by `cargo test`. //! //! ## Configuration //! -//! Set `KMS_BC2WRAP_INVENTORY_DIR` to override the output directory. The +//! Set `KMS_VERSIONED_CODEC_INVENTORY_DIR` to override the output directory. The //! workspace crate set is always auto-detected via `cargo metadata`. use std::collections::{BTreeMap, BTreeSet, HashSet, VecDeque}; @@ -127,46 +125,23 @@ use rustc_span::def_id::{DefId, LOCAL_CRATE}; use rustc_span::{Span, Spanned}; use serde::Serialize; +const TFHE_SAFE_SERIALIZE_CRATE: &str = "tfhe_safe_serialize"; + const SINK_SPECS: &[SinkSpec] = &[ SinkSpec { - function: "serialize", - path_contains: Some("bc2wrap"), - extractors: &[RootExtractor::Arg(0)], - }, - SinkSpec { - function: "serialize_into", - path_contains: Some("bc2wrap"), + function: "safe_serialize", + crate_name: TFHE_SAFE_SERIALIZE_CRATE, extractors: &[RootExtractor::Arg(0)], }, SinkSpec { - function: "deserialize_safe", - path_contains: Some("bc2wrap"), - extractors: &[RootExtractor::LastFnGenericArg], - }, - SinkSpec { - function: "deserialize_unsafe", - path_contains: Some("bc2wrap"), - extractors: &[RootExtractor::LastFnGenericArg], - }, - SinkSpec { - function: "store_data", - path_contains: Some("vault::storage::Storage"), - extractors: &[RootExtractor::Arg(1)], - }, - SinkSpec { - function: "store_data_at_epoch", - path_contains: Some("vault::storage::StorageExt"), - extractors: &[RootExtractor::Arg(1)], - }, - SinkSpec { - function: "read_data", - path_contains: Some("vault::storage::StorageReader"), - extractors: &[RootExtractor::LastFnGenericArg], + function: "safe_deserialize", + crate_name: TFHE_SAFE_SERIALIZE_CRATE, + extractors: &[RootExtractor::FnGenericArg(0)], }, SinkSpec { - function: "read_data_at_epoch", - path_contains: Some("vault::storage::StorageReaderExt"), - extractors: &[RootExtractor::LastFnGenericArg], + function: "safe_deserialize_conformant", + crate_name: TFHE_SAFE_SERIALIZE_CRATE, + extractors: &[RootExtractor::FnGenericArg(0)], }, ]; @@ -236,26 +211,21 @@ struct SummaryEntry { #[derive(Clone, Copy, Debug)] struct SinkSpec { function: &'static str, - path_contains: Option<&'static str>, + crate_name: &'static str, extractors: &'static [RootExtractor], } /// Where in a monomorphized sink call the root type lives. /// -/// The convention for `LastFnGenericArg` is that the inventoried type is always the *last* type -/// generic of the callee. For trait methods this works because the implicit `Self` precedes the -/// method's own type params, so `T` in `read_data(&self, ...)` is the last entry in the -/// monomorphized `GenericArgsRef`. For free functions with a single type param (`deserialize_safe`) -/// "last" is equivalently "only". +/// Decode sinks use `FnGenericArg(0)` because `safe_deserialize(reader: impl Read, ...)` +/// has an anonymous reader generic after `T` in monomorphized rustc args. #[derive(Clone, Copy, Debug)] enum RootExtractor { /// Read the type of the call's positional argument at `index` (after monomorphization and /// reference peeling). Arg 0 is the receiver for trait methods. Arg(usize), - /// Read the last type generic of the callee. Used for sinks that take the root by `T` in the - /// signature (e.g. `deserialize_safe(bytes: &[u8]) -> Result`, - /// `StorageReader::read_data`). - LastFnGenericArg, + /// Read the type generic of the callee at `index`. + FnGenericArg(usize), } #[derive(Clone, Debug)] @@ -287,29 +257,29 @@ enum ExtractedRoot<'tcx> { Unresolved, } -/// Late pass lint collecting bc2wrap root type inventory data. +/// Late pass lint collecting versioned codec root type inventory data. #[derive(Default)] -pub struct Bc2wrapTypeInventory { +pub struct VersionedCodecInventory { calls: Vec, } declare_lint! { /// ### What it does - /// Collects an inventory of workspace-local root types used by `bc2wrap` call sites. + /// Collects an inventory of workspace-local root types used by versioned codec call sites. /// /// ### Why is this useful? - /// Types serialized through `bc2wrap` are compatibility-sensitive. A compiler-aware - /// inventory makes these roots visible without requiring marker traits on foreign types. - pub BC2WRAP_TYPE_INVENTORY, + /// Types serialized through TFHE safe serialization are compatibility-sensitive. A + /// compiler-aware inventory makes these roots visible after monomorphization. + pub VERSIONED_CODEC_INVENTORY, Warn, - "Collects root types used by bc2wrap serialization and deserialization" + "Collects root types used by versioned safe serialization and deserialization" } -impl_lint_pass!(Bc2wrapTypeInventory => [BC2WRAP_TYPE_INVENTORY]); +impl_lint_pass!(VersionedCodecInventory => [VERSIONED_CODEC_INVENTORY]); -impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { +impl<'tcx> LateLintPass<'tcx> for VersionedCodecInventory { // Write one inventory file per crate target. Dylint also checks build scripts, but those do - // not represent production bc2wrap call sites for this inventory. + // not represent production versioned codec call sites for this inventory. fn check_crate_post(&mut self, cx: &LateContext<'tcx>) { let crate_name = cx.tcx.crate_name(LOCAL_CRATE).to_string(); if crate_name == "build_script_build" { @@ -324,7 +294,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { let dir = inventory_dir(); if let Err(error) = std::fs::create_dir_all(&dir) { eprintln!( - "bc2wrap_type_inventory: failed to create {}: {error}", + "versioned_codec_inventory: failed to create {}: {error}", dir.display() ); return; @@ -335,7 +305,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { Ok(json) => { if let Err(error) = std::fs::write(&path, json) { eprintln!( - "bc2wrap_type_inventory: failed to write {}: {error}", + "versioned_codec_inventory: failed to write {}: {error}", path.display() ); return; @@ -343,7 +313,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { } Err(error) => { eprintln!( - "bc2wrap_type_inventory: failed to encode JSON for {crate_name}: {error}" + "versioned_codec_inventory: failed to encode JSON for {crate_name}: {error}" ); return; } @@ -364,7 +334,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { }); eprintln!( - "bc2wrap_type_inventory: wrote {} calls, {} local types, {} generic sinks, {} skipped roots for {}", + "versioned_codec_inventory: wrote {} calls, {} local types, {} generic sinks, {} skipped roots for {}", inventory.calls.len(), local_types, generic_sinks, @@ -375,7 +345,7 @@ impl<'tcx> LateLintPass<'tcx> for Bc2wrapTypeInventory { } } -impl Bc2wrapTypeInventory { +impl VersionedCodecInventory { fn collect_mono_calls<'tcx>(&mut self, cx: &LateContext<'tcx>) { let tcx = cx.tcx; let mut pending = VecDeque::new(); @@ -385,12 +355,7 @@ impl Bc2wrapTypeInventory { if tcx.generics_of(def_id).requires_monomorphization(tcx) { continue; } - let def_id = def_id.to_def_id(); - let is_root = tcx.entry_fn(()).is_some_and(|(entry, _)| entry == def_id) - || tcx.is_reachable_non_generic(def_id); - if is_root { - pending.push_back(Instance::mono(tcx, def_id)); - } + pending.push_back(Instance::mono(tcx, def_id.to_def_id())); } while let Some(instance) = pending.pop_front() { @@ -476,7 +441,7 @@ impl Bc2wrapTypeInventory { RootCategory::Generic | RootCategory::Unknown ) { cx.emit_span_lint( - BC2WRAP_TYPE_INVENTORY, + VERSIONED_CODEC_INVENTORY, span, rustc_errors::DiagDecorator(|diag| { diag.primary_message(format!( @@ -523,10 +488,9 @@ fn resolve_mono_callee<'tcx>( let resolved_def_id = instance .map(|instance| instance.def_id()) .unwrap_or(*def_id); - // Canonicalize impl-method calls back to the trait item so the sink lookup matches the trait - // path (e.g. `::store_data` → `Storage::store_data`). We try the - // resolved (impl) DefId first, then fall back to the unresolved one — the latter fires when - // `Instance::try_resolve` failed and the original call is already against a trait item. + // Canonicalize impl-method calls back to the trait item before sink lookup. The current sink + // set is made of free functions, but keeping this normalization makes the matcher safe for + // future trait-method sinks. let canonical_def_id = cx .tcx .trait_item_of(resolved_def_id) @@ -570,13 +534,11 @@ impl SinkSpec { return false; } - if let Some(path_contains) = self.path_contains - && !cx.tcx.def_path_str(def_id).contains(path_contains) - { + if def_crate_name(cx, def_id) != self.crate_name { return false; } - is_workspace_def(cx, def_id, workspace_crates()) + true } } @@ -606,10 +568,10 @@ fn extract_mono_root_type<'tcx>( ) -> ExtractedRoot<'tcx> { match extractor { RootExtractor::Arg(index) => extract_mono_arg(cx, caller, body, args, index), - RootExtractor::LastFnGenericArg => sink + RootExtractor::FnGenericArg(index) => sink .generic_args .types() - .last() + .nth(index) .map(ExtractedRoot::Root) .unwrap_or(ExtractedRoot::Unresolved), } @@ -845,7 +807,7 @@ fn count_types(summaries: &[TypeSummary], predicate: impl Fn(&RootCategory) -> b #[test] fn ui() { let output_dir = std::env::temp_dir().join(format!( - "kms_lints_bc2wrap_type_inventory_ui_{}", + "kms_lints_versioned_codec_inventory_ui_{}", std::process::id() )); let _ = std::fs::remove_dir_all(&output_dir); @@ -856,7 +818,7 @@ fn ui() { std::env::set_var(kms_lints_common::INVENTORY_DIR_ENV, output_dir.as_os_str()); } - dylint_testing::ui_test_example(env!("CARGO_PKG_NAME"), "bc2wrap_type_inventory"); + dylint_testing::ui_test_example(env!("CARGO_PKG_NAME"), "versioned_codec_inventory"); let json_path = output_dir.join("main.json"); let json = std::fs::read_to_string(&json_path) @@ -867,12 +829,16 @@ fn ui() { let types = inventory["types"] .as_array() .expect("types should be an array"); - assert!( - types.iter().any( - |entry| entry["type_display"].as_str() == Some("LocalPayload") - && entry["category"].as_str() == Some("local") - ), - "LocalPayload should be inventoried as a local root" + assert_local_with_sink(types, "LocalPayload", "tfhe_safe_serialize::safe_serialize"); + assert_local_with_sink( + types, + "LocalPayload", + "tfhe_safe_serialize::safe_deserialize", + ); + assert_local_with_sink( + types, + "ConformantPayload", + "tfhe_safe_serialize::safe_deserialize_conformant", ); assert!( @@ -886,32 +852,24 @@ fn ui() { assert!( types .iter() - .all(|entry| entry["category"].as_str() != Some("generic")), - "generic roots should be resolved by post-monomorphization analysis" + .any(|entry| entry["type_display"].as_str() == Some("(u64, u64)") + && entry["category"].as_str() == Some("compound")), + "tuple roots should still be recorded as compound" ); assert!( - types.iter().any( - |entry| entry["type_display"].as_str() == Some("LocalStoragePayload") - && entry["category"].as_str() == Some("local") - && sink_paths_contains(entry, "vault::storage::Storage::store_data") - && sink_paths_contains(entry, "vault::storage::StorageExt::store_data_at_epoch") - && sink_paths_contains( - entry, - "vault::storage::StorageReaderExt::read_data_at_epoch" - ) - ), - "LocalStoragePayload should be inventoried through canonical storage trait sinks" + types + .iter() + .any(|entry| entry["type_display"].as_str() == Some("u64") + && entry["category"].as_str() == Some("primitive")), + "primitive roots should still be recorded as primitive" ); assert!( - types.iter().any( - |entry| entry["type_display"].as_str() == Some("LocalPayload") - && entry["category"].as_str() == Some("local") - && sink_paths_contains(entry, "bc2wrap::serialize") - && sink_paths_contains(entry, "vault::storage::StorageReader::read_data") - ), - "LocalPayload should include concrete generic bc2wrap and storage-reader roots" + types + .iter() + .all(|entry| entry["category"].as_str() != Some("generic")), + "generic roots should be resolved by post-monomorphization analysis" ); let calls = inventory["calls"] @@ -920,11 +878,23 @@ fn ui() { assert!( calls.iter().all(|entry| { let sink_path = entry["sink_path"].as_str().unwrap_or_default(); - !sink_path.contains("store_versioned") - && !sink_path.contains("read_versioned") - && !sink_path.contains("write_all") + !sink_path.contains("bc2wrap") + && !sink_path.contains("Storage") + && !sink_path.contains("safe_serialized_size") }), - "wrapper functions should not be matched as storage sinks" + "only TFHE safe-serialization encode/decode sinks should be matched" + ); +} + +#[cfg(test)] +fn assert_local_with_sink(types: &[serde_json::Value], type_display: &str, sink_path: &str) { + assert!( + types + .iter() + .any(|entry| entry["type_display"].as_str() == Some(type_display) + && entry["category"].as_str() == Some("local") + && sink_paths_contains(entry, sink_path)), + "{type_display} should be inventoried through {sink_path}" ); } diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs deleted file mode 100644 index 1c15263f11..0000000000 --- a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.rs +++ /dev/null @@ -1,236 +0,0 @@ -#![allow(dead_code, unused_variables)] - -mod bc2wrap { - pub mod error { - #[derive(Debug)] - pub struct EncodeError; - - #[derive(Debug)] - pub struct DecodeError; - } - - pub fn serialize(_value: &T) -> Result, error::EncodeError> { - Ok(Vec::new()) - } - - pub fn serialize_into( - _value: &T, - _writer: &mut W, - ) -> Result { - Ok(0) - } - - pub fn deserialize_safe(_bytes: &[u8]) -> Result { - panic!("test fixture does not execute") - } - - pub fn deserialize_unsafe(_bytes: &[u8]) -> Result { - panic!("test fixture does not execute") - } -} - -mod vault { - pub mod storage { - pub trait Storage { - fn store_data( - &mut self, - _data: &T, - _request_id: &u64, - _data_type: &str, - ) -> Result<(), ()>; - } - - pub trait StorageExt: Storage { - fn store_data_at_epoch( - &mut self, - _data: &T, - _request_id: &u64, - _epoch_id: &u64, - _data_type: &str, - ) -> Result<(), ()>; - } - - pub trait StorageReader { - fn read_data(&self, _request_id: &u64, _data_type: &str) -> Result; - } - - pub trait StorageReaderExt: StorageReader { - fn read_data_at_epoch( - &self, - _request_id: &u64, - _epoch_id: &u64, - _data_type: &str, - ) -> Result; - } - - pub struct MemoryStorage; - - impl Storage for MemoryStorage { - fn store_data( - &mut self, - _data: &T, - _request_id: &u64, - _data_type: &str, - ) -> Result<(), ()> { - Ok(()) - } - } - - impl StorageExt for MemoryStorage { - fn store_data_at_epoch( - &mut self, - _data: &T, - _request_id: &u64, - _epoch_id: &u64, - _data_type: &str, - ) -> Result<(), ()> { - Ok(()) - } - } - - impl StorageReader for MemoryStorage { - fn read_data(&self, _request_id: &u64, _data_type: &str) -> Result { - panic!("test fixture does not execute") - } - } - - impl StorageReaderExt for MemoryStorage { - fn read_data_at_epoch( - &self, - _request_id: &u64, - _epoch_id: &u64, - _data_type: &str, - ) -> Result { - panic!("test fixture does not execute") - } - } - - pub fn store_versioned_at_request_id( - storage: &mut S, - request_id: &u64, - data: &T, - data_type: &str, - ) -> Result<(), ()> - where - S: Storage, - { - storage.store_data(data, request_id, data_type) - } - - pub fn store_versioned_at_request_and_epoch_id( - storage: &mut S, - request_id: &u64, - epoch_id: &u64, - data: &T, - data_type: &str, - ) -> Result<(), ()> - where - S: StorageExt, - { - storage.store_data_at_epoch(data, request_id, epoch_id, data_type) - } - - pub fn read_versioned_at_request_id( - storage: &S, - request_id: &u64, - data_type: &str, - ) -> Result - where - S: StorageReader, - { - storage.read_data(request_id, data_type) - } - - pub fn read_versioned_at_request_and_epoch_id( - storage: &S, - request_id: &u64, - epoch_id: &u64, - data_type: &str, - ) -> Result - where - S: StorageReaderExt, - { - storage.read_data_at_epoch(request_id, epoch_id, data_type) - } - } -} - -struct LocalPayload { - _value: u64, -} - -struct LocalStoragePayload { - _value: u64, -} - -trait RoundTrip: Sized { - fn decode(bytes: &[u8]) -> Result; -} - -impl RoundTrip for LocalPayload { - fn decode(bytes: &[u8]) -> Result { - bc2wrap::deserialize_safe(bytes) - } -} - -fn generic_sink(value: &T) { - let _ = bc2wrap::serialize(value); -} - -fn generic_storage_sink(storage: &mut vault::storage::MemoryStorage, value: &T) { - let request_id = 7; - let _ = vault::storage::store_versioned_at_request_id(storage, &request_id, value, "generic"); -} - -fn storage_sinks() { - use vault::storage::{ - MemoryStorage, read_versioned_at_request_and_epoch_id, read_versioned_at_request_id, - store_versioned_at_request_and_epoch_id, store_versioned_at_request_id, - }; - - let mut storage = MemoryStorage; - let request_id = 7; - let epoch_id = 11; - let payload = LocalStoragePayload { _value: 13 }; - - let _ = store_versioned_at_request_id(&mut storage, &request_id, &payload, "payload"); - let _ = store_versioned_at_request_and_epoch_id( - &mut storage, - &request_id, - &epoch_id, - &payload, - "payload", - ); - - let _: LocalPayload = read_versioned_at_request_id(&storage, &request_id, "payload").unwrap(); - let _: LocalStoragePayload = - read_versioned_at_request_and_epoch_id(&storage, &request_id, &epoch_id, "payload") - .unwrap(); - - generic_storage_sink(&mut storage, &payload); -} - -fn main() { - let payload = LocalPayload { _value: 7 }; - let _ = bc2wrap::serialize(&payload); - - use bc2wrap::serialize as encode; - let _ = encode(&payload); - - let mut writer: Vec = Vec::new(); - let _ = bc2wrap::serialize_into(&payload, &mut writer); - - let string = String::from("foreign"); - let _ = bc2wrap::serialize(&string); - - let tuple = (1_u64, 2_u64); - let _ = bc2wrap::serialize(&tuple); - - let bytes = []; - let _: LocalPayload = bc2wrap::deserialize_safe(&bytes).unwrap(); - let _: LocalPayload = bc2wrap::deserialize_unsafe(&bytes).unwrap(); - let _ = LocalPayload::decode(&bytes); - - generic_sink(&payload); - storage_sinks(); -} diff --git a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr b/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr deleted file mode 100644 index bad79a90fc..0000000000 --- a/tools/kms-lints/lints/tests/bc2wrap_type_inventory/main.stderr +++ /dev/null @@ -1 +0,0 @@ -bc2wrap_type_inventory: wrote 13 calls, 2 local types, 0 generic sinks, 2 skipped roots for main diff --git a/tools/kms-lints/lints/tests/tfhe-safe-serialize/Cargo.toml b/tools/kms-lints/lints/tests/tfhe-safe-serialize/Cargo.toml new file mode 100644 index 0000000000..1d9220f96e --- /dev/null +++ b/tools/kms-lints/lints/tests/tfhe-safe-serialize/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "tfhe-safe-serialize" +version = "0.0.0" +edition = "2024" +publish = false + +[lib] +name = "tfhe_safe_serialize" +path = "src/lib.rs" diff --git a/tools/kms-lints/lints/tests/tfhe-safe-serialize/src/lib.rs b/tools/kms-lints/lints/tests/tfhe-safe-serialize/src/lib.rs new file mode 100644 index 0000000000..d4b33193a3 --- /dev/null +++ b/tools/kms-lints/lints/tests/tfhe-safe-serialize/src/lib.rs @@ -0,0 +1,35 @@ +#![allow(unused_variables)] + +#[derive(Debug)] +pub struct Error; + +pub trait ParameterSetConformant { + type ParameterSet; +} + +pub fn safe_serialize( + object: &T, + writer: impl std::io::Write, + serialized_size_limit: u64, +) -> Result<(), Error> { + Ok(()) +} + +pub fn safe_serialized_size(object: &T) -> Result { + Ok(0) +} + +pub fn safe_deserialize( + reader: impl std::io::Read, + deserialized_size_limit: u64, +) -> Result { + panic!("test fixture does not execute") +} + +pub fn safe_deserialize_conformant( + reader: impl std::io::Read, + deserialized_size_limit: u64, + parameter_set: &T::ParameterSet, +) -> Result { + panic!("test fixture does not execute") +} diff --git a/tools/kms-lints/lints/tests/versioned_codec_inventory/main.rs b/tools/kms-lints/lints/tests/versioned_codec_inventory/main.rs new file mode 100644 index 0000000000..5c37632870 --- /dev/null +++ b/tools/kms-lints/lints/tests/versioned_codec_inventory/main.rs @@ -0,0 +1,61 @@ +#![allow(dead_code, unused_variables)] + +use std::io::Cursor; + +use tfhe_safe_serialize::{ + ParameterSetConformant, safe_deserialize, safe_deserialize_conformant, safe_serialize, +}; + +struct LocalPayload { + _value: u64, +} + +struct ConformantPayload { + _value: u64, +} + +impl ParameterSetConformant for ConformantPayload { + type ParameterSet = (); +} + +fn generic_encode(value: &T) { + let mut writer = Vec::new(); + let _ = safe_serialize(value, &mut writer, 1024); +} + +fn generic_decode(bytes: &[u8]) -> Result { + safe_deserialize(Cursor::new(bytes), 1024) +} + +fn ignored_size(value: &T) { + let _ = tfhe_safe_serialize::safe_serialized_size(value); +} + +fn main() { + let payload = LocalPayload { _value: 7 }; + let mut writer = Vec::new(); + let _ = safe_serialize(&payload, &mut writer, 1024); + + use tfhe_safe_serialize::safe_serialize as encode; + let _ = encode(&payload, Vec::new(), 1024); + + let string = String::from("foreign"); + let _ = safe_serialize(&string, Vec::new(), 1024); + + let tuple = (1_u64, 2_u64); + let _ = safe_serialize(&tuple, Vec::new(), 1024); + + let primitive = 9_u64; + let _ = safe_serialize(&primitive, Vec::new(), 1024); + + let bytes: &[u8] = &[]; + let _: LocalPayload = safe_deserialize(Cursor::new(bytes), 1024).unwrap(); + let _: LocalPayload = generic_decode(bytes).unwrap(); + + let parameter_set = (); + let _: ConformantPayload = + safe_deserialize_conformant(Cursor::new(bytes), 1024, ¶meter_set).unwrap(); + + generic_encode(&payload); + ignored_size(&payload); +} diff --git a/tools/kms-lints/lints/tests/versioned_codec_inventory/main.stderr b/tools/kms-lints/lints/tests/versioned_codec_inventory/main.stderr new file mode 100644 index 0000000000..932ca3db70 --- /dev/null +++ b/tools/kms-lints/lints/tests/versioned_codec_inventory/main.stderr @@ -0,0 +1 @@ +versioned_codec_inventory: wrote 9 calls, 2 local types, 0 generic sinks, 3 skipped roots for main