diff --git a/Cargo.lock b/Cargo.lock
index 3179aa76fadba..b09efcc490343 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6046,6 +6046,7 @@ dependencies = [
  "uv-platform-tags",
  "uv-preview",
  "uv-pypi-types",
+ "uv-toml",
  "uv-warnings",
  "walkdir",
  "zip",
@@ -7287,6 +7288,13 @@ dependencies = [
  "uv-version",
 ]
 
+[[package]]
+name = "uv-toml"
+version = "0.0.40"
+dependencies = [
+ "toml_parser",
+]
+
 [[package]]
 name = "uv-tool"
 version = "0.0.40"
diff --git a/Cargo.toml b/Cargo.toml
index ff3de697ad597..ee2dd7ae1c79d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -77,6 +77,7 @@ uv-small-str = { version = "0.0.40", path = "crates/uv-small-str" }
 uv-state = { version = "0.0.40", path = "crates/uv-state" }
 uv-static = { version = "0.0.40", path = "crates/uv-static" }
 uv-test = { version = "0.0.40", path = "crates/uv-test" }
+uv-toml = { version = "0.0.40", path = "crates/uv-toml" }
 uv-tool = { version = "0.0.40", path = "crates/uv-tool" }
 uv-torch = { version = "0.0.40", path = "crates/uv-torch" }
 uv-trampoline-builder = { version = "0.0.40", path = "crates/uv-trampoline-builder" }
@@ -269,6 +270,7 @@ tokio-stream = { version = "0.1.16" }
 tokio-util = { version = "0.7.12", features = ["compat", "io"] }
 toml = { version = "1.1.0", features = ["fast_hash"] }
 toml_edit = { version = "0.25.8", features = ["serde"] }
+toml_parser = { version = "1.1.0" }
 tracing = { version = "0.1.40" }
 tracing-durations-export = { version = "0.3.0", features = ["plot"] }
 tracing-subscriber = { version = "0.3.18" } # Default feature set for uv_build, uv activates extra features
diff --git a/crates/uv-build-backend/Cargo.toml b/crates/uv-build-backend/Cargo.toml
index 1a987be1c0afa..7bcebe16d85a2 100644
--- a/crates/uv-build-backend/Cargo.toml
+++ b/crates/uv-build-backend/Cargo.toml
@@ -24,6 +24,7 @@ uv-pep508 = { workspace = true }
 uv-platform-tags = { workspace = true }
 uv-preview = { workspace = true }
 uv-pypi-types = { workspace = true }
+uv-toml = { workspace = true }
 uv-warnings = { workspace = true }
 
 base64 = { workspace = true }
diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs
index d3d5787501857..542d6735fcef2 100644
--- a/crates/uv-build-backend/src/lib.rs
+++ b/crates/uv-build-backend/src/lib.rs
@@ -1955,4 +1955,95 @@ mod tests {
         build-backend = "uv_build"
         "#);
     }
+
+    /// Test that TOML 1.1 features in pyproject.toml trigger auto-detection and rewrite to TOML
+    /// 1.0, even without explicitly enabling `PreviewFeature::TomlBackwardsCompatibility`.
+    #[test]
+    fn toml_1_1_backward_compatibility_auto_detection() {
+        let _preview = uv_preview::test::with_features(&[]);
+        let src = TempDir::new().unwrap();
+
+        // A `pyproject.toml` with a TOML 1.1 feature, trailing commas in inline tables.
+        let pyproject_toml = indoc! {r#"
+            [project]
+            name = "toml11-project"
+            version = "0.1.0"
+            description = "A test package using TOML 1.1 features"
+            requires-python = ">=3.12"
+            # TOML 1.1 feature: Trailing comma in inline table
+            authors = [
+                { name = "Ferris", email = "ferris@example.com", },
+                { name = "Platypus", email = "platypus@example.com", },
+            ]
+
+            [build-system]
+            requires = ["uv_build>=0.5.15,<0.6.0"]
+            build-backend = "uv_build"
+        "#};
+
+        fs_err::write(src.path().join("pyproject.toml"), pyproject_toml).unwrap();
+        fs_err::create_dir_all(src.path().join("src").join("toml11_project")).unwrap();
+        File::create(
+            src.path()
+                .join("src")
+                .join("toml11_project")
+                .join("__init__.py"),
+        )
+        .unwrap();
+
+        let dist = TempDir::new().unwrap();
+        let build = build(src.path(), dist.path()).unwrap();
+
+        // Check that both `pyproject.toml` and `pyproject.toml.orig` are in the sdist.
+        assert_snapshot!(build.source_dist_contents.join("\n"), @"
+        toml11_project-0.1.0/
+        toml11_project-0.1.0/PKG-INFO
+        toml11_project-0.1.0/pyproject.toml
+        toml11_project-0.1.0/pyproject.toml.orig
+        toml11_project-0.1.0/src
+        toml11_project-0.1.0/src/toml11_project
+        toml11_project-0.1.0/src/toml11_project/__init__.py
+        ");
+
+        // Extract the sdist to verify the contents of both files.
+        let source_dist_path = dist.path().join(build.source_dist_filename.to_string());
+        let sdist_reader = BufReader::new(File::open(&source_dist_path).unwrap());
+        let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader));
+
+        let mut pyproject_toml_content = String::new();
+        let mut pyproject_toml_orig_content = String::new();
+        for entry in source_dist.entries().unwrap() {
+            let mut entry = entry.unwrap();
+            let path = entry.path().unwrap().to_string_lossy().to_string();
+
+            if path.ends_with("pyproject.toml") {
+                entry.read_to_string(&mut pyproject_toml_content).unwrap();
+            } else if path.ends_with("pyproject.toml.orig") {
+                entry
+                    .read_to_string(&mut pyproject_toml_orig_content)
+                    .unwrap();
+            }
+        }
+
+        assert_eq!(pyproject_toml_orig_content, pyproject_toml);
+        assert_snapshot!(pyproject_toml_content, @r#"
+        [project]
+        name = "toml11-project"
+        version = "0.1.0"
+        description = "A test package using TOML 1.1 features"
+        requires-python = ">=3.12"
+
+        [[project.authors]]
+        name = "Ferris"
+        email = "ferris@example.com"
+
+        [[project.authors]]
+        name = "Platypus"
+        email = "platypus@example.com"
+
+        [build-system]
+        requires = ["uv_build>=0.5.15,<0.6.0"]
+        build-backend = "uv_build"
+        "#);
+    }
 }
diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs
index d0a7a7a8d8168..de6d739e29772 100644
--- a/crates/uv-build-backend/src/source_dist.rs
+++ b/crates/uv-build-backend/src/source_dist.rs
@@ -17,6 +17,7 @@ use uv_distribution_filename::{SourceDistExtension, SourceDistFilename};
 use uv_fs::{Simplified, normalize_path};
 use uv_globfilter::{GlobDirFilter, PortableGlobParser};
 use uv_preview::PreviewFeature;
+use uv_toml::has_toml11_features;
 use uv_warnings::warn_user_once;
 use walkdir::WalkDir;
 
@@ -236,11 +237,26 @@ fn write_source_dist(
     //
     // To work around this, we do a best-effort rewrite of `pyproject.toml` to TOML 1.0. We also
     // add the original `pyproject.toml` as `pyproject.toml.orig` for reference.
+    //
+    // The feature is enabled either explicitly via the preview flag, or automatically when the
+    // `pyproject.toml` is detected to contain TOML 1.1-only syntax.
+    let pyproject_path = source_tree.join("pyproject.toml");
+    let pyproject_contents = fs_err::read_to_string(&pyproject_path)?;
     let toml_backwards_compatibility =
-        uv_preview::is_enabled(PreviewFeature::TomlBackwardsCompatibility);
+        if uv_preview::is_enabled(PreviewFeature::TomlBackwardsCompatibility) {
+            true
+        } else if has_toml11_features(&pyproject_contents) {
+            warn_user_once!(
+                "`pyproject.toml` uses TOML 1.1 features; rewriting to TOML 1.0 for \
+                compatibility with older build tools. Use `--preview-feature \
+                {feature}` to suppress this warning.",
+                feature = PreviewFeature::TomlBackwardsCompatibility
+            );
+            true
+        } else {
+            false
+        };
     if toml_backwards_compatibility {
-        let pyproject_path = source_tree.join("pyproject.toml");
-        let pyproject_contents = fs_err::read_to_string(&pyproject_path)?;
         let pyproject_value: toml::Value = toml::from_str(&pyproject_contents)
             .map_err(|err| Error::Toml(pyproject_path.clone(), err))?;
         // See https://github.com/toml-rs/toml/issues/1088 for `to_string_pretty`.
diff --git a/crates/uv-toml/Cargo.toml b/crates/uv-toml/Cargo.toml
new file mode 100644
index 0000000000000..1a0cd1e97aa86
--- /dev/null
+++ b/crates/uv-toml/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "uv-toml"
+version = "0.0.40"
+description = "This is an internal component crate of uv"
+edition = { workspace = true }
+rust-version = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+license = { workspace = true }
+
+[lib]
+doctest = false
+
+[lints]
+workspace = true
+
+[dependencies]
+toml_parser = { workspace = true }
diff --git a/crates/uv-toml/README.md b/crates/uv-toml/README.md
new file mode 100644
index 0000000000000..52555baf91901
--- /dev/null
+++ b/crates/uv-toml/README.md
@@ -0,0 +1,13 @@
+
+
+# uv-toml
+
+This crate is an internal component of [uv](https://crates.io/crates/uv). The Rust API exposed here
+is unstable and will have frequent breaking changes.
+
+This version (0.0.40) is a component of [uv 0.11.7](https://crates.io/crates/uv/0.11.7). The source
+can be found [here](https://github.com/astral-sh/uv/blob/0.11.7/crates/uv-toml).
+
+See uv's
+[crate versioning policy](https://docs.astral.sh/uv/reference/policies/versioning/#crate-versioning)
+for details on versioning.
diff --git a/crates/uv-toml/src/lib.rs b/crates/uv-toml/src/lib.rs
new file mode 100644
index 0000000000000..77e0e9c08f5a9
--- /dev/null
+++ b/crates/uv-toml/src/lib.rs
@@ -0,0 +1,295 @@
+use toml_parser::{
+    ErrorSink, Source, Span,
+    decoder::Encoding,
+    lexer::Token,
+    parser::{EventReceiver, parse_document},
+};
+
+/// Detect TOML 1.1 specific features in a TOML document.
+///
+/// Returns `true` when the document contains a trailing comma or a newline inside an inline
+/// table, or a `\xHH` or `\e` escape in a basic string (value or quoted key).
+///
+/// Note: This function does _not_ perform any validation.
+pub fn has_toml11_features(source: &str) -> bool {
+    let tokens: Box<[Token]> = Source::new(source).lex().collect();
+    let mut checker = DetectToml11::new(source);
+    let mut errors = None;
+    parse_document(&tokens, &mut checker, &mut errors);
+    checker.is_11()
+}
+
+/// Structure state in a TOML document
+#[derive(Debug, Copy, Clone)]
+enum State {
+    /// Regular table (e.g. `[foo]`)
+    StdTable,
+    /// Array table (e.g. `[[foo]]`)
+    ArrayTable,
+    /// Inline table (e.g. `{ k = "v" }`).
+    ///
+    /// `trailing_sep` is true when the last key, scalar or separator seen directly inside the
+    /// table was a separator.
+    InlineTable { trailing_sep: bool },
+    /// Array (e.g. `[1, 2, 3]`)
+    Array,
+}
+
+/// Detect TOML 1.1 specific features.
+pub struct DetectToml11<'s> {
+    /// The underlying TOML source
+    source: &'s str,
+    /// Current nesting state
+    state: Vec<State>,
+    /// Set to true when a TOML 1.1 specific feature is seen
+    toml11: bool,
+}
+
+impl<'s> DetectToml11<'s> {
+    /// Create a detector for `source` with empty nesting state and no feature flagged.
+    fn new(source: &'s str) -> Self {
+        Self {
+            source,
+            state: Vec::new(),
+            toml11: false,
+        }
+    }
+
+    /// Return the raw source text covered by `span`.
+    fn raw_at(&self, span: Span) -> &'s str {
+        &self.source[span.start()..span.end()]
+    }
+
+    /// Record that a TOML 1.1 specific feature was seen.
+    fn flag_11(&mut self) {
+        self.toml11 = true;
+    }
+
+    /// Update the trailing-separator marker when the innermost structure is an inline table.
+    fn set_sep(&mut self, sep: bool) {
+        if let Some(State::InlineTable { trailing_sep }) = self.state.last_mut() {
+            *trailing_sep = sep;
+        }
+    }
+
+    /// Flag the document when `span` is a basic string containing a TOML 1.1 escape sequence.
+    ///
+    /// Only basic strings process escapes, so every other encoding is skipped.
+    fn check_escapes(&mut self, span: Span, kind: Option<Encoding>) {
+        if matches!(kind, Some(Encoding::BasicString | Encoding::MlBasicString))
+            && has_toml11_escapes(self.raw_at(span))
+        {
+            // TOML 1.1 introduces new escape sequences
+            self.flag_11();
+        }
+    }
+
+    /// Whether a TOML 1.1 specific feature has been seen so far.
+    pub fn is_11(&self) -> bool {
+        self.toml11
+    }
+}
+
+impl EventReceiver for DetectToml11<'_> {
+    fn std_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.state.push(State::StdTable);
+    }
+
+    fn std_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.state.pop();
+    }
+
+    fn array_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.state.push(State::ArrayTable);
+    }
+
+    fn array_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.state.pop();
+    }
+
+    fn inline_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool {
+        self.state.push(State::InlineTable {
+            trailing_sep: false,
+        });
+        true
+    }
+
+    fn inline_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        if matches!(
+            self.state.last(),
+            Some(State::InlineTable { trailing_sep: true })
+        ) {
+            // TOML 1.1 introduces trailing commas in inline tables
+            self.flag_11();
+        }
+        self.state.pop();
+    }
+
+    fn array_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool {
+        self.state.push(State::Array);
+        true
+    }
+
+    fn array_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.state.pop();
+    }
+
+    fn simple_key(&mut self, span: Span, kind: Option<Encoding>, _error: &mut dyn ErrorSink) {
+        self.set_sep(false);
+        // Quoted keys are basic strings too, so they can carry TOML 1.1 escapes.
+        self.check_escapes(span, kind);
+    }
+
+    fn scalar(&mut self, span: Span, kind: Option<Encoding>, _error: &mut dyn ErrorSink) {
+        self.set_sep(false);
+        self.check_escapes(span, kind);
+    }
+
+    fn value_sep(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        self.set_sep(true);
+    }
+
+    fn newline(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
+        if matches!(self.state.last(), Some(State::InlineTable { .. })) {
+            // TOML 1.1 introduces newlines in inline tables
+            self.flag_11();
+        }
+    }
+}
+
+/// Scan the characters of a snippet of TOML representing a basic string for the TOML 1.1 exclusive
+/// escape sequences: `\xHH` and `\e`
+///
+/// The character after each backslash is consumed together with it, so `\\x` and `\\e` (an
+/// escaped backslash followed by a plain letter) are not reported.
+fn has_toml11_escapes(raw: &str) -> bool {
+    let mut chars = raw.chars();
+    while let Some(c) = chars.next() {
+        if c == '\\'
+            && let Some(c) = chars.next()
+            && matches!(c, 'x' | 'e')
+        {
+            return true;
+        }
+    }
+    false
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn escapes_plain_string() {
+        assert!(!has_toml11_escapes(r#""hello world""#));
+    }
+
+    #[test]
+    fn escapes_toml10_escape_n() {
+        assert!(!has_toml11_escapes(r#""hello\nworld""#));
+    }
+
+    #[test]
+    fn escapes_toml10_escape_u() {
+        assert!(!has_toml11_escapes(r#""r\u00E9sum\u00E9""#));
+    }
+
+    #[test]
+    fn escapes_toml11_hex() {
+        assert!(has_toml11_escapes(r#""val \x41""#));
+    }
+
+    #[test]
+    fn escapes_toml11_esc() {
+        assert!(has_toml11_escapes(r#""val \e""#));
+    }
+
+    #[test]
+    fn escapes_double_backslash_e() {
+        assert!(!has_toml11_escapes(r#""\\e""#));
+    }
+
+    #[test]
+    fn escapes_double_backslash_x() {
+        assert!(!has_toml11_escapes(r#""\\x41""#));
+    }
+
+    #[test]
+    fn features_plain_toml10() {
+        assert!(!has_toml11_features("x = 1\ny = \"hello\"\nz = true\n"));
+    }
+
+    #[test]
+    fn features_std_table() {
+        assert!(!has_toml11_features(
+            "[server]\nhost = \"localhost\"\nport = 8080\n"
+        ));
+    }
+
+    #[test]
+    fn features_array_of_tables() {
+        assert!(!has_toml11_features(
+            "[[items]]\nname = \"a\"\n[[items]]\nname = \"b\"\n"
+        ));
+    }
+
+    #[test]
+    fn features_inline_table_no_trailing_comma() {
+        assert!(!has_toml11_features("x = {a = 1, b = 2}\n"));
+    }
+
+    #[test]
+    fn features_trailing_comma_in_inline_table() {
+        assert!(has_toml11_features("x = {a = 1, b = 2,}\n"));
+    }
+
+    #[test]
+    fn features_multiline_inline_table() {
+        assert!(has_toml11_features("x = {\n a = 1\n}\n"));
+    }
+
+    #[test]
+    fn features_multiline_inline_table_with_trailing_comma() {
+        assert!(has_toml11_features("x = {\n a = 1,\n}\n"));
+    }
+
+    #[test]
+    fn features_hex_escape() {
+        assert!(has_toml11_features("x = \"val \\x41\"\n"));
+    }
+
+    #[test]
+    fn features_esc_escape() {
+        assert!(has_toml11_features("x = \"val \\e\"\n"));
+    }
+
+    #[test]
+    fn features_double_backslash_not_escape() {
+        assert!(!has_toml11_features("x = \"\\\\e\"\n"));
+    }
+
+    #[test]
+    fn features_toml10_escape_in_value() {
+        assert!(!has_toml11_features("x = \"tab\\there\"\n"));
+    }
+
+    #[test]
+    fn features_escape_in_nested_structure() {
+        assert!(has_toml11_features("[t]\na = {b = \"\\x20\",}\n"));
+    }
+
+    #[test]
+    fn features_trailing_comma_in_array_is_not_11() {
+        assert!(!has_toml11_features("x = [1, 2, 3,]\n"));
+    }
+
+    #[test]
+    fn features_escape_in_quoted_key() {
+        assert!(has_toml11_features("\"\\e\" = 1\n"));
+    }
+
+    #[test]
+    fn features_backslash_in_literal_key_is_not_11() {
+        assert!(!has_toml11_features("'\\e' = 1\n"));
+    }
+}
diff --git a/crates/uv/tests/it/build_backend.rs b/crates/uv/tests/it/build_backend.rs
index 20ca09d361a43..f104622f0f859 100644
--- a/crates/uv/tests/it/build_backend.rs
+++ b/crates/uv/tests/it/build_backend.rs
@@ -1600,3 +1600,55 @@ fn warn_on_license_classifier() -> Result<()> {
 
     Ok(())
 }
+
+/// Auto-detect TOML 1.1 features in `pyproject.toml` and warn the user.
+#[test]
+fn warn_on_toml_1_1_auto_detected() -> Result<()> {
+    let context = uv_test::test_context!("3.12");
+
+    context
+        .temp_dir
+        .child("pyproject.toml")
+        .write_str(indoc! {r#"
+            [project]
+            name = "foo"
+            version = "1.0.0"
+            requires-python = ">=3.12"
+            # TOML 1.1 feature: trailing comma in inline table
+            authors = [{ name = "Ferris", email = "ferris@example.com", }]
+
+            [build-system]
+            requires = ["uv_build>=0.7,<10000"]
+            build-backend = "uv_build"
+        "#})?;
+    context.temp_dir.child("src/foo/__init__.py").touch()?;
+
+    // Without the preview flag: auto-detection fires and a warning is shown.
+    uv_snapshot!(context.filters(), context.build(), @"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+
+    ----- stderr -----
+    Building source distribution (uv build backend)...
+    warning: `pyproject.toml` uses TOML 1.1 features; rewriting to TOML 1.0 for compatibility with older build tools. Use `--preview-feature toml-backwards-compatibility` to suppress this warning.
+    Building wheel from source distribution (uv build backend)...
+    Successfully built dist/foo-1.0.0.tar.gz
+    Successfully built dist/foo-1.0.0-py3-none-any.whl
+    ");
+
+    // With the preview flag set explicitly: rewrite still happens, but no warning.
+    uv_snapshot!(context.filters(), context.build().arg("--preview-feature").arg("toml-backwards-compatibility"), @"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+
+    ----- stderr -----
+    Building source distribution (uv build backend)...
+    Building wheel from source distribution (uv build backend)...
+    Successfully built dist/foo-1.0.0.tar.gz
+    Successfully built dist/foo-1.0.0-py3-none-any.whl
+    ");
+
+    Ok(())
+}