From 15f33ee00f3e06a31664ad957f5915720d4911a7 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Sat, 24 Jan 2026 12:55:09 +0000 Subject: [PATCH 1/9] Support .gitattributes export-ignore in distribution archives Parse .gitattributes files and exclude paths marked with the export-ignore attribute from distribution tarballs. This is the mechanism used by git-archive to exclude files, allowing projects to exclude dev-only files like dune-workspace from releases. Supported patterns: - Exact matches: dune-workspace - Directory patterns: .github/** - Glob patterns: *.log, test_*, file?.txt - Double star: **/build, src/**/test.ml Paths are normalized before matching (handles ./ and ../). Not supported: - Escaped patterns (\! for literal !) - Quoted patterns ("a b" for patterns with spaces) - Case insensitivity (core.ignorecase) - Negation patterns (!pattern) - Subdirectory .gitattributes files Testing: - Unit tests comparing behavior against git check-attr - Tests cover pattern matching and parsing edge cases - .gitattributes content is generated from test cases to ensure sync - Archive integration test verifies end-to-end exclusion without requiring git Signed-off-by: Josh Berdine --- CHANGES.md | 2 + README.md | 25 +++ lib/archive.ml | 24 ++- lib/archive.mli | 17 +- lib/gitattributes.ml | 132 ++++++++++++ lib/gitattributes.mli | 32 +++ lib/pkg.ml | 4 +- tests/lib/test_gitattributes.ml | 354 +++++++++++++++++++++++++++++++ tests/lib/test_gitattributes.mli | 1 + tests/lib/tests.ml | 1 + 10 files changed, 578 insertions(+), 14 deletions(-) create mode 100644 lib/gitattributes.ml create mode 100644 lib/gitattributes.mli create mode 100644 tests/lib/test_gitattributes.ml create mode 100644 tests/lib/test_gitattributes.mli diff --git a/CHANGES.md b/CHANGES.md index 5c13226d..b65ba0b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ### Added +- Support `.gitattributes` `export-ignore` attribute to exclude files from + distribution archives (#, @jberdine) - Add 
`dune-release delegate-info version` to show the current version as infered by the tool (#495, @samoht) - Add `--dev-repo` to `dune-release` and `dune-release publish` to overwrite diff --git a/README.md b/README.md index e81274d4..61ac9bbe 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,31 @@ The full documentation of this command is available with dune-release help distrib ``` +#### Excluding files with .gitattributes + +Files marked with the `export-ignore` attribute in `.gitattributes` will be excluded from the distribution archive. This can be used to exclude development files like `dune-workspace` that should not be included in releases. + +Example `.gitattributes`: +``` +dune-workspace export-ignore +.github/** export-ignore +``` + +**Supported patterns:** +- Exact filenames: `dune-workspace` +- Directory patterns: `.github/**` (matches all files under `.github/`) +- Glob patterns: `*.log`, `test_*`, `file?.txt` +- Double star in path: `**/build`, `src/**/test.ml` +- Path normalization: handles `./` and `../` in paths + +**Not supported:** +- Negation patterns (`!pattern`) +- Escaped patterns (`\!` for literal `!`) +- Quoted patterns (`"a b"` for patterns with spaces) +- Character classes (`[abc]`) +- Case insensitivity (`core.ignorecase`) +- `.gitattributes` files in subdirectories + ### Publish the distribution online diff --git a/lib/archive.ml b/lib/archive.ml index 3e1f3fe3..4fec5d10 100644 --- a/lib/archive.ml +++ b/lib/archive.ml @@ -117,8 +117,15 @@ module Tar = struct String.concat (List.rev (end_of_file :: t)) end -let path_set_of_dir dir ~exclude_paths = - let not_excluded p = Ok (not (Fpath.Set.mem (Fpath.base p) exclude_paths)) in +let path_set_of_dir dir ~exclude_paths ~export_ignore = + let not_excluded p = + if Fpath.Set.mem (Fpath.base p) exclude_paths then Ok false + else + match Fpath.rem_prefix dir p with + | None -> Ok true + | Some rel_path -> + Ok (not (List.exists (Gitattributes.matches rel_path) export_ignore)) + in let 
traverse = `Sat not_excluded in let elements = `Sat not_excluded in let err _ e = e in @@ -126,24 +133,29 @@ let path_set_of_dir dir ~exclude_paths = >>= OS.Path.fold ~dotfiles:true ~err ~elements ~traverse Fpath.Set.add Fpath.Set.empty -let tar dir ~exclude_paths ~root ~mtime = +let tar dir ~exclude_paths ~export_ignore ~root ~mtime = let tar_add file tar = let fname = match Fpath.rem_prefix dir file with | None -> assert false | Some file -> Fpath.(root // file) in - Logs.info (fun m -> m "Archiving %a" Fpath.pp fname); tar >>= fun tar -> OS.Dir.exists file >>= function - | true -> Tar.add tar fname ~mode:0o775 ~mtime `Dir + | true -> + (* Skip directories - they will be created implicitly when their + contents are added. This ensures that directories whose contents + are excluded via export-ignore patterns don't appear as empty + directories in the archive. *) + Ok tar | false -> + Logs.info (fun m -> m "Archiving %a" Fpath.pp fname); OS.Path.Mode.get file >>= fun mode -> OS.File.read file >>= fun contents -> let mode = if 0o100 land mode > 0 then 0o775 else 0o664 in Tar.add tar fname ~mode ~mtime (`File contents) in - path_set_of_dir dir ~exclude_paths >>= fun fset -> + path_set_of_dir dir ~exclude_paths ~export_ignore >>= fun fset -> Fpath.Set.fold tar_add fset (Ok Tar.empty) >>| fun tar -> Tar.to_string tar (* Bzip2 compression and unarchiving *) diff --git a/lib/archive.mli b/lib/archive.mli index 1deb4be2..b23e5a28 100644 --- a/lib/archive.mli +++ b/lib/archive.mli @@ -13,16 +13,19 @@ open Bos_setup val tar : Fpath.t -> exclude_paths:Fpath.set -> + export_ignore:Gitattributes.pattern list -> root:Fpath.t -> mtime:int64 -> (string, R.msg) result -(** [tar dir ~exclude_paths ~root ~mtime] is a (us)tar archive that contains the - file hierarchy [dir] except the relative hierarchies present in - [exclude_paths]. In the archive, members of [dir] are rerooted at [root] and - sorted according to {!Fpath.compare}. 
They have their modification time set - to [mtime] and their file permissions are [0o775] for directories and files - executable by the user and [0o664] for other files. No other file metadata - is preserved. +(** [tar dir ~exclude_paths ~export_ignore ~root ~mtime] is a (us)tar archive + that contains the file hierarchy [dir] except: + - relative hierarchies present in [exclude_paths] (basename matching) + - files matching patterns in [export_ignore] (from [.gitattributes]) + + In the archive, members of [dir] are rerooted at [root] and sorted according + to {!Fpath.compare}. They have their modification time set to [mtime] and + their file permissions are [0o775] for directories and files executable by + the user and [0o664] for other files. No other file metadata is preserved. {b Note.} This is a pure OCaml implementation, no [tar] tool is needed. *) diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml new file mode 100644 index 00000000..d42f7fc9 --- /dev/null +++ b/lib/gitattributes.ml @@ -0,0 +1,132 @@ +open Bos_setup + +type pattern = + | Exact of string (** Exact match against basename or full path. *) + | Prefix of string + (** Pattern like [dir/**] that matches everything under a directory, but + not the directory itself. *) + | Glob of Re.re (** Compiled glob pattern. *) + +(** [glob_to_re pattern] is a compiled regex for glob [pattern]. Supports [*] + (any chars except /), [?] (single char except /), and [**] (any path + segments, but only when adjacent to /). 
*) +let glob_to_re pattern = + let buf = Buffer.create (String.length pattern * 2) in + Buffer.add_char buf '^'; + let len = String.length pattern in + let rec loop i = + if i >= len then () + else + let c = pattern.[i] in + match c with + | '*' -> + if i + 1 < len && pattern.[i + 1] = '*' then + (* ** only crosses path separators when adjacent to / *) + let preceded_by_slash = i > 0 && pattern.[i - 1] = '/' in + let at_start = i = 0 in + if i + 2 < len && pattern.[i + 2] = '/' then ( + (* **/ matches zero or more directories *) + Buffer.add_string buf "(.*/)?"; + loop (i + 3)) + else if i + 2 >= len && (preceded_by_slash || at_start) then ( + (* /** at end or just ** alone - matches anything *) + Buffer.add_string buf ".*"; + loop (i + 2)) + else ( + (* ** not adjacent to / - acts like * *) + Buffer.add_string buf "[^/]*"; + loop (i + 2)) + else ( + (* * matches anything except path separator *) + Buffer.add_string buf "[^/]*"; + loop (i + 1)) + | '?' -> + (* ? matches any single character except path separator *) + Buffer.add_string buf "[^/]"; + loop (i + 1) + | '.' 
| '+' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\' + -> + (* Escape regex metacharacters *) + Buffer.add_char buf '\\'; + Buffer.add_char buf c; + loop (i + 1) + | _ -> + (* Literal character *) + Buffer.add_char buf c; + loop (i + 1) + in + loop 0; + Buffer.add_char buf '$'; + Re.Pcre.regexp (Buffer.contents buf) + +let parse_pattern s = + let s = String.trim s in + (* Remove leading slash if present - we always match relative paths *) + let s = + if String.is_prefix ~affix:"/" s then + String.Sub.to_string (String.sub ~start:1 s) + else s + in + let has_wildcard s = String.exists (fun c -> c = '*' || c = '?') s in + if String.is_suffix ~affix:"/**" s then + (* Directory pattern: match everything under the directory *) + let prefix = + String.Sub.to_string (String.sub ~stop:(String.length s - 3) s) + in + if has_wildcard prefix then + (* Prefix contains wildcards, treat whole pattern as glob *) + Glob (glob_to_re s) + else Prefix prefix + else if has_wildcard s then + (* Has wildcards - compile as glob *) + Glob (glob_to_re s) + else + (* Exact match *) + Exact s + +let matches path pattern = + let path = Fpath.normalize path in + let path_str = Fpath.to_string path in + let basename = Fpath.basename path in + match pattern with + | Exact s -> + (* Match against basename or full relative path *) + String.equal s basename || String.equal s path_str + | Prefix prefix -> + (* Match everything under the directory, but not the directory itself *) + String.is_prefix ~affix:(prefix ^ "/") path_str + | Glob re -> + (* Match against full path or basename for patterns like *.log *) + Re.execp re path_str || Re.execp re basename + +let parse_export_ignore content = + (* Strip UTF-8 BOM if present at start of file *) + let content = + if String.is_prefix ~affix:"\xef\xbb\xbf" content then + String.Sub.to_string (String.sub ~start:3 content) + else content + in + content |> String.cuts ~sep:"\n" + |> List.filter_map (fun line -> + let line = String.trim line in 
+ (* Skip empty lines and comments *) + if String.length line = 0 || String.is_prefix ~affix:"#" line then None + else + (* Format: ... + Attributes can be separated by spaces or tabs *) + let parts = + String.fields ~empty:false + ~is_sep:(fun c -> c = ' ' || c = '\t') + line + in + match parts with + | pattern :: attrs + when List.exists (String.equal "export-ignore") attrs -> + Some (parse_pattern pattern) + | _ -> None) + +let read_export_ignore dir = + let file = Fpath.(dir / ".gitattributes") in + OS.File.exists file >>= function + | false -> Ok [] + | true -> OS.File.read file >>| parse_export_ignore diff --git a/lib/gitattributes.mli b/lib/gitattributes.mli new file mode 100644 index 00000000..caa01c8b --- /dev/null +++ b/lib/gitattributes.mli @@ -0,0 +1,32 @@ +(** Gitattributes parsing for export-ignore. + + Parses [.gitattributes] files and extracts patterns marked with the + [export-ignore] attribute. These patterns can be used to exclude files from + distribution archives. *) + +open Bos_setup + +(** {1 Patterns} *) + +type pattern +(** The type for gitattributes patterns. *) + +val parse_pattern : string -> pattern +(** [parse_pattern s] is the pattern parsed from string [s]. Supports: + - Exact matches: [filename] + - Directory patterns: [dir/**] + - Glob patterns: [*.ext], [prefix*] *) + +val matches : Fpath.t -> pattern -> bool +(** [matches path pattern] holds if [path] matches [pattern]. [path] should be + relative to the repository root. *) + +(** {1 Parsing .gitattributes} *) + +val parse_export_ignore : string -> pattern list +(** [parse_export_ignore content] is the list of patterns marked with + [export-ignore] in [.gitattributes] file [content]. *) + +val read_export_ignore : Fpath.t -> (pattern list, R.msg) result +(** [read_export_ignore dir] is the list of patterns marked with [export-ignore] + in [dir/.gitattributes], or the empty list if the file doesn't exist. 
*) diff --git a/lib/pkg.ml b/lib/pkg.ml index 40fb4c3f..235826de 100644 --- a/lib/pkg.ml +++ b/lib/pkg.ml @@ -465,7 +465,9 @@ let distrib_archive ~dry_run ~keep_dir ~include_submodules p = >>= fun () -> distrib_prepare ~dry_run ~dist_build_dir ~version >>= fun () -> let exclude_paths = Fpath.Set.of_list Distrib.exclude_paths in - Archive.tar dist_build_dir ~exclude_paths ~root ~mtime >>= fun tar -> + Gitattributes.read_export_ignore dist_build_dir >>= fun export_ignore -> + Archive.tar dist_build_dir ~exclude_paths ~export_ignore ~root ~mtime + >>= fun tar -> distrib_archive_path p >>= fun archive -> Archive.bzip2 ~dry_run ~force:true ~dst:archive tar >>= fun () -> (if keep_dir then Ok () else Sos.delete_dir ~dry_run dist_build_dir) diff --git a/tests/lib/test_gitattributes.ml b/tests/lib/test_gitattributes.ml new file mode 100644 index 00000000..b1537d74 --- /dev/null +++ b/tests/lib/test_gitattributes.ml @@ -0,0 +1,354 @@ +(** Gitattributes tests. + + - Archive integration: tests that Archive.tar excludes files matching + export-ignore patterns + - Pattern matching: compares our implementation against git check-attr as + source of truth, with .gitattributes content generated from test cases *) + +open Dune_release + +(*=========================================================================== + * Archive integration + *) + +(** Test archive creation with export-ignore patterns. This tests the full + integration without requiring git. 
*) +let test_archive_export_ignore () = + let ( >>= ) = Result.bind in + let result = + Bos.OS.Dir.tmp "archive-test-%s" >>= fun dir -> + (* Create test file structure *) + let files = + [ + ("CHANGES.md", "changes"); + ("foo.opam", "opam"); + ("dune-project", "(lang dune 3.0)"); + ("dune-workspace", "(lang dune 3.0)"); + ( ".gitattributes", + "dune-workspace export-ignore\n\ + .github/** export-ignore\n\ + internal/** export-ignore" ); + (".github/workflows/ci.yml", "ci"); + ("internal/notes.txt", "notes"); + ("src/main.ml", "let () = ()"); + ] + in + let create_file (path, content) = + let fpath = Fpath.(dir // v path) in + let parent = Fpath.parent fpath in + Bos.OS.Dir.create ~path:true parent >>= fun _ -> + Bos.OS.File.write fpath content + in + List.fold_left + (fun acc file -> acc >>= fun () -> create_file file) + (Ok ()) files + >>= fun () -> + (* Read export-ignore patterns *) + Gitattributes.read_export_ignore dir >>= fun export_ignore -> + (* Create the archive *) + let exclude_paths = Fpath.Set.empty in + let root = Fpath.v "test-1.0.0" in + let mtime = 0L in + Archive.tar dir ~exclude_paths ~export_ignore ~root ~mtime + >>= fun tar_content -> + (* Write tarball and list contents *) + let tarball = Fpath.(dir / "test.tar") in + Bos.OS.File.write tarball tar_content >>= fun () -> + let cmd = Bos.Cmd.(v "tar" % "-tf" % Fpath.to_string tarball) in + Bos.OS.Cmd.(run_out cmd |> out_lines) >>= fun (files_in_tar, _) -> + Ok files_in_tar + in + match result with + | Error (`Msg msg) -> Alcotest.fail msg + | Ok files_in_tar -> + let has_file name = + List.exists + (fun f -> Astring.String.is_infix ~affix:name f) + files_in_tar + in + (* Check that excluded files are NOT present *) + Alcotest.(check bool) + "dune-workspace excluded" false + (has_file "dune-workspace"); + Alcotest.(check bool) ".github excluded" false (has_file ".github"); + Alcotest.(check bool) "internal excluded" false (has_file "internal"); + (* Check that included files ARE present *) + 
Alcotest.(check bool) "CHANGES.md included" true (has_file "CHANGES.md"); + Alcotest.(check bool) "foo.opam included" true (has_file "foo.opam"); + Alcotest.(check bool) + ".gitattributes included" true + (has_file ".gitattributes"); + Alcotest.(check bool) "src/main.ml included" true (has_file "src/main.ml") + +let archive_tests = + [ ("archive with export-ignore", `Quick, test_archive_export_ignore) ] + +(*=========================================================================== + * Pattern matching + *) + +(** Build mapping from pattern to unique prefix. *) +let pattern_prefix_map pattern_tests = + let patterns = + pattern_tests |> List.map fst |> List.sort_uniq String.compare + in + List.mapi (fun i p -> (p, Printf.sprintf "t%03d" (i + 1))) patterns + +(** Generate .gitattributes content from test cases. *) +let generate_gitattributes parse_tests pattern_tests = + let buf = Buffer.create 4096 in + (* Add parse test lines *) + List.iter + (fun (line, _) -> + Buffer.add_string buf line; + Buffer.add_char buf '\n') + parse_tests; + Buffer.add_char buf '\n'; + (* Add pattern test lines with prefix *) + let prefix_map = pattern_prefix_map pattern_tests in + List.iter + (fun (pattern, prefix) -> + Printf.bprintf buf "%s/%s export-ignore\n" prefix pattern) + prefix_map; + Buffer.contents buf + +(** Generate all test paths. 
*) +let generate_test_paths parse_tests pattern_tests = + let prefix_map = pattern_prefix_map pattern_tests in + (* Parse test paths *) + let parse_paths = List.map snd parse_tests in + (* Pattern test paths with prefix *) + let pattern_paths = + List.map + (fun (pattern, path) -> + let prefix = List.assoc pattern prefix_map in + prefix ^ "/" ^ path) + pattern_tests + in + parse_paths @ pattern_paths + +let setup_git_repo gitattributes_content = + match Bos.OS.Dir.tmp "gitattributes-test-%s" with + | Error _ -> None + | Ok dir -> ( + let cmd = + Bos.Cmd.(v "git" % "-C" % Fpath.to_string dir % "init" % "-q") + in + match Bos.OS.Cmd.run cmd with + | Error _ -> None + | Ok () -> ( + (* Write the .gitattributes file *) + let gitattributes = Fpath.(dir / ".gitattributes") in + match Bos.OS.File.write gitattributes gitattributes_content with + | Error _ -> None + | Ok () -> Some dir)) + +let run_git_check_attr ~dir ~path = + let cmd = + Bos.Cmd.( + v "git" % "-C" % Fpath.to_string dir % "check-attr" % "export-ignore" + % path) + in + match Bos.OS.Cmd.(run_out cmd |> out_string) with + | Error _ -> None + | Ok (output, _) -> Some (Astring.String.is_infix ~affix:": set" output) + +let git_tests gitattributes_content test_paths = + match setup_git_repo gitattributes_content with + | None -> [] (* Skip git tests if git setup fails *) + | Some dir -> + List.map + (fun path -> + let name = Printf.sprintf "git: %s" path in + let test_fun () = + match run_git_check_attr ~dir ~path with + | None -> Alcotest.fail "Could not run git check-attr" + | Some git_result -> + let patterns = + Gitattributes.parse_export_ignore gitattributes_content + in + let our_result = + List.exists (Gitattributes.matches (Fpath.v path)) patterns + in + Alcotest.(check bool) name git_result our_result + in + (name, `Quick, test_fun)) + test_paths + +(** Pattern matching test cases: (pattern, path) pairs. Each pattern gets a + unique prefix (t001/, t002/, etc.) to isolate tests. 
*) +let pattern_tests = + [ + (* Basic glob patterns *) + ("*.log", "debug.log"); + ("*.log", "subdir/debug.log"); + ("*.log", "a/b/c/debug.log"); + ("*.log", "foo.txt"); + (* Exact basename matching *) + ("dune-workspace", "dune-workspace"); + ("dune-workspace", "subdir/dune-workspace"); + ("dune-workspace", "other-file"); + (* Directory patterns with /** *) + (".github/**", ".github"); + (".github/**", ".github/workflows"); + (".github/**", ".github/workflows/ci.yml"); + (".github/**", ".github-actions"); + (".github/**", "src/main.ml"); + (* Glob prefix with /** *) + ("test_*/**", "test_foo/bar.ml"); + ("test_*/**", "test_foo/sub/file.ml"); + ("test_*/**", "other/file.ml"); + (* **/ in middle of pattern *) + ("src/**/test.ml", "src/test.ml"); + ("src/**/test.ml", "src/foo/test.ml"); + ("src/**/test.ml", "src/foo/bar/test.ml"); + ("src/**/test.ml", "test.ml"); + (* **/ at start *) + ("**/build", "build"); + ("**/build", "foo/build"); + ("**/build", "foo/bar/build"); + ("**/build", "builder"); + (* Directory wildcard *) + ("dir/*.log", "dir/foo.log"); + ("dir/*.log", "dir/sub/foo.log"); + ("dir/*.log", "other/foo.log"); + (* Star not crossing slash *) + ("a*b", "aXXXb"); + ("a*b", "a/b"); + (* Question mark *) + ("file?.txt", "file1.txt"); + ("file?.txt", "file12.txt"); + (* Double star alone *) + ("**", "anything"); + ("**", "a/b/c"); + (* Single star *) + ("*", "foo"); + ("*", "foo/bar"); + (* **/f pattern - matches f at any level *) + ("**/f", "f"); + ("**/f", "a/f"); + ("**/f", "a/b/f"); + ("**/f", "a/b/c/f"); + ("**/f", "g"); + ("**/f", "fx"); + (* a**f pattern - ** without slash acts like * *) + ("a**f", "af"); + ("a**f", "axf"); + ("a**f", "axxf"); + ("a**f", "a/f"); + ("a**f", "a/b/f"); + (* Simple basename patterns matching at multiple levels *) + ("f", "f"); + ("f", "a/f"); + ("f", "a/b/f"); + ("f", "g"); + ("f", "fx"); + (* Path-specific patterns *) + ("a/f", "a/f"); + ("a/f", "b/a/f"); + (* Path-specific pattern should not match when nested deeper *) + 
("a/i", "a/i"); + ("a/i", "subdir/a/i"); + ("a/b/g", "a/b/g"); + ("b/g", "b/g"); + ("b/g", "a/b/g"); + (* Path normalization - git normalizes paths before matching *) + ("f", "./f"); + ("a/g", "a/./g"); + ("a/b/g", "a/c/../b/g"); + (* Exact path matching *) + ("subdir/file", "subdir/file"); + ("a/b", "x/a/b"); + ("src/file.ml", "root/src/file.ml"); + (* Leading slash stripped *) + ("/dune-workspace", "dune-workspace"); + (* Literal dot in pattern *) + ("file.txt", "file.txt"); + ("file.txt", "filextxt"); + (* Case sensitivity *) + ("Makefile", "Makefile"); + (* Prefix no false positive nested *) + (".git/**", ".github/workflows/ci.yml"); + (* Star alone with extension *) + ("*", "foo.ml"); + (* Star not matching slash *) + ("a*.ml", "a/foo.ml"); + (* Question mark not matching slash *) + ("a?b", "a/b"); + (* Trailing slash patterns - trailing slashes are not stripped, so these + patterns won't match paths without trailing slashes *) + ("dir/", "dir"); + ("*/", "dir"); + (* Double star not adjacent to slash *) + ("a**b", "aXXXb"); + ("a**b", "a/x/b"); + (* Double star at start without slash *) + ("**test.ml", "test.ml"); + ("**test.ml", "src/test.ml"); + (* */** pattern *) + ("*/**", "foo/bar"); + ("*/**", "foo/bar/baz.txt"); + (* **/ pattern alone *) + ("**/", "foo/"); + ("**/", "foo"); + (* **/** pattern *) + ("**/**", "foo/bar"); + ("**/**", "a/b/c/d"); + ("**/**", "foo"); + (* Mixed * and ? *) + ("*.?", "foo.c"); + ("*.?", "foo.ml"); + ("?est_*", "test_foo"); + ("t?st_*.ml", "test_foo.ml"); + (* Empty/whitespace patterns - these become empty string after trim and + never match anything *) + ("", "foo"); + (" ", "foo"); + ] + +(** Parsing edge cases: (gitattributes_line, test_path). Lines are written + exactly as-is to .gitattributes. 
*) +let parse_tests = + [ + (* UTF-8 BOM at start of file - must be first to test BOM handling *) + ("\xef\xbb\xbfparse_bom export-ignore", "parse_bom"); + (* Comment handling *) + ("# comment line\nparse_comment export-ignore", "parse_comment"); + (* Empty line handling *) + ( "parse_before export-ignore\n\nparse_after_empty export-ignore", + "parse_after_empty" ); + (* Tab as separator *) + ("parse_tab\texport-ignore", "parse_tab"); + (* Multiple attributes - export-ignore second *) + ("parse_multi_second binary export-ignore", "parse_multi_second"); + (* Multiple attributes - export-ignore first *) + ("parse_multi_first export-ignore text", "parse_multi_first"); + (* No export-ignore attribute - should NOT match *) + ("parse_no_export binary", "parse_no_export"); + (* Attribute as substring - should NOT match *) + ("parse_substr not-export-ignore-really", "parse_substr"); + (* Attribute as prefix - should NOT match *) + ("parse_attr_prefix export-ignore-extended", "parse_attr_prefix"); + (* Attribute as suffix - should NOT match *) + ("parse_attr_suffix my-export-ignore", "parse_attr_suffix"); + (* Leading whitespace on pattern *) + (" parse_whitespace export-ignore", "parse_whitespace"); + (* Hash in pattern (not a comment) *) + ("parse#hash export-ignore", "parse#hash"); + (* Pattern with no attributes - should NOT match *) + ("parse_no_attr", "parse_no_attr"); + (* Indented comment *) + ( " # indented comment\nparse_indented_comment export-ignore", + "parse_indented_comment" ); + (* Whitespace-only line *) + ( "parse_ws_before export-ignore\n \t \nparse_ws_after export-ignore", + "parse_ws_after" ); + (* Windows line endings *) + ("parse_crlf export-ignore\r\nparse_crlf2 export-ignore", "parse_crlf2"); + ] + +let gitattributes_content = generate_gitattributes parse_tests pattern_tests +let test_paths = generate_test_paths parse_tests pattern_tests + +let suite = + ("Gitattributes", archive_tests @ git_tests gitattributes_content test_paths) diff --git 
a/tests/lib/test_gitattributes.mli b/tests/lib/test_gitattributes.mli new file mode 100644 index 00000000..a6aeeabd --- /dev/null +++ b/tests/lib/test_gitattributes.mli @@ -0,0 +1 @@ +val suite : string * (string * [> `Quick ] * (unit -> unit)) list diff --git a/tests/lib/tests.ml b/tests/lib/tests.ml index 90590bff..cdcf11e7 100644 --- a/tests/lib/tests.ml +++ b/tests/lib/tests.ml @@ -4,6 +4,7 @@ let () = Test_github.suite; Test_github_v3_api.suite; Test_github_v4_api.suite; + Test_gitattributes.suite; Test_opam_file.suite; Test_pkg.suite; Test_stdext.suite; From 892f3433f80e55f093361773d06c3e3683939ac6 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Sat, 24 Jan 2026 22:51:34 +0000 Subject: [PATCH 2/9] Update CHANGES.md Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b65ba0b3..54e03513 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,7 +3,7 @@ ### Added - Support `.gitattributes` `export-ignore` attribute to exclude files from - distribution archives (#, @jberdine) + distribution archives (#515, @jberdine) - Add `dune-release delegate-info version` to show the current version as infered by the tool (#495, @samoht) - Add `--dev-repo` to `dune-release` and `dune-release publish` to overwrite From 7864bf5315e35d3694de33e224d40d73f94561f1 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 21:48:20 +0100 Subject: [PATCH 3/9] Rename not_excluded predicate to included Avoid the double negation in the predicate name used by OS.Path.fold's `Sat filter. 
Signed-off-by: Josh Berdine --- lib/archive.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/archive.ml b/lib/archive.ml index 4fec5d10..82e42e7f 100644 --- a/lib/archive.ml +++ b/lib/archive.ml @@ -118,7 +118,7 @@ module Tar = struct end let path_set_of_dir dir ~exclude_paths ~export_ignore = - let not_excluded p = + let included p = if Fpath.Set.mem (Fpath.base p) exclude_paths then Ok false else match Fpath.rem_prefix dir p with @@ -126,8 +126,8 @@ let path_set_of_dir dir ~exclude_paths ~export_ignore = | Some rel_path -> Ok (not (List.exists (Gitattributes.matches rel_path) export_ignore)) in - let traverse = `Sat not_excluded in - let elements = `Sat not_excluded in + let traverse = `Sat included in + let elements = `Sat included in let err _ e = e in OS.Dir.contents ~dotfiles:true dir >>= OS.Path.fold ~dotfiles:true ~err ~elements ~traverse Fpath.Set.add From 1342036e0c41b9a9da19ab184757d8170f7d0706 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 21:49:35 +0100 Subject: [PATCH 4/9] Drop speculative 2x buffer size in glob_to_re The Buffer grows on demand; the 2x factor was a guess with no measurement behind it. Signed-off-by: Josh Berdine --- lib/gitattributes.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml index d42f7fc9..d18aeeec 100644 --- a/lib/gitattributes.ml +++ b/lib/gitattributes.ml @@ -11,7 +11,7 @@ type pattern = (any chars except /), [?] (single char except /), and [**] (any path segments, but only when adjacent to /). 
*) let glob_to_re pattern = - let buf = Buffer.create (String.length pattern * 2) in + let buf = Buffer.create (String.length pattern) in Buffer.add_char buf '^'; let len = String.length pattern in let rec loop i = From 0988a73e06e5e869ff954ebcd7ecf04dfce8a03c Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 21:52:39 +0100 Subject: [PATCH 5/9] Hoist String.length in glob_to_re Compute the pattern length once and reuse for both the Buffer size and the loop bound. Signed-off-by: Josh Berdine --- lib/gitattributes.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml index d18aeeec..438683d7 100644 --- a/lib/gitattributes.ml +++ b/lib/gitattributes.ml @@ -11,9 +11,9 @@ type pattern = (any chars except /), [?] (single char except /), and [**] (any path segments, but only when adjacent to /). *) let glob_to_re pattern = - let buf = Buffer.create (String.length pattern) in - Buffer.add_char buf '^'; let len = String.length pattern in + let buf = Buffer.create len in + Buffer.add_char buf '^'; let rec loop i = if i >= len then () else From f5f00f6396ff4af18ff49d4ecb666d0748bc6eb7 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 22:05:00 +0100 Subject: [PATCH 6/9] Name the UTF-8 BOM constant Bind `utf8_bom` so the byte sequence appears once and the skip length is derived from it rather than a magic 3. 
Signed-off-by: Josh Berdine --- lib/gitattributes.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml index 438683d7..4e9c1ab0 100644 --- a/lib/gitattributes.ml +++ b/lib/gitattributes.ml @@ -99,11 +99,13 @@ let matches path pattern = (* Match against full path or basename for patterns like *.log *) Re.execp re path_str || Re.execp re basename +let utf8_bom = "\xef\xbb\xbf" + let parse_export_ignore content = (* Strip UTF-8 BOM if present at start of file *) let content = - if String.is_prefix ~affix:"\xef\xbb\xbf" content then - String.Sub.to_string (String.sub ~start:3 content) + if String.is_prefix ~affix:utf8_bom content then + String.Sub.to_string (String.sub ~start:(String.length utf8_bom) content) else content in content |> String.cuts ~sep:"\n" From 1a002b0d4641bdc349cb72308b1e1a5ebf964fe3 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 22:07:01 +0100 Subject: [PATCH 7/9] Rename Gitattributes.pattern to Gitattributes.t Follow the standard OCaml convention of naming the primary type of a module `t`. Signed-off-by: Josh Berdine --- lib/archive.mli | 2 +- lib/gitattributes.ml | 2 +- lib/gitattributes.mli | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/archive.mli b/lib/archive.mli index b23e5a28..38bbb505 100644 --- a/lib/archive.mli +++ b/lib/archive.mli @@ -13,7 +13,7 @@ open Bos_setup val tar : Fpath.t -> exclude_paths:Fpath.set -> - export_ignore:Gitattributes.pattern list -> + export_ignore:Gitattributes.t list -> root:Fpath.t -> mtime:int64 -> (string, R.msg) result diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml index 4e9c1ab0..88b5c490 100644 --- a/lib/gitattributes.ml +++ b/lib/gitattributes.ml @@ -1,6 +1,6 @@ open Bos_setup -type pattern = +type t = | Exact of string (** Exact match against basename or full path. 
*) | Prefix of string (** Pattern like [dir/**] that matches everything under a directory, but diff --git a/lib/gitattributes.mli b/lib/gitattributes.mli index caa01c8b..06f14b7d 100644 --- a/lib/gitattributes.mli +++ b/lib/gitattributes.mli @@ -8,25 +8,25 @@ open Bos_setup (** {1 Patterns} *) -type pattern +type t (** The type for gitattributes patterns. *) -val parse_pattern : string -> pattern +val parse_pattern : string -> t (** [parse_pattern s] is the pattern parsed from string [s]. Supports: - Exact matches: [filename] - Directory patterns: [dir/**] - Glob patterns: [*.ext], [prefix*] *) -val matches : Fpath.t -> pattern -> bool +val matches : Fpath.t -> t -> bool (** [matches path pattern] holds if [path] matches [pattern]. [path] should be relative to the repository root. *) (** {1 Parsing .gitattributes} *) -val parse_export_ignore : string -> pattern list +val parse_export_ignore : string -> t list (** [parse_export_ignore content] is the list of patterns marked with [export-ignore] in [.gitattributes] file [content]. *) -val read_export_ignore : Fpath.t -> (pattern list, R.msg) result +val read_export_ignore : Fpath.t -> (t list, R.msg) result (** [read_export_ignore dir] is the list of patterns marked with [export-ignore] in [dir/.gitattributes], or the empty list if the file doesn't exist. 
*) From 5dad686f46bebdb9ebc1bc956ea535bf1ea71e41 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 22:08:31 +0100 Subject: [PATCH 8/9] Use plausible opam content in archive test fixture Signed-off-by: Josh Berdine --- tests/lib/test_gitattributes.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lib/test_gitattributes.ml b/tests/lib/test_gitattributes.ml index b1537d74..c0053718 100644 --- a/tests/lib/test_gitattributes.ml +++ b/tests/lib/test_gitattributes.ml @@ -21,7 +21,7 @@ let test_archive_export_ignore () = let files = [ ("CHANGES.md", "changes"); - ("foo.opam", "opam"); + ("foo.opam", {|opam-version: "2.0"|}); ("dune-project", "(lang dune 3.0)"); ("dune-workspace", "(lang dune 3.0)"); ( ".gitattributes", From f871732280fc76c011ae2dc9764a162af7fbc506 Mon Sep 17 00:00:00 2001 From: Josh Berdine Date: Wed, 22 Apr 2026 22:29:20 +0100 Subject: [PATCH 9/9] Warn and skip on unsupported .gitattributes pattern syntax Detect negation (`!pattern`), escape (`\\`), quoting (`"`), and character class (`[...]`) syntax in parse_pattern and skip the pattern with a Logs.warn rather than compiling to a regex that silently matches no files. Change parse_pattern to return a t option to reflect this. Split the README "not supported" list: syntactic cases are warned-and-skipped uniformly, while case-insensitivity and subdirectory .gitattributes are documented as separate limitations.
Signed-off-by: Josh Berdine --- README.md | 8 +++--- lib/gitattributes.ml | 63 +++++++++++++++++++++++++++---------------- lib/gitattributes.mli | 6 +++-- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 61ac9bbe..a1936c65 100644 --- a/README.md +++ b/README.md @@ -127,13 +127,15 @@ dune-workspace export-ignore - Double star in path: `**/build`, `src/**/test.ml` - Path normalization: handles `./` and `../` in paths -**Not supported:** +**Unsupported pattern syntax** (such patterns are skipped with a warning): - Negation patterns (`!pattern`) - Escaped patterns (`\!` for literal `!`) - Quoted patterns (`"a b"` for patterns with spaces) - Character classes (`[abc]`) -- Case insensitivity (`core.ignorecase`) -- `.gitattributes` files in subdirectories + +**Other limitations:** +- Only the repository-root `.gitattributes` is read; files in subdirectories are ignored. +- Matching is always case-sensitive; `core.ignorecase` is not consulted. ### Publish the distribution online diff --git a/lib/gitattributes.ml b/lib/gitattributes.ml index 88b5c490..ba08aff4 100644 --- a/lib/gitattributes.ml +++ b/lib/gitattributes.ml @@ -59,30 +59,47 @@ let glob_to_re pattern = Buffer.add_char buf '$'; Re.Pcre.regexp (Buffer.contents buf) +(* [unsupported_syntax s] is [Some reason] when [s] uses a gitattributes + pattern feature we do not implement. The pattern is then skipped rather + than compiled into a regex that happens to match nothing. *) +let unsupported_syntax s = + if String.length s > 0 && s.[0] = '!' 
then Some "negation (!pattern)" + else if String.exists (fun c -> c = '\\') s then Some "escape (\\)" + else if String.exists (fun c -> c = '"') s then Some "quoting (\")" + else if String.exists (fun c -> c = '[' || c = ']') s then + Some "character class ([...])" + else None + let parse_pattern s = let s = String.trim s in - (* Remove leading slash if present - we always match relative paths *) - let s = - if String.is_prefix ~affix:"/" s then - String.Sub.to_string (String.sub ~start:1 s) - else s - in - let has_wildcard s = String.exists (fun c -> c = '*' || c = '?') s in - if String.is_suffix ~affix:"/**" s then - (* Directory pattern: match everything under the directory *) - let prefix = - String.Sub.to_string (String.sub ~stop:(String.length s - 3) s) - in - if has_wildcard prefix then - (* Prefix contains wildcards, treat whole pattern as glob *) - Glob (glob_to_re s) - else Prefix prefix - else if has_wildcard s then - (* Has wildcards - compile as glob *) - Glob (glob_to_re s) - else - (* Exact match *) - Exact s + match unsupported_syntax s with + | Some reason -> + Logs.warn (fun m -> + m "Skipping unsupported .gitattributes pattern %S: %s" s reason); + None + | None -> + (* Remove leading slash if present - we always match relative paths *) + let s = + if String.is_prefix ~affix:"/" s then + String.Sub.to_string (String.sub ~start:1 s) + else s + in + let has_wildcard s = String.exists (fun c -> c = '*' || c = '?') s in + if String.is_suffix ~affix:"/**" s then + (* Directory pattern: match everything under the directory *) + let prefix = + String.Sub.to_string (String.sub ~stop:(String.length s - 3) s) + in + if has_wildcard prefix then + (* Prefix contains wildcards, treat whole pattern as glob *) + Some (Glob (glob_to_re s)) + else Some (Prefix prefix) + else if has_wildcard s then + (* Has wildcards - compile as glob *) + Some (Glob (glob_to_re s)) + else + (* Exact match *) + Some (Exact s) let matches path pattern = let path = Fpath.normalize 
path in @@ -124,7 +141,7 @@ let parse_export_ignore content = match parts with | pattern :: attrs when List.exists (String.equal "export-ignore") attrs -> - Some (parse_pattern pattern) + parse_pattern pattern | _ -> None) let read_export_ignore dir = diff --git a/lib/gitattributes.mli b/lib/gitattributes.mli index 06f14b7d..9306f878 100644 --- a/lib/gitattributes.mli +++ b/lib/gitattributes.mli @@ -11,8 +11,10 @@ open Bos_setup type t (** The type for gitattributes patterns. *) -val parse_pattern : string -> t -(** [parse_pattern s] is the pattern parsed from string [s]. Supports: +val parse_pattern : string -> t option +(** [parse_pattern s] is the pattern parsed from string [s], or [None] if [s] + uses an unsupported syntactic feature (negation, escaping, quoting, or + character classes). In that case a warning is logged. Supports: - Exact matches: [filename] - Directory patterns: [dir/**] - Glob patterns: [*.ext], [prefix*] *)