Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Added

- Support `.gitattributes` `export-ignore` attribute to exclude files from
distribution archives (#515, @jberdine)
- Add `dune-release delegate-info version` to show the current version as infered
by the tool (#495, @samoht)
- Add `--dev-repo` to `dune-release` and `dune-release publish` to overwrite
Expand Down
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,33 @@ The full documentation of this command is available with
dune-release help distrib
```

#### Excluding files with .gitattributes

Files marked with the `export-ignore` attribute in `.gitattributes` will be excluded from the distribution archive. This can be used to exclude development files like `dune-workspace` that should not be included in releases.

Example `.gitattributes`:
```
dune-workspace export-ignore
.github/** export-ignore
```

**Supported patterns:**
- Exact filenames: `dune-workspace`
- Directory patterns: `.github/**` (matches all files under `.github/`)
- Glob patterns: `*.log`, `test_*`, `file?.txt`
- Double star in path: `**/build`, `src/**/test.ml`
- Path normalization: handles `./` and `../` in paths

**Unsupported pattern syntax** (such patterns are skipped with a warning):
- Negation patterns (`!pattern`)
- Escaped patterns (`\!` for literal `!`)
- Quoted patterns (`"a b"` for patterns with spaces)
- Character classes (`[abc]`)

**Other limitations:**
- Only the repository-root `.gitattributes` is read; files in subdirectories are ignored.
- Matching is always case-sensitive; `core.ignorecase` is not consulted.


### Publish the distribution online

Expand Down
28 changes: 20 additions & 8 deletions lib/archive.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,33 +117,45 @@ module Tar = struct
String.concat (List.rev (end_of_file :: t))
end

let path_set_of_dir dir ~exclude_paths =
let not_excluded p = Ok (not (Fpath.Set.mem (Fpath.base p) exclude_paths)) in
let traverse = `Sat not_excluded in
let elements = `Sat not_excluded in
let path_set_of_dir dir ~exclude_paths ~export_ignore =
let included p =
if Fpath.Set.mem (Fpath.base p) exclude_paths then Ok false
else
match Fpath.rem_prefix dir p with
| None -> Ok true
| Some rel_path ->
Ok (not (List.exists (Gitattributes.matches rel_path) export_ignore))
in
let traverse = `Sat included in
let elements = `Sat included in
let err _ e = e in
OS.Dir.contents ~dotfiles:true dir
>>= OS.Path.fold ~dotfiles:true ~err ~elements ~traverse Fpath.Set.add
Fpath.Set.empty

let tar dir ~exclude_paths ~root ~mtime =
let tar dir ~exclude_paths ~export_ignore ~root ~mtime =
let tar_add file tar =
let fname =
match Fpath.rem_prefix dir file with
| None -> assert false
| Some file -> Fpath.(root // file)
in
Logs.info (fun m -> m "Archiving %a" Fpath.pp fname);
tar >>= fun tar ->
OS.Dir.exists file >>= function
| true -> Tar.add tar fname ~mode:0o775 ~mtime `Dir
| true ->
(* Skip directories - they will be created implicitly when their
contents are added. This ensures that directories whose contents
are excluded via export-ignore patterns don't appear as empty
directories in the archive. *)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But isn't this what one would expect to do? If I exclude a directory, I would expect the directory to show up, but not its contents.

Git does this but its more to do with the fact that Git doesn't track directories at all, so an empty directory is unrepresentable. But dune-release doesn't have this issue.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thinking is that the context here is reading a .gitattributes file, and so the expectation is to interpret it the way that git archive does (as much as possible). As a user, if some workflows used git archive and some dune release, and I had to adjust the .gitattributes file because they interpreted it differently, I would assume that dune was wrong since the name .gitattributes indicates that git "owns" the format.

Ok tar
| false ->
Logs.info (fun m -> m "Archiving %a" Fpath.pp fname);
OS.Path.Mode.get file >>= fun mode ->
OS.File.read file >>= fun contents ->
let mode = if 0o100 land mode > 0 then 0o775 else 0o664 in
Tar.add tar fname ~mode ~mtime (`File contents)
in
path_set_of_dir dir ~exclude_paths >>= fun fset ->
path_set_of_dir dir ~exclude_paths ~export_ignore >>= fun fset ->
Fpath.Set.fold tar_add fset (Ok Tar.empty) >>| fun tar -> Tar.to_string tar

(* Bzip2 compression and unarchiving *)
Expand Down
17 changes: 10 additions & 7 deletions lib/archive.mli
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ open Bos_setup
val tar :
Fpath.t ->
exclude_paths:Fpath.set ->
export_ignore:Gitattributes.t list ->
root:Fpath.t ->
mtime:int64 ->
(string, R.msg) result
(** [tar dir ~exclude_paths ~root ~mtime] is a (us)tar archive that contains the
file hierarchy [dir] except the relative hierarchies present in
[exclude_paths]. In the archive, members of [dir] are rerooted at [root] and
sorted according to {!Fpath.compare}. They have their modification time set
to [mtime] and their file permissions are [0o775] for directories and files
executable by the user and [0o664] for other files. No other file metadata
is preserved.
(** [tar dir ~exclude_paths ~export_ignore ~root ~mtime] is a (us)tar archive
that contains the file hierarchy [dir] except:
- relative hierarchies present in [exclude_paths] (basename matching)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it wouldn't make more sense to just translate exclude_paths to a Gitattributes.pattern and use the more generic mechanism. That way there's only one way to ignore paths.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could make sense. My worry is that exclude_paths excludes based on basename, so at any depth, while Gitattributes.patterns are path-anchored. Translating exclude_paths could have a semantic change if not done exactly right. So I think such a change ought to be in a separate PR, and I would want to have a much better idea for a good way to test it than I have at the moment.

- files matching patterns in [export_ignore] (from [.gitattributes])

In the archive, members of [dir] are rerooted at [root] and sorted according
to {!Fpath.compare}. They have their modification time set to [mtime] and
their file permissions are [0o775] for directories and files executable by
the user and [0o664] for other files. No other file metadata is preserved.

{b Note.} This is a pure OCaml implementation, no [tar] tool is needed. *)

Expand Down
151 changes: 151 additions & 0 deletions lib/gitattributes.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
open Bos_setup

type t =
| Exact of string (** Exact match against basename or full path. *)
| Prefix of string
(** Pattern like [dir/**] that matches everything under a directory, but
not the directory itself. *)
| Glob of Re.re (** Compiled glob pattern. *)

(** [glob_to_re pattern] is a compiled regex for glob [pattern]. Supports [*]
(any chars except /), [?] (single char except /), and [**] (any path
segments, but only when adjacent to /). *)
let glob_to_re pattern =
let len = String.length pattern in
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick but the length of the pattern is determined twice, so could be pulled up in the function an reused.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

let buf = Buffer.create len in
Buffer.add_char buf '^';
let rec loop i =
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it wouldn't make sense to use a string list and then List.rev |> String.concat instead of a mutable buffer here.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could be done. But I think that using a Buffer for building a string char-by-char is idiomatic, and the mutable state is encapsulated within a single function.

if i >= len then ()
else
let c = pattern.[i] in
match c with
| '*' ->
if i + 1 < len && pattern.[i + 1] = '*' then
(* ** only crosses path separators when adjacent to / *)
let preceded_by_slash = i > 0 && pattern.[i - 1] = '/' in
let at_start = i = 0 in
if i + 2 < len && pattern.[i + 2] = '/' then (
(* **/ matches zero or more directories *)
Buffer.add_string buf "(.*/)?";
loop (i + 3))
else if i + 2 >= len && (preceded_by_slash || at_start) then (
(* /** at end or just ** alone - matches anything *)
Buffer.add_string buf ".*";
loop (i + 2))
else (
(* ** not adjacent to / - acts like * *)
Buffer.add_string buf "[^/]*";
loop (i + 2))
else (
(* * matches anything except path separator *)
Buffer.add_string buf "[^/]*";
loop (i + 1))
| '?' ->
(* ? matches any single character except path separator *)
Buffer.add_string buf "[^/]";
loop (i + 1)
| '.' | '+' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
->
(* Escape regex metacharacters *)
Buffer.add_char buf '\\';
Buffer.add_char buf c;
loop (i + 1)
| _ ->
(* Literal character *)
Buffer.add_char buf c;
loop (i + 1)
in
loop 0;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it could be more maintainable to use ocamllex for this instead of hand-writing a tokenizer where the tokens are regex snippets.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, but I don't know. It would add a build dependency and generated file, and I don't expect that the syntax of globs is likely to change often. It could just as easily be done later, but I don't know if I will have time to work on that soon.

Buffer.add_char buf '$';
Re.Pcre.regexp (Buffer.contents buf)

(* [unsupported_syntax s] is [Some reason] when [s] uses a gitattributes
pattern feature we do not implement. The pattern is then skipped rather
than compiled into a regex that happens to match nothing. *)
let unsupported_syntax s =
if String.length s > 0 && s.[0] = '!' then Some "negation (!pattern)"
else if String.exists (fun c -> c = '\\') s then Some "escape (\\)"
else if String.exists (fun c -> c = '"') s then Some "quoting (\")"
else if String.exists (fun c -> c = '[' || c = ']') s then
Some "character class ([...])"
else None

let parse_pattern s =
let s = String.trim s in
match unsupported_syntax s with
| Some reason ->
Logs.warn (fun m ->
m "Skipping unsupported .gitattributes pattern %S: %s" s reason);
None
| None ->
(* Remove leading slash if present - we always match relative paths *)
let s =
if String.is_prefix ~affix:"/" s then
String.Sub.to_string (String.sub ~start:1 s)
else s
in
let has_wildcard s = String.exists (fun c -> c = '*' || c = '?') s in
if String.is_suffix ~affix:"/**" s then
(* Directory pattern: match everything under the directory *)
let prefix =
String.Sub.to_string (String.sub ~stop:(String.length s - 3) s)
in
if has_wildcard prefix then
(* Prefix contains wildcards, treat whole pattern as glob *)
Some (Glob (glob_to_re s))
else Some (Prefix prefix)
else if has_wildcard s then
(* Has wildcards - compile as glob *)
Some (Glob (glob_to_re s))
else
(* Exact match *)
Some (Exact s)

let matches path pattern =
let path = Fpath.normalize path in
let path_str = Fpath.to_string path in
let basename = Fpath.basename path in
match pattern with
| Exact s ->
(* Match against basename or full relative path *)
String.equal s basename || String.equal s path_str
| Prefix prefix ->
(* Match everything under the directory, but not the directory itself *)
String.is_prefix ~affix:(prefix ^ "/") path_str
| Glob re ->
(* Match against full path or basename for patterns like *.log *)
Re.execp re path_str || Re.execp re basename

let utf8_bom = "\xef\xbb\xbf"

let parse_export_ignore content =
(* Strip UTF-8 BOM if present at start of file *)
let content =
if String.is_prefix ~affix:utf8_bom content then
String.Sub.to_string (String.sub ~start:(String.length utf8_bom) content)
else content
in
content |> String.cuts ~sep:"\n"
|> List.filter_map (fun line ->
let line = String.trim line in
(* Skip empty lines and comments *)
if String.length line = 0 || String.is_prefix ~affix:"#" line then None
else
(* Format: <pattern> <attr1> <attr2> ...
Attributes can be separated by spaces or tabs *)
let parts =
String.fields ~empty:false
~is_sep:(fun c -> c = ' ' || c = '\t')
line
in
match parts with
| pattern :: attrs
when List.exists (String.equal "export-ignore") attrs ->
parse_pattern pattern
| _ -> None)

let read_export_ignore dir =
let file = Fpath.(dir / ".gitattributes") in
OS.File.exists file >>= function
| false -> Ok []
| true -> OS.File.read file >>| parse_export_ignore
34 changes: 34 additions & 0 deletions lib/gitattributes.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
(** Gitattributes parsing for export-ignore.

Parses [.gitattributes] files and extracts patterns marked with the
[export-ignore] attribute. These patterns can be used to exclude files from
distribution archives. *)

open Bos_setup

(** {1 Patterns} *)

type t
(** The type for gitattributes patterns. *)

val parse_pattern : string -> t option
(** [parse_pattern s] is the pattern parsed from string [s], or [None] if [s]
uses an unsupported syntactic feature (negation, escaping, quoting, or
character classes). In that case a warning is logged. Supports:
- Exact matches: [filename]
- Directory patterns: [dir/**]
- Glob patterns: [*.ext], [prefix*] *)

val matches : Fpath.t -> t -> bool
(** [matches path pattern] holds if [path] matches [pattern]. [path] should be
relative to the repository root. *)

(** {1 Parsing .gitattributes} *)

val parse_export_ignore : string -> t list
(** [parse_export_ignore content] is the list of patterns marked with
[export-ignore] in [.gitattributes] file [content]. *)

val read_export_ignore : Fpath.t -> (t list, R.msg) result
(** [read_export_ignore dir] is the list of patterns marked with [export-ignore]
in [dir/.gitattributes], or the empty list if the file doesn't exist. *)
4 changes: 3 additions & 1 deletion lib/pkg.ml
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ let distrib_archive ~dry_run ~keep_dir ~include_submodules p =
>>= fun () ->
distrib_prepare ~dry_run ~dist_build_dir ~version >>= fun () ->
let exclude_paths = Fpath.Set.of_list Distrib.exclude_paths in
Archive.tar dist_build_dir ~exclude_paths ~root ~mtime >>= fun tar ->
Gitattributes.read_export_ignore dist_build_dir >>= fun export_ignore ->
Archive.tar dist_build_dir ~exclude_paths ~export_ignore ~root ~mtime
>>= fun tar ->
distrib_archive_path p >>= fun archive ->
Archive.bzip2 ~dry_run ~force:true ~dst:archive tar >>= fun () ->
(if keep_dir then Ok () else Sos.delete_dir ~dry_run dist_build_dir)
Expand Down
Loading
Loading