diff --git a/clicommand/bootstrap.go b/clicommand/bootstrap.go index 9dac123236..5d925982df 100644 --- a/clicommand/bootstrap.go +++ b/clicommand/bootstrap.go @@ -60,6 +60,7 @@ type BootstrapConfig struct { PullRequest string `cli:"pullrequest"` PullRequestUsingMergeRefspec bool `cli:"pull-request-using-merge-refspec"` GitSubmodules bool `cli:"git-submodules"` + GitLFSEnabled bool `cli:"git-lfs-enabled"` SSHKeyscan bool `cli:"ssh-keyscan"` AgentName string `cli:"agent" validate:"required"` Queue string `cli:"queue"` @@ -298,6 +299,11 @@ var BootstrapCommand = cli.Command{ Usage: "Enable git submodules (default: true)", EnvVar: "BUILDKITE_GIT_SUBMODULES", }, + cli.BoolFlag{ + Name: "git-lfs-enabled", + Usage: "Enable Git LFS object download during checkout (default: false)", + EnvVar: "BUILDKITE_GIT_LFS_ENABLED", + }, cli.BoolTFlag{ Name: "pty", Usage: "Run jobs within a pseudo terminal (default: true)", @@ -440,6 +446,7 @@ var BootstrapCommand = cli.Command{ GitCloneFlags: cfg.GitCloneFlags, GitCloneMirrorFlags: cfg.GitCloneMirrorFlags, GitFetchFlags: cfg.GitFetchFlags, + GitLFSEnabled: cfg.GitLFSEnabled, GitMirrorsLockTimeout: cfg.GitMirrorsLockTimeout, GitMirrorsPath: cfg.GitMirrorsPath, GitMirrorCheckoutMode: cfg.GitMirrorCheckoutMode, diff --git a/internal/job/checkout.go b/internal/job/checkout.go index af93dbf8ce..94dcbbdad8 100644 --- a/internal/job/checkout.go +++ b/internal/job/checkout.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "os/exec" "path/filepath" "runtime" "slices" @@ -902,6 +903,13 @@ func (e *Executor) defaultCheckoutPhase(ctx context.Context) error { } } + // Fail fast before any git work if git-lfs is required but missing. + if e.GitLFSEnabled { + if _, err := exec.LookPath("git-lfs"); err != nil { + return fmt.Errorf("BUILDKITE_GIT_LFS_ENABLED=true but git-lfs binary is not found on PATH: %w", err) + } + } + // Git clean prior to checkout, we do this even if submodules have been // disabled to ensure previous submodules are cleaned up if hasGitSubmodules(e.shell) { @@ -914,6 +922,20 @@ func (e *Executor) defaultCheckoutPhase(ctx context.Context) error { return fmt.Errorf("cleaning git repository: %w", err) } + // Install LFS filter before fetch so the filter is registered before any + // network operation, following the conventional git-lfs setup order. + if e.GitLFSEnabled { + e.shell.Commentf("Installing Git LFS filter") + if err := e.shell.Command("git", "lfs", "install", "--local").Run(ctx); err != nil { + return fmt.Errorf("installing git lfs filter: %w", err) + } + // Force-set GIT_LFS_SKIP_SMUDGE=1 so checkout writes pointer files to + // disk rather than downloading objects inline. Intentionally not + // restored — git lfs checkout materialises files from cache without + // triggering the smudge filter. + e.shell.Env.Set("GIT_LFS_SKIP_SMUDGE", "1") + } + if err := e.fetchSource(ctx); err != nil { return err } @@ -1019,6 +1041,15 @@ func (e *Executor) defaultCheckoutPhase(ctx context.Context) error { } } + if e.GitLFSEnabled { + if err := gitLFSFetchCheckout(ctx, gitLFSFetchCheckoutArgs{ + Shell: e.shell, + Retry: true, + }); err != nil { + return err + } + } + // Git clean after checkout. We need to do this because submodules could have // changed in between the last checkout and this one. A double clean is the only // good solution to this problem that we've found diff --git a/internal/job/checkout_test.go b/internal/job/checkout_test.go index a23167d737..0d38655118 100644 --- a/internal/job/checkout_test.go +++ b/internal/job/checkout_test.go @@ -2,7 +2,12 @@ package job import ( "context" + "fmt" "os" + "os/exec" + "path/filepath" + "runtime" + "strings" "testing" "time" @@ -276,3 +281,193 @@ func TestDefaultCheckoutPhase_DelayedRefCreation(t *testing.T) { t.Fatalf("tt.executor.defaultCheckoutPhase(ctx) error = %v, want nil", err) } } + +func TestDefaultCheckoutPhase_GitLFS(t *testing.T) { + // Not parallel: subtests manipulate PATH via t.Setenv, which modifies + // process-global state. + ctx := context.Background() + + t.Setenv("GIT_AUTHOR_NAME", "Buildkite Agent") + t.Setenv("GIT_AUTHOR_EMAIL", "agent@example.com") + t.Setenv("GIT_COMMITTER_NAME", "Buildkite Agent") + t.Setenv("GIT_COMMITTER_EMAIL", "agent@example.com") + + // gitOnlyBinDir returns a temp dir containing git (via a symlink on Unix or + // a .bat wrapper on Windows) but no git-lfs, so exec.LookPath("git-lfs") + // will fail while git commands still work. + gitOnlyBinDir := func(t *testing.T) string { + t.Helper() + gitBin, err := exec.LookPath("git") + if err != nil { + t.Fatalf("exec.LookPath(\"git\") error = %v", err) + } + binDir := t.TempDir() + if runtime.GOOS == "windows" { + // Use a .bat wrapper to avoid copying the multi-MB binary and to + // sidestep the symlink-privilege requirement on Windows. + wrapper := fmt.Sprintf("@echo off\r\n\"%s\" %%*\r\n", gitBin) + if err := os.WriteFile(filepath.Join(binDir, "git.bat"), []byte(wrapper), 0o755); err != nil { + t.Fatalf("os.WriteFile() error = %v", err) + } + } else { + if err := os.Symlink(gitBin, filepath.Join(binDir, "git")); err != nil { + t.Fatalf("os.Symlink() error = %v", err) + } + } + return binDir + } + + // fakeLFSBinDir returns a temp dir that has git (via gitOnlyBinDir) plus a + // fake git-lfs whose behaviour is defined by the provided scripts. + // unixScript is a #!/bin/sh script; winBatch is a .bat file body. + fakeLFSBinDir := func(t *testing.T, unixScript, winBatch string) string { + t.Helper() + binDir := gitOnlyBinDir(t) + if runtime.GOOS == "windows" { + if err := os.WriteFile(filepath.Join(binDir, "git-lfs.bat"), []byte(winBatch), 0o755); err != nil { + t.Fatalf("os.WriteFile() error = %v", err) + } + } else { + if err := os.WriteFile(filepath.Join(binDir, "git-lfs"), []byte(unixScript), 0o755); err != nil { + t.Fatalf("os.WriteFile() error = %v", err) + } + } + return binDir + } + + tests := []struct { + name string + lfsEnabled bool + setupPath func(t *testing.T) + wantErr string + }{ + { + name: "LFS disabled", + lfsEnabled: false, + }, + { + name: "LFS enabled binary present", + lfsEnabled: true, + setupPath: func(t *testing.T) { + if _, err := exec.LookPath("git-lfs"); err != nil { + t.Skip("git-lfs not installed") + } + }, + }, + { + name: "LFS enabled binary missing", + lfsEnabled: true, + setupPath: func(t *testing.T) { + t.Setenv("PATH", gitOnlyBinDir(t)) + }, + wantErr: "git-lfs binary is not found on PATH", + }, + { + name: "LFS enabled git lfs command fails", + lfsEnabled: true, + setupPath: func(t *testing.T) { + // Git for Windows ships its own git-lfs.exe inside + // GIT_EXEC_PATH, which git resolves before falling back to + // PATH. We can't fool git's subcommand lookup with a PATH + // override the way we can fool Go's exec.LookPath. + if runtime.GOOS == "windows" { + t.Skip("Not runnable on Windows: git for Windows uses bundled git-lfs.exe regardless of PATH") + } + t.Setenv("PATH", fakeLFSBinDir(t, + "#!/bin/sh\nexit 1\n", + "@echo off\r\nexit /b 1\r\n", + )) + }, + wantErr: "installing git lfs filter", + }, + { + name: "LFS enabled git lfs fetch fails", + lfsEnabled: true, + setupPath: func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Not runnable on Windows: git for Windows uses bundled git-lfs.exe regardless of PATH") + } + t.Setenv("PATH", fakeLFSBinDir(t, + "#!/bin/sh\ncase \"$1\" in\n install) exit 0 ;;\n *) exit 1 ;;\nesac\n", + "@echo off\r\nif \"%1\"==\"install\" exit /b 0\r\nexit /b 1\r\n", + )) + }, + wantErr: "git lfs fetch", + }, + } + + s := githttptest.NewServer() + t.Cleanup(s.Close) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up the remote repository BEFORE restricting PATH so that + // githttptest's git operations use the real git binary. + projectName := "test-" + strings.ReplaceAll(strings.ToLower(tt.name), " ", "-") + if err := s.CreateRepository(projectName); err != nil { + t.Fatalf("s.CreateRepository(%q) error = %v", projectName, err) + } + out, err := s.InitRepository(projectName) + if err != nil { + t.Fatalf("s.InitRepository(%q) error = %v, output: %s", projectName, err, out) + } + + // Restrict PATH after the repo is initialised. + if tt.setupPath != nil { + tt.setupPath(t) + } + + sh, err := shell.New() + if err != nil { + t.Fatalf("shell.New() error = %v", err) + } + + // Use os.MkdirTemp + best-effort cleanup rather than t.TempDir(): + // on Windows, git's child processes (credential helpers, git-lfs + // filter-process) can hold file handles open past their parent's + // exit, and t.TempDir()'s strict cleanup fails the test. + checkoutDir, err := os.MkdirTemp("", "checkout-path-") + if err != nil { + t.Fatalf("os.MkdirTemp() error = %v", err) + } + t.Cleanup(func() { + os.RemoveAll(checkoutDir) //nolint:errcheck // Best-effort cleanup. + }) + buildDir, err := os.MkdirTemp("", "build-path-") + if err != nil { + t.Fatalf("os.MkdirTemp() error = %v", err) + } + t.Cleanup(func() { + os.RemoveAll(buildDir) //nolint:errcheck // Best-effort cleanup. + }) + sh.Env.Set("BUILDKITE_BUILD_CHECKOUT_PATH", checkoutDir) + + executor := &Executor{ + shell: sh, + ExecutorConfig: ExecutorConfig{ + Commit: "HEAD", + Branch: "main", + GitCleanFlags: "-f -d -x", + BuildPath: buildDir, + Repository: s.RepoURL(projectName), + GitLFSEnabled: tt.lfsEnabled, + }, + } + + err = executor.defaultCheckoutPhase(ctx) + if tt.wantErr == "" { + if err != nil { + t.Errorf("defaultCheckoutPhase() error = %v, want nil", err) + } + return + } + if err == nil { + t.Errorf("defaultCheckoutPhase() error = nil, want error containing %q", tt.wantErr) + return + } + if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("defaultCheckoutPhase() error = %q, want it to contain %q", err.Error(), tt.wantErr) + } + }) + } +} diff --git a/internal/job/config.go b/internal/job/config.go index ab59d0b0e9..c40a5e28cb 100644 --- a/internal/job/config.go +++ b/internal/job/config.go @@ -53,6 +53,9 @@ type ExecutorConfig struct { // Should git submodules be checked out GitSubmodules bool `env:"BUILDKITE_GIT_SUBMODULES"` + // Whether to enable Git LFS operations during checkout + GitLFSEnabled bool `env:"BUILDKITE_GIT_LFS_ENABLED"` + // If the commit was part of a pull request, this will container the PR number PullRequest string diff --git a/internal/job/executor.go b/internal/job/executor.go index 85795c7c51..809f577d73 100644 --- a/internal/job/executor.go +++ b/internal/job/executor.go @@ -907,6 +907,10 @@ func (e *Executor) setUp(ctx context.Context) error { // Disable any interactive Git/SSH prompting e.shell.Env.Set("GIT_TERMINAL_PROMPT", "0") + // Force-set GIT_LFS_SKIP_SMUDGE=1 before any git operations so LFS + // objects are not downloaded automatically during checkout. + e.shell.Env.Set("GIT_LFS_SKIP_SMUDGE", "1") + // Fetch and set secrets before environment hook execution if e.Secrets != "" { if err := e.fetchAndSetSecrets(ctx); err != nil { diff --git a/internal/job/git.go b/internal/job/git.go index aef370940a..eebc47a41b 100644 --- a/internal/job/git.go +++ b/internal/job/git.go @@ -133,6 +133,52 @@ func gitCleanSubmodules(ctx context.Context, sh *shell.Shell, gitCleanFlags stri return nil } +type gitLFSFetchCheckoutArgs struct { + Shell *shell.Shell + Retry bool // Whether to retry the fetch+checkout on failure +} + +// gitLFSFetchCheckout fetches LFS objects for the current HEAD then materialises +// them. Fetch and checkout failures are wrapped with distinct messages so that a +// caller can tell which step failed from the error string alone. +// +// When args.Retry is true, the fetch+checkout pair is retried with exponential +// backoff to ride out transient network errors talking to the LFS server. +// Unlike gitFetch, we don't smelt for specific error strings: git-lfs uses +// different exit codes and error vocabulary than git itself, so we retry +// indiscriminately on any failure and rely on the retry budget to bound the +// damage from a genuinely permanent error. +func gitLFSFetchCheckout(ctx context.Context, args gitLFSFetchCheckoutArgs) error { + retrier := roko.NewRetrier( + roko.WithStrategy(roko.Constant(0)), + roko.WithMaxAttempts(1), + ) + + if args.Retry { + retrier = roko.NewRetrier( + roko.WithStrategy(roko.ExponentialSubsecond(1*time.Second)), + roko.WithMaxAttempts(10), // 10 attempts will take ~2 minutes 17s + roko.WithJitter(), + ) + } + + return retrier.DoWithContext(ctx, func(retrier *roko.Retrier) error { + if err := args.Shell.Command("git", "lfs", "fetch").Run(ctx); err != nil { + if args.Retry { + args.Shell.Commentf("%s", retrier) + } + return fmt.Errorf("git lfs fetch: %w", err) + } + if err := args.Shell.Command("git", "lfs", "checkout").Run(ctx); err != nil { + if args.Retry { + args.Shell.Commentf("%s", retrier) + } + return fmt.Errorf("git lfs checkout: %w", err) + } + return nil + }) +} + func gitRepack(ctx context.Context, sh *shell.Shell, args ...string) error { commandArgs := []string{"repack"} commandArgs = append(commandArgs, args...) diff --git a/internal/job/git_test.go b/internal/job/git_test.go index 2cf6b62df8..077b06ed8d 100644 --- a/internal/job/git_test.go +++ b/internal/job/git_test.go @@ -311,3 +311,31 @@ func TestGitFetch(t *testing.T) { t.Errorf("executed commands diff (-got +want):\n%s", diff) } } + +func TestGitLFSFetchCheckout(t *testing.T) { + t.Parallel() + ctx := context.Background() + + var gotLog [][]string + sh := shell.NewTestShell(t, shell.WithDryRun(true), shell.WithCommandLog(&gotLog)) + + absoluteGit, err := sh.AbsolutePath("git") + if err != nil { + t.Fatalf("sh.AbsolutePath(git) = %v", err) + } + + if err := gitLFSFetchCheckout(ctx, gitLFSFetchCheckoutArgs{ + Shell: sh, + Retry: true, + }); err != nil { + t.Fatalf("gitLFSFetchCheckout(ctx, ...) = %v", err) + } + + wantLog := [][]string{ + {absoluteGit, "lfs", "fetch"}, + {absoluteGit, "lfs", "checkout"}, + } + if diff := cmp.Diff(gotLog, wantLog); diff != "" { + t.Errorf("executed commands diff (-got +want):\n%s", diff) + } +}