diff --git a/go.work.sum b/go.work.sum index 80aa46d170..e5dcac25f3 100644 --- a/go.work.sum +++ b/go.work.sum @@ -143,6 +143,7 @@ github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0/go.mod h github.com/AzureAD/microsoft-authentication-library-for-go v1.5.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0/go.mod h1:Cz6ft6Dkn3Et6l2v2a9/RpN7epQ1GtDlO6lj8bEcOvw= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= @@ -212,6 +213,7 @@ github.com/containerd/stargz-snapshotter/estargz v0.18.1/go.mod h1:ALIEqa7B6oVDs github.com/containerd/typeurl/v2 v2.2.0/go.mod h1:8XOOxnyatxSWuG8OfsZXVnAF4iZfedjS/8UHSPJnX4g= github.com/containers/storage v1.59.1/go.mod h1:KoAYHnAjP3/cTsRS+mmWZGkufSY2GACiKQ4V3ZLQnR0= github.com/coreos/go-iptables v0.8.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= +github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= @@ -234,6 +236,7 @@ github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJP github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= +github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= @@ -318,6 +321,9 @@ github.com/letsencrypt/boulder v0.20251110.0/go.mod h1:ogKCJQwll82m7OVHWyTuf8eeF github.com/letsencrypt/boulder v0.20260223.0/go.mod h1:r3aTSA7UZ7dbDfiGK+HLHJz0bWNbHk6YSPiXgzl23sA= github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mdlayher/packet v1.1.2/go.mod h1:GEu1+n9sG5VtiRE4SydOmX5GTwyyYlteZiFU+x0kew4= github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= @@ -391,11 +397,17 @@ github.com/spf13/viper v1.20.1/go.mod h1:P9Mdzt1zoHIG8m2eZQinpiBjo6kCmZSKBClNNqj github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stefanberger/go-pkcs11uri v0.0.0-20201008174630-78d3cae3a980/go.mod h1:AO3tvPzVZ/ayst6UlUKUv6rcPQInYe3IknH3jYhAKu8= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= @@ -408,6 +420,7 @@ github.com/tink-crypto/tink-go/v2 v2.5.0/go.mod h1:2WbBA6pfNsAfBwDCggboaHeB2X29w github.com/tink-crypto/tink-go/v2 v2.6.0/go.mod h1:2WbBA6pfNsAfBwDCggboaHeB2X29wkU8XHtGwh2YIk8= github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399/go.mod h1:LdwHTNJT99C5fTAzDz0ud328OgXz+gierycbcIx2fRs= github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701/go.mod h1:P3a5rG4X7tI17Nn3aOIAYr5HbIMukwXG0urG0WuL8OA= +github.com/urfave/cli v1.22.16/go.mod h1:EeJR6BKodywf4zciqrdw6hpCPk68JO9z5LazXZMn5Po= github.com/urfave/cli v1.22.17/go.mod h1:b0ht0aqgH/6pBYzzxURyrM4xXNgsoT/n2ZzwQiEhNVo= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/ysmood/gotrace v0.6.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= @@ -529,8 +542,12 @@ golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= diff --git a/storage/go.mod b/storage/go.mod index bcc039629f..c7a2c37d3a 100644 --- a/storage/go.mod +++ b/storage/go.mod @@ -25,7 +25,7 @@ require ( github.com/stretchr/testify v1.11.1 github.com/tchap/go-patricia/v2 v2.3.3 github.com/ulikunitz/xz v0.5.15 - github.com/vbatts/tar-split v0.12.2 + github.com/vbatts/tar-split v0.12.3 golang.org/x/sync v0.20.0 golang.org/x/sys v0.43.0 gotest.tools/v3 v3.5.2 diff --git a/storage/go.sum b/storage/go.sum index 8ece4f65bf..fe51f1f044 100644 --- a/storage/go.sum +++ b/storage/go.sum @@ -73,8 +73,8 @@ github.com/tchap/go-patricia/v2 v2.3.3 h1:xfNEsODumaEcCcY3gI0hYPZ/PcpVv5ju6RMAhg github.com/tchap/go-patricia/v2 v2.3.3/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwDlLElMsnSHD4k= github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/vbatts/tar-split v0.12.2 h1:w/Y6tjxpeiFMR47yzZPlPj/FcPLpXbTUi/9H7d3CPa4= -github.com/vbatts/tar-split v0.12.2/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= +github.com/vbatts/tar-split v0.12.3 h1:Cd46rkGXI3Td4yrVNwU8ripbxFaQbmesqhjBUUYAJSw= +github.com/vbatts/tar-split v0.12.3/go.mod h1:sQOc6OlqGCr7HkGx/IDBeKiTIvqhmj8KffNhEXG4Nq0= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/format.go b/vendor/github.com/vbatts/tar-split/archive/tar/format.go index 60977980c5..6f31845e43 100644 --- a/vendor/github.com/vbatts/tar-split/archive/tar/format.go +++ b/vendor/github.com/vbatts/tar-split/archive/tar/format.go @@ -147,6 +147,12 @@ const ( // Max length of a special file (PAX header, GNU long name or link). // This matches the limit used by libarchive. maxSpecialFileSize = 1 << 20 + + // Maximum number of sparse file entries. + // We should never actually hit this limit + // (every sparse encoding will first be limited by maxSpecialFileSize), + // but this adds an additional layer of defense. + maxSparseFileEntries = 1 << 20 ) // blockPadding computes the number of bytes needed to pad offset up to the diff --git a/vendor/github.com/vbatts/tar-split/archive/tar/reader.go b/vendor/github.com/vbatts/tar-split/archive/tar/reader.go index a645c41605..ebe8579772 100644 --- a/vendor/github.com/vbatts/tar-split/archive/tar/reader.go +++ b/vendor/github.com/vbatts/tar-split/archive/tar/reader.go @@ -537,7 +537,8 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err } s := blk.GNU().Sparse() spd := make(sparseDatas, 0, s.MaxEntries()) - for { + totalSize := len(s) + for totalSize < maxSpecialFileSize { for i := 0; i < s.MaxEntries(); i++ { // This termination condition is identical to GNU and BSD tar. if s.Entry(i).Offset()[0] == 0x00 { @@ -548,7 +549,11 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err if p.err != nil { return nil, p.err } - spd = append(spd, sparseEntry{Offset: offset, Length: length}) + var err error + spd, err = appendSparseEntry(spd, sparseEntry{Offset: offset, Length: length}) + if err != nil { + return nil, err + } } if s.IsExtended()[0] > 0 { @@ -560,10 +565,12 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, err tr.rawBytes.Write(blk[:]) } s = blk.Sparse() + totalSize += len(s) continue } return spd, nil // Done } + return nil, errSparseTooLong } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format @@ -636,7 +643,10 @@ func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { if err1 != nil || err2 != nil { return nil, ErrHeader } - spd = append(spd, sparseEntry{Offset: offset, Length: length}) + spd, err = appendSparseEntry(spd, sparseEntry{Offset: offset, Length: length}) + if err != nil { + return nil, err + } } return spd, nil } @@ -670,12 +680,22 @@ func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { if err1 != nil || err2 != nil { return nil, ErrHeader } - spd = append(spd, sparseEntry{Offset: offset, Length: length}) + spd, err = appendSparseEntry(spd, sparseEntry{Offset: offset, Length: length}) + if err != nil { + return nil, err + } sparseMap = sparseMap[2:] } return spd, nil } +func appendSparseEntry(spd sparseDatas, ent sparseEntry) (sparseDatas, error) { + if len(spd) >= maxSparseFileEntries { + return nil, errSparseTooLong + } + return append(spd, ent), nil +} + // Read reads from the current file in the tar archive. // It returns (0, io.EOF) when it reaches the end of that file, // until Next is called to advance to the next file. diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go index 80c2522afe..a17b6eac1e 100644 --- a/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go +++ b/vendor/github.com/vbatts/tar-split/tar/asm/disassemble.go @@ -1,156 +1,237 @@ package asm import ( + "errors" "io" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/storage" ) -// NewInputTarStream wraps the Reader stream of a tar archive and provides a -// Reader stream of the same. +// runInputTarStreamGoroutine is the goroutine entrypoint. // -// In the middle it will pack the segments and file metadata to storage.Packer -// `p`. +// It centralizes the goroutine protocol so the core parsing logic can be +// written as ordinary Go code that just "returns an error". // -// The the storage.FilePutter is where payload of files in the stream are -// stashed. If this stashing is not needed, you can provide a nil -// storage.FilePutter. Since the checksumming is still needed, then a default -// of NewDiscardFilePutter will be used internally -func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { - // What to do here... folks will want their own access to the Reader that is - // their tar archive stream, but we'll need that same stream to use our - // forked 'archive/tar'. - // Perhaps do an io.TeeReader that hands back an io.Reader for them to read - // from, and we'll MITM the stream to store metadata. - // We'll need a storage.FilePutter too ... +// Protocol guarantees: +// - pW is always closed exactly once (CloseWithError(nil) == Close()). +// - if done != nil, exactly one value is sent (nil on success, non-nil on failure). +// - panics are converted into a non-nil error (and the panic is rethrown). +func runInputTarStreamGoroutine(outputRdr io.Reader, pW *io.PipeWriter, p storage.Packer, fp storage.FilePutter, done chan<- error) { + // Default to a non-nil error so a panic can't accidentally look like success. + err := errors.New("panic in runInputTarStream") + defer func() { + // CloseWithError(nil) is equivalent to Close(). + pW.CloseWithError(err) - // Another concern, whether to do any storage.FilePutter operations, such that we - // don't extract any amount of the archive. But then again, we're not making - // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. - // Perhaps we have a DiscardFilePutter that is a bit bucket. + if done != nil { + done <- err + } - // we'll return the pipe reader, since TeeReader does not buffer and will - // only read what the outputRdr Read's. Since Tar archives have padding on - // the end, we want to be the one reading the padding, even if the user's - // `archive/tar` doesn't care. - pR, pW := io.Pipe() - outputRdr := io.TeeReader(r, pW) + // Preserve panic semantics while still ensuring the protocol above runs. + if r := recover(); r != nil { + panic(r) + } + }() - // we need a putter that will generate the crc64 sums of file payloads - if fp == nil { - fp = storage.NewDiscardFilePutter() - } + err = runInputTarStream(outputRdr, p, fp) +} - go func() { - tr := tar.NewReader(outputRdr) - tr.RawAccounting = true - for { - hdr, err := tr.Next() - if err != nil { - if err != io.EOF { - pW.CloseWithError(err) - return - } - // even when an EOF is reached, there is often 1024 null bytes on - // the end of an archive. Collect them too. - if b := tr.RawBytes(); len(b) > 0 { - _, err := p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: b, - }) - if err != nil { - pW.CloseWithError(err) - return - } - } - break // not return. We need the end of the reader. - } - if hdr == nil { - break // not return. We need the end of the reader. - } +// runInputTarStream drives tar-split parsing. +// +// It reads a tar stream from outputRdr and records tar-split metadata into the +// provided storage.Packer. +// +// Abort behavior: if the consumer closes the read end early, the tee reader will +// stop producing bytes (due to pipe write failure) and tar parsing will return +// an error. We propagate that error so the goroutine terminates promptly rather +// than draining the input stream for no benefit. +func runInputTarStream(outputRdr io.Reader, p storage.Packer, fp storage.FilePutter) error { + tr := tar.NewReader(outputRdr) + tr.RawAccounting = true + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + return err + } + // Even when EOF is reached, there is often 1024 null bytes at the end + // of an archive. Collect them too. if b := tr.RawBytes(); len(b) > 0 { - _, err := p.AddEntry(storage.Entry{ + if _, err := p.AddEntry(storage.Entry{ Type: storage.SegmentType, Payload: b, - }) - if err != nil { - pW.CloseWithError(err) - return - } - } - - var csum []byte - if hdr.Size > 0 { - var err error - _, csum, err = fp.Put(hdr.Name, tr) - if err != nil { - pW.CloseWithError(err) - return + }); err != nil { + return err } } + break // Not return: we still need to drain any additional padding. + } + if hdr == nil { + break // Not return: we still need to drain any additional padding. + } - entry := storage.Entry{ - Type: storage.FileType, - Size: hdr.Size, - Payload: csum, + if b := tr.RawBytes(); len(b) > 0 { + if _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }); err != nil { + return err } - // For proper marshalling of non-utf8 characters - entry.SetName(hdr.Name) + } - // File entries added, regardless of size - _, err = p.AddEntry(entry) + var csum []byte + if hdr.Size > 0 { + _, csum, err = fp.Put(hdr.Name, tr) if err != nil { - pW.CloseWithError(err) - return + return err } + } - if b := tr.RawBytes(); len(b) > 0 { - _, err = p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: b, - }) - if err != nil { - pW.CloseWithError(err) - return - } - } + entry := storage.Entry{ + Type: storage.FileType, + Size: hdr.Size, + Payload: csum, } + // For proper marshalling of non-utf8 characters + entry.SetName(hdr.Name) - // It is allowable, and not uncommon that there is further padding on - // the end of an archive, apart from the expected 1024 null bytes. We - // do this in chunks rather than in one go to avoid cases where a - // maliciously crafted tar file tries to trick us into reading many GBs - // into memory. - const paddingChunkSize = 1024 * 1024 - var paddingChunk [paddingChunkSize]byte - for { - var isEOF bool - n, err := outputRdr.Read(paddingChunk[:]) - if err != nil { - if err != io.EOF { - pW.CloseWithError(err) - return - } - isEOF = true + // File entries added, regardless of size + if _, err := p.AddEntry(entry); err != nil { + return err + } + + if b := tr.RawBytes(); len(b) > 0 { + if _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }); err != nil { + return err } - if n != 0 { - _, err = p.AddEntry(storage.Entry{ - Type: storage.SegmentType, - Payload: paddingChunk[:n], - }) - if err != nil { - pW.CloseWithError(err) - return - } + } + } + + // It is allowable, and not uncommon that there is further padding on + // the end of an archive, apart from the expected 1024 null bytes. We + // do this in chunks rather than in one go to avoid cases where a + // maliciously crafted tar file tries to trick us into reading many GBs + // into memory. + const paddingChunkSize = 1024 * 1024 + var paddingChunk [paddingChunkSize]byte + for { + n, err := outputRdr.Read(paddingChunk[:]) + if n != 0 { + if _, aerr := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: paddingChunk[:n], + }); aerr != nil { + return aerr } - if isEOF { + } + if err != nil { + if err == io.EOF { break } + return err } - pW.Close() - }() + } + + return nil +} + +// newInputTarStreamCommon sets up the shared plumbing for NewInputTarStream and +// NewInputTarStreamWithDone. +// +// It constructs an io.Pipe and an io.TeeReader such that: +// +// - The caller reads tar bytes from the returned *io.PipeReader. +// - The background goroutine simultaneously reads the same stream from the +// TeeReader to perform tar-split parsing and metadata packing. +// +// Abort and synchronization semantics: +// +// - Closing the returned PipeReader causes the TeeReader to fail its write to +// the pipe, which in turn causes the background goroutine to exit promptly. +// - If withDone is true, a done channel is returned that receives exactly one +// error value (nil on success) once the background goroutine has fully +// terminated. This allows callers to safely wait until the input reader `r` +// is no longer in use. +func newInputTarStreamCommon( + r io.Reader, + p storage.Packer, + fp storage.FilePutter, + done chan<- error, +) (pr *io.PipeReader) { + // What to do here... folks will want their own access to the Reader that is + // their tar archive stream, but we'll need that same stream to use our + // forked 'archive/tar'. + // Perhaps do an io.TeeReader that hands back an io.Reader for them to read + // from, and we'll MITM the stream to store metadata. + // We'll need a storage.FilePutter too ... + + // Another concern, whether to do any storage.FilePutter operations, such that we + // don't extract any amount of the archive. But then again, we're not making + // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. + // Perhaps we have a DiscardFilePutter that is a bit bucket. - return pR, nil + // we'll return the pipe reader, since TeeReader does not buffer and will + // only read what the outputRdr Read's. Since Tar archives have padding on + // the end, we want to be the one reading the padding, even if the user's + // `archive/tar` doesn't care. + pr, pw := io.Pipe() + + if fp == nil { + fp = storage.NewDiscardFilePutter() + } + + outputRdr := io.TeeReader(r, pw) + go runInputTarStreamGoroutine(outputRdr, pw, p, fp, done) + + return pr +} + +// NewInputTarStream wraps the Reader stream of a tar archive and provides a +// Reader stream of the same. +// +// In the middle it will pack the segments and file metadata to storage.Packer +// `p`. +// +// The storage.FilePutter is where payload of files in the stream are +// stashed. If this stashing is not needed, you can provide a nil +// storage.FilePutter. Since the checksumming is still needed, then a default +// of NewDiscardFilePutter will be used internally +// +// If callers need to be able to abort early and/or wait for goroutine termination, +// prefer NewInputTarStreamWithDone. +// +// Deprecated: This leaves a goroutine around if the consumer aborts without consuming +// the whole stream, and does not allow the caller to know when r is safe to deallocate +// or when p has written everything. Use NewInputTarStreamWithDone instead. +func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { + pr := newInputTarStreamCommon(r, p, fp, nil) + return pr, nil +} + +// NewInputTarStreamWithDone wraps the Reader stream of a tar archive and provides a +// Reader stream of the same. +// +// In the middle it will pack the segments and file metadata to storage.Packer `p`. +// +// It also returns a done channel that will receive exactly one error value +// (nil on success) when the internal goroutine has fully completed parsing +// the tar stream (including the final paddingChunk draining loop) and has +// finished writing all entries to `p`. +// +// The returned reader is an io.ReadCloser so callers can stop early; closing it +// aborts the pipe so the internal goroutine can terminate promptly (rather than +// hanging on a blocked pipe write). +// +// The caller is expected to consume the returned reader fully until EOF +// (not just the tar EOF marker); closing the returned reader earlier will +// cause the done channel to return a failure. +func NewInputTarStreamWithDone(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.ReadCloser, <-chan error, error) { + done := make(chan error, 1) + pr := newInputTarStreamCommon(r, p, fp, done) + return pr, done, nil } diff --git a/vendor/github.com/vbatts/tar-split/tar/asm/iterate.go b/vendor/github.com/vbatts/tar-split/tar/asm/iterate.go index 8a65887cf2..9db3ab509d 100644 --- a/vendor/github.com/vbatts/tar-split/tar/asm/iterate.go +++ b/vendor/github.com/vbatts/tar-split/tar/asm/iterate.go @@ -11,7 +11,7 @@ import ( // IterateHeaders calls handler for each tar header provided by Unpacker func IterateHeaders(unpacker storage.Unpacker, handler func(hdr *tar.Header) error) error { - // We assume about NewInputTarStream: + // We assume about NewInputTarStreamWithDone: // - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions // - (There is a FileType entry for every tar header, we ignore it) // - Trailing padding of a file, if any, is included in the next SegmentType entry diff --git a/vendor/modules.txt b/vendor/modules.txt index da9f7bd5b2..b5682b9455 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -449,7 +449,9 @@ github.com/ulikunitz/xz/internal/hash github.com/ulikunitz/xz/internal/xlog github.com/ulikunitz/xz/lzma # github.com/vbatts/tar-split v0.12.2 -## explicit; go 1.17 +## explicit; go 1.22.0 +# github.com/vbatts/tar-split v0.12.3 +## explicit; go 1.22.0 github.com/vbatts/tar-split/archive/tar github.com/vbatts/tar-split/tar/asm github.com/vbatts/tar-split/tar/storage