From 7c28455ecbd366cb3a816faa6bdfd0ea6b888f43 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 01:47:36 -0700 Subject: [PATCH 1/9] add progress updates --- .golangci.yml | 6 +- 7z.go | 49 +++++++------- ar.go | 50 ++++++++++---- cpio.go | 40 ++++++----- decompress.go | 179 ++++++++++++++++++++++++-------------------------- files.go | 43 +++++++++--- iso.go | 51 ++++++++++++-- progress.go | 143 ++++++++++++++++++++++++++++++++++++++++ queue.go | 4 +- rar.go | 58 +++++++++++----- rpm.go | 55 +++++++++------- tar.go | 96 ++++++++++++++++----------- zip.go | 25 +++++-- 13 files changed, 550 insertions(+), 249 deletions(-) create mode 100644 progress.go diff --git a/.golangci.yml b/.golangci.yml index 8c037fd6..e6296235 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -7,6 +7,7 @@ linters: - exhaustruct - depguard - nonamedreturns + - forbidigo exclusions: rules: - linters: @@ -20,6 +21,7 @@ linters: gosec: excludes: - G304 + - G115 gocritic: enable-all: true disabled-checks: @@ -30,8 +32,8 @@ linters: checkExported: true errcheck: check-type-assertions: true - check-blank: true - disable-default-exclusions: true + check-blank: false + disable-default-exclusions: false exclude-functions: - (*os.File).Close - os.RemoveAll diff --git a/7z.go b/7z.go index 51086d66..b6e96948 100644 --- a/7z.go +++ b/7z.go @@ -10,7 +10,7 @@ import ( // Extract7z extracts a 7zip archive. // Volumes: https://github.com/bodgit/sevenzip/issues/54 -func Extract7z(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func Extract7z(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { if len(xFile.Passwords) == 0 && xFile.Password == "" { return extract7z(xFile) } @@ -41,50 +41,51 @@ func Extract7z(xFile *XFile) (size int64, filesList, archiveList []string, err e return 0, nil, nil, nil } -func extract7z(xFile *XFile) (int64, []string, []string, error) { - var ( - sevenZip *sevenzip.ReadCloser - err error - ) - - if xFile.Password != "" { - sevenZip, err = sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) - } else { - sevenZip, err = sevenzip.OpenReader(xFile.FilePath) +func extract7z(xFile *XFile) (uint64, []string, []string, error) { + sevenZip, err := sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) + if err != nil { + return 0, nil, nil, fmt.Errorf("%s: os.Open: %w", xFile.FilePath, err) } + defer xFile.newProgress(getUncompressed7zSize(sevenZip)).done() // this closes rarReader + + sevenZip, err = sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) if err != nil { return 0, nil, nil, fmt.Errorf("%s: os.Open: %w", xFile.FilePath, err) } - defer sevenZip.Close() files := []string{} - size := int64(0) for _, zipFile := range sevenZip.File { fSize, wfile, err := xFile.un7zip(zipFile) if err != nil { - lastFile := xFile.FilePath - /* // https://github.com/bodgit/sevenzip/issues/54 - // We can probably never get the file with the error. - if volumes := sevenZip.Volumes(); len(volumes) > 0 { - lastFile = volumes[len(volumes)-1] - } */ - return size, files, sevenZip.Volumes(), fmt.Errorf("%s: %w", lastFile, err) + return xFile.prog.Wrote, files, sevenZip.Volumes(), fmt.Errorf("%s: %w", xFile.FilePath, err) } files = append(files, filepath.Join(xFile.OutputDir, zipFile.Name)) - size += fSize - xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", wfile, fSize, len(files), size) + xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + wfile, fSize, xFile.prog.Files, xFile.prog.Wrote) } files, err = xFile.cleanup(files) - return size, files, sevenZip.Volumes(), err + return xFile.prog.Wrote, files, sevenZip.Volumes(), err +} + +func getUncompressed7zSize(reader *sevenzip.ReadCloser) (total, compressed uint64, count int) { + defer reader.Close() + + for _, zipFile := range reader.File { + total += zipFile.UncompressedSize + // compressed += uint64(zipFile.FileInfo().Size()) + count++ + } + + return total, 0, count } -func (x *XFile) un7zip(zipFile *sevenzip.File) (int64, string, error) { +func (x *XFile) un7zip(zipFile *sevenzip.File) (uint64, string, error) { zFile, err := zipFile.Open() if err != nil { return 0, zipFile.Name, fmt.Errorf("zipFile.Open: %w", err) diff --git a/ar.go b/ar.go index 3f867524..85c1294f 100644 --- a/ar.go +++ b/ar.go @@ -11,20 +11,28 @@ import ( ) // ExtractAr extracts a raw ar archive. Used by debian (.deb) packages. -func ExtractAr(xFile *XFile) (size int64, filesList []string, err error) { +func ExtractAr(xFile *XFile) (size uint64, filesList []string, err error) { arFile, err := os.Open(xFile.FilePath) if err != nil { + return 0, nil, fmt.Errorf("rardecode.OpenReader: %w", err) + } + + defer xFile.newProgress(getUncompressedArSize(arFile)).done() // this closes arFile + + if arFile, err = os.Open(xFile.FilePath); err != nil { return 0, nil, fmt.Errorf("os.Open: %w", err) } + defer arFile.Close() - return xFile.unAr(arFile) + files, err := xFile.unAr(xFile.prog.reader(arFile)) + + return xFile.prog.Wrote, files, err } -func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { +func (x *XFile) unAr(reader io.Reader) ([]string, error) { arReader := ar.NewReader(reader) files := []string{} - size := int64(0) for { header, err := arReader.Next() @@ -33,34 +41,54 @@ func (x *XFile) unAr(reader io.Reader) (int64, []string, error) { break } - return size, files, fmt.Errorf("%s: arReader.Next: %w", x.FilePath, err) + return files, fmt.Errorf("%s: arReader.Next: %w", x.FilePath, err) } file := &file{ Path: x.clean(header.Name), Data: arReader, - FileMode: os.FileMode(header.Mode), //nolint:gosec // what else ya gonna do with this? + FileMode: os.FileMode(header.Mode), DirMode: x.DirMode, Mtime: header.ModTime, } if !strings.HasPrefix(file.Path, x.OutputDir) { // The file being written is trying to write outside of our base path. Malicious archive? - return size, files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, header.Name) + return files, fmt.Errorf("%s: %w: %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, header.Name) } // ar format does not store directory paths. Flat list of files. fSize, err := x.write(file) if err != nil { - return size, files, err + return files, err } files = append(files, file.Path) - size += fSize + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + file.Path, fSize, x.prog.Files, x.prog.Wrote) } - files, err := x.cleanup(files) + return x.cleanup(files) +} + +// ar files are not compressed. +func getUncompressedArSize(arFile io.ReadCloser) (total, compressed uint64, count int) { + defer arFile.Close() + + arReader := ar.NewReader(arFile) - return size, files, err + for { + header, err := arReader.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return total, 0, count + } + + return total, 0, count + } + + total += uint64(header.Size) + count++ + } } diff --git a/cpio.go b/cpio.go index ad44df75..47a1cc02 100644 --- a/cpio.go +++ b/cpio.go @@ -13,57 +13,65 @@ import ( ) // ExtractCPIOGzip extracts a gzip-compressed cpio archive (cpgz). -func ExtractCPIOGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractCPIOGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipStream.Close() - return xFile.uncpio(zipStream) + files, err := xFile.uncpio(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractCPIO extracts a .cpio file. -func ExtractCPIO(xFile *XFile) (size int64, filesList []string, err error) { - fileReader, err := os.Open(xFile.FilePath) +func ExtractCPIO(xFile *XFile) (size uint64, filesList []string, err error) { + fileReader, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer fileReader.Close() - return xFile.uncpio(fileReader) + defer xFile.newProgress(uint64(stat.Size()), uint64(stat.Size()), 0).done() + + files, err := xFile.uncpio(xFile.prog.reader(fileReader)) + + return xFile.prog.Wrote, files, err } -func (x *XFile) uncpio(reader io.Reader) (int64, []string, error) { +func (x *XFile) uncpio(reader io.Reader) ([]string, error) { zipReader := cpio.NewReader(reader) files := []string{} - size := int64(0) for { zipFile, err := zipReader.Next() if errors.Is(err, io.EOF) { - return size, files, nil + return files, nil } else if err != nil { - return 0, nil, fmt.Errorf("cpio Next() failed: %w", err) + return nil, fmt.Errorf("cpio Next() failed: %w", err) } fSize, err := x.uncpioFile(zipFile, zipReader) if err != nil { - return size, files, fmt.Errorf("%s: %w", x.FilePath, err) + return files, fmt.Errorf("%s: %w", x.FilePath, err) } files = append(files, filepath.Join(x.OutputDir, zipFile.Name)) - size += fSize + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + zipFile.Name, fSize, x.prog.Files, x.prog.Wrote) } } -func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (int64, error) { +func (x *XFile) uncpioFile(cpioFile *cpio.Header, cpioReader *cpio.Reader) (uint64, error) { file := &file{ Path: x.clean(cpioFile.Name), Data: cpioReader, diff --git a/decompress.go b/decompress.go index eef907d6..6e2e63f9 100644 --- a/decompress.go +++ b/decompress.go @@ -4,7 +4,6 @@ import ( "compress/bzip2" "compress/gzip" "fmt" - "os" "github.com/andybalholm/brotli" "github.com/klauspost/compress/s2" @@ -18,14 +17,16 @@ import ( ) // ExtractXZ extracts an XZ-compressed file. A single file. -func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractXZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := xz.NewReader(compressedFile, 0) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := xz.NewReader(xFile.prog.reader(compressedFile), 0) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } @@ -39,22 +40,21 @@ func ExtractXZ(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractZlib extracts a zlib-compressed file. A single file. -func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractZlib(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := zlib.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := zlib.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("zlib.NewReader: %w", err) } @@ -69,22 +69,21 @@ func ExtractZlib(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZMA extracts an lzma-compressed file. A single file. -func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZMA(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzma.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzma.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzma.NewReader: %w", err) } @@ -106,14 +105,16 @@ func ExtractLZMA(xFile *XFile) (size int64, filesList []string, err error) { } // ExtractLZMA2 extracts an lzma2-compressed file. A single file. -func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZMA2(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzma.NewReader2(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzma.NewReader2(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err) } @@ -127,22 +128,21 @@ func ExtractLZMA2(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractZstandard extracts a Zstandard-compressed file. A single file. -func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractZstandard(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := zstd.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := zstd.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("zstd.NewReader: %w", err) } @@ -157,22 +157,21 @@ func ExtractZstandard(xFile *XFile) (size int64, filesList []string, err error) } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZW extracts an LZW-compressed file. A single file. -func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZW(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := lzw.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := lzw.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzw.NewReader: %w", err) } @@ -186,142 +185,139 @@ func ExtractLZW(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractLZ4 extracts an LZ4-compressed file. A single file. -func ExtractLZ4(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractLZ4(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + reader := lz4.NewReader(xFile.prog.reader(compressedFile)) + xFile.prog.Total = uint64(reader.Size()) + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".lz4"), - Data: lz4.NewReader(compressedFile), + Data: reader, FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractSnappy extracts a snappy-compressed file. A single file. -func ExtractSnappy(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractSnappy(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".snappy", ".sz"), - Data: snappy.NewReader(compressedFile), + Data: snappy.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractS2 extracts a Snappy2-compressed file. A single file. -func ExtractS2(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractS2(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".s2"), - Data: s2.NewReader(compressedFile), + Data: s2.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractBrotli extracts a Brotli-compressed file. A single file. -func ExtractBrotli(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractBrotli(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".brotli", ".br"), - Data: brotli.NewReader(compressedFile), + Data: brotli.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractBzip extracts a bzip2-compressed file. That is, a single file. -func ExtractBzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractBzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + // Get the absolute path of the file being written. file := &file{ Path: xFile.clean(xFile.FilePath, ".bz", ".bz2"), - Data: bzip2.NewReader(compressedFile), + Data: bzip2.NewReader(xFile.prog.reader(compressedFile)), FileMode: xFile.FileMode, DirMode: xFile.DirMode, } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } // ExtractGzip extracts a gzip-compressed file. That is, a single file. -func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipReader, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 1).done() + + zipReader, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } @@ -337,9 +333,6 @@ func ExtractGzip(xFile *XFile) (size int64, filesList []string, err error) { } size, err = xFile.write(file) - if err != nil { - return size, nil, err - } - return size, []string{file.Path}, nil + return size, []string{file.Path}, err } diff --git a/files.go b/files.go index 3a061ec1..6e3a2783 100644 --- a/files.go +++ b/files.go @@ -24,7 +24,7 @@ type archive struct { } // Interface is a common interface for extracting compressed or non-compressed files or archives. -type Interface func(x *XFile) (size int64, filesList, archiveList []string, err error) +type Interface func(x *XFile) (size uint64, filesList, archiveList []string, err error) // https://github.com/golift/xtractr/issues/44 // @@ -80,8 +80,8 @@ var extension2function = []archive{ // ChngInt converts the smaller return interface into an ExtractInterface. // Functions with multi-part archive files return four values. Other functions return only 3. // This ChngInt function makes both interfaces compatible. -func ChngInt(smallFn func(*XFile) (int64, []string, error)) Interface { - return func(xFile *XFile) (int64, []string, []string, error) { +func ChngInt(smallFn func(*XFile) (uint64, []string, error)) Interface { + return func(xFile *XFile) (uint64, []string, []string, error) { size, files, err := smallFn(xFile) return size, files, []string{xFile.FilePath}, err } @@ -112,6 +112,13 @@ type XFile struct { Password string // (RAR/7z) Archive passwords (to try multiple). Passwords []string + // Progress is called periodically during file extraction. + // Contains info about the progress of the extraction. + // This is not called if an Updates channel is also provided. + Progress func(Progress) + // If an Updates channel is provided, all Progress updates are sent to it. + // Contains info about the progress of the extraction. + Updates chan Progress // If the archive only has one directory in the root, then setting // this true will cause the extracted content to be moved into the // output folder, and the root folder in the archive to be removed. @@ -119,6 +126,7 @@ type XFile struct { // Logger allows printing debug messages. log Logger moveFiles func(fromPath, toPath string, overwrite bool) ([]string, error) + prog *Progress } // Filter is the input to find compressed files. @@ -310,13 +318,13 @@ func getCompressedFiles(path string, filter *Filter, fileList []os.FileInfo, dep // Extract calls the correct procedure for the type of file being extracted. // Returns size of extracted data, list of extracted files, and/or error. -func (x *XFile) Extract() (size int64, filesList, archiveList []string, err error) { +func (x *XFile) Extract() (size uint64, filesList, archiveList []string, err error) { return ExtractFile(x) } // ExtractFile calls the correct procedure for the type of file being extracted. // Returns size of extracted data, list of extracted files, list of archives processed, and/or error. -func ExtractFile(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func ExtractFile(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { sName := strings.ToLower(xFile.FilePath) // just borrowing this... Has to go into an interface to avoid a cycle. xFile.moveFiles = parseConfig(&Config{Logger: xFile.log}).MoveFiles @@ -416,7 +424,7 @@ func (x *XFile) mkDir(path string, mode os.FileMode, mtime time.Time) error { } // write a file from an io reader, making sure all parent directories exist. -func (x *XFile) write(file *file) (int64, error) { +func (x *XFile) write(file *file) (uint64, error) { if err := x.mkDir(filepath.Dir(file.Path), file.DirMode, file.Mtime); err != nil { return 0, fmt.Errorf("writing archived file '%s' parent folder: %w", filepath.Base(file.Path), err) } @@ -427,17 +435,17 @@ func (x *XFile) write(file *file) (int64, error) { } defer fout.Close() - size, err := io.Copy(fout, file.Data) + size, err := io.Copy(x.prog.writer(fout), file.Data) if err != nil { - return size, fmt.Errorf("copying archived file '%s' io: %w", file.Path, err) + return uint64(size), fmt.Errorf("copying archived file '%s' io: %w", file.Path, err) } // If this sucks, make it a defer and ignore the error, like xFile.mkDir(). if err = os.Chtimes(file.Path, file.Atime, file.Mtime); err != nil { - return size, fmt.Errorf("changing archived file times: %w", err) + return uint64(size), fmt.Errorf("changing archived file times: %w", err) } - return size, nil + return uint64(size), nil } // Rename is an attempt to deal with "invalid cross link device" on weird file systems. @@ -600,3 +608,18 @@ func (x *XFile) safeFileMode(current os.FileMode) os.FileMode { return current | minimum } + +func openStatFile(path string) (*os.File, os.FileInfo, error) { + file, err := os.Open(path) + if err != nil { + return nil, nil, fmt.Errorf("os.Open: %w", err) + } + + stat, err := file.Stat() + if err != nil { + _ = file.Close() + return nil, nil, fmt.Errorf("file.Stat: %w", err) + } + + return file, stat, nil +} diff --git a/iso.go b/iso.go index 00c309b7..718ef653 100644 --- a/iso.go +++ b/iso.go @@ -10,14 +10,18 @@ import ( ) // ExtractISO writes an ISO's contents to disk. -func ExtractISO(xFile *XFile) (size int64, filesList []string, err error) { - openISO, err := os.Open(xFile.FilePath) +func ExtractISO(xFile *XFile) (size uint64, filesList []string, err error) { + openISO, err := os.Open(xFile.FilePath) // os.Open on purpose. if err != nil { - return 0, nil, fmt.Errorf("failed to open iso file: %s: %w", xFile.FilePath, err) + return 0, nil, fmt.Errorf("os.Open: %w", err) } defer openISO.Close() - iso, err := iso9660.OpenImage(openISO) + image, _ := iso9660.OpenImage(openISO) + + defer xFile.newProgress(getUncompressedIsoSize(image)).done() + + iso, err := iso9660.OpenImage(xFile.prog.readAter(openISO)) if err != nil { return 0, nil, fmt.Errorf("failed to open iso image: %s: %w", xFile.FilePath, err) } @@ -35,7 +39,38 @@ func ExtractISO(xFile *XFile) (size int64, filesList []string, err error) { return size, files, nil } -func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, error) { +//nolint:unparam // so we can pass it in. +func getUncompressedIsoSize(image *iso9660.Image) (total, _ uint64, count int) { + if image == nil { + return total, 0, count + } + + var loop func(isoFile *iso9660.File) + loop = func(isoFile *iso9660.File) { + count++ + + children, err := isoFile.GetChildren() + if err != nil { + return + } + + for _, child := range children { + total += uint64(child.Size()) + loop(child) + } + } + + root, err := image.RootDir() + if err != nil { + return total, 0, count + } + + loop(root) + + return total, 0, count +} + +func (x *XFile) uniso(isoFile *iso9660.File, parent string) (uint64, []string, error) { itemName := filepath.Join(parent, isoFile.Name()) if isoFile.Name() == string([]byte{0}) { // rename root folder. @@ -56,7 +91,7 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er } files := []string{} - size := int64(0) + size := uint64(0) for _, child := range children { childSize, childFiles, err := x.uniso(child, itemName) @@ -74,7 +109,7 @@ func (x *XFile) uniso(isoFile *iso9660.File, parent string) (int64, []string, er return size, files, err } -func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string, error) { +func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (uint64, []string, error) { file := &file{ Path: x.clean(wfile), Data: isoFile.Reader(), @@ -93,6 +128,8 @@ func (x *XFile) unisofile(isoFile *iso9660.File, wfile string) (int64, []string, x.Debugf("Writing archived file: %s (bytes: %d)", file.Path, isoFile.Size()) size, err := x.write(file) + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + file.Path, size, x.prog.Files, int64(x.prog.Wrote)) return size, []string{file.Path}, err } diff --git a/progress.go b/progress.go new file mode 100644 index 00000000..dcf477e7 --- /dev/null +++ b/progress.go @@ -0,0 +1,143 @@ +package xtractr + +import ( + "fmt" + "io" +) + +const maxPercent = 100 + +// Progress provides data about an in-progress file extraction and/or decompression. +type Progress struct { + // Total uncompressed bytes in the archive. + // This number is not available in all archive types, and may be 0. + Total uint64 + // Compressed is the size of the archive file (compressed size). + // It may equal the Total (uncompressed) for non-compressed archives, like tar. + Compressed uint64 + // Wrote this many bytes to disk. + Wrote uint64 + // This many compressed bytes have been read from the archive. + Read uint64 + // Files (number of) written to disk. + Files int + // Count of files in archive. + // This number is not available in all archive types, and may be 0. + Count int + // Done is set to true in the final progress update. + Done bool + // This is the input file. Do not modify the data. + XFile *XFile + send func() +} + +// Percent returns the percent of bytes read or written. +func (p *Progress) Percent() (perc float64) { + if p.Total > 0 { + return float64(p.Wrote) / float64(p.Total) * maxPercent + } else if p.Compressed > 0 { + return float64(p.Read) / float64(p.Compressed) * maxPercent + } + + return 0 +} + +// ArchiveProgress is a helper/example function you can use in your code to print extraction percentages. +// @every - Should be a number between 1 and 50 or so. This controls how often to print the percentage. +// The values 1, 2, 5, 10, and 20 work best. +// @exit - If exit is true, then the for loop exit and the process returns when Progress.Done is true. +// Set `exit` true if you want a separate printer for each archive. A good reason is parallel extractions. +func ArchiveProgress(every float64, progress chan Progress, exit bool) { + var perc, last float64 + + const extra = 0.000000001 + + for prog := range progress { + if prog.Done && exit { + return + } + + if prog.Done { + last = 0 // reset for the next archive. + continue + } + + if perc = prog.Percent(); perc == maxPercent && last < maxPercent { + fmt.Printf("%.00f%%\n", perc) + + last = maxPercent + } + + if last == 0 && perc == 0 || perc > last+every { + fmt.Printf("%.00f%% ", perc) + last = perc + extra // we add extra so 0% only prints once. + } + } +} + +func (x *XFile) newProgress(total, compressed uint64, count int) *Progress { + x.prog = &Progress{Total: total, Compressed: compressed, Count: count, send: func() {}} + + if x.Progress != nil { + x.prog.send = func() { x.Progress(*x.prog) } + } + + if x.Updates != nil { + x.prog.send = func() { x.Updates <- *x.prog } + } + + return x.prog +} + +// progressWrapper wraps several io interfaces so we can count the bytes read and written to those interfaces. +type progressWrapper struct { + io.Writer + io.Reader + io.ReaderAt + *Progress +} + +func (p *progressWrapper) Write(data []byte) (n int, err error) { + defer p.send() + + size, err := p.Writer.Write(data) + p.Wrote += uint64(size) + + return size, err //nolint:wrapcheck +} + +func (p *progressWrapper) Read(data []byte) (n int, err error) { + defer p.send() + + size, err := p.Reader.Read(data) + p.Progress.Read += uint64(size) + + return size, err //nolint:wrapcheck +} + +func (p *progressWrapper) ReadAt(data []byte, off int64) (n int, err error) { + defer p.send() + + size, err := p.ReaderAt.ReadAt(data, off) + p.Progress.Read += uint64(size) + + return size, err //nolint:wrapcheck +} + +func (p *Progress) writer(writer io.Writer) io.Writer { + p.Files++ + return &progressWrapper{Writer: writer, Progress: p} +} + +func (p *Progress) reader(reader io.Reader) io.Reader { + return &progressWrapper{Reader: reader, Progress: p} +} + +func (p *Progress) readAter(reader io.ReaderAt) io.ReaderAt { + return &progressWrapper{ReaderAt: reader, Progress: p} +} + +func (p *Progress) done() { + p.Done = true + p.send() +} diff --git a/queue.go b/queue.go index 5970fb99..d60ad91c 100644 --- a/queue.go +++ b/queue.go @@ -52,7 +52,7 @@ type Response struct { // Extract Started (false) or Finished (true). Done bool // Size of data written. - Size int64 + Size uint64 // Temporary output folder. Output string // Items still in queue. @@ -313,7 +313,7 @@ func (x *Xtractr) decompressArchives(resp *Response) error { // processArchives extracts one archive at a time. // Returns list of archive files extracted, size of data written and files written. -func (x *Xtractr) processArchive(filename string, resp *Response) (int64, []string, []string, error) { +func (x *Xtractr) processArchive(filename string, resp *Response) (uint64, []string, []string, error) { if err := os.MkdirAll(resp.Output, x.config.DirMode); err != nil { return 0, nil, nil, fmt.Errorf("making output dir: %w", err) } diff --git a/rar.go b/rar.go index 2b87ebbb..d0326787 100644 --- a/rar.go +++ b/rar.go @@ -13,7 +13,7 @@ import ( ) // ExtractRAR attempts to extract a file as a rar file. -func ExtractRAR(xFile *XFile) (size int64, filesList, archiveList []string, err error) { +func ExtractRAR(xFile *XFile) (size uint64, filesList, archiveList []string, err error) { if len(xFile.Passwords) == 0 && xFile.Password == "" { return extractRAR(xFile) } @@ -55,29 +55,54 @@ func ExtractRAR(xFile *XFile) (size int64, filesList, archiveList []string, err } // extractRAR extracts a rar file. to a destination. This wraps github.com/nwaples/rardecode. -func extractRAR(xFile *XFile) (int64, []string, []string, error) { +func extractRAR(xFile *XFile) (uint64, []string, []string, error) { rarReader, err := rardecode.OpenReader(xFile.FilePath, xFile.Password) if err != nil { return 0, nil, nil, fmt.Errorf("rardecode.OpenReader: %w", err) } + + defer xFile.newProgress(getUncompressedRarSize(rarReader)).done() // this closes rarReader + + rarReader, err = rardecode.OpenReader(xFile.FilePath, xFile.Password) // open it again. + if err != nil { + return 0, nil, nil, fmt.Errorf("rardecode.OpenReader: %w", err) + } defer rarReader.Close() - size, files, err := xFile.unrar(rarReader) + files, err := xFile.unrar(rarReader) if err != nil { lastFile := xFile.FilePath if volumes := rarReader.Volumes(); len(volumes) > 0 { lastFile = volumes[len(volumes)-1] } - return size, files, rarReader.Volumes(), fmt.Errorf("%s: %w", lastFile, err) + return xFile.prog.Wrote, files, rarReader.Volumes(), fmt.Errorf("%s: %w", lastFile, err) } - return size, files, rarReader.Volumes(), nil + return xFile.prog.Wrote, files, rarReader.Volumes(), nil } -func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) { +func getUncompressedRarSize(rarReader *rardecode.ReadCloser) (total, compressed uint64, count int) { + defer rarReader.Close() + + for { + header, err := rarReader.Next() + if err != nil { + if errors.Is(err, io.EOF) { + return total, 0, count + } + + return total, 0, count + } + + total += uint64(header.UnPackedSize) + // compressed += uint64(header.PackedSize) + count++ + } +} + +func (x *XFile) unrar(rarReader *rardecode.ReadCloser) ([]string, error) { files := []string{} - size := int64(0) for { header, err := rarReader.Next() @@ -86,7 +111,7 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) break } - return size, files, fmt.Errorf("rarReader.Next: %w", err) + return files, fmt.Errorf("rarReader.Next: %w", err) } file := &file{ @@ -100,7 +125,7 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) //nolint:gocritic // this 1-argument filepath.Join removes a ./ prefix should there be one. if !strings.HasPrefix(file.Path, filepath.Join(x.OutputDir)) { // The file being written is trying to write outside of our base path. Malicious archive? - return size, files, fmt.Errorf("%s: %w: %s != %s (from: %s)", + return files, fmt.Errorf("%s: %w: %s != %s (from: %s)", x.FilePath, ErrInvalidPath, file.Path, x.OutputDir, header.Name) } @@ -108,25 +133,24 @@ func (x *XFile) unrar(rarReader *rardecode.ReadCloser) (int64, []string, error) x.Debugf("Writing archived directory: %s", file.Path) if err = x.mkDir(file.Path, header.Mode(), header.ModificationTime); err != nil { - return size, files, fmt.Errorf("making rar file dir: %w", err) + return files, fmt.Errorf("making rar file dir: %w", err) } continue } - x.Debugf("Writing archived file: %s (packed: %d, unpacked: %d)", file.Path, header.PackedSize, header.UnPackedSize) + x.Debugf("Writing archived file: %s (packed: %d, unpacked: %d)", + file.Path, header.PackedSize, header.UnPackedSize) fSize, err := x.write(file) if err != nil { - return size, files, err + return files, err } files = append(files, file.Path) - size += fSize - x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", file.Path, fSize, len(files), size) + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + file.Path, fSize, x.prog.Files, x.prog.Wrote) } - files, err := x.cleanup(files) - - return size, files, err + return x.cleanup(files) } diff --git a/rpm.go b/rpm.go index ab3413b8..570f147d 100644 --- a/rpm.go +++ b/rpm.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "os" "github.com/cavaliergopher/rpm" "github.com/klauspost/compress/zstd" @@ -21,17 +20,25 @@ var ( ) // ExtractRPM extract a file as a RedHat Package Manager file. -func ExtractRPM(xFile *XFile) (size int64, filesList []string, err error) { //nolint:cyclop - rpmFile, err := os.Open(xFile.FilePath) +func ExtractRPM(xFile *XFile) (size uint64, filesList []string, err error) { + osFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } - defer rpmFile.Close() + defer osFile.Close() + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + files, err := xFile.extractRPM(xFile.prog.reader(osFile)) + + return xFile.prog.Wrote, files, err +} + +func (x *XFile) extractRPM(rpmFile io.Reader) (filesList []string, err error) { //nolint:cyclop // Read the package headers pkg, err := rpm.Read(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("rpm.Read: %w", err) + return nil, fmt.Errorf("rpm.Read: %w", err) } // Check the RPM compression algorithm. @@ -39,57 +46,59 @@ func ExtractRPM(xFile *XFile) (size int64, filesList []string, err error) { //no case "xz": zipReader, err := xz.NewReader(rpmFile, 0) if err != nil { - return 0, nil, fmt.Errorf("xz.NewReader: %w", err) + return nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "gz", "gzip": zipReader, err := gzip.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) + return nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipReader.Close() - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "bz2", "bzip2": - return xFile.unrpm(bzip2.NewReader(rpmFile), pkg.PayloadFormat()) + return x.unrpm(bzip2.NewReader(rpmFile), pkg.PayloadFormat()) case "zstd", "zstandard", "zst", "Zstandard": zipReader, err := zstd.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("zstd.NewReader: %w", err) + return nil, fmt.Errorf("zstd.NewReader: %w", err) } defer zipReader.Close() - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "lzma2": zipReader, err := lzma.NewReader2(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("lzma.NewReader2: %w", err) + return nil, fmt.Errorf("lzma.NewReader2: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) case "lzma", "lzip": zipReader, err := lzma.NewReader(rpmFile) if err != nil { - return 0, nil, fmt.Errorf("lzma.NewReader: %w", err) + return nil, fmt.Errorf("lzma.NewReader: %w", err) } - return xFile.unrpm(zipReader, pkg.PayloadFormat()) + return x.unrpm(zipReader, pkg.PayloadFormat()) default: - return 0, nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMCompression, compression) + return nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMCompression, compression) } } -func (x *XFile) unrpm(reader io.Reader, format string) (size int64, filesList []string, err error) { +func (x *XFile) unrpm(reader io.Reader, format string) (filesList []string, err error) { // Check the archive format of the payload switch format { case "cpio": - return x.uncpio(reader) + filesList, err = x.uncpio(reader) case "tar": - return x.untar(reader) + filesList, err = x.untar(reader) case "ar": - return x.unAr(reader) + filesList, err = x.unAr(reader) default: - return 0, nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMArchiveFmt, format) + return nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMArchiveFmt, format) } + + return filesList, err } diff --git a/tar.go b/tar.go index 56314329..911ec3a2 100644 --- a/tar.go +++ b/tar.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "io" - "os" "strings" "time" @@ -17,96 +16,119 @@ import ( ) // ExtractTar extracts a raw (non-compressed) tar archive. -func ExtractTar(xFile *XFile) (size int64, filesList []string, err error) { - tarFile, err := os.Open(xFile.FilePath) +func ExtractTar(xFile *XFile) (size uint64, filesList []string, err error) { + tarFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer tarFile.Close() - return xFile.untar(tarFile) + defer xFile.newProgress(uint64(stat.Size()), uint64(stat.Size()), 0).done() + + files, err := xFile.untar(xFile.prog.reader(tarFile)) + + return xFile.prog.Wrote, files, err } // ExtractTarBzip extracts a bzip2-compressed tar archive. -func ExtractTarBzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarBzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - return xFile.untar(bzip2.NewReader(compressedFile)) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + files, err := xFile.untar(bzip2.NewReader(xFile.prog.reader(compressedFile))) + + return xFile.prog.Wrote, files, err } // ExtractTarXZ extracts an XZ-compressed tar archive (txz). -func ExtractTarXZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarXZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := xz.NewReader(compressedFile, 0) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := xz.NewReader(xFile.prog.reader(compressedFile), 0) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarZ extracts an LZW-compressed tar archive (tz). -func ExtractTarZ(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarZ(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := lzw.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := lzw.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("lzw.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarGzip extracts a gzip-compressed tar archive (tgz). -func ExtractTarGzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarGzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := gzip.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := gzip.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("gzip.NewReader: %w", err) } defer zipStream.Close() - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } // ExtractTarLzip extracts an LZIP-compressed tar archive (tlz). -func ExtractTarLzip(xFile *XFile) (size int64, filesList []string, err error) { - compressedFile, err := os.Open(xFile.FilePath) +func ExtractTarLzip(xFile *XFile) (size uint64, filesList []string, err error) { + compressedFile, stat, err := openStatFile(xFile.FilePath) if err != nil { - return 0, nil, fmt.Errorf("os.Open: %w", err) + return 0, nil, err } defer compressedFile.Close() - zipStream, err := lzma.NewReader(compressedFile) + defer xFile.newProgress(0, uint64(stat.Size()), 0).done() + + zipStream, err := lzma.NewReader(xFile.prog.reader(compressedFile)) if err != nil { return 0, nil, fmt.Errorf("xz.NewReader: %w", err) } - return xFile.untar(zipStream) + files, err := xFile.untar(zipStream) + + return xFile.prog.Wrote, files, err } -func (x *XFile) untar(reader io.Reader) (int64, []string, error) { +func (x *XFile) untar(reader io.Reader) ([]string, error) { tarReader := tar.NewReader(reader) files := []string{} - size := int64(0) for { header, err := tarReader.Next() @@ -115,25 +137,25 @@ func (x *XFile) untar(reader io.Reader) (int64, []string, error) { break } - return size, files, fmt.Errorf("%s: tarReader.Next: %w", x.FilePath, err) + return files, fmt.Errorf("%s: tarReader.Next: %w", x.FilePath, err) } fSize, err := x.untarFile(header, tarReader) if err != nil { - return size, files, err + return files, err } files = append(files, header.Name) - size += fSize - x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", header.Name, fSize, len(files), size) + x.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + header.Name, fSize, x.prog.Files, x.prog.Wrote) } files, err := x.cleanup(files) - return size, files, err + return files, err } -func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (int64, error) { +func (x *XFile) untarFile(header *tar.Header, tarReader *tar.Reader) (uint64, error) { file := &file{ Path: x.clean(header.Name), Data: tarReader, diff --git a/zip.go b/zip.go index df9b1123..c068c856 100644 --- a/zip.go +++ b/zip.go @@ -11,34 +11,45 @@ import ( /* How to extract a ZIP file. */ // ExtractZIP extracts a zip file.. to a destination. Simple enough. -func ExtractZIP(xFile *XFile) (size int64, filesList []string, err error) { +func ExtractZIP(xFile *XFile) (size uint64, filesList []string, err error) { zipReader, err := zip.OpenReader(xFile.FilePath) if err != nil { return 0, nil, fmt.Errorf("zip.OpenReader: %w", err) } defer zipReader.Close() + defer xFile.newProgress(getUncompressedZipSize(zipReader)).done() + files := []string{} - size = int64(0) for _, zipFile := range zipReader.File { fSize, wfile, err := xFile.unzip(zipFile) if err != nil { - return size, files, fmt.Errorf("%s: %w", xFile.FilePath, err) + return xFile.prog.Wrote, files, fmt.Errorf("%s: %w", xFile.FilePath, err) } //nolint:gosec // this is safe because we clean the paths. files = append(files, filepath.Join(xFile.OutputDir, zipFile.Name)) - size += fSize - xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", wfile, fSize, len(files), size) + xFile.Debugf("Wrote archived file: %s (%d bytes), total: %d files and %d bytes", + wfile, fSize, xFile.prog.Files, xFile.prog.Wrote) } files, err = xFile.cleanup(files) - return size, files, err + return xFile.prog.Wrote, files, err +} + +func getUncompressedZipSize(zipReader *zip.ReadCloser) (total, compressed uint64, count int) { + for _, zipFile := range zipReader.File { + total += zipFile.UncompressedSize64 + // compressed += zipFile.CompressedSize64 + count++ + } + + return total, 0, count } -func (x *XFile) unzip(zipFile *zip.File) (int64, string, error) { +func (x *XFile) unzip(zipFile *zip.File) (uint64, string, error) { zFile, err := zipFile.Open() if err != nil { return 0, zipFile.Name, fmt.Errorf("zipFile.Open: %w", err) From bcf556251a63bf51ef742c4c86c66e6bf6453116 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 02:12:15 -0700 Subject: [PATCH 2/9] a little cleaning --- progress.go | 3 +-- rpm.go | 14 ++++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/progress.go b/progress.go index dcf477e7..b81f3c47 100644 --- a/progress.go +++ b/progress.go @@ -44,7 +44,7 @@ func (p *Progress) Percent() (perc float64) { // ArchiveProgress is a helper/example function you can use in your code to print extraction percentages. // @every - Should be a number between 1 and 50 or so. This controls how often to print the percentage. -// The values 1, 2, 5, 10, and 20 work best. +// The values 1, 2, 4, 5, 10, 20 and 25 work best. // @exit - If exit is true, then the for loop exit and the process returns when Progress.Done is true. // Set `exit` true if you want a separate printer for each archive. A good reason is parallel extractions. func ArchiveProgress(every float64, progress chan Progress, exit bool) { @@ -64,7 +64,6 @@ func ArchiveProgress(every float64, progress chan Progress, exit bool) { if perc = prog.Percent(); perc == maxPercent && last < maxPercent { fmt.Printf("%.00f%%\n", perc) - last = maxPercent } diff --git a/rpm.go b/rpm.go index 570f147d..5a2d284f 100644 --- a/rpm.go +++ b/rpm.go @@ -21,15 +21,15 @@ var ( // ExtractRPM extract a file as a RedHat Package Manager file. func ExtractRPM(xFile *XFile) (size uint64, filesList []string, err error) { - osFile, stat, err := openStatFile(xFile.FilePath) + rpmFile, stat, err := openStatFile(xFile.FilePath) if err != nil { return 0, nil, err } - defer osFile.Close() + defer rpmFile.Close() defer xFile.newProgress(0, uint64(stat.Size()), 0).done() - files, err := xFile.extractRPM(xFile.prog.reader(osFile)) + files, err := xFile.extractRPM(xFile.prog.reader(rpmFile)) return xFile.prog.Wrote, files, err } @@ -91,14 +91,12 @@ func (x *XFile) unrpm(reader io.Reader, format string) (filesList []string, err // Check the archive format of the payload switch format { case "cpio": - filesList, err = x.uncpio(reader) + return x.uncpio(reader) case "tar": - filesList, err = x.untar(reader) + return x.untar(reader) case "ar": - filesList, err = x.unAr(reader) + return x.unAr(reader) default: return nil, fmt.Errorf("%w: %s", ErrUnsupportedRPMArchiveFmt, format) } - - return filesList, err } From 26c70e95ccf170dee4fd7b9996e9bab78c388ff1 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 08:21:17 -0700 Subject: [PATCH 3/9] fix tests --- 7z.go | 2 +- iso_test.go | 4 ++-- queue_test.go | 2 +- util_test.go | 2 +- zip_test.go | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/7z.go b/7z.go index b6e96948..774f9f89 100644 --- a/7z.go +++ b/7z.go @@ -47,7 +47,7 @@ func extract7z(xFile *XFile) (uint64, []string, []string, error) { return 0, nil, nil, fmt.Errorf("%s: os.Open: %w", xFile.FilePath, err) } - defer xFile.newProgress(getUncompressed7zSize(sevenZip)).done() // this closes rarReader + defer xFile.newProgress(getUncompressed7zSize(sevenZip)).done() // this closes sevenZip sevenZip, err = sevenzip.OpenReaderWithPassword(xFile.FilePath, xFile.Password) if err != nil { diff --git a/iso_test.go b/iso_test.go index 9fbf17fe..437d4ca2 100644 --- a/iso_test.go +++ b/iso_test.go @@ -23,7 +23,7 @@ func TestIso(t *testing.T) { require.NoError(t, err, "failed to cleanup writer") }() - size := int64(0) + size := uint64(0) walkErr := filepath.Walk(testFilesInfo.srcFilesDir, func(path string, info os.FileInfo, err error) error { require.NoError(t, err, "unexpected") @@ -37,7 +37,7 @@ func TestIso(t *testing.T) { fStat, err := fileToAdd.Stat() require.NoError(t, err, "failed to stat file") - size += fStat.Size() + size += uint64(fStat.Size()) err = writer.AddFile(fileToAdd, strings.TrimPrefix(fileToAdd.Name(), testFilesInfo.srcFilesDir)) require.NoError(t, err, "failed to add file") diff --git a/queue_test.go b/queue_test.go index 46b37acb..d23f9be3 100644 --- a/queue_test.go +++ b/queue_test.go @@ -23,7 +23,7 @@ var filesInTestArchive = []string{ const ( testFile = "test_data/archive.rar" - testDataSize = int64(20770) + testDataSize = uint64(20770) ) type testLogger struct{ t *testing.T } diff --git a/util_test.go b/util_test.go index 89a001b5..fe6987f3 100644 --- a/util_test.go +++ b/util_test.go @@ -14,7 +14,7 @@ import ( type testFilesInfo struct { srcFilesDir string dstFilesDir string - dataSize int64 + dataSize uint64 fileCount int archiveCount int } diff --git a/zip_test.go b/zip_test.go index c5939566..1769fe75 100644 --- a/zip_test.go +++ b/zip_test.go @@ -33,7 +33,7 @@ func makeZipFile(t *testing.T) testFilesInfo { t.Helper() const ( - dataSize = int64(21) + dataSize = uint64(21) fileCount = 5 archiveCount = 1 ) From 6e4a403547d5d41c4c1c60e7379aac213452b753 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 09:00:16 -0700 Subject: [PATCH 4/9] give queue and input --- queue.go | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/queue.go b/queue.go index d60ad91c..8177169d 100644 --- a/queue.go +++ b/queue.go @@ -31,7 +31,7 @@ type Xtract struct { RecurseISO bool // Folder to extract data. Default is same level as SearchPath with a suffix. ExtractTo string - // Leave files in temporary folder? false=move files back to Searchpath + // Leave files in temporary folder? false=move files back to Filter.Path // Moving files back will cause the "extracted files" returned to only contain top-level items. TempFolder bool // Delete Archives after successful extraction? Be careful. @@ -42,11 +42,20 @@ type Xtract struct { CBFunction func(*Response) // Callback Channel, msg sent twice per queued item. CBChannel chan *Response + // Progress is called periodically during file extraction. + // Contains info about the progress of the extraction. + // This is not called if an Updates channel is also provided. + // Shared by all archive file extractions that occur with this Xtract. + Progress func(Progress) + // If an Updates channel is provided, all Progress updates are sent to it. + // Contains info about the progress of the extraction. + // Shared by all archive file extractions that occur with this Xtract. + Updates chan Progress } // Response is sent to the call-back function. The first CBFunction call is just // a notification that the extraction has started. You can determine it's the first -// call by chcking Response.Done. false = started, true = finished. When done=false +// call by checking Response.Done. false = started, true = finished. When done=false // the only other meaningful data provided is the re.Archives, re.Output and re.Queue. type Response struct { // Extract Started (false) or Finished (true). @@ -71,6 +80,8 @@ type Response struct { Error error // Copied from input data. X *Xtract + // Progress information about the extraction. + Progress Progress } // Extract is how external code begins an extraction process against a path. @@ -328,6 +339,8 @@ func (x *Xtractr) processArchive(filename string, resp *Response) (uint64, []str Passwords: resp.X.Passwords, Password: resp.X.Password, log: x.config.Logger, + Updates: resp.X.Updates, + Progress: resp.X.Progress, }) if err != nil { x.DeleteFiles(resp.Output) // clean up the mess after an error and bail. From b9ab11614791c6583c7c6ff87120ccc83ab920bb Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 10:49:56 -0700 Subject: [PATCH 5/9] do not need this here --- queue.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/queue.go b/queue.go index 8177169d..50145728 100644 --- a/queue.go +++ b/queue.go @@ -80,8 +80,6 @@ type Response struct { Error error // Copied from input data. X *Xtract - // Progress information about the extraction. - Progress Progress } // Extract is how external code begins an extraction process against a path. From 7c253b295785faf8385c2e88e0356df1fa0f5958 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 20 Apr 2025 13:07:25 -0700 Subject: [PATCH 6/9] bug fixes --- 7z.go | 4 ++-- progress.go | 3 ++- queue.go | 4 ++++ rar.go | 4 ++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/7z.go b/7z.go index 774f9f89..19681aa0 100644 --- a/7z.go +++ b/7z.go @@ -60,7 +60,7 @@ func extract7z(xFile *XFile) (uint64, []string, []string, error) { for _, zipFile := range sevenZip.File { fSize, wfile, err := xFile.un7zip(zipFile) if err != nil { - return xFile.prog.Wrote, files, sevenZip.Volumes(), fmt.Errorf("%s: %w", xFile.FilePath, err) + return xFile.prog.Wrote, files, []string{xFile.FilePath}, fmt.Errorf("%s: %w", xFile.FilePath, err) } files = append(files, filepath.Join(xFile.OutputDir, zipFile.Name)) @@ -70,7 +70,7 @@ func extract7z(xFile *XFile) (uint64, []string, []string, error) { files, err = xFile.cleanup(files) - return xFile.prog.Wrote, files, sevenZip.Volumes(), err + return xFile.prog.Wrote, files, []string{xFile.FilePath}, err } func getUncompressed7zSize(reader *sevenzip.ReadCloser) (total, compressed uint64, count int) { diff --git a/progress.go b/progress.go index b81f3c47..bd0dcba7 100644 --- a/progress.go +++ b/progress.go @@ -64,6 +64,7 @@ func ArchiveProgress(every float64, progress chan Progress, exit bool) { if perc = prog.Percent(); perc == maxPercent && last < maxPercent { fmt.Printf("%.00f%%\n", perc) + last = maxPercent } @@ -75,7 +76,7 @@ func ArchiveProgress(every float64, progress chan Progress, exit bool) { } func (x *XFile) newProgress(total, compressed uint64, count int) *Progress { - x.prog = &Progress{Total: total, Compressed: compressed, Count: count, send: func() {}} + x.prog = &Progress{Total: total, Compressed: compressed, Count: count, send: func() {}, XFile: x} if x.Progress != nil { x.prog.send = func() { x.Progress(*x.prog) } diff --git a/queue.go b/queue.go index 50145728..87e1c00e 100644 --- a/queue.go +++ b/queue.go @@ -178,6 +178,8 @@ func (x *Xtractr) decompressFolders(resp *Response) error { DeleteOrig: resp.X.DeleteOrig, TempFolder: resp.X.TempFolder, LogFile: resp.X.LogFile, + Updates: resp.X.Updates, + Progress: resp.X.Progress, }, Started: resp.Started, Output: output, @@ -273,6 +275,8 @@ func (x *Xtractr) decompressFiles(resp *Response) error { X: &Xtract{ Password: resp.X.Password, Passwords: resp.X.Passwords, + Progress: resp.X.Progress, + Updates: resp.X.Updates, }, Started: resp.Started, Output: resp.Output, diff --git a/rar.go b/rar.go index d0326787..ec868fc5 100644 --- a/rar.go +++ b/rar.go @@ -76,10 +76,10 @@ func extractRAR(xFile *XFile) (uint64, []string, []string, error) { lastFile = volumes[len(volumes)-1] } - return xFile.prog.Wrote, files, rarReader.Volumes(), fmt.Errorf("%s: %w", lastFile, err) + return xFile.prog.Wrote, files, []string{xFile.FilePath}, fmt.Errorf("%s: %w", lastFile, err) } - return xFile.prog.Wrote, files, rarReader.Volumes(), nil + return xFile.prog.Wrote, files, []string{xFile.FilePath}, nil } func getUncompressedRarSize(rarReader *rardecode.ReadCloser) (total, compressed uint64, count int) { From dfd640da4ba16bdf2480b68b0c0f23d61fcd0bc0 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Mon, 21 Apr 2025 18:16:48 -0700 Subject: [PATCH 7/9] add more features to printer --- progress.go | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/progress.go b/progress.go index bd0dcba7..aceca5ec 100644 --- a/progress.go +++ b/progress.go @@ -45,32 +45,47 @@ func (p *Progress) Percent() (perc float64) { // ArchiveProgress is a helper/example function you can use in your code to print extraction percentages. // @every - Should be a number between 1 and 50 or so. This controls how often to print the percentage. // The values 1, 2, 4, 5, 10, 20 and 25 work best. +// @reset - If set true, a `\r` is printed before each line, which will reset it on most terminals. // @exit - If exit is true, then the for loop exit and the process returns when Progress.Done is true. // Set `exit` true if you want a separate printer for each archive. A good reason is parallel extractions. -func ArchiveProgress(every float64, progress chan Progress, exit bool) { - var perc, last float64 +func ArchiveProgress(every float64, progress chan Progress, reset, exit bool) { + var ( + perc, last float64 + pre string + mod = "%s%.0f%% " + ) const extra = 0.000000001 + if reset { + pre = "\r\033[K" + } + + if every < 1 { + mod = "%s%.1f%% " + } + for prog := range progress { if prog.Done && exit { + fmt.Println() return } if prog.Done { + fmt.Println() last = 0 // reset for the next archive. + continue } if perc = prog.Percent(); perc == maxPercent && last < maxPercent { - fmt.Printf("%.00f%%\n", perc) - last = maxPercent + fmt.Printf(mod, pre, perc) } if last == 0 && perc == 0 || perc > last+every { - fmt.Printf("%.00f%% ", perc) last = perc + extra // we add extra so 0% only prints once. + fmt.Printf(mod, pre, perc) } } } From a9415f440cef4e3ecd2dd0e2ad12ea6e3a455359 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Mon, 21 Apr 2025 18:24:36 -0700 Subject: [PATCH 8/9] lint --- progress.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/progress.go b/progress.go index aceca5ec..c380a3c4 100644 --- a/progress.go +++ b/progress.go @@ -48,7 +48,7 @@ func (p *Progress) Percent() (perc float64) { // @reset - If set true, a `\r` is printed before each line, which will reset it on most terminals. // @exit - If exit is true, then the for loop exit and the process returns when Progress.Done is true. // Set `exit` true if you want a separate printer for each archive. A good reason is parallel extractions. -func ArchiveProgress(every float64, progress chan Progress, reset, exit bool) { +func ArchiveProgress(every float64, progress chan Progress, reset, exit bool) { //nolint:cyclop var ( perc, last float64 pre string @@ -80,11 +80,13 @@ func ArchiveProgress(every float64, progress chan Progress, reset, exit bool) { if perc = prog.Percent(); perc == maxPercent && last < maxPercent { last = maxPercent + fmt.Printf(mod, pre, perc) } if last == 0 && perc == 0 || perc > last+every { last = perc + extra // we add extra so 0% only prints once. + fmt.Printf(mod, pre, perc) } } From 1e2b1ed89c4a7ddf72e36758d2727065976d9d42 Mon Sep 17 00:00:00 2001 From: David Newhall II Date: Sun, 14 Sep 2025 19:25:48 -0700 Subject: [PATCH 9/9] Add delay before moving extracted files --- queue.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/queue.go b/queue.go index 87e1c00e..d4cba9c8 100644 --- a/queue.go +++ b/queue.go @@ -96,6 +96,8 @@ func (x *Xtractr) Extract(extract *Xtract) (int, error) { return queueSize, nil } +const fsSyncDelay = 10 * time.Second + // processQueue runs in a go routine, 'x.Parallel' times, // and watches for things to extract. func (x *Xtractr) processQueue() { @@ -364,6 +366,7 @@ func (x *Xtractr) cleanupProcessedArchives(resp *Response) error { var err error if !resp.X.TempFolder { + time.Sleep(fsSyncDelay) // Wait for file system to catch up/sync. // If TempFolder is false then move the files back to the original location. resp.NewFiles, err = x.MoveFiles(resp.Output, resp.X.Path, false) }