openzfs · GarthSnyder · Mar 6, 2026
diff --git a/cmd/zstream/scripts/add-xattrs.py b/cmd/zstream/scripts/add-xattrs.py
@@ -0,0 +1,91 @@
+#!/tmp/zstream-venv/bin/python3
+"""Add random extended attributes to files until TARGET_BYTES (1024) bytes of xattr values are added."""
+
+import argparse
+import os
+import random
+import sys
+from lorem_text import lorem
+
+ADJECTIVES = [
+    "boogie", "funky", "wobbly", "snazzy", "jazzy", "groovy", "zippy",
+    "bouncy", "fluffy", "crunchy", "sparkly", "fuzzy", "spiffy", "dandy",
+    "peppy", "snappy", "sassy", "zesty", "swanky", "nifty", "plucky",
+    "quirky", "wacky", "goofy", "dizzy", "breezy", "cheery", "perky",
+    "frisky", "chirpy", "feisty", "jolly", "lively", "merry", "spunky",
+    "zippy", "vivid", "brisk", "sunny", "witty", "kinky",
+]
+
+NOUNS = [
+    "woogie", "monkey", "noodle", "pickle", "muffin", "waffle", "pebble",
+    "wobble", "doodle", "tangle", "giggle", "wiggle", "jiggle", "sparkle",
+    "crinkle", "twinkle", "frizzle", "drizzle", "sizzle", "fizzle",
+    "puddle", "bubble", "muddle", "huddle", "cuddle", "juggle", "muggle",
+    "snuggle", "tuggle", "buggle", "nugget", "widget", "gadget", "gibbet",
+    "trinket", "bracket", "racket", "jacket", "ticket", "cricket", "thicket",
+    "biscuit", "circuit", "summit", "muppet", "trumpet", "basket", "casket",
+]
+
+TARGET_BYTES = 1024
+
+
+def random_attr_name(used: set) -> str:
+    for _ in range(1000):
+        name = f"user.{random.choice(ADJECTIVES)}-{random.choice(NOUNS)}"
+        if name not in used:
+            return name
+    base = f"user.{random.choice(ADJECTIVES)}-{random.choice(NOUNS)}"
+    i = 2
+    while f"{base}-{i}" in used:
+        i += 1
+    return f"{base}-{i}"
+
+
+def random_value(length: int) -> bytes:
+    # Pull words from lorem sentences and trim/pad to exact length
+    text = ""
+    while len(text) < length:
+        text += lorem.sentence() + " "
+    return text[:length].encode()
+
+
+def add_xattrs(path: str) -> int:
+    """Add xattrs to path until TARGET_BYTES total value bytes added. Returns bytes added."""
+    used_names = set()
+    total = 0
+    while total < TARGET_BYTES:
+        remaining = TARGET_BYTES - total
+        length = min(random.randint(40, 200), remaining) if remaining < 40 else random.randint(40, min(200, remaining))
+        # If remaining < 40, just do one final attr to hit the target
+        if remaining < 40:
+            length = remaining
+        name = random_attr_name(used_names)
+        used_names.add(name)
+        value = random_value(length)
+        os.setxattr(path, name, value)
+        total += len(value)
+    return total
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=f"Add random xattrs to files until {TARGET_BYTES} bytes of xattr values are added."
+    )
+    parser.add_argument("files", nargs="+", help="Files to annotate with xattrs")
+    args = parser.parse_args()
+
+    errors = 0
+    for path in args.files:
+        try:
+            added = add_xattrs(path)
+            print(f"  {path}  ({added:,} bytes in xattrs)")
+        except OSError as e:
+            print(f"  {path}  error: {e}", file=sys.stderr)
+            errors += 1
+
+    if errors:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cmd/zstream/scripts/gen-lorem-files.py b/cmd/zstream/scripts/gen-lorem-files.py
@@ -0,0 +1,83 @@
+#!/tmp/zstream-venv/bin/python3
+"""Generate files with random names filled with lorem ipsum paragraphs."""
+
+import argparse
+import random
+import sys
+from pathlib import Path
+from lorem_text import lorem
+
+ADJECTIVES = [
+    "boogie", "funky", "wobbly", "snazzy", "jazzy", "groovy", "zippy",
+    "bouncy", "fluffy", "crunchy", "sparkly", "fuzzy", "spiffy", "dandy",
+    "peppy", "snappy", "sassy", "zesty", "swanky", "nifty", "plucky",
+    "quirky", "wacky", "goofy", "dizzy", "breezy", "cheery", "perky",
+    "frisky", "chirpy", "feisty", "jolly", "lively", "merry", "spunky",
+    "frisky", "zippy", "vivid", "brisk", "sunny", "witty", "kinky",
+]
+
+NOUNS = [
+    "woogie", "monkey", "noodle", "pickle", "muffin", "waffle", "pebble",
+    "wobble", "doodle", "tangle", "giggle", "wiggle", "jiggle", "sparkle",
+    "crinkle", "twinkle", "frizzle", "drizzle", "sizzle", "fizzle",
+    "puddle", "bubble", "muddle", "huddle", "cuddle", "juggle", "muggle",
+    "snuggle", "tuggle", "buggle", "nugget", "widget", "gadget", "gibbet",
+    "trinket", "bracket", "racket", "jacket", "ticket", "cricket", "thicket",
+    "biscuit", "circuit", "summit", "muppet", "trumpet", "basket", "casket",
+]
+
+def random_name(used: set) -> str:
+    for _ in range(1000):
+        name = f"{random.choice(ADJECTIVES)}-{random.choice(NOUNS)}"
+        if name not in used:
+            return name
+    # Fallback: append a number
+    base = f"{random.choice(ADJECTIVES)}-{random.choice(NOUNS)}"
+    i = 2
+    while f"{base}-{i}" in used:
+        i += 1
+    return f"{base}-{i}"
+
+
+def fill_file(path: Path, target_size: int, repeat=False) -> None:
+    content_parts = []
+    total = 0
+    para = lorem.paragraph()
+    while total < target_size:
+        content_parts.append(para)
+        total += len(para) + 1  # +1 for newline
+        if not repeat:
+             para = lorem.paragraph()
+    path.write_text("\n\n".join(content_parts) + "\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate files with random names and lorem ipsum content."
+    )
+    parser.add_argument("count", type=int, help="Number of files to create")
+    parser.add_argument("-d", "--directory", default=".", help="Target directory (default: .)")
+    parser.add_argument("-r", "--repeat", action="store_true", help="Fill files with reps of a single paragraph")
+    parser.add_argument("--min-size", type=int, default=16384, help="Minimum file size in bytes (default: 2048)")
+    parser.add_argument("--max-size", type=int, default=128000, help="Maximum file size in bytes (default: 128000)")
+    args = parser.parse_args()
+
+    if args.min_size >= args.max_size:
+        print(f"error: min-size ({args.min_size}) must be less than max-size ({args.max_size})", file=sys.stderr)
+        sys.exit(1)
+
+    directory = Path(args.directory)
+    directory.mkdir(parents=True, exist_ok=True)
+
+    used_names = set()
+    for i in range(args.count):
+        name = random_name(used_names)
+        used_names.add(name)
+        target_size = random.randint(args.min_size, args.max_size)
+        path = directory / name
+        fill_file(path, target_size, args.repeat)
+        print(f"  {path}  ({path.stat().st_size:,} bytes)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cmd/zstream/scripts/make-all-records-streams.sh b/cmd/zstream/scripts/make-all-records-streams.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Build pool "test" on <device> and save two send streams under /tmp.
+if [ $# -ne 1 ]; then
+	echo "Usage: $0 <device>" >&2
+	exit 1
+fi
+
+DEVICE="$1"
+SCRIPTDIR="$(cd "$(dirname "$0")" && pwd)"
+# Stop if the pool cannot be created: every later step writes under /test.
+zpool create -o ashift=12 test "$DEVICE" || exit 1
+zfs set compression=on xattr=sa test
+zfs create test/source
+
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/source --min-size 2048 \
+    --max-size 32000 3
+"$SCRIPTDIR/add-xattrs.py" /test/source/*
+echo "very small" > /test/source/small
+echo "password" > /test/source/to-be-redacted
+chmod 400 /test/source/to-be-redacted
+# Snapshot the source, clone it, and modify the clone.
+zfs snapshot -r test/source@baseline
+zfs clone test/source@baseline test/redacted
+rm /test/redacted/to-be-redacted
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/redacted --min-size 4096 \
+    --max-size 32000 3
+"$SCRIPTDIR/add-xattrs.py" /test/redacted/*
+cd /test/redacted || exit 1
+tar cf /tmp/dups.tar .
+mkdir copies
+cd copies || exit 1
+tar xvf /tmp/dups.tar
+# Add an encrypted child dataset whose key file lives in the clone.
+echo "password" > /test/redacted/new-key
+zfs create -o encryption=on -o keylocation=file:///test/redacted/new-key \
+    -o keyformat=passphrase test/redacted/encrypted
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/redacted/encrypted 3
+echo "very small" > /test/redacted/encrypted/small-encrypted
+# "$SCRIPTDIR/add-xattrs.py" /test/redacted/encrypted/*
+
+zfs snapshot -r test/redacted@clean
+
+zfs redact test/source@baseline redaction-bookmark test/redacted@clean
+zfs send -ce --redact redaction-bookmark test/source@baseline > /tmp/all-record-types-base.zsend
+zfs send -Rcew -i test/source@baseline test/redacted@clean > /tmp/all-record-types-incr.zsend
diff --git a/cmd/zstream/scripts/make-decompression-streams.sh b/cmd/zstream/scripts/make-decompression-streams.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Build pool "test" on <device> and save two `zfs send -cw` streams under /tmp.
+if [ $# -ne 1 ]; then
+	echo "Usage: $0 <device>" >&2
+	exit 1
+fi
+
+DEVICE="$1"
+SCRIPTDIR="$(cd "$(dirname "$0")" && pwd)"
+# Stop if the pool cannot be created: every later step writes under /test.
+zpool create -o ashift=12 test "$DEVICE" || exit 1
+echo "password" > /test/password
+
+zfs create -o compression=zstd-5 test/unencrypted
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/unencrypted --min-size 12000 \
+    --max-size 40000 2
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/unencrypted --min-size 140000 \
+    --max-size 160000 1
+
+zfs create -o compression=lz4 -o encryption=on -o keylocation=file:///test/password -o keyformat=passphrase test/encrypted
+"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/encrypted --min-size 12000 \
+    --max-size 40000 3
+
+zfs snapshot -r test@decompression
+zfs send -cw test/unencrypted@decompression > /tmp/decompression.zsend
+zfs send -cw test/encrypted@decompression > /tmp/decompression-crypt.zsend
diff --git a/cmd/zstream/scripts/make-dump-files.py b/cmd/zstream/scripts/make-dump-files.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Run old and new zstream dump -v on stream files, producing abbreviated dump outputs."""
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+def abbreviate(filename: str) -> str:
+    """Split filename at dashes, take the first letter of each segment, lowercased."""
+    stem = Path(filename).stem
+    # Strip common compression suffixes to get the logical stem
+    for ext in (".zfs", ".gz", ".bz2", ".xz", ".zst", ".lz4"):
+        if stem.endswith(ext):
+            stem = stem[: -len(ext)]
+    return "".join(seg[0] for seg in stem.split("-") if seg).lower()
+
+def run_dump(zstream: Path, stream: Path, output: Path) -> bool:
+    """Run `zstream dump -v < stream > output`.  Returns True on success."""
+    try:
+        with open(stream, "rb") as inf, open(output, "w") as outf:
+            proc = subprocess.run(
+                [str(zstream), "dump", "-v"],
+                stdin=inf,
+                stdout=outf,
+                stderr=outf,
+            )
+        if proc.returncode != 0:
+            print(
+                f"  WARNING: {zstream} exited {proc.returncode} for {stream.name}",
+                file=sys.stderr,
+            )
+            if proc.stderr:
+                print(f"    stderr: {proc.stderr.decode(errors='replace').rstrip()}",
+                      file=sys.stderr)
+        return True
+    except Exception as e:
+        print(f"  ERROR: {e}", file=sys.stderr)
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run old and new zstream dump -v on stream files."
+    )
+    parser.add_argument("old_zstream", type=Path, help="Path to old zstream binary")
+    parser.add_argument("new_zstream", type=Path, help="Path to new zstream binary")
+    parser.add_argument(
+        "streams", nargs="+", type=Path, help="Compressed stream files to process"
+    )
+    args = parser.parse_args()
+
+    for zs in (args.old_zstream, args.new_zstream):
+        if not zs.is_file():
+            parser.error(f"zstream binary not found: {zs}")
+
+    for stream in args.streams:
+        if not stream.is_file():
+            print(f"Skipping missing file: {stream}", file=sys.stderr)
+            continue
+
+        abbrev = abbreviate(stream.name)
+        out_dir = stream.parent
+
+        old_out = out_dir / f"{abbrev}-old.dump"
+        new_out = out_dir / f"{abbrev}-new.dump"
+
+        print(f"{stream.name} -> {abbrev}")
+
+        print(f"  old: {old_out}")
+        run_dump(args.old_zstream, stream, old_out)
+
+        print(f"  new: {new_out}")
+        run_dump(args.new_zstream, stream, new_out)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cmd/zstream/scripts/make-long-payloads.sh b/cmd/zstream/scripts/make-long-payloads.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Build pool "test" on <device> and save a `zfs send -L` stream under /tmp.
+if [ $# -ne 1 ]; then
+	echo "Usage: $0 <device>" >&2
+	exit 1
+fi
+
+DEVICE="$1"
+SCRIPTDIR="$(cd "$(dirname "$0")" && pwd)"
+# Stop if the pool cannot be created: every later step writes under /test.
+zpool create -o ashift=12 test "$DEVICE" || exit 1
+
+zfs set compression=off recordsize=16MiB test
+
+# We are testing 8MB blocks, so write one short file, one 8.5MB
+# file, and one 24.5MB file.
+
+"$SCRIPTDIR/gen-lorem-files.py" -d /test -r --min-size 20000 \
+    --max-size 24000 1
+"$SCRIPTDIR/gen-lorem-files.py" -d /test -r --min-size 8500000 \
+    --max-size 8510000 1
+"$SCRIPTDIR/gen-lorem-files.py" -d /test -r --min-size 24500000 \
+    --max-size 24510000 1
+
+zfs snapshot test@long-payloads
+zfs send -L test@long-payloads > /tmp/long-payloads.zsend
diff --git a/cmd/zstream/scripts/make-venv.sh b/cmd/zstream/scripts/make-venv.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Create the virtualenv at /tmp/zstream-venv and install lorem_text into it.
+python3 -m venv /tmp/zstream-venv || exit 1  # never pip-install outside the venv
+. /tmp/zstream-venv/bin/activate
+pip install lorem_text
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
@@ -1003,7 +1003,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
     'rsend_030_pos', 'rsend_031_pos', 'rsend-exclude_001_pos',
     'rsend-exclude_002_pos', 'send-c_verify_ratio',
     'send-c_verify_contents', 'send-c_props', 'send-c_incremental',
-    'send-c_volume', 'send-c_zstream_recompress', 'send-c_zstreamdump',
+    'send-c_volume',
     'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
     'send-c_mixed_compression', 'send-c_stream_size_estimate',
     'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
@@ -1012,7 +1012,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
     'send_encrypted_props', 'send_encrypted_truncated_files',
     'send_freeobjects', 'send_realloc_files', 'send_realloc_encrypted_files',
     'send_spill_block', 'send_holds', 'send_hole_birth', 'send_mixed_raw',
-    'send-wR_encrypted_zvol', 'send-zstream_drop_record',
+    'send-wR_encrypted_zvol',
     'send_partial_dataset', 'send_invalid',
     'send_large_blocks_incremental', 'send_large_blocks_initial',
     'send_large_microzap_incremental', 'send_large_microzap_transitive',
@@ -1131,6 +1131,20 @@ tests = ['zoned_uid_001_pos', 'zoned_uid_002_pos', 'zoned_uid_003_pos',
     'zoned_uid_029_neg', 'zoned_uid_031_pos']
 tags = ['functional', 'zoned_uid']
 
+[tests/functional/zstream]
+tests = ['zstream_checksum_001_pos',
+    'zstream_decompress_001_pos', 'zstream_decompress_002_pos',
+    'zstream_decompress_003_neg', 'zstream_decompress_004_pos',
+    'zstream_decompress_005_pos', 'zstream_decompress_006_neg',
+    'zstream_drop_record_001_pos',
+    'zstream_dump_001_pos', 'zstream_dump_002_pos',
+    'zstream_dump_003_pos', 'zstream_dump_004_neg',
+    'zstream_recompress_001_pos', 'zstream_recompress_002_pos',
+    'zstream_recompress_003_pos', 'zstream_recompress_004_pos',
+    'zstream_recompress_005_pos',
+    'zstream_redup_001_pos']
+tags = ['functional', 'zstream']
+
 [tests/functional/zvol/zvol_ENOSPC]
 tests = ['zvol_ENOSPC_001_pos']
 tags = ['functional', 'zvol', 'zvol_ENOSPC']