Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion cmd/zstream/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,26 @@ CPPCHECKTARGETS += zstream
# Source and header files that make up the zstream program.
zstream_SOURCES = \
	%D%/zstream.c \
	%D%/zstream.h \
	%D%/zstream_byteswap.c \
	%D%/zstream_byteswap.h \
	%D%/zstream_chain.c \
	%D%/zstream_chain.h \
	%D%/zstream_decompress.c \
	%D%/zstream_drop_record.c \
	%D%/zstream_dump.c \
	%D%/zstream_fletcher4.c \
	%D%/zstream_fletcher4.h \
	%D%/zstream_io.c \
	%D%/zstream_io.h \
	%D%/zstream_modules.h \
	%D%/zstream_recompress.c \
	%D%/zstream_recompress.h \
	%D%/zstream_redup.c \
	%D%/zstream_token.c \
	%D%/zstream_util.c \
	%D%/zstream_util.h \
	%D%/zstream_validate.c \
	%D%/zstream_validate.h

zstream_LDADD = \
libzfs.la \
Expand Down
112 changes: 112 additions & 0 deletions cmd/zstream/scripts/add-xattrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/tmp/zstream-venv/bin/python3
"""Add at least 1024 bytes of random extended attributes to files."""

#
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the Common
# Development and Distribution License ("CDDL"), version 1.0. You may only use
# this file in accordance with the terms of version 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this source. A
# copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2026 by Garth Snyder. All rights reserved.
#

import argparse
import os
import random
import sys
from lorem_text import lorem

# Word lists combined into "user.<adjective>-<noun>" attribute names.
# Every entry is unique so that random.choice() draws each word with equal
# probability and len(ADJECTIVES) * len(NOUNS) is the true number of
# distinct names available.
ADJECTIVES = [
    "boogie", "funky", "wobbly", "snazzy", "jazzy", "groovy", "zippy",
    "bouncy", "fluffy", "crunchy", "sparkly", "fuzzy", "spiffy", "dandy",
    "peppy", "snappy", "sassy", "zesty", "swanky", "nifty", "plucky",
    "quirky", "wacky", "goofy", "dizzy", "breezy", "cheery", "perky",
    "frisky", "chirpy", "feisty", "jolly", "lively", "merry", "spunky",
    "vivid", "brisk", "sunny", "witty", "kinky",
]

NOUNS = [
    "woogie", "monkey", "noodle", "pickle", "muffin", "waffle", "pebble",
    "wobble", "doodle", "tangle", "giggle", "wiggle", "jiggle", "sparkle",
    "crinkle", "twinkle", "frizzle", "drizzle", "sizzle", "fizzle",
    "puddle", "bubble", "muddle", "huddle", "cuddle", "juggle", "muggle",
    "snuggle", "tuggle", "buggle", "nugget", "widget", "gadget", "gibbet",
    "trinket", "bracket", "racket", "jacket", "ticket", "cricket", "thicket",
    "biscuit", "circuit", "summit", "muppet", "trumpet", "basket", "casket",
]

# Number of xattr value bytes written to each file.
TARGET_BYTES = 1024


def random_attr_name(used: set) -> str:
    """Pick a "user.<adjective>-<noun>" attribute name that is not in *used*.

    Up to 1000 random combinations are tried.  If all of them are already
    taken, one more combination is drawn and the smallest numeric suffix
    (starting at 2) that makes it unused is appended.  The caller is
    responsible for adding the returned name to *used*.
    """
    def draw() -> str:
        # Adjective first, then noun, so the random stream is consumed
        # in a fixed order.
        adjective = random.choice(ADJECTIVES)
        noun = random.choice(NOUNS)
        return f"user.{adjective}-{noun}"

    attempts_left = 1000
    while attempts_left > 0:
        attempts_left -= 1
        candidate = draw()
        if candidate not in used:
            return candidate

    # Every random draw collided; disambiguate with a numeric suffix.
    stem = draw()
    suffix = 2
    while True:
        numbered = f"{stem}-{suffix}"
        if numbered not in used:
            return numbered
        suffix += 1


def random_value(length: int) -> bytes:
    """Return exactly *length* bytes of lorem ipsum text.

    Sentences from ``lorem.sentence()`` are appended, each followed by a
    single space, until at least *length* bytes are available; the result
    is then truncated (never padded).  Both the accounting and the
    truncation operate on the encoded bytes, so the result is *length*
    bytes long even if a sentence contains multi-byte characters (in which
    case the final character may be cut mid-sequence).

    A non-positive *length* yields ``b""``.
    """
    chunks = []
    available = 0
    while available < length:
        chunk = (lorem.sentence() + " ").encode()
        chunks.append(chunk)
        available += len(chunk)
    return b"".join(chunks)[:length]


def add_xattrs(path: str) -> int:
    """Attach random extended attributes to *path*.

    Attributes are added one at a time, each with a value of 40 to 200
    bytes (the last one may be shorter), until exactly TARGET_BYTES of
    attribute values have been written.

    Returns the total number of value bytes written.

    Raises OSError if the existing attributes cannot be listed or a new
    attribute cannot be set.
    """
    # Reserve the names already present on the file: reusing one would
    # replace the existing attribute instead of adding a new one.
    used_names = set(os.listxattr(path))
    total = 0
    while total < TARGET_BYTES:
        remaining = TARGET_BYTES - total
        if remaining < 40:
            # Too little left for a full-sized value; write the remainder.
            length = remaining
        else:
            length = random.randint(40, min(200, remaining))
        name = random_attr_name(used_names)
        used_names.add(name)
        value = random_value(length)
        os.setxattr(path, name, value)
        total += len(value)
    return total


def main():
    """Command-line entry point: add random xattrs to each named file.

    A line is printed to stdout for every file processed.  An OSError
    raised while handling a file is reported on stderr and processing
    continues with the next file; the process exits with status 1 if any
    file failed.
    """
    cli = argparse.ArgumentParser(
        description=f"Add random xattrs to files until {TARGET_BYTES} bytes "
                    "of xattr values are added."
    )
    cli.add_argument("files", nargs="+", help="Files to add xattrs to")
    options = cli.parse_args()

    failures = 0
    for target in options.files:
        try:
            byte_count = add_xattrs(target)
            print(f" {target} ({byte_count:,} bytes in xattrs)")
        except OSError as exc:
            failures += 1
            print(f" {target} error: {exc}", file=sys.stderr)

    if failures > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
109 changes: 109 additions & 0 deletions cmd/zstream/scripts/gen-lorem-files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/tmp/zstream-venv/bin/python3
Comment thread
GarthSnyder marked this conversation as resolved.
Comment thread
GarthSnyder marked this conversation as resolved.
"""Generate randomly-named files with lorem ipsum paragraphs."""

#
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the Common
# Development and Distribution License ("CDDL"), version 1.0. You may only use
# this file in accordance with the terms of version 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this source. A
# copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2026 by Garth Snyder. All rights reserved.
#

import argparse
import random
import sys
from pathlib import Path
from lorem_text import lorem

# Word lists combined into "<adjective>-<noun>" file names.  Every entry is
# unique so that random.choice() draws each word with equal probability and
# len(ADJECTIVES) * len(NOUNS) is the true number of distinct names.
ADJECTIVES = [
    "boogie", "funky", "wobbly", "snazzy", "jazzy", "groovy", "zippy",
    "bouncy", "fluffy", "crunchy", "sparkly", "fuzzy", "spiffy", "dandy",
    "peppy", "snappy", "sassy", "zesty", "swanky", "nifty", "plucky",
    "quirky", "wacky", "goofy", "dizzy", "breezy", "cheery", "perky",
    "frisky", "chirpy", "feisty", "jolly", "lively", "merry", "spunky",
    "vivid", "brisk", "sunny", "witty", "kinky",
]

NOUNS = [
    "woogie", "monkey", "noodle", "pickle", "muffin", "waffle", "pebble",
    "wobble", "doodle", "tangle", "giggle", "wiggle", "jiggle", "sparkle",
    "crinkle", "twinkle", "frizzle", "drizzle", "sizzle", "fizzle",
    "puddle", "bubble", "muddle", "huddle", "cuddle", "juggle", "muggle",
    "snuggle", "tuggle", "buggle", "nugget", "widget", "gadget", "gibbet",
    "trinket", "bracket", "racket", "jacket", "ticket", "cricket", "thicket",
    "biscuit", "circuit", "summit", "muppet", "trumpet", "basket", "casket",
]


def random_name(used: set) -> str:
    """Pick an "<adjective>-<noun>" name that is not in *used*.

    Up to 1000 random combinations are tried.  If all of them are already
    taken, one more combination is drawn and the smallest numeric suffix
    (starting at 2) that makes it unused is appended.  The caller is
    responsible for adding the returned name to *used*.
    """
    def draw() -> str:
        # Adjective first, then noun, so the random stream is consumed
        # in a fixed order.
        adjective = random.choice(ADJECTIVES)
        noun = random.choice(NOUNS)
        return f"{adjective}-{noun}"

    attempts_left = 1000
    while attempts_left > 0:
        attempts_left -= 1
        candidate = draw()
        if candidate not in used:
            return candidate

    # Every random draw collided; fall back to a numbered name.
    stem = draw()
    suffix = 2
    while True:
        numbered = f"{stem}-{suffix}"
        if numbered not in used:
            return numbered
        suffix += 1


def fill_file(path: Path, target_size: int, repeat: bool = False) -> None:
    """Write lorem ipsum paragraphs to *path*.

    Paragraphs are separated by a blank line and the file ends with a
    newline.  Paragraphs are added until the text is at least
    *target_size* characters long, counting the separators and the final
    newline.

    path: file to create or overwrite.
    target_size: minimum length of the written text, in characters.
    repeat: when true, the same paragraph is repeated throughout the file;
        otherwise a fresh paragraph is generated for each slot.
    """
    paragraphs = []
    # Length of the text as it will be written; starts at 1 for the
    # trailing newline.
    size = 1
    para = lorem.paragraph()
    while size < target_size:
        # Each paragraph after the first is preceded by a "\n\n" separator.
        size += len(para) + (2 if paragraphs else 0)
        paragraphs.append(para)
        if not repeat:
            para = lorem.paragraph()
    path.write_text("\n\n".join(paragraphs) + "\n", encoding="utf-8")


def main():
    """Command-line entry point: create COUNT randomly named lorem files.

    The target directory is created if necessary.  Each file receives a
    target size drawn uniformly from [--min-size, --max-size], and a line
    with its path and final size is printed once it has been written.
    Exits with status 1 if --min-size is not smaller than --max-size.
    """
    parser = argparse.ArgumentParser(
        description="Generate files with random names and lorem ipsum content."
    )
    parser.add_argument("count", type=int, help="Number of files to create")
    parser.add_argument("-d", "--directory", default=".",
                        help="Target directory (default: .)")
    parser.add_argument("-r", "--repeat", action="store_true",
                        help="Fill files with reps of a single paragraph")
    parser.add_argument("--min-size", type=int, default=16384,
                        help="Minimum file size in bytes (default: 16384)")
    parser.add_argument("--max-size", type=int, default=128000,
                        help="Maximum file size in bytes (default: 128000)")
    args = parser.parse_args()

    if args.min_size >= args.max_size:
        print(f"error: min-size ({args.min_size}) must be less than max-size "
              f"({args.max_size})", file=sys.stderr)
        sys.exit(1)

    directory = Path(args.directory)
    directory.mkdir(parents=True, exist_ok=True)

    # Reserve the names already present in the directory so that existing
    # entries are never overwritten and `count` new files are created.
    used_names = {entry.name for entry in directory.iterdir()}
    for _ in range(args.count):
        name = random_name(used_names)
        used_names.add(name)
        target_size = random.randint(args.min_size, args.max_size)
        path = directory / name
        fill_file(path, target_size, args.repeat)
        print(f" {path} ({path.stat().st_size:,} bytes)")


if __name__ == "__main__":
    main()
67 changes: 67 additions & 0 deletions cmd/zstream/scripts/make-all-records-streams.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/sh

#
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the Common
# Development and Distribution License ("CDDL"), version 1.0. You may only use
# this file in accordance with the terms of version 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this source. A
# copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2026 by Garth Snyder. All rights reserved.
#

# Usage: make-all-records-streams.sh <device>
#
# Creates a pool named "test" on <device>, populates it, and writes two send
# streams: /tmp/all-record-types-base.zsend and
# /tmp/all-record-types-incr.zsend.
if [ $# -ne 1 ]; then
	echo "Usage: $0 <device>" >&2
	exit 1
fi

# Stop at the first failure.  Without this, a failed "zpool create" would
# leave the remaining commands writing to (and removing files from) a plain
# /test directory on the root filesystem.
set -e

DEVICE="$1"
SCRIPTDIR="$(cd "$(dirname "$0")" && pwd)"

zpool create -o ashift=12 test "$DEVICE"
zfs set compression=on xattr=sa test
zfs create test/source

# Source dataset: a few lorem files with xattrs, one tiny file, and a file
# that is removed from the clone below.
"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/source --min-size 2048 \
	--max-size 32000 3
"$SCRIPTDIR/add-xattrs.py" /test/source/*
echo "very small" > /test/source/small
echo "password" > /test/source/to-be-redacted
chmod 400 /test/source/to-be-redacted

# Clone the baseline snapshot, drop the sensitive file, and add new content.
zfs snapshot -r test/source@baseline
zfs clone test/source@baseline test/redacted
rm /test/redacted/to-be-redacted
"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/redacted --min-size 4096 \
	--max-size 32000 3
"$SCRIPTDIR/add-xattrs.py" /test/redacted/*

# Store a second copy of the clone's current contents under copies/.
cd /test/redacted
tar cf /tmp/dups.tar .
mkdir copies
cd copies
tar xvf /tmp/dups.tar

# Encrypted child dataset keyed by a passphrase file kept in the clone.
echo "password" > /test/redacted/new-key
zfs create -o encryption=on -o keylocation=file:///test/redacted/new-key \
	-o keyformat=passphrase test/redacted/encrypted
"$SCRIPTDIR/gen-lorem-files.py" -r -d /test/redacted/encrypted 3
echo "very small" > /test/redacted/encrypted/small-encrypted
# NOTE(review): xattrs on the encrypted dataset are disabled here; the
# reason is not recorded -- confirm before re-enabling.
# "$SCRIPTDIR/add-xattrs.py" /test/redacted/encrypted/*

zfs snapshot -r test/redacted@clean

# Base stream: the baseline snapshot sent through a redaction bookmark made
# from the clone.  Incremental stream: baseline -> clean.
zfs redact test/source@baseline redaction-bookmark test/redacted@clean
zfs send -ce --redact redaction-bookmark test/source@baseline \
	> /tmp/all-record-types-base.zsend
zfs send -Rcew -i test/source@baseline test/redacted@clean \
	> /tmp/all-record-types-incr.zsend
49 changes: 49 additions & 0 deletions cmd/zstream/scripts/make-decompression-streams.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/sh

#
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the Common
# Development and Distribution License ("CDDL"), version 1.0. You may only use
# this file in accordance with the terms of version 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this source. A
# copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2026 by Garth Snyder. All rights reserved.
#

# Usage: make-decompression-streams.sh <device>
#
# Creates a pool named "test" on <device> and writes two send streams:
# /tmp/decompression.zsend and /tmp/decompression-crypt.zsend.
[ $# -eq 1 ] || {
	echo "Usage: $0 <device>" >&2
	exit 1
}

# Abort on the first failing command.
set -e

device="$1"
script_dir="$(cd "$(dirname "$0")" && pwd)"
gen_files="$script_dir/gen-lorem-files.py"

zpool create -o ashift=12 test "$device"

# Passphrase file used as the key for the encrypted dataset below.
echo "password" > /test/password

# Unencrypted, zstd-compressed dataset: two smaller files and one larger one.
zfs create -o compression=zstd-5 test/unencrypted
"$gen_files" -r -d /test/unencrypted --min-size 12000 --max-size 40000 2
"$gen_files" -r -d /test/unencrypted --min-size 140000 --max-size 160000 1

# Encrypted, lz4-compressed dataset.
zfs create -o compression=lz4 -o encryption=on \
	-o keylocation=file:///test/password -o keyformat=passphrase \
	test/encrypted
"$gen_files" -r -d /test/encrypted --min-size 12000 --max-size 40000 3

# Snapshot everything and send each dataset to its own stream file.
zfs snapshot -r test@decompression
zfs send -cw test/unencrypted@decompression > /tmp/decompression.zsend
zfs send -cw test/encrypted@decompression > /tmp/decompression-crypt.zsend
Loading
Loading