diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 1dcd70f628b3..a4919d33ff08 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -37,6 +37,7 @@ * Copyright (c) 2023, 2024, Klara Inc. * Copyright (c) 2023, Rob Norris * Copyright (c) 2026, TrueNAS. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #include @@ -1032,9 +1033,21 @@ dump_zap_stats(objset_t *os, uint64_t object) if (zs.zs_ptrtbl_len == 0) { ASSERT(zs.zs_num_blocks == 1); - (void) printf("\tmicrozap: %llu bytes, %llu entries\n", - (u_longlong_t)zs.zs_blocksize, - (u_longlong_t)zs.zs_num_entries); + if (zs.zs_is_tinyzap) { + /* TinyZAP */ + (void) printf("\ttinyzap: %llu bytes, %llu entries, " + "stride %llu chunk=%llu num_chunks=%llu\n", + (u_longlong_t)zs.zs_blocksize, + (u_longlong_t)zs.zs_num_entries, + (u_longlong_t)zs.zs_tinyzap_stride, + (u_longlong_t)zs.zs_tinyzap_chunk, + (u_longlong_t)zs.zs_tinyzap_num_chunks); + } else { + /* Plain MicroZAP */ + (void) printf("\tmicrozap: %llu bytes, %llu entries\n", + (u_longlong_t)zs.zs_blocksize, + (u_longlong_t)zs.zs_num_entries); + } return; } diff --git a/include/sys/zap.h b/include/sys/zap.h index 7e89ad7d3de5..9f104a81138e 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -25,6 +25,7 @@ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2026, TrueNAS. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #ifndef _SYS_ZAP_H @@ -61,11 +62,24 @@ * * Implementation / Performance Notes: * - * The ZAP is intended to operate most efficiently on attributes with - * short (49 bytes or less) names and single 8-byte values, for which - * the microzap will be used. The ZAP should be efficient enough so + * The ZAP operates in three modes, selected automatically: + * + * MicroZAP: most efficient for attributes with short names (up to 49 + * characters, 50 bytes including NULL ('\0')) and a single 8-byte value. + * Fixed 64-byte chunk layout. 
The ZAP should be efficient enough * that the user does not need to cache these attributes. * + * TinyZAP: used when an entry cannot fit MicroZAP, i.e. when either + * condition is true: + * - num_integers > 1 (value too wide for MicroZAP), OR + * - strlen(key) >= MZAP_NAME_LEN (name too long for MicroZAP) + * AND at least one chunk size (64/128/256 bytes) can accommodate the + * entry. The chunk size and stride are stamped automatically on the + * first zap_add(). No create-time hint is required. + * + * FatZAP: used for all other cases, or when the ZAP grows beyond the + * capacity of a single block. Supports arbitrary name/value sizes. + * * The ZAP's locking scheme makes its routines thread-safe. Operations * on different zapobjs will be processed concurrently. Operations on * the same zapobj which only read data will be processed concurrently. @@ -181,7 +195,7 @@ int zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx); /* - * All operations on a zapobj take either the the objset/objectid pair + * All operations on a zapobj take either the objset/objectid pair * that "names" the object, or an existing dnode_t for the object. The * zapobj passed in must be a valid ZAP object. */ @@ -263,7 +277,7 @@ int zap_contains_by_dnode(dnode_t *dn, const char *name); /* * Prefetch the blocks within the ZAP where the given key is stored. The - * prefetch IO will occure in the background. + * prefetch IO will occur in the background. */ int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name); @@ -538,6 +552,20 @@ typedef struct zap_stats { uint64_t zs_num_entries; /* The number of zap entries */ uint64_t zs_salt; /* salt to stir into hash function */ + /* + * TinyZAP statistics. Only meaningful when zs_is_tinyzap is B_TRUE. + * + * zs_is_tinyzap: B_TRUE if MZAP_FLAG_TINY is set. + * zs_tinyzap_stride: value width in bytes (8..248, multiple of 8). 
+ * zs_tinyzap_chunk: chunk size in bytes (1 << mz_chunk_shift). + * zs_tinyzap_flags: raw mz_flags uint8 (for zdb diagnostics). + */ + boolean_t zs_is_tinyzap; + uint64_t zs_tinyzap_stride; /* value width: 8..248 bytes */ + uint64_t zs_tinyzap_chunk; /* chunk size: 64 / 128 / 256 */ + uint64_t zs_tinyzap_num_chunks; /* number of chunks used */ + uint64_t zs_tinyzap_flags; /* raw mz_flags for zdb */ + /* * Histograms. For all histograms, the last index * (ZAP_HISTOGRAM_SIZE-1) includes any values which are greater diff --git a/include/sys/zap_impl.h b/include/sys/zap_impl.h index ea8963f550fc..fa7a41e0f25b 100644 --- a/include/sys/zap_impl.h +++ b/include/sys/zap_impl.h @@ -27,6 +27,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2024, Klara, Inc. * Copyright (c) 2026, TrueNAS. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #ifndef _SYS_ZAP_IMPL_H @@ -50,6 +51,135 @@ extern int fzap_default_block_shift; #define ZAP_NEED_CD (-1U) +/* + * Flag to detect TinyZAP, a MicroZAP variant. + */ +#define MZAP_FLAG_TINY (1 << 0) + +/* + * TinyZAP: a variable-stride, variable-chunk-size variant of MicroZAP. + * + * MZAP_FLAG_TINY in mz_flags distinguishes TinyZAP from plain MicroZAP. + * Chunk size and value width are stored in mz_chunk_shift / mz_value_ints. + * + * mzap_phys_t header layout (64 bytes total = MZAP_ENT_LEN): + * [ 0.. 7] mz_block_type uint64_t ZBT_MICRO + * [ 8.. 15] mz_salt uint64_t + * [ 16.. 23] mz_normflags uint64_t + * [ 24] mz_flags uint8_t MZAP_FLAG_TINY + * [ 25] mz_chunk_shift uint8_t log2(chunk): 6=64, 7=128, 8=256 + * [ 26] mz_value_ints uint8_t num_integers; stride = *8 + * [ 27] mz_pad1 uint8_t zero + * [ 28.. 31] mz_pad2 uint32_t zero + * [ 32.. 63] mz_pad3[4] uint64_t zero + * + * Supported chunk sizes and resulting geometry (examples only). 
+ * name_len = chunk - stride - 4 (stride = mz_value_ints * 8) + * + * chunk | stride | name_len | integers | use-case + * ------+--------+----------+----------+-------------------------------------- + * 64 | 8 | 52 | 1 | 1×uint64, name up to 51 chars + * 64 | 16 | 44 | 2 | 2×uint64 (Lustre FID) + * 64 | 24 | 36 | 3 | 3×uint64 + * 64 | 32 | 28 | 4 | 4×uint64 + * 64 | 56 | 4 | 7 | max stride for chunk=64 + * 128 | 8 | 116 | 1 | 1×uint64 + long name + * 128 | 16 | 108 | 2 | 2×uint64 + long name + * 128 | 48 | 76 | 6 | 6×uint64 (3×Lustre FID) + * 128 | 120 | 4 | 15 | max stride for chunk=128 + * 256 | 8 | 244 | 1 | 1×uint64 + very long name + * 256 | 16 | 236 | 2 | 2×uint64 + very long name + * 256 | 128 | 124 | 16 | 16×uint64 (wide value, medium name) + * 256 | 248 | 4 | 31 | max stride for chunk=256 + * ... + * + * Note: stride=8 with chunk=64 is skipped by tzap_try_promote() because + * it provides only 2 bytes more than MicroZAP. Chunk=128 is the minimum + * for stride=8. chunk=64 is only used when stride >= 16 (num_integers > 1). + */ + +/* + * TinyZAP chunk table: the three supported chunk sizes in bytes. + * chunk_id 0=64B, 1=128B, 2=256B. + */ +#define TZAP_CHUNK_SIZES 3 +extern const uint16_t tzap_chunk_table[TZAP_CHUNK_SIZES]; + +/* chunk size constants */ +#define TZAP_MIN_CHUNK_LOG2 6 /* 64 bytes, backward compat */ +#define TZAP_MAX_CHUNK_LOG2 8 /* 256 bytes */ +#define TZAP_MIN_CHUNK (1U << TZAP_MIN_CHUNK_LOG2) /* 64 */ +#define TZAP_MAX_CHUNK (1U << TZAP_MAX_CHUNK_LOG2) /* 256 */ + +/* stride constants: min stride across ALL chunk sizes */ +#define TZAP_MIN_STRIDE 8 /* 1×uint64 minimum */ +#define TZAP_MIN_NAME_LEN 4 /* min bytes reserved for name string */ + +/* + * Max stride is chunk dependent: + * TZAP_MAX_STRIDE(chunk) = chunk - sizeof (uint32_t) - TZAP_MIN_NAME_LEN + * Use tzap_max_stride(chunk) inline below. 
+ */ + +#define MZAP_IS_TINYZAP(phys) \ + (((phys)->mz_flags & MZAP_FLAG_TINY) != 0) + +#define MZAP_CHUNK_SIZE(phys) \ + ((uint16_t)(1U << (phys)->mz_chunk_shift)) + +#define MZAP_STRIDE(phys) \ + ((uint16_t)((phys)->mz_value_ints * sizeof (uint64_t))) + +/* + * Name length available in a TinyZAP chunk: + * chunk - stride - sizeof (uint32_t cd) + * Both chunk and stride are required, chunk is no longer fixed. + */ +#define TZAP_NAME_LEN(chunk, stride) \ + ((uint16_t)((chunk) - (stride) - sizeof (uint32_t))) + +/* + * tzap_max_stride: maximum value width for a given chunk size. + * Leaves at least TZAP_MIN_NAME_LEN bytes for the name field. + */ +static inline uint16_t +tzap_max_stride(uint16_t chunk) +{ + return ((uint16_t)((chunk) - sizeof (uint32_t) - TZAP_MIN_NAME_LEN)); +} + +/* + * TinyZAP physical entry. + * + * Size is variable (chunk bytes: 64, 128, or 256). + * NEVER stack-allocate: always access via TZE_PHYS() pointer cast. + * + * [0 .. stride-1] value blob (mz_value_ints × uint64) + * [stride .. stride+3] cd (uint32_t) + * [stride+4 .. chunk-1] name (TZAP_NAME_LEN(chunk,stride) bytes) + */ +typedef struct tzap_ent_phys { + uint8_t tze_data[0]; /* zero-length array */ +} tzap_ent_phys_t; + +static inline uint8_t * +tze_value(tzap_ent_phys_t *tze) +{ + return (tze->tze_data); +} + +static inline uint32_t * +tze_cd_ptr(tzap_ent_phys_t *tze, uint16_t stride) +{ + return ((uint32_t *)(tze->tze_data + stride)); +} + +static inline char * +tze_name_ptr(tzap_ent_phys_t *tze, uint16_t stride) +{ + return ((char *)(tze->tze_data + stride + sizeof (uint32_t))); +} + typedef struct mzap_ent_phys { uint64_t mze_value; uint32_t mze_cd; @@ -57,12 +187,21 @@ typedef struct mzap_ent_phys { char mze_name[MZAP_NAME_LEN]; } mzap_ent_phys_t; +/* + * MicroZAP / TinyZAP on-disk header. + * Total size = MZAP_ENT_LEN (64 bytes). 
+ */ typedef struct mzap_phys { uint64_t mz_block_type; /* ZBT_MICRO */ uint64_t mz_salt; uint64_t mz_normflags; - uint64_t mz_pad[5]; - + uint8_t mz_flags; /* MZAP_FLAG_TINY */ + /* log2(chunk) for TinyZAP, 0 = MicroZAP */ + uint8_t mz_chunk_shift; + uint8_t mz_value_ints; /* num_integers; stride = *8 */ + uint8_t mz_pad1; /* zero */ + uint32_t mz_pad2; /* zero */ + uint64_t mz_pad3[4]; /* zero */ /* actually variable size depending on block size */ mzap_ent_phys_t mz_chunk[]; } mzap_phys_t; @@ -76,6 +215,15 @@ typedef struct mzap_ent { #define MZE_PHYS(zap, mze) \ (&zap_m_phys(zap)->mz_chunk[(mze)->mze_chunkid]) +/* + * TinyZAP accessor: byte-offset into mz_chunk[] raw bytes using + * variable chunk size. mze_chunkid is a slot index, not a byte offset. + * Only valid when zap->zap_m.zap_stride != 0. + */ +#define TZE_PHYS(zap, mze) \ + ((tzap_ent_phys_t *)((uint8_t *)zap_m_phys(zap)->mz_chunk + \ + (size_t)(mze)->mze_chunkid * (zap)->zap_m.zap_chunk_size)) + /* * The (fat) zap is stored in one object. It is an array of * 1<= 16) + * 128 = TinyZAP 128-byte chunk + * 256 = TinyZAP 256-byte chunk + * Set alongside zap_stride by tzap_try_promote(). + * Entries accessed via TZE_PHYS() / tzap_ent_phys_t. + */ + uint16_t zap_chunk_size; zfs_btree_t zap_tree; } zap_micro; } zap_u; @@ -291,6 +459,107 @@ void mzap_addent(zap_name_t *zn, uint64_t value); void mzap_byteswap(mzap_phys_t *buf, size_t size); uint64_t zap_get_micro_max_size(spa_t *spa); +/* + * MicroZAP in-memory tree helpers, also used by zap_tiny.c + */ +uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash); +void mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash); + +/* + * Shared between zap_tiny.c and zap_micro.c for deferred + * spa_feature_incr/decr via dsl_sync_task. 
+ */ +typedef struct { + spa_t *tfa_spa; +} tzap_feature_arg_t; + +void tzap_feature_incr_sync(void *arg, dmu_tx_t *tx); +void tzap_feature_decr_sync(void *arg, dmu_tx_t *tx); +void tzap_feature_incr_cb(void *arg, int error); +void tzap_feature_decr_cb(void *arg, int error); + +/* + * tzap_try_promote() - stamp TinyZAP geometry on-disk and promote + * this MicroZAP to TinyZAP on the first qualifying add. + * + * Called from zap_add_by_dnode() and zap_update_by_dnode() when: + * - the entry does not fit plain MicroZAP constraints, AND + * - the ZAP has no committed entries with a different geometry. + * + * Validates integer_size, stride, and key length inline, then + * selects the smallest chunk size (64->128->256) that fits: + * - stride=8 (num_integers=1, long key): starts at chunk=128, not 64 + * - stride>=16 (num_integers>1): starts at chunk=64 + * then writes three independent uint8_t fields on-disk: + * mz_flags |= MZAP_FLAG_TINY + * mz_chunk_shift = log2(chunk) (6, 7, or 8) + * mz_value_ints = stride / 8 + * + * Updates in-memory state: + * zap->zap_m.zap_stride = stride + * zap->zap_m.zap_chunk_size = chunk (64, 128, or 256) + * zap->zap_m.zap_num_chunks = (block_size - MZAP_ENT_LEN) / chunk + * + * Feature flag: chunk > 64 requires SPA_FEATURE_TINYZAP. + * - stride>=16 + feature absent: falls back to chunk=64 if stride fits, + * otherwise returns B_FALSE -> FatZAP upgrade. + * - stride=8 + feature absent: chunk=64 is always skipped for stride=8, + * so returns B_FALSE immediately -> FatZAP upgrade. + * + * On any failure: clears MZAP_FLAG_TINY from mz_flags and returns B_FALSE. 
+ */ +boolean_t tzap_try_promote(zap_t *zap, int integer_size, + uint64_t num_integers, const char *key, dmu_tx_t *tx); + +void tzap_reencode_micro_to_tiny(zap_t *zap, uint16_t chunk, dmu_tx_t *tx); + +boolean_t tzap_try_chunk_upgrade(zap_t *zap, uint16_t stride, + size_t keylen, dmu_tx_t *tx); + +int tzap_upgrade_chunk(zap_t *zap, uint16_t new_chunk, dmu_tx_t *tx); + +/* + * tzap_addent() - write an entry into an active TinyZAP chunk slot. + * zap_stride and zap_chunk_size must already be stamped by + * tzap_try_promote(). Caller must hold the write lock and dirty the dbuf. + */ +void tzap_addent(zap_name_t *zn, const void *val); + +/* + * tzap_lookup() - retrieve a value from a TinyZAP entry. + */ +int tzap_lookup(zap_t *zap, mzap_ent_t *mze, uint64_t integer_size, + uint64_t num_integers, void *buf, char *realname, int rn_len, + boolean_t *ncp); + +/* + * tzap_cursor_fill() - populate a zap_attribute_t from a TinyZAP btree entry + * during cursor iteration. + */ +void tzap_cursor_fill(zap_cursor_t *zc, mzap_ent_t *mze, + zap_attribute_t *za); + +/* + * tzap_upgrade_entries() - re-encode all variable-stride TinyZAP entries + * into FatZAP leaf blocks during mzap_upgrade(). + * Reads geometry from mzap_phys_t fields directly: + * mz_value_ints -> stride + * mz_chunk_shift -> log2(chunk) + */ +int tzap_upgrade_entries(mzap_phys_t *mzp, size_t db_size, + zap_name_t *zn, dmu_tx_t *tx); + +void tzap_get_stats(zap_t *zap, zap_stats_t *zs); + +#ifdef ZFS_DEBUG +#define TZAP_VERIFY_PHYS(zap) \ + tzap_verify_phys(__FUNCTION__, zap) +#else +#define TZAP_VERIFY_PHYS(zap) (B_TRUE) +#endif + +extern boolean_t tzap_verify_phys(const char *caller, zap_t *zap); + /* Fatzap implementation. 
*/ void fzap_byteswap(void *buf, size_t size); int fzap_count(zap_t *zap, uint64_t *count); diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 64606de226b0..928063e6eea0 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -26,6 +26,7 @@ * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #ifndef _ZFEATURE_COMMON_H @@ -91,6 +92,7 @@ typedef enum spa_feature { SPA_FEATURE_BLOCK_CLONING_ENDIAN, SPA_FEATURE_PHYSICAL_REWRITE, SPA_FEATURE_DRAID_FAIL_DOMAINS, + SPA_FEATURE_TINYZAP, SPA_FEATURES } spa_feature_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 3f88f2fb83d3..242e4ef7cf23 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -691,7 +691,7 @@ - + @@ -6702,7 +6702,8 @@ - + + @@ -10082,8 +10083,8 @@ - - + + @@ -10144,7 +10145,7 @@ - + diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 4cd003a718b2..f60eb9ce77dd 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -1489,6 +1489,12 @@ + + + + + + @@ -1503,6 +1509,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1577,6 +1670,8 @@ + + @@ -1592,6 +1687,14 @@ + + + + + + + + diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 22c7ceaa1ba4..1e2b38ec6e96 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -170,6 +170,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/zap_impl.c \ module/zfs/zap_leaf.c \ module/zfs/zap_micro.c \ + module/zfs/zap_tiny.c \ module/zfs/zcp.c \ module/zfs/zcp_get.c \ module/zfs/zcp_global.c \ diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index 6221cfdfda9a..6d83ebaf7ab4 100644 --- a/man/man7/zpool-features.7 +++ 
b/man/man7/zpool-features.7 @@ -18,6 +18,7 @@ .\" Copyright (c) 2019, 2023, 2024, Klara, Inc. .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley +.\" Copyright (c) 2026, Hewlett Packard Enterprise Development LP. .\" .Dd July 23, 2025 .Dt ZPOOL-FEATURES 7 @@ -775,6 +776,39 @@ module parameter. See .Xr zfs 4 . . +.feature com.hpe tinyzap no extensible_dataset +This feature enables the +.Sy TinyZAP +on-disk format, a compact variant of MicroZAP that supports multi-word +values and longer key names without upgrading to a FatZAP. +.Pp +Plain MicroZAP entries are limited to a single 8-byte value and a +49-byte key name. +TinyZAP extends this by packing entries into fixed-size chunks of 64, 128, +or 256 bytes. +The chunk size is chosen automatically the first time an entry is added to +the ZAP object, based on the size of the value and the length of the key. +Depending on the chunk size selected, TinyZAP supports values up to +16 eight-byte words and key names up to 238 bytes. +.Pp +The value width is fixed once chosen; the chunk size may grow for longer keys. +A plain MicroZAP is automatically promoted to TinyZAP when the first entry +does not fit the MicroZAP constraints +.Pq e.g.\& a value wider than 8 bytes or a key name longer than 49 bytes . +If a later entry has a different value width or fits no chunk size, the ZAP +object is upgraded to a FatZAP to accommodate it. +.Pp +This feature is not read-only compatible. +A pool that contains TinyZAP objects cannot be imported by software that +does not support this feature, even in read-only mode. +.Pp +This feature becomes +.Sy active +the first time a TinyZAP object is created on the pool. +It will return to being +.Sy enabled +when all TinyZAP objects have been removed or upgraded to FatZAP. +. .feature com.delphix livelist yes extensible_dataset This feature allows clones to be deleted faster than the traditional method when a large number of random/sparse writes have been made to the clone. 
diff --git a/module/Kbuild.in b/module/Kbuild.in index fa4085c84b0c..c1e586dcee8b 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -419,6 +419,7 @@ ZFS_OBJS := \ zap_impl.o \ zap_leaf.o \ zap_micro.o \ + zap_tiny.o \ zcp.o \ zcp_get.o \ zcp_global.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index a0ddbeb9ae68..f43a21d50497 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -355,6 +355,7 @@ SRCS+= abd.c \ zap_impl.c \ zap_leaf.c \ zap_micro.c \ + zap_tiny.c \ zcp.c \ zcp_get.c \ zcp_global.c \ diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 2bb19c0cf5fd..3550b8f3e086 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -28,6 +28,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, 2024, Klara, Inc. * Copyright (c) 2019, Allan Jude + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #ifndef _KERNEL @@ -823,6 +824,13 @@ zpool_feature_init(void) ZFEATURE_TYPE_BOOLEAN, physical_rewrite_deps, sfeatures); } + zfeature_register(SPA_FEATURE_TINYZAP, + "com.hpe:tinyzap", "tinyzap", + "Support for variable-stride, variable-chunk ZAP for " + "multi-integer and long-name directory entries without " + "FatZAP overhead.", + ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures); + zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zfs/zap.c b/module/zfs/zap.c index caed9c677942..b97bfafcd7d1 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -27,6 +27,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2024, Klara, Inc. * Copyright (c) 2026, TrueNAS. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. 
*/ #include @@ -247,6 +248,10 @@ zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, mzap_ent_t *mze = mze_find(zn, &idx); if (mze == NULL) { err = SET_ERROR(ENOENT); + } else if (zap->zap_m.zap_stride != 0) { + /* TinyZAP: delegate to tzap_lookup() */ + err = tzap_lookup(zap, mze, integer_size, num_integers, + buf, realname, rn_len, ncp); } else { if (num_integers < 1) { err = SET_ERROR(EOVERFLOW); @@ -476,7 +481,12 @@ zap_add_by_dnode(dnode_t *dn, const char *key, if (err != 0) return (err); - const uint64_t *intval = val; + /* if stride is set, the TINY bit must be present on-disk */ + if (zap->zap_ismicro && zap->zap_m.zap_stride != 0) { + ASSERT(MZAP_IS_TINYZAP(zap_m_phys(zap))); + ASSERT3U(MZAP_STRIDE(zap_m_phys(zap)), ==, + zap->zap_m.zap_stride); + } zap_name_t *zn = zap_name_alloc_str(zap, key, 0); if (zn == NULL) { zap_unlock(zap, FTAG); @@ -484,19 +494,134 @@ zap_add_by_dnode(dnode_t *dn, const char *key, } if (!zap->zap_ismicro) { err = fzap_add(zn, integer_size, num_integers, val, tx); - } else if (integer_size != 8 || num_integers != 1 || - strlen(key) >= MZAP_NAME_LEN || - !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { - err = mzap_upgrade(&zn->zn_zap, tx, 0); - if (err == 0) { - err = fzap_add(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + } else if (zap->zap_m.zap_stride != 0) { + /* + * TinyZAP: Entry must exactly match the stamped stride. + * Any mismatch (different num_integers, key too + * long, or leaf won't fit) forces FatZAP upgrade. 
+ */ + uint16_t stride = zap->zap_m.zap_stride; + uint16_t chunk = zap->zap_m.zap_chunk_size; + + if (integer_size != 8 || + num_integers != stride / sizeof (uint64_t)) { + /* Stride mismatch or !re-encode upgrade to FatZAP */ + dprintf("obj %llu: TinyZAP mismatch: " + "intsz=%d numints=%llu keylen=%zu stride=%u " + "chunk=%u upgrading to FatZAP\n", + (u_longlong_t)zap->zap_object, integer_size, + (u_longlong_t)num_integers, strlen(key), + stride, chunk); + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, integer_size, num_integers, + val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + } else if (strlen(key) >= TZAP_NAME_LEN(chunk, stride)) { + if (!tzap_try_chunk_upgrade(zap, stride, + strlen(key), tx)) { + /* name too long & no chunk fits, upgrade */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, integer_size, + num_integers, val, tx); + zap = zn->zn_zap; + } else { + /* + * Chunk upgrade succeeded. Refresh chunk and + * num_chunks from the updated zap_m fields. 
+ */ + chunk = zap->zap_m.zap_chunk_size; + if (zap->zap_m.zap_num_entries >= + zap->zap_m.zap_num_chunks) { + /* full after chunk upgrade, upgrade */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, + integer_size, + num_integers, val, tx); + zap = zn->zn_zap; + } else { + /* TinyZAP: delegate to tzap_add() */ + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) + err = SET_ERROR(EEXIST); + else + tzap_addent(zn, val); + } + } + } else if (zap->zap_m.zap_num_entries >= + zap->zap_m.zap_num_chunks) { + /* full after possible chunk upgrade -> FatZAP */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, integer_size, + num_integers, val, tx); + zap = zn->zn_zap; + } else { + /* TinyZAP: delegate to tzap_add() */ + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) + err = SET_ERROR(EEXIST); + else + tzap_addent(zn, val); } } else { - zfs_btree_index_t idx; - if (mze_find(zn, &idx) != NULL) { - err = SET_ERROR(EEXIST); + /* + * Plain MicroZAP: no hint, no stride. + * Fast path: entry fits plain MicroZAP constraints. + * - integer_size == 8, num_integers == 1 + * - strlen(key) < MZAP_NAME_LEN + * - leaf can fit the new entry (mze_canfit_fzap_leaf()) + */ + if (integer_size == 8 && num_integers == 1 && + strlen(key) < MZAP_NAME_LEN && + mze_canfit_fzap_leaf(zn, zn->zn_hash)) { + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) { + err = SET_ERROR(EEXIST); + } else { + mzap_addent(zn, *(const uint64_t *)val); + } } else { - mzap_addent(zn, *intval); + /* + * Entry does not fit plain MicroZAP. + * + * Auto-promotion to TinyZAP: + * stride=8 (long-name trigger): promote even on + * populated ZAP, tzap_reencode_micro_to_tiny() + * re-encodes existing entries. + * tzap_try_promote() validates and stamps the stride + * and chunk size based on the first add geometry. 
+ */ + uint16_t stride = + (uint16_t)(num_integers * sizeof (uint64_t)); + boolean_t can_promote = (stride == 8 || + (zap->zap_m.zap_num_entries == 0)); + + if (can_promote && + tzap_try_promote(zap, integer_size, + num_integers, key, tx)) { + zfs_btree_index_t idx; + if (mze_find(zn, &idx) != NULL) { + err = SET_ERROR(EEXIST); + } else { + tzap_addent(zn, val); + } + } else { + /* + * tzap_try_promote() failed or ZAP already has + * entries with different geometry. + * Upgrade to FatZAP unconditionally. + */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_add(zn, integer_size, + num_integers, val, tx); + /* fzap_add() may change zap */ + zap = zn->zn_zap; + } } } ASSERT(zap == zn->zn_zap); @@ -580,17 +705,94 @@ zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size, } if (!zap->zap_ismicro) { err = fzap_update(zn, integer_size, num_integers, val, tx); - } else if (integer_size != 8 || num_integers != 1 || + } else if (zap->zap_m.zap_stride != 0) { + /* + * TinyZAP: Update must match the stamped geometry. + * - integer_size must be 8 + * - num_integers must equal stride / 8 + * - key must fit in TZAP_NAME_LEN(chunk, stride) + * Any mismatch forces a FatZAP upgrade. 
+ */ + uint16_t stride = zap->zap_m.zap_stride; + uint16_t chunk = zap->zap_m.zap_chunk_size; + + if (integer_size != 8 || + num_integers != stride / sizeof (uint64_t)) { + dprintf("obj %llu: TinyZAP update mismatch: " + "intsz=%d numints=%llu keylen=%zu stride=%u " + "chunk=%u upgrading to FatZAP\n", + (u_longlong_t)dn->dn_object, integer_size, + (u_longlong_t)num_integers, strlen(name), + stride, chunk); + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_update(zn, integer_size, + num_integers, val, tx); + /* fzap_update() may change zap */ + zap = zn->zn_zap; + } else if (strlen(name) >= TZAP_NAME_LEN(chunk, stride) && + !tzap_try_chunk_upgrade(zap, stride, strlen(name), tx)) { + /* name too long & no chunk fits, upgrade to FatZAP */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_update(zn, integer_size, + num_integers, val, tx); + zap = zn->zn_zap; + } else { + /* TinyZAP: delegate to tzap_update() */ + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); + if (mze != NULL) { + /* Overwrite value at TZE slot */ + memcpy(tze_value(TZE_PHYS(zap, mze)), val, + stride); + } else if (zap->zap_m.zap_num_entries >= + zap->zap_m.zap_num_chunks) { + /* full after possible chunk upgrade, upgrade */ + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_update(zn, integer_size, + num_integers, val, tx); + zap = zn->zn_zap; + } else { + /* Add new entry */ + tzap_addent(zn, val); + } + } + } else if ((integer_size != 8 || num_integers != 1) || strlen(name) >= MZAP_NAME_LEN) { - dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", - (u_longlong_t)dn->dn_object, integer_size, - (u_longlong_t)num_integers, name); - err = mzap_upgrade(&zn->zn_zap, tx, 0); - if (err == 0) { - err = fzap_update(zn, integer_size, num_integers, - val, tx); + uint16_t stride = (uint16_t)(num_integers * sizeof (uint64_t)); + boolean_t can_promote = (stride == 8 || + (zap->zap_m.zap_num_entries == 0)); + if (can_promote && 
+ tzap_try_promote(zap, integer_size, + num_integers, name, tx)) { + /* promote to TinyZAP and delegate to tzap_update() */ + zfs_btree_index_t idx; + mzap_ent_t *mze = mze_find(zn, &idx); + if (mze != NULL) { + /* Overwrite value at TZE slot */ + memcpy(tze_value(TZE_PHYS(zap, mze)), val, + stride); + } else { + /* Add new entry */ + tzap_addent(zn, val); + } + zap = zn->zn_zap; + } else { + /* MicroZAP: entry doesn't fit. Upgrade to FatZAP */ + dprintf("upgrading obj %llu: intsz=%d " + "numints=%llu name=%s\n", + (u_longlong_t)dn->dn_object, integer_size, + (u_longlong_t)num_integers, name); + err = mzap_upgrade(&zn->zn_zap, tx, 0); + if (err == 0) + err = fzap_update(zn, integer_size, + num_integers, val, tx); + zap = zn->zn_zap; } } else { + /* Plain MicroZAP: update/insert directly */ zfs_btree_index_t idx; mzap_ent_t *mze = mze_find(zn, &idx); if (mze != NULL) { @@ -682,10 +884,19 @@ zap_length_by_dnode(dnode_t *dn, const char *name, uint64_t *integer_size, if (mze == NULL) { err = SET_ERROR(ENOENT); } else { - if (integer_size) + if (integer_size != NULL) *integer_size = 8; - if (num_integers) - *num_integers = 1; + if (num_integers != NULL) { + if (zap->zap_m.zap_stride != 0) { + /* TinyZAP: variable chunk size */ + *num_integers = + zap->zap_m.zap_stride / + sizeof (uint64_t); + } else { + /* Plain MicroZAP: fixed chunk size */ + *num_integers = 1; + } + } } } zap_name_free(zn); @@ -768,7 +979,15 @@ zap_remove_norm_by_dnode(dnode_t *dn, const char *name, matchtype_t mt, err = SET_ERROR(ENOENT); } else { zap->zap_m.zap_num_entries--; - memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t)); + if (zap->zap_m.zap_stride != 0) { + /* TinyZAP: variable chunk size */ + memset(TZE_PHYS(zap, mze), 0, + zap->zap_m.zap_chunk_size); + } else { + /* Plain MicroZAP: fixed chunk size */ + memset(MZE_PHYS(zap, mze), 0, + sizeof (mzap_ent_phys_t)); + } zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx); } } @@ -1166,16 +1385,26 @@ zap_cursor_retrieve(zap_cursor_t *zc, 
zap_attribute_t *za) &idx, &idx); } if (mze) { - mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); - ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); - za->za_normalization_conflict = - mzap_normalization_conflict(zc->zc_zap, NULL, - mze, &idx); - za->za_integer_length = 8; - za->za_num_integers = 1; - za->za_first_integer = mzep->mze_value; - (void) strlcpy(za->za_name, mzep->mze_name, - za->za_name_len); + if (zc->zc_zap->zap_m.zap_stride != 0) { + /* TinyZAP: variable chunk size */ + tzap_cursor_fill(zc, mze, za); + za->za_normalization_conflict = + mzap_normalization_conflict(zc->zc_zap, + NULL, mze, &idx); + } else { + /* Plain MicroZAP: fixed chunk size */ + mzap_ent_phys_t *mzep = + MZE_PHYS(zc->zc_zap, mze); + ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); + za->za_normalization_conflict = + mzap_normalization_conflict(zc->zc_zap, + NULL, mze, &idx); + za->za_integer_length = 8; + za->za_num_integers = 1; + za->za_first_integer = mzep->mze_value; + (void) strlcpy(za->za_name, mzep->mze_name, + za->za_name_len); + } zc->zc_hash = (uint64_t)mze->mze_hash << 32; zc->zc_cd = mze->mze_cd; err = 0; @@ -1235,6 +1464,8 @@ zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs) zs->zs_blocksize = zap->zap_dbuf->db_size; zs->zs_num_entries = zap->zap_m.zap_num_entries; zs->zs_num_blocks = 1; + if (MZAP_IS_TINYZAP(zap_m_phys(zap))) + tzap_get_stats(zap, zs); /* Populate TinyZAP fields */ } else { fzap_get_stats(zap, zs); } diff --git a/module/zfs/zap_impl.c b/module/zfs/zap_impl.c index 0c2ba1cdbfe7..7d146518745d 100644 --- a/module/zfs/zap_impl.c +++ b/module/zfs/zap_impl.c @@ -27,6 +27,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2024, Klara, Inc. * Copyright (c) 2026, TrueNAS. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. 
*/ #include @@ -351,8 +352,16 @@ zap_lock_impl(dnode_t *dn, dmu_buf_t *db, dmu_tx_t *tx, return (err); } VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx)); - zap->zap_m.zap_num_chunks = - db->db_size / MZAP_ENT_LEN - 1; + if (zap->zap_m.zap_stride != 0 && + zap->zap_m.zap_chunk_size != 0) { + /* Fully promoted TinyZAP */ + zap->zap_m.zap_num_chunks = + (int16_t)((newsz - MZAP_ENT_LEN) / + zap->zap_m.zap_chunk_size); + } else { + zap->zap_m.zap_num_chunks = + newsz / MZAP_ENT_LEN - 1; + } if (newsz > SPA_OLD_MAXBLOCKSIZE) { dsl_dataset_t *ds = dmu_objset_ds(os); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index a7c9c9c03b45..393cc62d2d92 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -26,6 +26,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2024, Klara, Inc. + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. */ #include @@ -39,6 +40,8 @@ #include #include #include +#include +#include #ifdef _KERNEL #include @@ -87,12 +90,40 @@ mzap_byteswap(mzap_phys_t *buf, size_t size) buf->mz_block_type = BSWAP_64(buf->mz_block_type); buf->mz_salt = BSWAP_64(buf->mz_salt); buf->mz_normflags = BSWAP_64(buf->mz_normflags); - int max = (size / MZAP_ENT_LEN) - 1; + boolean_t tinyzap = MZAP_IS_TINYZAP(buf); + uint16_t stride = tinyzap ? MZAP_STRIDE(buf) : 0; + uint16_t chunk = MZAP_ENT_LEN; /* default: MicroZAP */ + + if (tinyzap && stride != 0) { + ASSERT3U(buf->mz_chunk_shift, >=, TZAP_MIN_CHUNK_LOG2); + ASSERT3U(buf->mz_chunk_shift, <=, TZAP_MAX_CHUNK_LOG2); + chunk = MZAP_CHUNK_SIZE(buf); + ASSERT3U(stride, >=, TZAP_MIN_STRIDE); + ASSERT3U(stride, <=, tzap_max_stride(chunk)); + } + + /* Number of entry slots after the 64-byte header. 
*/ + int max = (int)((size - MZAP_ENT_LEN) / chunk); + for (int i = 0; i < max; i++) { - buf->mz_chunk[i].mze_value = - BSWAP_64(buf->mz_chunk[i].mze_value); - buf->mz_chunk[i].mze_cd = - BSWAP_32(buf->mz_chunk[i].mze_cd); + if (tinyzap && stride != 0) { + tzap_ent_phys_t *tze = (tzap_ent_phys_t *)( + (uint8_t *)buf->mz_chunk + (size_t)i * chunk); + /* + * Byteswap each uint64 in the value blob, then the + * cd (uint32). The name (char array) needs no swap. + */ + uint64_t *val = (uint64_t *)tze_value(tze); + for (int j = 0; j < stride / sizeof (uint64_t); j++) + val[j] = BSWAP_64(val[j]); + *tze_cd_ptr(tze, stride) = + BSWAP_32(*tze_cd_ptr(tze, stride)); + } else { + buf->mz_chunk[i].mze_value = + BSWAP_64(buf->mz_chunk[i].mze_value); + buf->mz_chunk[i].mze_cd = + BSWAP_32(buf->mz_chunk[i].mze_cd); + } } } @@ -110,7 +141,7 @@ mze_compare(const void *arg1, const void *arg2) ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t, mze_compare) -static void +void mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash) { mzap_ent_t mze; @@ -121,9 +152,18 @@ mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash) mze.mze_chunkid = chunkid; ASSERT0(hash & 0xffffffff); mze.mze_hash = hash >> 32; - ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff); - mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd; - ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0); + + if (zap->zap_m.zap_stride != 0) { + tzap_ent_phys_t *tze = TZE_PHYS(zap, &mze); + uint16_t stride = zap->zap_m.zap_stride; + ASSERT3U(*tze_cd_ptr(tze, stride), <=, 0xffff); + mze.mze_cd = (uint16_t)*tze_cd_ptr(tze, stride); + ASSERT(tze_name_ptr(tze, stride)[0] != 0); + } else { + ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff); + mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd; + ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0); + } zfs_btree_add(&zap->zap_m.zap_tree, &mze); } @@ -146,15 +186,27 @@ mze_find(zap_name_t *zn, zfs_btree_index_t *idx) mze = zfs_btree_next(tree, idx, idx); for (; mze && mze->mze_hash == 
mze_tofind.mze_hash; mze = zfs_btree_next(tree, idx, idx)) { - ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); - if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) + zap_t *zap = zn->zn_zap; + const char *name; + + if (zap->zap_m.zap_stride != 0) { + tzap_ent_phys_t *tze = TZE_PHYS(zap, mze); + name = tze_name_ptr(tze, zap->zap_m.zap_stride); + ASSERT3U(mze->mze_cd, ==, + *tze_cd_ptr(tze, zap->zap_m.zap_stride)); + } else { + ASSERT3U(mze->mze_cd, ==, + MZE_PHYS(zn->zn_zap, mze)->mze_cd); + name = MZE_PHYS(zap, mze)->mze_name; + } + if (zap_match(zn, name)) return (mze); } return (NULL); } -static uint32_t +uint32_t mze_find_unused_cd(zap_t *zap, uint64_t hash) { mzap_ent_t mze_tofind; @@ -269,6 +321,42 @@ mzap_open(dmu_buf_t *db) zap->zap_normflags = zap_m_phys(zap)->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; + mzap_phys_t *mzp = zap_m_phys(zap); + if (MZAP_IS_TINYZAP(mzp)) { + /* + * Validate entire TinyZAP geometry before touching + * any field. mz_chunk_shift and mz_value_ints are + * uint8_t. No endian issue. + */ + if (!TZAP_VERIFY_PHYS(zap)) { + /* + * DEBUG: Geometry is corrupt. return EIO. + */ + rw_exit(&zap->zap_rwlock); + rw_destroy(&zap->zap_rwlock); + if (!zap->zap_ismicro) + mutex_destroy( + &zap->zap_f.zap_num_entries_mtx); + kmem_free(zap, sizeof (zap_t)); + return (NULL); /* caller checks for NULL */ + } + uint16_t stride = MZAP_STRIDE(mzp); + zap->zap_m.zap_stride = stride; + if (stride != 0) { + /* + * Fully promoted TinyZAP: chunk geometry is + * stamped. Restore chunk_size and num_chunks + * so TZE_PHYS() computes correct byte offsets. + */ + uint16_t chunk = MZAP_CHUNK_SIZE(mzp); + ASSERT3U(chunk, >=, TZAP_MIN_CHUNK); + ASSERT3U(chunk, <=, TZAP_MAX_CHUNK); + zap->zap_m.zap_chunk_size = chunk; + zap->zap_m.zap_num_chunks = + (int16_t)((db->db_size - + MZAP_ENT_LEN) / chunk); + } + } /* * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove() * overhead on massive inserts below. 
It still allows to store @@ -278,12 +366,27 @@ mzap_open(dmu_buf_t *db) mze_find_in_buf, sizeof (mzap_ent_t), 512); zap_name_t *zn = zap_name_alloc(zap, B_FALSE); + uint16_t stride = zap->zap_m.zap_stride; + uint16_t chunk = stride != 0 ? + zap->zap_m.zap_chunk_size : MZAP_ENT_LEN; for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) { - mzap_ent_phys_t *mze = - &zap_m_phys(zap)->mz_chunk[i]; - if (mze->mze_name[0]) { + const char *name; + if (stride != 0) { + /* + * TinyZAP: slot offset = i * chunk bytes. + * mz_chunk[i] would give wrong offsets for + * chunk > 64 — must use byte arithmetic. + */ + tzap_ent_phys_t *tze = (tzap_ent_phys_t *)( + (uint8_t *)zap_m_phys(zap)->mz_chunk + + (size_t)i * chunk); + name = tze_name_ptr(tze, stride); + } else { + name = zap_m_phys(zap)->mz_chunk[i].mze_name; + } + if (name[0]) { zap->zap_m.zap_num_entries++; - zap_name_init_str(zn, mze->mze_name, 0); + zap_name_init_str(zn, name, 0); mze_insert(zap, i, zn->zn_hash); } } @@ -353,17 +456,46 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) fzap_upgrade(zap, tx, flags); zap_name_t *zn = zap_name_alloc(zap, B_FALSE); - for (int i = 0; i < nchunks; i++) { - mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; - if (mze->mze_name[0] == 0) - continue; - dprintf("adding %s=%llu\n", - mze->mze_name, (u_longlong_t)mze->mze_value); - zap_name_init_str(zn, mze->mze_name, 0); - /* If we fail here, we would end up losing entries */ - VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, - tx)); + + if (MZAP_IS_TINYZAP(mzp)) { + /* + * spa_feature_decr() requires syncing context. 
+ */ + spa_t *spa = dmu_objset_spa(zap->zap_objset); + if (dmu_tx_is_syncing(tx)) { + if (spa_feature_is_active(spa, SPA_FEATURE_TINYZAP)) + spa_feature_decr(spa, SPA_FEATURE_TINYZAP, tx); + } else { + tzap_feature_arg_t *tfa = + kmem_alloc(sizeof (*tfa), KM_SLEEP); + tfa->tfa_spa = spa; + dmu_tx_callback_register(tx, + tzap_feature_decr_cb, tfa); + } + /* + * TinyZAP: entries use tzap_ent_phys_t format, + * tzap_upgrade_entries() handles the fzap_add_cd(). + */ + err = tzap_upgrade_entries(mzp, sz, zn, tx); + if (err != 0) { + zap_name_free(zn); + vmem_free(mzp, sz); + return (err); + } + } else { + for (int i = 0; i < nchunks; i++) { + mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; + if (mze->mze_name[0] == 0) + continue; + dprintf("adding %s=%llu\n", + mze->mze_name, (u_longlong_t)mze->mze_value); + zap_name_init_str(zn, mze->mze_name, 0); + /* If we fail here, we would end up losing entries */ + VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, + mze->mze_cd, tx)); + } } + zap = zn->zn_zap; /* fzap_add_cd() may change zap */ zap_name_free(zn); vmem_free(mzp, sz); *zapp = zap; @@ -432,12 +564,27 @@ mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze, other && other->mze_hash == mze->mze_hash; other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) { + /* TinyZAP entries use tze_name_ptr */ + const char *name; + if (zap->zap_m.zap_stride != 0) { + name = tze_name_ptr(TZE_PHYS(zap, other), + zap->zap_m.zap_stride); + } else { + name = MZE_PHYS(zap, other)->mze_name; + } + if (zn == NULL) { - zn = zap_name_alloc_str(zap, - MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); + const char *mze_name; + if (zap->zap_m.zap_stride != 0) { + mze_name = tze_name_ptr(TZE_PHYS(zap, mze), + zap->zap_m.zap_stride); + } else { + mze_name = MZE_PHYS(zap, mze)->mze_name; + } + zn = zap_name_alloc_str(zap, mze_name, MT_NORMALIZE); allocdzn = B_TRUE; } - if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { + if (zap_match(zn, name)) { if (allocdzn) zap_name_free(zn); 
return (B_TRUE); @@ -448,12 +595,26 @@ mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze, other && other->mze_hash == mze->mze_hash; other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) { + const char *name; + if (zap->zap_m.zap_stride != 0) { + name = tze_name_ptr(TZE_PHYS(zap, other), + zap->zap_m.zap_stride); + } else { + name = MZE_PHYS(zap, other)->mze_name; + } + if (zn == NULL) { - zn = zap_name_alloc_str(zap, - MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); + const char *mze_name; + if (zap->zap_m.zap_stride != 0) { + mze_name = tze_name_ptr(TZE_PHYS(zap, mze), + zap->zap_m.zap_stride); + } else { + mze_name = MZE_PHYS(zap, mze)->mze_name; + } + zn = zap_name_alloc_str(zap, mze_name, MT_NORMALIZE); allocdzn = B_TRUE; } - if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { + if (zap_match(zn, name)) { if (allocdzn) zap_name_free(zn); return (B_TRUE); diff --git a/module/zfs/zap_tiny.c b/module/zfs/zap_tiny.c new file mode 100644 index 000000000000..8a34c7a3e6d4 --- /dev/null +++ b/module/zfs/zap_tiny.c @@ -0,0 +1,740 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void +tzap_feature_incr_sync(void *arg, dmu_tx_t *tx) +{ + tzap_feature_arg_t *tfa = arg; + spa_feature_incr(tfa->tfa_spa, SPA_FEATURE_TINYZAP, tx); + kmem_free(tfa, sizeof (*tfa)); +} + +void +tzap_feature_decr_sync(void *arg, dmu_tx_t *tx) +{ + tzap_feature_arg_t *tfa = arg; + if (spa_feature_is_active(tfa->tfa_spa, SPA_FEATURE_TINYZAP)) + spa_feature_decr(tfa->tfa_spa, SPA_FEATURE_TINYZAP, tx); + kmem_free(tfa, sizeof (*tfa)); +} + +/* + * dmu_tx post-commit callback: incr|decr TinyZAP feature refcount. + * Fires in syncing context after the upgrading tx commits. Safe to + * call spa_feature_{incr|decr} here without holding any tx. 
+ */ + +void +tzap_feature_incr_cb(void *arg, int error) +{ + tzap_feature_arg_t *tfa = arg; + + if (error == 0) + /* tfa freed by tzap_feature_incr_sync */ + VERIFY0(dsl_sync_task(spa_name(tfa->tfa_spa), NULL, + tzap_feature_incr_sync, tfa, 0, ZFS_SPACE_CHECK_NONE)); + else + /* tx aborted, upgrade did not happen, just free */ + kmem_free(tfa, sizeof (*tfa)); +} + +void +tzap_feature_decr_cb(void *arg, int error) +{ + tzap_feature_arg_t *tfa = arg; + + if (error == 0) + /* tfa freed by tzap_feature_decr_sync */ + VERIFY0(dsl_sync_task(spa_name(tfa->tfa_spa), NULL, + tzap_feature_decr_sync, tfa, 0, ZFS_SPACE_CHECK_NONE)); + else + /* tx aborted, upgrade did not happen, just free */ + kmem_free(tfa, sizeof (*tfa)); +} + +const uint16_t tzap_chunk_table[TZAP_CHUNK_SIZES] = { 64, 128, 256 }; +/* + * tzap_chunk_for_stride: select the smallest chunk (64/128/256) that + * can accommodate (stride + cd(4) + keylen + NUL). + * + * Returns chunk_id (0=64B, 1=128B, or 2=256B) on success. + * Returns -1 if no chunk fits, caller must fall back to FatZAP. + */ +static int +tzap_chunk_for_stride(uint16_t stride, size_t keylen) +{ + /* For stride=8 (1×uint64, long-name path), skip chunk=64 (cid=0) */ + int start_cid = (stride == TZAP_MIN_STRIDE) ? 1 : 0; + + for (int cid = start_cid; cid < TZAP_CHUNK_SIZES; cid++) { + uint16_t chunk = tzap_chunk_table[cid]; + /* + * Slot must hold: value (stride) + cd (4) + at least + * TZAP_MIN_NAME_LEN bytes of name. + */ + if ((uint32_t)stride + sizeof (uint32_t) + + TZAP_MIN_NAME_LEN > chunk) + continue; + + if (keylen < (size_t)TZAP_NAME_LEN(chunk, stride)) + return (cid); + } + return (-1); +} + +/* + * tzap_try_promote: attempt to promote this MicroZAP to TinyZAP. + * + * Called on the first zap_add() when ZAP_FLAG_TINYZAP was set at + * create time, or when a plain MicroZAP entry doesn't fit. 
+ *
+ * Selects the smallest fitting chunk via tzap_chunk_for_stride(), then:
+ *   writes three independent uint8_t fields on-disk:
+ *     mz_flags       = MZAP_FLAG_TINY (bit 0)
+ *     mz_chunk_shift = log2(chunk)    (6, 7, or 8)
+ *     mz_value_ints  = stride / 8
+ *   sets zap_stride, zap_chunk_size, zap_num_chunks in-memory.
+ *
+ * A value wider than TZAP_MAX_CHUNK bytes can never fit any chunk; it is
+ * rejected before the width is narrowed to uint16_t, so a huge
+ * num_integers cannot wrap around into a small, valid-looking stride.
+ *
+ * Returns B_TRUE  : if the ZAP was promoted (or was already promoted).
+ * Returns B_FALSE : if the geometry doesn't qualify, caller falls back
+ *                   to plain MicroZAP / FatZAP promotion.
+ */
+boolean_t
+tzap_try_promote(zap_t *zap, int integer_size, uint64_t num_integers,
+    const char *key, dmu_tx_t *tx)
+{
+	ASSERT(zap->zap_ismicro);
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	/* Already promoted on a previous add */
+	if (zap->zap_m.zap_stride != 0)
+		return (B_TRUE);
+
+	/* Require the pool feature to be enabled */
+	spa_t *spa = dmu_objset_spa(zap->zap_objset);
+	if (!spa_feature_is_enabled(spa, SPA_FEATURE_TINYZAP)) {
+		dprintf("tzap_try_promote: SPA_FEATURE_TINYZAP is not enabled "
+		    "obj=%llu, falling back to FatZAP\n",
+		    (u_longlong_t)zap->zap_object);
+		return (B_FALSE);
+	}
+
+	if (integer_size != 8)
+		return (B_FALSE);
+
+	/*
+	 * Bound num_integers BEFORE narrowing to uint16_t.  Casting
+	 * num_integers * 8 directly would wrap for num_integers >= 8192
+	 * (e.g. 8193 -> stride 8), promoting the ZAP with a stride far
+	 * smaller than the caller's value.  Oversized requests keep
+	 * cid == -1 and take the normal "disqualified" path below.
+	 */
+	int cid = -1;
+	uint16_t stride = 0;
+	if (num_integers <= TZAP_MAX_CHUNK / sizeof (uint64_t)) {
+		stride = (uint16_t)(num_integers * sizeof (uint64_t));
+
+		if (stride < TZAP_MIN_STRIDE)
+			return (B_FALSE);
+
+		cid = tzap_chunk_for_stride(stride, strlen(key));
+	}
+	if (cid < 0 || cid >= TZAP_CHUNK_SIZES) {
+		/*
+		 * Hint was set at create time but first add does not
+		 * qualify. Clear mz_flags so mzap_open() on remount
+		 * does not misread plain MicroZAP entries as TinyZAP.
+		 */
+		zap_m_phys(zap)->mz_flags = 0;
+		dprintf("tzap_try_promote: disqualified obj=%llu "
+		    "integer_size=%d num_integers=%llu key=%s\n",
+		    (u_longlong_t)zap->zap_object, integer_size,
+		    (u_longlong_t)num_integers, key);
+		return (B_FALSE);
+	}
+
+	/* Existing MicroZAP entries can only be re-encoded at stride 8 */
+	if (zap->zap_m.zap_num_entries > 0 && stride != 8)
+		return (B_FALSE);
+
+	ASSERT(cid >= 0 && cid < TZAP_CHUNK_SIZES);
+
+	/*
+	 * chunk_log2: 6=64B, 7=128B, 8=256B.
+	 * tzap_chunk_table is {2^6, 2^7, 2^8}, so log2 = MIN_LOG2 + cid.
+	 */
+	uint16_t chunk = tzap_chunk_table[cid];
+	uint8_t chunk_log2 = (uint8_t)(TZAP_MIN_CHUNK_LOG2 + cid);
+	ASSERT3U(chunk_log2, >=, TZAP_MIN_CHUNK_LOG2);
+	ASSERT3U(chunk_log2, <=, TZAP_MAX_CHUNK_LOG2);
+	ASSERT3U(1U << chunk_log2, ==, chunk);
+
+	if (zap->zap_m.zap_num_entries > 0) {
+		/*
+		 * Block may be too small to re-encode all existing MicroZAP
+		 * entries at the new chunk pitch. Grow it first if needed,
+		 * otherwise return B_FALSE so caller upgrades to FatZAP.
+		 */
+		size_t need = (size_t)(zap->zap_m.zap_num_entries + 1) *
+		    chunk + MZAP_ENT_LEN;
+		if (need > (size_t)zap->zap_dbuf->db_size) {
+			uint64_t newsz = P2ROUNDUP(need, SPA_MINBLOCKSIZE);
+			uint64_t maxsz = zap_get_micro_max_size(
+			    dmu_objset_spa(zap->zap_objset));
+			if (newsz > maxsz)
+				return (B_FALSE);
+			VERIFY0(dmu_object_set_blocksize(zap->zap_objset,
+			    zap->zap_object, newsz, 0, tx));
+		}
+		tzap_reencode_micro_to_tiny(zap, chunk, tx);
+	}
+
+	/* Stamp on-disk geometry into three independent uint8_t fields */
+	zap_m_phys(zap)->mz_flags = MZAP_FLAG_TINY;
+	zap_m_phys(zap)->mz_chunk_shift = chunk_log2;
+	zap_m_phys(zap)->mz_value_ints = (uint8_t)(stride / sizeof (uint64_t));
+
+	/*
+	 * spa_feature_incr() requires syncing context.
+	 */
+	if (dmu_tx_is_syncing(tx)) {
+		spa_feature_incr(spa, SPA_FEATURE_TINYZAP, tx);
+	} else {
+		tzap_feature_arg_t *tfa = kmem_alloc(sizeof (*tfa), KM_SLEEP);
+		tfa->tfa_spa = spa;
+		/* tfa freed by callback */
+		dmu_tx_callback_register(tx, tzap_feature_incr_cb, tfa);
+	}
+
+	/* Only set in-memory state here if NOT re-encoded above */
+	if (zap->zap_m.zap_num_entries == 0) {
+		zap->zap_m.zap_stride = stride;
+		zap->zap_m.zap_chunk_size = chunk;
+		zap->zap_m.zap_num_chunks =
+		    (int16_t)((zap->zap_dbuf->db_size - MZAP_ENT_LEN) / chunk);
+	}
+	dprintf("tzap_try_promote: Promoted obj=%llu stride=%u chunk=%uB "
+	    "(log2=%u) nchunks=%d name_avail=%u key=%s\n",
+	    (u_longlong_t)zap->zap_object, stride, chunk, chunk_log2,
+	    zap->zap_m.zap_num_chunks, TZAP_NAME_LEN(chunk, stride) - 1, key);
+	return (B_TRUE);
+}
+
+/*
+ * tzap_addent: insert an entry into a TinyZAP.
+ *
+ * zap_stride and zap_chunk_size must already be stamped by
+ * tzap_try_promote().
+ *
+ * Slot layout: [value(stride bytes) | cd(4B) | name(NUL-terminated)]
+ */
+void
+tzap_addent(zap_name_t *zn, const void *val)
+{
+	zap_t *zap = zn->zn_zap;
+	uint16_t stride = zap->zap_m.zap_stride;
+	uint16_t chunk = zap->zap_m.zap_chunk_size;
+
+	ASSERT(zap->zap_ismicro);
+	ASSERT(stride != 0);
+	ASSERT(chunk != 0);
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+	ASSERT3U(stride, >=, TZAP_MIN_STRIDE);
+	ASSERT3U(stride, <=, tzap_max_stride(chunk));
+	ASSERT3U(strlen(zn->zn_key_orig), <, TZAP_NAME_LEN(chunk, stride));
+	ASSERT3U(zap->zap_m.zap_num_entries, <, zap->zap_m.zap_num_chunks);
+
+#ifdef ZFS_DEBUG
+	for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
+		tzap_ent_phys_t *tze = (tzap_ent_phys_t *)
+		    ((uint8_t *)zap_m_phys(zap)->mz_chunk +
+		    ((size_t)i * chunk));
+		if (tze_name_ptr(tze, stride)[0] == '\0')
+			continue;
+		ASSERT(strcmp(tze_name_ptr(tze, stride),
+		    zn->zn_key_orig) != 0);
+	}
+#endif
+
+	uint16_t start = zap->zap_m.zap_alloc_next;
+	uint32_t cd = mze_find_unused_cd(zap, zn->zn_hash);
+	/* TinyZAP
capacity is small; this must not happen */ + ASSERT(cd < zap_maxcd(zap)); + +again: + for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) { + tzap_ent_phys_t *tze = (tzap_ent_phys_t *)((uint8_t *) + zap_m_phys(zap)->mz_chunk + (i * chunk)); + + if (tze_name_ptr(tze, stride)[0] != '\0') + continue; + + memcpy(tze_value(tze), val, stride); + *tze_cd_ptr(tze, stride) = cd; + (void) strlcpy(tze_name_ptr(tze, stride), + zn->zn_key_orig, TZAP_NAME_LEN(chunk, stride)); + zap->zap_m.zap_num_entries++; + zap->zap_m.zap_alloc_next = i + 1; + if (zap->zap_m.zap_alloc_next == zap->zap_m.zap_num_chunks) + zap->zap_m.zap_alloc_next = 0; + mze_insert(zap, i, zn->zn_hash); + return; + } + if (start != 0) { + start = 0; + goto again; + } + cmn_err(CE_PANIC, + "tzap_addent: out of TinyZAP entries obj=%llu\n", + (u_longlong_t)zap->zap_object); +} + +/* + * tzap_lookup: read value and optional realname from a TinyZAP entry. + * + * Returns 0 on success. + * Returns EINVAL if integer_size != 8. + * Returns EOVERFLOW if num_integers < stride/8. + */ +int +tzap_lookup(zap_t *zap, mzap_ent_t *mze, uint64_t integer_size, + uint64_t num_integers, void *buf, char *realname, int rn_len, + boolean_t *ncp) +{ + uint16_t stride = zap->zap_m.zap_stride; + + ASSERT(zap->zap_ismicro); + ASSERT(stride != 0); + ASSERT(mze != NULL); + + if (integer_size != 8) + return (SET_ERROR(EINVAL)); + + uint64_t stored_ints = stride / sizeof (uint64_t); + if (num_integers < stored_ints) + return (SET_ERROR(EOVERFLOW)); + + tzap_ent_phys_t *tze = TZE_PHYS(zap, mze); + memcpy(buf, tze_value(tze), stride); + + if (realname != NULL) + (void) strlcpy(realname, tze_name_ptr(tze, stride), rn_len); + + /* + * Normalization conflicts are now checked via + * mzap_normalization_conflict. + */ + if (ncp != NULL) { + zfs_btree_index_t idx; + zap_name_t *zn_tmp = zap_name_alloc_str(zap, + tze_name_ptr(tze, stride), MT_NORMALIZE); + if (zn_tmp != NULL) { + mzap_ent_t *m = mze_find(zn_tmp, &idx); + *ncp = (m != NULL) ? 
+ mzap_normalization_conflict(zap, zn_tmp, m, &idx) : + B_FALSE; + zap_name_free(zn_tmp); + } else { + *ncp = B_FALSE; + } + } + + return (0); +} + +/* + * tzap_reencode_micro_to_tiny: re-encode all plain MicroZAP entries + * (stride=8, fixed 64-byte slots) into TinyZAP format (stride=8, + * chunk=128 or 256). + * + * Called from tzap_try_promote() when zap_num_entries > 0 and + * stride == 8 (long-name trigger). The caller has already selected + * chunk and verified all existing names fit TZAP_NAME_LEN(chunk, 8). + * + * Uses a scratch copy to avoid aliasing: source slots are 64 bytes, + * destination slots are 128 or 256 bytes, so they overlap if done in-place. + * + * Caller must hold RW_WRITER and have called dmu_buf_will_dirty(). + */ +void +tzap_reencode_micro_to_tiny(zap_t *zap, uint16_t chunk, dmu_tx_t *tx) +{ + (void) tx; + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + ASSERT3U(chunk, >, MZAP_ENT_LEN); /* chunk=64 skipped for stride=8 */ + + int db_size = zap->zap_dbuf->db_size; + /* + * Use db_size to derive old_nchunks: zap_num_chunks may reflect a + * freshly grown block (dmu_object_set_blocksize just called), but + * the on-disk content is still in plain MicroZAP format with + * MZAP_ENT_LEN-sized slots. 
+ */ + int old_nchunks = (db_size - MZAP_ENT_LEN) / MZAP_ENT_LEN; + uint16_t stride = 8; + + /* copy of the current MicroZAP block */ + mzap_phys_t *src = vmem_alloc(db_size, KM_SLEEP); + memcpy(src, zap_m_phys(zap), db_size); + + /* zero the destination chunk area (header is preserved in place) */ + memset(zap_m_phys(zap)->mz_chunk, 0, db_size - MZAP_ENT_LEN); + + int new_nchunks = (db_size - MZAP_ENT_LEN) / chunk; + int dst_slot = 0; + + for (int i = 0; i < old_nchunks; i++) { + mzap_ent_phys_t *me = &src->mz_chunk[i]; + if (me->mze_name[0] == '\0') + continue; + + ASSERT3U(dst_slot, <, new_nchunks); + + tzap_ent_phys_t *tze = (tzap_ent_phys_t *) + ((uint8_t *)zap_m_phys(zap)->mz_chunk + + (size_t)dst_slot * chunk); + /* value: 1×uint64 */ + memcpy(tze_value(tze), &me->mze_value, stride); + /* cd */ + *tze_cd_ptr(tze, stride) = me->mze_cd; + /* name */ + (void) strlcpy(tze_name_ptr(tze, stride), + me->mze_name, TZAP_NAME_LEN(chunk, stride)); + + dst_slot++; + } + + vmem_free(src, db_size); + + /* + * Rebuild the in-memory B-tree: mze_chunkid values must map to + * the new contiguous dst_slot indices, not the original sparse + * plain MicroZAP slot indices. + * mze_insert() reads from the live on-disk block, which is now + * in TinyZAP format, so we must set zap_stride and zap_chunk_size + * BEFORE rebuilding the tree. 
+ */ + mze_destroy(zap); + zap->zap_m.zap_stride = stride; + zap->zap_m.zap_chunk_size = chunk; + zap->zap_m.zap_num_chunks = (int16_t)new_nchunks; + /* zap_num_entries is unchanged */ + + zap_name_t *zn = zap_name_alloc(zap, B_FALSE); + for (int i = 0; i < new_nchunks; i++) { + tzap_ent_phys_t *tze = (tzap_ent_phys_t *) + ((uint8_t *)zap_m_phys(zap)->mz_chunk + + (size_t)i * chunk); + const char *name = tze_name_ptr(tze, stride); + if (name[0] == '\0') + continue; + zap_name_init_str(zn, name, 0); + mze_insert(zap, i, zn->zn_hash); + } + zap_name_free(zn); + zap->zap_m.zap_alloc_next = zap->zap_m.zap_num_entries; +} + +/* + * Attempt to grow chunk size to fit a longer key. + * Returns B_TRUE if upgrade succeeded and the entry can be added, + * B_FALSE if no larger chunk fits and caller must go to FatZAP. + */ +boolean_t +tzap_try_chunk_upgrade(zap_t *zap, uint16_t stride, size_t keylen, + dmu_tx_t *tx) +{ + uint16_t chunk = zap->zap_m.zap_chunk_size; + + int new_cid = tzap_chunk_for_stride(stride, keylen); + if (new_cid < 0) + return (B_FALSE); + + uint16_t new_chunk = tzap_chunk_table[new_cid]; + if (new_chunk <= chunk) + return (B_FALSE); + + size_t need = (size_t)(zap->zap_m.zap_num_entries + 1) * + new_chunk + MZAP_ENT_LEN; + if (need > (size_t)zap->zap_dbuf->db_size) { + uint64_t newsz = P2ROUNDUP(need, SPA_MINBLOCKSIZE); + uint64_t maxsz = + zap_get_micro_max_size(dmu_objset_spa(zap->zap_objset)); + if (newsz > maxsz) + return (B_FALSE); + VERIFY0(dmu_object_set_blocksize(zap->zap_objset, + zap->zap_object, newsz, 0, tx)); + } + + return (tzap_upgrade_chunk(zap, new_chunk, tx) == 0); +} + +/* + * Add all TinyZAP entries from old_chunk to + * new_chunk, keeping stride unchanged. + * + * Triggered when an entry's key fits the stamped stride but is too long + * for the current TZAP_NAME_LEN(chunk, stride). + * + * Caller must have grown the block if necessary and called + * dmu_buf_will_dirty(). Holds RW_WRITER. 
+ * + * Returns 0 on success, ENOSPC if entries won't fit at new pitch. + */ +int +tzap_upgrade_chunk(zap_t *zap, uint16_t new_chunk, dmu_tx_t *tx) +{ + (void) tx; + ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); + + uint16_t stride = zap->zap_m.zap_stride; + uint16_t old_chunk = zap->zap_m.zap_chunk_size; + int db_size = zap->zap_dbuf->db_size; + + ASSERT3U(new_chunk, >, old_chunk); + ASSERT3U(stride, !=, 0); + + int new_nchunks = (db_size - MZAP_ENT_LEN) / new_chunk; + if (zap->zap_m.zap_num_entries > new_nchunks) + return (SET_ERROR(ENOSPC)); + + mzap_phys_t *src = vmem_alloc(db_size, KM_SLEEP); + memcpy(src, zap_m_phys(zap), db_size); + memset(zap_m_phys(zap)->mz_chunk, 0, db_size - MZAP_ENT_LEN); + + int old_nchunks = zap->zap_m.zap_num_chunks; + int dst_slot = 0; + + for (int i = 0; i < old_nchunks; i++) { + tzap_ent_phys_t *s = (tzap_ent_phys_t *) + ((uint8_t *)src->mz_chunk + (size_t)i * old_chunk); + if (tze_name_ptr(s, stride)[0] == '\0') + continue; + ASSERT3U(dst_slot, <, new_nchunks); + tzap_ent_phys_t *d = (tzap_ent_phys_t *) + ((uint8_t *)zap_m_phys(zap)->mz_chunk + + (size_t)dst_slot * new_chunk); + memcpy(tze_value(d), tze_value(s), stride); + *tze_cd_ptr(d, stride) = *tze_cd_ptr(s, stride); + (void) strlcpy(tze_name_ptr(d, stride), + tze_name_ptr(s, stride), + TZAP_NAME_LEN(new_chunk, stride)); + dst_slot++; + } + vmem_free(src, db_size); + uint8_t new_log2 = 0; + for (uint16_t v = new_chunk; v > 1; v >>= 1) + new_log2++; + + /* Only the chunk size changes; TINY bit and stride stay. 
*/ + zap_m_phys(zap)->mz_chunk_shift = new_log2; + + mze_destroy(zap); + zap->zap_m.zap_chunk_size = new_chunk; + zap->zap_m.zap_num_chunks = (int16_t)new_nchunks; + zap->zap_m.zap_alloc_next = zap->zap_m.zap_num_entries; + + zap_name_t *zn = zap_name_alloc(zap, B_FALSE); + for (int i = 0; i < new_nchunks; i++) { + tzap_ent_phys_t *tze = (tzap_ent_phys_t *) + ((uint8_t *)zap_m_phys(zap)->mz_chunk + + (size_t)i * new_chunk); + const char *name = tze_name_ptr(tze, stride); + if (name[0] == '\0') + continue; + zap_name_init_str(zn, name, 0); + mze_insert(zap, i, zn->zn_hash); + } + zap_name_free(zn); + return (0); +} + +/* + * tzap_cursor_fill: populate a zap_attribute_t from a TinyZAP entry. + * + * za_integer_length = 8 (always uint64_t) + * za_num_integers = stride / 8 + * za_first_integer = first uint64 of value blob only; callers + * needing the full blob must call zap_lookup(). + */ +void +tzap_cursor_fill(zap_cursor_t *zc, mzap_ent_t *mze, zap_attribute_t *za) +{ + zap_t *zap = zc->zc_zap; + uint16_t stride = zap->zap_m.zap_stride; + + ASSERT(stride != 0); + ASSERT(mze != NULL); + + tzap_ent_phys_t *tze = TZE_PHYS(zap, mze); + + za->za_integer_length = sizeof (uint64_t); + za->za_num_integers = stride / sizeof (uint64_t); + za->za_first_integer = *(const uint64_t *)tze_value(tze); + + (void) strlcpy(za->za_name, tze_name_ptr(tze, stride), + za->za_name_len); +} + +/* + * tzap_upgrade_entries: re-encode TinyZAP entries into a FatZAP. + * nchunks: total chunk slots in mzp (computed by caller from block size). + */ +int +tzap_upgrade_entries(mzap_phys_t *mzp, size_t db_size, + zap_name_t *zn, dmu_tx_t *tx) +{ + uint16_t stride = MZAP_STRIDE(mzp); + + /* + * Read geometry from independent uint8_t fields. + * mz_value_ints == 0 means no entries were ever added. 
+ */ + if (stride == 0) + return (0); + + ASSERT(MZAP_IS_TINYZAP(mzp)); + + uint8_t log2 = mzp->mz_chunk_shift; + ASSERT3U(log2, >=, TZAP_MIN_CHUNK_LOG2); + ASSERT3U(log2, <=, TZAP_MAX_CHUNK_LOG2); + + /* Derive actual chunk byte-size */ + uint16_t chunk = MZAP_CHUNK_SIZE(mzp); + ASSERT3U(chunk, >=, TZAP_MIN_CHUNK); + ASSERT3U(chunk, <=, TZAP_MAX_CHUNK); + + /* Derive nchunks from TinyZAP chunk size */ + int nchunks = (int)((db_size - MZAP_ENT_LEN) / chunk); + ASSERT3S(nchunks, >, 0); + for (int i = 0; i < nchunks; i++) { + tzap_ent_phys_t *tze = + (tzap_ent_phys_t *)((uint8_t *)mzp->mz_chunk + + (size_t)i * chunk); + const char *name = tze_name_ptr(tze, stride); + + if (name[0] == '\0') + continue; + + (void) zap_name_init_str(zn, name, 0); + + int err = fzap_add_cd(zn, + sizeof (uint64_t), + stride / sizeof (uint64_t), + tze_value(tze), + *tze_cd_ptr(tze, stride), tx); + if (err != 0) { + dprintf("tzap_upgrade_entries: obj=%llu " + "key=%s fzap_add_cd failed: error=%d\n", + (u_longlong_t)zn->zn_zap->zap_object, name, err); + return (err); + } + } + return (0); +} + +/* + * tzap_get_stats: fill the TinyZAP-specific fields of a zap_stats_t. + * + * Called from zap_get_stats() when the ZAP is a MicroZAP with + * ZAP_FLAG_TINYZAP set. + */ +void +tzap_get_stats(zap_t *zap, zap_stats_t *zs) +{ + ASSERT(zap->zap_ismicro); + + mzap_phys_t *mzp = zap_m_phys(zap); + + zs->zs_is_tinyzap = MZAP_IS_TINYZAP(mzp); + zs->zs_tinyzap_stride = MZAP_STRIDE(mzp); + zs->zs_tinyzap_chunk = (zs->zs_is_tinyzap) ? + MZAP_CHUNK_SIZE(mzp) : 0; + zs->zs_tinyzap_num_chunks = zap->zap_m.zap_num_chunks; + zs->zs_tinyzap_flags = mzp->mz_flags; +} + +/* + * tzap_verify_phys: validate on-disk mz_flags before any dereference. 
+ */
+boolean_t
+tzap_verify_phys(const char *caller, zap_t *zap)
+{
+	mzap_phys_t *mzp = zap_m_phys(zap);
+
+	if (!MZAP_IS_TINYZAP(mzp)) {
+		cmn_err(CE_WARN, "%s: obj=%llu mz_flags=0x%x: "
+		    "TINY bit missing", caller,
+		    (u_longlong_t)zap->zap_object,
+		    (unsigned)mzp->mz_flags);
+		return (B_FALSE);
+	}
+
+	uint16_t stride = MZAP_STRIDE(mzp);
+	uint8_t log2 = mzp->mz_chunk_shift;
+
+	if (stride != 0 && (stride % sizeof (uint64_t)) != 0) {
+		cmn_err(CE_WARN, "%s: obj=%llu bad stride=%u",
+		    caller, (u_longlong_t)zap->zap_object, stride);
+		return (B_FALSE);
+	}
+
+	if (log2 < TZAP_MIN_CHUNK_LOG2 || log2 > TZAP_MAX_CHUNK_LOG2) {
+		cmn_err(CE_WARN, "%s: obj=%llu bad chunk_log2=%u",
+		    caller, (u_longlong_t)zap->zap_object, log2);
+		return (B_FALSE);
+	}
+
+	/*
+	 * Derive the chunk size only now that the on-disk shift is known
+	 * to be in range, so a corrupt mz_chunk_shift never feeds the
+	 * size derivation (presumably a shift -- confirm MZAP_CHUNK_SIZE).
+	 */
+	uint16_t chunk = MZAP_CHUNK_SIZE(mzp);
+	if (chunk != (1U << log2)) {
+		cmn_err(CE_WARN, "%s: obj=%llu chunk=%u != 1<<%u=%u",
+		    caller, (u_longlong_t)zap->zap_object,
+		    chunk, log2, 1U << log2);
+		return (B_FALSE);
+	}
+
+	/* A slot must hold value + cd + at least a minimal name */
+	if (stride + sizeof (uint32_t) + TZAP_MIN_NAME_LEN > chunk) {
+		cmn_err(CE_WARN, "%s: obj=%llu stride=%u won't fit in chunk=%u",
+		    caller, (u_longlong_t)zap->zap_object, stride, chunk);
+		return (B_FALSE);
+	}
+
+	/* Sanity: computed nchunks must be > 0 */
+	int nchunks = (int)((zap->zap_dbuf->db_size - MZAP_ENT_LEN) / chunk);
+	if (nchunks <= 0) {
+		cmn_err(CE_WARN, "%s: obj=%llu nchunks=%d <= 0 "
+		    "(dbuf_size=%lu chunk=%u)",
+		    caller, (u_longlong_t)zap->zap_object, nchunks,
+		    (ulong_t)zap->zap_dbuf->db_size, chunk);
+		return (B_FALSE);
+	}
+
+	dprintf("%s: obj=%llu TinyZAP layout OK nchunks=%d\n",
+	    caller, (u_longlong_t)zap->zap_object, nchunks);
+	return (B_TRUE);
+}
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index 0dda8fdfa363..55e852d2a7d7 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -696,6 +696,13 @@ tags = ['functional', 'cp_files']
 tests = ['zap_shrink_001_pos']
 tags = ['functional', 'zap_shrink']
 
+[tests/functional/zap]
+tests =
['zap_micro_tiny_promote_001', 'zap_micro_fat_promote_002', + 'zap_tiny_chunk_upgrade_003', 'zap_tiny_fat_004', + 'zap_tiny_add_remove_005', 'zap_tiny_update_006', + 'zap_tiny_remount_007', 'zap_tiny_feature_008'] +tags = ['functional', 'zap'] + [tests/functional/crtime] tests = ['crtime_001_pos' ] tags = ['functional', 'crtime'] diff --git a/tests/unit/Makefile.am b/tests/unit/Makefile.am index cb5bfc100136..e33778be5325 100644 --- a/tests/unit/Makefile.am +++ b/tests/unit/Makefile.am @@ -27,10 +27,12 @@ nodist_%C%_test_zap_SOURCES = \ module/zfs/zap_impl.c \ module/zfs/zap_micro.c \ module/zfs/zap_leaf.c \ + module/zfs/zap_tiny.c \ module/zfs/u8_textprep.c %C%_test_zap_SOURCES = \ - %D%/test_zap.c + %D%/test_zap.c \ + %D%/mock_spa.c %C%_test_zap_LDADD = \ libspl.la \ diff --git a/tests/unit/mock_dmu.c b/tests/unit/mock_dmu.c index 65c38c1fd9fb..a1e216123e0d 100644 --- a/tests/unit/mock_dmu.c +++ b/tests/unit/mock_dmu.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -315,6 +316,17 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) (void) ds; (void) tx; } +/* Mock dsl_sync_task */ +int +dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc, + dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified, + zfs_space_check_t space_check) +{ + (void) pool, (void) checkfunc, (void) syncfunc; + (void) arg, (void) blocks_modified, (void) space_check; + return (0); +} + boolean_t spa_feature_is_enabled(spa_t *spa, spa_feature_t f) { @@ -391,3 +403,16 @@ dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, (void) os; (void) object; (void) offset; (void) len; return (EIO); } + +int +dmu_tx_is_syncing(dmu_tx_t *tx) +{ + (void) tx; + return (B_FALSE); +} + +void +dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) +{ + (void) tx, (void) func, (void) data; +} diff --git a/tests/unit/mock_spa.c b/tests/unit/mock_spa.c new file mode 100644 index 000000000000..fdc813e10f1c --- /dev/null +++ 
b/tests/unit/mock_spa.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2026, Hewlett Packard Enterprise Development LP. + */ + +#include +#include +#include +#include +#include + +/* + * Mock implementations of spa/dsl functions for unit tests. + */ +char * +spa_name(spa_t *spa) +{ + (void) spa; + static char name[] = "mockpool"; + return (name); +} + +void +spa_feature_incr(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx) +{ + (void) spa, (void) fid, (void) tx; +} + +void +spa_feature_decr(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx) +{ + (void) spa, (void) fid, (void) tx; +} + +boolean_t +spa_feature_is_active(spa_t *spa, spa_feature_t fid) +{ + (void) spa, (void) fid; + return (B_FALSE); +} diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index c7931ca95e29..656b7c2ff653 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -395,6 +395,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/vdev_zaps/vdev_zaps.kshlib \ functional/xattr/xattr.cfg \ functional/xattr/xattr_common.kshlib \ + functional/zap/zap_common.kshlib \ functional/zoned_uid/zoned_uid.cfg \ functional/zoned_uid/zoned_uid_common.kshlib \ functional/zvol/zvol.cfg \ @@ -2351,6 +2352,16 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/zap_shrink/cleanup.ksh \ functional/zap_shrink/zap_shrink_001_pos.ksh \ functional/zap_shrink/setup.ksh \ + functional/zap/cleanup.ksh \ + functional/zap/zap_tiny_feature_008.ksh \ + functional/zap/zap_tiny_remount_007.ksh \ + 
functional/zap/zap_tiny_update_006.ksh \ + functional/zap/zap_tiny_add_remove_005.ksh \ + functional/zap/zap_tiny_fat_004.ksh \ + functional/zap/zap_tiny_chunk_upgrade_003.ksh \ + functional/zap/zap_micro_fat_promote_002.ksh \ + functional/zap/zap_micro_tiny_promote_001.ksh \ + functional/zap/setup.ksh \ functional/zpool_influxdb/cleanup.ksh \ functional/zpool_influxdb/setup.ksh \ functional/zpool_influxdb/zpool_influxdb.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index a68a2a4995cc..df0d6cc1f5d2 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -28,6 +28,7 @@ # # Copyright (c) 2013, 2014 by Delphix. All rights reserved. # Copyright 2016 Nexenta Systems, Inc. All rights reserved. +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. # # Set the expected properties of zpool @@ -177,5 +178,6 @@ if is_linux || is_freebsd; then "feature@longname" "feature@large_microzap" "feature@block_cloning_endian" + "feature@tinyzap" ) fi diff --git a/tests/zfs-tests/tests/functional/zap/cleanup.ksh b/tests/zfs-tests/tests/functional/zap/cleanup.ksh new file mode 100755 index 000000000000..59f8feff7d14 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/cleanup.ksh @@ -0,0 +1,35 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +verify_runnable "global" + +rm -rf "$TESTDIR"/zap-* +default_cleanup_noexit + +log_pass "ZAP test cleanup complete" + diff --git a/tests/zfs-tests/tests/functional/zap/setup.ksh b/tests/zfs-tests/tests/functional/zap/setup.ksh new file mode 100755 index 000000000000..48df7011bc8e --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/setup.ksh @@ -0,0 +1,33 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. 
$STF_SUITE/include/libtest.shlib +verify_runnable "global" + +default_raidz_setup_noexit "$DISKS" +log_pass "ZAP test setup complete" + diff --git a/tests/zfs-tests/tests/functional/zap/zap_common.kshlib b/tests/zfs-tests/tests/functional/zap/zap_common.kshlib new file mode 100644 index 000000000000..75e34bfbb24c --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_common.kshlib @@ -0,0 +1,194 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +# +# zap_get_obj <path> +# +# Return the ZFS DMU object number for <path> by scanning the full +# dataset dump from zdb, matching on the trailing path component. 
+# +zap_get_obj() +{ + typeset path=$1 + typeset name + + name=$(basename "$path") + sync_pool $TESTPOOL > /dev/null 2>&1 + + zdb -ddddddd "$TESTPOOL/$TESTFS" 2>/dev/null | \ + awk -v name="/$name" ' + /^[[:space:]]+[0-9]+[[:space:]]/ && !/bonus/ { obj = $1 } + /[[:space:]]path[[:space:]]/ { + if ($0 ~ (name "$")) { print obj; exit } + } + ' +} + +# +# zap_type <path> +# +# Return the on-disk ZAP type for the directory at <path>: +# "tinyzap" | "microzap" | "fatzap" | "unknown" +# +zap_type() +{ + typeset path=$1 + typeset obj result + + obj=$(zap_get_obj "$path") + if [[ -z "$obj" ]]; then + print "unknown" + return + fi + + sync_pool "$TESTPOOL" > /dev/null 2>&1 + + result=$(zdb -ddddddd "$TESTPOOL/$TESTFS" "$obj" 2>/dev/null | \ + awk ' + /^[[:space:]]+(tinyzap|microzap|Fat ZAP)[: ]/ { + if ($0 ~ /tinyzap/) { print "tinyzap"; exit } + else if ($0 ~ /microzap/) { print "microzap"; exit } + else if ($0 ~ /Fat ZAP/) { print "fatzap"; exit } + } + ') + print "${result:-unknown}" +} + +# +# zap_dump <path> +# +# Log ZAP structure fields for diagnostic output on test failure. +# +zap_dump() +{ + typeset path=$1 + typeset obj + + obj=$(zap_get_obj "$path") + + log_note "zap_dump: $path obj=${obj:-not found} ds=$TESTPOOL/$TESTFS" + [[ -z "$obj" ]] && return + + zdb -ddddddd "$TESTPOOL/$TESTFS" "$obj" 2>/dev/null \ + | grep -E 'microzap|tinyzap|Fat ZAP|entries|mz_flags|chunk|stride|nchunks|ptrtbl' \ + | while IFS= read -r line; do + log_note " $line" + done +} + +# +# zap_assert_type <path> <expected> +# +# Fail the test (with a zap_dump) if the on-disk ZAP type does not +# match <expected>. <expected> must be one of: +# "microzap" | "tinyzap" | "fatzap" +# +zap_assert_type() +{ + typeset path=$1 + typeset expected=$2 + typeset got + + got=$(zap_type "$path") + if [[ "$got" != "$expected" ]]; then + zap_dump "$path" + log_fail "$path: expected ZAP type '$expected', got '$got'" + fi + log_note "$path: ZAP type = $got [OK]" +} + +# +# zap_assert_entries <dir> <name> [<name> ...] 
+# +# Fail the test if any of the listed names are not accessible +# under <dir> via stat(1). +# +zap_assert_entries() +{ + typeset dir=$1 + shift + typeset name + typeset -i missing=0 + + for name in "$@"; do + if ! stat "$dir/$name" > /dev/null 2>&1; then + log_note "MISSING: $dir/$name" + ((missing++)) + fi + done + if [[ $missing -gt 0 ]]; then + zap_dump "$dir" + log_fail "$dir: $missing entries missing after operation" + fi +} + +# +# zap_chunk_size <path> +# +# Return the on-disk TinyZAP chunk size for the directory at +# <path> by parsing the chunk= field of the zdb "tinyzap:" line. +# Returns "" if the directory is not a TinyZAP. +# +zap_chunk_size() +{ + typeset path=$1 + typeset obj + + obj=$(zap_get_obj "$path") + [[ -z "$obj" ]] && return + + sync_pool "$TESTPOOL" > /dev/null 2>&1 + + # Flush ARC on Linux only; on FreeBSD sync_pool is sufficient. + if [[ -w /proc/sys/vm/drop_caches ]]; then + echo 3 > /proc/sys/vm/drop_caches + fi + + zdb -ddddddd "$TESTPOOL/$TESTFS" "$obj" 2>/dev/null | \ + awk '/tinyzap:/ { line = $0; sub(/.*chunk=/, "", line); \ + sub(/[^0-9].*/, "", line); print line; exit }' +} + +# +# zap_assert_chunk <path> <expected> +# +# Fail the test if the on-disk TinyZAP chunk size does not match +# <expected> (e.g. 128 or 256). 
+# +zap_assert_chunk() +{ + typeset path=$1 + typeset expected=$2 + typeset got + + got=$(zap_chunk_size "$path") + if [[ "$got" != "$expected" ]]; then + zap_dump "$path" + log_fail "$path: expected chunk size '$expected', got '${got:-not found}'" + fi + log_note "$path: chunk size = $got [OK]" +} diff --git a/tests/zfs-tests/tests/functional/zap/zap_micro_fat_promote_002.ksh b/tests/zfs-tests/tests/functional/zap/zap_micro_fat_promote_002.ksh new file mode 100755 index 000000000000..4912fc692555 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_micro_fat_promote_002.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# When a key exceeds TZAP_NAME_LEN(256, 8) = 244 characters, no TinyZAP +# chunk is large enough. The ZAP is upgraded directly from MicroZAP to +# FatZAP. +# +# All pre-existing MicroZAP entries must survive mzap_upgrade(). +# +# STRATEGY: +# 1. mkdir + 10 x touch(short) -> assert microzap +# 2. 
touch 250-char name -> assert fatzap +# 3. assert all 11 entries accessible +# + +verify_runnable "global" +DIR=zap-micro-to-fat +TDIR="$TESTDIR/$DIR" + +function cleanup { + rm -rf "$TDIR"; +} +log_onexit cleanup + +log_assert "250-char key (> TZAP_NAME_LEN(256,8)=244) forces "\ + "microzap to fatzap upgrade" + +log_must mkdir "$TDIR" + +# Step 1: fill microzap with 10 short entries +typeset i +for i in $(seq 1 10); do + log_must touch "$TDIR/e$i" +done + +# Step 2: Assert microzap, then add 250-char name to trigger upgrade +zap_assert_type "$TDIR" "microzap" +typeset -r NAME250=$(awk 'BEGIN { s = ""; for (i=0;i<250;i++) s = s "f"; print s }') +log_must touch "$TDIR/$NAME250" + +# Step 3: Assert fatzap, then assert all entries accessible +zap_assert_type "$TDIR" "fatzap" +typeset names=() +for i in $(seq 1 10); do names+=("e$i"); done +names+=("$NAME250") +zap_assert_entries "$TDIR" "${names[@]}" + +log_pass "MicroZAP -> FatZAP upgrade passed" diff --git a/tests/zfs-tests/tests/functional/zap/zap_micro_tiny_promote_001.ksh b/tests/zfs-tests/tests/functional/zap/zap_micro_tiny_promote_001.ksh new file mode 100755 index 000000000000..da8d425c65b3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_micro_tiny_promote_001.ksh @@ -0,0 +1,104 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# Create a directory and add entries up to its max limit and +# check if it stays microzap and then add additional entries +# and check if it's promoted to tinyzap. +# +# STRATEGY: +# 1. Create a directory and add 15 entries with short names (9 chars), +# filling the MicroZAP to its theoretical maximum of 15 slots. +# 2. Sync and assert ZAP type = microzap. +# 3. Assert all 15 entries are accessible via stat(1). +# 4. Add a 16th entry with a 51-char name (> 49-char MicroZAP name limit). +# 5. Sync and assert ZAP type = tinyzap. +# 6. Assert all 16 entries (15 short + 1 long) are accessible. +# + +verify_runnable "global" +DIR=zap_dir +TDIR="$TESTDIR/$DIR" + +function cleanup +{ + rm -rf "$TDIR" +} +log_onexit cleanup + +typeset -r MZAP_OBJ_SIZE=1024 +typeset -r MZAP_ENT_LEN=64 +typeset -r MZAP_MAX_ENTRIES=$(( (MZAP_OBJ_SIZE - MZAP_ENT_LEN) / MZAP_ENT_LEN )) + +log_assert "MicroZAP: stays microzap at max $MZAP_MAX_ENTRIES entries;" \ + "51-char key triggers upgrade to tinyzap" + +log_must mkdir $TDIR + +# Step 1: fill microzap to its theoretical maximum (15 entries). +typeset -i i=0 +log_note "Step 1: adding $MZAP_MAX_ENTRIES short entries to fill MicroZAP" +for i in $(seq 1 $MZAP_MAX_ENTRIES); do + log_must touch "$TDIR/entry$(printf "%04d" $i)" +done +sync_pool $TESTPOOL + +# Step 2: assert ZAP type = microzap. +log_note "Step 2: asserting ZAP type is microzap" +zap_assert_type "$TDIR" "microzap" + +# Step 3: assert all 15 entries are accessible via stat(1). 
+log_note "Step 3: stat all $MZAP_MAX_ENTRIES short entries" +typeset -a names=() +for i in $(seq 1 $MZAP_MAX_ENTRIES); do + names+=("entry$(printf "%04d" $i)") +done +zap_assert_entries "$TDIR" "${names[@]}" + +# Step 4: add a 16th entry with a 51-char name (> MZAP_NAME_LEN=49). +typeset -r TRIGGER_NAME=$(awk 'BEGIN { s = ""; for (i=0;i<51;i++) s = s "m"; print s }') +log_note "Step 4: adding 16th entry with 51-char name to trigger upgrade" +log_must touch "$TDIR/$TRIGGER_NAME" +sync_pool $TESTPOOL + +# Step 5: directory must now be TinyZAP. +log_note "Step 5: asserting ZAP type is tinyzap after adding long entry" +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 6: all 16 entries must survive the re-encoding. +log_note "Step 6: stat all entries after upgrade to tinyzap" +names+=("$TRIGGER_NAME") +zap_assert_entries "$TDIR" "${names[@]}" + +zap_dump "$TDIR" + +log_pass "MicroZAP correctly promoted to TinyZAP remain accessible" diff --git a/tests/zfs-tests/tests/functional/zap/zap_tiny_add_remove_005.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_add_remove_005.ksh new file mode 100755 index 000000000000..1bb89f2e67ad --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_add_remove_005.ksh @@ -0,0 +1,91 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# TinyZAP add / remove / re-add sanity. +# Tests the memset(TZE_PHYS, 0, chunk_size) zero-on-delete path. +# Re-adding must succeed and the entry must be accessible. +# +# STRATEGY: +# 1. mkdir + touch 51-char name -> tinyzap +# 2. touch 5 short entries +# 3. rm one short entry -> assert gone, count = 5 +# 4. re-touch removed entry -> assert present, count = 6 +# 5. assert tinyzap throughout +# 6. assert all 6 entries accessible +# + +verify_runnable "global" +TDIR="$TESTDIR/zap-tiny-add-remove" + +function cleanup { rm -rf "$TDIR"; } +log_onexit cleanup + +log_assert "TinyZAP: add / remove / re-add preserves format and entry count" + +log_must mkdir "$TDIR" + +typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"r"; print s }') + +# Step 1 +log_must touch "$TDIR/$NAME51" +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 2 +typeset i +for i in $(seq 1 5); do + log_must touch "$TDIR/r$i" +done + +# Step 3 +log_must rm "$TDIR/r3" +log_mustnot stat "$TDIR/r3" +typeset cnt=$(ls "$TDIR" | wc -l) +[[ $cnt -eq 5 ]] || log_fail "expected 5 entries after rm, got $cnt" + +# Step 4 +log_must touch "$TDIR/r3" +log_must stat "$TDIR/r3" +cnt=$(ls "$TDIR" | wc -l) +[[ $cnt -eq 6 ]] || log_fail "expected 6 entries after re-add, got $cnt" + +# Step 5 +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 6 +typeset names=("$NAME51") +for i in $(seq 1 5); do names+=("r$i"); done +zap_assert_entries "$TDIR" "${names[@]}" + +log_pass "TinyZAP add/remove/re-add passed" + diff --git 
a/tests/zfs-tests/tests/functional/zap/zap_tiny_chunk_upgrade_003.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_chunk_upgrade_003.ksh new file mode 100755 index 000000000000..fafcdc5eecb7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_chunk_upgrade_003.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# A 51-char name promotes MicroZAP to TinyZAP with chunk=128. +# A subsequent 120-char name (> TZAP_NAME_LEN(128,8)=116) triggers +# a chunk upgrade that re-packs in place with chunk=256. +# Directory stays tinyzap; all entries survive. +# +# STRATEGY: +# 1. mkdir + touch 51-char name -> microzap promotes to tinyzap chunk=128 +# 2. touch 5 short names -> assert tinyzap +# 3. touch 120-char name -> tzap_try_chunk_upgrade -> chunk=256 +# 4. assert tinyzap +# 5. 
assert all 7 entries accessible +# + +verify_runnable "global" +TDIR="$TESTDIR/zap-tiny-chunk-upgrade" + +function cleanup { rm -rf "$TDIR"; } +log_onexit cleanup + +log_assert "TinyZAP chunk 128->256: tzap_try_chunk_upgrade preserves all entries" + +log_must mkdir "$TDIR" + +typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"a"; print s }') +typeset -r NAME120=$(awk 'BEGIN { s=""; for(i=0;i<120;i++) s=s"b"; print s }') + +# Step 1: promote MicroZAP -> TinyZAP chunk=128 +log_must touch "$TDIR/$NAME51" +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 2: more short entries +typeset i +for i in $(seq 1 5); do + log_must touch "$TDIR/s$i" +done +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 3: 120-char key exceeds TZAP_NAME_LEN(128,8)=116 +# tzap_try_chunk_upgrade re-packs with chunk=256 +log_must touch "$TDIR/$NAME120" + +# Step 4 +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "256" + +# Step 5 +typeset names=("$NAME51" "$NAME120") +for i in $(seq 1 5); do names+=("s$i"); done +zap_assert_entries "$TDIR" "${names[@]}" + +log_pass "TinyZAP chunk upgrade 128->256 passed" + diff --git a/tests/zfs-tests/tests/functional/zap/zap_tiny_fat_004.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_fat_004.ksh new file mode 100755 index 000000000000..884db3fcb914 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_fat_004.ksh @@ -0,0 +1,109 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# Fill a TinyZAP to its exact slot capacity (num_chunks), then add a +# 250-char key which exceeds TZAP_NAME_LEN(256,8)=244, triggering +# mzap_upgrade to convert the TinyZAP to a FatZAP. +# +# All pre-existing TinyZAP entries must survive mzap_upgrade(). +# +# STRATEGY: +# 1. mkdir + touch 51-char name -> tinyzap chunk=128 +# 2. read nchunks live from zdb +# 3. fill remaining (nchunks - 1) slots with short names +# 4. assert tinyzap is exactly full (num_entries == nchunks) +# 5. touch 250-char name (> TZAP_NAME_LEN(256,8)=244) +# -> tzap_try_chunk_upgrade B_FALSE -> mzap_upgrade -> fatzap +# 6. assert fatzap +# 7. 
assert all entries accessible +# + +verify_runnable "global" +TDIR="$TESTDIR/zap-tiny-to-fat" + +function cleanup { rm -rf "$TDIR"; } +log_onexit cleanup + +log_assert "TinyZAP: fill to num_chunks capacity, exhaust tinyzap, " \ + "add 250-char key trigger upgrade to fatzap; all entries survive" + +log_must mkdir "$TDIR" + +typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"t"; print s }') +typeset -r NAME250=$(awk 'BEGIN { s=""; for(i=0;i<250;i++) s=s"f"; print s }') + +# Step 1: promote MicroZAP -> TinyZAP chunk=128 +log_must touch "$TDIR/$NAME51" +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 2: read live nchunks from zdb +typeset obj nchunks +obj=$(zap_get_obj "$TDIR") +[[ -z "$obj" ]] && log_fail "could not resolve DMU object for $TDIR" + +nchunks=$(zdb -ddddddd "$TESTPOOL/$TESTFS" "$obj" 2>/dev/null | \ + awk '/tinyzap:/ { line = $0; sub(/.*num_chunks=/, "", line); \ + sub(/[^0-9].*/, "", line); print line; exit }') +[[ -z "$nchunks" ]] && log_fail "could not read num_chunks from zdb" +log_note "nchunks=$nchunks after initial promote" + +# Step 3: fill remaining (nchunks - 1) slots; NAME51 occupies slot 0 +typeset i fill=$(( nchunks - 1 )) +log_note "filling $fill more short entries to reach capacity" +for i in $(seq 1 $fill); do + log_must touch "$TDIR/s$i" +done + +# Step 4: verify TinyZAP is exactly full +typeset cnt=$(ls "$TDIR" | wc -l) +log_note "entries at capacity: $cnt (nchunks=$nchunks)" +[[ $cnt -eq $nchunks ]] || \ + log_fail "expected $nchunks entries at capacity, got $cnt" +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 5: 250-char key exceeds TZAP_NAME_LEN(256,8)=244 +# tzap_try_chunk_upgrade returns B_FALSE -> mzap_upgrade -> fatzap +log_note "adding 250-char key: upgrade to fatzap" +log_must touch "$TDIR/$NAME250" + +# Step 6 +zap_assert_type "$TDIR" "fatzap" + +# Step 7 +typeset names=("$NAME51" "$NAME250") +for i in $(seq 1 $fill); do names+=("s$i"); done 
+zap_assert_entries "$TDIR" "${names[@]}" + +log_pass "TinyZAP fill-to-capacity -> FatZAP passed" + diff --git a/tests/zfs-tests/tests/functional/zap/zap_tiny_feature_008.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_feature_008.ksh new file mode 100755 index 000000000000..a784681045f5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_feature_008.ksh @@ -0,0 +1,87 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# Verify the com.hpe:tinyzap pool feature transitions correctly: +# enabled -> before any TinyZAP directory exists +# active -> after the first TinyZAP directory is created +# +# STRATEGY: +# 1. assert feature is 'enabled' (setup created pool with feature on) +# 2. mkdir + touch 51-char name -> tinyzap +# 3. assert feature is 'active' +# 4. assert tinyzap +# 5. Empty the directory and sync to ensure the feature remains active. 
+# + +verify_runnable "global" +TDIR="$TESTDIR/zap-tiny-feature" +FEATURE="com.hpe:tinyzap" + +function cleanup { rm -rf "$TDIR"; } +log_onexit cleanup + +log_assert "$FEATURE feature: enabled before, active after first TinyZAP" + +# Step 1 +typeset state +state=$(zpool get -H -o value "feature@tinyzap" $TESTPOOL) +if [[ "$state" != "enabled" && "$state" != "active" ]]; then + log_fail "tinyzap feature not enabled on pool (got: $state)" +fi +log_note "feature state before: $state" + +# Step 2 +log_must mkdir "$TDIR" +typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"F"; print s }') +log_must touch "$TDIR/$NAME51" +sync_pool $TESTPOOL + +# Step 3 +state=$(zpool get -H -o value "feature@tinyzap" $TESTPOOL) +[[ "$state" == "active" ]] || \ + log_fail "tinyzap feature expected 'active' after first TinyZAP, got '$state'" +log_note "feature state after: $state [OK]" + +# Step 4 +zap_assert_type "$TDIR" "tinyzap" +zap_assert_chunk "$TDIR" "128" + +# Step 5 +log_must rm -f "$TDIR/$NAME51" +sync_pool $TESTPOOL +state=$(zpool get -H -o value "feature@tinyzap" $TESTPOOL) +[[ "$state" == "active" ]] || \ + log_fail "tinyzap feature expected to remain 'active' after " \ + "removing entry, got '$state'" +log_note "feature state after removing entry: $state [OK]" +log_pass "$FEATURE zap feature flag transition passed" + diff --git a/tests/zfs-tests/tests/functional/zap/zap_tiny_remount_007.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_remount_007.ksh new file mode 100755 index 000000000000..75abdb20a9d8 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_remount_007.ksh @@ -0,0 +1,125 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Hewlett Packard Enterprise Development LP. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/zap/zap_common.kshlib + +# +# DESCRIPTION: +# TinyZAP on-disk format persists across pool export + import for +# both chunk sizes: +# +# Part A (chunk=128): +# Test if mzap_open re-reads mz_flags and rebuilds the +# in-memory zap_t correctly. +# +# Part B (chunk=256): +# Same as Part A but after the chunk is upgraded with chunk_log2=8. +# Verifies the upgraded mz_flags survive on disk. +# +# STRATEGY: +# Part A - chunk=128 +# 1. mkdir + touch 51-char name + 5 short -> tinyzap chunk=128 +# 2. zpool export / import +# 3. assert tinyzap, assert all 6 entries accessible +# +# Part B - chunk=256 +# 4. touch 120-char name (> TZAP_NAME_LEN(128,8)=116) +# -> chunk upgrade to 256 +# 5. touch 5 more short entries -> assert tinyzap +# 6. zpool export / import +# 7. 
+#	   assert tinyzap, assert all 12 entries accessible
+#
+
+verify_runnable "global"
+TDIR="$TESTDIR/zap-tiny-remount"
+
+# Re-import the pool if a failure left it exported, then drop the test dir.
+function cleanup
+{
+	if ! poolexists $TESTPOOL; then
+		zpool import $TESTPOOL 2>/dev/null
+	fi
+	rm -rf "$TDIR"
+}
+log_onexit cleanup
+
+# Assert that $TDIR is reported as a tinyzap whose chunk size is $1.
+function assert_tinyzap_chunk
+{
+	zap_assert_type "$TDIR" "tinyzap"
+	zap_assert_chunk "$TDIR" "$1"
+}
+
+# Take the pool through one export + import cycle.
+function reimport_pool
+{
+	log_must zpool export $TESTPOOL
+	log_must zpool import $TESTPOOL
+}
+
+# Create the short entries p<$1> .. p<$2> inside $TDIR.
+function touch_short_entries
+{
+	typeset -i n
+	for ((n = $1; n <= $2; n++)); do
+		log_must touch "$TDIR/p$n"
+	done
+}
+
+log_assert "TinyZAP chunk=128 and chunk=256 both persist across pool export/import"
+
+log_must mkdir "$TDIR"
+
+typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"p"; print s }')
+typeset -r NAME120=$(awk 'BEGIN { s=""; for(i=0;i<120;i++) s=s"P"; print s }')
+
+# ----------------------------------------------------------------
+# Part A: chunk=128 persists across export/import
+# ----------------------------------------------------------------
+log_note "Part A: TinyZAP chunk=128 export/import"
+
+# One 51-char name plus five short names: six entries in total.
+log_must touch "$TDIR/$NAME51"
+touch_short_entries 1 5
+assert_tinyzap_chunk "128"
+
+reimport_pool
+assert_tinyzap_chunk "128"
+
+typeset names=("$NAME51" "p1" "p2" "p3" "p4" "p5")
+zap_assert_entries "$TDIR" "${names[@]}"
+log_note "Part A passed: chunk=128 survived export/import"
+
+# ----------------------------------------------------------------
+# Part B: chunk=256 (after upgrade) persists across export/import
+# ----------------------------------------------------------------
+log_note "Part B: TinyZAP chunk=256 export/import"
+
+# 120-char key exceeds TZAP_NAME_LEN(128,8)=116 -> chunk upgrade to 256
+log_must touch "$TDIR/$NAME120"
+assert_tinyzap_chunk "256"
+
+# Five more short names after the upgrade; the ZAP must stay a tinyzap.
+touch_short_entries 6 10
+assert_tinyzap_chunk "256"
+
+reimport_pool
+assert_tinyzap_chunk "256"
+
+# All twelve entries (six from Part A, six from Part B) must be present.
+names+=("$NAME120" "p6" "p7" "p8" "p9" "p10")
+zap_assert_entries "$TDIR" "${names[@]}"
+log_note "Part B passed: chunk=256 survived export/import"
+
+log_pass "TinyZAP chunk=128 and chunk=256 remount passed"
diff --git a/tests/zfs-tests/tests/functional/zap/zap_tiny_update_006.ksh b/tests/zfs-tests/tests/functional/zap/zap_tiny_update_006.ksh
new file mode 100755
index 000000000000..f2662b1b790e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/zap/zap_tiny_update_006.ksh
@@ -0,0 +1,82 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026, Hewlett Packard Enterprise Development LP.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/zap/zap_common.kshlib
+
+#
+# DESCRIPTION:
+#	TinyZAP value update (overwrite) via zap_update().
+#
+# STRATEGY:
+#	1. mkdir + touch 51-char name -> tinyzap
+#	2. touch fileA and fileB
+#	3. mv fileB over fileA -> zap_update replaces dirent value
+#	4. assert fileA accessible (points to new inode)
+#	5. assert fileB gone
+#	6.
+#	   assert tinyzap
+#
+
+verify_runnable "global"
+TDIR="$TESTDIR/zap-tiny-update"
+
+function cleanup { rm -rf "$TDIR"; }
+log_onexit cleanup
+
+log_assert "TinyZAP: rename overwrites dirent value (zap_update path)"
+
+log_must mkdir "$TDIR"
+
+typeset -r NAME51=$(awk 'BEGIN { s=""; for(i=0;i<51;i++) s=s"u"; print s }')
+
+# Step 1: a 51-char name is too long for MicroZAP, so expect a tinyzap
+log_must touch "$TDIR/$NAME51"
+zap_assert_type "$TDIR" "tinyzap"
+zap_assert_chunk "$TDIR" "128"
+
+# Step 2: create two files and remember which inode fileB refers to, so
+# that the overwrite performed by the rename can actually be verified.
+log_must touch "$TDIR/fileA"
+log_must touch "$TDIR/fileB"
+typeset ino_a_old=$(ls -di "$TDIR/fileA" | awk '{print $1}')
+typeset ino_b=$(ls -di "$TDIR/fileB" | awk '{print $1}')
+[[ -n "$ino_b" && "$ino_a_old" != "$ino_b" ]] || \
+	log_fail "expected distinct inodes, got fileA='$ino_a_old' fileB='$ino_b'"
+
+# Step 3: rename fileB -> fileA (intended to exercise the value-overwrite
+# path on the existing fileA dirent)
+log_must mv "$TDIR/fileB" "$TDIR/fileA"
+
+# Step 4: fileA must still be accessible AND must now resolve to the inode
+# that fileB had; a bare stat would also pass if the old value survived.
+log_must stat "$TDIR/fileA"
+typeset ino_a_new=$(ls -di "$TDIR/fileA" | awk '{print $1}')
+[[ "$ino_a_new" == "$ino_b" ]] || \
+	log_fail "fileA has inode '$ino_a_new' after rename, expected '$ino_b'"
+
+# Step 5: fileB must be gone
+log_mustnot stat "$TDIR/fileB"
+
+# Step 6: the directory must still be a tinyzap with the same chunk size
+zap_assert_type "$TDIR" "tinyzap"
+zap_assert_chunk "$TDIR" "128"
+
+typeset names=("$NAME51" "fileA")
+zap_assert_entries "$TDIR" "${names[@]}"
+
+log_pass "TinyZAP value update (rename overwrite) passed"
+