From 459cb7f3560678063cd7d0db13f737c9bb506a35 Mon Sep 17 00:00:00 2001 From: Christos Longros Date: Wed, 27 May 2026 18:13:40 +0200 Subject: [PATCH] metaslab: expose zfs_metaslab_condense_pct and zfs_metaslab_sm_blksz_* on Linux The space map block size and condense percentage tunables were only declared as sysctls in the FreeBSD-specific sysctl_os.c. Move them into metaslab.c as ZFS_MODULE_PARAM declarations so that they are exposed on Linux as well, and document them in zfs(4). zfs_condense_pct is renamed to zfs_metaslab_condense_pct to match the zfs_metaslab_ parameter prefix; on FreeBSD its sysctl was previously declared under _vfs_zfs rather than _vfs_zfs_metaslab. Signed-off-by: Christos Longros --- include/sys/metaslab_impl.h | 2 +- man/man4/zfs.4 | 26 ++++++++++++++++++++ module/os/freebsd/zfs/sysctl_os.c | 40 ------------------------------- module/zfs/metaslab.c | 22 +++++++++++++---- 4 files changed, 44 insertions(+), 46 deletions(-) diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h index faeb96fe965e..44a4d4ddf753 100644 --- a/include/sys/metaslab_impl.h +++ b/include/sys/metaslab_impl.h @@ -330,7 +330,7 @@ struct metaslab_group { * * As the space map grows (as a result of the appends) it will * eventually become space-inefficient. When the metaslab's in-core - * free tree is zfs_condense_pct/100 times the size of the minimal + * free tree is zfs_metaslab_condense_pct/100 times the size of the minimal * on-disk representation, we rewrite it in its minimized form. If a * metaslab needs to condense then we must set the ms_condensing flag to * ensure that allocations are not performed on the metaslab that is diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 657070de02a2..09195b03e1a1 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -446,6 +446,32 @@ This improves performance, especially when there are many metaslabs per vdev and the allocation can't actually be satisfied (so we would otherwise iterate all metaslabs). . +.It Sy zfs_metaslab_sm_blksz_no_log Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq int +Block size for the metaslab space maps in pools where the +.Sy log_spacemap +feature is disabled. +Multiple metaslabs are modified per transaction group, so a smaller block size +allows more I/O operations, scattered around the disk, to be issued. +Must be a power of 2 greater than +.Sy 4096 . +This parameter can only be set at module load time. +.
+.It Sy zfs_metaslab_sm_blksz_with_log Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int +Block size for the metaslab space maps in pools where the +.Sy log_spacemap +feature is enabled. +Changes are batched in the per-pool log spacemap and flushed to each metaslab's +space map only occasionally, so a larger block size is more efficient. +Must be a power of 2 greater than +.Sy 4096 . +This parameter can only be set at module load time. +. +.It Sy zfs_metaslab_condense_pct Ns = Ns Sy 200 Ns % Pq uint +Condense an on-disk space map when its size is at least this percentage of +the size of its optimal on-disk representation. +The value should be at least +.Sy 100 . +. .It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq uint When a vdev is added, target this number of metaslabs per top-level vdev. . diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index eb7ada12e141..a0a721aec20d 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -289,46 +289,6 @@ param_set_active_allocator(SYSCTL_HANDLER_ARGS) return (param_set_active_allocator_common(buf)); } -/* - * In pools where the log space map feature is not enabled we touch - * multiple metaslabs (and their respective space maps) with each - * transaction group. Thus, we benefit from having a small space map - * block size since it allows us to issue more I/O operations scattered - * around the disk. So a sane default for the space map block size - * is 8~16K. - */ -extern int zfs_metaslab_sm_blksz_no_log; - -SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, - CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_no_log, 0, - "Block size for space map in pools with log space map disabled. " - "Power of 2 greater than 4096."); - -/* - * When the log space map feature is enabled, we accumulate a lot of - * changes per metaslab that are flushed once in a while so we benefit - * from a bigger block size like 128K for the metaslab space maps.
- */ -extern int zfs_metaslab_sm_blksz_with_log; - -SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, - CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_with_log, 0, - "Block size for space map in pools with log space map enabled. " - "Power of 2 greater than 4096."); - -/* - * The in-core space map representation is more compact than its on-disk form. - * The zfs_condense_pct determines how much more compact the in-core - * space map representation must be before we compact it on-disk. - * Values should be greater than or equal to 100. - */ -extern uint_t zfs_condense_pct; - -SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct, - CTLFLAG_RWTUN, &zfs_condense_pct, 0, - "Condense on-disk spacemap when it is more than this many percents" - " of in-memory counterpart"); - /* * Minimum size which forces the dynamic allocator to change * it's allocation strategy. Once the space map cannot satisfy diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 6ea3ecd74fc3..8b5df12f72ac 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -82,11 +82,11 @@ int zfs_metaslab_sm_blksz_with_log = (1 << 17); /* * The in-core space map representation is more compact than its on-disk form. - * The zfs_condense_pct determines how much more compact the in-core + * The zfs_metaslab_condense_pct determines how much more compact the in-core * space map representation must be before we compact it on-disk. * Values should be greater than or equal to 100. */ -uint_t zfs_condense_pct = 200; +uint_t zfs_metaslab_condense_pct = 200; /* * Condensing a metaslab is not guaranteed to actually reduce the amount of @@ -3826,8 +3826,8 @@ metaslab_group_preload(metaslab_group_t *mg) * increase as a result of writing out the free space range tree. * * 2. Condense if the on on-disk space map representation is at least - * zfs_condense_pct/100 times the size of the optimal representation - * (i.e. zfs_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB). 
+ * zfs_metaslab_condense_pct/100 times the size of the optimal representation + * (e.g. zfs_metaslab_condense_pct = 110 and optimal = 1MB, on-disk = 1.1MB). * * 3. Do not condense if the on-disk size of the space map does not actually * decrease. @@ -3863,7 +3863,7 @@ metaslab_should_condense(metaslab_t *msp) uint64_t optimal_size = space_map_estimate_optimal_size(sm, msp->ms_allocatable, SM_NO_VDEVID); - return (object_size >= (optimal_size * zfs_condense_pct / 100) && + return (object_size >= (optimal_size * zfs_metaslab_condense_pct / 100) && object_size > zfs_metaslab_condense_block_threshold * record_size); } @@ -6454,6 +6454,18 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW, "Normally only consider this many of the best metaslabs in each vdev"); +ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_no_log, INT, ZMOD_RD, + "Block size for space map in pools with log space map disabled. " + "Power of 2 greater than 4096."); + +ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_with_log, INT, ZMOD_RD, + "Block size for space map in pools with log space map enabled. " + "Power of 2 greater than 4096."); + ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator, param_set_active_allocator, param_get_charp, ZMOD_RW, "SPA active allocator"); + +ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, condense_pct, UINT, ZMOD_RW, + "Condense on-disk spacemap when it is more than this many percents " + "of in-memory counterpart");