From 6f6dfa32ef51a700cfda12c6ad6b143dd15f9bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jerzy=20Ko=C5=82osowski?= Date: Sun, 24 May 2026 15:55:02 +0200 Subject: [PATCH] dsl_scan: detect 'unencrypted block in encrypted objset' during scrub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a BP-level check in dsl_scan_visitbp(): when a dataset is encrypted yet a block pointer lacks BP_USES_CRYPT, count it as a scan error, push the offending bookmark into the SPA error log (so it surfaces in 'zpool status -v' as a permanent error against the affected file), and emit a zfs_dbgmsg with the exact bookmark coordinates for offline correlation with zdb output. The check covers BOTH leaf blocks (level 0, file data) AND indirect blocks (level > 0). Both must have BP_USES_CRYPT set on an encrypted dataset — leaf BPs store the data MAC in blk_cksum, while indirect BPs store a MAC-of-MAC computed over their children's MACs (see zio_crypt_do_indirect_mac_checksum_abd in zio_crypt.c). Both forms are corrupted equally when the flag is lost during write. Embedded BPs are explicitly skipped — they inline data and never carry the CRYPT flag. This gives users an actionable per-file list for the bug class discussed in #14330, #15275, #16065, #14709, #18186 — historically the only visible symptoms were recurring kernel panics in zfs_mknode and intermittent EIOs from dbuf_read. PR #15677 converted that read-path panic to a logged error; this commit adds the symmetric visibility during scrub so users do not have to wait for a panic to identify affected files. Detection only — no repair attempted. Users who hit it should delete the affected file and restore from backup, or recreate the pool without ZFS native encryption (a LUKS layer underneath can still provide data-at-rest encryption). 
Refs: #14330 #15275 #16065 #14709 #18186 ZFS-CI-Type: full Signed-off-by: Jerzy Kołosowski --- module/zfs/dsl_scan.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 6f5dfac7b9d7..5ec009d2fa22 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -2380,6 +2380,43 @@ dsl_scan_visitbp(const blkptr_t *bp, const zbookmark_phys_t *zb, return; } + /* + * Detect "unencrypted block in encrypted object set" corruption. + * On an encrypted dataset every BP — leaf data block or indirect — + * must have BP_USES_CRYPT set. The read path in dbuf.c converts + * a missing flag to EIO, and the create path historically panicked + * in zfs_mknode. + * + * Log each occurrence so it appears in 'zpool status -v', and + * count it for the scan's permanent-error tally. Both leaf and + * indirect levels are checked: leaf BPs store data MAC in blk_cksum + * directly, while indirect BPs store a MAC-of-MAC computed over + * their children's MACs (see zio_crypt_do_indirect_mac_checksum_abd + * in zio_crypt.c). Both forms are corrupted equally when the flag + * is lost during write, so neither read nor scrub can verify them. + * + * Embedded BPs (BP_IS_EMBEDDED) inline their data into the BP + * itself and never carry the encryption flag — skip them. + * + * Refs: #14330 #15275 #16065 #14709 #18186 + */ + if (ds != NULL && !BP_IS_EMBEDDED(bp) && !BP_USES_CRYPT(bp)) { + objset_t *bp_os; + if (dmu_objset_from_ds(ds, &bp_os) == 0 && + bp_os->os_encrypted) { + scn->scn_phys.scn_errors++; + spa_log_error(dp->dp_spa, zb, + BP_GET_PHYSICAL_BIRTH(bp)); + zfs_dbgmsg("scrub: encrypted objset %llu has BP " + "without BP_USES_CRYPT (object %llu, " + "level %u, blkid %llu)", + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (uint_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid); + } + } + /* * Check if this block contradicts any filesystem flags. */