diff --git a/include/sys/zap.h b/include/sys/zap.h index 7e89ad7d3de5..ad20d427ad9c 100644 --- a/include/sys/zap.h +++ b/include/sys/zap.h @@ -443,16 +443,20 @@ void zap_attribute_free(zap_attribute_t *attrp); struct zap; struct zap_leaf; + typedef struct zap_cursor { /* This structure is opaque! */ - objset_t *zc_objset; struct zap *zc_zap; struct zap_leaf *zc_leaf; - uint64_t zc_zapobj; - uint64_t zc_serialized; uint64_t zc_hash; uint32_t zc_cd; boolean_t zc_prefetch; + /* + * Legacy fields to main source compat with Lustre, which accesses + * them directly. Not to be used in new code! + */ + objset_t *zc_objset; + uint64_t zc_zapobj; } zap_cursor_t; /* @@ -460,14 +464,15 @@ typedef struct zap_cursor { * The entire zapobj will be prefetched. You must call zap_cursor_fini the * cursor when you are done with it. */ -void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); +int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); +int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn); void zap_cursor_fini(zap_cursor_t *zc); /* * Initialize a cursor at the beginning, but request that we not prefetch * the entire ZAP object. */ -void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, +int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj); /* @@ -477,8 +482,10 @@ void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, * zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to * zap_cursor_init(...).) */ -void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, +int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, uint64_t serialized); +int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn, + uint64_t serialized); /* * Get the attribute currently pointed to by the cursor. Returns diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 6f5dfac7b9d7..03e13ca96cc4 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -1280,6 +1280,7 @@ dsl_errorscrub_pause_resume_sync(void *arg, dmu_tx_t *tx) spa->spa_scan_pass_errorscrub_pause = gethrestime_sec(); scn->errorscrub_phys.dep_paused_flags = B_TRUE; dsl_errorscrub_sync_state(scn, tx); + zap_cursor_fini(&scn->errorscrub_cursor); spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_PAUSED); } else { ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); diff --git a/module/zfs/zap.c b/module/zfs/zap.c index caed9c677942..ee94917d8e84 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -1072,53 +1072,100 @@ zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep) /* zap_cursor */ -static void -zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, +static int +zap_cursor_init_by_dnode_impl(zap_cursor_t *zc, dnode_t *dn, uint64_t serialized, boolean_t prefetch) { - zc->zc_objset = os; zc->zc_zap = NULL; zc->zc_leaf = NULL; - zc->zc_zapobj = zapobj; - zc->zc_serialized = serialized; - zc->zc_hash = 0; - zc->zc_cd = 0; + + int err = zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, + zc, &zc->zc_zap); + if (err != 0) + return (err); + zc->zc_prefetch = prefetch; + zc->zc_objset = dn->dn_objset; + zc->zc_zapobj = dn->dn_object; + + int hb = zap_hashbits(zc->zc_zap); + zc->zc_hash = serialized << (64 - hb); + zc->zc_cd = serialized >> hb; + if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ + zc->zc_cd = 0; + + /* + * Drop ZAP read lock, but keep the hold, so the holds on the + * underlying dnode and header dbuf are maintained. + */ + rw_exit(&zc->zc_zap->zap_rwlock); + + return (0); } -void +static int +zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, + uint64_t serialized, uint32_t prefetch) +{ + dnode_t *dn = NULL; + int err = dnode_hold(os, zapobj, FTAG, &dn); + if (err != 0) { + zc->zc_zap = NULL; + zc->zc_leaf = NULL; + return (err); + } + + err = zap_cursor_init_by_dnode_impl(zc, dn, serialized, prefetch); + + dnode_rele(dn, FTAG); + + return (err); +} + +int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { - zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); + return (zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE)); } -void +int +zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn) +{ + return (zap_cursor_init_by_dnode_impl(zc, dn, 0, B_TRUE)); +} + +int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) { - zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); + return (zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE)); } -void +int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, uint64_t serialized) { - zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); + return (zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE)); +} + +int +zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn, + uint64_t serialized) +{ + return (zap_cursor_init_by_dnode_impl(zc, dn, serialized, B_TRUE)); } void zap_cursor_fini(zap_cursor_t *zc) { - if (zc->zc_zap) { - rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); - zap_unlock(zc->zc_zap, NULL); - zc->zc_zap = NULL; - } if (zc->zc_leaf) { rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); zap_put_leaf(zc->zc_leaf); - zc->zc_leaf = NULL; } - zc->zc_objset = NULL; + if (zc->zc_zap) { + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + zap_unlock(zc->zc_zap, zc); + } + memset(zc, 0, sizeof (zap_cursor_t)); } int @@ -1126,30 +1173,15 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) { int err; + if (zc->zc_zap == NULL) + /* zap_cursor_init failed, cursor is invalid */ + return (SET_ERROR(EIO)); + if (zc->zc_hash == -1ULL) return (SET_ERROR(ENOENT)); - if (zc->zc_zap == NULL) { - int hb; - err = zap_lock(zc->zc_objset, zc->zc_zapobj, NULL, - RW_READER, TRUE, FALSE, NULL, &zc->zc_zap); - if (err != 0) - return (err); - - /* - * To support zap_cursor_init_serialized, advance, retrieve, - * we must add to the existing zc_cd, which may already - * be 1 due to the zap_cursor_advance. - */ - ASSERT0(zc->zc_hash); - hb = zap_hashbits(zc->zc_zap); - zc->zc_hash = zc->zc_serialized << (64 - hb); - zc->zc_cd += zc->zc_serialized >> hb; - if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ - zc->zc_cd = 0; - } else { - rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); - } + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + if (!zc->zc_zap->zap_ismicro) { err = fzap_cursor_retrieve(zc->zc_zap, zc, za); } else { @@ -1184,6 +1216,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) err = SET_ERROR(ENOENT); } } + rw_exit(&zc->zc_zap->zap_rwlock); return (err); } @@ -1199,10 +1232,9 @@ zap_cursor_advance(zap_cursor_t *zc) uint64_t zap_cursor_serialize(zap_cursor_t *zc) { - if (zc->zc_hash == -1ULL) + if (zc->zc_zap == NULL || zc->zc_hash == -1ULL) return (-1ULL); - if (zc->zc_zap == NULL) - return (zc->zc_serialized); + ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap))); ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); diff --git a/tests/unit/mock_dmu.c b/tests/unit/mock_dmu.c index 65c38c1fd9fb..ae035498da67 100644 --- a/tests/unit/mock_dmu.c +++ b/tests/unit/mock_dmu.c @@ -28,6 +28,7 @@ #include #include "mock_dmu.h" +#include "unit.h" /* * A mock dbuf. A real dmu_buf_t (first for casting) plus the attached user @@ -48,6 +49,7 @@ typedef struct mock_dbuf mock_dbuf_t; */ struct mock_dnode { dnode_t mdn_dn; + uint64_t mdn_refcount; size_t mdn_blksize; size_t mdn_nblocks; mock_dbuf_t **mdn_blocks; @@ -110,6 +112,7 @@ mock_dnode_create(size_t blksize, dmu_object_type_t type) ASSERT(IS_P2ALIGNED(blksize, 512)); mock_dnode_t *mdn = kmem_zalloc(sizeof (mock_dnode_t), KM_SLEEP); + mdn->mdn_refcount = 1; mdn->mdn_dn.dn_type = type; mdn->mdn_dn.dn_object = 1; /* arbitrary non-zero object number */ mdn->mdn_blksize = blksize; @@ -156,6 +159,12 @@ mock_dnode_block_data(mock_dnode_t *mdn, uint64_t blkid) return (mdn->mdn_blocks[blkid]->mdb_db.db_data); } +uint64_t +mock_dnode_refcount(mock_dnode_t *mdn) +{ + return (mdn->mdn_refcount); +} + /* Mock transaction */ mock_dmu_tx_t * @@ -258,14 +267,21 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, boolean_t dnode_add_ref(dnode_t *dn, const void *tag) { - (void) dn; (void) tag; + (void) tag; + mock_dnode_t *mdn = (mock_dnode_t *)dn; + if (mdn->mdn_refcount == 0) + return (B_FALSE); + mdn->mdn_refcount++; return (B_TRUE); } void dnode_rele(dnode_t *dn, const void *tag) { - (void) dn; (void) tag; + (void) tag; + mock_dnode_t *mdn = (mock_dnode_t *)dn; + unit_gt(mdn->mdn_refcount, 0); + mdn->mdn_refcount--; } /* diff --git a/tests/unit/mock_dmu.h b/tests/unit/mock_dmu.h index a46454c779fb..2ac82c18b7a1 100644 --- a/tests/unit/mock_dmu.h +++ b/tests/unit/mock_dmu.h @@ -40,6 +40,9 @@ size_t mock_dnode_block_count(mock_dnode_t *mdn); /* Returns a pointer to the data under the given block id. */ const void *mock_dnode_block_data(mock_dnode_t *mdn, uint64_t blkid); +/* Returns the current dnode ref (hold) count. */ +uint64_t mock_dnode_refcount(mock_dnode_t *mdn); + /* Create/destroy a mock transaction handle. */ mock_dmu_tx_t *mock_tx_create(void); void mock_tx_destroy(mock_dmu_tx_t *tx); diff --git a/tests/unit/test_zap.c b/tests/unit/test_zap.c index 5d53e49b2c76..276b6127e3f6 100644 --- a/tests/unit/test_zap.c +++ b/tests/unit/test_zap.c @@ -103,7 +103,9 @@ mock_zap_is_fatzap(dnode_t *dn) static void mock_zap_destroy(dnode_t *dn) { - mock_dnode_destroy((mock_dnode_t *)dn); + mock_dnode_t *mdn = (mock_dnode_t *)dn; + unit_eq(mock_dnode_refcount(mdn), 1); + mock_dnode_destroy(mdn); } /* Create a ZAP of the type named in the given test params. */ @@ -515,6 +517,323 @@ test_fatzap_stats(const MunitParameter params[], void *data) /* ========== */ +/* Cursor tests. */ + +/* + * Basic cursor test. Add a bunch of keys+values to a ZAP, read them back + * via cursor, confirm they're all there and nothing else is. + */ +static MunitResult +test_cursor(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create(); + + /* For each ASCII letter as key, add a unique value to the ZAP. */ + for (int i = 0; i < 26; i++) { + char c = (char)i + 'a'; + char k[2] = { c, '\0' }; + uint64_t v = (uint64_t)c * 11; + unit_ok(zap_add_by_dnode(dn, k, sizeof (uint64_t), 1, &v, tx)); + } + + /* Sanity check; confirm they're all there by count. */ + uint64_t count = 0; + unit_ok(zap_count_by_dnode(dn, &count)); + unit_eq(count, 26); + + zap_cursor_t zc; + zap_attribute_t *za = zap_attribute_alloc(); + + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + + /* + * Cursors don't guarantee an order, so we run over them them all, + * confirm the key matches the value, and then set a bit for each + * one we've seen. By the end, we should have seen them all. + */ + uint64_t seen = 0; + for (int i = 0; i < 26; i++) { + unit_ok(zap_cursor_retrieve(&zc, za)); + + /* Confirm attribute has the right details for the value. */ + unit_eq(za->za_integer_length, sizeof (uint64_t)); + unit_eq(za->za_num_integers, 1); + + /* + * And the right key in za_name. Note that we don't check + * za_name_len, which is the length of a buffer that can + * definitely hold the key, not the key length itself. + */ + char c = za->za_name[0]; + unit_true(c >= 'a' && c <= 'z'); + unit_zero(za->za_name[1]); + + /* Check the value in the attribute. */ + uint64_t v = (uint64_t)c * 11; + unit_eq(za->za_first_integer, v); + + /* + * Also do a direct lookup and confirm the value matches + * the value from the attribute. + */ + char k[2] = { c, '\0' }; + uint64_t result = 0; + unit_ok(zap_lookup_by_dnode(dn, k, + sizeof (uint64_t), 1, &result)); + unit_eq(result, v); + + /* This one is good, set the bit to remember this fact. */ + seen |= 1 << (c-'a'); + + zap_cursor_advance(&zc); + } + + /* There should be no more keys in the ZAP. */ + unit_err(zap_cursor_retrieve(&zc, za), ENOENT); + + /* Bits 0-25 should be set if we've seen them all. */ + unit_eq(seen, (1 << 26) - 1); + + zap_attribute_free(za); + zap_cursor_fini(&zc); + + mock_tx_destroy((mock_dmu_tx_t *)tx); + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +/* + * Cursor serialize test. Add a bunch of items, use the cursor to read half of + * them back, then serialize the cursor. Reload the cursor from the serialized + * state and confirm that we pick up where we left off. Then do it again to + * ensure it doesn't rely on any internal state. + */ +static MunitResult +test_cursor_serialize(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create(); + + /* For each ASCII letter as key, add a unique value to the ZAP. */ + for (int i = 0; i < 26; i++) { + char c = (char)i + 'a'; + char k[2] = { c, '\0' }; + uint64_t v = (uint64_t)c * 11; + unit_ok(zap_add_by_dnode(dn, k, sizeof (uint64_t), 1, &v, tx)); + } + + /* Sanity check; confirm they're all there by count. */ + uint64_t count = 0; + unit_ok(zap_count_by_dnode(dn, &count)); + unit_eq(count, 26); + + /* + * Like test_cursor above, we'll walk over the ZAP and set bits + * for each key we see. + */ + zap_cursor_t zc; + zap_attribute_t *za = zap_attribute_alloc(); + uint64_t seen = 0; + + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + for (int i = 0; i < 13; i++) { + unit_ok(zap_cursor_retrieve(&zc, za)); + + char c = za->za_name[0]; + unit_true(c >= 'a' && c <= 'z'); + + /* This one is good, set the bit to remember this fact. */ + seen |= 1 << (c-'a'); + + zap_cursor_advance(&zc); + } + + /* Serialise the and terminate the cursor. */ + uint64_t cookie = zap_cursor_serialize(&zc); + zap_cursor_fini(&zc); + + /* + * Record the bits we saw in the first iteration; we'll use this + * when we reload the cursor a second time below. + */ + uint64_t orig_seen = seen; + + /* Reinitialise the cursor from the cookie. */ + unit_ok(zap_cursor_init_serialized_by_dnode(&zc, dn, cookie)); + + /* Loop over the remaining entries and track them. */ + for (int i = 0; i < 13; i++) { + unit_ok(zap_cursor_retrieve(&zc, za)); + + char c = za->za_name[0]; + unit_true(c >= 'a' && c <= 'z'); + + /* This one is good, set the bit to remember this fact. */ + seen |= 1 << (c-'a'); + + zap_cursor_advance(&zc); + } + + /* There should be no more keys in the ZAP. */ + unit_err(zap_cursor_retrieve(&zc, za), ENOENT); + + /* Bits 0-25 should be set if we've seen them all. */ + unit_eq(seen, (1 << 26) - 1); + + /* Cursor done. */ + zap_cursor_fini(&zc); + + /* + * Restore the seen state to before when we reinitialised the saved + * cursor. + */ + seen = orig_seen; + + /* + * Do it all again a second time. This is making sure that the saved + * cursor is usable even after the its been "used". + */ + unit_ok(zap_cursor_init_serialized_by_dnode(&zc, dn, cookie)); + for (int i = 0; i < 13; i++) { + unit_ok(zap_cursor_retrieve(&zc, za)); + + char c = za->za_name[0]; + unit_true(c >= 'a' && c <= 'z'); + + seen |= 1 << (c-'a'); + + zap_cursor_advance(&zc); + } + + unit_err(zap_cursor_retrieve(&zc, za), ENOENT); + unit_eq(seen, (1 << 26) - 1); + + zap_attribute_free(za); + zap_cursor_fini(&zc); + + mock_tx_destroy((mock_dmu_tx_t *)tx); + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +/* + * The following tests confirm that the cursor is properly cleaning up dnode + * holds taken (or not) across the lifetime of the cursor. The test is not + * about how or when it takes holds, only that the dnode refcount is the + * same before zap_cursor_init() as after zap_cursor_fini(). + */ +static MunitResult +test_cursor_release_unused(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + + uint64_t refcount = mock_dnode_refcount((mock_dnode_t *)dn); + + zap_cursor_t zc; + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + zap_cursor_fini(&zc); + + unit_eq(refcount, mock_dnode_refcount((mock_dnode_t *)dn)); + + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +static MunitResult +test_cursor_release_advance(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + + uint64_t refcount = mock_dnode_refcount((mock_dnode_t *)dn); + + zap_cursor_t zc; + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + zap_cursor_advance(&zc); + zap_cursor_fini(&zc); + + unit_eq(refcount, mock_dnode_refcount((mock_dnode_t *)dn)); + + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +static MunitResult +test_cursor_release_empty(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + + uint64_t refcount = mock_dnode_refcount((mock_dnode_t *)dn); + + zap_cursor_t zc; + zap_attribute_t *za = zap_attribute_alloc(); + + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + unit_err(zap_cursor_retrieve(&zc, za), ENOENT); + + zap_attribute_free(za); + zap_cursor_fini(&zc); + + unit_eq(refcount, mock_dnode_refcount((mock_dnode_t *)dn)); + + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +static MunitResult +test_cursor_release_one(const MunitParameter params[], void *data) +{ + (void) data; + + dnode_t *dn = mock_zap_create_params(params, "type"); + dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create(); + + uint64_t v = 1; + unit_ok(zap_add_by_dnode(dn, "a", sizeof (uint64_t), 1, &v, tx)); + unit_ok(zap_add_by_dnode(dn, "b", sizeof (uint64_t), 1, &v, tx)); + + uint64_t refcount = mock_dnode_refcount((mock_dnode_t *)dn); + + zap_cursor_t zc; + zap_attribute_t *za = zap_attribute_alloc(); + + unit_ok(zap_cursor_init_by_dnode(&zc, dn)); + unit_ok(zap_cursor_retrieve(&zc, za)); + + zap_attribute_free(za); + zap_cursor_fini(&zc); + + unit_eq(refcount, mock_dnode_refcount((mock_dnode_t *)dn)); + + mock_tx_destroy((mock_dmu_tx_t *)tx); + unit_true(mock_zap_is_params(dn, params, "type")); + mock_zap_destroy(dn); + + return (MUNIT_OK); +} + +/* ========== */ + /* Test suite definition and boilerplate. */ #define UNIT_PARAM_ZAP_TYPES(p) \ @@ -544,6 +863,18 @@ static const MunitTest zap_tests[] = { UNIT_TEST("microzap_stats", test_microzap_stats), UNIT_TEST("fatzap_stats", test_fatzap_stats), + UNIT_TEST_ZAP_TYPES("cursor", test_cursor), + UNIT_TEST_ZAP_TYPES("cursor_serialize", test_cursor_serialize), + + UNIT_TEST_ZAP_TYPES( + "cursor_release_unused", test_cursor_release_unused), + UNIT_TEST_ZAP_TYPES( + "cursor_release_advance", test_cursor_release_advance), + UNIT_TEST_ZAP_TYPES( + "cursor_release_empty", test_cursor_release_empty), + UNIT_TEST_ZAP_TYPES( + "cursor_release_one", test_cursor_release_one), + { 0 }, };