diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index a2e65de6662..1dcd70f628b 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -6356,22 +6356,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type) { int i; + boolean_t claimed = B_FALSE; + boolean_t ddt_block = B_FALSE; + boolean_t brt_block = B_FALSE; ASSERT(type < ZDB_OT_TOTAL); if (zilog && zil_bp_tree_add(zilog, bp) != 0) return; - /* - * This flag controls if we will issue a claim for the block while - * counting it, to ensure that all blocks are referenced in space maps. - * We don't issue claims if we're not doing leak tracking, because it's - * expensive if the user isn't interested. We also don't claim the - * second or later occurences of cloned or dedup'd blocks, because we - * already claimed them the first time. - */ - boolean_t do_claim = !dump_opt['L']; - spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); blkptr_t tempbp; @@ -6402,21 +6395,30 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE); /* - * ddt_lookup() can return NULL if this block didn't exist - * in the DDT and creating it would take the DDT over its - * quota. Since we got the block from disk, it must exist in - * the DDT, so this can't happen. However, when unique entries - * are pruned, the dedup bit can be set with no corresponding - * entry in the DDT. + * ddt_lookup() can return NULL when unique entries are pruned + * from the DDT. */ if (dde == NULL) { ddt_exit(ddt); - goto skipped; + goto ddt_done; } /* Get the phys for this variant */ ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp); + /* + * DDT_PHYS_NONE means the block has the dedup bit set but + * its DVA doesn't match any phys in the entry. This can + * happen when a DVA was evicted from the DDT and re-added + * on a hash collision. The block may still have a BRT entry. + */ + if (v == DDT_PHYS_NONE) { + ddt_exit(ddt); + goto ddt_done; + } + + ddt_block = B_TRUE; + /* * This entry may have multiple sets of DVAs. We must claim * each set the first time we see them in a real block on disk, @@ -6431,8 +6433,14 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dde->dde_io = (void *)(((uintptr_t)dde->dde_io) | (1 << v)); - /* Consume a reference for this block. */ - if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0) + /* + * Consume a reference. If this variant's refcount is already + * zero, the DDT tracking is exhausted — more filesystem + * references exist than the DDT accounts for. + */ + boolean_t ddt_refcnt_exhausted = + (ddt_phys_refcnt(dde->dde_phys, v) == 0); + if (!ddt_refcnt_exhausted) ddt_phys_decref(dde->dde_phys, v); /* @@ -6461,20 +6469,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, bp = &tempbp; } - if (seen) { + if (seen && !ddt_refcnt_exhausted) { /* * The second or later time we see this block, * it's a duplicate and we count it. */ zcb->zcb_dedup_asize += BP_GET_ASIZE(bp); zcb->zcb_dedup_blocks++; - - /* Already claimed, don't do it again. */ - do_claim = B_FALSE; + claimed = B_TRUE; } ddt_exit(ddt); - } else if (zcb->zcb_brt_is_active && + } + +ddt_done: + if (!claimed && zcb->zcb_brt_is_active && brt_maybe_exists(zcb->zcb_spa, bp)) { /* * Cloned blocks are special. We need to count them, so we can @@ -6482,10 +6491,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, * only claim them once. * * To do this, we keep our own in-memory BRT. For each block - * we haven't seen before, we look it up in the real BRT and - * if its there, we note it and its refcount then proceed as - * normal. If we see the block again, we count it as a clone - * and then give it no further consideration. + * we haven't seen before, we look it up in the real BRT. If + * we see the block again, we count it as a clone. */ zdb_brt_entry_t zbre_search, *zbre; avl_index_t where; @@ -6493,36 +6500,27 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zbre_search.zbre_dva = bp->blk_dva[0]; zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where); if (zbre == NULL) { - /* Not seen before; track it */ uint64_t refcnt = brt_entry_get_refcount(zcb->zcb_spa, bp); if (refcnt > 0) { + brt_block = B_TRUE; zbre = umem_zalloc(sizeof (zdb_brt_entry_t), UMEM_NOFAIL); zbre->zbre_dva = bp->blk_dva[0]; zbre->zbre_refcount = refcnt; avl_insert(&zcb->zcb_brt, zbre, where); } - } else { - /* - * Second or later occurrence, count it and take a - * refcount. - */ - zcb->zcb_clone_asize += BP_GET_ASIZE(bp); - zcb->zcb_clone_blocks++; - - zbre->zbre_refcount--; - if (zbre->zbre_refcount == 0) { - avl_remove(&zcb->zcb_brt, zbre); - umem_free(zbre, sizeof (zdb_brt_entry_t)); + } else { + brt_block = B_TRUE; + if (zbre->zbre_refcount > 0) { + zcb->zcb_clone_asize += BP_GET_ASIZE(bp); + zcb->zcb_clone_blocks++; + zbre->zbre_refcount--; + claimed = B_TRUE; } - - /* Already claimed, don't do it again. */ - do_claim = B_FALSE; } } -skipped: for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? type : ZDB_OT_TOTAL; @@ -6681,12 +6679,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, #undef BIN hist_skipped: - if (!do_claim) + if (claimed || dump_opt['L']) return; - VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa, + int claim_err = zio_wait(zio_claim(NULL, zcb->zcb_spa, spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL, - ZIO_FLAG_CANFAIL))); + ZIO_FLAG_CANFAIL)); + if (claim_err != 0) { + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("block claim error %d%s%s: %s\n", + claim_err, brt_block ? " (BRT)" : "", + ddt_block ? " (DDT)" : "", blkbuf); + zcb->zcb_haderrors = 1; + zcb->zcb_errors[claim_err]++; + } } static void @@ -7462,10 +7469,66 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb) static boolean_t zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb) { - if (dump_opt['L']) - return (B_FALSE); - boolean_t leaks = B_FALSE; + + /* + * Report leaked BRT entries whose refcount was not fully consumed by + * the traversal. + */ + if (zcb->zcb_brt_is_active) { + void *cookie = NULL; + zdb_brt_entry_t *zbre; + while ((zbre = avl_destroy_nodes( + &zcb->zcb_brt, &cookie)) != NULL) { + if (!dump_opt['L'] && zbre->zbre_refcount != 0) { + (void) printf("BRT leak: vdev %llu, " + "offset 0x%llx, refcount %llu\n", + (u_longlong_t)DVA_GET_VDEV( + &zbre->zbre_dva), + (u_longlong_t)DVA_GET_OFFSET( + &zbre->zbre_dva), + (u_longlong_t)zbre->zbre_refcount); + leaks = B_TRUE; + } + umem_free(zbre, sizeof (zdb_brt_entry_t)); + } + avl_destroy(&zcb->zcb_brt); + } + + if (dump_opt['L']) + return (leaks); + + /* + * Report leaked DDT entries whose refcount was not fully consumed by + * the traversal. Entries in the DDT ZAP that were never looked up + * are not detected here. + */ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + if (ddt == NULL) + continue; + ddt_enter(ddt); + for (ddt_entry_t *dde = avl_first(&ddt->ddt_tree); dde != NULL; + dde = AVL_NEXT(&ddt->ddt_tree, dde)) { + for (int p = 0; p < DDT_NPHYS(ddt); p++) { + ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p); + uint64_t refcnt = ddt_phys_refcnt(dde->dde_phys, + v); + if (refcnt == 0) + continue; + blkptr_t blk; + char blkbuf[BP_SPRINTF_LEN]; + ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, + dde->dde_phys, v, &blk); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); + (void) printf("DDT leak: refcount %llu %s\n", + (u_longlong_t)refcnt, blkbuf); + leaks = B_TRUE; + } + } + ddt_exit(ddt); + } + vdev_t *rvd = spa->spa_root_vdev; for (unsigned c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh index 01e9cf49dc8..cda4b0ee953 100755 --- a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh @@ -51,4 +51,7 @@ log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh index e1b583813f1..0d2c0f6e16c 100755 --- a/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh +++ b/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh @@ -50,4 +50,7 @@ for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ bclone_test random $filesize false $TESTSRCDIR $TESTDSTDIR done +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh index d18a1bd2490..619fc3e4216 100755 --- a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh @@ -45,4 +45,7 @@ log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass diff --git a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh index 45551e04646..f1f80a9c059 100755 --- a/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh +++ b/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh @@ -46,4 +46,7 @@ for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \ bclone_test random $filesize false $TESTSRCDIR $TESTSRCDIR done +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh index b407d4c541d..d4b7f01e8ba 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh @@ -57,5 +57,9 @@ log_must zfs create $TESTPOOL/$TESTFS log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/$TESTFS/file log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=16M count=2 log_must zfs destroy -r $TESTPOOL/$TESTFS +wait_freeing $TESTPOOL +sync_pool $TESTPOOL + +log_must zdb -b $TESTPOOL log_pass $claim diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh index 4c652923545..7c183234922 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh @@ -83,5 +83,8 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2 # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). log_must [ "$blocks" = "$(seq -s " " 0 1021 | sed 's/ $//')" ] +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL" diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh index 2e854d7e543..ad24c1f06ba 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh @@ -126,4 +126,7 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass $claim diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh index eb1464ff4d4..6b9ea354226 100755 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh @@ -128,4 +128,7 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ] +sync_pool $TESTPOOL +log_must zdb -b $TESTPOOL + log_pass $claim diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh index 1a82e5d30a1..6e67a46b040 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh @@ -104,4 +104,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'" # logical table now destroyed; containing object destroyed log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 0 +log_must zdb -b $TESTPOOL + log_pass "basic dedup (FDT) operations work" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh index 5f6eb7c3400..3a90d656d00 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh @@ -117,4 +117,6 @@ obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | awk '{ print $NF }') # with only one ZAP inside log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-sha256-zap- | wc -l) -eq 1 +log_must zdb -b $TESTPOOL + log_pass "dedup (FDT) retains version after import" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh index 8028e4f0884..1fc598c5dd2 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh @@ -107,4 +107,6 @@ log_entries3=$(get_ddt_log_entries) # Verify there are 256 entries in the unique table. log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=256'" +log_must zdb -b $TESTPOOL + log_pass "dedup (FDT) paces out log entries appropriately" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh index 3348614cb74..4422502452b 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh @@ -93,4 +93,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'" # logical table now destroyed; all DDT ZAPs removed log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 0 +log_must zdb -b $TESTPOOL + log_pass "basic dedup (legacy) operations work" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh index c962efaa7c5..b51eae2ad08 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh @@ -102,4 +102,6 @@ log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-blake3 | wc -l) -eq 1 obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-blake3 | awk '{ print $NF }') log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-.*-zap- | wc -l) -eq 1 +log_must zdb -b $TESTPOOL + log_pass "legacy and FDT dedup tables on the same pool can happily coexist" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh index 94f009fc0d0..ece43036c07 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh @@ -127,4 +127,6 @@ obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | awk '{ print $NF }') # with one ZAP inside log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-sha256-zap- | wc -l) -eq 1 +log_must zdb -b $TESTPOOL + log_pass "legacy dedup tables work after upgrade; new dedup tables created as FDT" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh index 9f6b1ef12a9..550f51cdb82 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh @@ -102,4 +102,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1 +log_must zdb -b $TESTPOOL + log_pass "dedup (legacy) retains version after import" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh index 6b4937cc4a2..c0a2adb30c2 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh @@ -95,5 +95,6 @@ new_entries=$(ddt_entries) [[ "$((entries / 4))" -eq "$new_entries" ]] || \ log_fail "DDT entries did not shrink enough: $entries -> $new_entries" +log_must zdb -b $TESTPOOL log_pass "DDT pruning correctly removes non-duplicate entries" diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh index 597bad253ec..41586204333 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh @@ -83,4 +83,6 @@ log_must zpool import $TESTPOOL nleafs=$(zdb -dddd $TESTPOOL "$zap_obj" | grep "Leaf blocks:" | awk -F\: '{print($2);}') log_must test $nleafs -lt $nleafs_old +log_must zdb -b $TESTPOOL + log_pass "ZAP object shrank after removing entries."