Fix double free for blocks cloned after DDT prune
Before this change, for blocks marked with D flag but absent in DDT (pruned from it), zio_ddt_free() fell back to ZIO_STAGE_DVA_FREE without trying ZIO_STAGE_BRT_FREE first. Same time such blocks might be present in BRT, and not handling that would result in double/multiple free. This change makes ZIO_DDT_FREE_PIPELINE include ZIO_FREE_PIPELINE, just adding required ZIO_STAGE_ISSUE_ASYNC and ZIO_STAGE_DDT_FREE, and moves DDT stages before BRT. This way, if the block is found in DDT by zio_ddt_free(), the pipeline is short-circuited to ZIO_INTERLOCK_PIPELINE, similar to what zio_brt_free() does. If not, then BRT is checked, and if also no match, the block is freed. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Rob Norris <rob.norris@truenas.com> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #18520
This commit is contained in:
committed by
Brian Behlendorf
parent
58c8dc5f69
commit
181e1b5227
@@ -139,12 +139,12 @@ enum zio_stage {
|
||||
|
||||
ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W---- */
|
||||
|
||||
ZIO_STAGE_BRT_FREE = 1 << 9, /* --F--- */
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 9, /* R----- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R----- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W---- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 12, /* --F--- */
|
||||
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 10, /* R----- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 11, /* R----- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 12, /* -W---- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 13, /* --F--- */
|
||||
ZIO_STAGE_BRT_FREE = 1 << 13, /* --F--- */
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE = 1 << 14, /* RWFC-- */
|
||||
ZIO_STAGE_GANG_ISSUE = 1 << 15, /* RWFC-- */
|
||||
@@ -259,8 +259,7 @@ enum zio_stage {
|
||||
ZIO_STAGE_DVA_FREE)
|
||||
|
||||
#define ZIO_DDT_FREE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_STAGE_FREE_BP_INIT | \
|
||||
(ZIO_FREE_PIPELINE | \
|
||||
ZIO_STAGE_ISSUE_ASYNC | \
|
||||
ZIO_STAGE_DDT_FREE)
|
||||
|
||||
|
||||
@@ -458,12 +458,12 @@ ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W----
|
||||
|
||||
ZIO_STAGE_NOP_WRITE:0x00000100:-W----
|
||||
|
||||
ZIO_STAGE_BRT_FREE:0x00000200:--F---
|
||||
ZIO_STAGE_DDT_READ_START:0x00000200:R-----
|
||||
ZIO_STAGE_DDT_READ_DONE:0x00000400:R-----
|
||||
ZIO_STAGE_DDT_WRITE:0x00000800:-W----
|
||||
ZIO_STAGE_DDT_FREE:0x00001000:--F---
|
||||
|
||||
ZIO_STAGE_DDT_READ_START:0x00000400:R-----
|
||||
ZIO_STAGE_DDT_READ_DONE:0x00000800:R-----
|
||||
ZIO_STAGE_DDT_WRITE:0x00001000:-W----
|
||||
ZIO_STAGE_DDT_FREE:0x00002000:--F---
|
||||
ZIO_STAGE_BRT_FREE:0x00002000:--F---
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC--
|
||||
ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC--
|
||||
|
||||
@@ -238,11 +238,11 @@ _VALSTR_BITFIELD_IMPL(zio_stage,
|
||||
{ 'E', "EN", "ENCRYPT" },
|
||||
{ 'C', "CG", "CHECKSUM_GENERATE" },
|
||||
{ 'N', "NW", "NOP_WRITE" },
|
||||
{ 'B', "BF", "BRT_FREE" },
|
||||
{ 'd', "dS", "DDT_READ_START" },
|
||||
{ 'd', "dD", "DDT_READ_DONE" },
|
||||
{ 'd', "dW", "DDT_WRITE" },
|
||||
{ 'd', "dF", "DDT_FREE" },
|
||||
{ 'B', "BF", "BRT_FREE" },
|
||||
{ 'G', "GA", "GANG_ASSEMBLE" },
|
||||
{ 'G', "GI", "GANG_ISSUE" },
|
||||
{ 'D', "DT", "DVA_THROTTLE" },
|
||||
|
||||
+13
-6
@@ -4168,14 +4168,21 @@ zio_ddt_free(zio_t *zio)
|
||||
}
|
||||
ddt_exit(ddt);
|
||||
|
||||
if (dde) {
|
||||
/*
|
||||
* When no entry was found, it must have been pruned,
|
||||
* so we can free it now instead of decrementing the
|
||||
* refcount in the DDT.
|
||||
* DDT entry found and the refcount has been decremented.
|
||||
* Stop the pipeline — there is nothing more to do right now.
|
||||
*/
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
} else {
|
||||
/*
|
||||
* No DDT entry; the block must have been pruned from the
|
||||
* table. Clear the DEDUP bit so it is treated as a normal
|
||||
* block from here on. BRT_FREE and DVA_FREE follow in the
|
||||
* pipeline and will handle any cloned references and the
|
||||
* actual block free respectively.
|
||||
*/
|
||||
if (!dde) {
|
||||
BP_SET_DEDUP(bp, 0);
|
||||
zio->io_pipeline |= ZIO_STAGE_DVA_FREE;
|
||||
}
|
||||
|
||||
return (zio);
|
||||
@@ -5925,11 +5932,11 @@ static zio_pipe_stage_t *zio_pipeline[] = {
|
||||
zio_encrypt,
|
||||
zio_checksum_generate,
|
||||
zio_nop_write,
|
||||
zio_brt_free,
|
||||
zio_ddt_read_start,
|
||||
zio_ddt_read_done,
|
||||
zio_ddt_write,
|
||||
zio_ddt_free,
|
||||
zio_brt_free,
|
||||
zio_gang_assemble,
|
||||
zio_gang_issue,
|
||||
zio_dva_throttle,
|
||||
|
||||
@@ -717,10 +717,10 @@ post =
|
||||
tags = ['functional', 'deadman']
|
||||
|
||||
[tests/functional/dedup]
|
||||
tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_fdt_pacing',
|
||||
'dedup_legacy_create', 'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
|
||||
'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_prune_leak',
|
||||
'dedup_zap_shrink']
|
||||
tests = ['dedup_bclone_pruned', 'dedup_fdt_create', 'dedup_fdt_import',
|
||||
'dedup_fdt_pacing', 'dedup_legacy_create', 'dedup_legacy_import',
|
||||
'dedup_legacy_fdt_upgrade', 'dedup_legacy_fdt_mixed', 'dedup_quota',
|
||||
'dedup_prune', 'dedup_prune_leak', 'dedup_zap_shrink']
|
||||
pre =
|
||||
post =
|
||||
tags = ['functional', 'dedup']
|
||||
|
||||
@@ -1503,6 +1503,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/deadman/deadman_zio.ksh \
|
||||
functional/dedup/cleanup.ksh \
|
||||
functional/dedup/setup.ksh \
|
||||
functional/dedup/dedup_bclone_pruned.ksh \
|
||||
functional/dedup/dedup_fdt_create.ksh \
|
||||
functional/dedup/dedup_fdt_import.ksh \
|
||||
functional/dedup/dedup_fdt_pacing.ksh \
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2026, TrueNAS.
|
||||
#
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Verify that block cloning works correctly when the DDT entry for a
|
||||
# dedup block has been pruned. When a block has the DEDUP bit set but
|
||||
# no DDT entry (because it was pruned), cloning it must create a BRT
|
||||
# entry to track the extra reference. Freeing the original must
|
||||
# consult the BRT rather than proceeding directly to a DVA free,
|
||||
# otherwise the block is freed while the clone still references it.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with both dedup and block_cloning enabled
|
||||
# 2. Write a file with dedup=on so blocks get DEDUP bit set in their BPs
|
||||
# 3. Prune the DDT to remove those entries (blocks remain, DEDUP bit
|
||||
# stays set in block pointers)
|
||||
# 4. Clone the file - brt_pending_apply_vdev() must fall back to BRT
|
||||
# since ddt_addref() returns B_FALSE for pruned entries
|
||||
# 5. Write a second copy via dd - same hash, new physical blocks, new
|
||||
# DDT entries at different DVAs from the BRT-tracked blocks
|
||||
# 6. Delete the clone first - must go through BRT, not DDT, even though
|
||||
# a matching DDT entry now exists for the same hash
|
||||
# 7. Delete the dd copy - DDT entries freed normally
|
||||
# 8. Delete the original - no DDT entry, no BRT entry, DVA freed
|
||||
# 9. Verify reference counts with zdb -b at each step
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_assert "Block cloning of dedup blocks with pruned DDT entries uses BRT"
|
||||
|
||||
# Flush DDT log every TXG so entries appear in the ZAP immediately,
|
||||
# making ddtprune effective and test behavior predictable.
|
||||
log_must save_tunable DEDUP_LOG_TXG_MAX
|
||||
log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
|
||||
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
|
||||
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
|
||||
|
||||
function cleanup
|
||||
{
|
||||
if poolexists $TESTPOOL ; then
|
||||
destroy_pool $TESTPOOL
|
||||
fi
|
||||
log_must restore_tunable DEDUP_LOG_TXG_MAX
|
||||
log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must zpool create -f -o feature@block_cloning=enabled $TESTPOOL $DISKS
|
||||
|
||||
log_must zfs create -o dedup=sha256 -o recordsize=128k $TESTPOOL/$TESTFS
|
||||
typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
|
||||
# Write unique data: each block gets a DDT entry with refcnt=1.
|
||||
log_must dd if=/dev/urandom of=$mountpoint/file1 bs=128k count=8
|
||||
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# Verify DDT has entries before pruning.
|
||||
typeset entries=$(zpool status -D $TESTPOOL | \
|
||||
grep "dedup: DDT entries" | awk '{print $4}')
|
||||
log_must test "$entries" -eq 8
|
||||
|
||||
# Sleep 1s so the DDT entries are at least 1 second old. ddtprune uses
|
||||
# an age-based cutoff and will silently skip entries that are too fresh.
|
||||
sleep 1
|
||||
|
||||
# Prune all unique (refcnt=1) entries. The blocks remain on disk and the
|
||||
# block pointers in file1 still have the DEDUP bit set, but there is no
|
||||
# longer a DDT entry for them.
|
||||
log_must zpool ddtprune -p 100 $TESTPOOL
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# Confirm the prune actually removed all entries.
|
||||
entries=$(zpool status -D $TESTPOOL | \
|
||||
grep "dedup: DDT entries" | awk '{print $4}')
|
||||
[[ -z "$entries" || "$entries" -eq 0 ]] || \
|
||||
log_fail "DDT entries not pruned: $entries remain"
|
||||
|
||||
# Clone file1. brt_pending_apply_vdev() will see the DEDUP bit, call
|
||||
# ddt_addref(), receive B_FALSE (no DDT entry), and fall through to
|
||||
# create BRT entries instead.
|
||||
log_must clonefile -f $mountpoint/file1 $mountpoint/clone1
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# BRT entries exist; reference counts must be consistent.
|
||||
log_must zdb -b $TESTPOOL
|
||||
|
||||
# Write a second copy via dd. Since the DDT was pruned, dedup can't find
|
||||
# an existing entry and writes new physical blocks at new DVAs, creating
|
||||
# fresh DDT entries with refcnt=1. The BRT-tracked blocks (file1/clone1)
|
||||
# are at the old DVAs and are unaffected.
|
||||
log_must dd if=$mountpoint/file1 of=$mountpoint/file2 bs=128k
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# Eight new unique DDT entries (file2's blocks); BRT still holds refs for
|
||||
# file1/clone1's old blocks.
|
||||
typeset entries=$(zpool status -D $TESTPOOL | \
|
||||
grep "dedup: DDT entries" | awk '{print $4}')
|
||||
log_must test "$entries" -eq 8
|
||||
log_must zdb -b $TESTPOOL
|
||||
|
||||
# Delete the clone first. Its blocks carry the DEDUP bit and the same
|
||||
# hash as file2's DDT entries, but the DVAs differ — the free must go
|
||||
# through BRT, not DDT, leaving file2's DDT entries intact.
|
||||
log_must rm $mountpoint/clone1
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
entries=$(zpool status -D $TESTPOOL | \
|
||||
grep "dedup: DDT entries" | awk '{print $4}')
|
||||
log_must test "$entries" -eq 8
|
||||
log_must zdb -b $TESTPOOL
|
||||
|
||||
# Delete file2. DDT entries freed; file1's BRT-tracked blocks unaffected.
|
||||
log_must rm $mountpoint/file2
|
||||
sync_pool $TESTPOOL
|
||||
log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'"
|
||||
log_must zdb -b $TESTPOOL
|
||||
|
||||
# Delete the original. No DDT entry, no BRT entry; DVA freed directly.
|
||||
log_must rm $mountpoint/file1
|
||||
sync_pool $TESTPOOL
|
||||
log_must zdb -b $TESTPOOL
|
||||
|
||||
log_pass "Block cloning of dedup blocks with pruned DDT entries uses BRT"
|
||||
Reference in New Issue
Block a user