From 6fb72fda0f60d9efb591e320f83f78b19ec451cc Mon Sep 17 00:00:00 2001 From: Saju Palayur Date: Fri, 15 May 2026 14:15:05 -0700 Subject: [PATCH] zio_ddt_write: compute have_dvas after taking dde_io_lock In zio_ddt_write(), have_dvas and is_ganged were computed before dde_io_lock was taken. A concurrent zio_ddt_child_write_done() error path calls ddt_phys_unextend() under dde_io_lock, which can zero DVA[0] while another thread is between computing have_dvas and taking dde_io_lock. That thread then uses the stale have_dvas=1 to call ddt_bp_fill(), copying the zeroed DVA into the BP. A zero DVA resolves as a hole, producing blocks that read back as zeros with no checksum error (silent data corruption). Fix by moving have_dvas and is_ganged computation to after dde_io_lock is taken, so they always reflect the current state of dde->dde_phys. Regression introduced by a41ef36858 ("DDT: Reduce global DDT lock scope during writes"). Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Co-authored-by: Saju Palayur Signed-off-by: Saju Palayur Closes #18366 Closes #18544 --- module/zfs/zio.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 3e95103385c..4b7c13dd1e9 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3830,7 +3830,6 @@ zio_ddt_write(zio_t *zio) int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies); ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p); - ddt_univ_phys_t *ddp = dde->dde_phys; /* * In the common cases, at this point we have a regular BP with no @@ -3861,14 +3860,6 @@ zio_ddt_write(zio_t *zio) * end of the chain and letting the sequence play out. */ - /* - * Number of DVAs in the DDT entry. If the BP is encrypted we ignore - * the third one as normal. - */ - int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp)); - IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0); - boolean_t is_ganged = ddt_phys_is_gang(ddp, v); - /* Number of DVAs requested by the IO. */ uint8_t need_dvas = zp->zp_copies; /* Number of DVAs in outstanding writes for this dde. */ @@ -3883,6 +3874,21 @@ zio_ddt_write(zio_t *zio) if (dde_io != NULL) mutex_enter(&dde_io->dde_io_lock); + /* + * Number of DVAs in the DDT entry. If the BP is encrypted we ignore + * the third one as normal. + * + * Must be computed after taking dde_io_lock (if held) to avoid + * racing with ddt_phys_unextend() in zio_ddt_child_write_done() + * error path, which can zero DVAs under dde_io_lock. Without the + * lock, a stale have_dvas causes ddt_bp_fill() to copy a zeroed + * DVA into the BP, producing a hole that reads back as zeros. + */ + ddt_univ_phys_t *ddp = dde->dde_phys; + int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp)); + IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0); + boolean_t is_ganged = ddt_phys_is_gang(ddp, v); + if (dde_io == NULL || dde_io->dde_lead_zio[p] == NULL) { /* * No IO outstanding, so we only need to worry about ourselves.