zio_ddt_write: compute have_dvas after taking dde_io_lock
In zio_ddt_write(), have_dvas and is_ganged were computed before
dde_io_lock was taken. A concurrent zio_ddt_child_write_done() error
path calls ddt_phys_unextend() under dde_io_lock, which can zero
DVA[0] while another thread is between computing have_dvas and taking
dde_io_lock. That thread then uses the stale have_dvas=1 to call
ddt_bp_fill(), copying the zeroed DVA into the BP. A zero DVA resolves
as a hole, producing blocks that read back as zeros with no checksum
error (silent data corruption).
Fix by moving have_dvas and is_ganged computation to after dde_io_lock
is taken, so they always reflect the current state of dde->dde_phys.
Regression introduced by a41ef36858 ("DDT: Reduce global DDT lock
scope during writes").
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Saju Palayur <spalayur@maxlinear.com>
Signed-off-by: Saju Palayur <spalayur@maxlinear.com>
Closes #18366
Closes #18544
This commit is contained in:
+15
-9
@@ -3830,7 +3830,6 @@ zio_ddt_write(zio_t *zio)
|
|||||||
|
|
||||||
int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies);
|
int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies);
|
||||||
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
|
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
|
||||||
ddt_univ_phys_t *ddp = dde->dde_phys;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In the common cases, at this point we have a regular BP with no
|
* In the common cases, at this point we have a regular BP with no
|
||||||
@@ -3861,14 +3860,6 @@ zio_ddt_write(zio_t *zio)
|
|||||||
* end of the chain and letting the sequence play out.
|
* end of the chain and letting the sequence play out.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* Number of DVAs in the DDT entry. If the BP is encrypted we ignore
|
|
||||||
* the third one as normal.
|
|
||||||
*/
|
|
||||||
int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp));
|
|
||||||
IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0);
|
|
||||||
boolean_t is_ganged = ddt_phys_is_gang(ddp, v);
|
|
||||||
|
|
||||||
/* Number of DVAs requested by the IO. */
|
/* Number of DVAs requested by the IO. */
|
||||||
uint8_t need_dvas = zp->zp_copies;
|
uint8_t need_dvas = zp->zp_copies;
|
||||||
/* Number of DVAs in outstanding writes for this dde. */
|
/* Number of DVAs in outstanding writes for this dde. */
|
||||||
@@ -3883,6 +3874,21 @@ zio_ddt_write(zio_t *zio)
|
|||||||
if (dde_io != NULL)
|
if (dde_io != NULL)
|
||||||
mutex_enter(&dde_io->dde_io_lock);
|
mutex_enter(&dde_io->dde_io_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of DVAs in the DDT entry. If the BP is encrypted we ignore
|
||||||
|
* the third one as normal.
|
||||||
|
*
|
||||||
|
* Must be computed after taking dde_io_lock (if held) to avoid
|
||||||
|
* racing with ddt_phys_unextend() in zio_ddt_child_write_done()
|
||||||
|
* error path, which can zero DVAs under dde_io_lock. Without the
|
||||||
|
* lock, a stale have_dvas causes ddt_bp_fill() to copy a zeroed
|
||||||
|
* DVA into the BP, producing a hole that reads back as zeros.
|
||||||
|
*/
|
||||||
|
ddt_univ_phys_t *ddp = dde->dde_phys;
|
||||||
|
int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp));
|
||||||
|
IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0);
|
||||||
|
boolean_t is_ganged = ddt_phys_is_gang(ddp, v);
|
||||||
|
|
||||||
if (dde_io == NULL || dde_io->dde_lead_zio[p] == NULL) {
|
if (dde_io == NULL || dde_io->dde_lead_zio[p] == NULL) {
|
||||||
/*
|
/*
|
||||||
* No IO outstanding, so we only need to worry about ourselves.
|
* No IO outstanding, so we only need to worry about ourselves.
|
||||||
|
|||||||
Reference in New Issue
Block a user