zed: Prefer dRAID distributed spares to regular ones

One of the main dRAID features is avoiding single-drive bottlenecks
by using distributed spares.  Activating a regular spare takes
more time, during which the dRAID redundancy is even lower than in
the case of RAIDZ.  But regular spares might still be added to the
pool as a second line of defence, possibly shared by several vdevs.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18578
This commit is contained in:
Alexander Motin
2026-06-01 17:49:38 -04:00
committed by GitHub
parent 20d56830f9
commit 4bc8c39b62
3 changed files with 31 additions and 12 deletions
+22 -8
View File
@@ -355,18 +355,30 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,
* replacing a failed vdev with the given characteristics.
*
* Ordering criteria (most to least significant):
* 1. Matching rotational is preferred over mismatching.
* 2. Large enough is preferred over (potentially?) too small.
* 3. Smaller size is preferred over bigger (best fit).
* 1. Distributed spare matching the failed vdev's dRAID is preferred
* most (distributed spares rebuild faster than traditional spares).
* Regular spares (no TOP_GUID) come next. Non-matching distributed
* spares are tried last, as the kernel will reject them anyway.
* 2. Matching rotational is preferred over mismatching.
* 3. Large enough is preferred over too small.
* 4. Smaller size is preferred over bigger (best fit).
*/
static boolean_t
spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
uint64_t vdev_rotational, uint64_t vdev_size)
uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
{
uint64_t a_rotational = 0, b_rotational = 0;
uint64_t a_size = 0, b_size = 0;
uint64_t a_top = 0, b_top = 0;
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top);
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top);
int a_pri = (a_top == 0) ? 1 :
(a_top == top_guid || top_guid == 0) ? 2 : 0;
int b_pri = (b_top == 0) ? 1 :
(b_top == top_guid || top_guid == 0) ? 2 : 0;
if (a_pri != b_pri)
return (a_pri > b_pri);
if (have_rotational) {
uint64_t a_rotational = 0, b_rotational = 0;
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_VDEV_ROTATIONAL,
&a_rotational);
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_VDEV_ROTATIONAL,
@@ -378,6 +390,7 @@ spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
vdev_stat_t *vs;
unsigned int c;
uint64_t a_size = 0, b_size = 0;
if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0)
a_size = vs->vs_rsize;
@@ -405,7 +418,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
char *dev_name;
zprop_source_t source;
int ashift;
uint64_t vdev_rotational = 0, vdev_size = 0;
uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0;
boolean_t have_vdev_rotational;
vdev_stat_t *vs;
unsigned int c;
@@ -430,6 +443,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0)
vdev_size = vs->vs_rsize;
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid);
/*
* Build a sorted index array over the spares, so that better
@@ -443,7 +457,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
int j = (int)s - 1;
while (j >= 0 && spare_is_preferred(spares[key],
spares[order[j]], have_vdev_rotational, vdev_rotational,
vdev_size)) {
vdev_size, top_guid)) {
order[j + 1] = order[j];
j--;
}
+5
View File
@@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
vd->vdev_top != NULL) {
fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID,
vd->vdev_top->vdev_guid);
}
if (vd->vdev_path != NULL)
fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
@@ -84,8 +84,8 @@ for type in "mirror" "raidz" "raidz2" "raidz3" "draid2:1s"; do
log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
$SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
spare $SPARE_DEV1
SPARE1=$SPARE_DEV1
SPARE2="draid2-0-0"
SPARE1="draid2-0-0"
SPARE2=$SPARE_DEV1
elif [ "$type" = "mirror" ]; then
# 1. Create a 3-way mirror pool with two hot spares
truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
@@ -167,8 +167,8 @@ for type in "mirror" "raidz2" "raidz3" "draid2:1s"; do
log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
$SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
spare $SPARE_DEV1
SPARE1=$SPARE_DEV1
SPARE2="draid2-0-0"
SPARE1="draid2-0-0"
SPARE2=$SPARE_DEV1
elif [ "$type" = "mirror" ]; then
# 1. Create a 3-way mirror pool with two hot spares
truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS