zed: Prefer dRAID distributed spares to regular ones
One of the main dRAID features is avoiding single-drive bottlenecks by using distributed spares. Activating a regular spare takes more time, during which the dRAID redundancy is even lower than in the case of RAIDZ. However, regular spares might still be added to the pool as a second line of defence, possibly shared by several vdevs. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #18578
This commit is contained in:
@@ -355,18 +355,30 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,
|
||||
* replacing a failed vdev with the given characteristics.
|
||||
*
|
||||
* Ordering criteria (most to least significant):
|
||||
* 1. Matching rotational is preferred over mismatching.
|
||||
* 2. Large enough is preferred over (potentially?) too small.
|
||||
* 3. Smaller size is preferred over bigger (best fit).
|
||||
* 1. Distributed spare matching the failed vdev's dRAID is preferred
|
||||
* most (distributed spares rebuild faster than traditional spares).
|
||||
* Regular spares (no TOP_GUID) come next. Non-matching distributed
|
||||
* spares are tried last, as the kernel will reject them anyway.
|
||||
* 2. Matching rotational is preferred over mismatching.
|
||||
* 3. Large enough is preferred over too small.
|
||||
* 4. Smaller size is preferred over bigger (best fit).
|
||||
*/
|
||||
static boolean_t
|
||||
spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
|
||||
uint64_t vdev_rotational, uint64_t vdev_size)
|
||||
uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
|
||||
{
|
||||
uint64_t a_rotational = 0, b_rotational = 0;
|
||||
uint64_t a_size = 0, b_size = 0;
|
||||
uint64_t a_top = 0, b_top = 0;
|
||||
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top);
|
||||
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top);
|
||||
int a_pri = (a_top == 0) ? 1 :
|
||||
(a_top == top_guid || top_guid == 0) ? 2 : 0;
|
||||
int b_pri = (b_top == 0) ? 1 :
|
||||
(b_top == top_guid || top_guid == 0) ? 2 : 0;
|
||||
if (a_pri != b_pri)
|
||||
return (a_pri > b_pri);
|
||||
|
||||
if (have_rotational) {
|
||||
uint64_t a_rotational = 0, b_rotational = 0;
|
||||
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
&a_rotational);
|
||||
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
@@ -378,6 +390,7 @@ spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
|
||||
|
||||
vdev_stat_t *vs;
|
||||
unsigned int c;
|
||||
uint64_t a_size = 0, b_size = 0;
|
||||
if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0)
|
||||
a_size = vs->vs_rsize;
|
||||
@@ -405,7 +418,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
char *dev_name;
|
||||
zprop_source_t source;
|
||||
int ashift;
|
||||
uint64_t vdev_rotational = 0, vdev_size = 0;
|
||||
uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0;
|
||||
boolean_t have_vdev_rotational;
|
||||
vdev_stat_t *vs;
|
||||
unsigned int c;
|
||||
@@ -430,6 +443,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0)
|
||||
vdev_size = vs->vs_rsize;
|
||||
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid);
|
||||
|
||||
/*
|
||||
* Build a sorted index array over the spares, so that better
|
||||
@@ -443,7 +457,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
int j = (int)s - 1;
|
||||
while (j >= 0 && spare_is_preferred(spares[key],
|
||||
spares[order[j]], have_vdev_rotational, vdev_rotational,
|
||||
vdev_size)) {
|
||||
vdev_size, top_guid)) {
|
||||
order[j + 1] = order[j];
|
||||
j--;
|
||||
}
|
||||
|
||||
@@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
|
||||
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
|
||||
vd->vdev_top != NULL) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID,
|
||||
vd->vdev_top->vdev_guid);
|
||||
}
|
||||
|
||||
if (vd->vdev_path != NULL)
|
||||
fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
|
||||
|
||||
@@ -84,8 +84,8 @@ for type in "mirror" "raidz" "raidz2" "raidz3" "draid2:1s"; do
|
||||
log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
|
||||
$SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
|
||||
spare $SPARE_DEV1
|
||||
SPARE1=$SPARE_DEV1
|
||||
SPARE2="draid2-0-0"
|
||||
SPARE1="draid2-0-0"
|
||||
SPARE2=$SPARE_DEV1
|
||||
elif [ "$type" = "mirror" ]; then
|
||||
# 1. Create a 3-way mirror pool with two hot spares
|
||||
truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
|
||||
@@ -167,8 +167,8 @@ for type in "mirror" "raidz2" "raidz3" "draid2:1s"; do
|
||||
log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
|
||||
$SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
|
||||
spare $SPARE_DEV1
|
||||
SPARE1=$SPARE_DEV1
|
||||
SPARE2="draid2-0-0"
|
||||
SPARE1="draid2-0-0"
|
||||
SPARE2=$SPARE_DEV1
|
||||
elif [ "$type" = "mirror" ]; then
|
||||
# 1. Create a 3-way mirror pool with two hot spares
|
||||
truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
|
||||
|
||||
Reference in New Issue
Block a user