diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index 0c6c30f2e86..ba3672a30a7 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -355,18 +355,30 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl, * replacing a failed vdev with the given characteristics. * * Ordering criteria (most to least significant): - * 1. Matching rotational is preferred over mismatching. - * 2. Large enough is preferred over (potentially?) too small. - * 3. Smaller size is preferred over bigger (best fit). + * 1. Distributed spare matching the failed vdev's dRAID is preferred + * most (distributed spares rebuild faster than traditional spares). + * Regular spares (no TOP_GUID) come next. Non-matching distributed + * spares are tried last, as the kernel will reject them anyway. + * 2. Matching rotational is preferred over mismatching. + * 3. Large enough is preferred over too small. + * 4. Smaller size is preferred over bigger (best fit). */ static boolean_t spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational, - uint64_t vdev_rotational, uint64_t vdev_size) + uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid) { - uint64_t a_rotational = 0, b_rotational = 0; - uint64_t a_size = 0, b_size = 0; + uint64_t a_top = 0, b_top = 0; + (void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top); + (void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top); + int a_pri = (a_top == 0) ? 1 : + (a_top == top_guid || top_guid == 0) ? 2 : 0; + int b_pri = (b_top == 0) ? 1 : + (b_top == top_guid || top_guid == 0) ? 2 : 0; + if (a_pri != b_pri) + return (a_pri > b_pri); if (have_rotational) { + uint64_t a_rotational = 0, b_rotational = 0; (void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_VDEV_ROTATIONAL, &a_rotational); (void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_VDEV_ROTATIONAL, @@ -378,6 +390,7 @@ spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational, vdev_stat_t *vs; unsigned int c; + uint64_t a_size = 0, b_size = 0; if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0) a_size = vs->vs_rsize; @@ -405,7 +418,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) char *dev_name; zprop_source_t source; int ashift; - uint64_t vdev_rotational = 0, vdev_size = 0; + uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0; boolean_t have_vdev_rotational; vdev_stat_t *vs; unsigned int c; @@ -430,6 +443,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0) vdev_size = vs->vs_rsize; + (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid); /* * Build a sorted index array over the spares, so that better @@ -443,7 +457,7 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) int j = (int)s - 1; while (j >= 0 && spare_is_preferred(spares[key], spares[order[j]], have_vdev_rotational, vdev_rotational, - vdev_size)) { + vdev_size, top_guid)) { order[j + 1] = order[j]; j--; } diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index b3042980aad..54d253c1b7d 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id); fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid); + if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && + vd->vdev_top != NULL) { + fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID, + vd->vdev_top->vdev_guid); + } if (vd->vdev_path != NULL) fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path); diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh index 023f5b58a6e..529a6a8c3fe 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh @@ -84,8 +84,8 @@ for type in "mirror" "raidz" "raidz2" "raidz3" "draid2:1s"; do log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \ $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \ spare $SPARE_DEV1 - SPARE1=$SPARE_DEV1 - SPARE2="draid2-0-0" + SPARE1="draid2-0-0" + SPARE2=$SPARE_DEV1 elif [ "$type" = "mirror" ]; then # 1. Create a 3-way mirror pool with two hot spares truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS @@ -167,8 +167,8 @@ for type in "mirror" "raidz2" "raidz3" "draid2:1s"; do log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \ $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \ spare $SPARE_DEV1 - SPARE1=$SPARE_DEV1 - SPARE2="draid2-0-0" + SPARE1="draid2-0-0" + SPARE2=$SPARE_DEV1 elif [ "$type" = "mirror" ]; then # 1. Create a 3-way mirror pool with two hot spares truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS