Vdev allocation bias/class change

Normal, special and dedup vdevs differ only by space allocation
bias.  Normal and special vdevs might even legally store blocks
targeted to other classes.  Dedup vdevs don't normally do it, but
there is no real reason why they can't.  Considering this, it is
not impossible to change the allocation bias for those vdevs.

This change introduces a new top-level vdev property -- alloc_bias,
which reports the current bias of the vdev and allows it to be changed.
This makes it easy to change a vdev's role in a pool, especially when
vdev removal is impossible.  To not complicate the code, changes
take effect only on the next pool import.

Changes to/from a log vdev are also theoretically possible, but
they are artificially blocked for now, partly due to additional
complications, and partly due to the potential danger of placing
other blocks on log vdevs, whose loss would otherwise be non-fatal.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alek Pinchuk <alek.pinchuk@connectwise.com>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18493
This commit is contained in:
Alexander Motin
2026-05-07 12:16:39 -04:00
committed by GitHub
parent bdb8e8a2c5
commit d65015938e
12 changed files with 334 additions and 11 deletions
+11
View File
@@ -476,6 +476,7 @@ typedef enum {
VDEV_PROP_SCHEDULER,
VDEV_PROP_FDOMAIN,
VDEV_PROP_FGROUP,
VDEV_PROP_ALLOC_BIAS,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -491,6 +492,16 @@ typedef enum {
VDEV_SCHEDULER_OFF
} vdev_scheduler_type_t;
/*
 * Allocation bias of a top-level vdev, as reported and set through the
 * alloc_bias vdev property.  The numeric values are spelled out because
 * they double as the property's index values.
 */
typedef enum vdev_alloc_bias {
	VDEV_BIAS_NONE = 0,	/* normal class, property value "none" */
	VDEV_BIAS_LOG = 1,	/* dedicated to ZIL data (SLOG) */
	VDEV_BIAS_SPECIAL = 2,	/* dedicated to ddt, metadata, and small blks */
	VDEV_BIAS_DEDUP = 3	/* dedicated to dedup metadata */
} vdev_alloc_bias_t;
/*
* Dataset property functions shared between libzfs and kernel.
*/
-8
View File
@@ -155,14 +155,6 @@ struct vdev_queue {
kmutex_t vq_lock;
};
/* Allocation bias (class) of a top-level vdev. */
typedef enum vdev_alloc_bias {
	VDEV_BIAS_NONE = 0,	/* normal class */
	VDEV_BIAS_LOG = 1,	/* dedicated to ZIL data (SLOG) */
	VDEV_BIAS_SPECIAL = 2,	/* dedicated to ddt, metadata, and small blks */
	VDEV_BIAS_DEDUP = 3	/* dedicated to dedup metadata */
} vdev_alloc_bias_t;
/*
* On-disk indirect vdev state.
*
+6 -2
View File
@@ -2553,7 +2553,7 @@
<typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
<typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
<typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
<class-decl name='libzfs_handle' size-in-bits='18432' is-struct='yes' visibility='default' id='c8a9d9d8'>
<class-decl name='libzfs_handle' size-in-bits='18496' is-struct='yes' visibility='default' id='c8a9d9d8'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='libzfs_error' type-id='95e97e5e' visibility='default'/>
</data-member>
@@ -2605,6 +2605,9 @@
<data-member access='public' layout-offset-in-bits='18112'>
<var-decl name='zh_mnttab' type-id='f20fbd51' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='18432'>
<var-decl name='zh_mnttab_cache_enabled' type-id='c19b74c3' visibility='default'/>
</data-member>
</class-decl>
<class-decl name='zfs_handle' size-in-bits='4928' is-struct='yes' visibility='default' id='f6ee4445'>
<data-member access='public' layout-offset-in-bits='0'>
@@ -6412,7 +6415,8 @@
<enumerator name='VDEV_PROP_SCHEDULER' value='55'/>
<enumerator name='VDEV_PROP_FDOMAIN' value='56'/>
<enumerator name='VDEV_PROP_FGROUP' value='57'/>
<enumerator name='VDEV_NUM_PROPS' value='58'/>
<enumerator name='VDEV_PROP_ALLOC_BIAS' value='58'/>
<enumerator name='VDEV_NUM_PROPS' value='59'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
+3
View File
@@ -5741,6 +5741,9 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
return (ENOENT);
if (prop == VDEV_PROP_SIT_OUT)
return (ENOENT);
/* Only valid for top-level vdevs */
if (prop == VDEV_PROP_ALLOC_BIAS)
return (ENOENT);
}
if (vdev_prop_index_to_string(prop, intval,
(const char **)&strval) != 0)
+15
View File
@@ -218,6 +218,21 @@ If this device should perform new allocations, used to disable a device
when it is scheduled for later removal.
See
.Xr zpool-remove 8 .
.It Sy alloc_bias Ns = Ns Sy none Ns | Ns Sy log Ns | Ns Sy special Ns | Ns Sy dedup
Controls the allocation class for a top-level vdev.
Changes take effect after an export and import of the pool.
Changing to/from log is not implemented, since it may lead to data loss
if the log device fails.
Setting to
.Sy special
and
.Sy dedup
requires
.Sy feature@allocation_classes
to be enabled.
At least one top-level vdev must remain in the normal
.Pq Sy none
class.
.It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy on Ns | Ns Sy off
Controls how I/O requests are added to the vdev queue when reading or
writing to this vdev.
+12
View File
@@ -388,6 +388,14 @@ vdev_prop_init(void)
{ NULL }
};
/*
 * Index table for the alloc_bias vdev property: pairs each user-visible
 * value name with its VDEV_BIAS_* constant.  It ends with a { NULL }
 * entry like the neighboring tables in this function.
 *
 * "log" is listed even though vdev_prop_set() refuses to set it;
 * presumably it is needed so the bias of an existing log vdev can be
 * displayed -- confirm against vdev_prop_get().
 */
static const zprop_index_t vdev_alloc_bias_table[] = {
{ "none", VDEV_BIAS_NONE },
{ "log", VDEV_BIAS_LOG },
{ "special", VDEV_BIAS_SPECIAL },
{ "dedup", VDEV_BIAS_DEDUP },
{ NULL }
};
struct zfs_mod_supported_features *sfeatures =
zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
@@ -556,6 +564,10 @@ vdev_prop_init(void)
VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
"auto | on | off", "IO_SCHEDULER",
vdevschedulertype_table, sfeatures);
zprop_register_index(VDEV_PROP_ALLOC_BIAS, "alloc_bias",
VDEV_BIAS_NONE, PROP_DEFAULT, ZFS_TYPE_VDEV,
"none | log | special | dedup", "ALLOC_BIAS",
vdev_alloc_bias_table, sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
+77
View File
@@ -6093,6 +6093,29 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
strval);
}
break;
case VDEV_PROP_ALLOC_BIAS: {
intval = fnvpair_value_uint64(elem);
ASSERT3U(intval, !=, VDEV_BIAS_LOG);
const char *bias_str =
(intval == VDEV_BIAS_SPECIAL) ?
VDEV_ALLOC_BIAS_SPECIAL :
(intval == VDEV_BIAS_DEDUP) ?
VDEV_ALLOC_BIAS_DEDUP : NULL;
if (bias_str == NULL) {
(void) zap_remove(mos, objid,
VDEV_TOP_ZAP_ALLOCATION_BIAS, tx);
} else {
VERIFY0(zap_update(mos, objid,
VDEV_TOP_ZAP_ALLOCATION_BIAS,
1, strlen(bias_str) + 1, bias_str, tx));
spa_activate_allocation_classes(spa, tx);
}
spa_history_log_internal(spa, "vdev set", tx,
"vdev_guid=%llu: alloc_bias=%s",
(u_longlong_t)vdev_guid,
bias_str != NULL ? bias_str : "none");
break;
}
default:
/* normalize the property name */
propname = vdev_prop_to_name(prop);
@@ -6319,6 +6342,53 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_scheduler = intval;
break;
case VDEV_PROP_ALLOC_BIAS:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
if (vd != vd->vdev_top || vd->vdev_top_zap == 0) {
error = ENOTSUP;
break;
}
/* Log vdevs are not supported: remove and re-add. */
if (vd->vdev_islog) {
error = ENOTSUP;
break;
}
/* special/dedup needs allocation_classes feature */
if (intval != VDEV_BIAS_NONE &&
((intval != VDEV_BIAS_SPECIAL &&
intval != VDEV_BIAS_DEDUP) ||
!spa_feature_is_enabled(spa,
SPA_FEATURE_ALLOCATION_CLASSES))) {
error = ENOTSUP;
break;
}
/*
* Disallow converting the last normal vdev to
* avoid pool suspension on failed allocations.
*/
if (intval != VDEV_BIAS_NONE &&
vd->vdev_alloc_bias == VDEV_BIAS_NONE) {
vdev_t *rvd = spa->spa_root_vdev;
int normal = 0;
for (uint64_t c = 0;
c < rvd->vdev_children; c++) {
vdev_t *cvd = rvd->vdev_child[c];
if (vdev_is_concrete(cvd) &&
cvd->vdev_alloc_bias ==
VDEV_BIAS_NONE &&
!cvd->vdev_noalloc)
normal++;
}
if (normal <= 1) {
error = ENOTSUP;
break;
}
}
vd->vdev_alloc_bias = (vdev_alloc_bias_t)intval;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6746,6 +6816,13 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, NULL,
boolval, src);
break;
case VDEV_PROP_ALLOC_BIAS:
if (vd == vd->vdev_top) {
vdev_prop_add_list(outnvl, propname,
NULL, vd->vdev_alloc_bias,
ZPROP_SRC_NONE);
}
continue;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
+6
View File
@@ -3456,12 +3456,15 @@ zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
ASSERT(spa_writeable(spa));
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_close(spa, FTAG);
return (SET_ERROR(ENOENT));
}
error = vdev_prop_set(vd, innvl, outnvl);
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_close(spa, FTAG);
@@ -3500,12 +3503,15 @@ zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
if ((error = spa_open(poolname, &spa, FTAG)) != 0)
return (error);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_close(spa, FTAG);
return (SET_ERROR(ENOENT));
}
error = vdev_prop_get(vd, innvl, outnvl);
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_close(spa, FTAG);
+2 -1
View File
@@ -37,7 +37,8 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
'alloc_class_013_pos', 'alloc_class_016_pos']
'alloc_class_013_pos', 'alloc_class_014_pos', 'alloc_class_015_neg',
'alloc_class_016_pos']
tags = ['functional', 'alloc_class']
[tests/functional/append]
+2
View File
@@ -434,6 +434,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/alloc_class/alloc_class_011_neg.ksh \
functional/alloc_class/alloc_class_012_pos.ksh \
functional/alloc_class/alloc_class_013_pos.ksh \
functional/alloc_class/alloc_class_014_pos.ksh \
functional/alloc_class/alloc_class_015_neg.ksh \
functional/alloc_class/alloc_class_016_pos.ksh \
functional/alloc_class/cleanup.ksh \
functional/alloc_class/setup.ksh \
@@ -0,0 +1,109 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2026, TrueNAS.
#
. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
#
# DESCRIPTION:
# The alloc_bias vdev property is readable and settable on top-level vdevs.
#
# STRATEGY:
# 1. Create a pool with one normal mirror and one special mirror.
# 2. Verify alloc_bias getter returns "none" for normal and "special"
# for the special mirror.
# 3. Verify alloc_bias is not reported for leaf (child) vdevs.
# 4. Set alloc_bias=none on the special vdev; verify getter returns "none".
# 5. Export and import the pool; verify no "special" section in status.
# 6. Set alloc_bias=dedup on the same vdev; verify getter returns "dedup".
# 7. Export and import the pool; verify "dedup" section appears in status.
# 8. Set alloc_bias=special; verify getter returns "special".
# 9. Export and import; verify "special" section appears again.
#
verify_runnable "global"

#
# Fail the test unless the alloc_bias property of vdev $1 reads as $2.
# $3 is the context prefix placed at the start of the failure message.
#
function expect_bias
{
	typeset vdev=$1
	typeset want=$2
	typeset context=$3
	typeset got

	got=$(zpool get -H -o value alloc_bias $TESTPOOL $vdev)
	if [[ "$got" != "$want" ]]; then
		log_fail "$context expected $want, got $got"
	fi
}

#
# Export and re-import the test pool, since alloc_bias changes are only
# reflected in the pool layout after an import.
#
function reimport_pool
{
	log_must zpool export $TESTPOOL
	log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL
}

claim="alloc_bias vdev property is readable and settable on top-level vdevs"
log_assert $claim
log_onexit cleanup
log_must disk_setup

# The first mirror stays in the normal class for the whole test, so the
# pool never runs out of normal-class vdevs while the second mirror is
# switched between classes.
log_must zpool create $TESTPOOL \
    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
    special mirror $CLASS_DISK0 $CLASS_DISK1

# The special mirror is the first mirror-N line after the "special"
# heading in zpool status.
special_vdev=$(zpool status $TESTPOOL | \
    awk '/special/{found=1} found && /mirror-/{print $1; exit}')
log_note "Special vdev: $special_vdev"
if [[ -z "$special_vdev" ]]; then
	log_fail "Could not determine special vdev name"
fi

# Initial values as created.
expect_bias mirror-0 none "Normal mirror alloc_bias:"
expect_bias $special_vdev special "Special mirror alloc_bias:"

# A leaf vdev must not report alloc_bias at all.
leaf_report=$(zpool get -H -o name,value alloc_bias $TESTPOOL \
    $ZPOOL_DISK0 2>&1)
if [[ -n "$leaf_report" ]]; then
	log_fail "alloc_bias reported for leaf vdev, got: $leaf_report"
fi

# --- special -> none, verify after export/import ---
log_must zpool set alloc_bias=none $TESTPOOL $special_vdev
expect_bias $special_vdev none "After set none: alloc_bias"
reimport_pool
if zpool status $TESTPOOL | grep -q "special"; then
	log_fail "special still shown after alloc_bias=none + reimport"
fi

# --- none -> dedup, verify after export/import ---
log_must zpool set alloc_bias=dedup $TESTPOOL $special_vdev
expect_bias $special_vdev dedup "After set dedup alloc_bias"
reimport_pool
if ! zpool status $TESTPOOL | grep -q "dedup"; then
	log_fail "dedup not shown after alloc_bias=dedup + reimport"
fi

# --- dedup -> special, verify after export/import ---
log_must zpool set alloc_bias=special $TESTPOOL $special_vdev
expect_bias $special_vdev special "After set special alloc_bias"
reimport_pool
if ! zpool status $TESTPOOL | grep -q "special"; then
	log_fail "special not shown after alloc_bias=special + reimport"
fi

log_must zpool destroy -f $TESTPOOL
log_pass $claim
@@ -0,0 +1,91 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2026, TrueNAS.
#
. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
#
# DESCRIPTION:
# Setting the alloc_bias vdev property to invalid values or on
# unsupported vdev types fails.
#
# STRATEGY:
# 1. Create a pool with a normal mirror and a log vdev.
# 2. Verify setting alloc_bias on a leaf vdev fails.
# 3. Verify setting alloc_bias=log fails.
# 4. Verify setting alloc_bias to an unknown value fails.
# 5. Verify setting alloc_bias on a log vdev fails.
# 6. Verify setting alloc_bias=special fails when allocation_classes
# feature is not enabled.
# 7. Verify converting the last normal vdev fails.
#
verify_runnable "global"

#
# Store the name of the first mirror top-level vdev of $TESTPOOL in
# NORMAL_VDEV.  The test is failed if no name is found: with an empty
# vdev argument every "log_mustnot zpool set ..." below would fail for
# an unrelated reason and the negative checks would pass vacuously.
#
function find_normal_vdev
{
	NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | \
	    awk '$1 ~ /^mirror/ {print $1; exit}')
	[[ -n "$NORMAL_VDEV" ]] || \
	    log_fail "Could not determine normal vdev name"
	log_note "Normal vdev: $NORMAL_VDEV"
}

claim="Setting alloc_bias to invalid values or on unsupported vdevs fails"
log_assert $claim
log_onexit cleanup
log_must disk_setup

# Create a pool with a normal mirror and a log vdev.
log_must zpool create $TESTPOOL \
    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
    log $CLASS_DISK0
find_normal_vdev

# Setting alloc_bias on a leaf vdev must fail.
log_mustnot zpool set alloc_bias=special $TESTPOOL $ZPOOL_DISK0

# Setting alloc_bias=log must fail (log vdevs must be removed and re-added).
log_mustnot zpool set alloc_bias=log $TESTPOOL $NORMAL_VDEV

# Setting alloc_bias to an unknown value must fail.
log_mustnot zpool set alloc_bias=bogus $TESTPOOL $NORMAL_VDEV

# Setting alloc_bias on a log vdev must fail.
# CLASS_DISK0 is a single-disk (non-mirror) top-level log vdev.
log_mustnot zpool set alloc_bias=special $TESTPOOL $CLASS_DISK0
log_must zpool destroy -f $TESTPOOL

# Verify setting alloc_bias=special fails when allocation_classes is disabled.
# Create a pool with the allocation_classes feature explicitly disabled.
log_must zpool create -o feature@allocation_classes=disabled $TESTPOOL \
    mirror $ZPOOL_DISK0 $ZPOOL_DISK1
find_normal_vdev
log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV
log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV
log_must zpool destroy -f $TESTPOOL

# Verify that converting the last normal-class top-level vdev fails.
# A pool must always retain at least one normal vdev.
log_must zpool create $TESTPOOL \
    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
    special mirror $CLASS_DISK0 $CLASS_DISK1
# The normal mirror is the first mirror listed, ahead of the special one.
find_normal_vdev
log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV
log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV
log_must zpool destroy -f $TESTPOOL
log_pass $claim