diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index fcef32ecf9f..d9b6e7654b0 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -476,6 +476,7 @@ typedef enum { VDEV_PROP_SCHEDULER, VDEV_PROP_FDOMAIN, VDEV_PROP_FGROUP, + VDEV_PROP_ALLOC_BIAS, VDEV_NUM_PROPS } vdev_prop_t; @@ -491,6 +492,16 @@ typedef enum { VDEV_SCHEDULER_OFF } vdev_scheduler_type_t; +/* + * Allocation bias for top-level vdevs (alloc_bias property). + */ +typedef enum vdev_alloc_bias { + VDEV_BIAS_NONE, + VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */ + VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */ + VDEV_BIAS_DEDUP /* dedicated to dedup metadata */ +} vdev_alloc_bias_t; + /* * Dataset property functions shared between libzfs and kernel. */ diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 634594aca12..3c19b9abe9c 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -155,14 +155,6 @@ struct vdev_queue { kmutex_t vq_lock; }; -typedef enum vdev_alloc_bias { - VDEV_BIAS_NONE, - VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */ - VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */ - VDEV_BIAS_DEDUP /* dedicated to dedup metadata */ -} vdev_alloc_bias_t; - - /* * On-disk indirect vdev state. 
* diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index ad28c876630..be74babbcba 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -2553,7 +2553,7 @@ - + @@ -2605,6 +2605,9 @@ + + + @@ -6412,7 +6415,8 @@ - + + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 0b015d8bce6..fd957d98313 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -5741,6 +5741,9 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name, return (ENOENT); if (prop == VDEV_PROP_SIT_OUT) return (ENOENT); + /* Only valid for top-level vdevs */ + if (prop == VDEV_PROP_ALLOC_BIAS) + return (ENOENT); } if (vdev_prop_index_to_string(prop, intval, (const char **)&strval) != 0) diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index b45128dd924..5f5e10723c1 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -218,6 +218,21 @@ If this device should perform new allocations, used to disable a device when it is scheduled for later removal. See .Xr zpool-remove 8 . +.It Sy alloc_bias Ns = Ns Sy none Ns | Ns Sy log Ns | Ns Sy special Ns | Ns Sy dedup +Controls the allocation class for a top-level vdev. +Changes take effect after an export and import of the pool. +Changing to or from log is not supported, since it could lead to data loss +if the log device fails. +Setting to +.Sy special +or +.Sy dedup +requires +.Sy feature@allocation_classes +to be enabled. +At least one top-level vdev must remain in the normal +.Pq Sy none +class. .It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy on Ns | Ns Sy off Controls how I/O requests are added to the vdev queue when reading or writing to this vdev. 
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index ee86fe0c717..13a1390d1e1 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -388,6 +388,14 @@ vdev_prop_init(void) { NULL } }; + static const zprop_index_t vdev_alloc_bias_table[] = { + { "none", VDEV_BIAS_NONE }, + { "log", VDEV_BIAS_LOG }, + { "special", VDEV_BIAS_SPECIAL }, + { "dedup", VDEV_BIAS_DEDUP }, + { NULL } + }; + struct zfs_mod_supported_features *sfeatures = zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES); @@ -556,6 +564,10 @@ vdev_prop_init(void) VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV, "auto | on | off", "IO_SCHEDULER", vdevschedulertype_table, sfeatures); + zprop_register_index(VDEV_PROP_ALLOC_BIAS, "alloc_bias", + VDEV_BIAS_NONE, PROP_DEFAULT, ZFS_TYPE_VDEV, + "none | log | special | dedup", "ALLOC_BIAS", + vdev_alloc_bias_table, sfeatures); /* hidden properties */ zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 4cc75ad5a25..9f083cd510f 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -6093,6 +6093,29 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx) strval); } break; + case VDEV_PROP_ALLOC_BIAS: { + intval = fnvpair_value_uint64(elem); + ASSERT3U(intval, !=, VDEV_BIAS_LOG); + const char *bias_str = + (intval == VDEV_BIAS_SPECIAL) ? + VDEV_ALLOC_BIAS_SPECIAL : + (intval == VDEV_BIAS_DEDUP) ? + VDEV_ALLOC_BIAS_DEDUP : NULL; + if (bias_str == NULL) { + (void) zap_remove(mos, objid, + VDEV_TOP_ZAP_ALLOCATION_BIAS, tx); + } else { + VERIFY0(zap_update(mos, objid, + VDEV_TOP_ZAP_ALLOCATION_BIAS, + 1, strlen(bias_str) + 1, bias_str, tx)); + spa_activate_allocation_classes(spa, tx); + } + spa_history_log_internal(spa, "vdev set", tx, + "vdev_guid=%llu: alloc_bias=%s", + (u_longlong_t)vdev_guid, + bias_str != NULL ? 
bias_str : "none"); + break; + } default: /* normalize the property name */ propname = vdev_prop_to_name(prop); @@ -6319,6 +6342,53 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_scheduler = intval; break; + case VDEV_PROP_ALLOC_BIAS: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + if (vd != vd->vdev_top || vd->vdev_top_zap == 0) { + error = ENOTSUP; + break; + } + /* Log vdevs are not supported: remove and re-add. */ + if (vd->vdev_islog) { + error = ENOTSUP; + break; + } + /* special/dedup needs allocation_classes feature */ + if (intval != VDEV_BIAS_NONE && + ((intval != VDEV_BIAS_SPECIAL && + intval != VDEV_BIAS_DEDUP) || + !spa_feature_is_enabled(spa, + SPA_FEATURE_ALLOCATION_CLASSES))) { + error = ENOTSUP; + break; + } + /* + * Disallow converting the last normal vdev to + * avoid pool suspension on failed allocations. + */ + if (intval != VDEV_BIAS_NONE && + vd->vdev_alloc_bias == VDEV_BIAS_NONE) { + vdev_t *rvd = spa->spa_root_vdev; + int normal = 0; + for (uint64_t c = 0; + c < rvd->vdev_children; c++) { + vdev_t *cvd = rvd->vdev_child[c]; + if (vdev_is_concrete(cvd) && + cvd->vdev_alloc_bias == + VDEV_BIAS_NONE && + !cvd->vdev_noalloc) + normal++; + } + if (normal <= 1) { + error = ENOTSUP; + break; + } + } + vd->vdev_alloc_bias = (vdev_alloc_bias_t)intval; + break; default: /* Most processing is done in vdev_props_set_sync */ break; @@ -6746,6 +6816,13 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vdev_prop_add_list(outnvl, propname, NULL, boolval, src); break; + case VDEV_PROP_ALLOC_BIAS: + if (vd == vd->vdev_top) { + vdev_prop_add_list(outnvl, propname, + NULL, vd->vdev_alloc_bias, + ZPROP_SRC_NONE); + } + continue; case VDEV_PROP_CHECKSUM_N: case VDEV_PROP_CHECKSUM_T: case VDEV_PROP_IO_N: diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index fe98e7db073..d31aa80641c 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -3456,12 +3456,15 @@ 
zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) ASSERT(spa_writeable(spa)); + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { + spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); return (SET_ERROR(ENOENT)); } error = vdev_prop_set(vd, innvl, outnvl); + spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); @@ -3500,12 +3503,15 @@ zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) if ((error = spa_open(poolname, &spa, FTAG)) != 0) return (error); + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { + spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); return (SET_ERROR(ENOENT)); } error = vdev_prop_get(vd, innvl, outnvl); + spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 4c7e4e85ec0..df80437ad0c 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -37,7 +37,8 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos', 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos', 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos', - 'alloc_class_013_pos', 'alloc_class_016_pos'] + 'alloc_class_013_pos', 'alloc_class_014_pos', 'alloc_class_015_neg', + 'alloc_class_016_pos'] tags = ['functional', 'alloc_class'] [tests/functional/append] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index c4bcfea5595..a6242ba0f52 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -434,6 +434,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/alloc_class/alloc_class_011_neg.ksh \ functional/alloc_class/alloc_class_012_pos.ksh \ 
functional/alloc_class/alloc_class_013_pos.ksh \ + functional/alloc_class/alloc_class_014_pos.ksh \ + functional/alloc_class/alloc_class_015_neg.ksh \ functional/alloc_class/alloc_class_016_pos.ksh \ functional/alloc_class/cleanup.ksh \ functional/alloc_class/setup.ksh \ diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh new file mode 100755 index 00000000000..27c55bc5906 --- /dev/null +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh @@ -0,0 +1,109 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2026, TrueNAS. +# + +. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib + +# +# DESCRIPTION: +# The alloc_bias vdev property is readable and settable on top-level vdevs. +# +# STRATEGY: +# 1. Create a pool with one normal mirror and one special mirror. +# 2. Verify alloc_bias getter returns "none" for normal and "special" +# for the special mirror. +# 3. Verify alloc_bias is not reported for leaf (child) vdevs. +# 4. Set alloc_bias=none on the special vdev; verify getter returns "none". +# 5. Export and import the pool; verify no "special" section in status. +# 6. Set alloc_bias=dedup on the same vdev; verify getter returns "dedup". +# 7. Export and import the pool; verify "dedup" section appears in status. +# 8. Set alloc_bias=special; verify getter returns "special". +# 9. Export and import; verify "special" section appears again. 
+# + +verify_runnable "global" + +claim="alloc_bias vdev property is readable and settable on top-level vdevs" + +log_assert $claim +log_onexit cleanup + +log_must disk_setup + +# One normal mirror (always stays normal) and one special mirror. +# The normal mirror ensures the pool always has normal-class vdevs +# regardless of what we do to the second mirror. +log_must zpool create $TESTPOOL \ + mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \ + special mirror $CLASS_DISK0 $CLASS_DISK1 + +# Find the special vdev name (mirror-N) from zpool status. +TVDEV=$(zpool status $TESTPOOL | \ + awk '/special/{found=1} found && /mirror-/{print $1; exit}') +log_note "Special vdev: $TVDEV" +[[ -n "$TVDEV" ]] || log_fail "Could not determine special vdev name" + +# Verify initial alloc_bias values. +BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL mirror-0) +[[ "$BIAS" == "none" ]] || \ + log_fail "Normal mirror alloc_bias: expected none, got $BIAS" + +BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV) +[[ "$BIAS" == "special" ]] || \ + log_fail "Special mirror alloc_bias: expected special, got $BIAS" + +# Verify alloc_bias is not reported for a leaf vdev. 
+LEAF_OUT=$(zpool get -H -o name,value alloc_bias $TESTPOOL \ + $ZPOOL_DISK0 2>&1) +[[ -z "$LEAF_OUT" ]] || \ + log_fail "alloc_bias reported for leaf vdev, got: $LEAF_OUT" + +# --- special -> none, verify after export/import --- +log_must zpool set alloc_bias=none $TESTPOOL $TVDEV +BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV) +[[ "$BIAS" == "none" ]] || \ + log_fail "After set none: alloc_bias expected none, got $BIAS" + +log_must zpool export $TESTPOOL +log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL +zpool status $TESTPOOL | grep -q "special" && \ + log_fail "special still shown after alloc_bias=none + reimport" + +# --- none -> dedup, verify after export/import --- +log_must zpool set alloc_bias=dedup $TESTPOOL $TVDEV +BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV) +[[ "$BIAS" == "dedup" ]] || \ + log_fail "After set dedup alloc_bias expected dedup, got $BIAS" + +log_must zpool export $TESTPOOL +log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL +zpool status $TESTPOOL | grep -q "dedup" || \ + log_fail "dedup not shown after alloc_bias=dedup + reimport" + +# --- dedup -> special, verify after export/import --- +log_must zpool set alloc_bias=special $TESTPOOL $TVDEV +BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV) +[[ "$BIAS" == "special" ]] || \ + log_fail "After set special alloc_bias expected special, got $BIAS" + +log_must zpool export $TESTPOOL +log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL +zpool status $TESTPOOL | grep -q "special" || \ + log_fail "special not shown after alloc_bias=special + reimport" + +log_must zpool destroy -f $TESTPOOL +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh new file mode 100755 index 00000000000..43740690b3c --- /dev/null +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh @@ -0,0 +1,91 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: 
CDDL-1.0 + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2026, TrueNAS. +# + +. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib + +# +# DESCRIPTION: +# Setting the alloc_bias vdev property to invalid values or on +# unsupported vdev types fails. +# +# STRATEGY: +# 1. Create a pool with a normal mirror and a log vdev. +# 2. Verify setting alloc_bias on a leaf vdev fails. +# 3. Verify setting alloc_bias=log fails. +# 4. Verify setting alloc_bias to an unknown value fails. +# 5. Verify setting alloc_bias on a log vdev fails. +# 6. Verify setting alloc_bias=special fails when allocation_classes +# feature is not enabled. +# 7. Verify converting the last normal vdev fails. +# + +verify_runnable "global" + +claim="Setting alloc_bias to invalid values or on unsupported vdevs fails" + +log_assert $claim +log_onexit cleanup + +log_must disk_setup + +# Create a pool with a normal mirror and a log vdev. +log_must zpool create $TESTPOOL \ + mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \ + log $CLASS_DISK0 + +NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}') +log_note "Normal vdev: $NORMAL_VDEV" + +# Setting alloc_bias on a leaf vdev must fail. +log_mustnot zpool set alloc_bias=special $TESTPOOL $ZPOOL_DISK0 + +# Setting alloc_bias=log must fail (log vdevs must be removed and re-added). +log_mustnot zpool set alloc_bias=log $TESTPOOL $NORMAL_VDEV + +# Setting alloc_bias to an unknown value must fail. +log_mustnot zpool set alloc_bias=bogus $TESTPOOL $NORMAL_VDEV + +# Setting alloc_bias on a log vdev must fail. 
+# CLASS_DISK0 is a single-disk (non-mirror) top-level log vdev. +log_mustnot zpool set alloc_bias=special $TESTPOOL $CLASS_DISK0 + +log_must zpool destroy -f $TESTPOOL + +# Verify setting alloc_bias=special fails when allocation_classes is disabled. +# Create a pool with the allocation_classes feature explicitly disabled. +log_must zpool create -o feature@allocation_classes=disabled $TESTPOOL \ + mirror $ZPOOL_DISK0 $ZPOOL_DISK1 + +NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}') +log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV +log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV + +log_must zpool destroy -f $TESTPOOL + +# Verify that converting the last normal-class top-level vdev fails. +# A pool must always retain at least one normal vdev. +log_must zpool create $TESTPOOL \ + mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \ + special mirror $CLASS_DISK0 $CLASS_DISK1 + +NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}') +log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV +log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV + +log_must zpool destroy -f $TESTPOOL +log_pass $claim