Notable upstream pull request merges:
 #17227 d64711c20 Detect a slow raidz child during reads
 #17543 -multiple zfs allow send:raw
 #17717 bc0b5318a Prevent scrubbing a read-only pool
 #17722 cb5f9aa58 FreeBSD: Satisfy ASSERT_VOP_IN_SEQC()
 #17729 3f4312a0a Fix two infinite loops if dmu_prefetch_max set to zero
 #17730 37cd30f71 Fix ddle memleak in ddt_log_load
 #17733 bc8bcfc71 Fix type in dbrrd_closest()
 #17735 9b772f328 Fix time database update calculations

Obtained from:	OpenZFS
OpenZFS commit:	3f4312a0a4
This commit is contained in:
Martin Matuska
2025-09-13 22:54:32 +02:00
111 changed files with 2804 additions and 281 deletions
@@ -6,6 +6,13 @@
set -eu
# We've been seeing this script take over 15min to run. This may or
# may not be normal. Just to get a little more insight, print out
# a message to stdout with the top running process, and do this every
# 30 seconds. We can delete this watchdog later once we get a better
# handle on what the timeout value should be.
#
# The loop runs in a background subshell; "ps ... | head -n 2 | tail -n 1"
# skips the ps header line and keeps only the busiest process.
(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)]"; done) &
# install needed packages
export DEBIAN_FRONTEND="noninteractive"
sudo apt-get -y update
@@ -65,3 +72,6 @@ sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \
for i in /sys/block/s*/queue/scheduler; do
echo "none" | sudo tee $i
done
# Kill off our watchdog
kill $(jobs -p)
+6 -2
View File
@@ -77,8 +77,12 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
- name: Setup QEMU
timeout-minutes: 15
run: .github/workflows/scripts/qemu-1-setup.sh
timeout-minutes: 20
run: |
# Add a timestamp to each line to debug timeouts
while IFS=$'\n' read -r line; do
echo "$(date +'%H:%M:%S') $line"
done < <(.github/workflows/scripts/qemu-1-setup.sh)
- name: Start build machine
timeout-minutes: 10
+5 -10
View File
@@ -98,20 +98,15 @@ endif
if USING_PYTHON
bin_SCRIPTS += arc_summary arcstat dbufstat zilstat
CLEANFILES += arc_summary arcstat dbufstat zilstat
dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in
bin_SCRIPTS += zarcsummary zarcstat dbufstat zilstat
CLEANFILES += zarcsummary zarcstat dbufstat zilstat
dist_noinst_DATA += %D%/zarcsummary %D%/zarcstat.in %D%/dbufstat.in %D%/zilstat.in
$(call SUBST,arcstat,%D%/)
$(call SUBST,zarcstat,%D%/)
$(call SUBST,dbufstat,%D%/)
$(call SUBST,zilstat,%D%/)
arc_summary: %D%/arc_summary
zarcsummary: %D%/zarcsummary
$(AM_V_at)cp $< $@
cmd-rename-install-exec-hook:
$(LN_S) -f arcstat $(DESTDIR)$(bindir)/zarcstat
$(LN_S) -f arc_summary $(DESTDIR)$(bindir)/zarcsummary
INSTALL_EXEC_HOOKS += cmd-rename-install-exec-hook
endif
PHONY += cmd
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: CDDL-1.0
#
# Print out ZFS ARC Statistics exported via kstat(1)
# For a definition of fields, or usage, use arcstat -v
# For a definition of fields, or usage, use zarcstat -v
#
# This script was originally a fork of the original arcstat.pl (0.1)
# by Neelakanth Nadgir, originally published on his Sun blog on
@@ -172,7 +172,7 @@ cols = {
"zactive": [7, 1000, "zfetch prefetches active per second"],
}
# ARC structural breakdown from arc_summary
# ARC structural breakdown from zarcsummary
structfields = {
"cmp": ["compressed", "Compressed"],
"ovh": ["overhead", "Overhead"],
@@ -188,7 +188,7 @@ structstats = { # size stats
"sz": ["_size", "size"],
}
# ARC types breakdown from arc_summary
# ARC types breakdown from zarcsummary
typefields = {
"data": ["data", "ARC data"],
"meta": ["metadata", "ARC metadata"],
@@ -199,7 +199,7 @@ typestats = { # size stats
"sz": ["_size", "size"],
}
# ARC states breakdown from arc_summary
# ARC states breakdown from zarcsummary
statefields = {
"ano": ["anon", "Anonymous"],
"mfu": ["mfu", "MFU"],
@@ -262,7 +262,7 @@ hdr_intr = 20 # Print header every 20 lines of output
opfile = None
sep = " " # Default separator is 2 spaces
l2exist = False
cmd = ("Usage: arcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
cmd = ("Usage: zarcstat [-havxp] [-f fields] [-o file] [-s string] [interval "
"[count]]\n")
cur = {}
d = {}
@@ -349,10 +349,10 @@ def usage():
"character or string\n")
sys.stderr.write("\t -p : Disable auto-scaling of numerical fields\n")
sys.stderr.write("\nExamples:\n")
sys.stderr.write("\tarcstat -o /tmp/a.log 2 10\n")
sys.stderr.write("\tarcstat -s \",\" -o /tmp/a.log 2 10\n")
sys.stderr.write("\tarcstat -v\n")
sys.stderr.write("\tarcstat -f time,hit%,dh%,ph%,mh% 1\n")
sys.stderr.write("\tzarcstat -o /tmp/a.log 2 10\n")
sys.stderr.write("\tzarcstat -s \",\" -o /tmp/a.log 2 10\n")
sys.stderr.write("\tzarcstat -v\n")
sys.stderr.write("\tzarcstat -f time,hit%,dh%,ph%,mh% 1\n")
sys.stderr.write("\n")
sys.exit(1)
@@ -367,7 +367,7 @@ def snap_stats():
cur = kstat
# fill in additional values from arc_summary
# fill in additional values from zarcsummary
cur["caches_size"] = caches_size = cur["anon_data"]+cur["anon_metadata"]+\
cur["mfu_data"]+cur["mfu_metadata"]+cur["mru_data"]+cur["mru_metadata"]+\
cur["uncached_data"]+cur["uncached_metadata"]
@@ -768,13 +768,6 @@ def calculate():
def main():
# notify user for upcoming renaming in 2.4.0
abs_path = os.path.abspath(sys.argv[0].strip())
script_name = os.path.basename(abs_path)
if script_name != "zarcstat":
sys.stderr.write("Note: this script will be renamed to zarcstat in ")
sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n")
global sint
global count
global hdr_intr
@@ -34,7 +34,7 @@ Provides basic information on the ARC, its efficiency, the L2ARC (if present),
the Data Management Unit (DMU), Virtual Devices (VDEVs), and tunables. See
the in-source documentation and code at
https://github.com/openzfs/zfs/blob/master/module/zfs/arc.c for details.
The original introduction to arc_summary can be found at
The original introduction to zarcsummary can be found at
http://cuddletech.com/?p=454
"""
@@ -161,7 +161,7 @@ elif sys.platform.startswith('linux'):
return get_params(TUNABLES_PATH)
def get_version_impl(request):
# The original arc_summary called /sbin/modinfo/{spl,zfs} to get
# The original zarcsummary called /sbin/modinfo/{spl,zfs} to get
# the version information. We switch to /sys/module/{spl,zfs}/version
# to make sure we get what is really loaded in the kernel
try:
@@ -439,7 +439,7 @@ def print_header():
"""
# datetime is now recommended over time but we keep the exact formatting
# from the older version of arc_summary in case there are scripts
# from the older version of zarcsummary in case there are scripts
# that expect it in this way
daydate = time.strftime(DATE_FORMAT)
spc_date = LINE_LENGTH-len(daydate)
@@ -1021,13 +1021,6 @@ def main():
treated separately because they come with their own call.
"""
# notify user for upcoming renaming in 2.4.0
abs_path = os.path.abspath(sys.argv[0].strip())
script_name = os.path.basename(abs_path)
if script_name != "zarcsummary":
sys.stderr.write("Note: this script will be renamed to zarcsummary in ")
sys.stderr.write("zfs 2.4.0. Please migrate ASAP.\n")
kstats = get_kstats()
if ARGS.graph:
+39 -6
View File
@@ -107,7 +107,9 @@ extern uint_t zfs_reconstruct_indirect_combinations_max;
extern uint_t zfs_btree_verify_intensity;
static const char cmdname[] = "zdb";
uint8_t dump_opt[256];
uint8_t dump_opt[512];
#define ALLOCATED_OPT 256
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
@@ -1666,6 +1668,16 @@ dump_metaslab_stats(metaslab_t *msp)
dump_histogram(rt->rt_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
}
/*
 * Range-tree walk callback used to print the allocated portions of a
 * metaslab.  It is invoked for each segment of the allocatable tree (see the
 * zfs_range_tree_walk() call on ms_allocatable); 'arg' points to a cursor
 * holding the end of the previously visited segment.  Whatever lies between
 * that cursor and the start of the current segment is reported as an
 * "ALLOC: <offset> <length>" line, then the cursor is advanced past the
 * current segment.
 */
static void
dump_allocated(void *arg, uint64_t start, uint64_t size)
{
	uint64_t *cursor = arg;
	uint64_t gap_start = *cursor;

	if (gap_start != start) {
		(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", gap_start,
		    start - gap_start);
	}

	*cursor = start + size;
}
static void
dump_metaslab(metaslab_t *msp)
{
@@ -1682,13 +1694,24 @@ dump_metaslab(metaslab_t *msp)
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
(u_longlong_t)space_map_object(sm), freebuf);
if (dump_opt['m'] > 2 && !dump_opt['L']) {
if (dump_opt[ALLOCATED_OPT] ||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
mutex_enter(&msp->ms_lock);
VERIFY0(metaslab_load(msp));
}
if (dump_opt['m'] > 2 && !dump_opt['L']) {
zfs_range_tree_stat_verify(msp->ms_allocatable);
dump_metaslab_stats(msp);
metaslab_unload(msp);
mutex_exit(&msp->ms_lock);
}
if (dump_opt[ALLOCATED_OPT]) {
uint64_t off = msp->ms_start;
zfs_range_tree_walk(msp->ms_allocatable, dump_allocated,
&off);
if (off != msp->ms_start + msp->ms_size)
(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off,
msp->ms_size - off);
}
if (dump_opt['m'] > 1 && sm != NULL &&
@@ -1703,6 +1726,12 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}
if (dump_opt[ALLOCATED_OPT] ||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
metaslab_unload(msp);
mutex_exit(&msp->ms_lock);
}
if (vd->vdev_ops == &vdev_draid_ops)
ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
else
@@ -1739,8 +1768,9 @@ print_vdev_metaslab_header(vdev_t *vd)
}
}
(void) printf("\tvdev %10llu %s",
(u_longlong_t)vd->vdev_id, bias_str);
(void) printf("\tvdev %10llu\t%s metaslab shift %4llu",
(u_longlong_t)vd->vdev_id, bias_str,
(u_longlong_t)vd->vdev_ms_shift);
if (ms_flush_data_obj != 0) {
(void) printf(" ms_unflushed_phys object %llu",
@@ -9375,6 +9405,8 @@ main(int argc, char **argv)
{"all-reconstruction", no_argument, NULL, 'Y'},
{"livelist", no_argument, NULL, 'y'},
{"zstd-headers", no_argument, NULL, 'Z'},
{"allocated-map", no_argument, NULL,
ALLOCATED_OPT},
{0, 0, 0, 0}
};
@@ -9405,6 +9437,7 @@ main(int argc, char **argv)
case 'u':
case 'y':
case 'Z':
case ALLOCATED_OPT:
dump_opt[c]++;
dump_all = 0;
break;
+1 -1
View File
@@ -29,6 +29,6 @@
#define _ZDB_H
void dump_intent_log(zilog_t *);
extern uint8_t dump_opt[256];
extern uint8_t dump_opt[512];
#endif /* _ZDB_H */
-2
View File
@@ -48,8 +48,6 @@
#include "zdb.h"
extern uint8_t dump_opt[256];
static char tab_prefix[4] = "\t\t\t";
static void
+12 -12
View File
@@ -9,18 +9,18 @@ dist_zedexec_SCRIPTS = \
%D%/all-debug.sh \
%D%/all-syslog.sh \
%D%/data-notify.sh \
%D%/deadman-slot_off.sh \
%D%/deadman-sync-slot_off.sh \
%D%/generic-notify.sh \
%D%/pool_import-led.sh \
%D%/pool_import-sync-led.sh \
%D%/resilver_finish-notify.sh \
%D%/resilver_finish-start-scrub.sh \
%D%/scrub_finish-notify.sh \
%D%/statechange-led.sh \
%D%/statechange-sync-led.sh \
%D%/statechange-notify.sh \
%D%/statechange-slot_off.sh \
%D%/statechange-sync-slot_off.sh \
%D%/trim_finish-notify.sh \
%D%/vdev_attach-led.sh \
%D%/vdev_clear-led.sh
%D%/vdev_attach-sync-led.sh \
%D%/vdev_clear-sync-led.sh
nodist_zedexec_SCRIPTS = \
%D%/history_event-zfs-list-cacher.sh
@@ -30,17 +30,17 @@ SUBSTFILES += $(nodist_zedexec_SCRIPTS)
zedconfdefaults = \
all-syslog.sh \
data-notify.sh \
deadman-slot_off.sh \
deadman-sync-slot_off.sh \
history_event-zfs-list-cacher.sh \
pool_import-led.sh \
pool_import-sync-led.sh \
resilver_finish-notify.sh \
resilver_finish-start-scrub.sh \
scrub_finish-notify.sh \
statechange-led.sh \
statechange-sync-led.sh \
statechange-notify.sh \
statechange-slot_off.sh \
vdev_attach-led.sh \
vdev_clear-led.sh
statechange-sync-slot_off.sh \
vdev_attach-sync-led.sh \
vdev_clear-sync-led.sh
dist_noinst_DATA += %D%/README
@@ -1 +0,0 @@
statechange-led.sh
@@ -0,0 +1 @@
statechange-sync-led.sh
@@ -1 +0,0 @@
statechange-led.sh
@@ -0,0 +1 @@
statechange-sync-led.sh
@@ -1 +0,0 @@
statechange-led.sh
@@ -0,0 +1 @@
statechange-sync-led.sh
+90 -21
View File
@@ -196,37 +196,29 @@ _nop(int sig)
(void) sig;
}
static void *
_reap_children(void *arg)
static void
wait_for_children(boolean_t do_pause, boolean_t wait)
{
(void) arg;
struct launched_process_node node, *pnode;
pid_t pid;
int status;
struct rusage usage;
struct sigaction sa = {};
(void) sigfillset(&sa.sa_mask);
(void) sigdelset(&sa.sa_mask, SIGCHLD);
(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
(void) sigemptyset(&sa.sa_mask);
sa.sa_handler = _nop;
sa.sa_flags = SA_NOCLDSTOP;
(void) sigaction(SIGCHLD, &sa, NULL);
int status;
struct launched_process_node node, *pnode;
for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
(void) pthread_mutex_lock(&_launched_processes_lock);
pid = wait4(0, &status, WNOHANG, &usage);
pid = wait4(0, &status, wait ? 0 : WNOHANG, &usage);
if (pid == 0 || pid == (pid_t)-1) {
(void) pthread_mutex_unlock(&_launched_processes_lock);
if (pid == 0 || errno == ECHILD)
pause();
else if (errno != EINTR)
if ((pid == 0) || (errno == ECHILD)) {
if (do_pause)
pause();
} else if (errno != EINTR)
zed_log_msg(LOG_WARNING,
"Failed to wait for children: %s",
strerror(errno));
if (!do_pause)
return;
} else {
memset(&node, 0, sizeof (node));
node.pid = pid;
@@ -278,6 +270,25 @@ _reap_children(void *arg)
}
}
}
static void *
_reap_children(void *arg)
{
(void) arg;
struct sigaction sa = {};
(void) sigfillset(&sa.sa_mask);
(void) sigdelset(&sa.sa_mask, SIGCHLD);
(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
(void) sigemptyset(&sa.sa_mask);
sa.sa_handler = _nop;
sa.sa_flags = SA_NOCLDSTOP;
(void) sigaction(SIGCHLD, &sa, NULL);
wait_for_children(B_TRUE, B_FALSE);
return (NULL);
}
@@ -306,6 +317,45 @@ zed_exec_fini(void)
_reap_children_tid = (pthread_t)-1;
}
/*
 * Report whether a zedlet filename marks the zedlet as synchronous.
 *
 * A synchronous zedlet carries the literal "-sync-" directly after the event
 * name in its filename:
 *
 *	EVENT_NAME-sync-ZEDLETNAME.sh
 *
 * e.g. a synchronous statechange script would be named:
 *
 *	statechange-sync-myzedlet.sh
 *
 * While a synchronous zedlet runs, no other zedlet runs alongside it: all
 * previously spawned zedlets are waited for before it is started, and it is
 * itself waited for before anything else is launched.  Because this reduces
 * parallelism, synchronous zedlets should only be used where really needed.
 *
 * 'zedlet' is the zedlet filename and 'event' the event name it was matched
 * against.  This function only inspects the characters following the first
 * strlen(event) bytes; it does not check that 'zedlet' actually begins with
 * 'event'.  Names too short to hold anything after the marker are treated as
 * not synchronous.
 */
static boolean_t
zedlet_is_sync(const char *zedlet, const char *event)
{
	static const char marker[] = "-sync-";
	const size_t marker_len = sizeof (marker) - 1;
	const size_t prefix_len = strlen(event);

	/* There must be at least one character after "EVENT-sync-". */
	if (strlen(zedlet) <= prefix_len + marker_len)
		return (B_FALSE);

	if (strncmp(zedlet + prefix_len, marker, marker_len) != 0)
		return (B_FALSE);

	return (B_TRUE);
}
/*
* Process the event [eid] by synchronously invoking all zedlets with a
* matching class prefix.
@@ -368,9 +418,28 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass,
z = zed_strings_next(zcp->zedlets)) {
for (csp = class_strings; *csp; csp++) {
n = strlen(*csp);
if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) {
boolean_t is_sync = zedlet_is_sync(z, *csp);
if (is_sync) {
/*
* Wait for previous zedlets to
* finish
*/
wait_for_children(B_FALSE, B_TRUE);
}
_zed_exec_fork_child(eid, zcp->zedlet_dir,
z, e, zcp->zevent_fd, zcp->do_foreground);
if (is_sync) {
/*
* Wait for sync zedlet we just launched
* to finish.
*/
wait_for_children(B_FALSE, B_TRUE);
}
}
}
}
free(e);
+6
View File
@@ -5303,6 +5303,7 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
@@ -5345,6 +5346,7 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME },
{ ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
{ ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
{ ZFS_DELEG_PERM_SEND_RAW, ZFS_DELEG_NOTE_SEND_RAW },
{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
@@ -5929,6 +5931,10 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_SEND:
str = gettext("");
break;
case ZFS_DELEG_NOTE_SEND_RAW:
str = gettext("Allow sending ONLY encrypted (raw) replication"
"\n\t\t\t\tstreams");
break;
case ZFS_DELEG_NOTE_SHARE:
str = gettext("Allows sharing file systems over NFS or SMB"
"\n\t\t\t\tprotocols");
+194 -3
View File
@@ -54,6 +54,7 @@
#include <sys/dmu_tx.h>
#include <zfeature_common.h>
#include <libzutil.h>
#include <sys/metaslab_impl.h>
static importargs_t g_importargs;
static char *g_pool;
@@ -69,7 +70,8 @@ static __attribute__((noreturn)) void
usage(void)
{
(void) fprintf(stderr,
"Usage: zhack [-c cachefile] [-d dir] <subcommand> <args> ...\n"
"Usage: zhack [-o tunable] [-c cachefile] [-d dir] <subcommand> "
"<args> ...\n"
"where <subcommand> <args> is one of the following:\n"
"\n");
@@ -93,7 +95,10 @@ usage(void)
" -c repair corrupted label checksums\n"
" -u restore the label on a detached device\n"
"\n"
" <device> : path to vdev\n");
" <device> : path to vdev\n"
"\n"
" metaslab leak <pool>\n"
" apply allocation map from zdb to specified pool\n");
exit(1);
}
@@ -500,6 +505,186 @@ zhack_do_feature(int argc, char **argv)
return (0);
}
/*
 * Return true when string 'a' begins with the prefix 'b'.  An empty 'b'
 * matches every string.
 */
static boolean_t
strstarts(const char *a, const char *b)
{
	size_t prefix_len = strlen(b);

	return (strncmp(a, b, prefix_len) == 0);
}
/*
 * Force the parts of [start, start + size) that are still present in the
 * metaslab's allocatable tree to become allocated in the txg of 'tx'.
 *
 * Each segment found inside the window is removed from ms_allocatable and
 * added to the ms_allocating tree for this txg, and ms_allocating_total is
 * bumped by the segment size.  The vdev is dirtied for the metaslab whenever
 * that allocating tree is empty just before the addition.  The scan stops as
 * soon as zfs_range_tree_find_in() reports nothing further in the window.
 *
 * The caller must have disabled the metaslab and must hold ms_lock.
 */
static void
metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
    dmu_tx_t *tx)
{
	ASSERT(msp->ms_disabled);
	ASSERT(MUTEX_HELD(&msp->ms_lock));

	const uint64_t txg = dmu_tx_get_txg(tx);
	const uint64_t end = start + size;

	for (uint64_t cursor = start; cursor < end; ) {
		uint64_t seg_start, seg_size;

		if (!zfs_range_tree_find_in(msp->ms_allocatable, cursor,
		    end - cursor, &seg_start, &seg_size))
			break;

		zfs_range_tree_remove(msp->ms_allocatable, seg_start,
		    seg_size);

		/* First allocation in this txg: mark the metaslab dirty. */
		if (zfs_range_tree_is_empty(
		    msp->ms_allocating[txg & TXG_MASK])) {
			vdev_dirty(msp->ms_group->mg_vd, VDD_METASLAB, msp,
			    txg);
		}

		zfs_range_tree_add(msp->ms_allocating[txg & TXG_MASK],
		    seg_start, seg_size);
		msp->ms_allocating_total += seg_size;

		/* Continue right after the segment just consumed. */
		cursor = seg_start + seg_size;
	}
}
/*
 * "zhack metaslab leak [-f] <pool>": read an allocation map from stdin and
 * force every listed range to be allocated in <pool>.
 *
 * The input is expected in the form zdb prints it (see the usage text).
 * Three kinds of lines are recognized; everything else is skipped:
 *
 *	"\tvdev <id>\t<bias> metaslab shift <n>"
 *		selects the top-level vdev the following lines refer to; the
 *		metaslab shift must match the pool's vdev.
 *	"\tmetaslabs <count>"
 *		metaslab count of the current vdev; a mismatch is an error
 *		unless -f was given.
 *	"ALLOC: <start> <size>"
 *		range to force-allocate on the current vdev.
 *
 * Each metaslab that is touched is disabled, locked and loaded, and its
 * ranges are applied under a single tx.  That tx is committed (and the
 * metaslab unlocked and re-enabled) when the input moves on to another
 * metaslab or vdev, or when processing ends.
 *
 * Inconsistent input (unknown vdev, shift/count mismatch, range lines with
 * no vdev selected, offsets past the last metaslab) is reported on stderr and
 * stops processing; lines with a recognized prefix that cannot be parsed trip
 * a VERIFY.
 */
static void
zhack_do_metaslab_leak(int argc, char **argv)
{
	int c;
	char *target;
	spa_t *spa;

	optind = 1;
	boolean_t force = B_FALSE;
	while ((c = getopt(argc, argv, "f")) != -1) {
		switch (c) {
		case 'f':
			force = B_TRUE;
			break;
		default:
			usage();
			break;
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1) {
		(void) fprintf(stderr, "error: missing pool name\n");
		usage();
	}
	target = argv[0];

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	spa_config_enter(spa, SCL_VDEV | SCL_ALLOC, FTAG, RW_READER);

	char *line = NULL;
	size_t cap = 0;

	vdev_t *vd = NULL;		/* vdev selected by the last header */
	metaslab_t *prev = NULL;	/* metaslab currently locked, if any */
	dmu_tx_t *tx = NULL;		/* open tx for 'prev', if any */

	while (getline(&line, &cap, stdin) > 0) {
		if (strstarts(line, "\tvdev ")) {
			uint64_t vdev_id = 0, ms_shift = 0;

			/*
			 * With an empty bias string, %*s swallows the word
			 * "metaslab" and the first format converts fewer
			 * than two fields; retry with the bias-less format.
			 * Anything that still does not yield both fields is
			 * unparseable, so never continue with unset values.
			 */
			if (sscanf(line,
			    "\tvdev %10"PRIu64"\t%*s metaslab shift %4"PRIu64,
			    &vdev_id, &ms_shift) != 2) {
				VERIFY3U(sscanf(line, "\tvdev %"PRIu64
				    "\t metaslab shift %4"PRIu64,
				    &vdev_id, &ms_shift), ==, 2);
			}
			vd = vdev_lookup_top(spa, vdev_id);
			if (vd == NULL) {
				(void) fprintf(stderr, "error: no such vdev "
				    "with id %"PRIu64"\n", vdev_id);
				break;
			}
			/* Finish the previous vdev's pending metaslab. */
			if (tx) {
				dmu_tx_commit(tx);
				mutex_exit(&prev->ms_lock);
				metaslab_enable(prev, B_FALSE, B_FALSE);
				tx = NULL;
				prev = NULL;
			}
			if (vd->vdev_ms_shift != ms_shift) {
				(void) fprintf(stderr, "error: ms_shift "
				    "mismatch: %"PRIu64" != %"PRIu64"\n",
				    vd->vdev_ms_shift, ms_shift);
				break;
			}
		} else if (strstarts(line, "\tmetaslabs ")) {
			uint64_t ms_count;

			VERIFY3U(sscanf(line, "\tmetaslabs %"PRIu64, &ms_count),
			    ==, 1);
			if (vd == NULL) {
				(void) fprintf(stderr, "error: metaslab count "
				    "found before any vdev line\n");
				break;
			}
			if (!force && vd->vdev_ms_count != ms_count) {
				(void) fprintf(stderr, "error: ms_count "
				    "mismatch: %"PRIu64" != %"PRIu64"\n",
				    vd->vdev_ms_count, ms_count);
				break;
			}
		} else if (strstarts(line, "ALLOC:")) {
			uint64_t start, size;

			VERIFY3U(sscanf(line, "ALLOC: %"PRIu64" %"PRIu64"\n",
			    &start, &size), ==, 2);
			if (vd == NULL) {
				(void) fprintf(stderr, "error: allocation "
				    "found before any vdev line\n");
				break;
			}

			/*
			 * With -f the metaslab count of the map may exceed
			 * that of the pool; never index past vdev_ms[].
			 */
			uint64_t ms_id = start >> vd->vdev_ms_shift;
			if (ms_id >= vd->vdev_ms_count) {
				(void) fprintf(stderr, "error: offset %"PRIu64
				    " is beyond the last metaslab\n", start);
				break;
			}

			metaslab_t *cur = vd->vdev_ms[ms_id];
			if (prev != cur) {
				if (prev) {
					dmu_tx_commit(tx);
					mutex_exit(&prev->ms_lock);
					metaslab_enable(prev, B_FALSE, B_FALSE);
				}
				ASSERT(cur);
				metaslab_disable(cur);
				mutex_enter(&cur->ms_lock);
				/* The range trees must be loaded to edit. */
				VERIFY0(metaslab_load(cur));
				prev = cur;
				tx = dmu_tx_create_dd(
				    spa_get_dsl(vd->vdev_spa)->dp_root_dir);
				dmu_tx_assign(tx, DMU_TX_WAIT);
			}

			metaslab_force_alloc(cur, start, size, tx);
		} else {
			continue;
		}
	}

	/* Commit whatever is still pending, also after an early break. */
	if (tx) {
		dmu_tx_commit(tx);
		mutex_exit(&prev->ms_lock);
		metaslab_enable(prev, B_FALSE, B_FALSE);
		tx = NULL;
		prev = NULL;
	}
	free(line);

	spa_config_exit(spa, SCL_VDEV | SCL_ALLOC, FTAG);
	spa_close(spa, FTAG);
}
/*
 * Dispatcher for "zhack metaslab <operation> ...".
 *
 * argv[0] is the "metaslab" word itself; the operation name follows it.
 * The only operation known is "leak".  A missing or unknown operation prints
 * an error and ends in usage(), which does not return.  Returns 0 otherwise.
 */
static int
zhack_do_metaslab(int argc, char **argv)
{
	/* Step over the "metaslab" word. */
	int nargs = argc - 1;
	char **args = argv + 1;

	if (nargs == 0) {
		(void) fprintf(stderr,
		    "error: no metaslab operation specified\n");
		usage();
	}

	char *op = args[0];

	if (strcmp(op, "leak") != 0) {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    op);
		usage();
	}

	zhack_do_metaslab_leak(nargs, args);

	return (0);
}
#define ASHIFT_UBERBLOCK_SHIFT(ashift) \
MIN(MAX(ashift, UBERBLOCK_SHIFT), \
MAX_UBERBLOCK_SHIFT)
@@ -985,7 +1170,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
zfs_prop_init();
while ((c = getopt(argc, argv, "+c:d:")) != -1) {
while ((c = getopt(argc, argv, "+c:d:o:")) != -1) {
switch (c) {
case 'c':
g_importargs.cachefile = optarg;
@@ -994,6 +1179,10 @@ main(int argc, char **argv)
assert(g_importargs.paths < MAX_NUM_PATHS);
g_importargs.path[g_importargs.paths++] = optarg;
break;
case 'o':
if (handle_tunable_option(optarg, B_FALSE) != 0)
exit(1);
break;
default:
usage();
break;
@@ -1015,6 +1204,8 @@ main(int argc, char **argv)
rv = zhack_do_feature(argc, argv);
} else if (strcmp(subcommand, "label") == 0) {
return (zhack_do_label(argc, argv));
} else if (strcmp(subcommand, "metaslab") == 0) {
rv = zhack_do_metaslab(argc, argv);
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
+3 -1
View File
@@ -4,7 +4,7 @@ The detailed contributor information can be found in [2][3].
Files: contrib/debian/*
Copyright:
2013-2016, Aron Xu <aron@debian.org>
2013-2025, Aron Xu <aron@debian.org>
2016, Petter Reinholdtsen <pere@hungry.com>
2013, Carlos Alberto Lopez Perez <clopez@igalia.com>
2013, Turbo Fredriksson <turbo@bayour.com>
@@ -12,6 +12,8 @@ Copyright:
2011-2013, Darik Horn <dajhorn@vanadac.com>
2018-2019, Mo Zhou <cdluminate@gmail.com>
2018-2020, Mo Zhou <lumin@debian.org>
2023-2024, Shengqi Chen <harry-chen@outlook.com>
2024-2025, Shengqi Chen <harry@debian.org>
License: GPL-2+
[1] https://tracker.debian.org/pkg/zfs-linux
@@ -1,4 +1,4 @@
usr/bin/arc_summary.py
usr/bin/zarcsummary.py
usr/share/zfs/zfs-helpers.sh
etc/default/zfs
etc/init.d
@@ -9,4 +9,4 @@ etc/zfs/vdev_id.conf.sas_direct.example
etc/zfs/vdev_id.conf.sas_switch.example
etc/zfs/vdev_id.conf.scsi.example
etc/zfs/zfs-functions
lib/systemd/system/zfs-import.service
usr/lib/systemd/system/zfs-import.service
@@ -1 +1 @@
lib/@DEB_HOST_MULTIARCH@/libnvpair.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libnvpair.so.*
@@ -1,2 +1,2 @@
lib/*/security/pam_zfs_key.so
usr/lib/*/security/pam_zfs_key.so
usr/share/pam-configs/zfs_key
@@ -1,7 +1,7 @@
#!/bin/sh
set -e
if ! $(ldd "/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then
if ! $(ldd "/usr/lib/$(dpkg-architecture -qDEB_HOST_MULTIARCH)/security/pam_zfs_key.so" | grep -q "libasan") ; then
pam-auth-update --package
fi
@@ -1 +1 @@
lib/@DEB_HOST_MULTIARCH@/libuutil.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libuutil.so.*
@@ -1,3 +1,5 @@
lib/@DEB_HOST_MULTIARCH@/*.a usr/lib/@DEB_HOST_MULTIARCH@
usr/lib/@DEB_HOST_MULTIARCH@/*.a
usr/lib/@DEB_HOST_MULTIARCH@/*.so
usr/lib/@DEB_HOST_MULTIARCH@/pkgconfig
usr/include
usr/lib/@DEB_HOST_MULTIARCH@
@@ -1,2 +1,2 @@
lib/@DEB_HOST_MULTIARCH@/libzfs.so.*
lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libzfs.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libzfs_core.so.*
@@ -1 +1 @@
lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libzfsbootenv.so.*
@@ -1 +1 @@
lib/@DEB_HOST_MULTIARCH@/libzpool.so.*
usr/lib/@DEB_HOST_MULTIARCH@/libzpool.so.*
@@ -1,4 +1,4 @@
sbin/ztest
usr/sbin/ztest
usr/bin/raidz_test
usr/share/man/man1/raidz_test.1
usr/share/man/man1/test-runner.1
@@ -1,5 +1,5 @@
etc/zfs/zed.d/*
lib/systemd/system/zfs-zed.service
usr/lib/systemd/system/zfs-zed.service
usr/lib/zfs-linux/zed.d/*
usr/sbin/zed
usr/share/man/man8/zed.8
@@ -1,50 +1,48 @@
etc/default/zfs
etc/zfs/zfs-functions
etc/zfs/zpool.d/
lib/systemd/system-generators/
lib/systemd/system-preset/
lib/systemd/system/zfs-import-cache.service
lib/systemd/system/zfs-import-scan.service
lib/systemd/system/zfs-import.target
lib/systemd/system/zfs-load-key.service
lib/systemd/system/zfs-mount.service
lib/systemd/system/zfs-mount@.service
lib/systemd/system/zfs-scrub-monthly@.timer
lib/systemd/system/zfs-scrub-weekly@.timer
lib/systemd/system/zfs-scrub@.service
lib/systemd/system/zfs-trim-monthly@.timer
lib/systemd/system/zfs-trim-weekly@.timer
lib/systemd/system/zfs-trim@.service
lib/systemd/system/zfs-share.service
lib/systemd/system/zfs-volume-wait.service
lib/systemd/system/zfs-volumes.target
lib/systemd/system/zfs.target
lib/udev/
sbin/fsck.zfs
sbin/mount.zfs
sbin/zdb
sbin/zfs
sbin/zfs_ids_to_path
sbin/zgenhostid
sbin/zhack
sbin/zinject
sbin/zpool
sbin/zstream
sbin/zstreamdump
usr/lib/systemd/system-generators/
usr/lib/systemd/system-preset/
usr/lib/systemd/system/zfs-import-cache.service
usr/lib/systemd/system/zfs-import-scan.service
usr/lib/systemd/system/zfs-import.target
usr/lib/systemd/system/zfs-load-key.service
usr/lib/systemd/system/zfs-mount.service
usr/lib/systemd/system/zfs-mount@.service
usr/lib/systemd/system/zfs-scrub-monthly@.timer
usr/lib/systemd/system/zfs-scrub-weekly@.timer
usr/lib/systemd/system/zfs-scrub@.service
usr/lib/systemd/system/zfs-trim-monthly@.timer
usr/lib/systemd/system/zfs-trim-weekly@.timer
usr/lib/systemd/system/zfs-trim@.service
usr/lib/systemd/system/zfs-share.service
usr/lib/systemd/system/zfs-volume-wait.service
usr/lib/systemd/system/zfs-volumes.target
usr/lib/systemd/system/zfs.target
usr/lib/udev/
usr/sbin/fsck.zfs
usr/sbin/mount.zfs
usr/sbin/zdb
usr/sbin/zfs
usr/sbin/zfs_ids_to_path
usr/sbin/zgenhostid
usr/sbin/zhack
usr/sbin/zinject
usr/sbin/zpool
usr/sbin/zstream
usr/sbin/zstreamdump
usr/bin/zvol_wait
usr/lib/modules-load.d/ lib/
usr/lib/modules-load.d/
usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb
usr/lib/zfs-linux/zfs_prepare_disk
usr/sbin/arc_summary
usr/sbin/zarcsummary
usr/sbin/arcstat
usr/sbin/zarcstat
usr/sbin/dbufstat
usr/sbin/zilstat
usr/bin/zarcsummary
usr/bin/zarcstat
usr/bin/dbufstat usr/sbin
usr/bin/zilstat
usr/share/zfs/compatibility.d/
usr/share/bash-completion/completions
usr/share/man/man1/arcstat.1
usr/share/man/man1/zarcstat.1
usr/share/man/man1/zhack.1
usr/share/man/man1/zvol_wait.1
usr/share/man/man5/
@@ -0,0 +1,3 @@
usr/sbin/zfs usr/bin/zfs
usr/sbin/zpool usr/bin/zpool
usr/lib/zfs-linux/zpool_influxdb usr/bin/zpool_influxdb
+11 -30
View File
@@ -37,18 +37,19 @@ override_dh_auto_configure:
@# Build the userland, but don't build the kernel modules.
dh_auto_configure -- @CFGOPTS@ \
--bindir=/usr/bin \
--sbindir=/sbin \
--libdir=/lib/"$(DEB_HOST_MULTIARCH)" \
--with-udevdir=/lib/udev \
--sbindir=/usr/sbin \
--with-mounthelperdir=/usr/sbin \
--libdir=/usr/lib/"$(DEB_HOST_MULTIARCH)" \
--with-udevdir=/usr/lib/udev \
--with-zfsexecdir=/usr/lib/zfs-linux \
--enable-systemd \
--enable-pyzfs \
--with-python=python3 \
--with-pammoduledir='/lib/$(DEB_HOST_MULTIARCH)/security' \
--with-pammoduledir='/usr/lib/$(DEB_HOST_MULTIARCH)/security' \
--with-pkgconfigdir='/usr/lib/$(DEB_HOST_MULTIARCH)/pkgconfig' \
--with-systemdunitdir=/lib/systemd/system \
--with-systemdpresetdir=/lib/systemd/system-preset \
--with-systemdgeneratordir=/lib/systemd/system-generators \
--with-systemdunitdir=/usr/lib/systemd/system \
--with-systemdpresetdir=/usr/lib/systemd/system-preset \
--with-systemdgeneratordir=/usr/lib/systemd/system-generators \
--with-config=user
for i in $(wildcard $(CURDIR)/debian/*.install.in) ; do \
@@ -77,21 +78,6 @@ override_dh_auto_install:
@# Install the utilities.
$(MAKE) install DESTDIR='$(CURDIR)/debian/tmp'
# Move from bin_dir to /usr/sbin
# Remove suffix (.py) as per policy 10.4 - Scripts
# https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
mkdir -p '$(CURDIR)/debian/tmp/usr/sbin/'
mv '$(CURDIR)/debian/tmp/usr/bin/arc_summary' '$(CURDIR)/debian/tmp/usr/sbin/arc_summary'
mv '$(CURDIR)/debian/tmp/usr/bin/zarcsummary' '$(CURDIR)/debian/tmp/usr/sbin/zarcsummary'
mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat'
mv '$(CURDIR)/debian/tmp/usr/bin/zarcstat' '$(CURDIR)/debian/tmp/usr/sbin/zarcstat'
mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat'
mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat'
@# Zed has dependencies outside of the system root.
mv '$(CURDIR)/debian/tmp/sbin/zed' '$(CURDIR)/debian/tmp/usr/sbin/zed'
sed -i 's|ExecStart=/sbin/|ExecStart=/usr/sbin/|g' '$(CURDIR)/debian/tmp/lib/systemd/system/zfs-zed.service'
@# Install the DKMS source.
@# We only want the files needed to build the modules
install -D -t '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/scripts' \
@@ -133,11 +119,6 @@ override_dh_auto_install:
cd '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)'; ./autogen.sh
rm -fr '$(CURDIR)/debian/tmp/usr/src/$(NAME)-$(DEB_VERSION_UPSTREAM)/autom4te.cache'
for i in `ls $(CURDIR)/debian/tmp/lib/$(DEB_HOST_MULTIARCH)/*.so`; do \
ln -s '/lib/$(DEB_HOST_MULTIARCH)/'`readlink $${i}` '$(CURDIR)/debian/tmp/usr/lib/$(DEB_HOST_MULTIARCH)/'`basename $${i}`; \
rm $${i}; \
done
chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions'
chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs'
@@ -161,7 +142,7 @@ override_dh_auto_clean:
@if test -e META.orig; then mv META.orig META; fi
override_dh_install:
find debian/tmp/lib -name '*.la' -delete
find debian/tmp/usr/lib -name '*.la' -delete
dh_install
override_dh_missing:
@@ -175,8 +156,8 @@ override_dh_installinit:
dh_installinit -R --name zfs-zed
override_dh_installsystemd:
mkdir -p debian/openzfs-zfsutils/lib/systemd/system
ln -sr /dev/null debian/openzfs-zfsutils/lib/systemd/system/zfs-import.service
mkdir -p debian/openzfs-zfsutils/usr/lib/systemd/system
ln -sr /dev/null debian/openzfs-zfsutils/usr/lib/systemd/system/zfs-import.service
dh_installsystemd --no-stop-on-upgrade -X zfs-zed.service
dh_installsystemd --name zfs-zed
@@ -5,7 +5,7 @@
PREREQ="udev"
PREREQ_UDEV_RULES="60-zvol.rules 69-vdev.rules"
COPY_EXEC_LIST="/lib/udev/zvol_id /lib/udev/vdev_id"
COPY_EXEC_LIST="/usr/lib/udev/zvol_id /usr/lib/udev/vdev_id"
# Generic result code.
RC=0
@@ -604,5 +604,4 @@ class RaidzExpansionRunning(ZFSError):
errno = ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS
message = "A raidz device is currently expanding"
# vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4
@@ -62,6 +62,17 @@ typedef longlong_t hrtime_t;
#define SEC_TO_TICK(sec) ((sec) * hz)
#define NSEC_TO_TICK(nsec) ((nsec) / (NANOSEC / hz))
static __inline hrtime_t
getlrtime(void)
{
struct timespec ts;
hrtime_t nsec;
getnanouptime(&ts);
nsec = ((hrtime_t)ts.tv_sec * NANOSEC) + ts.tv_nsec;
return (nsec);
}
static __inline hrtime_t
gethrtime(void)
{
@@ -130,7 +130,7 @@ RW_READ_HELD(krwlock_t *rwp)
/*
* The Linux rwsem implementation does not require a matching destroy.
*/
#define rw_destroy(rwp) ((void) 0)
#define rw_destroy(rwp) ASSERT(!(RW_LOCK_HELD(rwp)))
/*
* Upgrading a rwsem from a reader to a writer is not supported by the
@@ -79,6 +79,14 @@ gethrestime_sec(void)
return (ts.tv_sec);
}
static inline hrtime_t
getlrtime(void)
{
inode_timespec_t ts;
ktime_get_coarse_ts64(&ts);
return (((hrtime_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec);
}
static inline hrtime_t
gethrtime(void)
{
@@ -46,6 +46,7 @@ extern "C" {
#define ZFS_DELEG_PERM_MOUNT "mount"
#define ZFS_DELEG_PERM_SHARE "share"
#define ZFS_DELEG_PERM_SEND "send"
#define ZFS_DELEG_PERM_SEND_RAW "send:raw"
#define ZFS_DELEG_PERM_RECEIVE "receive"
#define ZFS_DELEG_PERM_RECEIVE_APPEND "receive:append"
#define ZFS_DELEG_PERM_ALLOW "allow"
@@ -58,6 +58,7 @@ extern "C" {
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write"
#define FM_EREPORT_ZFS_SITOUT "sitout"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
+3
View File
@@ -385,6 +385,8 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
VDEV_PROP_SIT_OUT,
VDEV_PROP_AUTOSIT,
VDEV_NUM_PROPS
} vdev_prop_t;
@@ -1673,6 +1675,7 @@ typedef enum {
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
ZFS_ERR_ASHIFT_MISMATCH,
ZFS_ERR_STREAM_LARGE_MICROZAP,
ZFS_ERR_TOO_MANY_SITOUTS,
} zfs_errno_t;
/*
@@ -279,10 +279,12 @@ struct vdev {
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
uint64_t vdev_failfast; /* device failfast setting */
boolean_t vdev_autosit; /* automatic sitout management */
boolean_t vdev_rz_expanding; /* raidz is being expanded? */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
uint64_t vdev_last_latency_check;
/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
@@ -431,6 +433,10 @@ struct vdev {
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
uint64_t vdev_expansion_time; /* vdev's last expansion time */
/* used to calculate average read latency */
uint64_t *vdev_prev_histo;
int64_t vdev_outlier_count; /* read outlier amongst peers */
hrtime_t vdev_read_sit_out_expire; /* end of sit out period */
list_node_t vdev_leaf_node; /* leaf vdev list */
/*
@@ -61,6 +61,9 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t);
void vdev_raidz_sit_child(vdev_t *, uint64_t);
void vdev_raidz_unsit_child(vdev_t *);
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
@@ -119,6 +119,7 @@ typedef struct raidz_col {
uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
uint8_t rc_force_repair:1; /* Write good data to this column */
uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
uint8_t rc_latency_outlier:1; /* Latency outlier for this device */
int rc_shadow_devidx; /* for double write during expansion */
int rc_shadow_error; /* for double write during expansion */
uint64_t rc_shadow_offset; /* for double write during expansion */
@@ -133,6 +134,7 @@ typedef struct raidz_row {
int rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
int rr_outlier_cnt; /* Count of latency outlier devices */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
+1 -1
View File
@@ -46,7 +46,7 @@ void zfs_file_close(zfs_file_t *fp);
int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid);
int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off,
ssize_t *resid);
uint8_t ashift, ssize_t *resid);
int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
ssize_t *resid);
+1
View File
@@ -55,6 +55,7 @@ typedef enum {
ZFS_DELEG_NOTE_PROMOTE,
ZFS_DELEG_NOTE_RENAME,
ZFS_DELEG_NOTE_SEND,
ZFS_DELEG_NOTE_SEND_RAW,
ZFS_DELEG_NOTE_RECEIVE,
ZFS_DELEG_NOTE_ALLOW,
ZFS_DELEG_NOTE_USERPROP,
@@ -97,6 +97,15 @@ gethrestime_sec(void)
return (tv.tv_sec);
}
/*
 * Return the time reported by gettimeofday(), expressed as a single
 * count of nanoseconds.
 */
static inline hrtime_t
getlrtime(void)
{
	struct timeval now;
	uint64_t sec_ns, usec_ns;

	(void) gettimeofday(&now, NULL);

	/* Convert both fields to nanoseconds in 64-bit arithmetic */
	sec_ns = ((uint64_t)now.tv_sec) * NANOSEC;
	usec_ns = (uint64_t)now.tv_usec * NSEC_PER_USEC;

	return (sec_ns + usec_ns);
}
static inline hrtime_t
gethrtime(void)
{
+3 -1
View File
@@ -6117,7 +6117,9 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_NUM_PROPS' value='52'/>
<enumerator name='VDEV_PROP_SIT_OUT' value='52'/>
<enumerator name='VDEV_PROP_AUTOSIT' value='53'/>
<enumerator name='VDEV_NUM_PROPS' value='54'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
+12 -2
View File
@@ -5549,6 +5549,8 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
/* Only use if provided by the RAIDZ VDEV above */
if (prop == VDEV_PROP_RAIDZ_EXPANDING)
return (ENOENT);
if (prop == VDEV_PROP_SIT_OUT)
return (ENOENT);
}
if (vdev_prop_index_to_string(prop, intval,
(const char **)&strval) != 0)
@@ -5718,8 +5720,16 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
nvlist_free(nvl);
nvlist_free(outnvl);
if (ret)
(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
if (ret) {
if (errno == ENOTSUP) {
zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
"property not supported for this vdev"));
(void) zfs_error(zhp->zpool_hdl, EZFS_PROPTYPE, errbuf);
} else {
(void) zpool_standard_error(zhp->zpool_hdl, errno,
errbuf);
}
}
return (ret);
}
@@ -776,6 +776,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_ASHIFT_MISMATCH:
zfs_verror(hdl, EZFS_ASHIFT_MISMATCH, fmt, ap);
break;
case ZFS_ERR_TOO_MANY_SITOUTS:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "too many disks "
"already sitting out"));
zfs_verror(hdl, EZFS_BUSY, fmt, ap);
break;
default:
zfs_error_aux(hdl, "%s", zfs_strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+3 -3
View File
@@ -1238,7 +1238,7 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
*/
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf,
size_t count, loff_t pos, ssize_t *resid)
size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)
{
ssize_t rc, split, done;
int sectors;
@@ -1248,8 +1248,8 @@ zfs_file_pwrite(zfs_file_t *fp, const void *buf,
* system calls so that the process can be killed in between.
* This is used by ztest to simulate realistic failure modes.
*/
sectors = count >> SPA_MINBLOCKSHIFT;
split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
sectors = count >> ashift;
split = (sectors > 0 ? rand() % sectors : 0) << ashift;
rc = pwrite64(fp->f_fd, buf, split, pos);
if (rc != -1) {
done = rc;
+1 -1
View File
@@ -2,7 +2,7 @@ dist_noinst_man_MANS = \
%D%/man1/cstyle.1
dist_man_MANS = \
%D%/man1/arcstat.1 \
%D%/man1/zarcstat.1 \
%D%/man1/raidz_test.1 \
%D%/man1/test-runner.1 \
%D%/man1/zhack.1 \
@@ -14,14 +14,12 @@
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\"
.Dd September 19, 2024
.Dt ARCSTAT 1
.Dt ZARCSTAT 1
.Os
.
.Sh NAME
.Nm arcstat
.Nm zarcstat
.Nd report ZFS ARC and L2ARC statistics
.Sh NOTICE
It will be renamed to zarcstat in zfs 2.4.0. Please migrate ASAP.
.Sh SYNOPSIS
.Nm
.Op Fl havxp
+20
View File
@@ -122,6 +122,24 @@ Example:
.Nm zhack Cm label repair Fl cu Ar device
Fix checksums and undetach a device
.
.It Xo
.Nm zhack
.Cm metaslab leak
.Op Fl f
.Ar pool
.Xc
Apply a fragmentation profile generated by
.Sy zdb
to the specified
.Ar pool Ns
\&.
.Pp
The
.Fl f
flag forces the profile to apply even if the vdevs in the
.Ar pool
don't have the same number of metaslabs as the fragmentation profile.
.
.El
.
.Sh GLOBAL OPTIONS
@@ -143,6 +161,8 @@ Search for
members in
.Ar dir .
Can be specified more than once.
.It Fl o Ar var Ns = Ns Ar value
Set the given tunable to the provided value.
.El
.
.Sh EXAMPLES
+37
View File
@@ -4,6 +4,7 @@
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
.\" Copyright (c) 2023, 2024, 2025, Klara, Inc.
.\"
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at
@@ -601,6 +602,42 @@ new format when enabling the
feature.
The default is to convert all log entries.
.
.It Sy vdev_read_sit_out_secs Ns = Ns Sy 600 Ns s Po 10 min Pc Pq ulong
When a slow disk outlier is detected it is placed in a sit out state.
While sitting out the disk will not participate in normal reads, instead its
data will be reconstructed as needed from parity.
Scrub operations will always read from a disk, even if it's sitting out.
A number of disks in a RAID-Z or dRAID vdev may sit out at the same time, up
to the number of parity devices.
Writes will still be issued to a disk which is sitting out to maintain full
redundancy.
Defaults to 600 seconds and a value of zero disables disk sit-outs in general,
including slow disk outlier detection.
.
.It Sy vdev_raidz_outlier_check_interval_ms Ns = Ns Sy 1000 Ns ms Po 1 sec Pc Pq ulong
How often each RAID-Z and dRAID vdev will check for slow disk outliers.
Increasing this interval will reduce the sensitivity of detection (since all
I/Os since the last check are included in the statistics), but will slow the
response to a disk developing a problem.
Defaults to once per second; setting extremely small values may cause negative
performance effects.
.
.It Sy vdev_raidz_outlier_insensitivity Ns = Ns Sy 50 Pq uint
When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is
used to determine how far out an outlier must be before it counts as an event
worth considering.
This is phrased as "insensitivity" because larger values result in fewer
detections.
Smaller values will result in more aggressive sitting out of disks that may have
problems, but may significantly increase the rate of spurious sit-outs.
.Pp
To provide a more technical definition of this parameter, this is the multiple
of the inter-quartile range (IQR) that is being used in a Tukey's Fence
detection algorithm.
This is much higher than a normal Tukey's Fence k-value, because the
distribution under consideration is probably an extreme-value distribution,
rather than a more typical Gaussian distribution.
.
.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32 KiB Pc Pq uint
During top-level vdev removal, chunks of data are copied from the vdev
which may include free space in order to trade bandwidth for IOPS.
+35 -4
View File
@@ -19,7 +19,7 @@
.\"
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2021 Klara, Inc.
.\" Copyright (c) 2021, 2025, Klara, Inc.
.\"
.Dd July 23, 2024
.Dt VDEVPROPS 7
@@ -106,11 +106,17 @@ The number of children belonging to this vdev
.It Sy read_errors , write_errors , checksum_errors , initialize_errors , trim_errors
The number of errors of each type encountered by this vdev
.It Sy slow_ios
The number of slow I/Os encountered by this vdev,
These represent I/O operations that didn't complete in
This indicates the number of slow I/O operations encountered by this vdev.
A slow I/O is defined as an operation that did not complete within the
.Sy zio_slow_io_ms
milliseconds
threshold in milliseconds
.Pq Sy 30000 No by default .
For
.Sy RAIDZ
and
.Sy DRAID
configurations, this value also represents the number of times the vdev was
identified as an outlier and excluded from participating in read I/O operations.
.It Sy null_ops , read_ops , write_ops , free_ops , claim_ops , trim_ops
The number of I/O operations of each type performed by this vdev
.It Xo
@@ -150,6 +156,31 @@ The amount of space to reserve for the EFI system partition
.It Sy failfast
If this device should propagate BIO errors back to ZFS, used to disable
failfast.
.It Sy sit_out
Only valid for
.Sy RAIDZ
and
.Sy DRAID
vdevs.
True when a slow disk outlier was detected and the vdev is currently in a sit
out state.
This property can be manually set to cause vdevs to sit out.
It will also be automatically set by the
.Sy autosit
logic if that is enabled.
While sitting out, the vdev will not participate in normal reads, instead its
data will be reconstructed as needed from parity.
.It Sy autosit
Only valid for
.Sy RAIDZ
and
.Sy DRAID
vdevs.
If set, this enables the kernel-level slow disk detection logic.
This logic automatically causes any vdevs that are significant negative
performance outliers to sit out, as described in the
.Sy sit_out
property.
.It Sy path
The path to the device for this vdev
.It Sy allocating
+12
View File
@@ -69,6 +69,13 @@
.Op Fl U Ar cache
.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns …
.Nm
.Fl -allocated-map
.Op Fl mAFLPXY
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
.Op Fl t Ar txg
.Op Fl U Ar cache
.Ar poolname Op Ar vdev Oo Ar metaslab Oc Ns …
.Nm
.Fl O
.Op Fl K Ar key
.Ar dataset path
@@ -128,6 +135,11 @@ that zdb may interpret inconsistent pool data and behave erratically.
.Sh OPTIONS
Display options:
.Bl -tag -width Ds
.It Fl Sy -allocated-map
Prints out a list of all the allocated regions in the pool.
Primarily intended for use with the
.Nm zhack metaslab leak
subcommand.
.It Fl b , -block-stats
Display statistics regarding the number, size
.Pq logical, physical and allocated
+32
View File
@@ -158,6 +158,8 @@ Multiple ZEDLETs may be invoked for a given zevent.
ZEDLETs are executables invoked by the ZED in response to a given zevent.
They should be written under the presumption they can be invoked concurrently,
and they should use appropriate locking to access any shared resources.
The one exception to this is "synchronous zedlets", which are described later
in this page.
Common variables used by ZEDLETs can be stored in the default rc file which
is sourced by scripts; these variables should be prefixed with
.Sy ZED_ .
@@ -233,6 +235,36 @@ and
.Sy ZPOOL .
These variables may be overridden in the rc file.
.
.Sh Synchronous ZEDLETS
ZED's normal behavior is to spawn off zedlets in parallel and ignore their
completion order.
This means that ZED can potentially
have zedlets for event ID number 2 starting before zedlets for event ID number
1 have finished.
Most of the time this is fine, and it actually helps when the system is getting
hammered with hundreds of events.
.Pp
However, there are times when you want your zedlets to be executed in sequence
with the event ID.
That is where synchronous zedlets come in.
.Pp
ZED will wait for all previously spawned zedlets to finish before running
a synchronous zedlet.
Synchronous zedlets are guaranteed to be the only
zedlet running.
No other zedlets may run in parallel with a synchronous zedlet.
Users should be careful to only use synchronous zedlets when needed, since
they decrease parallelism.
.Pp
To make a zedlet synchronous, simply add a "-sync-" immediately following the
event name in the zedlet's file name:
.Pp
.Sy EVENT_NAME-sync-ZEDLETNAME.sh
.Pp
For example, if you wanted a synchronous statechange script:
.Pp
.Sy statechange-sync-myzedlet.sh
.
.Sh FILES
.Bl -tag -width "-c"
.It Pa @sysconfdir@/zfs/zed.d
+3 -2
View File
@@ -30,7 +30,7 @@
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\"
.Dd March 13, 2025
.Dd September 8, 2025
.Dt ZFS-ALLOW 8
.Os
.
@@ -212,7 +212,8 @@ receive subcommand Must also have the \fBmount\fR and \fBcreate\fR ability, requ
release subcommand Allows releasing a user hold which might destroy the snapshot
rename subcommand Must also have the \fBmount\fR and \fBcreate\fR ability in the new parent
rollback subcommand Must also have the \fBmount\fR ability
send subcommand
send subcommand Allows sending a replication stream of a dataset.
send:raw subcommand Only allows sending raw replication streams, preventing encrypted datasets being sent in decrypted form.
share subcommand Allows sharing file systems over NFS or SMB protocols
snapshot subcommand Must also have the \fBmount\fR ability
@@ -190,6 +190,16 @@ Issued when a scrub is resumed on a pool.
.It Sy scrub.paused
Issued when a scrub is paused on a pool.
.It Sy bootfs.vdev.attach
.It Sy sitout
Issued when a
.Sy RAIDZ
or
.Sy DRAID
vdev triggers the
.Sy autosit
logic.
This logic detects when a disk in such a vdev is significantly slower than its
peers, and sits it out temporarily to preserve the performance of the pool.
.El
.
.Sh PAYLOADS
@@ -1175,7 +1175,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
int count = 0;
zfs_acl_phys_t acl_phys;
if (zp->z_zfsvfs->z_replay == B_FALSE) {
if (ZTOV(zp) != NULL && zp->z_zfsvfs->z_replay == B_FALSE) {
ASSERT_VOP_IN_SEQC(ZTOV(zp));
}
@@ -164,8 +164,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
ssize_t *resid)
uint8_t ashift, ssize_t *resid)
{
(void) ashift;
return (zfs_file_write_impl(fp, buf, count, &off, resid));
}
@@ -389,7 +389,9 @@ zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
error = vn_lock(vp, LK_EXCLUSIVE);
if (error)
return (error);
vn_seqc_write_begin(vp);
error = zfs_ioctl_setxattr(vp, fsx, cred);
vn_seqc_write_end(vp);
VOP_UNLOCK(vp);
return (error);
}
@@ -2206,6 +2208,7 @@ zfs_setattr_dir(znode_t *dzp)
if (err)
break;
vn_seqc_write_begin(ZTOV(zp));
mutex_enter(&dzp->z_lock);
if (zp->z_uid != dzp->z_uid) {
@@ -2255,6 +2258,7 @@ zfs_setattr_dir(znode_t *dzp)
dmu_tx_abort(tx);
}
tx = NULL;
vn_seqc_write_end(ZTOV(zp));
if (err != 0 && err != ENOENT)
break;
@@ -817,6 +817,10 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
(*zpp)->z_dnodesize = dnodesize;
(*zpp)->z_projid = projid;
vnode_t *vp = ZTOV(*zpp);
if (!(flag & IS_ROOT_NODE))
vn_seqc_write_begin(vp);
if (vap->va_mask & AT_XVATTR)
zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
@@ -825,7 +829,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
}
if (!(flag & IS_ROOT_NODE)) {
vnode_t *vp = ZTOV(*zpp);
vn_seqc_write_end(vp);
vp->v_vflag |= VV_FORCEINSMQ;
int err = insmntque(vp, zfsvfs->z_vfs);
vp->v_vflag &= ~VV_FORCEINSMQ;
@@ -115,8 +115,9 @@ zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
*/
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
ssize_t *resid)
uint8_t ashift, ssize_t *resid)
{
(void) ashift;
ssize_t rc;
rc = kernel_write(fp, buf, count, &off);
@@ -59,6 +59,7 @@ const zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_SNAPSHOT},
{ZFS_DELEG_PERM_SHARE},
{ZFS_DELEG_PERM_SEND},
{ZFS_DELEG_PERM_SEND_RAW},
{ZFS_DELEG_PERM_USERPROP},
{ZFS_DELEG_PERM_USERQUOTA},
{ZFS_DELEG_PERM_GROUPQUOTA},
@@ -467,9 +467,15 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING",
boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_SIT_OUT, "sit_out", 0,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "SIT_OUT", boolean_table,
sfeatures);
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_AUTOSIT, "autosit", 0,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "AUTOSIT", boolean_table,
sfeatures);
/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
+11 -7
View File
@@ -244,6 +244,13 @@ ddt_log_alloc_entry(ddt_t *ddt)
return (ddle);
}
/*
 * Release a log entry back to the kmem cache it belongs to. Which cache
 * that is depends on whether the DDT has DDT_FLAG_FLAT set.
 */
static void
ddt_log_free_entry(ddt_t *ddt, ddt_log_entry_t *ddle)
{
	if (ddt->ddt_flags & DDT_FLAG_FLAT)
		kmem_cache_free(ddt_log_entry_flat_cache, ddle);
	else
		kmem_cache_free(ddt_log_entry_trad_cache, ddle);
}
static void
ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
{
@@ -349,8 +356,7 @@ ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
avl_remove(&ddl->ddl_tree, ddle);
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
ddt_log_free_entry(ddt, ddle);
return (B_TRUE);
}
@@ -367,8 +373,7 @@ ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
avl_remove(&ddl->ddl_tree, ddle);
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
ddt_log_free_entry(ddt, ddle);
return (B_TRUE);
}
@@ -529,8 +534,7 @@ ddt_log_empty(ddt_t *ddt, ddt_log_t *ddl)
IMPLY(ddt->ddt_version == UINT64_MAX, avl_is_empty(&ddl->ddl_tree));
while ((ddle =
avl_destroy_nodes(&ddl->ddl_tree, &cookie)) != NULL) {
kmem_cache_free(ddt->ddt_flags & DDT_FLAG_FLAT ?
ddt_log_entry_flat_cache : ddt_log_entry_trad_cache, ddle);
ddt_log_free_entry(ddt, ddle);
}
ASSERT(avl_is_empty(&ddl->ddl_tree));
}
@@ -729,7 +733,7 @@ ddt_log_load(ddt_t *ddt)
ddle = fe;
fe = AVL_NEXT(fl, fe);
avl_remove(fl, ddle);
ddt_log_free_entry(ddt, ddle);
ddle = ae;
ae = AVL_NEXT(al, ae);
}
+4 -2
View File
@@ -759,6 +759,8 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
*/
uint8_t ibps = ibs - SPA_BLKPTRSHIFT;
limit = P2ROUNDUP(dmu_prefetch_max, 1 << ibs) >> ibs;
if (limit == 0)
end2 = start2;
do {
level2++;
start2 = P2ROUNDUP(start2, 1 << ibps) >> ibps;
@@ -1689,8 +1691,8 @@ dmu_object_cached_size(objset_t *os, uint64_t object,
dmu_object_info_from_dnode(dn, &doi);
for (uint64_t off = 0; off < doi.doi_max_offset;
off += dmu_prefetch_max) {
for (uint64_t off = 0; off < doi.doi_max_offset &&
dmu_prefetch_max > 0; off += dmu_prefetch_max) {
/* dbuf_read doesn't prefetch L1 blocks. */
dmu_prefetch_by_dnode(dn, 1, off,
dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ);
+3 -3
View File
@@ -251,11 +251,11 @@ spa_mode_t spa_mode_global = SPA_MODE_UNINIT;
#ifdef ZFS_DEBUG
/*
* Everything except dprintf, set_error, spa, and indirect_remap is on
* by default in debug builds.
* Everything except dprintf, set_error, indirect_remap, and raidz_reconstruct
* is on by default in debug builds.
*/
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR |
ZFS_DEBUG_INDIRECT_REMAP);
ZFS_DEBUG_INDIRECT_REMAP | ZFS_DEBUG_RAIDZ_RECONSTRUCT);
#else
int zfs_flags = 0;
#endif
+116 -1
View File
@@ -29,7 +29,7 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
* Copyright (c) 2021, Klara Inc.
* Copyright (c) 2021, 2025, Klara, Inc.
* Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
*/
@@ -1086,6 +1086,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
}
}
if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))
vd->vdev_autosit =
vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);
/*
* Add ourselves to the parent's list of children.
*/
@@ -1187,6 +1191,9 @@ vdev_free(vdev_t *vd)
spa_spare_remove(vd);
if (vd->vdev_isl2cache)
spa_l2cache_remove(vd);
if (vd->vdev_prev_histo)
kmem_free(vd->vdev_prev_histo,
sizeof (uint64_t) * VDEV_L_HISTO_BUCKETS);
txg_list_destroy(&vd->vdev_ms_list);
txg_list_destroy(&vd->vdev_dtl_list);
@@ -3857,6 +3864,26 @@ vdev_load(vdev_t *vd)
}
}
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
spa_t *spa = vd->vdev_spa;
uint64_t autosit;
error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
vdev_prop_to_name(VDEV_PROP_AUTOSIT), sizeof (autosit),
1, &autosit);
if (error == 0) {
vd->vdev_autosit = autosit == 1;
} else if (error == ENOENT) {
vd->vdev_autosit = vdev_prop_default_numeric(
VDEV_PROP_AUTOSIT);
} else {
vdev_dbgmsg(vd,
"vdev_load: zap_lookup(top_zap=%llu) "
"failed [error=%d]",
(u_longlong_t)vd->vdev_top_zap, error);
}
}
/*
* Load any rebuild state from the top-level vdev zap.
*/
@@ -4616,6 +4643,8 @@ vdev_clear(spa_t *spa, vdev_t *vd)
vd->vdev_stat.vs_checksum_errors = 0;
vd->vdev_stat.vs_dio_verify_errors = 0;
vd->vdev_stat.vs_slow_ios = 0;
atomic_store_64(&vd->vdev_outlier_count, 0);
vd->vdev_read_sit_out_expire = 0;
for (int c = 0; c < vd->vdev_children; c++)
vdev_clear(spa, vd->vdev_child[c]);
@@ -6107,6 +6136,56 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_failfast = intval & 1;
break;
case VDEV_PROP_SIT_OUT:
/* Only expose this for a draid or raidz leaf */
if (!vd->vdev_ops->vdev_op_leaf ||
vd->vdev_top == NULL ||
(vd->vdev_top->vdev_ops != &vdev_raidz_ops &&
vd->vdev_top->vdev_ops != &vdev_draid_ops)) {
error = ENOTSUP;
break;
}
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
if (intval == 1) {
vdev_t *ancestor = vd;
while (ancestor->vdev_parent != vd->vdev_top)
ancestor = ancestor->vdev_parent;
vdev_t *pvd = vd->vdev_top;
uint_t sitouts = 0;
for (int i = 0; i < pvd->vdev_children; i++) {
if (pvd->vdev_child[i] == ancestor)
continue;
if (vdev_sit_out_reads(
pvd->vdev_child[i], 0)) {
sitouts++;
}
}
if (sitouts >= vdev_get_nparity(pvd)) {
error = ZFS_ERR_TOO_MANY_SITOUTS;
break;
}
if (error == 0)
vdev_raidz_sit_child(vd,
INT64_MAX - gethrestime_sec());
} else {
vdev_raidz_unsit_child(vd);
}
break;
case VDEV_PROP_AUTOSIT:
if (vd->vdev_ops != &vdev_raidz_ops &&
vd->vdev_ops != &vdev_draid_ops) {
error = ENOTSUP;
break;
}
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_autosit = intval == 1;
break;
case VDEV_PROP_CHECKSUM_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
@@ -6456,6 +6535,19 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
ZPROP_SRC_NONE);
}
continue;
case VDEV_PROP_SIT_OUT:
/* Only expose this for a draid or raidz leaf */
if (vd->vdev_ops->vdev_op_leaf &&
vd->vdev_top != NULL &&
(vd->vdev_top->vdev_ops ==
&vdev_raidz_ops ||
vd->vdev_top->vdev_ops ==
&vdev_draid_ops)) {
vdev_prop_add_list(outnvl, propname,
NULL, vdev_sit_out_reads(vd, 0),
ZPROP_SRC_NONE);
}
continue;
case VDEV_PROP_TRIM_SUPPORT:
/* only valid for leaf vdevs */
if (vd->vdev_ops->vdev_op_leaf) {
@@ -6506,6 +6598,29 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
case VDEV_PROP_AUTOSIT:
/* Only raidz and draid vdevs can have this property */
if (vd->vdev_ops != &vdev_raidz_ops &&
vd->vdev_ops != &vdev_draid_ops) {
src = ZPROP_SRC_NONE;
intval = ZPROP_BOOLEAN_NA;
} else {
err = vdev_prop_get_int(vd, prop,
&intval);
if (err && err != ENOENT)
break;
if (intval ==
vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;
else
src = ZPROP_SRC_LOCAL;
}
vdev_prop_add_list(outnvl, propname, NULL,
intval, src);
break;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2018 Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2025, Klara, Inc.
*/
#include <sys/zfs_context.h>
@@ -1996,6 +1997,33 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
rc->rc_allow_repair = 1;
}
}
if (vdev_sit_out_reads(cvd, zio->io_flags)) {
rr->rr_outlier_cnt++;
ASSERT0(rc->rc_latency_outlier);
rc->rc_latency_outlier = 1;
}
}
/*
* When the row contains a latency outlier and sufficient parity
* exists to reconstruct the column data, then skip reading the
* known slow child vdev as a performance optimization.
*/
if (rr->rr_outlier_cnt > 0 &&
(rr->rr_firstdatacol - rr->rr_missingparity) >=
(rr->rr_missingdata + 1)) {
for (int c = rr->rr_cols - 1; c >= rr->rr_firstdatacol; c--) {
raidz_col_t *rc = &rr->rr_col[c];
if (rc->rc_error == 0 && rc->rc_latency_outlier) {
rr->rr_missingdata++;
rc->rc_error = SET_ERROR(EAGAIN);
rc->rc_skipped = 1;
break;
}
}
}
/*
+2 -1
View File
@@ -228,7 +228,8 @@ vdev_file_io_strategy(void *arg)
abd_return_buf_copy(zio->io_abd, buf, size);
} else {
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
err = zfs_file_pwrite(vf->vf_file, buf, size, off,
vd->vdev_ashift, &resid);
abd_return_buf(zio->io_abd, buf, size);
}
zio->io_error = err;
+345
View File
@@ -24,6 +24,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
* Copyright (c) 2016 Gvozden Nešković. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
*/
#include <sys/zfs_context.h>
@@ -355,6 +356,32 @@ unsigned long raidz_expand_max_reflow_bytes = 0;
*/
uint_t raidz_expand_pause_point = 0;
/*
 * Duration, in seconds, of a slow drive read sit out period.
 * vdev_sit_out_reads() never reports a vdev as sitting out while this
 * is zero.
 */
static unsigned long vdev_read_sit_out_secs = 600;
/*
 * How often, in milliseconds, each RAID-Z and dRAID vdev will check for slow
 * disk outliers.
 * Increasing this interval will reduce the sensitivity of detection (since all
 * I/Os since the last check are included in the statistics), but will slow the
 * response to a disk developing a problem.
 *
 * Defaults to once per second; setting extremely small values may cause
 * negative performance effects.
 */
static hrtime_t vdev_raidz_outlier_check_interval_ms = 1000;
/*
 * When performing slow outlier checks for RAID-Z and dRAID vdevs, this value is
 * used to determine how far out an outlier must be before it counts as an event
 * worth considering.  It is the multiplier applied to the interquartile range
 * in latency_quartiles_fence().
 *
 * Smaller values will result in more aggressive sitting out of disks that may
 * have problems, but may significantly increase the rate of spurious sit-outs.
 */
static uint32_t vdev_raidz_outlier_insensitivity = 50;
/*
* Maximum amount of copy io's outstanding at once.
*/
@@ -2311,6 +2338,41 @@ vdev_raidz_min_asize(vdev_t *vd)
vd->vdev_children);
}
/*
 * Return B_TRUE if reads to this vdev should currently be skipped because
 * it is in a sit out period.
 *
 * Outlier detection itself is done by vdev_child_slow_outlier(), based on
 * the latency of the most recent child reads. This function only answers
 * whether a disk that has been an outlier too many times is still sitting
 * out right now.
 *
 * B_FALSE is always returned when vdev_read_sit_out_secs is zero or when
 * the I/O is a scrub. For an interior vdev the result is B_TRUE if any
 * child is sitting out. For a leaf whose sit out period is over, the stored
 * expiration is reset to zero as a side effect.
 */
boolean_t
vdev_sit_out_reads(vdev_t *vd, zio_flag_t io_flags)
{
	/* Sit outs are disabled, or a scrub must not skip a column read */
	if (vdev_read_sit_out_secs == 0 || (io_flags & ZIO_FLAG_SCRUB) != 0)
		return (B_FALSE);

	if (vd->vdev_ops->vdev_op_leaf) {
		if (vd->vdev_read_sit_out_expire < gethrestime_sec()) {
			/* The sit out period has ended; forget it */
			vd->vdev_read_sit_out_expire = 0;
			return (B_FALSE);
		}
		return (B_TRUE);
	}

	/*
	 * Deliberately visit every child rather than stopping at the first
	 * hit, so that each expired leaf below has its expiration reset.
	 */
	boolean_t any_sitting = B_FALSE;
	for (int i = 0; i < vd->vdev_children; i++) {
		if (vdev_sit_out_reads(vd->vdev_child[i], io_flags))
			any_sitting = B_TRUE;
	}

	return (any_sitting);
}
void
vdev_raidz_child_done(zio_t *zio)
{
@@ -2475,6 +2537,45 @@ vdev_raidz_io_start_read_row(zio_t *zio, raidz_row_t *rr, boolean_t forceparity)
rc->rc_skipped = 1;
continue;
}
if (vdev_sit_out_reads(cvd, zio->io_flags)) {
rr->rr_outlier_cnt++;
ASSERT0(rc->rc_latency_outlier);
rc->rc_latency_outlier = 1;
}
}
/*
* When the row contains a latency outlier and sufficient parity
* exists to reconstruct the column data, then skip reading the
* known slow child vdev as a performance optimization.
*/
if (rr->rr_outlier_cnt > 0 &&
(rr->rr_firstdatacol - rr->rr_missingparity) >=
(rr->rr_missingdata + 1)) {
for (int c = rr->rr_cols - 1; c >= 0; c--) {
raidz_col_t *rc = &rr->rr_col[c];
if (rc->rc_error == 0 && rc->rc_latency_outlier) {
if (c >= rr->rr_firstdatacol)
rr->rr_missingdata++;
else
rr->rr_missingparity++;
rc->rc_error = SET_ERROR(EAGAIN);
rc->rc_skipped = 1;
break;
}
}
}
for (int c = rr->rr_cols - 1; c >= 0; c--) {
raidz_col_t *rc = &rr->rr_col[c];
vdev_t *cvd = vd->vdev_child[rc->rc_devidx];
if (rc->rc_error || rc->rc_size == 0)
continue;
if (forceparity ||
c >= rr->rr_firstdatacol || rr->rr_missingdata > 0 ||
(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
@@ -2498,6 +2599,7 @@ vdev_raidz_io_start_read_phys_cols(zio_t *zio, raidz_map_t *rm)
ASSERT3U(prc->rc_devidx, ==, i);
vdev_t *cvd = vd->vdev_child[i];
if (!vdev_readable(cvd)) {
prc->rc_error = SET_ERROR(ENXIO);
prc->rc_tried = 1; /* don't even try */
@@ -2774,6 +2876,239 @@ vdev_raidz_worst_error(raidz_row_t *rr)
return (error);
}
/*
 * Find the median value from a set of n values.
 *
 * The middle element(s) are selected purely by position, so the caller is
 * expected to pass data that is already sorted. For an even n the result
 * is the mean of the two middle values, rounded down.
 *
 * Returns 0 when n is 0, so that an empty set never indexes before the
 * start of data.
 */
static uint64_t
latency_median_value(const uint64_t *data, size_t n)
{
	if (n == 0)
		return (0);

	size_t mid = n >> 1;

	/* Odd count: a single middle element */
	if (n % 2 != 0)
		return (data[mid]);

	/*
	 * Even count: average the two middle elements. Halve each operand
	 * before adding (and carry the shared low bit) so the sum cannot
	 * wrap; the result equals floor((lo + hi) / 2).
	 */
	uint64_t lo = data[mid - 1];
	uint64_t hi = data[mid];

	return ((lo >> 1) + (hi >> 1) + (lo & hi & 1));
}
/*
 * Calculate the outlier fence from a set of n latency values, and report
 * the interquartile range that was used through *iqr.
 *
 * fence = Q3 + vdev_raidz_outlier_insensitivity x IQR
 *
 * Q1 is taken as the median of the first n/2 values and Q3 as the median
 * of the last n/2 values.
 */
static uint64_t
latency_quartiles_fence(const uint64_t *data, size_t n, uint64_t *iqr)
{
	size_t half = n >> 1;
	const uint64_t *upper = &data[(n + 1) >> 1];
	uint64_t q1 = latency_median_value(&data[0], half);
	uint64_t q3 = latency_median_value(upper, half);
	uint64_t range = q3 - q1;
	uint64_t min_range = q1 / 4;

	/*
	 * To avoid detecting false positive outliers when N is small and
	 * the latency values are very close, never use an IQR smaller than
	 * a quarter of Q1.
	 */
	if (range < min_range)
		range = min_range;
	*iqr = range;

	return (q3 + (range * vdev_raidz_outlier_insensitivity));
}
#define LAT_CHILDREN_MIN 5
#define LAT_OUTLIER_LIMIT 20
/*
 * Comparator handed to qsort() for arrays of uint64_t latencies. The
 * result is whatever TREE_CMP() yields for the two pointed-to values.
 */
static int
latency_compare(const void *arg1, const void *arg2)
{
	uint64_t lhs = *(const uint64_t *)arg1;
	uint64_t rhs = *(const uint64_t *)arg2;

	return (TREE_CMP(lhs, rhs));
}
/*
 * Begin a read sit out period of 'secs' seconds on every leaf vdev at or
 * below svd, and bump each such leaf's slow I/O counter.
 */
void
vdev_raidz_sit_child(vdev_t *svd, uint64_t secs)
{
	/* Visit the whole subtree first. */
	for (int i = 0; i < svd->vdev_children; i++) {
		vdev_t *child = svd->vdev_child[i];

		vdev_raidz_sit_child(child, secs);
	}

	/* Interior vdevs are left untouched; only a leaf is sat out. */
	if (svd->vdev_ops->vdev_op_leaf) {
		/* Begin a sit out period for this slow drive */
		svd->vdev_read_sit_out_expire = gethrestime_sec() + secs;

		/* Count each slow io period */
		mutex_enter(&svd->vdev_stat_lock);
		svd->vdev_stat.vs_slow_ios += 1;
		mutex_exit(&svd->vdev_stat_lock);
	}
}
/*
 * Clear the read sit out expiration on every leaf vdev at or below vd.
 */
void
vdev_raidz_unsit_child(vdev_t *vd)
{
	/* Visit the whole subtree first. */
	for (int i = 0; i < vd->vdev_children; i++) {
		vdev_t *child = vd->vdev_child[i];

		vdev_raidz_unsit_child(child);
	}

	/* Interior vdevs are left untouched; only a leaf is reset. */
	if (vd->vdev_ops->vdev_op_leaf)
		vd->vdev_read_sit_out_expire = 0;
}
/*
* Check for any latency outlier from latest set of child reads.
*
* Uses a Tukey's fence, with K = 50, for detecting extreme outliers. This
* rule defines extreme outliers as data points outside the fence of the
* third quartile plus fifty times the Interquartile Range (IQR). This range
* is the distance between the first and third quartile.
*
* Fifty is an extremely large value for Tukey's fence, but the outliers we're
* attempting to detect here are orders of magnitude larger than the
* median. This large value should capture any truly faulty disk quickly,
* without causing spurious sit-outs.
*
* To further avoid spurious sit-outs, vdevs must be detected multiple times
* as an outlier before they are sat, and outlier counts will gradually decay.
* Every nchildren times we have detected an outlier, we subtract 2 from the
* outlier count of all children. If detected outliers are close to uniformly
* distributed, this will result in the outlier count remaining close to 0
* (in expectation; over long enough time-scales, spurious sit-outs are still
* possible).
*/
static void
vdev_child_slow_outlier(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
/*
* Nothing to do when autosit is off for this vdev, the sit out period
* is zero, or there are fewer than LAT_CHILDREN_MIN children.
*/
if (!vd->vdev_autosit || vdev_read_sit_out_secs == 0 ||
vd->vdev_children < LAT_CHILDREN_MIN)
return;
/* Rate limit: skip unless the check interval has elapsed. */
hrtime_t now = getlrtime();
uint64_t last = atomic_load_64(&vd->vdev_last_latency_check);
if ((now - last) < MSEC2NSEC(vdev_raidz_outlier_check_interval_ms))
return;
/* Allow a single winner when there are racing callers. */
if (atomic_cas_64(&vd->vdev_last_latency_check, last, now) != last)
return;
int children = vd->vdev_children;
/* One average read latency slot per child; freed on every exit path. */
uint64_t *lat_data = kmem_alloc(sizeof (uint64_t) * children, KM_SLEEP);
/*
* First pass: any child without a previous histogram snapshot gets one,
* seeded with a copy of its current read latency histogram.
*/
for (int c = 0; c < children; c++) {
vdev_t *cvd = vd->vdev_child[c];
if (cvd->vdev_prev_histo == NULL) {
mutex_enter(&cvd->vdev_stat_lock);
size_t size =
sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
cvd->vdev_prev_histo = kmem_zalloc(size, KM_SLEEP);
memcpy(cvd->vdev_prev_histo,
cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ],
size);
mutex_exit(&cvd->vdev_stat_lock);
}
}
/*
* Second pass: compute each child's average read latency since the
* previous snapshot and remember the child with the largest value.
* Once 'skip' is set the remaining children only have their snapshot
* refreshed, and no outlier decision is made this round.
*/
uint64_t max = 0;
vdev_t *svd = NULL;
uint_t sitouts = 0;
boolean_t skip = B_FALSE, svd_sitting = B_FALSE;
for (int c = 0; c < children; c++) {
vdev_t *cvd = vd->vdev_child[c];
/* A child counts as sitting if it is sat out or not healthy. */
boolean_t sitting = vdev_sit_out_reads(cvd, 0) ||
cvd->vdev_state != VDEV_STATE_HEALTHY;
/* We can't sit out more disks than we have parity */
if (sitting && ++sitouts >= vdev_get_nparity(vd))
skip = B_TRUE;
mutex_enter(&cvd->vdev_stat_lock);
uint64_t *prev_histo = cvd->vdev_prev_histo;
uint64_t *histo =
cvd->vdev_stat_ex.vsx_disk_histo[ZIO_TYPE_READ];
if (skip) {
/* Keep the snapshot current even though we are skipping. */
size_t size =
sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
memcpy(prev_histo, histo, size);
mutex_exit(&cvd->vdev_stat_lock);
continue;
}
/*
* Weight the new reads in bucket i by 2^i and sum them; dividing
* by the number of new reads below yields the average.
*/
uint64_t count = 0;
lat_data[c] = 0;
for (int i = 0; i < VDEV_L_HISTO_BUCKETS; i++) {
uint64_t this_count = histo[i] - prev_histo[i];
lat_data[c] += (1ULL << i) * this_count;
count += this_count;
}
size_t size = sizeof (cvd->vdev_stat_ex.vsx_disk_histo[0]);
memcpy(prev_histo, histo, size);
mutex_exit(&cvd->vdev_stat_lock);
/* MAX(1, count) avoids dividing by zero when there were no reads. */
lat_data[c] /= MAX(1, count);
/* Wait until all disks have been read from */
if (lat_data[c] == 0 && !sitting) {
skip = B_TRUE;
continue;
}
/* Keep track of the vdev with largest value */
if (lat_data[c] > max) {
max = lat_data[c];
svd = cvd;
svd_sitting = sitting;
}
}
if (skip) {
kmem_free(lat_data, sizeof (uint64_t) * children);
return;
}
/* Sort the averages so the quartiles can be taken by position. */
qsort((void *)lat_data, children, sizeof (uint64_t), latency_compare);
uint64_t iqr;
uint64_t fence = latency_quartiles_fence(lat_data, children, &iqr);
ASSERT3U(lat_data[children - 1], ==, max);
if (max > fence && !svd_sitting) {
/*
* NOTE(review): the division below relies on iqr being non-zero;
* confirm that still holds in builds where ASSERT3U is compiled
* out.
*/
ASSERT3U(iqr, >, 0);
/*
* The increment grows with how many IQRs the slowest child lies
* beyond the fence, clamped to [1, LAT_OUTLIER_LIMIT / 4].
*/
uint64_t incr = MAX(1, MIN((max - fence) / iqr,
LAT_OUTLIER_LIMIT / 4));
vd->vdev_outlier_count += incr;
/*
* Decay: once the parent's running count reaches the number of
* children, take 2 off every child's count (not below zero) and
* restart the parent's count.
*/
if (vd->vdev_outlier_count >= children) {
for (int c = 0; c < children; c++) {
vdev_t *cvd = vd->vdev_child[c];
cvd->vdev_outlier_count -= 2;
cvd->vdev_outlier_count = MAX(0,
cvd->vdev_outlier_count);
}
vd->vdev_outlier_count = 0;
}
/*
* Keep track of how many times this child has had
* an outlier read. A disk that persistently has a
* higher than peers outlier count will be considered
* a slow disk.
*/
svd->vdev_outlier_count += incr;
if (svd->vdev_outlier_count > LAT_OUTLIER_LIMIT) {
ASSERT0(svd->vdev_read_sit_out_expire);
vdev_raidz_sit_child(svd, vdev_read_sit_out_secs);
(void) zfs_ereport_post(FM_EREPORT_ZFS_SITOUT,
zio->io_spa, svd, NULL, NULL, 0);
vdev_dbgmsg(svd, "begin read sit out for %d secs",
(int)vdev_read_sit_out_secs);
/* Start every child from a clean slate after a sit out. */
for (int c = 0; c < vd->vdev_children; c++)
vd->vdev_child[c]->vdev_outlier_count = 0;
}
}
kmem_free(lat_data, sizeof (uint64_t) * children);
}
static void
vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
{
@@ -3515,6 +3850,9 @@ vdev_raidz_io_done(zio_t *zio)
raidz_row_t *rr = rm->rm_row[i];
vdev_raidz_io_done_verified(zio, rr);
}
/* Periodically check for a read outlier */
if (zio->io_type == ZIO_TYPE_READ)
vdev_child_slow_outlier(zio);
zio_checksum_verified(zio);
} else {
/*
@@ -5155,3 +5493,10 @@ ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, scrub_after_expand, INT, ZMOD_RW,
"For expanded RAIDZ, automatically start a pool scrub when expansion "
"completes");
ZFS_MODULE_PARAM(zfs_vdev, vdev_, read_sit_out_secs, ULONG, ZMOD_RW,
"Raidz/draid slow disk sit out time period in seconds");
ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_check_interval_ms, ULONG,
ZMOD_RW, "Interval to check for slow raidz/draid children");
ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_insensitivity, UINT,
ZMOD_RW, "How insensitive the slow raidz/draid child check should be");
/* END CSTYLED */
+4 -3
View File
@@ -162,9 +162,9 @@ dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg)
daydiff = time - rrd_tail(&db->dbr_days);
monthdiff = time - rrd_tail(&db->dbr_months);
if (monthdiff >= 0 && monthdiff >= SEC2NSEC(30 * 24 * 60 * 60))
if (monthdiff >= 0 && monthdiff >= 30 * 24 * 60 * 60)
rrd_add(&db->dbr_months, time, txg);
else if (daydiff >= 0 && daydiff >= SEC2NSEC(24 * 60 * 60))
else if (daydiff >= 0 && daydiff >= 24 * 60 * 60)
rrd_add(&db->dbr_days, time, txg);
else if (minutedif >= 0)
rrd_add(&db->dbr_minutes, time, txg);
@@ -208,7 +208,8 @@ dbrrd_closest(hrtime_t tv, const rrd_data_t *r1, const rrd_data_t *r2)
if (r2 == NULL)
return (r1);
return (ABS(tv - r1->rrdd_time) < ABS(tv - r2->rrdd_time) ? r1 : r2);
return (ABS(tv - (hrtime_t)r1->rrdd_time) <
ABS(tv - (hrtime_t)r2->rrdd_time) ? r1 : r2);
}
uint64_t
+16 -3
View File
@@ -683,6 +683,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
dsl_dataset_t *ds;
const char *cp;
int error;
boolean_t rawok = (zc->zc_flags & 0x8);
/*
* Generate the current snapshot name from the given objsetid, then
@@ -705,6 +706,10 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
ZFS_DELEG_PERM_SEND, cr);
if (error != 0 && rawok == B_TRUE) {
error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
ZFS_DELEG_PERM_SEND_RAW, cr);
}
dsl_dataset_rele(ds, FTAG);
dsl_pool_rele(dp, FTAG);
@@ -714,9 +719,17 @@ zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
boolean_t rawok = nvlist_exists(innvl, "rawok");
int error;
(void) innvl;
return (zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_SEND, cr));
error = zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_SEND, cr);
if (error != 0 && rawok == B_TRUE) {
error = zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_SEND_RAW, cr);
}
return (error);
}
static int
@@ -7619,7 +7632,7 @@ zfs_ioctl_init(void)
zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_NONE, B_TRUE, B_TRUE,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
+1 -3
View File
@@ -433,7 +433,7 @@ make install DESTDIR=%{?buildroot}
find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \;
%if 0%{!?__brp_mangle_shebangs:1}
find %{?buildroot}%{_bindir} \
\( -name arc_summary -or -name arcstat -or -name dbufstat \
\( -name zarcsummary -or -name zarcstat -or -name dbufstat \
-or -name zilstat \) \
-exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \;
find %{?buildroot}%{_datadir} \
@@ -508,9 +508,7 @@ systemctl --system daemon-reload >/dev/null || true
%{_bindir}/raidz_test
%{_bindir}/zvol_wait
# Optional Python 3 scripts
%{_bindir}/arc_summary
%{_bindir}/zarcsummary
%{_bindir}/arcstat
%{_bindir}/zarcstat
%{_bindir}/dbufstat
%{_bindir}/zilstat
+2 -2
View File
@@ -83,8 +83,8 @@ my $tagged_patterns = q(
man/man?/*.?.in
# Unsuffixed programs (or generated of same)
cmd/arcstat.in
cmd/arc_summary
cmd/zarcstat.in
cmd/zarcsummary
cmd/dbufstat.in
cmd/zilstat.in
cmd/zpool/zpool.d/*
+17 -8
View File
@@ -323,6 +323,10 @@ tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
'zfs_send_raw', 'zfs_send_sparse', 'zfs_send-b', 'zfs_send_skip_missing']
tags = ['functional', 'cli_root', 'zfs_send']
[tests/functional/cli_root/zfs_send_delegation]
tests = ['zfs_send_test']
tags = ['functional', 'cli_root', 'zfs_send_delegation']
[tests/functional/cli_root/zfs_set]
tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
'canmount_002_pos', 'canmount_003_pos', 'canmount_004_pos',
@@ -379,7 +383,7 @@ tags = ['functional', 'cli_root', 'zfs_wait']
[tests/functional/cli_root/zhack]
tests = ['zhack_label_repair_001', 'zhack_label_repair_002',
'zhack_label_repair_003', 'zhack_label_repair_004']
'zhack_label_repair_003', 'zhack_label_repair_004', 'zhack_metaslab_leak']
pre =
post =
tags = ['functional', 'cli_root', 'zhack']
@@ -546,7 +550,7 @@ tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_multiple_pools',
'zpool_error_scrub_001_pos', 'zpool_error_scrub_002_pos',
'zpool_error_scrub_003_pos', 'zpool_error_scrub_004_pos',
'zpool_scrub_date_range_001']
'zpool_scrub_date_range_001', 'zpool_scrub_date_range_002']
tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
@@ -624,8 +628,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
'zpool_history_001_neg', 'zpool_import_001_neg', 'zpool_import_002_neg',
'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos',
'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege',
'zilstat_001_pos']
user =
tags = ['functional', 'cli_user', 'misc']
@@ -637,6 +641,10 @@ tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos',
user =
tags = ['functional', 'cli_user', 'zfs_list']
[tests/functional/cli_root/zfs_send_delegation_user]
tests = ['zfs_send_usertest']
tags = ['functional', 'cli_root', 'zfs_send_delegation_user']
[tests/functional/cli_user/zpool_iostat]
tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
'zpool_iostat_003_neg', 'zpool_iostat_004_pos',
@@ -940,10 +948,11 @@ tags = ['functional', 'rename_dirs']
[tests/functional/replacement]
tests = ['attach_import', 'attach_multiple', 'attach_rebuild',
'attach_resilver', 'detach', 'rebuild_disabled_feature',
'rebuild_multiple', 'rebuild_raidz', 'replace_import', 'replace_rebuild',
'replace_resilver', 'resilver_restart_001', 'resilver_restart_002',
'scrub_cancel']
'attach_resilver', 'attach_resilver_sit_out', 'detach',
'rebuild_disabled_feature', 'rebuild_multiple', 'rebuild_raidz',
'replace_import', 'replace_rebuild', 'replace_resilver',
'replace_resilver_sit_out', 'resilver_restart_001',
'resilver_restart_002', 'scrub_cancel']
tags = ['functional', 'replacement']
[tests/functional/reservation]
+3 -1
View File
@@ -109,7 +109,9 @@ tags = ['functional', 'direct']
[tests/functional/events:Linux]
tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill',
'zed_cksum_reported', 'zed_cksum_config', 'zed_io_config',
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple']
'zed_slow_io', 'zed_slow_io_many_vdevs', 'zed_diagnose_multiple',
'zed_synchronous_zedlet', 'slow_vdev_sit_out', 'slow_vdev_sit_out_neg',
'slow_vdev_degraded_sit_out']
tags = ['functional', 'events']
[tests/functional/fallocate:Linux]
@@ -400,8 +400,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
'zpool_detach_001_neg', 'zpool_export_001_neg', 'zpool_get_001_neg',
'zpool_history_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg',
'zpool_remove_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'zarcstat_001_pos',
'zarcsummary_001_pos', 'zarcsummary_002_neg', 'zpool_wait_privilege',
'zilstat_001_pos']
user =
tags = ['functional', 'cli_user', 'misc']
@@ -232,7 +232,7 @@ maybe = {
'cli_root/zpool_trim/zpool_trim_fault_export_import_online':
['FAIL', known_reason],
'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', 6141],
'cli_user/misc/arc_summary_001_pos': ['FAIL', known_reason],
'cli_user/misc/zarcsummary_001_pos': ['FAIL', known_reason],
'delegate/setup': ['SKIP', exec_reason],
'events/zed_cksum_config': ['FAIL', known_reason],
'fault/auto_replace_002_pos': ['FAIL', known_reason],
@@ -863,7 +863,8 @@ test_result(const crypto_test_t *test, int encrypt_rv, uint8_t *encrypt_buf,
return (pass);
/* print summary of test result */
printf("%s[%lu]: encrypt=%s decrypt=%s\n", test->fileloc, test->id,
printf("%s[%ju]: encrypt=%s decrypt=%s\n", test->fileloc,
(uintmax_t)test->id,
encrypt_pass ? "PASS" : "FAIL",
decrypt_pass ? "PASS" : "FAIL");
@@ -171,8 +171,8 @@ export ZFS_FILES='zdb
zpool
ztest
raidz_test
arc_summary
arcstat
zarcsummary
zarcstat
zilstat
dbufstat
mount.zfs
@@ -1112,6 +1112,16 @@ function get_pool_prop # property pool
zpool get -Hpo value "$prop" "$pool" || log_fail "zpool get $prop $pool"
}
#
# Print the value of a vdev property in parsable form, or fail the test.
#
# $1 property name
# $2 pool name
# $3 vdev name
#
function get_vdev_prop # property pool vdev
{
	typeset prop="$1" pool="$2" vdev="$3"

	if ! zpool get -Hpo value "$prop" "$pool" "$vdev"; then
		log_fail "zpool get $prop $pool $vdev"
	fi
}
# Return 0 if a pool exists; $? otherwise
#
# $1 - pool name
@@ -1970,6 +1980,28 @@ function wait_vdev_state # pool disk state timeout
return 1
}
#
# Poll once per second until the vdev's 'sit_out' property reads "off".
#
# $1 pool name (defaults to $TESTPOOL)
# $2 vdev name
# $3 timeout, counted in polling attempts (defaults to 300)
#
# Returns 0 as soon as the property is "off", 1 if the timeout runs out.
#
function wait_sit_out #pool vdev timeout
{
	typeset pool=${1:-$TESTPOOL}
	typeset vdev="$2"
	typeset timeout=${3:-300}

	# 'timer' is intentionally not declared with typeset here.
	timer=0
	while (( timer < $timeout )); do
		[ "$(get_vdev_prop sit_out "$pool" "$vdev")" = "off" ] && return 0
		sleep 1
		(( timer += 1 ))
	done

	return 1
}
#
# Check the output of 'zpool status -v <pool>',
# and to see if the content of <token> contain the <keyword> specified.
@@ -2881,6 +2913,28 @@ function user_run
log_note "user: $user"
log_note "cmd: $*"
if ! sudo -Eu $user test -x $PATH ; then
log_note "-------------------------------------------------"
log_note "Warning: $user doesn't have permissions on $PATH"
log_note ""
log_note "This usually happens when you're running ZTS locally"
log_note "from inside the ZFS source dir, and are attempting to"
log_note "run a test that calls user_run. The ephemeral user"
log_note "($user) that ZTS is creating does not have permission"
log_note "to traverse to $PATH, or the binaries in $PATH are"
log_note "not the right permissions."
log_note ""
log_note "To get around this, copy your ZFS source directory"
log_note "to a world-accessible location (like /tmp), and "
log_note "change the permissions on your ZFS source dir "
log_note "to allow access."
log_note ""
log_note "Also, verify that /dev/zfs is RW for others:"
log_note ""
log_note " sudo chmod o+rw /dev/zfs"
log_note "-------------------------------------------------"
fi
typeset out=$TEST_BASE_DIR/out
typeset err=$TEST_BASE_DIR/err
@@ -72,6 +72,9 @@ MULTIHOST_INTERVAL multihost.interval zfs_multihost_interval
OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize
PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable
RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes
READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs
SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms
SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity
REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress
REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment
@@ -88,6 +91,7 @@ SPA_DISCARD_MEMORY_LIMIT spa.discard_memory_limit zfs_spa_discard_memory_limit
SPA_LOAD_VERIFY_DATA spa.load_verify_data spa_load_verify_data
SPA_LOAD_VERIFY_METADATA spa.load_verify_metadata spa_load_verify_metadata
SPA_NOTE_TXG_TIME spa.note_txg_time spa_note_txg_time
SPA_FLUSH_TXG_TIME spa.flush_txg_time spa_flush_txg_time
TRIM_EXTENT_BYTES_MIN trim.extent_bytes_min zfs_trim_extent_bytes_min
TRIM_METASLAB_SKIP trim.metaslab_skip zfs_trim_metaslab_skip
TRIM_TXG_BATCH trim.txg_batch zfs_trim_txg_batch
@@ -892,6 +892,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zfs_send/zfs_send_raw.ksh \
functional/cli_root/zfs_send/zfs_send_skip_missing.ksh \
functional/cli_root/zfs_send/zfs_send_sparse.ksh \
functional/cli_root/zfs_send_delegation/cleanup.ksh \
functional/cli_root/zfs_send_delegation/setup.ksh \
functional/cli_root/zfs_send_delegation/zfs_send_test.ksh \
functional/cli_root/zfs_set/cache_001_pos.ksh \
functional/cli_root/zfs_set/cache_002_neg.ksh \
functional/cli_root/zfs_set/canmount_001_pos.ksh \
@@ -1009,6 +1012,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zhack/zhack_label_repair_002.ksh \
functional/cli_root/zhack/zhack_label_repair_003.ksh \
functional/cli_root/zhack/zhack_label_repair_004.ksh \
functional/cli_root/zhack/zhack_metaslab_leak.ksh \
functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \
functional/cli_root/zpool_add/add-o_ashift.ksh \
functional/cli_root/zpool_add/add_prop_ashift.ksh \
@@ -1247,6 +1251,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_date_range_001.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_date_range_002.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \
functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \
@@ -1351,9 +1356,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool/zpool_002_pos.ksh \
functional/cli_root/zpool/zpool_003_pos.ksh \
functional/cli_root/zpool/zpool_colors.ksh \
functional/cli_user/misc/arcstat_001_pos.ksh \
functional/cli_user/misc/arc_summary_001_pos.ksh \
functional/cli_user/misc/arc_summary_002_neg.ksh \
functional/cli_user/misc/zarcstat_001_pos.ksh \
functional/cli_user/misc/zarcsummary_001_pos.ksh \
functional/cli_user/misc/zarcsummary_002_neg.ksh \
functional/cli_user/misc/zilstat_001_pos.ksh \
functional/cli_user/misc/cleanup.ksh \
functional/cli_user/misc/setup.ksh \
@@ -1408,6 +1413,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_user/zfs_list/zfs_list_005_neg.ksh \
functional/cli_user/zfs_list/zfs_list_007_pos.ksh \
functional/cli_user/zfs_list/zfs_list_008_neg.ksh \
functional/cli_user/zfs_send_delegation_user/cleanup.ksh \
functional/cli_user/zfs_send_delegation_user/setup.ksh \
functional/cli_user/zfs_send_delegation_user/zfs_send_usertest.ksh \
functional/cli_user/zpool_iostat/cleanup.ksh \
functional/cli_user/zpool_iostat/setup.ksh \
functional/cli_user/zpool_iostat/zpool_iostat_001_neg.ksh \
@@ -1524,6 +1532,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/events_001_pos.ksh \
functional/events/events_002_pos.ksh \
functional/events/setup.ksh \
functional/events/slow_vdev_degraded_sit_out.ksh \
functional/events/slow_vdev_sit_out.ksh \
functional/events/slow_vdev_sit_out_neg.ksh \
functional/events/zed_cksum_config.ksh \
functional/events/zed_cksum_reported.ksh \
functional/events/zed_diagnose_multiple.ksh \
@@ -1532,6 +1543,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/events/zed_rc_filter.ksh \
functional/events/zed_slow_io.ksh \
functional/events/zed_slow_io_many_vdevs.ksh \
functional/events/zed_synchronous_zedlet.ksh \
functional/exec/cleanup.ksh \
functional/exec/exec_001_pos.ksh \
functional/exec/exec_002_neg.ksh \
@@ -1936,6 +1948,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/replacement/attach_multiple.ksh \
functional/replacement/attach_rebuild.ksh \
functional/replacement/attach_resilver.ksh \
functional/replacement/attach_resilver_sit_out.ksh \
functional/replacement/cleanup.ksh \
functional/replacement/detach.ksh \
functional/replacement/rebuild_disabled_feature.ksh \
@@ -1944,6 +1957,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/replacement/replace_import.ksh \
functional/replacement/replace_rebuild.ksh \
functional/replacement/replace_resilver.ksh \
functional/replacement/replace_resilver_sit_out.ksh \
functional/replacement/resilver_restart_001.ksh \
functional/replacement/resilver_restart_002.ksh \
functional/replacement/scrub_cancel.ksh \
@@ -63,7 +63,7 @@ log_mustnot_expect 'no such tunable: 0' zdb -o show=0
log_mustnot_expect 'no such tunable: 1' zdb -o info=1
# can set multiple in same command
log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294965758 -> 512$"'
log_must eval 'zdb -o zfs_recover=1 -o zfs_flags=512 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_flags: 4294932990 -> 512$"'
# can set and show in same command
log_must eval 'zdb -o zfs_recover=1 -o zfs_recover -o zfs_recover=0 | xargs | grep -qE "^zfs_recover: 0 -> 1 zfs_recover: 1 zfs_recover: 1 -> 0$"'
@@ -0,0 +1,43 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025, Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
# Destroy the secondary test pool, but only if it exists.
poolexists $TESTPOOL1 && \
destroy_pool $TESTPOOL1
# Remove the staff users before the group they belong to.
del_user $STAFF1
del_user $STAFF2
del_group $STAFF_GROUP
# Remove the other users before the group they belong to.
del_user $OTHER1
del_user $OTHER2
del_group $OTHER_GROUP
# Finish with the suite's default cleanup.
default_cleanup
@@ -0,0 +1,50 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025, Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
# Create the staff group and add two users to it; a user that already
# exists (per 'id') is left alone.
log_must add_group $STAFF_GROUP
if ! id $STAFF1 > /dev/null 2>&1; then
log_must add_user $STAFF_GROUP $STAFF1
fi
if ! id $STAFF2 > /dev/null 2>&1; then
log_must add_user $STAFF_GROUP $STAFF2
fi
# Create the other group and add two users to it; a user that already
# exists (per 'id') is left alone.
log_must add_group $OTHER_GROUP
if ! id $OTHER1 > /dev/null 2>&1; then
log_must add_user $OTHER_GROUP $OTHER1
fi
if ! id $OTHER2 > /dev/null 2>&1; then
log_must add_user $OTHER_GROUP $OTHER2
fi
# DISK is the first space-delimited entry of $DISKS; the pool setup below
# is handed the full $DISKS list.
DISK=${DISKS%% *}
default_raidz_setup $DISKS
@@ -0,0 +1,111 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025, Klara Inc.
#
# STRATEGY:
# 1. Create a pool (this is done by the test framework)
# 2. Create an encrypted dataset
# 3. Write random data to the encrypted dataset
# 4. Snapshot the dataset
# 5. As root: attempt a send and raw send (both should succeed)
# 6. Create a delegation (zfs allow -u user send testpool/encrypted_dataset)
# 7. As root: attempt a send and raw send (both should succeed)
# 8. Create a delegation (zfs allow -u user send:raw testpool/encrypted_dataset)
# 9. As root: attempt a send and raw send (both should succeed)
# 10. Disable delegation (zfs unallow)
# 11. As root: attempt a send and raw send (both should succeed)
# 12. Clean up (handled by framework)
#
# Tested as a user under ../cli_user/zfs_send_delegation_user/
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib
. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
. $STF_SUITE/tests/functional/delegate/delegate.cfg
# create encrypted dataset
log_must eval "echo $PASSPHRASE | zfs create -o encryption=on -o keyformat=passphrase $TESTPOOL/$TESTFS1"

# create target dataset for receives, unless one is already listed
if ! zfs list | grep testfs2 >/dev/null 2>&1; then
	dataset_created="TRUE"
	log_must zfs create $TESTPOOL/$TESTFS2
fi

# grant $OTHER1 the baseline (non-send) permissions on both datasets
typeset perms="snapshot,reservation,compression,checksum,userprop,receive"
log_note "Added permissions to the $OTHER1 user."
log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS1
log_must zfs allow $OTHER1 $perms $TESTPOOL/$TESTFS2

# create random data
log_must fill_fs $TESTPOOL/$TESTFS1/child 1 2047 1024 1 R

# snapshot
log_must zfs snapshot $TESTPOOL/$TESTFS1@snap1

# check baseline send abilities (should pass)
log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0_datastream.$$"
log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv0raw_datastream.$$"

# create delegation
log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1

# attempt send with full allow
log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1_datastream.$$"
log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv1raw_datastream.$$"

# create raw delegation
log_must zfs allow $OTHER1 send:raw $TESTPOOL/$TESTFS1
log_must zfs unallow $OTHER1 send $TESTPOOL/$TESTFS1

# test new send abilities (should pass)
log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2_datastream.$$"
log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv2raw_datastream.$$"

# disable raw delegation and restore the plain send delegation; checked
# with log_must, like the allow/unallow pair above, so that a failed
# permission change cannot go unnoticed
log_must zfs unallow $OTHER1 send:raw $TESTPOOL/$TESTFS1
log_must zfs allow $OTHER1 send $TESTPOOL/$TESTFS1

# verify original send abilities (should pass)
log_must eval "zfs send $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3_datastream.$$"
log_must eval "zfs send -w $TESTPOOL/$TESTFS1@snap1 | zfs receive $TESTPOOL/$TESTFS2/zfsrecv3raw_datastream.$$"
#
# Destroy the datasets used by this test.
#
# Each dataset is checked and destroyed by its own command. Previously a
# trailing backslash with no '&&' glued the second destroy_dataset call
# onto the first one as extra arguments, so $TESTFS2 was never destroyed.
#
function cleanup
{
	datasetexists $TESTPOOL/$TESTFS1 && \
	    destroy_dataset $TESTPOOL/$TESTFS1 -r
	datasetexists $TESTPOOL/$TESTFS2 && \
	    destroy_dataset $TESTPOOL/$TESTFS2 -r
}
@@ -0,0 +1,70 @@
#!/bin/ksh
# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
#
# Description:
#
# Test whether zhack metaslab leak functions correctly
#
# Strategy:
#
# 1. Create pool on a loopback device with some test data
# 2. Gather pool capacity stats
# 3. Generate fragmentation data with zdb
# 4. Destroy the pool
# 5. Create a new pool with the same configuration
# 6. Export the pool
# 7. Apply the fragmentation information with zhack metaslab leak
# 8. Import the pool
# 9. Verify that pool capacity stats match
. "$STF_SUITE"/include/libtest.shlib
verify_runnable "global"
# Exit handler: destroy the test pool and remove the temporary map file.
function cleanup
{
zpool destroy $TESTPOOL
rm $tmp
}
log_onexit cleanup
log_assert "zhack metaslab leak leaks the right amount of space"
# Temporary file that will hold the zdb allocated-map output.
typeset tmp=$(mktemp)
log_must zpool create $TESTPOOL $DISKS
# Write 16 files of 16 MiB random data, syncing the pool after each one.
for i in `seq 1 16`; do
log_must dd if=/dev/urandom of=/$TESTPOOL/f$i bs=1M count=16
log_must zpool sync $TESTPOOL
done
# Delete every even-numbered file.
for i in `seq 2 2 16`; do
log_must rm /$TESTPOOL/f$i
done
# Create 16 empty files, syncing the pool after each one.
for i in `seq 1 16`; do
log_must touch /$TESTPOOL/g$i
log_must zpool sync $TESTPOOL
done
# Record the allocated space and dump the allocation map before destroying
# the pool.
alloc=$(zpool get -Hpo value alloc $TESTPOOL)
log_must eval "zdb -m --allocated-map $TESTPOOL > $tmp"
log_must zpool destroy $TESTPOOL
# Recreate the pool on the same disks, export it, and feed the saved map
# to zhack while the pool is exported.
log_must zpool create $TESTPOOL $DISKS
log_must zpool export $TESTPOOL
log_must eval "zhack metaslab leak $TESTPOOL < $tmp"
log_must zpool import $TESTPOOL
# Compare allocated space before and after.
# NOTE(review): within_percent presumably passes when the two values are
# within 98% of each other -- confirm against its definition.
alloc2=$(zpool get -Hpo value alloc $TESTPOOL)
within_percent $alloc $alloc2 98 ||
log_fail "space usage changed too much: $alloc to $alloc2"
log_pass "zhack metaslab leak behaved correctly"
@@ -0,0 +1,76 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2025 Klara, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
#
# DESCRIPTION:
# Verify that the timestamp database updates all the tables as expected.
#
# STRATEGY:
# 1. Decrease the note and flush frequency of the txg database.
# 2. Force the pool to sync several txgs
# 3. Verify that there are entries in each of the "month", "day", and
# "minute" tables.
#
verify_runnable "global"
#
# Remove the test file and put the txg timestamp tunables back.
#
# The file is removed first, with -f, so that it is cleaned up even when
# the test failed before creating it or when restoring a tunable fails
# (log_must aborts the function on failure).
#
function cleanup
{
	rm -f /$TESTPOOL/f1
	log_must restore_tunable SPA_NOTE_TXG_TIME
	log_must restore_tunable SPA_FLUSH_TXG_TIME
}
log_onexit cleanup

log_assert "Verifiy timestamp databases all update as expected."

# Save the current note/flush tunables, then set both of them to 1.
log_must save_tunable SPA_NOTE_TXG_TIME
log_must save_tunable SPA_FLUSH_TXG_TIME
log_must set_tunable64 SPA_NOTE_TXG_TIME 1
log_must set_tunable64 SPA_FLUSH_TXG_TIME 1

# Dirty the pool and sync it three times, pausing one second between
# consecutive rounds.
for round in 1 2; do
	log_must touch /$TESTPOOL/f1
	log_must zpool sync $TESTPOOL
	sleep 1
done
log_must touch /$TESTPOOL/f1
log_must zpool sync $TESTPOOL

# Dump object 1 of the pool once and pull the fifth field of each
# txg_log_time line out of that dump; every table must report a
# non-zero value.
mos_dump="$(zdb -dddd $TESTPOOL 1)"
for table in minutes days months; do
	entries=$(echo "$mos_dump" | grep "txg_log_time:$table" | awk '{print $5}')
	if ! [[ "$entries" -ne "0" ]]; then
		log_fail "0 entries in the $table table"
	fi
done

log_pass "Verified all timestamp databases had entries as expected."
@@ -37,7 +37,7 @@ log_assert "arcstat generates output and doesn't return an error code"
typeset -i i=0
while [[ $i -lt ${#args[*]} ]]; do
log_must eval "arcstat ${args[i]} > /dev/null"
log_must eval "zarcstat ${args[i]} > /dev/null"
((i = i + 1))
done
log_pass "arcstat generates output and doesn't return an error code"
@@ -30,16 +30,16 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
log_assert "arc_summary generates output and doesn't return an error code"
log_assert "zarcsummary generates output and doesn't return an error code"
# Without this, the below checks aren't going to work the way we hope...
set -o pipefail
for arg in "" "-a" "-d" "-p 1" "-g" "-s arc" "-r"; do
log_must eval "arc_summary $arg > /dev/null"
log_must eval "zarcsummary $arg > /dev/null"
done
log_must eval "arc_summary | head > /dev/null"
log_must eval "arc_summary | head -1 > /dev/null"
log_must eval "zarcsummary | head > /dev/null"
log_must eval "zarcsummary | head -1 > /dev/null"
log_pass "arc_summary generates output and doesn't return an error code"
log_pass "zarcsummary generates output and doesn't return an error code"
@@ -30,10 +30,10 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
log_assert "arc_summary generates an error code with invalid options"
log_assert "zarcsummary generates an error code with invalid options"
for arg in "-x" "-5" "-p 7" "--err" "-@"; do
log_mustnot eval "arc_summary $arg > /dev/null"
log_mustnot eval "zarcsummary $arg > /dev/null"
done
log_pass "arc_summary generates an error code with invalid options"
log_pass "zarcsummary generates an error code with invalid options"
@@ -0,0 +1,43 @@
#!/bin/ksh -p
# SPDX-License-Identifier: CDDL-1.0
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2025, Klara Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/delegate/delegate_common.kshlib
# Destroy the secondary pool if it is still around.
if poolexists $TESTPOOL1; then
	destroy_pool $TESTPOOL1
fi

# Remove the staff accounts and their group.
del_user $STAFF1
del_user $STAFF2
del_group $STAFF_GROUP

# Remove the "other" accounts and their group.
del_user $OTHER1
del_user $OTHER2
del_group $OTHER_GROUP

# Finish with the suite-wide default cleanup.
default_cleanup

Some files were not shown because too many files have changed in this diff Show More