From 9f346abbe8645c58d8adb778c419bb5422c83cf4 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Thu, 25 Aug 2022 13:33:32 -0700
Subject: [PATCH 01/69] Revert "Avoid panic with recordsize > 128k, raw sending
 and no large_blocks"

This reverts commit 80a650b7bb04bce3aef5e4cfd1d966e3599dafd4.  That commit
inadvertently introduced a regression in ztest where one of the new ASSERTs
is triggered in dsl_scan_visitbp().

Reviewed-by: George Amanakis <gamanakis@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #12275
Closes #13799
---
 include/sys/dsl_dataset.h    |  1 -
 lib/libzfs/libzfs_sendrecv.c | 10 --------
 module/zfs/dmu_objset.c      | 10 --------
 module/zfs/dmu_send.c        |  4 ----
 module/zfs/dsl_dataset.c     | 46 ++++++++++++++++--------------------
 module/zfs/dsl_scan.c        | 15 ------------
 6 files changed, 20 insertions(+), 66 deletions(-)

diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 36307c63151..81d25da831b 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -375,7 +375,6 @@ boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
 void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx);
 void dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx);
 
-void dsl_dataset_feature_set_activation(const blkptr_t *bp, dsl_dataset_t *ds);
 void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
     dmu_tx_t *tx);
 int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index 640051e3b02..577ebf6aad4 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -874,11 +874,6 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
 		case EINVAL:
 			zfs_error_aux(hdl, "%s", strerror(errno));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "large blocks detected but large_blocks feature "
-			    "is inactive; raw send unsupported"));
-			return (zfs_error(hdl, EZFS_NOTSUP, errbuf));
 
 		default:
 			return (zfs_standard_error(hdl, errno, errbuf));
@@ -2702,11 +2697,6 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
 		case EROFS:
 			zfs_error_aux(hdl, "%s", strerror(errno));
 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
-		case ENOTSUP:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "large blocks detected but large_blocks feature "
-			    "is inactive; raw send unsupported"));
-			return (zfs_error(hdl, EZFS_NOTSUP, errbuf));
 
 		default:
 			return (zfs_standard_error(hdl, errno, errbuf));
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index b9e16f79efc..4c20afcdb9c 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -1695,16 +1695,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 
-	/*
-	 * In the codepath dsl_dataset_sync()->dmu_objset_sync() we cannot
-	 * rely on the zio above completing and calling back
-	 * dmu_objset_write_done()->dsl_dataset_block_born() before
-	 * dsl_dataset_sync() actually activates feature flags near its end.
-	 * Decide here if any features need to be activated, before
-	 * dsl_dataset_sync() completes its run.
-	 */
-	dsl_dataset_feature_set_activation(blkptr_copy, os->os_dsl_dataset);
-
 	/*
 	 * Sync special dnodes - the parent IO for the sync is the root block
 	 */
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 5e6ced2bb30..283e2d3b37b 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -493,7 +493,6 @@ dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object,
 	    (bp != NULL ? BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
 	    io_compressed : lsize != psize);
 	if (raw || compressed) {
-		ASSERT(bp != NULL);
 		ASSERT(raw || dscp->dsc_featureflags &
 		    DMU_BACKUP_FEATURE_COMPRESSED);
 		ASSERT(!BP_IS_EMBEDDED(bp));
@@ -1018,9 +1017,6 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
 		if (srdp->datablksz > SPA_OLD_MAXBLOCKSIZE &&
 		    !(dscp->dsc_featureflags &
 		    DMU_BACKUP_FEATURE_LARGE_BLOCKS)) {
-			if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW)
-				return (SET_ERROR(ENOTSUP));
-
 			while (srdp->datablksz > 0 && err == 0) {
 				int n = MIN(srdp->datablksz,
 				    SPA_OLD_MAXBLOCKSIZE);
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 21fef4c621b..8f3240a5deb 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -132,30 +132,6 @@ parent_delta(dsl_dataset_t *ds, int64_t delta)
 	return (new_bytes - old_bytes);
 }
 
-void
-dsl_dataset_feature_set_activation(const blkptr_t *bp, dsl_dataset_t *ds)
-{
-	spa_feature_t f;
-	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
-		ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] =
-		    (void *)B_TRUE;
-	}
-
-	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
-	if (f != SPA_FEATURE_NONE) {
-		ASSERT3S(spa_feature_table[f].fi_type, ==,
-		    ZFEATURE_TYPE_BOOLEAN);
-		ds->ds_feature_activation[f] = (void *)B_TRUE;
-	}
-
-	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
-	if (f != SPA_FEATURE_NONE) {
-		ASSERT3S(spa_feature_table[f].fi_type, ==,
-		    ZFEATURE_TYPE_BOOLEAN);
-		ds->ds_feature_activation[f] = (void *)B_TRUE;
-	}
-}
-
 void
 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 {
@@ -164,6 +140,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 	int compressed = BP_GET_PSIZE(bp);
 	int uncompressed = BP_GET_UCSIZE(bp);
 	int64_t delta;
+	spa_feature_t f;
 
 	dprintf_bp(bp, "ds=%p", ds);
 
@@ -188,7 +165,25 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 	dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
 	dsl_dataset_phys(ds)->ds_unique_bytes += used;
 
-	dsl_dataset_feature_set_activation(bp, ds);
+	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
+		ds->ds_feature_activation[SPA_FEATURE_LARGE_BLOCKS] =
+		    (void *)B_TRUE;
+	}
+
+
+	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+	if (f != SPA_FEATURE_NONE) {
+		ASSERT3S(spa_feature_table[f].fi_type, ==,
+		    ZFEATURE_TYPE_BOOLEAN);
+		ds->ds_feature_activation[f] = (void *)B_TRUE;
+	}
+
+	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
+	if (f != SPA_FEATURE_NONE) {
+		ASSERT3S(spa_feature_table[f].fi_type, ==,
+		    ZFEATURE_TYPE_BOOLEAN);
+		ds->ds_feature_activation[f] = (void *)B_TRUE;
+	}
 
 	/*
 	 * Track block for livelist, but ignore embedded blocks because
@@ -5027,4 +5022,3 @@ EXPORT_SYMBOL(dsl_dsobj_to_dsname);
 EXPORT_SYMBOL(dsl_dataset_check_quota);
 EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl);
 EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl);
-EXPORT_SYMBOL(dsl_dataset_feature_set_activation);
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 2b76bed1b69..28afc3dead7 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -2008,21 +2008,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
 		return;
 	}
 
-	/*
-	 * Check if this block contradicts any filesystem flags.
-	 */
-	spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS;
-	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
-		ASSERT3B(dsl_dataset_feature_is_active(ds, f), ==, B_TRUE);
-
-	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
-	if (f != SPA_FEATURE_NONE)
-		ASSERT3B(dsl_dataset_feature_is_active(ds, f), ==, B_TRUE);
-
-	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
-	if (f != SPA_FEATURE_NONE)
-		ASSERT3B(dsl_dataset_feature_is_active(ds, f), ==, B_TRUE);
-
 	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
 		scn->scn_lt_min_this_txg++;
 		return;

From 5bc0318047d9fbd6b740299df1cd3188285d9004 Mon Sep 17 00:00:00 2001
From: Christian Schwarz <me@cschwarz.com>
Date: Thu, 25 Aug 2022 23:22:10 +0200
Subject: [PATCH 02/69] ZTS: zvol_stress: fix race condition with zinject usage

In automated ZTS runs, I'd occasionally hit

    log_fail "Expected to see some write errors"

because there weren't any write errors.

The reason is that we're not syncing the zpool before `zinject -c`.
If the writes by `dd` aren't synced out at the time `zinject -c` runs,
they will not hit an error and we'll hit the log_fail above.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Christian Schwarz <christian.schwarz@nutanix.com>
Closes #13793
---
 .../zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
index 883d9984be4..3431d33d97d 100755
--- a/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
+++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
@@ -151,6 +151,7 @@ for DISK in $DISKS ; do
 	log_must zinject -d $DISK -f 10 -e io -T write $TESTPOOL
 done
 log_must dd if=/dev/zero of=$ZVOL_DEVDIR/$TESTPOOL/testvol1 bs=512 count=50
+sync_pool $TESTPOOL
 log_must zinject -c all
 
 # We should see write errors

From 2d5622f5be15e9e977a4c8fe5d24baaf487b0432 Mon Sep 17 00:00:00 2001
From: George Wilson <george.wilson@delphix.com>
Date: Fri, 26 Aug 2022 16:04:27 -0500
Subject: [PATCH 03/69] Importing from cachefile can trip assertion

When importing from cachefile, it is possible that the builtin retry
logic will trip an assertion because it also fails to find the pool.
This fix addresses that case and returns the correct error message to
the user.

Reviewed-by: Richard Yao <ryao@gentoo.org>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Wilson <gwilson@delphix.com>
Closes #13781
---
 lib/libzutil/zutil_import.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c
index 0bbf232f24a..252b0bac685 100644
--- a/lib/libzutil/zutil_import.c
+++ b/lib/libzutil/zutil_import.c
@@ -1694,6 +1694,8 @@ zpool_find_import_cached(libpc_handle_t *hdl, importargs_t *iarg)
 			 * caller.
 			 */
 			nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
+			if (pair == NULL)
+				continue;
 			fnvlist_add_nvlist(pools, nvpair_name(pair),
 			    fnvpair_value_nvlist(pair));
 

From 58e8054bce3f493cc1f38f7177cfdb942fa4deb9 Mon Sep 17 00:00:00 2001
From: Andrew Innes <andrew.c12@gmail.com>
Date: Sat, 3 Sep 2022 04:15:18 +0800
Subject: [PATCH 04/69] Alloc zdb_cb_t to fix stack issue

Allocate zdb_cb_t on the heap since it is too large for the stack on
Windows, which results in `zdb` crashing immediately.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Andrew Innes <andrew.c12@gmail.com>
Co-authored-by: Jorgen Lundman <lundman@lundman.net>
Closes #13807
---
 cmd/zdb/zdb.c | 81 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 36 deletions(-)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index fdf569691cb..5389520e803 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -6415,7 +6415,7 @@ deleted_livelists_dump_mos(spa_t *spa)
 static int
 dump_block_stats(spa_t *spa)
 {
-	zdb_cb_t zcb = {{{{0}}}};
+	zdb_cb_t *zcb;
 	zdb_blkstats_t *zb, *tzb;
 	uint64_t norm_alloc, norm_space, total_alloc, total_found;
 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
@@ -6424,6 +6424,8 @@ dump_block_stats(spa_t *spa)
 	int e, c, err;
 	bp_embedded_type_t i;
 
+	zcb = umem_zalloc(sizeof (zdb_cb_t), UMEM_NOFAIL);
+
 	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
 	    (dump_opt['c'] == 1) ? "metadata " : "",
@@ -6443,39 +6445,39 @@ dump_block_stats(spa_t *spa)
 	 * pool claiming each block we discover, but we skip opening any space
 	 * maps.
 	 */
-	zdb_leak_init(spa, &zcb);
+	zdb_leak_init(spa, zcb);
 
 	/*
 	 * If there's a deferred-free bplist, process that first.
 	 */
 	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
-	    bpobj_count_block_cb, &zcb, NULL);
+	    bpobj_count_block_cb, zcb, NULL);
 
 	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
-		    bpobj_count_block_cb, &zcb, NULL);
+		    bpobj_count_block_cb, zcb, NULL);
 	}
 
-	zdb_claim_removing(spa, &zcb);
+	zdb_claim_removing(spa, zcb);
 
 	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
 		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
 		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
-		    &zcb, NULL));
+		    zcb, NULL));
 	}
 
-	deleted_livelists_count_blocks(spa, &zcb);
+	deleted_livelists_count_blocks(spa, zcb);
 
 	if (dump_opt['c'] > 1)
 		flags |= TRAVERSE_PREFETCH_DATA;
 
-	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
-	zcb.zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
-	zcb.zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
-	zcb.zcb_totalasize +=
+	zcb->zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
+	zcb->zcb_totalasize += metaslab_class_get_alloc(spa_special_class(spa));
+	zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
+	zcb->zcb_totalasize +=
 	    metaslab_class_get_alloc(spa_embedded_log_class(spa));
-	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
-	err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
+	zcb->zcb_start = zcb->zcb_lastprint = gethrtime();
+	err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb);
 
 	/*
 	 * If we've traversed the data blocks then we need to wait for those
@@ -6496,15 +6498,15 @@ dump_block_stats(spa_t *spa)
 	 * Done after zio_wait() since zcb_haderrors is modified in
 	 * zdb_blkptr_done()
 	 */
-	zcb.zcb_haderrors |= err;
+	zcb->zcb_haderrors |= err;
 
-	if (zcb.zcb_haderrors) {
+	if (zcb->zcb_haderrors) {
 		(void) printf("\nError counts:\n\n");
 		(void) printf("\t%5s  %s\n", "errno", "count");
 		for (e = 0; e < 256; e++) {
-			if (zcb.zcb_errors[e] != 0) {
+			if (zcb->zcb_errors[e] != 0) {
 				(void) printf("\t%5d  %llu\n",
-				    e, (u_longlong_t)zcb.zcb_errors[e]);
+				    e, (u_longlong_t)zcb->zcb_errors[e]);
 			}
 		}
 	}
@@ -6512,9 +6514,9 @@ dump_block_stats(spa_t *spa)
 	/*
 	 * Report any leaked segments.
 	 */
-	leaks |= zdb_leak_fini(spa, &zcb);
+	leaks |= zdb_leak_fini(spa, zcb);
 
-	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
+	tzb = &zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
 
 	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
 	norm_space = metaslab_class_get_space(spa_normal_class(spa));
@@ -6525,8 +6527,8 @@ dump_block_stats(spa_t *spa)
 	    metaslab_class_get_alloc(spa_special_class(spa)) +
 	    metaslab_class_get_alloc(spa_dedup_class(spa)) +
 	    get_unflushed_alloc_space(spa);
-	total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
-	    zcb.zcb_removing_size + zcb.zcb_checkpoint_size;
+	total_found = tzb->zb_asize - zcb->zcb_dedup_asize +
+	    zcb->zcb_removing_size + zcb->zcb_checkpoint_size;
 
 	if (total_found == total_alloc && !dump_opt['L']) {
 		(void) printf("\n\tNo leaks (block sum matches space"
@@ -6541,8 +6543,10 @@ dump_block_stats(spa_t *spa)
 		leaks = B_TRUE;
 	}
 
-	if (tzb->zb_count == 0)
+	if (tzb->zb_count == 0) {
+		umem_free(zcb, sizeof (zdb_cb_t));
 		return (2);
+	}
 
 	(void) printf("\n");
 	(void) printf("\t%-16s %14llu\n", "bp count:",
@@ -6561,9 +6565,9 @@ dump_block_stats(spa_t *spa)
 	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
 	    (double)tzb->zb_lsize / tzb->zb_asize);
 	(void) printf("\t%-16s %14llu    ref>1: %6llu   deduplication: %6.2f\n",
-	    "bp deduped:", (u_longlong_t)zcb.zcb_dedup_asize,
-	    (u_longlong_t)zcb.zcb_dedup_blocks,
-	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
+	    "bp deduped:", (u_longlong_t)zcb->zcb_dedup_asize,
+	    (u_longlong_t)zcb->zcb_dedup_blocks,
+	    (double)zcb->zcb_dedup_asize / tzb->zb_asize + 1.0);
 	(void) printf("\t%-16s %14llu     used: %5.2f%%\n", "Normal class:",
 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
 
@@ -6601,19 +6605,19 @@ dump_block_stats(spa_t *spa)
 	}
 
 	for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
-		if (zcb.zcb_embedded_blocks[i] == 0)
+		if (zcb->zcb_embedded_blocks[i] == 0)
 			continue;
 		(void) printf("\n");
 		(void) printf("\tadditional, non-pointer bps of type %u: "
 		    "%10llu\n",
-		    i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
+		    i, (u_longlong_t)zcb->zcb_embedded_blocks[i]);
 
 		if (dump_opt['b'] >= 3) {
 			(void) printf("\t number of (compressed) bytes:  "
 			    "number of bps\n");
-			dump_histogram(zcb.zcb_embedded_histogram[i],
-			    sizeof (zcb.zcb_embedded_histogram[i]) /
-			    sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
+			dump_histogram(zcb->zcb_embedded_histogram[i],
+			    sizeof (zcb->zcb_embedded_histogram[i]) /
+			    sizeof (zcb->zcb_embedded_histogram[i][0]), 0);
 		}
 	}
 
@@ -6673,7 +6677,7 @@ dump_block_stats(spa_t *spa)
 			else
 				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
 
-			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
+			if (zcb->zcb_type[ZB_TOTAL][t].zb_asize == 0) {
 				(void) printf("%6s\t%5s\t%5s\t%5s"
 				    "\t%5s\t%5s\t%6s\t%s\n",
 				    "-",
@@ -6689,7 +6693,7 @@ dump_block_stats(spa_t *spa)
 
 			for (l = ZB_TOTAL - 1; l >= -1; l--) {
 				level = (l == -1 ? ZB_TOTAL : l);
-				zb = &zcb.zcb_type[level][t];
+				zb = &zcb->zcb_type[level][t];
 
 				if (zb->zb_asize == 0)
 					continue;
@@ -6698,7 +6702,7 @@ dump_block_stats(spa_t *spa)
 					continue;
 
 				if (level == 0 && zb->zb_asize ==
-				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
+				    zcb->zcb_type[ZB_TOTAL][t].zb_asize)
 					continue;
 
 				zdb_nicenum(zb->zb_count, csize,
@@ -6742,18 +6746,23 @@ dump_block_stats(spa_t *spa)
 
 		/* Output a table summarizing block sizes in the pool */
 		if (dump_opt['b'] >= 2) {
-			dump_size_histograms(&zcb);
+			dump_size_histograms(zcb);
 		}
 	}
 
 	(void) printf("\n");
 
-	if (leaks)
+	if (leaks) {
+		umem_free(zcb, sizeof (zdb_cb_t));
 		return (2);
+	}
 
-	if (zcb.zcb_haderrors)
+	if (zcb->zcb_haderrors) {
+		umem_free(zcb, sizeof (zdb_cb_t));
 		return (3);
+	}
 
+	umem_free(zcb, sizeof (zdb_cb_t));
 	return (0);
 }
 

From 0b30dc484f7e70bc8bfe53fefc8581d181044efa Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Fri, 2 Sep 2022 16:20:10 -0400
Subject: [PATCH 05/69] FreeBSD: Cleanup dead code from VFS

The vfs_*_feature() macros turn anything that uses them into dead code,
so we can delete all of it.

As a side effect, zfs_set_fuid_feature() is now identical in
module/os/freebsd/zfs/zfs_vnops_os.c and
module/os/linux/zfs/zfs_vnops_os.c. A few other functions are identical
too. Future cleanup could move these into a common file.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13832
---
 include/os/freebsd/spl/sys/vfs.h     |  4 ----
 module/os/freebsd/zfs/zfs_vfsops.c   | 26 --------------------------
 module/os/freebsd/zfs/zfs_vnops_os.c | 23 -----------------------
 3 files changed, 53 deletions(-)

diff --git a/include/os/freebsd/spl/sys/vfs.h b/include/os/freebsd/spl/sys/vfs.h
index 22d57cc473e..7f163fcfdb1 100644
--- a/include/os/freebsd/spl/sys/vfs.h
+++ b/include/os/freebsd/spl/sys/vfs.h
@@ -117,9 +117,5 @@ typedef	uint64_t	vfs_feature_t;
 #define	VFSFT_ZEROCOPY_SUPPORTED	0x100000200
 				/* Support loaning /returning cache buffer */
 
-#define	vfs_set_feature(vfsp, feature)		do { } while (0)
-#define	vfs_clear_feature(vfsp, feature)	do { } while (0)
-#define	vfs_has_feature(vfsp, feature)		(0)
-
 #include <sys/mount.h>
 #endif	/* _OPENSOLARIS_SYS_VFS_H_ */
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 24e06b1a880..4e4a5f8d215 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -1151,23 +1151,6 @@ static void
 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
 {
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
-	if (zfsvfs->z_vfs) {
-		if (zfsvfs->z_use_fuids) {
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
-			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
-		} else {
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
-			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
-		}
-	}
 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 }
 
@@ -1226,15 +1209,6 @@ zfs_domount(vfs_t *vfsp, char *osname)
 	 * Set features for file system.
 	 */
 	zfs_set_fuid_feature(zfsvfs);
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
-		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
-		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
-		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
-	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
-		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
-		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
-	}
-	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
 
 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
 		uint64_t pval;
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index b46cc550c78..f0579626c5a 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -1672,7 +1672,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	int		outcount;
 	int		error;
 	uint8_t		prefetch;
-	boolean_t	check_sysattrs;
 	uint8_t		type;
 	int		ncooks;
 	cookie_t	*cooks = NULL;
@@ -1756,19 +1755,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 		*cookies = cooks;
 		*ncookies = ncooks;
 	}
-	/*
-	 * If this VFS supports the system attribute view interface; and
-	 * we're looking at an extended attribute directory; and we care
-	 * about normalization conflicts on this vfs; then we must check
-	 * for normalization conflicts with the sysattr name space.
-	 */
-#ifdef TODO
-	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
-	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
-	    (flags & V_RDDIR_ENTFLAGS);
-#else
-	check_sysattrs = 0;
-#endif
 
 	/*
 	 * Transform to file-system independent format
@@ -1824,15 +1810,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 			 */
 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
-
-			if (check_sysattrs && !zap.za_normalization_conflict) {
-#ifdef TODO
-				zap.za_normalization_conflict =
-				    xattr_sysattr_casechk(zap.za_name);
-#else
-				panic("%s:%u: TODO", __func__, __LINE__);
-#endif
-			}
 		}
 
 		if (flags & V_RDDIR_ACCFILTER) {

From f933b3fd4dda8b37aa37aeae05951b76f51ddae7 Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Fri, 2 Sep 2022 16:21:18 -0400
Subject: [PATCH 06/69] Apply arc_shrink_shift to ARC above arc_c_min

It makes sense to free memory in smaller chunks when approaching
arc_c_min to let other kernel subsystems free more, since after
that point we can't free anything.  This also matches behavior on
Linux, where the shrinker reports only the size above arc_c_min.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Closes #13794
---
 module/os/freebsd/zfs/arc_os.c | 5 ++++-
 module/zfs/arc.c               | 9 +++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c
index ca2bf884257..dbd71ea43fd 100644
--- a/module/os/freebsd/zfs/arc_os.c
+++ b/module/os/freebsd/zfs/arc_os.c
@@ -221,7 +221,10 @@ arc_lowmem(void *arg __unused, int howto __unused)
 	arc_warm = B_TRUE;
 	arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
 	free_memory = arc_available_memory();
-	to_free = (arc_c >> arc_shrink_shift) - MIN(free_memory, 0);
+	int64_t can_free = arc_c - arc_c_min;
+	if (can_free <= 0)
+		return;
+	to_free = (can_free >> arc_shrink_shift) - MIN(free_memory, 0);
 	DTRACE_PROBE2(arc__needfree, int64_t, free_memory, int64_t, to_free);
 	arc_reduce_target_size(to_free);
 
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 579e78befe1..980dc60d0cc 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -5051,10 +5051,11 @@ arc_reap_cb(void *arg, zthr_t *zthr)
 	 */
 	free_memory = arc_available_memory();
 
-	int64_t to_free =
-	    (arc_c >> arc_shrink_shift) - free_memory;
-	if (to_free > 0) {
-		arc_reduce_target_size(to_free);
+	int64_t can_free = arc_c - arc_c_min;
+	if (can_free > 0) {
+		int64_t to_free = (can_free >> arc_shrink_shift) - free_memory;
+		if (to_free > 0)
+			arc_reduce_target_size(to_free);
 	}
 	spl_fstrans_unmark(cookie);
 }

From 899355d293830f250e46d6b651db5afed08b91ea Mon Sep 17 00:00:00 2001
From: Ameer Hamza <106930537+ixhamza@users.noreply.github.com>
Date: Sat, 3 Sep 2022 01:24:07 +0500
Subject: [PATCH 07/69] Add zilstat script to report zil kstats in a user
 friendly manner

Added a Python script to process both global and per-dataset
zil kstats and report them in a user-friendly manner similar
to arcstat and dbufstat.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #13704
---
 cmd/Makefile.am                               |   7 +-
 cmd/zilstat.in                                | 467 ++++++++++++++++++
 rpm/generic/zfs.spec.in                       |   4 +-
 tests/runfiles/common.run                     |   3 +-
 tests/runfiles/sanity.run                     |   3 +-
 tests/zfs-tests/include/commands.cfg          |   1 +
 tests/zfs-tests/tests/Makefile.am             |   1 +
 .../cli_user/misc/zilstat_001_pos.ksh         |  37 ++
 8 files changed, 517 insertions(+), 6 deletions(-)
 create mode 100755 cmd/zilstat.in
 create mode 100755 tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh

diff --git a/cmd/Makefile.am b/cmd/Makefile.am
index 65de980da30..6d6de4adb42 100644
--- a/cmd/Makefile.am
+++ b/cmd/Makefile.am
@@ -100,12 +100,13 @@ endif
 
 
 if USING_PYTHON
-bin_SCRIPTS      += arc_summary     arcstat        dbufstat
-CLEANFILES       += arc_summary     arcstat        dbufstat
-dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in
+bin_SCRIPTS      += arc_summary     arcstat        dbufstat        zilstat
+CLEANFILES       += arc_summary     arcstat        dbufstat        zilstat
+dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in
 
 $(call SUBST,arcstat,%D%/)
 $(call SUBST,dbufstat,%D%/)
+$(call SUBST,zilstat,%D%/)
 arc_summary: %D%/arc_summary
 	$(AM_V_at)cp $< $@
 endif
diff --git a/cmd/zilstat.in b/cmd/zilstat.in
new file mode 100755
index 00000000000..cf4e2e0dd0c
--- /dev/null
+++ b/cmd/zilstat.in
@@ -0,0 +1,467 @@
+#!/usr/bin/env @PYTHON_SHEBANG@
+#
+# Print out statistics for all zil stats. This information is
+# available through the zil kstat.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License").  You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# This script must remain compatible with Python 3.6+.
+#
+
+import sys
+import subprocess
+import time
+import copy
+import os
+import re
+import signal
+from collections import defaultdict
+import argparse
+from argparse import RawTextHelpFormatter
+
+cols = {
+	# hdr:       [size,      scale, 	 kstat name]
+	"time":      [8,         -1,         "time"],
+	"pool":      [12,        -1,         "pool"],
+	"ds":        [12,        -1,         "dataset_name"],
+	"obj":       [12,        -1,         "objset"],
+	"zcc":       [10,        1000,       "zil_commit_count"],
+	"zcwc":      [10,        1000,       "zil_commit_writer_count"],
+	"ziic":      [10,        1000,       "zil_itx_indirect_count"],
+	"zic":       [10,        1000,       "zil_itx_count"],
+	"ziib":      [10,        1024,       "zil_itx_indirect_bytes"],
+	"zicc":      [10,        1000,       "zil_itx_copied_count"],
+	"zicb":      [10,        1024,       "zil_itx_copied_bytes"],
+	"zinc":      [10,        1000,       "zil_itx_needcopy_count"],
+	"zinb":      [10,        1024,       "zil_itx_needcopy_bytes"],
+	"zimnc":     [10,        1000,       "zil_itx_metaslab_normal_count"],
+	"zimnb":     [10,        1024,       "zil_itx_metaslab_normal_bytes"],
+	"zimsc":     [10,        1000,       "zil_itx_metaslab_slog_count"],
+	"zimsb":     [10,        1024,       "zil_itx_metaslab_slog_bytes"],
+}
+
+hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
+	"zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
+
+ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
+	"zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
+
+cmd = ("Usage: zilstat [-hv] [-a|-p pool|-d ds,...] [-f cols] [-s sep] [-i sec]")
+
+curr = {}
+diff = {}
+kstat = {}
+ds_pairs = {}
+pool_name = None
+dataset_name = None
+interval = 0
+sep = "  "
+gFlag = True
+dsFlag = False
+
+def prettynum(sz, scale, num=0):  # format num right-aligned in sz columns
+	suffix = [' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
+	index = 0
+	save = 0
+
+	if scale == -1:  # scale -1: print the value verbatim as a string
+		return "%*s" % (sz, num)
+
+	# Rounding error, return 0
+	elif 0 < num < 1:
+		num = 0
+
+	while num > scale and index < 5:  # divide down; stops at suffix 'P'
+		save = num
+		num = num / scale
+		index += 1
+
+	if index == 0:  # never scaled: plain integer, no suffix
+		return "%*d" % (sz, num)
+
+	if (save / scale) < 10:  # one decimal place for scaled values below 10
+		return "%*.1f%s" % (sz - 1, num, suffix[index])
+	else:
+		return "%*d%s" % (sz - 1, num, suffix[index])
+
+def print_header():  # write one header row for the columns listed in hdr
+	global hdr
+	global sep
+	for col in hdr:
+		new_col = col
+		if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
+			new_col += "/s"  # counters are shown per second in interval mode
+		sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
+	sys.stdout.write("\n")
+
+def print_values(v):  # write one row; v maps kstat names to values
+	global hdr
+	global sep
+	for col in hdr:
+		val = v[cols[col][2]]  # cols[col][2] is the kstat name for col
+		if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
+			val = v[cols[col][2]] // interval  # floored per-second rate
+		sys.stdout.write("%s%s" % (
+			prettynum(cols[col][0], cols[col][1], val), sep))
+	sys.stdout.write("\n")
+
+def print_dict(d):  # print one row per objset; d is indexed d[pool][objset]
+	for pool in d:
+		for objset in d[pool]:
+			print_values(d[pool][objset])
+
+def detailed_usage():  # list every column and its kstat name, then exit 0
+	sys.stderr.write("%s\n" % cmd)
+	sys.stderr.write("Field definitions are as follows:\n")
+	for key in cols:
+		sys.stderr.write("%11s : %s\n" % (key, cols[key][2]))
+	sys.stderr.write("\n")
+	sys.exit(0)
+
+def init():  # parse the command line into the module-level settings
+	global pool_name
+	global dataset_name
+	global interval
+	global hdr
+	global curr
+	global gFlag
+	global sep
+
+	curr = dict()
+
+	parser = argparse.ArgumentParser(description='Program to print zilstats',
+                                	 add_help=True,
+					 formatter_class=RawTextHelpFormatter,
+					 epilog="\nUsage Examples\n"\
+				 		"Note: Global zilstats is shown by default,"\
+						" if none of a|p|d option is provided\n"\
+				 		"\tzilstat -a\n"\
+						'\tzilstat -v\n'\
+						'\tzilstat -p tank\n'\
+						'\tzilstat -d tank/d1,tank/d2,tank/zv1\n'\
+						'\tzilstat -i 1\n'\
+						'\tzilstat -s \"***\"\n'\
+						'\tzilstat -f zcwc,zimnb,zimsb\n')
+
+	parser.add_argument(
+		"-v", "--verbose",
+		action="store_true",
+		help="List field headers and definitions"
+	)
+
+	pool_grp = parser.add_mutually_exclusive_group()
+
+	pool_grp.add_argument(
+		"-a", "--all",
+		action="store_true",
+		dest="all",
+		help="Print all dataset stats"
+	)
+
+	pool_grp.add_argument(
+		"-p", "--pool",
+		type=str,
+		help="Print stats for all datasets of a specified pool"
+	)
+
+	pool_grp.add_argument(
+		"-d", "--dataset",
+		type=str,
+		help="Print given dataset(s) (Comma separated)"
+	)
+
+	parser.add_argument(
+		"-f", "--columns",
+		type=str,
+		help="Specify specific fields to print (see -v)"
+	)
+
+	parser.add_argument(
+		"-s", "--separator",
+		type=str,
+		help="Override default field separator with custom "
+			 "character or string"
+	)
+
+	parser.add_argument(
+		"-i", "--interval",
+		type=int,
+		dest="interval",
+		help="Print stats between specified interval"
+			 " (in seconds)"
+	)
+
+	parsed_args = parser.parse_args()
+
+	if parsed_args.verbose:
+		detailed_usage()
+
+	if parsed_args.all:
+		gFlag = False
+
+	if parsed_args.interval:
+		interval = parsed_args.interval
+
+	if parsed_args.pool:
+		pool_name = parsed_args.pool
+		gFlag = False
+
+	if parsed_args.dataset:
+		dataset_name = parsed_args.dataset
+		gFlag = False
+
+	if parsed_args.separator:
+		sep = parsed_args.separator
+
+	if gFlag:  # global stats carry no pool/ds/obj columns
+		hdr = ghdr
+
+	if parsed_args.columns:
+		hdr = parsed_args.columns.split(",")
+
+		invalid = []
+		for ele in hdr:
+			if gFlag and ele not in ghdr:
+				invalid.append(ele)
+			elif ele not in cols:
+				invalid.append(ele)
+
+		if len(invalid) > 0:
+			sys.stderr.write("Invalid column definition! -- %s\n" % invalid)
+			sys.exit(1)
+
+	if pool_name and dataset_name:  # -p/-d are also exclusive in pool_grp
+		print ("Error: Can not filter both dataset and pool")
+		sys.exit(1)
+
+def FileCheck(fname):  # open fname for reading, or print a message and exit 1
+	try:
+		return (open(fname))  # NOTE(review): callers iterate but never close
+	except IOError:
+		print ("Unable to open zilstat proc file: " + fname)
+		sys.exit(1)
+
+if sys.platform.startswith('freebsd'):
+	# Requires py-sysctl on FreeBSD
+	import sysctl
+
+	def kstat_update(pool = None, objid = None):  # reload kstat via sysctl
+		global kstat
+		kstat = {}
+		if not pool:
+			file = "kstat.zfs.misc.zil"
+			k = [ctl for ctl in sysctl.filter(file) \
+				if ctl.type != sysctl.CTLTYPE_NODE]
+			kstat_process_str(k, file, "GLOBAL", len(file + "."))
+		elif objid:
+			file = "kstat.zfs." + pool + ".dataset.objset-" + objid
+			k = [ctl for ctl in sysctl.filter(file) if ctl.type \
+				!= sysctl.CTLTYPE_NODE]
+			kstat_process_str(k, file, objid, len(file + "."))
+		else:
+			file = "kstat.zfs." + pool + ".dataset"
+			zil_start = len(file + ".")
+			obj_start = len("kstat.zfs." + pool + ".")
+			k = [ctl for ctl in sysctl.filter(file)
+				if ctl.type != sysctl.CTLTYPE_NODE]
+			for s in k:
+				if not s or (s.name.find("zil") == -1 and \
+					s.name.find("dataset_name") == -1):
+					continue
+				name, value = s.name, s.value
+				objid = re.findall(r'0x[0-9A-F]+', \
+					name[obj_start:], re.I)[0]
+				if objid not in kstat:
+					kstat[objid] = dict()
+				zil_start = len(file + ".objset-" + \
+					objid + ".")
+				kstat[objid][name[zil_start:]] = value \
+					if (name.find("dataset_name") != -1) \
+					else int(value)  # find() is -1 (truthy) on a miss
+
+	def kstat_process_str(k, file, objset = "GLOBAL", zil_start = 0):
+			global kstat
+			if not k:
+				print("Unable to process kstat for: " + file)
+				sys.exit(1)
+			kstat[objset] = dict()
+			for s in k:
+				if not s or (s.name.find("zil") == -1 and \
+				    s.name.find("dataset_name") == -1):
+					continue
+				name, value = s.name, s.value
+				kstat[objset][name[zil_start:]] = value \
+				    if (name.find("dataset_name") != -1) else int(value)
+
+elif sys.platform.startswith('linux'):
+	def kstat_update(pool = None, objid = None):  # reload kstat from /proc
+		global kstat
+		kstat = {}
+		if not pool:
+			k = [line.strip() for line in \
+				FileCheck("/proc/spl/kstat/zfs/zil")]
+			kstat_process_str(k, "/proc/spl/kstat/zfs/zil")
+		elif objid:
+			file = "/proc/spl/kstat/zfs/" + pool + "/objset-" + objid
+			k = [line.strip() for line in FileCheck(file)]
+			kstat_process_str(k, file, objid)
+		else:
+			if not os.path.exists(f"/proc/spl/kstat/zfs/{pool}"):
+				print("Pool \"" + pool + "\" does not exist, Exiting")
+				sys.exit(1)
+			objsets = os.listdir(f'/proc/spl/kstat/zfs/{pool}')
+			for objid in objsets:
+				if objid.find("objset-") == -1:
+					continue
+				file = "/proc/spl/kstat/zfs/" + pool + "/" + objid
+				k = [line.strip() for line in FileCheck(file)]
+				kstat_process_str(k, file, objid.replace("objset-", ""))
+
+	def kstat_process_str(k, file, objset = "GLOBAL", zil_start = 0):
+			global kstat
+			if not k:
+				print("Unable to process kstat for: " + file)
+				sys.exit(1)
+
+			kstat[objset] = dict()
+			for s in k:
+				if not s or (s.find("zil") == -1 and \
+				    s.find("dataset_name") == -1):
+					continue
+				name, unused, value = s.split(None, 2)  # value may hold spaces
+				kstat[objset][name] = value \
+				    if (name == "dataset_name") else int(value)
+
+def zil_process_kstat():  # rebuild curr for the selected global/pool/dataset
+	global curr, pool_name, dataset_name, dsFlag, ds_pairs
+	curr.clear()
+	if gFlag == True:
+		kstat_update()
+		zil_build_dict()
+	else:
+		if pool_name:
+			kstat_update(pool_name)
+			zil_build_dict(pool_name)
+		elif dataset_name:
+			if dsFlag == False:  # resolve dataset names to objset ids once
+				dsFlag = True
+				datasets = dataset_name.split(',')
+				ds_pairs = defaultdict(list)
+				for ds in datasets:
+					try:
+						objid = subprocess.check_output(['zfs',
+						    'list', '-Hpo', 'objsetid', ds], \
+						    stderr=subprocess.DEVNULL) \
+						    .decode('utf-8').strip()
+					except subprocess.CalledProcessError as e:
+						print("Command: \"zfs list -Hpo objsetid "\
+						+ str(ds) + "\" failed with error code: "\
+						+ str(e.returncode))
+						print("Please make sure that dataset \""\
+						+ str(ds) + "\" exists")
+						sys.exit(1)
+					if not objid:
+						continue
+					ds_pairs[ds.split('/')[0]]. \
+						append(hex(int(objid)))
+			for pool, objids in ds_pairs.items():
+				for objid in objids:
+					kstat_update(pool, objid)
+					zil_build_dict(pool)
+		else:
+			try:
+				pools = subprocess.check_output(['zpool', 'list', '-Hpo',\
+				    'name']).decode('utf-8').split()
+			except subprocess.CalledProcessError as e:
+				print("Command: \"zpool list -Hpo name\" failed with error"\
+				    " code: " + str(e.returncode))
+				sys.exit(1)
+			for pool in pools:
+				kstat_update(pool)
+				zil_build_dict(pool)
+
+def calculate_diff():  # resample curr and store per-counter deltas in diff
+	global curr, diff
+	prev = copy.deepcopy(curr)
+	zil_process_kstat()
+	diff = copy.deepcopy(curr)
+	for pool in curr:
+		for objset in curr[pool]:
+			for col in hdr:
+				if col not in ['time', 'pool', 'ds', 'obj']:
+					key = cols[col][2]
+					# No previous sample for this objset (first
+					# pass, or it appeared since): report 0
+					if objset not in prev.get(pool, {}):
+						diff[pool][objset][key] = 0
+					else:
+						diff[pool][objset][key] \
+							= curr[pool][objset][key] \
+							- prev[pool][objset][key]
+
+def zil_build_dict(pool = "GLOBAL"):  # copy kstat into curr[pool][objset]
+	global kstat
+	for objset in kstat:
+		for key in kstat[objset]:
+			val = kstat[objset][key]
+			if pool not in curr:
+				curr[pool] = dict()
+			if objset not in curr[pool]:
+				curr[pool][objset] = dict()
+			curr[pool][objset][key] = val
+		# NOTE(review): KeyError below if kstat[objset] had no keys
+		curr[pool][objset]["pool"] = pool
+		curr[pool][objset]["objset"] = objset
+		curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
+			time.localtime())
+
+def sign_handler_epipe(sig, frame):  # SIGPIPE handler: report and exit 0
+	print("Caught EPIPE signal: " + str(frame), file=sys.stderr)
+	print("Exiting...", file=sys.stderr)  # stdout is the broken pipe
+	sys.exit(0)
+
+def main():  # print the stats once, or every `interval` seconds in a loop
+	global interval
+	global curr
+	hprint = False
+	init()
+	signal.signal(signal.SIGINT, signal.SIG_DFL)
+	signal.signal(signal.SIGPIPE, sign_handler_epipe)
+
+	if interval > 0:
+		while True:
+			calculate_diff()  # refreshes curr; diff holds the deltas
+			if not diff:
+				print ("Error: No stats to show")
+				sys.exit(0)
+			if hprint == False:  # header goes out once, before the first row
+				print_header()
+				hprint = True
+			print_dict(diff)
+			time.sleep(interval)
+	else:
+		zil_process_kstat()
+		if not curr:
+			print ("Error: No stats to show")
+			sys.exit(0)
+		print_header()
+		print_dict(curr)
+
+if __name__ == '__main__':
+	main()
+
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
index b1a94fbb7ab..aea82d24178 100644
--- a/rpm/generic/zfs.spec.in
+++ b/rpm/generic/zfs.spec.in
@@ -409,7 +409,8 @@ make install DESTDIR=%{?buildroot}
 find %{?buildroot}%{_libdir} -name '*.la' -exec rm -f {} \;
 %if 0%{!?__brp_mangle_shebangs:1}
 find %{?buildroot}%{_bindir} \
-    \( -name arc_summary -or -name arcstat -or -name dbufstat \) \
+    \( -name arc_summary -or -name arcstat -or -name dbufstat \
+    -or -name zilstat \) \
     -exec %{__sed} -i 's|^#!.*|#!%{__python}|' {} \;
 find %{?buildroot}%{_datadir} \
     \( -name test-runner.py -or -name zts-report.py \) \
@@ -487,6 +488,7 @@ systemctl --system daemon-reload >/dev/null || true
 %{_bindir}/arc_summary
 %{_bindir}/arcstat
 %{_bindir}/dbufstat
+%{_bindir}/zilstat
 # Man pages
 %{_mandir}/man1/*
 %{_mandir}/man4/*
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index b9a9e0efcc8..e8443ffabcf 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -551,7 +551,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
     'zpool_offline_001_neg', 'zpool_online_001_neg', 'zpool_remove_001_neg',
     'zpool_replace_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
     'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
-    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege']
+    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
+    'zilstat_001_pos']
 user =
 tags = ['functional', 'cli_user', 'misc']
 
diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run
index 7c466719643..f115f0b578c 100644
--- a/tests/runfiles/sanity.run
+++ b/tests/runfiles/sanity.run
@@ -396,7 +396,8 @@ tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg',
     'zpool_history_001_neg', 'zpool_offline_001_neg', 'zpool_online_001_neg',
     'zpool_remove_001_neg', 'zpool_scrub_001_neg', 'zpool_set_001_neg',
     'zpool_status_001_neg', 'zpool_upgrade_001_neg', 'arcstat_001_pos',
-    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege']
+    'arc_summary_001_pos', 'arc_summary_002_neg', 'zpool_wait_privilege',
+    'zilstat_001_pos']
 user =
 tags = ['functional', 'cli_user', 'misc']
 
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
index 47357dca57f..4098562210b 100644
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -169,6 +169,7 @@ export ZFS_FILES='zdb
     raidz_test
     arc_summary
     arcstat
+    zilstat
     dbufstat
     mount.zfs
     zed
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 4a815db8a6d..b80489af255 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1230,6 +1230,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/cli_user/misc/arcstat_001_pos.ksh \
 	functional/cli_user/misc/arc_summary_001_pos.ksh \
 	functional/cli_user/misc/arc_summary_002_neg.ksh \
+	functional/cli_user/misc/zilstat_001_pos.ksh \
 	functional/cli_user/misc/cleanup.ksh \
 	functional/cli_user/misc/setup.ksh \
 	functional/cli_user/misc/zdb_001_neg.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
new file mode 100755
index 00000000000..9bf6a94cfc8
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
@@ -0,0 +1,37 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
+
+set -A args  "" "-s \",\"" "-v" \
+    "-f time,zcwc,zimnb,zimsb"
+
+log_assert "zilstat generates output and doesn't return an error code"
+
+typeset -i i=0
+while [[ $i -lt ${#args[*]} ]]; do
+        log_must eval "zilstat ${args[i]} > /dev/null"
+        ((i = i + 1))
+done
+log_pass "zilstat generates output and doesn't return an error code"

From 4723eba8c0af10fc25d9203ffa0cd4499b4a875d Mon Sep 17 00:00:00 2001
From: Ryan Moeller <ryan@iXsystems.com>
Date: Tue, 9 Aug 2022 09:05:29 +0000
Subject: [PATCH 08/69] FreeBSD: Mark ZFS_MODULE_PARAM_CALL as MPSAFE

ZFS_MODULE_PARAM_CALL handlers implement their own locking if needed
and do not require Giant.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Closes #13756
---
 include/os/freebsd/spl/sys/mod_os.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/os/freebsd/spl/sys/mod_os.h b/include/os/freebsd/spl/sys/mod_os.h
index 3a9ebbfc3bc..d64a1733ad8 100644
--- a/include/os/freebsd/spl/sys/mod_os.h
+++ b/include/os/freebsd/spl/sys/mod_os.h
@@ -47,7 +47,7 @@
 
 #define	ZFS_MODULE_PARAM_CALL_IMPL(parent, name, perm, args, desc) \
     SYSCTL_DECL(parent); \
-    SYSCTL_PROC(parent, OID_AUTO, name, perm | args, desc)
+    SYSCTL_PROC(parent, OID_AUTO, name, CTLFLAG_MPSAFE | perm | args, desc)
 
 #define	ZFS_MODULE_PARAM_CALL( \
     scope_prefix, name_prefix, name, func, _, perm, desc) \

From 7bb707ffafbea79c5b2f9ea24959825a3c4b8802 Mon Sep 17 00:00:00 2001
From: Ryan Moeller <ryan@iXsystems.com>
Date: Tue, 9 Aug 2022 09:05:47 +0000
Subject: [PATCH 09/69] FreeBSD: Organize sysctls

FreeBSD had a few platform-specific ARC tunables in the wrong place:

- Move FreeBSD-specific ARC tunables into the same vfs.zfs.arc node as
  the rest of the ARC tunables.
- Move the handlers from arc_os.c to sysctl_os.c and add compat sysctls
  for the legacy names.

While here, some additional clean up:

- Most handlers are specific to a particular variable and don't need a
  pointer passed through the args.
- Group blocks of related variables, handlers, and sysctl declarations
  into logical sections.
- Match variable types for temporaries in handlers with the type of the
  global variable.
- Remove leftover comments.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Closes #13756
---
 include/os/freebsd/spl/sys/mod_os.h |  26 +-
 module/os/freebsd/zfs/arc_os.c      |  31 +-
 module/os/freebsd/zfs/sysctl_os.c   | 706 +++++++++++++++++-----------
 3 files changed, 455 insertions(+), 308 deletions(-)

diff --git a/include/os/freebsd/spl/sys/mod_os.h b/include/os/freebsd/spl/sys/mod_os.h
index d64a1733ad8..95a19cc940c 100644
--- a/include/os/freebsd/spl/sys/mod_os.h
+++ b/include/os/freebsd/spl/sys/mod_os.h
@@ -59,15 +59,21 @@
 #define	param_set_arc_long_args(var) \
     CTLTYPE_ULONG, &var, 0, param_set_arc_long, "LU"
 
-#define	param_set_arc_min_args(var) \
-    CTLTYPE_ULONG, &var, 0, param_set_arc_min, "LU"
-
-#define	param_set_arc_max_args(var) \
-    CTLTYPE_ULONG, &var, 0, param_set_arc_max, "LU"
-
 #define	param_set_arc_int_args(var) \
     CTLTYPE_INT, &var, 0, param_set_arc_int, "I"
 
+#define	param_set_arc_min_args(var) \
+    CTLTYPE_ULONG, NULL, 0, param_set_arc_min, "LU"
+
+#define	param_set_arc_max_args(var) \
+    CTLTYPE_ULONG, NULL, 0, param_set_arc_max, "LU"
+
+#define	param_set_arc_free_target_args(var) \
+    CTLTYPE_UINT, NULL, 0, param_set_arc_free_target, "IU"
+
+#define	param_set_arc_no_grow_shift_args(var) \
+    CTLTYPE_INT, NULL, 0, param_set_arc_no_grow_shift, "I"
+
 #define	param_set_deadman_failmode_args(var) \
     CTLTYPE_STRING, NULL, 0, param_set_deadman_failmode, "A"
 
@@ -78,16 +84,16 @@
     CTLTYPE_ULONG, NULL, 0, param_set_deadman_ziotime, "LU"
 
 #define	param_set_multihost_interval_args(var) \
-    CTLTYPE_ULONG, &var, 0, param_set_multihost_interval, "LU"
+    CTLTYPE_ULONG, NULL, 0, param_set_multihost_interval, "LU"
 
 #define	param_set_slop_shift_args(var) \
-    CTLTYPE_INT, &var, 0, param_set_slop_shift, "I"
+    CTLTYPE_INT, NULL, 0, param_set_slop_shift, "I"
 
 #define	param_set_min_auto_ashift_args(var) \
-    CTLTYPE_U64, &var, 0, param_set_min_auto_ashift, "QU"
+    CTLTYPE_U64, NULL, 0, param_set_min_auto_ashift, "QU"
 
 #define	param_set_max_auto_ashift_args(var) \
-    CTLTYPE_U64, &var, 0, param_set_max_auto_ashift, "QU"
+    CTLTYPE_U64, NULL, 0, param_set_max_auto_ashift, "QU"
 
 #define	fletcher_4_param_set_args(var) \
     CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c
index dbd71ea43fd..b4833adedcc 100644
--- a/module/os/freebsd/zfs/arc_os.c
+++ b/module/os/freebsd/zfs/arc_os.c
@@ -72,31 +72,14 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
  * We don't have a tunable for arc_free_target due to the dependency on
  * pagedaemon initialisation.
  */
-static int
-sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
-{
-	uint_t val;
-	int err;
-
-	val = zfs_arc_free_target;
-	err = sysctl_handle_int(oidp, &val, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (err);
-
-	if (val < minfree)
-		return (EINVAL);
-	if (val > vm_cnt.v_page_count)
-		return (EINVAL);
-
-	zfs_arc_free_target = val;
-
-	return (0);
-}
-SYSCTL_DECL(_vfs_zfs);
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
-    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof (uint_t),
-    sysctl_vfs_zfs_arc_free_target, "IU",
+int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, free_target,
+    param_set_arc_free_target, 0, CTLFLAG_RW,
 	"Desired number of free pages below which ARC triggers reclaim");
+int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
+    param_set_arc_no_grow_shift, 0, ZMOD_RW,
+	"log2(fraction of ARC which must be free to allow growing)");
 
 int64_t
 arc_available_memory(void)
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c
index c774f05ff70..cd384c205df 100644
--- a/module/os/freebsd/zfs/sysctl_os.c
+++ b/module/os/freebsd/zfs/sysctl_os.c
@@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/arc_impl.h>
 #include <sys/dsl_pool.h>
 
+#include <sys/vmmeter.h>
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0,
@@ -132,170 +133,8 @@ SYSCTL_DECL(_vfs_zfs_version);
 SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
 	(ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
 
-extern arc_state_t ARC_anon;
-extern arc_state_t ARC_mru;
-extern arc_state_t ARC_mru_ghost;
-extern arc_state_t ARC_mfu;
-extern arc_state_t ARC_mfu_ghost;
-extern arc_state_t ARC_l2c_only;
-
-/*
- * minimum lifespan of a prefetch block in clock ticks
- * (initialized in arc_init())
- */
-
 /* arc.c */
 
-int
-param_set_arc_max(SYSCTL_HANDLER_ARGS)
-{
-	uint64_t val;
-	int err;
-
-	val = zfs_arc_max;
-	err = sysctl_handle_long(oidp, &val, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (SET_ERROR(err));
-
-	if (val != 0 && (val < MIN_ARC_MAX || val <= arc_c_min ||
-	    val >= arc_all_memory()))
-		return (SET_ERROR(EINVAL));
-
-	zfs_arc_max = val;
-	arc_tuning_update(B_TRUE);
-
-	/* Update the sysctl to the tuned value */
-	if (val != 0)
-		zfs_arc_max = arc_c_max;
-
-	return (0);
-}
-
-int
-param_set_arc_min(SYSCTL_HANDLER_ARGS)
-{
-	uint64_t val;
-	int err;
-
-	val = zfs_arc_min;
-	err = sysctl_handle_64(oidp, &val, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (SET_ERROR(err));
-
-	if (val != 0 && (val < 2ULL << SPA_MAXBLOCKSHIFT || val > arc_c_max))
-		return (SET_ERROR(EINVAL));
-
-	zfs_arc_min = val;
-	arc_tuning_update(B_TRUE);
-
-	/* Update the sysctl to the tuned value */
-	if (val != 0)
-		zfs_arc_min = arc_c_min;
-
-	return (0);
-}
-
-/* legacy compat */
-extern uint64_t l2arc_write_max;	/* def max write size */
-extern uint64_t l2arc_write_boost;	/* extra warmup write */
-extern uint64_t l2arc_headroom;		/* # of dev writes */
-extern uint64_t l2arc_headroom_boost;
-extern uint64_t l2arc_feed_secs;	/* interval seconds */
-extern uint64_t l2arc_feed_min_ms;	/* min interval msecs */
-extern int l2arc_noprefetch;			/* don't cache prefetch bufs */
-extern int l2arc_feed_again;			/* turbo warmup */
-extern int l2arc_norw;			/* no reads during writes */
-
-/* BEGIN CSTYLED */
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
-	&l2arc_write_max, 0, "max write size (LEGACY)");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
-	&l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
-	&l2arc_headroom, 0, "number of dev writes (LEGACY)");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
-	&l2arc_feed_secs, 0, "interval seconds (LEGACY)");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
-	&l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
-
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
-	&l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
-	&l2arc_feed_again, 0, "turbo warmup (LEGACY)");
-SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
-	&l2arc_norw, 0, "no reads during writes (LEGACY)");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
-	&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
-	&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
-	"size of anonymous state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
-	&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
-	"size of anonymous state");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
-	&ARC_mru.arcs_size.rc_count, 0, "size of mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
-	&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
-	"size of metadata in mru state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
-	&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
-	"size of data in mru state");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
-	&ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
-	&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
-	"size of metadata in mru ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
-	&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
-	"size of data in mru ghost state");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
-	&ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
-	&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
-	"size of metadata in mfu state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
-	&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
-	"size of data in mfu state");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
-	&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
-	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
-	"size of metadata in mfu ghost state");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
-	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
-	"size of data in mfu ghost state");
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
-	&ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
-/* END CSTYLED */
-
-static int
-sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
-{
-	int err, val;
-
-	val = arc_no_grow_shift;
-	err = sysctl_handle_int(oidp, &val, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (err);
-
-	if (val < 0 || val >= arc_shrink_shift)
-		return (EINVAL);
-
-	arc_no_grow_shift = val;
-	return (0);
-}
-
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
-    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, sizeof (int),
-    sysctl_vfs_zfs_arc_no_grow_shift, "I",
-	"log2(fraction of ARC which must be free to allow growing)");
-
 int
 param_set_arc_long(SYSCTL_HANDLER_ARGS)
 {
@@ -324,55 +163,319 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+int
+param_set_arc_max(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_max;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < MIN_ARC_MAX || val <= arc_c_min ||
+	    val >= arc_all_memory()))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_max = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_max = arc_c_max;
+
+	return (0);
+}
+
+/* BEGIN CSTYLED */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
+	CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_max, "LU",
+	"Maximum ARC size in bytes (LEGACY)");
+/* END CSTYLED */
+
+int
+param_set_arc_min(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long val;
+	int err;
+
+	val = zfs_arc_min;
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (SET_ERROR(err));
+
+	if (val != 0 && (val < 2ULL << SPA_MAXBLOCKSHIFT || val > arc_c_max))
+		return (SET_ERROR(EINVAL));
+
+	zfs_arc_min = val;
+	arc_tuning_update(B_TRUE);
+
+	/* Update the sysctl to the tuned value */
+	if (val != 0)
+		zfs_arc_min = arc_c_min;
+
+	return (0);
+}
+
 /* BEGIN CSTYLED */
 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
 	CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
-	&zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_min, "LU",
-	"min arc size (LEGACY)");
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
-	CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
-	&zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_max, "LU",
-	"max arc size (LEGACY)");
+	NULL, 0, param_set_arc_min, "LU",
+	"Minimum ARC size in bytes (LEGACY)");
+/* END CSTYLED */
+
+extern uint_t zfs_arc_free_target;
+
+static int
+param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	uint_t val;
+	int err;
+
+	val = zfs_arc_free_target;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > vm_cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+
+/*
+ * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on
+ * pagedaemon initialization.
+ */
+/* BEGIN CSTYLED */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+	CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_free_target, "IU",
+	"Desired number of free pages below which ARC triggers reclaim"
+	" (LEGACY)");
+/* END CSTYLED */
+
+static int
+param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
+{
+	int err, val;
+
+	val = arc_no_grow_shift;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < 0 || val >= arc_shrink_shift)
+		return (EINVAL);
+
+	arc_no_grow_shift = val;
+
+	return (0);
+}
+
+/* BEGIN CSTYLED */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
+	CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	NULL, 0, param_set_arc_no_grow_shift, "I",
+	"log2(fraction of ARC which must be free to allow growing) (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_write_max;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max,
+	CTLFLAG_RWTUN, &l2arc_write_max, 0,
+	"Max write bytes per interval (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_write_boost;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost,
+	CTLFLAG_RWTUN, &l2arc_write_boost, 0,
+	"Extra write bytes during device warmup (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_headroom;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom,
+	CTLFLAG_RWTUN, &l2arc_headroom, 0,
+	"Number of max device writes to precache (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_headroom_boost;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost,
+	CTLFLAG_RWTUN, &l2arc_headroom_boost, 0,
+	"Compressed l2arc_headroom multiplier (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_feed_secs;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs,
+	CTLFLAG_RWTUN, &l2arc_feed_secs, 0,
+	"Seconds between L2ARC writing (LEGACY)");
+/* END CSTYLED */
+
+extern uint64_t l2arc_feed_min_ms;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms,
+	CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0,
+	"Min feed interval in milliseconds (LEGACY)");
+/* END CSTYLED */
+
+extern int l2arc_noprefetch;
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch,
+	CTLFLAG_RWTUN, &l2arc_noprefetch, 0,
+	"Skip caching prefetched buffers (LEGACY)");
+/* END CSTYLED */
+
+extern int l2arc_feed_again;
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again,
+	CTLFLAG_RWTUN, &l2arc_feed_again, 0,
+	"Turbo L2ARC warmup (LEGACY)");
+/* END CSTYLED */
+
+extern int l2arc_norw;
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
+	CTLFLAG_RWTUN, &l2arc_norw, 0,
+	"No reads during writes (LEGACY)");
+/* END CSTYLED */
+
+extern arc_state_t ARC_anon;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
+	&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
+	&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of metadata in anonymous state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
+	&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of data in anonymous state");
+/* END CSTYLED */
+
+extern arc_state_t ARC_mru;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
+	&ARC_mru.arcs_size.rc_count, 0, "size of mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
+	&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of metadata in mru state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
+	&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of data in mru state");
+/* END CSTYLED */
+
+extern arc_state_t ARC_mru_ghost;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
+	&ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
+	&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of metadata in mru ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
+	&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of data in mru ghost state");
+/* END CSTYLED */
+
+extern arc_state_t ARC_mfu;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
+	&ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
+	&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of metadata in mfu state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
+	&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of data in mfu state");
+/* END CSTYLED */
+
+extern arc_state_t ARC_mfu_ghost;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
+	&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
+	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
+	"size of metadata in mfu ghost state");
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
+	&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
+	"size of data in mfu ghost state");
+/* END CSTYLED */
+
+extern arc_state_t ARC_l2c_only;
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
+	&ARC_l2c_only.arcs_size.rc_count, 0, "size of l2c_only state");
 /* END CSTYLED */
 
 /* dbuf.c */
 
-
 /* dmu.c */
 
 /* dmu_zfetch.c */
+
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
 
-/* max bytes to prefetch per stream (default 8MB) */
 extern uint32_t	zfetch_max_distance;
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
-	&zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
 
-/* max bytes to prefetch indirects for per stream (default 64MB) */
-extern uint32_t	zfetch_max_idistance;
 /* BEGIN CSTYLED */
-SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
-	&zfetch_max_idistance, 0,
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance,
+	CTLFLAG_RWTUN, &zfetch_max_distance, 0,
+	"Max bytes to prefetch per stream (LEGACY)");
+/* END CSTYLED */
+
+extern uint32_t	zfetch_max_idistance;
+
+/* BEGIN CSTYLED */
+SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance,
+	CTLFLAG_RWTUN, &zfetch_max_idistance, 0,
 	"Max bytes to prefetch indirects for per stream (LEGACY)");
 /* END CSTYLED */
 
 /* dsl_pool.c */
 
 /* dnode.c */
+
 extern int zfs_default_bs;
+
+/* BEGIN CSTYLED */
 SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN,
 	&zfs_default_bs, 0, "Default dnode block shift");
+/* END CSTYLED */
 
 extern int zfs_default_ibs;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
-	&zfs_default_ibs, 0, "Default dnode indirect block shift");
 
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
+    &zfs_default_ibs, 0, "Default dnode indirect block shift");
+/* END CSTYLED */
 
 /* dsl_scan.c */
 
 /* metaslab.c */
 
-/* BEGIN CSTYLED */
 /*
  * In pools where the log space map feature is not enabled we touch
  * multiple metaslabs (and their respective space maps) with each
@@ -382,10 +485,13 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
  * is 8~16K.
  */
 extern int zfs_metaslab_sm_blksz_no_log;
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
-	&zfs_metaslab_sm_blksz_no_log, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log,
+	CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_no_log, 0,
 	"Block size for space map in pools with log space map disabled.  "
 	"Power of 2 greater than 4096.");
+/* END CSTYLED */
 
 /*
  * When the log space map feature is enabled, we accumulate a lot of
@@ -393,10 +499,13 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
  * from a bigger block size like 128K for the metaslab space maps.
  */
 extern int zfs_metaslab_sm_blksz_with_log;
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
-	&zfs_metaslab_sm_blksz_with_log, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log,
+	CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_with_log, 0,
 	"Block size for space map in pools with log space map enabled.  "
 	"Power of 2 greater than 4096.");
+/* END CSTYLED */
 
 /*
  * The in-core space map representation is more compact than its on-disk form.
@@ -405,21 +514,30 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
  * Values should be greater than or equal to 100.
  */
 extern int zfs_condense_pct;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
-	&zfs_condense_pct, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct,
+	CTLFLAG_RWTUN, &zfs_condense_pct, 0,
 	"Condense on-disk spacemap when it is more than this many percents"
 	" of in-memory counterpart");
+/* END CSTYLED */
 
 extern int zfs_remove_max_segment;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
-	&zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will"
-	" attempt to allocate when removing a device");
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment,
+	CTLFLAG_RWTUN, &zfs_remove_max_segment, 0,
+	"Largest contiguous segment ZFS will attempt to allocate when removing"
+	" a device");
+/* END CSTYLED */
 
 extern int zfs_removal_suspend_progress;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
-	&zfs_removal_suspend_progress, 0,
-	"Ensures certain actions can happen while in the middle of a removal");
 
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress,
+	CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0,
+	"Ensures certain actions can happen while in the middle of a removal");
+/* END CSTYLED */
 
 /*
  * Minimum size which forces the dynamic allocator to change
@@ -428,9 +546,13 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
  * aggressive strategy (i.e search by size rather than offset).
  */
 extern uint64_t metaslab_df_alloc_threshold;
-SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
-	&metaslab_df_alloc_threshold, 0, "Minimum size which forces the dynamic"
-	" allocator to change its allocation strategy");
+
+/* BEGIN CSTYLED */
+SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold,
+	CTLFLAG_RWTUN, &metaslab_df_alloc_threshold, 0,
+	"Minimum size which forces the dynamic allocator to change its"
+	" allocation strategy");
+/* END CSTYLED */
 
 /*
  * The minimum free space, in percent, which must be available
@@ -439,46 +561,84 @@ SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
  * switch to using best-fit allocations.
  */
 extern int metaslab_df_free_pct;
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
-	&metaslab_df_free_pct, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
+	CTLFLAG_RWTUN, &metaslab_df_free_pct, 0,
 	"The minimum free space, in percent, which must be available in a"
 	" space map to continue allocations in a first-fit fashion");
+/* END CSTYLED */
 
 /*
  * Percentage of all cpus that can be used by the metaslab taskq.
  */
 extern int metaslab_load_pct;
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
-	&metaslab_load_pct, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
+	CTLFLAG_RWTUN, &metaslab_load_pct, 0,
 	"Percentage of cpus that can be used by the metaslab taskq");
+/* END CSTYLED */
 
 /*
  * Max number of metaslabs per group to preload.
  */
 extern int metaslab_preload_limit;
-SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
-	&metaslab_preload_limit, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
+	CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
 	"Max number of metaslabs per group to preload");
+/* END CSTYLED */
+
+/* mmp.c */
+
+int
+param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
+{
+	int err;
+
+	err = sysctl_handle_long(oidp, &zfs_multihost_interval, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (spa_mode_global != SPA_MODE_UNINIT)
+		mmp_signal_all_threads();
+
+	return (0);
+}
 
 /* spa.c */
+
 extern int zfs_ccw_retry_interval;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
-	&zfs_ccw_retry_interval, 0, "Configuration cache file write,"
-	" retry after failure, interval (seconds)");
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval,
+	CTLFLAG_RWTUN, &zfs_ccw_retry_interval, 0,
+	"Configuration cache file write, retry after failure, interval"
+	" (seconds)");
+/* END CSTYLED */
 
 extern uint64_t zfs_max_missing_tvds_cachefile;
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
-	&zfs_max_missing_tvds_cachefile, 0,
-	"allow importing pools with missing top-level vdevs in cache file");
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile,
+	CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0,
+	"Allow importing pools with missing top-level vdevs in cache file");
+/* END CSTYLED */
 
 extern uint64_t zfs_max_missing_tvds_scan;
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
-	&zfs_max_missing_tvds_scan, 0,
-	"allow importing pools with missing top-level vdevs during scan");
+
+/* BEGIN CSTYLED */
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan,
+	CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0,
+	"Allow importing pools with missing top-level vdevs during scan");
 /* END CSTYLED */
 
 /* spa_misc.c */
+
 extern int zfs_flags;
+
 static int
 sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
 {
@@ -566,14 +726,37 @@ param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
 	return (-param_set_deadman_failmode_common(buf));
 }
 
+int
+param_set_slop_shift(SYSCTL_HANDLER_ARGS)
+{
+	int val;
+	int err;
+
+	val = spa_slop_shift;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < 1 || val > 31)
+		return (EINVAL);
+
+	spa_slop_shift = val;
+
+	return (0);
+}
 
 /* spacemap.c */
+
 extern int space_map_ibs;
+
+/* BEGIN CSTYLED */
 SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
 	&space_map_ibs, 0, "Space map indirect block shift");
+/* END CSTYLED */
 
 
 /* vdev.c */
+
 int
 param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
 {
@@ -593,6 +776,14 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+/* BEGIN CSTYLED */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
+	CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	&zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+	param_set_min_auto_ashift, "QU",
+	"Min ashift used when creating new top-level vdev. (LEGACY)");
+/* END CSTYLED */
+
 int
 param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 {
@@ -613,26 +804,25 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
 }
 
 /* BEGIN CSTYLED */
-SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
-	CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
-	&zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
-	param_set_min_auto_ashift, "QU",
-	"Min ashift used when creating new top-level vdev. (LEGACY)");
 SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
 	CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
 	&zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
 	param_set_max_auto_ashift, "QU",
 	"Max ashift used when optimizing for logical -> physical sector size on"
 	" new top-level vdevs. (LEGACY)");
+/* END CSTYLED */
 
 /*
  * Since the DTL space map of a vdev is not expected to have a lot of
  * entries, we default its block size to 4K.
  */
 extern int zfs_vdev_dtl_sm_blksz;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
-	&zfs_vdev_dtl_sm_blksz, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz,
+	CTLFLAG_RDTUN, &zfs_vdev_dtl_sm_blksz, 0,
 	"Block size for DTL space map.  Power of 2 greater than 4096.");
+/* END CSTYLED */
 
 /*
  * vdev-wide space maps that have lots of entries written to them at
@@ -640,80 +830,48 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
  * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
  */
 extern int zfs_vdev_standard_sm_blksz;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
-	&zfs_vdev_standard_sm_blksz, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz,
+	CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0,
 	"Block size for standard space map.  Power of 2 greater than 4096.");
 /* END CSTYLED */
 
 extern int vdev_validate_skip;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
-	&vdev_validate_skip, 0, "Enable to bypass vdev_validate().");
 
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip,
+	CTLFLAG_RDTUN, &vdev_validate_skip, 0,
+	"Enable to bypass vdev_validate().");
+/* END CSTYLED */
 
 /* vdev_cache.c */
 
 /* vdev_mirror.c */
-/*
- * The load configuration settings below are tuned by default for
- * the case where all devices are of the same rotational type.
- *
- * If there is a mixture of rotating and non-rotating media, setting
- * non_rotating_seek_inc to 0 may well provide better results as it
- * will direct more reads to the non-rotating vdevs which are more
- * likely to have a higher performance.
- */
-
 
 /* vdev_queue.c */
-/* BEGIN CSTYLED */
+
 extern uint32_t zfs_vdev_max_active;
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
-	&zfs_vdev_max_active, 0,
+
+/* BEGIN CSTYLED */
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight,
+	CTLFLAG_RWTUN, &zfs_vdev_max_active, 0,
 	"The maximum number of I/Os of all types active for each device."
 	" (LEGACY)");
-
-extern int zfs_vdev_def_queue_depth;
-SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
-	&zfs_vdev_def_queue_depth, 0,
-	"Default queue depth for each allocator");
-
-
-SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN,
-	&zio_exclude_metadata, 0,
-	"Exclude metadata buffers from dumps as well");
 /* END CSTYLED */
 
-int
-param_set_slop_shift(SYSCTL_HANDLER_ARGS)
-{
-	int val;
-	int err;
+extern int zfs_vdev_def_queue_depth;
 
-	val = *(int *)arg1;
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth,
+	CTLFLAG_RWTUN, &zfs_vdev_def_queue_depth, 0,
+	"Default queue depth for each allocator");
+/* END CSTYLED */
 
-	err = sysctl_handle_int(oidp, &val, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (err);
+/* zio.c */
 
-	if (val < 1 || val > 31)
-		return (EINVAL);
-
-	*(int *)arg1 = val;
-
-	return (0);
-}
-
-int
-param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
-{
-	int err;
-
-	err = sysctl_handle_long(oidp, arg1, 0, req);
-	if (err != 0 || req->newptr == NULL)
-		return (err);
-
-	if (spa_mode_global != SPA_MODE_UNINIT)
-		mmp_signal_all_threads();
-
-	return (0);
-}
+/* BEGIN CSTYLED */
+SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata,
+	CTLFLAG_RDTUN, &zio_exclude_metadata, 0,
+	"Exclude metadata buffers from dumps as well");
+/* END CSTYLED */

From ee9f3bca5574192589d7c7734fdc81b361aa77db Mon Sep 17 00:00:00 2001
From: Andriy Gapon <avg@FreeBSD.org>
Date: Fri, 2 Sep 2022 23:31:19 +0300
Subject: [PATCH 10/69] Add zfs.sync.rename_snapshot

Only renaming a single snapshot is supported.
Recursive or more complex renames can be scripted.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Andriy Gapon <avg@FreeBSD.org>
Closes #13802
---
 include/sys/dsl_dataset.h                     | 11 +++++
 man/man8/zfs-program.8                        | 13 ++++++
 module/zfs/dsl_dataset.c                      | 12 +-----
 module/zfs/zcp_synctask.c                     | 37 +++++++++++++++++
 tests/zfs-tests/tests/Makefile.am             |  2 +
 .../synctask_core/tst.snapshot_rename.ksh     | 41 +++++++++++++++++++
 .../synctask_core/tst.snapshot_rename.zcp     | 27 ++++++++++++
 7 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100755 tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.ksh
 create mode 100644 tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.zcp

diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 81d25da831b..3450527af7e 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -301,6 +301,14 @@ typedef struct dsl_dataset_snapshot_arg {
 	proc_t *ddsa_proc;
 } dsl_dataset_snapshot_arg_t;
 
+typedef struct dsl_dataset_rename_snapshot_arg {
+	const char *ddrsa_fsname;
+	const char *ddrsa_oldsnapname;
+	const char *ddrsa_newsnapname;
+	boolean_t ddrsa_recursive;
+	dmu_tx_t *ddrsa_tx;
+} dsl_dataset_rename_snapshot_arg_t;
+
 /*
  * The max length of a temporary tag prefix is the number of hex digits
  * required to express UINT64_MAX plus one for the hyphen.
@@ -473,6 +481,9 @@ void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx);
 int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
     nvlist_t *result);
 
+int dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx);
+void dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx);
+
 uint64_t dsl_dataset_get_remap_deadlist_object(dsl_dataset_t *ds);
 void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
 boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds);
diff --git a/man/man8/zfs-program.8 b/man/man8/zfs-program.8
index 06415b2190e..928620362be 100644
--- a/man/man8/zfs-program.8
+++ b/man/man8/zfs-program.8
@@ -424,6 +424,19 @@ To enable taking snapshots from ZCP scripts, the pool must be upgraded.
 .It Ar dataset Pq string
 Name of snapshot to create.
 .El
+.It Fn zfs.sync.rename_snapshot dataset oldsnapname newsnapname
+Rename a snapshot of a filesystem or a volume.
+Returns 0 if the snapshot was successfully renamed,
+and a nonzero error code otherwise.
+.Pp
+.Bl -tag -compact -width "newbookmark (string)"
+.It Ar dataset Pq string
+Name of the snapshot's parent dataset.
+.It Ar oldsnapname Pq string
+Original name of the snapshot.
+.It Ar newsnapname Pq string
+New name of the snapshot.
+.El
 .It Fn zfs.sync.bookmark source newbookmark
 Create a bookmark of an existing source snapshot or bookmark.
 Returns 0 if the new bookmark was successfully created,
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 8f3240a5deb..44da6a3f0d4 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -2915,14 +2915,6 @@ dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
 	return (B_FALSE);
 }
 
-typedef struct dsl_dataset_rename_snapshot_arg {
-	const char *ddrsa_fsname;
-	const char *ddrsa_oldsnapname;
-	const char *ddrsa_newsnapname;
-	boolean_t ddrsa_recursive;
-	dmu_tx_t *ddrsa_tx;
-} dsl_dataset_rename_snapshot_arg_t;
-
 static int
 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
     dsl_dataset_t *hds, void *arg)
@@ -2953,7 +2945,7 @@ dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
 	return (error);
 }
 
-static int
+int
 dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
 {
 	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
@@ -3015,7 +3007,7 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
 	return (0);
 }
 
-static void
+void
 dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
diff --git a/module/zfs/zcp_synctask.c b/module/zfs/zcp_synctask.c
index 24210117eca..058910054d9 100644
--- a/module/zfs/zcp_synctask.c
+++ b/module/zfs/zcp_synctask.c
@@ -302,6 +302,42 @@ zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details)
 	return (err);
 }
 
+static int zcp_synctask_rename_snapshot(lua_State *, boolean_t, nvlist_t *);
+static const zcp_synctask_info_t zcp_synctask_rename_snapshot_info = {
+	.name = "rename_snapshot",
+	.func = zcp_synctask_rename_snapshot,
+	.pargs = {
+	    {.za_name = "filesystem | volume", .za_lua_type = LUA_TSTRING },
+	    {.za_name = "oldsnapname", .za_lua_type = LUA_TSTRING },
+	    {.za_name = "newsnapname", .za_lua_type = LUA_TSTRING },
+	    {NULL, 0}
+	},
+	.space_check = ZFS_SPACE_CHECK_RESERVED,
+	.blocks_modified = 1
+};
+
+static int
+zcp_synctask_rename_snapshot(lua_State *state, boolean_t sync,
+    nvlist_t *err_details)
+{
+	(void) err_details;
+	int err;
+	const char *fsname = lua_tostring(state, 1);
+	const char *oldsnapname = lua_tostring(state, 2);
+	const char *newsnapname = lua_tostring(state, 3);
+
+	struct dsl_dataset_rename_snapshot_arg ddrsa = { 0 };
+	ddrsa.ddrsa_fsname = fsname;
+	ddrsa.ddrsa_oldsnapname = oldsnapname;
+	ddrsa.ddrsa_newsnapname = newsnapname;
+	ddrsa.ddrsa_recursive = B_FALSE;
+
+	err = zcp_sync_task(state, dsl_dataset_rename_snapshot_check,
+	    dsl_dataset_rename_snapshot_sync, &ddrsa, sync, NULL);
+
+	return (err);
+}
+
 static int zcp_synctask_inherit_prop(lua_State *, boolean_t,
     nvlist_t *err_details);
 static const zcp_synctask_info_t zcp_synctask_inherit_prop_info = {
@@ -529,6 +565,7 @@ zcp_load_synctask_lib(lua_State *state, boolean_t sync)
 		&zcp_synctask_promote_info,
 		&zcp_synctask_rollback_info,
 		&zcp_synctask_snapshot_info,
+		&zcp_synctask_rename_snapshot_info,
 		&zcp_synctask_inherit_prop_info,
 		&zcp_synctask_bookmark_info,
 		&zcp_synctask_set_prop_info,
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index b80489af255..89b2ca866c2 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -129,6 +129,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
 	functional/channel_program/synctask_core/tst.snapshot_destroy.zcp \
 	functional/channel_program/synctask_core/tst.snapshot_neg.zcp \
 	functional/channel_program/synctask_core/tst.snapshot_recursive.zcp \
+	functional/channel_program/synctask_core/tst.snapshot_rename.zcp \
 	functional/channel_program/synctask_core/tst.snapshot_simple.zcp \
 	functional/checksum/default.cfg \
 	functional/clean_mirror/clean_mirror_common.kshlib \
@@ -536,6 +537,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/channel_program/synctask_core/tst.snapshot_destroy.ksh \
 	functional/channel_program/synctask_core/tst.snapshot_neg.ksh \
 	functional/channel_program/synctask_core/tst.snapshot_recursive.ksh \
+	functional/channel_program/synctask_core/tst.snapshot_rename.ksh \
 	functional/channel_program/synctask_core/tst.snapshot_simple.ksh \
 	functional/channel_program/synctask_core/tst.terminate_by_signal.ksh \
 	functional/chattr/chattr_001_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.ksh b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.ksh
new file mode 100755
index 00000000000..0561e4b7c63
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.ksh
@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2022 by Andriy Gapon. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/channel_program/channel_common.kshlib
+
+#
+# DESCRIPTION: Make sure snapshot renaming works in channel programs
+#
+
+verify_runnable "global"
+
+fs=$TESTPOOL/$TESTFS/testchild
+snapname1=testsnap1
+snapname2=testsnap2
+
+function cleanup
+{
+	destroy_dataset $fs "-R"
+}
+
+log_onexit cleanup
+
+log_must zfs create $fs
+
+log_must_program_sync $TESTPOOL \
+    $ZCP_ROOT/synctask_core/tst.snapshot_rename.zcp $fs $snapname1 $snapname2
+
+log_pass "Snapshot renaming works"
diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.zcp b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.zcp
new file mode 100644
index 00000000000..ef893d1551d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.snapshot_rename.zcp
@@ -0,0 +1,27 @@
+--
+-- This file and its contents are supplied under the terms of the
+-- Common Development and Distribution License ("CDDL"), version 1.0.
+-- You may only use this file in accordance with the terms of version
+-- 1.0 of the CDDL.
+--
+-- A full copy of the text of the CDDL should have accompanied this
+-- source.  A copy of the CDDL is also available via the Internet at
+-- http://www.illumos.org/license/CDDL.
+--
+
+--
+-- Copyright (c) 2022 by Andriy Gapon. All rights reserved.
+--
+
+-- Invoke as "zfs program <pool> <prog> <fs> <oldsnap> <newsnap>"
+
+args = ...
+argv = args["argv"]
+assert(zfs.sync.snapshot(argv[1] .. "@" .. argv[2]) == 0)
+assert(zfs.sync.rename_snapshot(argv[1], argv[2], argv[3]) == 0)
+snaps = {}
+for s in zfs.list.snapshots(argv[1]) do
+	table.insert(snaps, s)
+end
+assert(#snaps == 1)
+assert(snaps[1] == (argv[1] .. "@" .. argv[3]))

From 59767479acb6edb12335460c9e5f7cfd9a3823cc Mon Sep 17 00:00:00 2001
From: Umer Saleem <usaleem@ixsystems.com>
Date: Sat, 3 Sep 2022 01:33:50 +0500
Subject: [PATCH 11/69] Add DD_FIELD string for snapshots_changed property

This commit adds DD_FIELD string used in extensified dsl_dir zap object
for snapshots_changed property.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Umer Saleem <usaleem@ixsystems.com>
Closes #13819
---
 include/sys/dsl_dir.h | 1 +
 module/zfs/dsl_dir.c  | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index 664230f146a..384f98e8f72 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -52,6 +52,7 @@ struct zthr;
 #define	DD_FIELD_SNAPSHOT_COUNT		"com.joyent:snapshot_count"
 #define	DD_FIELD_CRYPTO_KEY_OBJ		"com.datto:crypto_key_obj"
 #define	DD_FIELD_LIVELIST		"com.delphix:livelist"
+#define	DD_FIELD_SNAPSHOTS_CHANGED	"com.ixsystems:snapshots_changed"
 
 typedef enum dd_used {
 	DD_USED_HEAD,
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index 7460269384b..a4db3ee2f30 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -271,7 +271,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
 		if (dsl_dir_is_zapified(dd)) {
 			inode_timespec_t t = {0};
 			zap_lookup(dp->dp_meta_objset, ddobj,
-			    zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED),
+			    DD_FIELD_SNAPSHOTS_CHANGED,
 			    sizeof (uint64_t),
 			    sizeof (inode_timespec_t) / sizeof (uint64_t),
 			    &t);
@@ -2265,7 +2265,7 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx)
 		uint64_t ddobj = dd->dd_object;
 		dsl_dir_zapify(dd, tx);
 		VERIFY0(zap_update(mos, ddobj,
-		    zfs_prop_to_name(ZFS_PROP_SNAPSHOTS_CHANGED),
+		    DD_FIELD_SNAPSHOTS_CHANGED,
 		    sizeof (uint64_t),
 		    sizeof (inode_timespec_t) / sizeof (uint64_t),
 		    &t, tx));

From 7c0e3941cdd4692d46c9171b791fa689f6bb1bfd Mon Sep 17 00:00:00 2001
From: Samuel <50765275+npc203@users.noreply.github.com>
Date: Tue, 6 Sep 2022 22:07:47 +0530
Subject: [PATCH 12/69] Fix column width in 'zpool iostat -v' and 'zpool list
 -v'

This commit fixes a minor spacing issue caused when
enumerating vdev names, which originated from #13031

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Akash B <akash-b@hpe.com>
Signed-off-by: Samuel Wycliffe <samuelwycliffe@gmail.com>
Closes #13811
---
 cmd/zpool/zpool_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 13a51691fa7..b5b0beef532 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -5466,8 +5466,8 @@ get_namewidth_iostat(zpool_handle_t *zhp, void *data)
 	 * get_namewidth() returns the maximum width of any name in that column
 	 * for any pool/vdev/device line that will be output.
 	 */
-	width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_vdevs.cb_name_flags,
-	    cb->cb_verbose);
+	width = get_namewidth(zhp, cb->cb_namewidth,
+	    cb->cb_vdevs.cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
 
 	/*
 	 * The width we are calculating is the width of the header and also the
@@ -6298,8 +6298,8 @@ get_namewidth_list(zpool_handle_t *zhp, void *data)
 	list_cbdata_t *cb = data;
 	int width;
 
-	width = get_namewidth(zhp, cb->cb_namewidth, cb->cb_name_flags,
-	    cb->cb_verbose);
+	width = get_namewidth(zhp, cb->cb_namewidth,
+	    cb->cb_name_flags | VDEV_NAME_TYPE_ID, cb->cb_verbose);
 
 	if (width < 9)
 		width = 9;

From 11df48ab8ba374de944cd0483c55ddaaad46b91d Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 6 Sep 2022 12:43:21 -0400
Subject: [PATCH 13/69] Cleanup Raid-Z Typo fixes

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13834
---
 include/sys/vdev_raidz_impl.h     | 2 +-
 module/zfs/vdev_raidz_math_impl.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h
index 12f5eff22c6..c1037fa12e3 100644
--- a/include/sys/vdev_raidz_impl.h
+++ b/include/sys/vdev_raidz_impl.h
@@ -321,7 +321,7 @@ vdev_raidz_exp2(const uint8_t a, const unsigned exp)
  * Galois Field operations.
  *
  * gf_exp2	- computes 2 raised to the given power
- * gf_exp2	- computes 4 raised to the given power
+ * gf_exp4	- computes 4 raised to the given power
  * gf_mul	- multiplication
  * gf_div	- division
  * gf_inv	- multiplicative inverse
diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h
index 2d96f602314..8ba7e0cd769 100644
--- a/module/zfs/vdev_raidz_math_impl.h
+++ b/module/zfs/vdev_raidz_math_impl.h
@@ -460,8 +460,8 @@ static void
 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
     const size_t dsize)
 {
-	v_t *p = (v_t *)c[0];
-	v_t *q = (v_t *)c[1];
+	v_t *p = (v_t *)c[CODE_P];
+	v_t *q = (v_t *)c[CODE_Q];
 	v_t *r = (v_t *)c[CODE_R];
 	const v_t *d = (const v_t *)dc;
 	const v_t * const dend = d + (dsize / sizeof (v_t));
@@ -486,7 +486,7 @@ raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
 
 
 /*
- * Generate PQR parity (RAIDZ2)
+ * Generate PQR parity (RAIDZ3)
  *
  * @rr	RAIDZ row
  */

From 9d0887402ba505fa7f82ffeb1e22c34fb07c83ed Mon Sep 17 00:00:00 2001
From: Rob Wing <rew@FreeBSD.org>
Date: Sat, 13 Aug 2022 21:09:49 -0800
Subject: [PATCH 14/69] FreeBSD: add knlist_init_sx() for exclusive locks

This will be used to implement kqfilter support for zvol cdevs.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Wing <rew@FreeBSD.org>
Closes #13773
---
 include/os/freebsd/zfs/sys/freebsd_event.h | 37 ++++++++++++
 module/Makefile.bsd                        |  1 +
 module/os/freebsd/zfs/event_os.c           | 65 ++++++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 include/os/freebsd/zfs/sys/freebsd_event.h
 create mode 100644 module/os/freebsd/zfs/event_os.c

diff --git a/include/os/freebsd/zfs/sys/freebsd_event.h b/include/os/freebsd/zfs/sys/freebsd_event.h
new file mode 100644
index 00000000000..544ff8b0f81
--- /dev/null
+++ b/include/os/freebsd/zfs/sys/freebsd_event.h
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Rob Wing
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef	_ZFS_FREEBSD_EVENT_H
+#define	_ZFS_FREEBSD_EVENT_H
+
+#ifdef _KERNEL
+
+void   knlist_init_sx(struct knlist *knl, struct sx *lock);
+
+#endif /* _KERNEL */
+
+#endif /* !_ZFS_FREEBSD_EVENT_H */
diff --git a/module/Makefile.bsd b/module/Makefile.bsd
index 589ca60b29b..050b6c21e5e 100644
--- a/module/Makefile.bsd
+++ b/module/Makefile.bsd
@@ -172,6 +172,7 @@ SRCS+=	abd_os.c \
 	arc_os.c \
 	crypto_os.c \
 	dmu_os.c \
+	event_os.c \
 	hkdf.c \
 	kmod_core.c \
 	spa_os.c \
diff --git a/module/os/freebsd/zfs/event_os.c b/module/os/freebsd/zfs/event_os.c
new file mode 100644
index 00000000000..97ac151e4fa
--- /dev/null
+++ b/module/os/freebsd/zfs/event_os.c
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Rob Wing
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+#include <sys/event.h>
+
+#include <sys/freebsd_event.h>
+
+static void
+knlist_sx_xlock(void *arg)
+{
+
+	sx_xlock((struct sx *)arg);
+}
+
+static void
+knlist_sx_xunlock(void *arg)
+{
+
+	sx_xunlock((struct sx *)arg);
+}
+
+static void
+knlist_sx_assert_lock(void *arg, int what)
+{
+
+	if (what == LA_LOCKED)
+		sx_assert((struct sx *)arg, SX_LOCKED);
+	else
+		sx_assert((struct sx *)arg, SX_UNLOCKED);
+}
+
+void
+knlist_init_sx(struct knlist *knl, struct sx *lock)
+{
+
+	knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
+	    knlist_sx_assert_lock);
+}

From 983096a1b46982a86d25fda2ccdf08079c3e51b9 Mon Sep 17 00:00:00 2001
From: Rob Wing <rew@FreeBSD.org>
Date: Tue, 1 Feb 2022 20:00:57 -0900
Subject: [PATCH 15/69] FreeBSD: add kqfilter support for zvol cdev

The only event hooked up is NOTE_ATTRIB, which is triggered when the
device is resized.

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Wing <rew@FreeBSD.org>
Closes #13773
---
 module/os/freebsd/zfs/zvol_os.c | 64 +++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c
index ac030f75323..8d2a6d77624 100644
--- a/module/os/freebsd/zfs/zvol_os.c
+++ b/module/os/freebsd/zfs/zvol_os.c
@@ -92,6 +92,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/zil_impl.h>
 #include <sys/filio.h>
+#include <sys/freebsd_event.h>
 
 #include <geom/geom.h>
 #include <sys/zvol.h>
@@ -123,6 +124,7 @@ struct zvol_state_os {
 		struct zvol_state_dev {
 			struct cdev *zsd_cdev;
 			uint64_t zsd_sync_cnt;
+			struct selinfo zsd_selinfo;
 		} _zso_dev;
 
 		/* volmode=geom */
@@ -167,6 +169,7 @@ static d_ioctl_t	zvol_cdev_ioctl;
 static d_read_t		zvol_cdev_read;
 static d_write_t	zvol_cdev_write;
 static d_strategy_t	zvol_geom_bio_strategy;
+static d_kqfilter_t	zvol_cdev_kqfilter;
 
 static struct cdevsw zvol_cdevsw = {
 	.d_name =	"zvol",
@@ -178,6 +181,16 @@ static struct cdevsw zvol_cdevsw = {
 	.d_read =	zvol_cdev_read,
 	.d_write =	zvol_cdev_write,
 	.d_strategy =	zvol_geom_bio_strategy,
+	.d_kqfilter =	zvol_cdev_kqfilter,
+};
+
+static void		zvol_filter_detach(struct knote *kn);
+static int		zvol_filter_vnode(struct knote *kn, long hint);
+
+static struct filterops zvol_filterops_vnode = {
+	.f_isfd = 1,
+	.f_detach = zvol_filter_detach,
+	.f_event = zvol_filter_vnode,
 };
 
 extern uint_t zfs_geom_probe_vdev_key;
@@ -601,6 +614,49 @@ zvol_geom_bio_getattr(struct bio *bp)
 	return (1);
 }
 
+static void
+zvol_filter_detach(struct knote *kn)
+{
+	zvol_state_t *zv;
+	struct zvol_state_dev *zsd;
+
+	zv = kn->kn_hook;
+	zsd = &zv->zv_zso->zso_dev;
+
+	knlist_remove(&zsd->zsd_selinfo.si_note, kn, 0);
+}
+
+static int
+zvol_filter_vnode(struct knote *kn, long hint)
+{
+	kn->kn_fflags |= kn->kn_sfflags & hint;
+
+	return (kn->kn_fflags != 0);
+}
+
+static int
+zvol_cdev_kqfilter(struct cdev *dev, struct knote *kn)
+{
+	zvol_state_t *zv;
+	struct zvol_state_dev *zsd;
+
+	zv = dev->si_drv2;
+	zsd = &zv->zv_zso->zso_dev;
+
+	if (kn->kn_filter != EVFILT_VNODE)
+		return (EINVAL);
+
+	/* XXX: extend support for other NOTE_* events */
+	if (kn->kn_sfflags != NOTE_ATTRIB)
+		return (EINVAL);
+
+	kn->kn_fop = &zvol_filterops_vnode;
+	kn->kn_hook = zv;
+	knlist_add(&zsd->zsd_selinfo.si_note, kn, 0);
+
+	return (0);
+}
+
 static void
 zvol_geom_bio_strategy(struct bio *bp)
 {
@@ -1306,6 +1362,8 @@ zvol_os_free(zvol_state_t *zv)
 		if (dev != NULL) {
 			ASSERT3P(dev->si_drv2, ==, NULL);
 			destroy_dev(dev);
+			knlist_clear(&zsd->zsd_selinfo.si_note, 0);
+			knlist_destroy(&zsd->zsd_selinfo.si_note);
 		}
 	}
 
@@ -1409,6 +1467,8 @@ zvol_os_create_minor(const char *name)
 			dev->si_iosize_max = MAXPHYS;
 #endif
 			zsd->zsd_cdev = dev;
+			knlist_init_sx(&zsd->zsd_selinfo.si_note,
+			    &zv->zv_state_lock);
 		}
 	}
 	(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
@@ -1515,6 +1575,10 @@ zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
 			g_resize_provider(pp, zv->zv_volsize);
 
 		g_topology_unlock();
+	} else if (zv->zv_volmode == ZFS_VOLMODE_DEV) {
+		struct zvol_state_dev *zsd = &zv->zv_zso->zso_dev;
+
+		KNOTE_UNLOCKED(&zsd->zsd_selinfo.si_note, NOTE_ATTRIB);
 	}
 	return (0);
 }

From 238cd4b863ba5c1e1d56215d9bbd77be466f7845 Mon Sep 17 00:00:00 2001
From: Walter Huf <hufman@gmail.com>
Date: Tue, 6 Sep 2022 10:02:18 -0700
Subject: [PATCH 16/69] Add xattr_handler support for Android kernels

Some ARM BSPs run the Android kernel, which has
a modified xattr_handler->get() function signature.
This adds support to compile against these kernels.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Walter Huf <hufman@gmail.com>
Closes #13824
---
 config/kernel-xattr-handler.m4               | 29 +++++++++++++++++++-
 include/os/linux/kernel/linux/xattr_compat.h | 14 ++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4
index 00b1e74a9cc..b6cbfa15500 100644
--- a/config/kernel-xattr-handler.m4
+++ b/config/kernel-xattr-handler.m4
@@ -100,6 +100,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_GET], [
 			.get = get,
 		};
 	],[])
+
+	ZFS_LINUX_TEST_SRC([xattr_handler_get_dentry_inode_flags], [
+		#include <linux/xattr.h>
+
+		int get(const struct xattr_handler *handler,
+		    struct dentry *dentry, struct inode *inode,
+		    const char *name, void *buffer,
+		    size_t size, int flags) { return 0; }
+		static const struct xattr_handler
+		    xops __attribute__ ((unused)) = {
+			.get = get,
+		};
+	],[])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [
@@ -142,7 +155,21 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [
 				AC_DEFINE(HAVE_XATTR_GET_DENTRY, 1,
 				    [xattr_handler->get() wants dentry])
 			],[
-				ZFS_LINUX_TEST_ERROR([xattr get()])
+				dnl #
+				dnl # Android API change,
+				dnl # The xattr_handler->get() callback was
+				dnl # changed to take dentry, inode and flags.
+				dnl #
+				AC_MSG_RESULT(no)
+				AC_MSG_CHECKING(
+				    [whether xattr_handler->get() wants dentry and inode and flags])
+				ZFS_LINUX_TEST_RESULT([xattr_handler_get_dentry_inode_flags], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_XATTR_GET_DENTRY_INODE_FLAGS, 1,
+					    [xattr_handler->get() wants dentry and inode and flags])
+				],[
+					ZFS_LINUX_TEST_ERROR([xattr get()])
+				])
 			])
 		])
 	])
diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h
index 21c88dd0771..9b83813db70 100644
--- a/include/os/linux/kernel/linux/xattr_compat.h
+++ b/include/os/linux/kernel/linux/xattr_compat.h
@@ -115,6 +115,20 @@ fn(struct dentry *dentry, const char *name, void *buffer, size_t size,	\
 {									\
 	return (__ ## fn(dentry->d_inode, name, buffer, size));		\
 }
+/*
+ * Android API change,
+ * The xattr_handler->get() callback was changed to take a dentry and inode
+ * and flags, because the dentry might not be attached to an inode yet.
+ */
+#elif defined(HAVE_XATTR_GET_DENTRY_INODE_FLAGS)
+#define	ZPL_XATTR_GET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct dentry *dentry,		\
+    struct inode *inode, const char *name, void *buffer,		\
+    size_t size, int flags)						\
+{									\
+	return (__ ## fn(inode, name, buffer, size));			\
+}
 #else
 #error "Unsupported kernel"
 #endif

From 5724073517d41cf0a3cc8cc0992274a8dab601da Mon Sep 17 00:00:00 2001
From: Christian Schwarz <me@cschwarz.com>
Date: Thu, 8 Sep 2022 02:04:15 +0200
Subject: [PATCH 17/69] make DMU_OT_IS_METADATA and DMU_OT_IS_ENCRYPTED return
 B_TRUE or B_FALSE

Without this patch, the

    ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));

at the beginning of dbuf_assign_arcbuf can panic
if the object type is a DMU_OT_NEWTYPE that has
DMU_OT_METADATA set.

While we're at it, fix DMU_OT_IS_ENCRYPTED as well.

Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Christian Schwarz <christian.schwarz@nutanix.com>
Closes #13842
---
 include/sys/dmu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 5a3d7d6a505..0a4827e5ec3 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -136,7 +136,7 @@ typedef enum dmu_object_byteswap {
 #endif
 
 #define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
-	((ot) & DMU_OT_METADATA) : \
+	(((ot) & DMU_OT_METADATA) != 0) : \
 	DMU_OT_IS_METADATA_IMPL(ot))
 
 #define	DMU_OT_IS_DDT(ot) \
@@ -147,7 +147,7 @@ typedef enum dmu_object_byteswap {
 	((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
 
 #define	DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
-	((ot) & DMU_OT_ENCRYPTED) : \
+	(((ot) & DMU_OT_ENCRYPTED) != 0) : \
 	DMU_OT_IS_ENCRYPTED_IMPL(ot))
 
 /*

From dff541f698d616ed9f9b1ad3afa44e450efdad7a Mon Sep 17 00:00:00 2001
From: pkubaj <pkubaj@anongoth.pl>
Date: Thu, 8 Sep 2022 17:27:25 +0000
Subject: [PATCH 18/69] Fix build on FreeBSD/powerpc64*

There's no VSX handler on FreeBSD for now.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Piotr Kubaj <pkubaj@FreeBSD.org>
Closes #13848
---
 module/icp/algs/blake3/blake3_x86-64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c
index a7552bdde4a..aecd29edb16 100644
--- a/module/icp/algs/blake3/blake3_x86-64.c
+++ b/module/icp/algs/blake3/blake3_x86-64.c
@@ -74,7 +74,7 @@ static boolean_t blake3_is_sse2_supported(void)
 {
 #if defined(__x86_64)
 	return (kfpu_allowed() && zfs_sse2_available());
-#elif defined(__PPC64__)
+#elif defined(__PPC64__) && defined(__linux__)
 	return (kfpu_allowed() && zfs_vsx_available());
 #else
 	return (kfpu_allowed());
@@ -140,7 +140,7 @@ static boolean_t blake3_is_sse41_supported(void)
 {
 #if defined(__x86_64)
 	return (kfpu_allowed() && zfs_sse4_1_available());
-#elif defined(__PPC64__)
+#elif defined(__PPC64__) && defined(__linux__)
 	return (kfpu_allowed() && zfs_vsx_available());
 #else
 	return (kfpu_allowed());

From 380b08098edf152b1d98e4f48b9577ce44d39166 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 8 Sep 2022 13:28:20 -0400
Subject: [PATCH 19/69] Linux SPL module init: Handle memory allocation
 failures correctly

Upon inspection of our code, I noticed that we assume that
__alloc_percpu() cannot fail, and while it probably never has failed in
practice, technically, it can fail, so we should handle that.

Additionally, we incorrectly assume that `taskq_create()` in
spl_kmem_cache_init() cannot fail. The same remark applies to it.

Lastly, `spl_init()` failures should always return negative error
values, but in some places, we are returning positive 1, which is
incorrect. We change those values to their correct error codes.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13847
---
 module/os/linux/spl/spl-generic.c    | 12 ++++++++++--
 module/os/linux/spl/spl-kmem-cache.c |  3 +++
 module/os/linux/spl/spl-taskq.c      |  6 +++---
 module/os/linux/spl/spl-tsd.c        |  2 +-
 module/os/linux/spl/spl-zlib.c       |  2 +-
 5 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
index 5179100d166..de91c44257a 100644
--- a/module/os/linux/spl/spl-generic.c
+++ b/module/os/linux/spl/spl-generic.c
@@ -705,7 +705,7 @@ spl_kvmem_init(void)
  * initialize each of the per-cpu seeds so that the sequences generated on each
  * CPU are guaranteed to never overlap in practice.
  */
-static void __init
+static int __init
 spl_random_init(void)
 {
 	uint64_t s[2];
@@ -714,6 +714,9 @@ spl_random_init(void)
 	spl_pseudo_entropy = __alloc_percpu(2 * sizeof (uint64_t),
 	    sizeof (uint64_t));
 
+	if (!spl_pseudo_entropy)
+		return (-ENOMEM);
+
 	get_random_bytes(s, sizeof (s));
 
 	if (s[0] == 0 && s[1] == 0) {
@@ -737,6 +740,8 @@ spl_random_init(void)
 		wordp[0] = s[0];
 		wordp[1] = s[1];
 	}
+
+	return (0);
 }
 
 static void
@@ -757,7 +762,8 @@ spl_init(void)
 {
 	int rc = 0;
 
-	spl_random_init();
+	if ((rc = spl_random_init()))
+		goto out0;
 
 	if ((rc = spl_kvmem_init()))
 		goto out1;
@@ -800,6 +806,8 @@ spl_init(void)
 out2:
 	spl_kvmem_fini();
 out1:
+	spl_random_fini();
+out0:
 	return (rc);
 }
 
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index ba4ca49a2ac..efb8d0c3033 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -1452,6 +1452,9 @@ spl_kmem_cache_init(void)
 	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
 	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
+	if (spl_kmem_cache_taskq == NULL)
+		return (-ENOMEM);
+
 	return (0);
 }
 
diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c
index 0aab148975a..3b0c29606c2 100644
--- a/module/os/linux/spl/spl-taskq.c
+++ b/module/os/linux/spl/spl-taskq.c
@@ -1379,7 +1379,7 @@ spl_taskq_init(void)
 	system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
 	if (system_taskq == NULL)
-		return (1);
+		return (-ENOMEM);
 
 	system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
 	    maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
@@ -1388,7 +1388,7 @@ spl_taskq_init(void)
 		cpuhp_remove_multi_state(spl_taskq_cpuhp_state);
 #endif
 		taskq_destroy(system_taskq);
-		return (1);
+		return (-ENOMEM);
 	}
 
 	dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
@@ -1399,7 +1399,7 @@ spl_taskq_init(void)
 #endif
 		taskq_destroy(system_taskq);
 		taskq_destroy(system_delay_taskq);
-		return (1);
+		return (-ENOMEM);
 	}
 
 	/*
diff --git a/module/os/linux/spl/spl-tsd.c b/module/os/linux/spl/spl-tsd.c
index 546db9ab8bd..389c9d0d6df 100644
--- a/module/os/linux/spl/spl-tsd.c
+++ b/module/os/linux/spl/spl-tsd.c
@@ -706,7 +706,7 @@ spl_tsd_init(void)
 {
 	tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
 	if (tsd_hash_table == NULL)
-		return (1);
+		return (-ENOMEM);
 
 	return (0);
 }
diff --git a/module/os/linux/spl/spl-zlib.c b/module/os/linux/spl/spl-zlib.c
index 589496da0c7..8c6282ee5d1 100644
--- a/module/os/linux/spl/spl-zlib.c
+++ b/module/os/linux/spl/spl-zlib.c
@@ -204,7 +204,7 @@ spl_zlib_init(void)
 	    size, 0, NULL, NULL, NULL, NULL, NULL,
 	    KMC_KVMEM);
 	if (!zlib_workspace_cache)
-		return (1);
+		return (-ENOMEM);
 
 	return (0);
 }

From 320f0c6022e1c9bdc9063f849c6b2e4fa3b93995 Mon Sep 17 00:00:00 2001
From: Finix1979 <yanchongwen@hotmail.com>
Date: Fri, 9 Sep 2022 01:29:41 +0800
Subject: [PATCH 20/69] Add Linux posix_fadvise support

The purpose of this PR is to accept posix_fadvise() calls from userland
to do read-ahead on demand.

It could dramatically improve sequential read performance especially
when primarycache is set to metadata or zfs_prefetch_disable is 1.

If the file is mmapped, generic_fadvise is also called for page cache
read-ahead in addition to dmu_prefetch.

Only POSIX_FADV_WILLNEED and POSIX_FADV_SEQUENTIAL are supported in
this PR currently.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Finix Yan <yancw@info2soft.com>
Closes #13694
---
 config/kernel-fadvise.m4                      | 23 +++++
 config/kernel-generic_fadvise.m4              | 27 ++++++
 config/kernel.m4                              |  4 +
 module/os/linux/zfs/zpl_file.c                | 62 ++++++++++++
 tests/runfiles/linux.run                      |  4 +
 tests/zfs-tests/cmd/.gitignore                |  1 +
 tests/zfs-tests/cmd/Makefile.am               |  3 +
 tests/zfs-tests/cmd/file/file_fadvise.c       | 97 +++++++++++++++++++
 tests/zfs-tests/include/commands.cfg          |  1 +
 tests/zfs-tests/tests/Makefile.am             |  3 +
 .../functional/checksum/filetest_002_pos.ksh  |  2 +-
 .../tests/functional/fadvise/cleanup.ksh      | 28 ++++++
 .../functional/fadvise/fadvise_sequential.ksh | 80 +++++++++++++++
 .../tests/functional/fadvise/setup.ksh        | 30 ++++++
 .../functional/fault/auto_spare_002_pos.ksh   |  2 +-
 15 files changed, 365 insertions(+), 2 deletions(-)
 create mode 100644 config/kernel-fadvise.m4
 create mode 100644 config/kernel-generic_fadvise.m4
 create mode 100644 tests/zfs-tests/cmd/file/file_fadvise.c
 create mode 100755 tests/zfs-tests/tests/functional/fadvise/cleanup.ksh
 create mode 100755 tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh
 create mode 100755 tests/zfs-tests/tests/functional/fadvise/setup.ksh

diff --git a/config/kernel-fadvise.m4 b/config/kernel-fadvise.m4
new file mode 100644
index 00000000000..08912de16ed
--- /dev/null
+++ b/config/kernel-fadvise.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # Linux 4.19 API
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FADVISE], [
+	ZFS_LINUX_TEST_SRC([file_fadvise], [
+		#include <linux/fs.h>
+
+		static const struct file_operations
+		    fops __attribute__ ((unused)) = {
+			.fadvise = NULL,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FADVISE], [
+	AC_MSG_CHECKING([whether fops->fadvise() exists])
+	ZFS_LINUX_TEST_RESULT([file_fadvise], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FILE_FADVISE, 1, [fops->fadvise() exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/config/kernel-generic_fadvise.m4 b/config/kernel-generic_fadvise.m4
new file mode 100644
index 00000000000..8d122064b22
--- /dev/null
+++ b/config/kernel-generic_fadvise.m4
@@ -0,0 +1,27 @@
+dnl #
+dnl # 5.3 API change
+dnl # The generic_fadvise() function is present since 4.19 kernel
+dnl # but it was not exported until Linux 5.3.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FADVISE], [
+	ZFS_LINUX_TEST_SRC([generic_fadvise], [
+		#include <linux/fs.h>
+	], [
+		struct file *fp __attribute__ ((unused)) = NULL;
+		loff_t offset __attribute__ ((unused)) = 0;
+		loff_t len __attribute__ ((unused)) = 0;
+		int advise __attribute__ ((unused)) = 0;
+		generic_fadvise(fp, offset, len, advise);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FADVISE], [
+	AC_MSG_CHECKING([whether generic_fadvise() is available])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([generic_fadvise],
+	[generic_fadvise], [mm/fadvise.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_GENERIC_FADVISE, 1, [yes])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 1f274cbe4f3..6aad2cf88e0 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -42,6 +42,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE
 	ZFS_AC_KERNEL_SRC_PDE_DATA
 	ZFS_AC_KERNEL_SRC_FALLOCATE
+	ZFS_AC_KERNEL_SRC_FADVISE
+	ZFS_AC_KERNEL_SRC_GENERIC_FADVISE
 	ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
 	ZFS_AC_KERNEL_SRC_RWSEM
 	ZFS_AC_KERNEL_SRC_SCHED
@@ -161,6 +163,8 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_OBJTOOL
 	ZFS_AC_KERNEL_PDE_DATA
 	ZFS_AC_KERNEL_FALLOCATE
+	ZFS_AC_KERNEL_FADVISE
+	ZFS_AC_KERNEL_GENERIC_FADVISE
 	ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
 	ZFS_AC_KERNEL_RWSEM
 	ZFS_AC_KERNEL_SCHED
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 43b7fb60a99..b0d9f37a3ec 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -27,6 +27,7 @@
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 #endif
+#include <linux/fs.h>
 #include <sys/file.h>
 #include <sys/dmu_objset.h>
 #include <sys/zfs_znode.h>
@@ -37,6 +38,9 @@
     defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO)
 #include <linux/pagemap.h>
 #endif
+#ifdef HAVE_FILE_FADVISE
+#include <linux/fadvise.h>
+#endif
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 #include <linux/writeback.h>
 #endif
@@ -906,6 +910,61 @@ zpl_ioctl_getversion(struct file *filp, void __user *arg)
 	return (copy_to_user(arg, &generation, sizeof (generation)));
 }
 
+#ifdef HAVE_FILE_FADVISE
+static int
+zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
+{
+	struct inode *ip = file_inode(filp);
+	znode_t *zp = ITOZ(ip);
+	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	objset_t *os = zfsvfs->z_os;
+	int error = 0;
+
+	if (S_ISFIFO(ip->i_mode))
+		return (-ESPIPE);
+
+	if (offset < 0 || len < 0)
+		return (-EINVAL);
+
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
+
+	switch (advice) {
+	case POSIX_FADV_SEQUENTIAL:
+	case POSIX_FADV_WILLNEED:
+#ifdef HAVE_GENERIC_FADVISE
+		if (zn_has_cached_data(zp))
+			error = generic_fadvise(filp, offset, len, advice);
+#endif
+		/*
+		 * Pass on the caller's size directly, but note that
+		 * dmu_prefetch_max will effectively cap it.  If there
+		 * really is a larger sequential access pattern, perhaps
+		 * dmu_zfetch will detect it.
+		 */
+		if (len == 0)
+			len = i_size_read(ip) - offset;
+
+		dmu_prefetch(os, zp->z_id, 0, offset, len,
+		    ZIO_PRIORITY_ASYNC_READ);
+		break;
+	case POSIX_FADV_NORMAL:
+	case POSIX_FADV_RANDOM:
+	case POSIX_FADV_DONTNEED:
+	case POSIX_FADV_NOREUSE:
+		/* ignored for now */
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+	ZFS_EXIT(zfsvfs);
+
+	return (error);
+}
+#endif /* HAVE_FILE_FADVISE */
+
 #define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
 #define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)
 
@@ -1259,6 +1318,9 @@ const struct file_operations zpl_file_operations = {
 	.aio_fsync	= zpl_aio_fsync,
 #endif
 	.fallocate	= zpl_fallocate,
+#ifdef HAVE_FILE_FADVISE
+	.fadvise	= zpl_fadvise,
+#endif
 	.unlocked_ioctl	= zpl_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= zpl_compat_ioctl,
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 9b32e73afb1..09dfb5eb1e1 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -89,6 +89,10 @@ tags = ['functional', 'devices']
 tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill']
 tags = ['functional', 'events']
 
+[tests/functional/fadvise:Linux]
+tests = ['fadvise_sequential']
+tags = ['functional', 'fadvise']
+
 [tests/functional/fallocate:Linux]
 tests = ['fallocate_prealloc', 'fallocate_zero-range']
 tags = ['functional', 'fallocate']
diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore
index 20d1382532b..1fd54c1dd51 100644
--- a/tests/zfs-tests/cmd/.gitignore
+++ b/tests/zfs-tests/cmd/.gitignore
@@ -4,6 +4,7 @@
 /devname2devid
 /dir_rd_update
 /draid
+/file_fadvise
 /file_append
 /file_check
 /file_trunc
diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am
index 3c8faf5afbb..c19c870cf69 100644
--- a/tests/zfs-tests/cmd/Makefile.am
+++ b/tests/zfs-tests/cmd/Makefile.am
@@ -128,4 +128,7 @@ scripts_zfs_tests_bin_PROGRAMS  += %D%/read_dos_attributes %D%/write_dos_attribu
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file
 %C%_randfree_file_SOURCES       = %D%/file/randfree_file.c
+
+scripts_zfs_tests_bin_PROGRAMS += %D%/file_fadvise
+%C%_file_fadvise_SOURCES  = %D%/file/file_fadvise.c
 endif
diff --git a/tests/zfs-tests/cmd/file/file_fadvise.c b/tests/zfs-tests/cmd/file/file_fadvise.c
new file mode 100644
index 00000000000..e1afb6d0a11
--- /dev/null
+++ b/tests/zfs-tests/cmd/file/file_fadvise.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2022 by Information2 Software, Inc. All rights reserved.
+ */
+
+#include "file_common.h"
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+/*
+ * Apply posix_fadvise() advice to a file; usage() prints the synopsis.
+ */
+static const char *execname = "file_fadvise";
+
+static void
+usage(void)
+{
+	(void) fprintf(stderr,
+	    "usage: %s -f filename -a advise \n", execname);
+}
+
+/* Apply advice -a to all of file -f; exit 1 if args, open or fadvise fail. */
+int
+main(int argc, char *argv[])
+{
+	char *filename = NULL;
+	int advise = 0;
+	int fd, ch, err = 0;
+
+	while ((ch = getopt(argc, argv, "a:f:")) != EOF) {
+		switch (ch) {
+		case 'a':
+			advise = atoll(optarg);
+			break;
+		case 'f':
+			filename = optarg;
+			break;
+		case '?':
+			(void) printf("unknown arg %c\n", optopt);
+			usage();
+			break;
+		}
+	}
+
+	if (!filename) {
+		(void) printf("Filename not specified (-f <file>)\n");
+		err++;
+	}
+	if (advise < POSIX_FADV_NORMAL || advise > POSIX_FADV_NOREUSE) {
+		(void) printf("advise is invalid\n");
+		err++;
+	}
+
+	if (err) {
+		usage();
+		return (1);
+	}
+
+	if ((fd = open(filename, O_RDWR, 0666)) < 0) {
+		perror("open");
+		return (1);
+	}
+
+	/* posix_fadvise() returns the error number; it does not set errno. */
+	if ((err = posix_fadvise(fd, 0, 0, advise)) != 0)
+		(void) fprintf(stderr, "posix_fadvise: %s\n", strerror(err));
+	(void) close(fd);
+
+	return (err != 0);
+}
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
index 4098562210b..c05b918325b 100644
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -184,6 +184,7 @@ export ZFSTEST_FILES='badsend
     devname2devid
     dir_rd_update
     draid
+    file_fadvise
     file_append
     file_check
     file_trunc
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 89b2ca866c2..d53316643bc 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1370,6 +1370,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/exec/exec_001_pos.ksh \
 	functional/exec/exec_002_neg.ksh \
 	functional/exec/setup.ksh \
+	functional/fadvise/cleanup.ksh \
+	functional/fadvise/fadvise_sequential.ksh \
+	functional/fadvise/setup.ksh \
 	functional/fallocate/cleanup.ksh \
 	functional/fallocate/fallocate_prealloc.ksh \
 	functional/fallocate/fallocate_punch-hole.ksh \
diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh
index a0be1c2050b..23e7aa57748 100755
--- a/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/checksum/filetest_002_pos.ksh
@@ -76,7 +76,7 @@ while [[ $j -lt ${#CHECKSUM_TYPES[*]} ]]; do
 	log_must zpool export $TESTPOOL
 	log_must zpool import $TESTPOOL
 
-	log_mustnot eval "cat $TESTDIR/test_$type >/dev/null"
+	log_mustnot eval "dd if=$TESTDIR/test_$type of=/dev/null bs=$WRITESZ count=$NWRITES"
 
 	cksum=$(zpool status -P -v $TESTPOOL | grep "$firstvdev" | \
 	    awk '{print $5}')
diff --git a/tests/zfs-tests/tests/functional/fadvise/cleanup.ksh b/tests/zfs-tests/tests/functional/fadvise/cleanup.ksh
new file mode 100755
index 00000000000..8b5b43a74c1
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fadvise/cleanup.ksh
@@ -0,0 +1,28 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Portions Copyright (c) 2022 Information2 Software, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh b/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh
new file mode 100755
index 00000000000..7b7d1d379ac
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fadvise/fadvise_sequential.ksh
@@ -0,0 +1,80 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright (c) 2022 Information2 Software, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/math.shlib
+
+#
+# DESCRIPTION:
+# Test posix_fadvise.
+#
+# STRATEGY:
+# 1. Set primarycache to metadata in order to disable prefetch
+# 2. Write some data to file
+# 3. get data_size field from arcstat
+# 4. call file_fadvise with POSIX_FADV_SEQUENTIAL
+# 5. get data_size field from arcstat again
+# 6. latter data_size should be bigger than former one
+#
+
+# NOTE: if HAVE_FILE_FADVISE is not defined, the former data_size
+# should be less than or equal to the latter one
+
+verify_runnable "global"
+
+FILE=$TESTDIR/$TESTFILE0
+BLKSZ=$(get_prop recordsize $TESTPOOL)
+
+function cleanup
+{
+	log_must zfs set primarycache=all $TESTPOOL
+	[[ -e $TESTDIR ]] && log_must rm -Rf $TESTDIR/*
+}
+
+getstat() {
+	awk -v c="$1" '$1 == c {print $3; exit}' /proc/spl/kstat/zfs/arcstats
+}
+
+log_assert "Ensure fadvise prefetch data"
+
+log_onexit cleanup
+
+log_must zfs set primarycache=metadata $TESTPOOL
+
+log_must file_write -o create -f $FILE -b $BLKSZ -c 1000
+sync_pool $TESTPOOL
+
+data_size1=$(getstat data_size)
+
+log_must file_fadvise -f $FILE -a 2
+sleep 10
+
+data_size2=$(getstat data_size)
+log_note "original data_size is $data_size1, final data_size is $data_size2"
+
+log_must [ $data_size1 -le $data_size2 ]
+
+log_pass "Ensure data could be prefetched"
diff --git a/tests/zfs-tests/tests/functional/fadvise/setup.ksh b/tests/zfs-tests/tests/functional/fadvise/setup.ksh
new file mode 100755
index 00000000000..8ddd73307bb
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fadvise/setup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Portions Copyright (c) 2022 Information2 Software, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+default_setup_noexit $DISK
+log_pass
diff --git a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
index e9517bad713..bd32be9a4ff 100755
--- a/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/fault/auto_spare_002_pos.ksh
@@ -73,7 +73,7 @@ for type in "mirror" "raidz" "raidz2"; do
 
 	# 4. Inject CHECKSUM ERRORS on read with a zinject error handler
 	log_must zinject -d $FAULT_FILE -e corrupt -f 50 -T read $TESTPOOL
-	log_must cp $TESTFILE /dev/null
+	log_must dd if=$TESTFILE of=/dev/null bs=1M count=64
 
 	# 5. Verify the ZED kicks in a hot spare and expected pool/device status
 	log_note "Wait for ZED to auto-spare"

From 37f6845c6f86b1d04593e55d94318326006f4b5d Mon Sep 17 00:00:00 2001
From: Alexander Motin <mav@FreeBSD.org>
Date: Thu, 8 Sep 2022 13:30:53 -0400
Subject: [PATCH 21/69] Improve too large physical ashift handling

When iterating through children physical ashifts for vdev, prefer
ones above the maximum logical ashift, that we can actually use,
but within the administrator defined maximum.

When selecting top-level vdev ashift, do not set it to the defined
maximum in case physical ashift is even higher, but just ignore one.
Using the maximum does not prevent misaligned writes, but reduces
space efficiency.  Since ZFS tries to write data sequentially and
aggregates the writes, in many cases large misaligned writes may be
not as bad as the space penalty otherwise.

Allow internal physical ashifts for vdevs higher than SHIFT_MAX.
Maybe one day the allocator or aggregation could benefit from that.

Reduce zfs_vdev_max_auto_ashift default from 16 (64KB) to 14 (16KB),
so that ZFS may still use bigger ashifts up to SHIFT_MAX (64KB),
but only if it really has to or explicitly told to, but not as an
"optimization".

There are some read-intensive NVMe SSDs that report Preferred Write
Alignment of 64KB, and attempt to build RAIDZ2 of those leads to a
space inefficiency that can't be justified.  Instead these changes
make ZFS fall back to logical ashift of 12 (4KB) by default and
only warn user that it may be suboptimal for performance.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #13798
---
 include/sys/vdev_impl.h                       |  1 +
 man/man4/zfs.4                                |  5 ++-
 module/os/freebsd/zfs/vdev_geom.c             |  3 +-
 module/zfs/vdev.c                             | 36 +++++++++++++++++--
 module/zfs/vdev_draid.c                       | 10 ++++--
 module/zfs/vdev_mirror.c                      | 10 ++++--
 module/zfs/vdev_raidz.c                       | 10 ++++--
 tests/zfs-tests/include/tunables.cfg          |  2 ++
 .../cli_root/zpool_add/add-o_ashift.ksh       |  5 ++-
 9 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index d22abfbc259..470eaa763d5 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -641,6 +641,7 @@ extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
  */
 int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
 void vdev_metaslab_group_create(vdev_t *vd);
+uint64_t vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b);
 
 /*
  * Vdev ashift optimization tunables
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index cc55ee32ba2..cecaf7e7f0a 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -347,9 +347,12 @@ When a vdev is added, target this number of metaslabs per top-level vdev.
 .It Sy zfs_vdev_default_ms_shift Ns = Ns Sy 29 Po 512 MiB Pc Pq int
 Default limit for metaslab size.
 .
-.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy ASHIFT_MAX Po 16 Pc Pq ulong
+.It Sy zfs_vdev_max_auto_ashift Ns = Ns Sy 14 Pq ulong
 Maximum ashift used when optimizing for logical \[->] physical sector size on new
 top-level vdevs.
+May be increased up to
+.Sy ASHIFT_MAX Po 16 Pc ,
+but this may negatively impact pool space efficiency.
 .
 .It Sy zfs_vdev_min_auto_ashift Ns = Ns Sy ASHIFT_MIN Po 9 Pc Pq ulong
 Minimum ashift used when creating new top-level vdevs.
diff --git a/module/os/freebsd/zfs/vdev_geom.c b/module/os/freebsd/zfs/vdev_geom.c
index f3b4846f4e6..fef6a1b88e3 100644
--- a/module/os/freebsd/zfs/vdev_geom.c
+++ b/module/os/freebsd/zfs/vdev_geom.c
@@ -955,8 +955,7 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
 	*physical_ashift = 0;
 	if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
-	    ISP2(pp->stripesize) && pp->stripesize <= (1 << ASHIFT_MAX) &&
-	    pp->stripeoffset == 0)
+	    ISP2(pp->stripesize) && pp->stripeoffset == 0)
 		*physical_ashift = highbit(pp->stripesize) - 1;
 
 	/*
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index ea0245610fb..048616c253c 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -136,7 +136,15 @@ int zfs_vdev_standard_sm_blksz = (1 << 17);
  */
 int zfs_nocacheflush = 0;
 
-uint64_t zfs_vdev_max_auto_ashift = ASHIFT_MAX;
+/*
+ * Maximum and minimum ashift values that can be automatically set based on
+ * vdev's physical ashift (disk's physical sector size).  While ASHIFT_MAX
+ * is higher than the maximum value, it is intentionally limited here to not
+ * excessively impact pool space efficiency.  Higher ashift values may still
+ * be forced by vdev logical ashift or by user via ashift property, but won't
+ * be set automatically as a performance optimization.
+ */
+uint64_t zfs_vdev_max_auto_ashift = 14;
 uint64_t zfs_vdev_min_auto_ashift = ASHIFT_MIN;
 
 void
@@ -1845,6 +1853,24 @@ vdev_set_deflate_ratio(vdev_t *vd)
 	}
 }
 
+/*
+ * Pick the better of two candidate ashifts.  A candidate is usable when it
+ * is above the logical ashift (absolute minimum) and no higher than the
+ * administrator defined maximum.  If exactly one candidate is usable it
+ * wins; in every other case the larger of the two is returned.
+ */
+uint64_t
+vdev_best_ashift(uint64_t logical, uint64_t a, uint64_t b)
+{
+	boolean_t a_ok = (a > logical && a <= zfs_vdev_max_auto_ashift);
+	boolean_t b_ok = (b > logical && b <= zfs_vdev_max_auto_ashift);
+
+	/* Exactly one usable candidate wins outright. */
+	if (a_ok != b_ok)
+		return (a_ok ? a : b);
+	return (MAX(a, b));
+}
+
 /*
  * Maximize performance by inflating the configured ashift for top level
  * vdevs to be as close to the physical ashift as possible while maintaining
@@ -1856,7 +1882,8 @@ vdev_ashift_optimize(vdev_t *vd)
 {
 	ASSERT(vd == vd->vdev_top);
 
-	if (vd->vdev_ashift < vd->vdev_physical_ashift) {
+	if (vd->vdev_ashift < vd->vdev_physical_ashift &&
+	    vd->vdev_physical_ashift <= zfs_vdev_max_auto_ashift) {
 		vd->vdev_ashift = MIN(
 		    MAX(zfs_vdev_max_auto_ashift, vd->vdev_ashift),
 		    MAX(zfs_vdev_min_auto_ashift,
@@ -4463,7 +4490,10 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
 		vs->vs_configured_ashift = vd->vdev_top != NULL
 		    ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
 		vs->vs_logical_ashift = vd->vdev_logical_ashift;
-		vs->vs_physical_ashift = vd->vdev_physical_ashift;
+		if (vd->vdev_physical_ashift <= ASHIFT_MAX)
+			vs->vs_physical_ashift = vd->vdev_physical_ashift;
+		else
+			vs->vs_physical_ashift = 0;
 
 		/*
 		 * Report fragmentation and rebuild progress for top-level,
diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c
index 24034d9d931..24ea5d2cbe1 100644
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
@@ -1496,8 +1496,14 @@ vdev_draid_calculate_asize(vdev_t *vd, uint64_t *asizep, uint64_t *max_asizep,
 		asize = MIN(asize - 1, cvd->vdev_asize - 1) + 1;
 		max_asize = MIN(max_asize - 1, cvd->vdev_max_asize - 1) + 1;
 		logical_ashift = MAX(logical_ashift, cvd->vdev_ashift);
-		physical_ashift = MAX(physical_ashift,
-		    cvd->vdev_physical_ashift);
+	}
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_ops == &vdev_draid_spare_ops)
+			continue;
+		physical_ashift = vdev_best_ashift(logical_ashift,
+		    physical_ashift, cvd->vdev_physical_ashift);
 	}
 
 	*asizep = asize;
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c
index 3879de68045..f9a01c9f53f 100644
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -409,8 +409,14 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
 		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
-		*physical_ashift = MAX(*physical_ashift,
-		    cvd->vdev_physical_ashift);
+	}
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error)
+			continue;
+		*physical_ashift = vdev_best_ashift(*logical_ashift,
+		    *physical_ashift, cvd->vdev_physical_ashift);
 	}
 
 	if (numerrors == vd->vdev_children) {
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index b4daf642ed2..5a44983e551 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1527,8 +1527,14 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
 		*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
 		*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
 		*logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift);
-		*physical_ashift = MAX(*physical_ashift,
-		    cvd->vdev_physical_ashift);
+	}
+	for (c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+
+		if (cvd->vdev_open_error != 0)
+			continue;
+		*physical_ashift = vdev_best_ashift(*logical_ashift,
+		    *physical_ashift, cvd->vdev_physical_ashift);
 	}
 
 	*asize *= vd->vdev_children;
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index d6a2fe5db7c..80e7bcb3bd0 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -81,7 +81,9 @@ TRIM_TXG_BATCH			trim.txg_batch			zfs_trim_txg_batch
 TXG_HISTORY			txg.history			zfs_txg_history
 TXG_TIMEOUT			txg.timeout			zfs_txg_timeout
 UNLINK_SUSPEND_PROGRESS		UNSUPPORTED			zfs_unlink_suspend_progress
+VDEV_FILE_LOGICAL_ASHIFT	vdev.file.logical_ashift	vdev_file_logical_ashift
 VDEV_FILE_PHYSICAL_ASHIFT	vdev.file.physical_ashift	vdev_file_physical_ashift
+VDEV_MAX_AUTO_ASHIFT		vdev.max_auto_ashift		zfs_vdev_max_auto_ashift
 VDEV_MIN_MS_COUNT		vdev.min_ms_count		zfs_vdev_min_ms_count
 VDEV_VALIDATE_SKIP		vdev.validate_skip		vdev_validate_skip
 VOL_INHIBIT_DEV			UNSUPPORTED			zvol_inhibit_dev
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
index 8d5ce5efa52..0166e84baa1 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add-o_ashift.ksh
@@ -57,7 +57,9 @@ disk2=$TEST_BASE_DIR/disk2
 log_must mkfile $SIZE $disk1
 log_must mkfile $SIZE $disk2
 
+logical_ashift=$(get_tunable VDEV_FILE_LOGICAL_ASHIFT)
 orig_ashift=$(get_tunable VDEV_FILE_PHYSICAL_ASHIFT)
+max_auto_ashift=$(get_tunable VDEV_MAX_AUTO_ASHIFT)
 
 typeset ashifts=("9" "10" "11" "12" "13" "14" "15" "16")
 for ashift in ${ashifts[@]}
@@ -77,7 +79,8 @@ do
 	log_must zpool create $TESTPOOL $disk1
 	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $ashift
 	log_must zpool add $TESTPOOL $disk2
-	log_must verify_ashift $disk2 $ashift
+	exp=$(( (ashift <= max_auto_ashift) ? ashift : logical_ashift ))
+	log_must verify_ashift $disk2 $exp
 
 	# clean things for the next run
 	log_must set_tunable64 VDEV_FILE_PHYSICAL_ASHIFT $orig_ashift

From e27e692bcc2c3e5b79f60ef16a2183f2231ff012 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Thu, 8 Sep 2022 10:32:30 -0700
Subject: [PATCH 22/69] zed: Fix config_sync autoexpand flood

Users were seeing floods of `config_sync` events when autoexpand was
enabled.  This happened because all "disk status change" udev events
invoke the autoexpand codepath, which calls zpool_relabel_disk(),
which in turn cause another "disk status change" event to happen,
in a feedback loop.  Note that "disk status change" happens every time
a user calls close() on a block device.

This commit breaks the feedback loop by only allowing an autoexpand
to happen if the disk actually changed size.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #7132
Closes: #7366
Closes #13729
---
 cmd/zed/agents/zfs_mod.c   | 155 +++++++++++++++++++++++++++++++++++--
 cmd/zed/zed_disk_event.c   |  16 ++++
 include/sys/sysevent/dev.h |   3 +
 3 files changed, 166 insertions(+), 8 deletions(-)

diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index d75854f2875..7364dd2c628 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -894,14 +894,90 @@ zfs_deliver_check(nvlist_t *nvl)
 	return (0);
 }
 
+/*
+ * Given a path to a vdev, lookup the vdev's physical size from its
+ * config nvlist.
+ *
+ * Returns the vdev's physical size in bytes on success, 0 on error.
+ */
+static uint64_t
+vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+	nvlist_t *nvl = NULL;
+	boolean_t avail_spare, l2cache, log;
+	vdev_stat_t *vs = NULL;
+	uint_t c;
+
+	nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+	if (!nvl)
+		return (0);
+
+	/* Honor the "0 on error" contract instead of aborting zed. */
+	if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) != 0 || vs == NULL) {
+		zed_log_msg(LOG_INFO, "%s: no vdev stats for '%s'", __func__,
+		    vdev_path);
+		return (0);
+	}
+
+	return (vs->vs_pspace);
+}
+
+/*
+ * Given a path to a vdev, lookup if the vdev is a "whole disk" in the
+ * config nvlist.  "whole disk" means that ZFS was passed a whole disk
+ * at pool creation time, which it partitioned up and has full control over.
+ * Thus a partition with wholedisk=1 set tells us that zfs created the
+ * partition at creation time.  A partition without whole disk set would have
+ * been created externally (like with fdisk) and passed to ZFS.
+ *
+ * Returns the whole disk value, or 0 if the vdev or property is not found.
+ */
+static uint64_t
+vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+	nvlist_t *nvl = NULL;
+	boolean_t avail_spare, l2cache, log;
+	uint64_t wholedisk = 0;
+
+	nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+	if (!nvl)
+		return (0);
+
+	/* A missing key is not fatal: wholedisk simply stays 0. */
+	(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
+
+	return (wholedisk);
+}
+
+/*
+ * Evaluates to true if the device size grew by roughly 1% or more.
+ */
+#define	DEVICE_GREW(oldsize, newsize) \
+		    (((newsize) > (oldsize)) && \
+		    (((newsize) / ((newsize) - (oldsize))) <= 100))
+
 static int
 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 {
-	char *devname = data;
 	boolean_t avail_spare, l2cache;
+	nvlist_t *udev_nvl = data;
 	nvlist_t *tgt;
 	int error;
 
+	char *tmp_devname, devname[MAXPATHLEN];
+	uint64_t guid;
+
+	if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
+		sprintf(devname, "%llu", (u_longlong_t)guid);
+	} else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
+	    &tmp_devname) == 0) {
+		strlcpy(devname, tmp_devname, MAXPATHLEN);
+		zfs_append_partition(devname, MAXPATHLEN);
+	} else {
+		zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
+	}
+
 	zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
 	    devname, zpool_get_name(zhp));
 
@@ -953,12 +1029,75 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 			vdev_state_t newstate;
 
 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
-				error = zpool_vdev_online(zhp, fullpath, 0,
-				    &newstate);
-				zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
-				    "setting device '%s' to ONLINE state "
-				    "in pool '%s': %d", fullpath,
-				    zpool_get_name(zhp), error);
+				/*
+				 * If this disk size has not changed, then
+				 * there's no need to do an autoexpand.  To
+				 * check we look at the disk's size in its
+				 * config, and compare it to the disk size
+				 * that udev is reporting.
+				 */
+				uint64_t udev_size = 0, conf_size = 0,
+				    wholedisk = 0, udev_parent_size = 0;
+
+				/*
+				 * Get the size of our disk that udev is
+				 * reporting.
+				 */
+				if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
+				    &udev_size) != 0) {
+					udev_size = 0;
+				}
+
+				/*
+				 * Get the size of our disk's parent device
+				 * from udev (where sda1's parent is sda).
+				 */
+				if (nvlist_lookup_uint64(udev_nvl,
+				    DEV_PARENT_SIZE, &udev_parent_size) != 0) {
+					udev_parent_size = 0;
+				}
+
+				conf_size = vdev_size_from_config(zhp,
+				    fullpath);
+
+				wholedisk = vdev_whole_disk_from_config(zhp,
+				    fullpath);
+
+				/*
+				 * Only attempt an autoexpand if the vdev size
+				 * changed.  There are two different cases
+				 * to consider.
+				 *
+				 * 1. wholedisk=1
+				 * If you do a 'zpool create' on a whole disk
+				 * (like /dev/sda), then zfs will create
+				 * partitions on the disk (like /dev/sda1).  In
+				 * that case, wholedisk=1 will be set in the
+				 * partition's nvlist config.  So zed will need
+				 * to see if your parent device (/dev/sda)
+				 * expanded in size, and if so, then attempt
+				 * the autoexpand.
+				 *
+				 * 2. wholedisk=0
+				 * If you do a 'zpool create' on an existing
+				 * partition, or a device that doesn't allow
+				 * partitions, then wholedisk=0, and you will
+				 * simply need to check if the device itself
+				 * expanded in size.
+				 */
+				if (DEVICE_GREW(conf_size, udev_size) ||
+				    (wholedisk && DEVICE_GREW(conf_size,
+				    udev_parent_size))) {
+					error = zpool_vdev_online(zhp, fullpath,
+					    0, &newstate);
+
+					zed_log_msg(LOG_INFO,
+					    "%s: autoexpanding '%s' from %llu"
+					    " to %llu bytes in pool '%s': %d",
+					    __func__, fullpath, conf_size,
+					    MAX(udev_size, udev_parent_size),
+					    zpool_get_name(zhp), error);
+				}
 			}
 		}
 		zpool_close(zhp);
@@ -989,7 +1128,7 @@ zfs_deliver_dle(nvlist_t *nvl)
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
 	}
 
-	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
+	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
 		    "found", name);
 		return (1);
diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c
index 8845c5b2d00..3c8e2fb38c1 100644
--- a/cmd/zed/zed_disk_event.c
+++ b/cmd/zed/zed_disk_event.c
@@ -78,6 +78,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
 	if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
+	if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
+		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
@@ -130,6 +132,20 @@ dev_event_nvlist(struct udev_device *dev)
 
 		numval *= strtoull(value, NULL, 10);
 		(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
+
+		/*
+		 * If the device has a parent, then get the parent block
+		 * device's size as well.  For example, /dev/sda1's parent
+		 * is /dev/sda.
+		 */
+		struct udev_device *parent_dev = udev_device_get_parent(dev);
+		if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
+		    != NULL) {
+			uint64_t numval = DEV_BSIZE;
+
+			numval *= strtoull(value, NULL, 10);
+			(void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
+		}
 	}
 
 	/*
diff --git a/include/sys/sysevent/dev.h b/include/sys/sysevent/dev.h
index da6539b4a0d..0783d007316 100644
--- a/include/sys/sysevent/dev.h
+++ b/include/sys/sysevent/dev.h
@@ -244,6 +244,9 @@ extern "C" {
 #define	DEV_PATH		"path"
 #define	DEV_IS_PART		"is_slice"
 #define	DEV_SIZE		"dev_size"
+
+/* Size of the whole parent block device (if dev is a partition) */
+#define	DEV_PARENT_SIZE		"dev_parent_size"
 #endif /* __linux__ */
 
 #define	EV_V1			1

From 60d995727a19104a2832d475f5c0861ffbae2c97 Mon Sep 17 00:00:00 2001
From: Ryan Moeller <ryan@iXsystems.com>
Date: Thu, 8 Sep 2022 13:40:18 -0400
Subject: [PATCH 23/69] FreeBSD: Replace legacy make_dev() interface usage

The function make_dev_s() was introduced to replace make_dev() in
FreeBSD 11.0.  It allows further specification of properties and flags
and returns an error code on failure.  Using this we can fail loading
the module more gracefully than a panic in situations such as when a
device named zfs already exists.  We already use it for zvols.

Use make_dev_s() for /dev/zfs.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Closes #13854
---
 module/os/freebsd/zfs/kmod_core.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/module/os/freebsd/zfs/kmod_core.c b/module/os/freebsd/zfs/kmod_core.c
index 2b808357ecc..020ef6a39b5 100644
--- a/module/os/freebsd/zfs/kmod_core.c
+++ b/module/os/freebsd/zfs/kmod_core.c
@@ -219,9 +219,16 @@ static struct cdevsw zfs_cdevsw = {
 int
 zfsdev_attach(void)
 {
-	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
-	    ZFS_DRIVER);
-	return (0);
+	struct make_dev_args args;
+
+	make_dev_args_init(&args);
+	args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK;
+	args.mda_devsw = &zfs_cdevsw;
+	args.mda_cr = NULL;
+	args.mda_uid = UID_ROOT;
+	args.mda_gid = GID_OPERATOR;
+	args.mda_mode = 0666;
+	return (make_dev_s(&args, &zfsdev, ZFS_DRIVER));
 }
 
 void

From ede037cda73675f42b1452187e8dd3438fafc220 Mon Sep 17 00:00:00 2001
From: Don Brady <don.brady@delphix.com>
Date: Fri, 9 Sep 2022 11:54:16 -0600
Subject: [PATCH 24/69] Make zfs-share service resilient to stale exports

There are a few cases where stale entries in /etc/exports.d/zfs.exports
will cause the nfs-server service to fail when starting up.

Since the nfs-server startup consumes /etc/exports.d/zfs.exports, the
zfs-share service (which rebuilds the list of zfs exports) should run
before the nfs-server service.

To make the zfs-share service resilient to stale exports, this change
truncates the zfs config file as part of the zfs share -a operation.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #13775
---
 cmd/zfs/zfs_main.c                      |  3 ++
 etc/systemd/system/zfs-share.service.in |  2 +-
 include/libzfs.h                        |  3 +-
 lib/libshare/libshare.c                 | 12 +++++-
 lib/libshare/libshare_impl.h            |  3 +-
 lib/libshare/nfs.c                      | 12 ++++++
 lib/libshare/nfs.h                      |  2 +
 lib/libshare/os/freebsd/nfs.c           |  9 +++-
 lib/libshare/os/linux/nfs.c             |  9 +++-
 lib/libspl/include/libshare.h           |  3 +-
 lib/libzfs/libzfs.abi                   | 55 ++++++++++++++++++-------
 lib/libzfs/libzfs_mount.c               | 12 +++++-
 12 files changed, 101 insertions(+), 24 deletions(-)

diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index f1d686753c2..008f1bea0ec 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -7093,6 +7093,9 @@ share_mount(int op, int argc, char **argv)
 		share_mount_state.sm_total = cb.cb_used;
 		pthread_mutex_init(&share_mount_state.sm_lock, NULL);
 
+		/* For a 'zfs share -a' operation start with a clean slate. */
+		zfs_truncate_shares(NULL);
+
 		/*
 		 * libshare isn't mt-safe, so only do the operation in parallel
 		 * if we're mounting. Additionally, the key-loading option must
diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in
index 263055e5281..1a6342a06fe 100644
--- a/etc/systemd/system/zfs-share.service.in
+++ b/etc/systemd/system/zfs-share.service.in
@@ -1,7 +1,7 @@
 [Unit]
 Description=ZFS file system shares
 Documentation=man:zfs(8)
-After=nfs-server.service nfs-kernel-server.service
+Before=nfs-server.service nfs-kernel-server.service
 After=smb.service
 Before=rpc-statd-notify.service
 Wants=zfs-mount.service
diff --git a/include/libzfs.h b/include/libzfs.h
index 96cf1e18652..92c7bf6d1c9 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2022 by Delphix. All rights reserved.
  * Copyright Joyent, Inc.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2016, Intel Corporation.
@@ -895,6 +895,7 @@ _LIBZFS_H int zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,
 _LIBZFS_H int zfs_unshareall(zfs_handle_t *zhp,
     const enum sa_protocol *proto);
 _LIBZFS_H void zfs_commit_shares(const enum sa_protocol *proto);
+_LIBZFS_H void zfs_truncate_shares(const enum sa_protocol *proto);
 
 _LIBZFS_H int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
 
diff --git a/lib/libshare/libshare.c b/lib/libshare/libshare.c
index d6257aa1ef3..d50b4550d6d 100644
--- a/lib/libshare/libshare.c
+++ b/lib/libshare/libshare.c
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
- * Copyright (c) 2018, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2018, 2022 by Delphix. All rights reserved.
  */
 
 #include <stdio.h>
@@ -96,6 +96,16 @@ sa_commit_shares(enum sa_protocol protocol)
 	fstypes[protocol]->commit_shares();
 }
 
+void
+sa_truncate_shares(enum sa_protocol protocol)
+{
+	/* CSTYLED */
+	VALIDATE_PROTOCOL(protocol, );
+
+	if (fstypes[protocol]->truncate_shares != NULL)
+		fstypes[protocol]->truncate_shares();
+}
+
 int
 sa_validate_shareopts(const char *options, enum sa_protocol protocol)
 {
diff --git a/lib/libshare/libshare_impl.h b/lib/libshare/libshare_impl.h
index b845eb2d8ac..d8c924757fe 100644
--- a/lib/libshare/libshare_impl.h
+++ b/lib/libshare/libshare_impl.h
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
- * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2019, 2022 by Delphix. All rights reserved.
  */
 #ifndef _LIBSPL_LIBSHARE_IMPL_H
 #define	_LIBSPL_LIBSHARE_IMPL_H
@@ -39,6 +39,7 @@ typedef struct {
 	boolean_t (*const is_shared)(sa_share_impl_t share);
 	int (*const validate_shareopts)(const char *shareopts);
 	int (*const commit_shares)(void);
+	void (*const truncate_shares)(void);
 } sa_fstype_t;
 
 extern const sa_fstype_t libshare_nfs_type, libshare_smb_type;
diff --git a/lib/libshare/nfs.c b/lib/libshare/nfs.c
index 161bbfb0ceb..bbaea93fca5 100644
--- a/lib/libshare/nfs.c
+++ b/lib/libshare/nfs.c
@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <libshare.h>
+#include <unistd.h>
 #include "nfs.h"
 
 
@@ -281,6 +282,17 @@ nfs_toggle_share(const char *lockfile, const char *exports,
 	return (error);
 }
 
+void
+nfs_reset_shares(const char *lockfile, const char *exports)
+{
+	int nfs_lock_fd = -1;
+
+	if (nfs_exports_lock(lockfile, &nfs_lock_fd) == 0) {
+		(void) ! truncate(exports, 0);
+		nfs_exports_unlock(lockfile, &nfs_lock_fd);
+	}
+}
+
 static boolean_t
 nfs_is_shared_cb(void *userdata, char *line, boolean_t found_mountpoint)
 {
diff --git a/lib/libshare/nfs.h b/lib/libshare/nfs.h
index 58523c8f02e..f4340b18f89 100644
--- a/lib/libshare/nfs.h
+++ b/lib/libshare/nfs.h
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
+ * Copyright (c) 2022 by Delphix. All rights reserved.
  */
 
 #include "libshare_impl.h"
@@ -33,3 +34,4 @@ boolean_t nfs_is_shared_impl(const char *exports, sa_share_impl_t impl_share);
 int nfs_toggle_share(const char *lockfile, const char *exports,
     const char *expdir, sa_share_impl_t impl_share,
     int(*cbk)(sa_share_impl_t impl_share, FILE *tmpfile));
+void nfs_reset_shares(const char *lockfile, const char *exports);
diff --git a/lib/libshare/os/freebsd/nfs.c b/lib/libshare/os/freebsd/nfs.c
index 78977a25f4f..521631c51f0 100644
--- a/lib/libshare/os/freebsd/nfs.c
+++ b/lib/libshare/os/freebsd/nfs.c
@@ -23,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Copyright (c) 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2020, 2022 by Delphix. All rights reserved.
  */
 
 #include <sys/cdefs.h>
@@ -195,6 +195,12 @@ nfs_commit_shares(void)
 	return (SA_OK);
 }
 
+static void
+nfs_truncate_shares(void)
+{
+	nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
+}
+
 const sa_fstype_t libshare_nfs_type = {
 	.enable_share = nfs_enable_share,
 	.disable_share = nfs_disable_share,
@@ -202,4 +208,5 @@ const sa_fstype_t libshare_nfs_type = {
 
 	.validate_shareopts = nfs_validate_shareopts,
 	.commit_shares = nfs_commit_shares,
+	.truncate_shares = nfs_truncate_shares,
 };
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index 0870f37e581..0d63c989d34 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -23,7 +23,7 @@
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Gunnar Beutner
  * Copyright (c) 2012 Cyril Plisko. All rights reserved.
- * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2019, 2022 by Delphix. All rights reserved.
  */
 
 #include <dirent.h>
@@ -495,6 +495,12 @@ nfs_commit_shares(void)
 	return (libzfs_run_process(argv[0], argv, 0));
 }
 
+static void
+nfs_truncate_shares(void)
+{
+	nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
+}
+
 const sa_fstype_t libshare_nfs_type = {
 	.enable_share = nfs_enable_share,
 	.disable_share = nfs_disable_share,
@@ -502,6 +508,7 @@ const sa_fstype_t libshare_nfs_type = {
 
 	.validate_shareopts = nfs_validate_shareopts,
 	.commit_shares = nfs_commit_shares,
+	.truncate_shares = nfs_truncate_shares,
 };
 
 static boolean_t
diff --git a/lib/libspl/include/libshare.h b/lib/libspl/include/libshare.h
index d976f096ac3..deeb15c9770 100644
--- a/lib/libspl/include/libshare.h
+++ b/lib/libspl/include/libshare.h
@@ -22,7 +22,7 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2019, 2022 by Delphix. All rights reserved.
  */
 #ifndef _LIBSPL_LIBSHARE_H
 #define	_LIBSPL_LIBSHARE_H extern __attribute__((visibility("default")))
@@ -88,6 +88,7 @@ _LIBSPL_LIBSHARE_H int sa_enable_share(const char *, const char *, const char *,
 _LIBSPL_LIBSHARE_H int sa_disable_share(const char *, enum sa_protocol);
 _LIBSPL_LIBSHARE_H boolean_t sa_is_shared(const char *, enum sa_protocol);
 _LIBSPL_LIBSHARE_H void sa_commit_shares(enum sa_protocol);
+_LIBSPL_LIBSHARE_H void sa_truncate_shares(enum sa_protocol);
 
 /* protocol specific interfaces */
 _LIBSPL_LIBSHARE_H int sa_validate_shareopts(const char *, enum sa_protocol);
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 0494aec208e..7dd12df8171 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -245,6 +245,7 @@
     <elf-symbol name='sa_enable_share' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='sa_errorstr' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='sa_is_shared' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='sa_truncate_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='sa_validate_shareopts' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='snapshot_namecheck' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='spl_pagesize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -428,6 +429,7 @@
     <elf-symbol name='zfs_strcmp_pathname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zfs_strip_partition' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zfs_strip_path' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zfs_truncate_shares' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zfs_type_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zfs_unmount' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -758,6 +760,10 @@
       <parameter type-id='9155d4b5' name='protocol'/>
       <return type-id='48b5725f'/>
     </function-decl>
+    <function-decl name='sa_truncate_shares' mangled-name='sa_truncate_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_truncate_shares'>
+      <parameter type-id='9155d4b5' name='protocol'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
     <function-decl name='sa_validate_shareopts' mangled-name='sa_validate_shareopts' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='sa_validate_shareopts'>
       <parameter type-id='80f4b756' name='options'/>
       <parameter type-id='9155d4b5' name='protocol'/>
@@ -787,7 +793,7 @@
       </data-member>
     </class-decl>
     <typedef-decl name='sa_share_impl_t' type-id='946a2c6b' id='a48b47d0'/>
-    <class-decl name='sa_fstype_t' size-in-bits='320' is-struct='yes' naming-typedef-id='639af739' visibility='default' id='944afa86'>
+    <class-decl name='sa_fstype_t' size-in-bits='384' is-struct='yes' naming-typedef-id='639af739' visibility='default' id='944afa86'>
       <data-member access='public' layout-offset-in-bits='0'>
         <var-decl name='enable_share' type-id='2f78a9c1' visibility='default'/>
       </data-member>
@@ -803,6 +809,9 @@
       <data-member access='public' layout-offset-in-bits='256'>
         <var-decl name='commit_shares' type-id='797ee7da' visibility='default'/>
       </data-member>
+      <data-member access='public' layout-offset-in-bits='320'>
+        <var-decl name='truncate_shares' type-id='5d51038b' visibility='default'/>
+      </data-member>
     </class-decl>
     <typedef-decl name='sa_fstype_t' type-id='944afa86' id='639af739'/>
     <qualified-type-def type-id='639af739' const='yes' id='d19dbca9'/>
@@ -816,6 +825,8 @@
     <qualified-type-def type-id='fa1f29ce' const='yes' id='2f78a9c1'/>
     <pointer-type-def type-id='86373eb1' size-in-bits='64' id='f337456d'/>
     <qualified-type-def type-id='f337456d' const='yes' id='81020bc2'/>
+    <pointer-type-def type-id='ee076206' size-in-bits='64' id='953b12f8'/>
+    <qualified-type-def type-id='953b12f8' const='yes' id='5d51038b'/>
     <var-decl name='libshare_nfs_type' type-id='d19dbca9' visibility='default'/>
     <function-type size-in-bits='64' id='276427e1'>
       <return type-id='95e97e5e'/>
@@ -832,6 +843,9 @@
       <parameter type-id='a48b47d0'/>
       <return type-id='c19b74c3'/>
     </function-type>
+    <function-type size-in-bits='64' id='ee076206'>
+      <return type-id='48b5725f'/>
+    </function-type>
   </abi-instr>
   <abi-instr address-size='64' path='lib/libshare/os/linux/smb.c' language='LANG_C99'>
     <var-decl name='libshare_smb_type' type-id='d19dbca9' visibility='default'/>
@@ -2302,6 +2316,7 @@
       <underlying-type type-id='9cac1fee'/>
       <enumerator name='ZPROP_CONT' value='-2'/>
       <enumerator name='ZPROP_INVAL' value='-1'/>
+      <enumerator name='ZPROP_USERPROP' value='-1'/>
       <enumerator name='ZFS_PROP_TYPE' value='0'/>
       <enumerator name='ZFS_PROP_CREATION' value='1'/>
       <enumerator name='ZFS_PROP_USED' value='2'/>
@@ -3034,6 +3049,10 @@
       <parameter type-id='4567bbc9' name='proto'/>
       <return type-id='48b5725f'/>
     </function-decl>
+    <function-decl name='zfs_truncate_shares' mangled-name='zfs_truncate_shares' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_truncate_shares'>
+      <parameter type-id='4567bbc9' name='proto'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
     <function-decl name='zfs_unshare' mangled-name='zfs_unshare' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_unshare'>
       <parameter type-id='9200a744' name='zhp'/>
       <parameter type-id='80f4b756' name='mountpoint'/>
@@ -3150,6 +3169,7 @@
     <enum-decl name='vdev_prop_t' naming-typedef-id='5aa5c90c' id='1573bec8'>
       <underlying-type type-id='9cac1fee'/>
       <enumerator name='VDEV_PROP_INVAL' value='-1'/>
+      <enumerator name='VDEV_PROP_USERPROP' value='-1'/>
       <enumerator name='VDEV_PROP_NAME' value='0'/>
       <enumerator name='VDEV_PROP_CAPACITY' value='1'/>
       <enumerator name='VDEV_PROP_STATE' value='2'/>
@@ -3750,7 +3770,7 @@
     </class-decl>
     <typedef-decl name='sendflags_t' type-id='f6aa15be' id='945467e6'/>
     <typedef-decl name='snapfilter_cb_t' type-id='d2a5e211' id='3d3ffb69'/>
-    <class-decl name='recvflags' size-in-bits='416' is-struct='yes' visibility='default' id='34a384dc'>
+    <class-decl name='recvflags' size-in-bits='448' is-struct='yes' visibility='default' id='34a384dc'>
       <data-member access='public' layout-offset-in-bits='0'>
         <var-decl name='verbose' type-id='c19b74c3' visibility='default'/>
       </data-member>
@@ -3790,6 +3810,9 @@
       <data-member access='public' layout-offset-in-bits='384'>
         <var-decl name='forceunmount' type-id='c19b74c3' visibility='default'/>
       </data-member>
+      <data-member access='public' layout-offset-in-bits='416'>
+        <var-decl name='heal' type-id='c19b74c3' visibility='default'/>
+      </data-member>
     </class-decl>
     <typedef-decl name='recvflags_t' type-id='34a384dc' id='9e59d1d4'/>
     <pointer-type-def type-id='f20fbd51' size-in-bits='64' id='a3681dea'/>
@@ -3903,16 +3926,17 @@
       <enumerator name='ZPOOL_ERRATA_ZOL_8308_ENCRYPTION' value='4'/>
     </enum-decl>
     <typedef-decl name='zpool_errata_t' type-id='d9abbf54' id='688c495b'/>
+    <pointer-type-def type-id='80f4b756' size-in-bits='64' id='7d3cd834'/>
     <pointer-type-def type-id='688c495b' size-in-bits='64' id='cec6f2e4'/>
     <function-decl name='zpool_get_status' mangled-name='zpool_get_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_status'>
       <parameter type-id='4c81de99' name='zhp'/>
-      <parameter type-id='9b23c9ad' name='msgid'/>
+      <parameter type-id='7d3cd834' name='msgid'/>
       <parameter type-id='cec6f2e4' name='errata'/>
       <return type-id='d3dd6294'/>
     </function-decl>
     <function-decl name='zpool_import_status' mangled-name='zpool_import_status' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_import_status'>
       <parameter type-id='5ce45b60' name='config'/>
-      <parameter type-id='9b23c9ad' name='msgid'/>
+      <parameter type-id='7d3cd834' name='msgid'/>
       <parameter type-id='cec6f2e4' name='errata'/>
       <return type-id='d3dd6294'/>
     </function-decl>
@@ -4032,8 +4056,8 @@
       <return type-id='48b5725f'/>
     </function-decl>
     <function-decl name='libzfs_envvar_is_set' mangled-name='libzfs_envvar_is_set' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_envvar_is_set'>
-      <parameter type-id='26a90f95' name='envvar'/>
-      <return type-id='95e97e5e'/>
+      <parameter type-id='80f4b756' name='envvar'/>
+      <return type-id='c19b74c3'/>
     </function-decl>
     <function-decl name='libzfs_init' mangled-name='libzfs_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libzfs_init'>
       <return type-id='b0382bb3'/>
@@ -4102,15 +4126,15 @@
       <return type-id='95e97e5e'/>
     </function-decl>
     <function-decl name='color_start' mangled-name='color_start' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='color_start'>
-      <parameter type-id='26a90f95' name='color'/>
+      <parameter type-id='80f4b756' name='color'/>
       <return type-id='48b5725f'/>
     </function-decl>
     <function-decl name='color_end' mangled-name='color_end' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='color_end'>
       <return type-id='48b5725f'/>
     </function-decl>
     <function-decl name='printf_color' mangled-name='printf_color' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='printf_color'>
-      <parameter type-id='26a90f95' name='color'/>
-      <parameter type-id='26a90f95' name='format'/>
+      <parameter type-id='80f4b756' name='color'/>
+      <parameter type-id='80f4b756' name='format'/>
       <parameter is-variadic='yes'/>
       <return type-id='95e97e5e'/>
     </function-decl>
@@ -4123,7 +4147,7 @@
   <abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_mount_os.c' language='LANG_C99'>
     <pointer-type-def type-id='7359adad' size-in-bits='64' id='1d2c2b85'/>
     <function-decl name='zfs_parse_mount_options' mangled-name='zfs_parse_mount_options' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_parse_mount_options'>
-      <parameter type-id='26a90f95' name='mntopts'/>
+      <parameter type-id='80f4b756' name='mntopts'/>
       <parameter type-id='1d2c2b85' name='mntflags'/>
       <parameter type-id='1d2c2b85' name='zfsflags'/>
       <parameter type-id='95e97e5e' name='sloppy'/>
@@ -4771,8 +4795,8 @@
     </function-decl>
   </abi-instr>
   <abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
-    <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16576' id='9d5e9e2e'>
-      <subrange length='37' type-id='7359adad' id='ae666bde'/>
+    <array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16576' id='d95b2b0b'>
+      <subrange length='37' type-id='7359adad' id='aa6426fb'/>
     </array-type-def>
     <enum-decl name='spa_feature' id='33ecb627'>
       <underlying-type type-id='9cac1fee'/>
@@ -4872,7 +4896,7 @@
     <qualified-type-def type-id='3eee3342' const='yes' id='0c1d5bbb'/>
     <pointer-type-def type-id='0c1d5bbb' size-in-bits='64' id='a3372543'/>
     <pointer-type-def type-id='d6618c78' size-in-bits='64' id='a8425263'/>
-    <var-decl name='spa_feature_table' type-id='9d5e9e2e' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
+    <var-decl name='spa_feature_table' type-id='d95b2b0b' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
     <var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
     <function-decl name='zfeature_is_valid_guid' mangled-name='zfeature_is_valid_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_is_valid_guid'>
       <parameter type-id='80f4b756' name='name'/>
@@ -4935,7 +4959,7 @@
     </function-decl>
     <function-decl name='zfs_special_devs' mangled-name='zfs_special_devs' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_special_devs'>
       <parameter type-id='5ce45b60' name='nv'/>
-      <parameter type-id='26a90f95' name='type'/>
+      <parameter type-id='80f4b756' name='type'/>
       <return type-id='c19b74c3'/>
     </function-decl>
     <function-decl name='zpool_get_load_policy' mangled-name='zpool_get_load_policy' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_get_load_policy'>
@@ -5013,7 +5037,7 @@
     <typedef-decl name='zfs_deleg_note_t' type-id='729d4547' id='4613c173'/>
     <class-decl name='zfs_deleg_perm_tab' size-in-bits='128' is-struct='yes' visibility='default' id='5aa05c1f'>
       <data-member access='public' layout-offset-in-bits='0'>
-        <var-decl name='z_perm' type-id='26a90f95' visibility='default'/>
+        <var-decl name='z_perm' type-id='80f4b756' visibility='default'/>
       </data-member>
       <data-member access='public' layout-offset-in-bits='64'>
         <var-decl name='z_note' type-id='4613c173' visibility='default'/>
@@ -5455,7 +5479,6 @@
       </data-member>
     </class-decl>
     <typedef-decl name='zprop_desc_t' type-id='bbff5e4b' id='ffa52b96'/>
-    <pointer-type-def type-id='80f4b756' size-in-bits='64' id='7d3cd834'/>
     <qualified-type-def type-id='64636ce3' const='yes' id='072f7953'/>
     <pointer-type-def type-id='072f7953' size-in-bits='64' id='c8bc397b'/>
     <pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index fdfdd8d2808..44f7d698c82 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -22,7 +22,7 @@
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, 2021 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2018 Datto Inc.
@@ -788,6 +788,16 @@ zfs_commit_shares(const enum sa_protocol *proto)
 		sa_commit_shares(*p);
 }
 
+void
+zfs_truncate_shares(const enum sa_protocol *proto)
+{
+	if (proto == NULL)
+		proto = share_all_proto;
+
+	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
+		sa_truncate_shares(*p);
+}
+
 /*
  * Unshare the given filesystem.
  */

From 0e4c830bc19766e860e760e10e0d59250f12cced Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 12 Sep 2022 12:55:37 -0400
Subject: [PATCH 25/69] Cleanup: Use OpenSolaris functions to call scheduler

In our codebase, `cond_resched()` and `schedule()` are Linux kernel
functions that have replaced the OpenSolaris `kpreempt()` functions in
the codebase to such an extent that `kpreempt()` in zfs_context.h was
broken. Nobody noticed because we did not actually use it. The header
had defined `kpreempt()` as `yield()`, which works on OpenSolaris and
Illumos where `sched_yield()` is a wrapper for `yield()`, but that does
not work on any other platform.

The FreeBSD platform specific code implemented shims for these, but the
shim for `schedule()` forced us to wait, which is different than merely
rescheduling to another thread as the original Linux code does, while
the shim for `cond_resched()` had the same definition as its kernel
kpreempt() shim.

After studying this, I have concluded that we should reintroduce the
kpreempt() function in platform independent code with the following
definitions:

	- In the Linux kernel:
		kpreempt(unused)	-> cond_resched()

	- In the FreeBSD kernel:
		kpreempt(unused)	-> kern_yield(PRI_USER)

	- In userspace:
		kpreempt(unused)	-> sched_yield()

In userspace, nothing changes from this cleanup. In the kernels, the
function `fm_fini()` will now call `kern_yield(PRI_USER)` on FreeBSD and
`cond_resched()` on Linux.  This is instead of `pause("schedule", 1)` on
FreeBSD and `schedule()` on Linux. This makes our behavior consistent
across platforms.

Note that Linux's SPL continues to use `cond_resched()` and
`schedule()`.  However, those functions have been removed from both the
FreeBSD code and userspace code.

This should have the benefit of making it slightly easier to port the
code to new platforms by making how things should be mapped less
confusing.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13845
---
 include/os/freebsd/spl/sys/disp.h           | 2 ++
 include/os/freebsd/spl/sys/timer.h          | 2 --
 include/os/freebsd/zfs/sys/zfs_context_os.h | 2 --
 include/os/linux/spl/sys/disp.h             | 4 +++-
 include/sys/zfs_context.h                   | 5 +++--
 module/zfs/arc.c                            | 4 ++--
 module/zfs/dnode.c                          | 6 +++---
 module/zfs/fm.c                             | 2 +-
 module/zfs/spa_log_spacemap.c               | 2 +-
 9 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/include/os/freebsd/spl/sys/disp.h b/include/os/freebsd/spl/sys/disp.h
index 2be1b76e433..d46a7d2c014 100644
--- a/include/os/freebsd/spl/sys/disp.h
+++ b/include/os/freebsd/spl/sys/disp.h
@@ -31,6 +31,8 @@
 
 #include <sys/proc.h>
 
+#define	KPREEMPT_SYNC		(-1)
+
 #define	kpreempt(x)	kern_yield(PRI_USER)
 
 #endif	/* _OPENSOLARIS_SYS_DISP_H_ */
diff --git a/include/os/freebsd/spl/sys/timer.h b/include/os/freebsd/spl/sys/timer.h
index d4694bb7c09..7ff77e9b1b7 100644
--- a/include/os/freebsd/spl/sys/timer.h
+++ b/include/os/freebsd/spl/sys/timer.h
@@ -33,6 +33,4 @@
 #define	usleep_range(wakeup, wakeupepsilon)				   \
 	pause_sbt("usleep_range", ustosbt(wakeup), \
 	ustosbt(wakeupepsilon - wakeup), 0)
-
-#define	schedule() pause("schedule", 1)
 #endif
diff --git a/include/os/freebsd/zfs/sys/zfs_context_os.h b/include/os/freebsd/zfs/sys/zfs_context_os.h
index 86719950139..1ce72330412 100644
--- a/include/os/freebsd/zfs/sys/zfs_context_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_context_os.h
@@ -45,8 +45,6 @@
 #define	HAVE_LARGE_STACKS	1
 #endif
 
-#define	cond_resched()		kern_yield(PRI_USER)
-
 #define	taskq_create_sysdc(a, b, d, e, p, dc, f) \
 	    ((void) sizeof (dc), taskq_create(a, b, maxclsyspri, d, e, f))
 
diff --git a/include/os/linux/spl/sys/disp.h b/include/os/linux/spl/sys/disp.h
index e106d3c5438..c8be6ffbf10 100644
--- a/include/os/linux/spl/sys/disp.h
+++ b/include/os/linux/spl/sys/disp.h
@@ -26,7 +26,9 @@
 
 #include <linux/preempt.h>
 
-#define	kpreempt(unused)	schedule()
+#define	KPREEMPT_SYNC		(-1)
+
+#define	kpreempt(unused)	cond_resched()
 #define	kpreempt_disable()	preempt_disable()
 #define	kpreempt_enable()	preempt_enable()
 
diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
index aa4f7878963..83ed97fbec7 100644
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@@ -219,7 +219,6 @@ typedef pthread_t	kthread_t;
 #define	TS_JOINABLE	0x00000004
 
 #define	curthread	((void *)(uintptr_t)pthread_self())
-#define	kpreempt(x)	yield()
 #define	getcomm()	"unknown"
 
 #define	thread_create_named(name, stk, stksize, func, arg, len, \
@@ -248,9 +247,11 @@ extern kthread_t *zk_thread_create(void (*func)(void *), void *arg,
 #define	issig(why)	(FALSE)
 #define	ISSIG(thr, why)	(FALSE)
 
+#define	KPREEMPT_SYNC		(-1)
+
+#define	kpreempt(x)		sched_yield()
 #define	kpreempt_disable()	((void)0)
 #define	kpreempt_enable()	((void)0)
-#define	cond_resched()		sched_yield()
 
 /*
  * Mutexes
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 980dc60d0cc..b9969bff534 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -4165,7 +4165,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
 	 * this CPU are able to make progress, make a voluntary preemption
 	 * call here.
 	 */
-	cond_resched();
+	kpreempt(KPREEMPT_SYNC);
 
 	return (bytes_evicted);
 }
@@ -10335,7 +10335,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
 		    !dev->l2ad_first)
 			goto out;
 
-		cond_resched();
+		kpreempt(KPREEMPT_SYNC);
 		for (;;) {
 			mutex_enter(&l2arc_rebuild_thr_lock);
 			if (dev->l2ad_rebuild_cancel) {
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 67fe1e2c9a0..ef27dfd40af 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1142,7 +1142,7 @@ dnode_free_interior_slots(dnode_t *dn)
 
 	while (!dnode_slots_tryenter(children, idx, slots)) {
 		DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
-		cond_resched();
+		kpreempt(KPREEMPT_SYNC);
 	}
 
 	dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
@@ -1423,7 +1423,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 			dnode_slots_rele(dnc, idx, slots);
 			while (!dnode_slots_tryenter(dnc, idx, slots)) {
 				DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
-				cond_resched();
+				kpreempt(KPREEMPT_SYNC);
 			}
 
 			/*
@@ -1478,7 +1478,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 		dnode_slots_rele(dnc, idx, slots);
 		while (!dnode_slots_tryenter(dnc, idx, slots)) {
 			DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
-			cond_resched();
+			kpreempt(KPREEMPT_SYNC);
 		}
 
 		if (!dnode_check_slots_free(dnc, idx, slots)) {
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index e7a7ad58324..bc13b5517c4 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -1354,7 +1354,7 @@ fm_fini(void)
 	zevent_flags |= ZEVENT_SHUTDOWN;
 	while (zevent_waiters > 0) {
 		mutex_exit(&zevent_lock);
-		schedule();
+		kpreempt(KPREEMPT_SYNC);
 		mutex_enter(&zevent_lock);
 	}
 	mutex_exit(&zevent_lock);
diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c
index 19e334916bd..4ecce8214f6 100644
--- a/module/zfs/spa_log_spacemap.c
+++ b/module/zfs/spa_log_spacemap.c
@@ -1176,7 +1176,7 @@ spa_ld_log_sm_data(spa_t *spa)
 		}
 
 		/* Load TXG log spacemap into ms_unflushed_allocs/frees. */
-		cond_resched();
+		kpreempt(KPREEMPT_SYNC);
 		ASSERT0(sls->sls_nblocks);
 		sls->sls_nblocks = space_map_nblocks(sls->sls_sm);
 		spa->spa_unflushed_stats.sus_nblocks += sls->sls_nblocks;

From 13f2b8fb92c23090b9f6e701c8471aef6b8e917b Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 12 Sep 2022 14:22:15 -0400
Subject: [PATCH 26/69] Fix use-after-free in btree code

Coverity static analysis found these.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #10989
Closes #13861
---
 module/zfs/btree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/zfs/btree.c b/module/zfs/btree.c
index 14cab4054cb..60b063ed907 100644
--- a/module/zfs/btree.c
+++ b/module/zfs/btree.c
@@ -1608,8 +1608,8 @@ zfs_btree_remove_from_node(zfs_btree_t *tree, zfs_btree_core_t *node,
 	zfs_btree_poison_node_at(tree, keep_hdr, keep_hdr->bth_count, 1);
 
 	new_rm_hdr->bth_count = 0;
-	zfs_btree_node_destroy(tree, new_rm_hdr);
 	zfs_btree_remove_from_node(tree, parent, new_rm_hdr);
+	zfs_btree_node_destroy(tree, new_rm_hdr);
 }
 
 /* Remove the element at the specific location. */
@@ -1817,10 +1817,10 @@ zfs_btree_remove_idx(zfs_btree_t *tree, zfs_btree_index_t *where)
 
 	/* Move our elements to the left neighbor. */
 	bt_transfer_leaf(tree, rm, 0, rm_hdr->bth_count, keep, k_count + 1);
-	zfs_btree_node_destroy(tree, rm_hdr);
 
 	/* Remove the emptied node from the parent. */
 	zfs_btree_remove_from_node(tree, parent, rm_hdr);
+	zfs_btree_node_destroy(tree, rm_hdr);
 	zfs_btree_verify(tree);
 }
 

From 7195c04d986ecd26c25c13e3c180790a2e85a723 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 12 Sep 2022 15:34:10 -0400
Subject: [PATCH 27/69] Fix file descriptor handling in zdb_copy_object()

Coverity found a file descriptor leak. Eyeballing it showed that we had
no handling for the `open()` call failing either. We can address both of
these at once.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13862
---
 cmd/zdb/zdb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 5389520e803..0fc4f0d0d1b 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -4737,6 +4737,8 @@ zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
 	}
 
 	int fd = open(destfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (fd == -1)
+		return (errno);
 	/*
 	 * We cap the size at 1 mebibyte here to prevent
 	 * allocation failures and nigh-infinite printing if the
@@ -4746,6 +4748,7 @@ zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
 	offset = 0;
 	char *buf = kmem_alloc(oursize, KM_NOSLEEP);
 	if (buf == NULL) {
+		(void) close(fd);
 		return (ENOMEM);
 	}
 
@@ -4755,6 +4758,7 @@ zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
 		if (err != 0) {
 			(void) printf("got error %u from dmu_read\n", err);
 			kmem_free(buf, oursize);
+			(void) close(fd);
 			return (err);
 		}
 		if (dump_opt['v'] > 3) {

From e5327e7f9790ed7e884a7f8d9fa412632506b826 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 12 Sep 2022 15:51:17 -0400
Subject: [PATCH 28/69] vdev_draid_lookup_map() should not iterate outside
 draid_maps

Coverity reported this as an out-of-bounds read.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13865
---
 module/zfs/vdev_draid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module/zfs/vdev_draid.c b/module/zfs/vdev_draid.c
index 24ea5d2cbe1..032e8825a29 100644
--- a/module/zfs/vdev_draid.c
+++ b/module/zfs/vdev_draid.c
@@ -541,7 +541,7 @@ vdev_draid_generate_perms(const draid_map_t *map, uint8_t **permsp)
 int
 vdev_draid_lookup_map(uint64_t children, const draid_map_t **mapp)
 {
-	for (int i = 0; i <= VDEV_DRAID_MAX_MAPS; i++) {
+	for (int i = 0; i < VDEV_DRAID_MAX_MAPS; i++) {
 		if (draid_maps[i].dm_children == children) {
 			*mapp = &draid_maps[i];
 			return (0);

From 710fd1ded68491a164d85aedc69ffd4675ec5c59 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 12 Sep 2022 15:54:43 -0400
Subject: [PATCH 29/69] zpool_load_compat() should create strings of length
 ZFS_MAXPROPLEN

Otherwise, `strlcat()` can overflow them.

Coverity found this.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13866
---
 lib/libzfs/libzfs_pool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 928f8b4287b..eea388cf348 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -4684,8 +4684,8 @@ zpool_load_compat(const char *compat, boolean_t *features, char *report,
 		for (uint_t i = 0; i < SPA_FEATURES; i++)
 			features[i] = B_TRUE;
 
-	char err_badfile[1024] = "";
-	char err_badtoken[1024] = "";
+	char err_badfile[ZFS_MAXPROPLEN] = "";
+	char err_badtoken[ZFS_MAXPROPLEN] = "";
 
 	/*
 	 * We ignore errors from the directory open()

From d5d10f2aef98e86d4873c435cdbd9b3ced447caf Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 13 Sep 2022 19:40:10 -0400
Subject: [PATCH 30/69] Cleanup dead spa_boot code

Unused code detected by coverity.

Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13868
---
 include/Makefile.am                |  1 -
 include/sys/spa_boot.h             | 42 -------------------------
 lib/libzpool/Makefile.am           |  1 -
 module/Kbuild.in                   |  1 -
 module/Makefile.bsd                |  1 -
 module/os/freebsd/zfs/spa_os.c     |  1 -
 module/os/freebsd/zfs/zfs_vfsops.c |  1 -
 module/os/linux/zfs/zfs_vfsops.c   |  1 -
 module/zfs/spa.c                   |  1 -
 module/zfs/spa_boot.c              | 50 ------------------------------
 10 files changed, 100 deletions(-)
 delete mode 100644 include/sys/spa_boot.h
 delete mode 100644 module/zfs/spa_boot.c

diff --git a/include/Makefile.am b/include/Makefile.am
index 1a7f67e9c44..19726bba186 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -76,7 +76,6 @@ COMMON_H = \
 	sys/sa_impl.h \
 	sys/skein.h \
 	sys/spa.h \
-	sys/spa_boot.h \
 	sys/spa_checkpoint.h \
 	sys/spa_checksum.h \
 	sys/spa_impl.h \
diff --git a/include/sys/spa_boot.h b/include/sys/spa_boot.h
deleted file mode 100644
index 4a69efdda94..00000000000
--- a/include/sys/spa_boot.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_SPA_BOOT_H
-#define	_SYS_SPA_BOOT_H
-
-#include <sys/nvpair.h>
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-extern char *spa_get_bootprop(char *prop);
-extern void spa_free_bootprop(char *prop);
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_SPA_BOOT_H */
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am
index eaa920e5610..0cc1997f7a9 100644
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -121,7 +121,6 @@ nodist_libzpool_la_SOURCES = \
 	module/zfs/sha256.c \
 	module/zfs/skein_zfs.c \
 	module/zfs/spa.c \
-	module/zfs/spa_boot.c \
 	module/zfs/spa_checkpoint.c \
 	module/zfs/spa_config.c \
 	module/zfs/spa_errlog.c \
diff --git a/module/Kbuild.in b/module/Kbuild.in
index 4803952cbfe..7a20e6ee461 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -345,7 +345,6 @@ ZFS_OBJS := \
 	sha256.o \
 	skein_zfs.o \
 	spa.o \
-	spa_boot.o \
 	spa_checkpoint.o \
 	spa_config.o \
 	spa_errlog.o \
diff --git a/module/Makefile.bsd b/module/Makefile.bsd
index 050b6c21e5e..8829ad94213 100644
--- a/module/Makefile.bsd
+++ b/module/Makefile.bsd
@@ -271,7 +271,6 @@ SRCS+=	abd.c \
 	sha256.c \
 	skein_zfs.c \
 	spa.c \
-	spa_boot.c \
 	spa_checkpoint.c \
 	spa_config.c \
 	spa_errlog.c \
diff --git a/module/os/freebsd/zfs/spa_os.c b/module/os/freebsd/zfs/spa_os.c
index 251fafcc964..9bc61a6c8fe 100644
--- a/module/os/freebsd/zfs/spa_os.c
+++ b/module/os/freebsd/zfs/spa_os.c
@@ -58,7 +58,6 @@
 #include <sys/fs/zfs.h>
 #include <sys/arc.h>
 #include <sys/callb.h>
-#include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/dsl_scan.h>
 #include <sys/dmu_send.h>
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 4e4a5f8d215..8b60b34d85c 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -62,7 +62,6 @@
 #include <sys/sunddi.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
-#include <sys/spa_boot.h>
 #include <sys/jail.h>
 #include <ufs/ufs/quota.h>
 #include <sys/zfs_quota.h>
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index eac3dcb6a55..d0575fe5e98 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -56,7 +56,6 @@
 #include <sys/sunddi.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
-#include <sys/spa_boot.h>
 #include <sys/objlist.h>
 #include <sys/zpl.h>
 #include <linux/vfs_compat.h>
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index b2b59af4294..eeec3b6be9c 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -81,7 +81,6 @@
 #include <sys/arc.h>
 #include <sys/callb.h>
 #include <sys/systeminfo.h>
-#include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/dsl_scan.h>
 #include <sys/zfeature.h>
diff --git a/module/zfs/spa_boot.c b/module/zfs/spa_boot.c
deleted file mode 100644
index fddb5c3c968..00000000000
--- a/module/zfs/spa_boot.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifdef _KERNEL
-
-#include <sys/zio.h>
-#include <sys/spa_boot.h>
-#include <sys/sunddi.h>
-
-char *
-spa_get_bootprop(char *propname)
-{
-	char *value;
-
-	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
-	    DDI_PROP_DONTPASS, propname, &value) != DDI_SUCCESS)
-		return (NULL);
-	return (value);
-}
-
-void
-spa_free_bootprop(char *value)
-{
-	ddi_prop_free(value);
-}
-
-#endif /* _KERNEL */

From 8fdc229a9cb6c7f5ba6cd8dc3b40a3c1355f66c5 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 13 Sep 2022 19:53:21 -0400
Subject: [PATCH 31/69] Fix memory leak in ztest

Coverity found this.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13863
---
 cmd/ztest.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 31b9990a1fc..847c3a5b06c 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -7966,6 +7966,7 @@ exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp)
 				VERIFY3S(-1, !=,
 				    asprintf(&newlp, "%s:%s", libpath, curlp));
 				VERIFY0(setenv("LD_LIBRARY_PATH", newlp, 1));
+				free(newlp);
 			}
 		}
 		(void) execl(cmd, cmd, (char *)NULL);

From cf66e7e594fc7063db8050f2b7c718ae3f94641b Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 13 Sep 2022 19:59:33 -0400
Subject: [PATCH 32/69] Cleanup: Make memory barrier definitions consistent
 across kernels

We inherited membar_consumer() and membar_producer() from OpenSolaris,
but we had replaced membar_consumer() with Linux's smp_rmb() in
zfs_ioctl.c. The FreeBSD SPL consequently implemented a shim for the
Linux-only smp_rmb().

We reinstate membar_consumer() in platform independent code and fix the
FreeBSD SPL to implement membar_consumer() in a way analogous to Linux.

Reviewed-by: Konstantin Belousov <kib@FreeBSD.org>
Reviewed-by: Mateusz Guzik <mjguzik@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13843
---
 include/os/freebsd/linux/compiler.h | 1 -
 include/os/freebsd/spl/sys/atomic.h | 3 ++-
 include/os/linux/spl/sys/vmsystm.h  | 2 ++
 module/zfs/zfs_ioctl.c              | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/os/freebsd/linux/compiler.h b/include/os/freebsd/linux/compiler.h
index 3a66da19589..b408b77c746 100644
--- a/include/os/freebsd/linux/compiler.h
+++ b/include/os/freebsd/linux/compiler.h
@@ -83,7 +83,6 @@
 #define	__printf(a, b)			__printflike(a, b)
 
 #define	barrier()			__asm__ __volatile__("": : :"memory")
-#define	smp_rmb()		rmb()
 #define	___PASTE(a, b) a##b
 #define	__PASTE(a, b) ___PASTE(a, b)
 
diff --git a/include/os/freebsd/spl/sys/atomic.h b/include/os/freebsd/spl/sys/atomic.h
index 1a68bfc4de2..01b13fc9afd 100644
--- a/include/os/freebsd/spl/sys/atomic.h
+++ b/include/os/freebsd/spl/sys/atomic.h
@@ -57,7 +57,8 @@ extern uint64_t atomic_cas_64(volatile uint64_t *target, uint64_t cmp,
     uint64_t newval);
 #endif
 
-#define	membar_producer	atomic_thread_fence_rel
+#define	membar_consumer()		atomic_thread_fence_acq()
+#define	membar_producer()		atomic_thread_fence_rel()
 
 static __inline uint32_t
 atomic_add_32_nv(volatile uint32_t *target, int32_t delta)
diff --git a/include/os/linux/spl/sys/vmsystm.h b/include/os/linux/spl/sys/vmsystm.h
index b3f121ecf0c..fcd61e818fa 100644
--- a/include/os/linux/spl/sys/vmsystm.h
+++ b/include/os/linux/spl/sys/vmsystm.h
@@ -44,7 +44,9 @@
 #define	zfs_totalhigh_pages	totalhigh_pages
 #endif
 
+#define	membar_consumer()		smp_rmb()
 #define	membar_producer()		smp_wmb()
+
 #define	physmem				zfs_totalram_pages
 
 #define	xcopyin(from, to, size)		copy_from_user(to, from, size)
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 382975208b9..6b9b43271ba 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -7482,7 +7482,7 @@ zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
 
 	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 		if (zs->zs_minor == minor) {
-			smp_rmb();
+			membar_consumer();
 			switch (which) {
 			case ZST_ONEXIT:
 				return (zs->zs_onexit);

From fcd7293d4e7852a99c5c57443d6799895e10bc9f Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 13 Sep 2022 20:00:53 -0400
Subject: [PATCH 33/69] Remove incorrect free() in zfs_get_pci_slots_sys_path()

Coverity found this. We attempted to free tmp, which is a pointer to a
string that should be freed by the caller.

Reviewed-by: Neal Gompa <ngompa@datto.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13864
---
 lib/libzutil/os/linux/zutil_device_path_os.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/libzutil/os/linux/zutil_device_path_os.c b/lib/libzutil/os/linux/zutil_device_path_os.c
index f081ef53da7..9f4c74f50f3 100644
--- a/lib/libzutil/os/linux/zutil_device_path_os.c
+++ b/lib/libzutil/os/linux/zutil_device_path_os.c
@@ -273,7 +273,6 @@ zfs_get_pci_slots_sys_path(const char *dev_name)
 			free(address2);
 			if (asprintf(&path, "/sys/bus/pci/slots/%s",
 			    ep->d_name) == -1) {
-				free(tmp);
 				continue;
 			}
 			break;

From d954ca19ba8b0c505e88a74a9681c4c81e7cfc57 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 13 Sep 2022 20:58:29 -0400
Subject: [PATCH 34/69] Fix theoretical "use-after-free" in
 dbuf_prefetch_indirect_done()

Coverity complains about a "use-after-free" bug in
`dbuf_prefetch_indirect_done()` because we use a pointer value after
freeing its buffer. The pointer is used for refcounting in ARC (as the
reference holder). There is a theoretical situation where the pointer
would be reused in a way that causes the refcounting to collide, so we
change the order in which we call arc_buf_destroy() and
dbuf_prefetch_fini() to match the rest of the function. This prevents
the theoretical situation from being a possibility.

Also, we have a few return statements with a value, despite this being a
void function. We clean those up while we are making changes here.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13869
---
 module/zfs/dbuf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index b2d1b956878..80cab8177bc 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -3254,7 +3254,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
 
 	if (abuf == NULL) {
 		ASSERT(zio == NULL || zio->io_error != 0);
-		return (dbuf_prefetch_fini(dpa, B_TRUE));
+		dbuf_prefetch_fini(dpa, B_TRUE);
+		return;
 	}
 	ASSERT(zio == NULL || zio->io_error == 0);
 
@@ -3287,7 +3288,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
 		    dpa->dpa_curlevel, curblkid, FTAG);
 		if (db == NULL) {
 			arc_buf_destroy(abuf, private);
-			return (dbuf_prefetch_fini(dpa, B_TRUE));
+			dbuf_prefetch_fini(dpa, B_TRUE);
+			return;
 		}
 		(void) dbuf_read(db, NULL,
 		    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_HAVESTRUCT);
@@ -3305,7 +3307,9 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
 	    dpa->dpa_dnode->dn_objset->os_dsl_dataset,
 	    SPA_FEATURE_REDACTED_DATASETS));
 	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
+		arc_buf_destroy(abuf, private);
 		dbuf_prefetch_fini(dpa, B_TRUE);
+		return;
 	} else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
 		ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
 		dbuf_issue_final_prefetch(dpa, bp);

From 4a6e8b99f5171705466b5a9542b47a935cad793d Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Wed, 14 Sep 2022 15:50:03 -0400
Subject: [PATCH 35/69] Add assertion to dsl_dataset_set_compression_sync

Coverity pointed out that if we somehow receive SPA_FEATURE_NONE, we
will use a negative number as an array index. A defensive assertion
seems appropriate.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Neal Gompa <ngompa@datto.com>
Reviewed-by: Allan Jude <allan@klarasystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13872
---
 module/zfs/dsl_dataset.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 44da6a3f0d4..94b77aa1b74 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -4519,6 +4519,7 @@ dsl_dataset_set_compression_sync(void *arg, dmu_tx_t *tx)
 
 	uint64_t compval = ZIO_COMPRESS_ALGO(ddsca->ddsca_value);
 	spa_feature_t f = zio_compress_to_feature(compval);
+	ASSERT3S(f, !=, SPA_FEATURE_NONE);
 	ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN);
 
 	VERIFY0(dsl_dataset_hold(dp, ddsca->ddsca_name, FTAG, &ds));

From ccec88f11a44746f78f88aac90f5172a52e04506 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Wed, 14 Sep 2022 15:51:55 -0400
Subject: [PATCH 36/69] FreeBSD: Fix integer conversion for
 vnlru_free{,_vfsops}()

When reviewing #13875, I noticed that our FreeBSD code has an issue
where it converts from `int64_t` to `int` when calling
`vnlru_free{,_vfsops}()`. The result is that if the int64_t is `1 <<
36`, the int will be 0, since the low bits are 0. Even when some low
bits are set, a value such as `((1 << 36) + 1)` would truncate to 1,
which is wrong.

There is protection against this on 32-bit platforms, but on 64-bit
platforms, there is no check to protect us, so we add a check.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13882
---
 module/os/freebsd/zfs/arc_os.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c
index b4833adedcc..f1a3a0fafa9 100644
--- a/module/os/freebsd/zfs/arc_os.c
+++ b/module/os/freebsd/zfs/arc_os.c
@@ -142,6 +142,12 @@ arc_prune_task(void *arg)
 	int64_t nr_scan = (intptr_t)arg;
 
 	arc_reduce_target_size(ptob(nr_scan));
+
+#ifndef __ILP32__
+	if (nr_scan > INT_MAX)
+		nr_scan = INT_MAX;
+#endif
+
 #if __FreeBSD_version >= 1300139
 	sx_xlock(&arc_vnlru_lock);
 	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);

From 6f8602a5ede2c156f41630ba687701262f1350d6 Mon Sep 17 00:00:00 2001
From: George Melikov <mail@gmelikov.ru>
Date: Thu, 15 Sep 2022 02:26:57 +0300
Subject: [PATCH 37/69] CI: revert `--with-config=dist` to hotfix Ubuntu 20.04

Recently Github action runners started to fail on kmod build.
Revert --with-config=dist from ./configure section of github
runners to stabilize CI for now.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Melikov <mail@gmelikov.ru>
Closes #13894
---
 .github/workflows/zfs-tests-functional.yml | 2 +-
 .github/workflows/zfs-tests-sanity.yml     | 2 +-
 .github/workflows/zloop.yml                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/zfs-tests-functional.yml b/.github/workflows/zfs-tests-functional.yml
index 328cb97f10e..0273610af04 100644
--- a/.github/workflows/zfs-tests-functional.yml
+++ b/.github/workflows/zfs-tests-functional.yml
@@ -28,7 +28,7 @@ jobs:
         ./autogen.sh
     - name: Configure
       run: |
-        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist
+        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan
     - name: Make
       run: |
         make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod
diff --git a/.github/workflows/zfs-tests-sanity.yml b/.github/workflows/zfs-tests-sanity.yml
index 4c15cecf58d..73606f909e1 100644
--- a/.github/workflows/zfs-tests-sanity.yml
+++ b/.github/workflows/zfs-tests-sanity.yml
@@ -24,7 +24,7 @@ jobs:
         ./autogen.sh
     - name: Configure
       run: |
-        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist
+        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan
     - name: Make
       run: |
         make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod
diff --git a/.github/workflows/zloop.yml b/.github/workflows/zloop.yml
index 64fe96a3ab6..d49eeae1653 100644
--- a/.github/workflows/zloop.yml
+++ b/.github/workflows/zloop.yml
@@ -23,7 +23,7 @@ jobs:
         ./autogen.sh
     - name: Configure
       run: |
-        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan --with-config=dist
+        ./configure --enable-debug --enable-debuginfo --enable-asan --enable-ubsan
     - name: Make
       run: |
         make -j$(nproc) --no-print-directory --silent pkg-utils pkg-kmod

From fd8c3012b3eedc6eed3dda67bf71cfb243400128 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 15 Sep 2022 14:46:42 -0400
Subject: [PATCH 38/69] Fix use-after-free bugs in icp code

These were reported by Coverity as "Read from pointer after free" bugs.
Presumably, it did not report it as a use-after-free bug because it does
not understand the inline assembly that implements the atomic
instruction.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13881
---
 module/icp/core/kcf_mech_tabs.c | 2 +-
 module/icp/core/kcf_prov_tabs.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c
index 3d5063b28f6..41705e84bc4 100644
--- a/module/icp/core/kcf_mech_tabs.c
+++ b/module/icp/core/kcf_mech_tabs.c
@@ -342,8 +342,8 @@ kcf_remove_mech_provider(const char *mech_name, kcf_provider_desc_t *prov_desc)
 	mech_entry->me_sw_prov = NULL;
 
 	/* free entry  */
-	KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
 	KCF_PROV_IREFRELE(prov_mech->pm_prov_desc);
+	KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
 	kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
 }
 
diff --git a/module/icp/core/kcf_prov_tabs.c b/module/icp/core/kcf_prov_tabs.c
index 865d4e19c6e..93af61a235d 100644
--- a/module/icp/core/kcf_prov_tabs.c
+++ b/module/icp/core/kcf_prov_tabs.c
@@ -158,8 +158,8 @@ kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id)
 	 * at that time.
 	 */
 
-	KCF_PROV_REFRELE(prov_desc);
 	KCF_PROV_IREFRELE(prov_desc);
+	KCF_PROV_REFRELE(prov_desc);
 
 	return (CRYPTO_SUCCESS);
 }

From 621a7ebe5818033527d67564e538f1dc0caf5e22 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 15 Sep 2022 14:50:19 -0400
Subject: [PATCH 39/69] Add coverity model to repository

Other projects such as the python project include their coverity models
in their repositories. This provides transparency, which is beneficial
in open source projects. Therefore, it is a good idea to include the
coverity model in our repository too.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13884
---
 contrib/coverity/model.c | 407 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 407 insertions(+)
 create mode 100644 contrib/coverity/model.c

diff --git a/contrib/coverity/model.c b/contrib/coverity/model.c
new file mode 100644
index 00000000000..ee2d01e7f3c
--- /dev/null
+++ b/contrib/coverity/model.c
@@ -0,0 +1,407 @@
+/*
+ * Coverity Scan model
+ * https://scan.coverity.com/models
+ *
+ * This is a modeling file for Coverity Scan.
+ * Modeling helps to avoid false positives.
+ *
+ * - Modeling doesn't need full structs and typedefs. Rudimentary structs
+ *   and similar types are sufficient.
+ * - An uninitialized local pointer is not an error. It signifies that the
+ *   variable could be either NULL or have some data.
+ *
+ * Coverity Scan doesn't pick up modifications automatically. The model file
+ * must be uploaded by an admin in the analysis settings.
+ *
+ * Some of this initially cribbed from:
+ *
+ * https://github.com/kees/coverity-linux/blob/trunk/model.c
+ *
+ * The below model was based on the original model by Brian Behlendorf for the
+ * original zfsonlinux/zfs repository. Some inspiration was taken from
+ * kees/coverity-linux, specifically involving memory copies.
+ */
+
+#include <stdarg.h>
+
+#define	UMEM_DEFAULT		0x0000  /* normal -- may fail */
+#define	UMEM_NOFAIL		0x0100  /* Never fails */
+
+#define	NULL	(0)
+
+int condition0, condition1;
+
+void
+abort()
+{
+	__coverity_panic__();
+}
+
+void
+exit(int status)
+{
+	(void) status;
+
+	__coverity_panic__();
+}
+
+void
+_exit(int status)
+{
+	(void) status;
+
+	__coverity_panic__();
+}
+
+void
+zed_log_die(const char *fmt, ...)
+{
+	__coverity_format_string_sink__(fmt);
+	__coverity_panic__();
+}
+
+void
+panic(const char *fmt, ...)
+{
+	__coverity_format_string_sink__(fmt);
+	__coverity_panic__();
+}
+
+void
+vpanic(const char *fmt, va_list adx)
+{
+	(void) fmt;
+	(void) adx;
+
+	__coverity_panic__();
+}
+
+int
+ddi_copyin(const void *from, void *to, size_t len, int flags)
+{
+	__coverity_tainted_data_argument__(from);
+	__coverity_tainted_data_argument__(to);
+	__coverity_writeall__(to);
+}
+
+void *
+memset(void *dst, int c, size_t len)
+{
+	__coverity_writeall__(dst);
+	return (dst);
+}
+
+void *
+memmove(void *dst, void *src, size_t len)
+{
+	__coverity_writeall__(dst);
+	return (dst);
+}
+
+void *
+memcpy(void *dst, void *src, size_t len)
+{
+	__coverity_writeall__(dst);
+	return (dst);
+}
+
+void *
+umem_alloc_aligned(size_t size, size_t align, int kmflags)
+{
+	(void) align;
+
+	if ((UMEM_NOFAIL & kmflags) == UMEM_NOFAIL) /* cannot return NULL */
+		return (__coverity_alloc__(size));
+	else if (condition0) /* fallible allocation that succeeded */
+		return (__coverity_alloc__(size));
+	else
+		return (NULL);
+}
+
+void *
+umem_alloc(size_t size, int kmflags)
+{
+	if ((UMEM_NOFAIL & kmflags) == UMEM_NOFAIL) /* cannot return NULL */
+		return (__coverity_alloc__(size));
+	else if (condition0) /* fallible allocation that succeeded */
+		return (__coverity_alloc__(size));
+	else
+		return (NULL);
+}
+
+void *
+umem_zalloc(size_t size, int kmflags)
+{
+	if ((UMEM_NOFAIL & kmflags) == UMEM_NOFAIL) /* cannot return NULL */
+		return (__coverity_alloc__(size));
+	else if (condition0) /* fallible allocation that succeeded */
+		return (__coverity_alloc__(size));
+	else
+		return (NULL);
+}
+
+void
+umem_free(void *buf, size_t size)
+{
+	(void) size;
+
+	__coverity_free__(buf);
+}
+
+void *
+spl_kmem_alloc(size_t sz, int fl, const char *func, int line)
+{
+	(void) func;
+	(void) line;
+
+	if (condition1)
+		__coverity_sleep__();
+
+	if (fl == 0) {
+		return (__coverity_alloc__(sz));
+	} else if (condition0)
+		return (__coverity_alloc__(sz));
+	else
+		return (NULL);
+}
+
+void *
+spl_kmem_zalloc(size_t sz, int fl, const char *func, int line)
+{
+	(void) func;
+	(void) line;
+
+	if (condition1)
+		__coverity_sleep__();
+
+	if (fl == 0) {
+		return (__coverity_alloc__(sz));
+	} else if (condition0)
+		return (__coverity_alloc__(sz));
+	else
+		return (NULL);
+}
+
+void
+spl_kmem_free(const void *ptr, size_t sz)
+{
+	(void) sz;
+
+	__coverity_free__(ptr);
+}
+
+typedef struct {} spl_kmem_cache_t;
+
+void *
+spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+{
+	(void) skc;
+
+	if (condition1)
+		__coverity_sleep__();
+
+	if (flags == 0) {
+		return (__coverity_alloc_nosize__());
+	} else if (condition0)
+		return (__coverity_alloc_nosize__());
+	else
+		return (NULL);
+}
+
+void
+spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+{
+	(void) skc;
+
+	__coverity_free__(obj);
+}
+
+void *
+malloc(size_t size)
+{
+	return (__coverity_alloc__(size)); /* caller owns the allocation */
+}
+
+void
+free(void *buf)
+{
+	__coverity_free__(buf);
+}
+
+int
+spl_panic(const char *file, const char *func, int line, const char *fmt, ...)
+{
+	__coverity_format_string_sink__(fmt);
+	__coverity_panic__();
+}
+
+int
+sched_yield(void)
+{
+	__coverity_sleep__();
+}
+
+typedef struct {} kmutex_t;
+typedef struct {} krwlock_t;
+typedef int krw_t;
+
+/*
+ * Coverity reportedly does not support macros, so this only works for
+ * userspace.
+ */
+
+void
+mutex_enter(kmutex_t *mp)
+{
+	if (condition0)
+		__coverity_sleep__();
+
+	__coverity_exclusive_lock_acquire__(mp);
+}
+
+int
+mutex_tryenter(kmutex_t *mp)
+{
+	if (condition0) {
+		__coverity_exclusive_lock_acquire__(mp);
+		return (1);
+	}
+
+	return (0);
+}
+
+void
+mutex_exit(kmutex_t *mp)
+{
+	__coverity_exclusive_lock_release__(mp);
+}
+
+void
+rw_enter(krwlock_t *rwlp, krw_t rw)
+{
+	(void) rw;
+
+	if (condition0)
+		__coverity_sleep__();
+
+	__coverity_recursive_lock_acquire__(rwlp);
+}
+
+void
+rw_exit(krwlock_t *rwlp)
+{
+	__coverity_recursive_lock_release__(rwlp);
+
+}
+
+int
+rw_tryenter(krwlock_t *rwlp, krw_t rw)
+{
+	if (condition0) {
+		__coverity_recursive_lock_acquire__(rwlp);
+		return (1);
+	}
+
+	return (0);
+}
+
+/* Thus, we fall back to the Linux kernel locks */
+struct mutex {};
+struct rw_semaphore {};
+
+void
+mutex_lock(struct mutex *lock)
+{
+	if (condition0) {
+		__coverity_sleep__();
+	}
+	__coverity_exclusive_lock_acquire__(lock);
+}
+
+void
+mutex_unlock(struct mutex *lock)
+{
+	__coverity_exclusive_lock_release__(lock);
+}
+
+void
+down_read(struct rw_semaphore *sem)
+{
+	if (condition0) {
+		__coverity_sleep__();
+	}
+	__coverity_recursive_lock_acquire__(sem);
+}
+
+void
+down_write(struct rw_semaphore *sem)
+{
+	if (condition0) {
+		__coverity_sleep__();
+	}
+	__coverity_recursive_lock_acquire__(sem);
+}
+
+int
+down_read_trylock(struct rw_semaphore *sem)
+{
+	if (condition0) {
+		__coverity_recursive_lock_acquire__(sem);
+		return (1);
+	}
+
+	return (0);
+}
+
+int
+down_write_trylock(struct rw_semaphore *sem)
+{
+	if (condition0) {
+		__coverity_recursive_lock_acquire__(sem);
+		return (1);
+	}
+
+	return (0);
+}
+
+void
+up_read(struct rw_semaphore *sem)
+{
+	__coverity_recursive_lock_release__(sem);
+}
+
+void
+up_write(struct rw_semaphore *sem)
+{
+	__coverity_recursive_lock_release__(sem);
+}
+
+int
+__cond_resched(void)
+{
+	if (condition0) {
+		__coverity_sleep__();
+	}
+}
+
+/*
+ * An endian-independent filesystem must support doing byte swaps on data. We
+ * attempt to suppress taint warnings, which are false positives for us.
+ */
+void
+byteswap_uint64_array(void *vbuf, size_t size)
+{
+	__coverity_tainted_data_sanitize__(vbuf);
+}
+
+void
+byteswap_uint32_array(void *vbuf, size_t size)
+{
+	__coverity_tainted_data_sanitize__(vbuf);
+}
+
+void
+byteswap_uint16_array(void *vbuf, size_t size)
+{
+	__coverity_tainted_data_sanitize__(vbuf);
+}

From dc2fe24ca22392a589fbafdf15e4c32f42442006 Mon Sep 17 00:00:00 2001
From: John Wren Kennedy <john.kennedy@delphix.com>
Date: Thu, 15 Sep 2022 14:14:35 -0600
Subject: [PATCH 40/69] ZTS: parameter expansion in zfs_unshare_006_pos

zfs_unshare_006 checks to see if a dataset still has an active SMB
share after doing an NFS unshare -a. The test could fail because the
check for the SMB share does not expect dashes in a dataset name to be
converted to underscores as pathname delimiters are.

Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Signed-off-by: John Kennedy <john.kennedy@delphix.com>
Closes #13893
---
 tests/zfs-tests/include/libtest.shlib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
index 435dcb81c3c..d163fc7c8cc 100644
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@@ -1265,7 +1265,7 @@ function is_shared_smb
 	datasetexists "$fs" || return
 
 	if is_linux; then
-		net usershare list | grep -xFq "${fs//\//_}"
+		net usershare list | grep -xFq "${fs//[-\/]/_}"
 	else
 		log_note "SMB on $UNAME currently unsupported by the test framework"
 		return 1

From e949d36040e5e79fe0dfda6a33451111cc5a0476 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 15 Sep 2022 16:24:00 -0400
Subject: [PATCH 41/69] Fix assertions in crypto reference helpers

The assertions are racy and the use of `membar_exit()` did nothing to
fix that.

The helpers use atomic functions, so we cleverly get values from the
atomics that we can use to ensure that the assertions operate on the
correct values.

We also use `membar_producer()` prior to decrementing reference counts
so that operations that happened prior to a decrement to 0 will be
guaranteed to happen before the decrement on architectures that reorder
atomics.

This also slightly improves performance by eliminating unnecessary
reads, although I doubt it would be measurable in any benchmark.

Reviewed-by: Mateusz Guzik <mjguzik@gmail.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13880
---
 module/icp/include/sys/crypto/impl.h       | 42 +++++++++++-----------
 module/icp/include/sys/crypto/sched_impl.h |  7 ++--
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/module/icp/include/sys/crypto/impl.h b/module/icp/include/sys/crypto/impl.h
index 32ac43475a3..4d17221ea9a 100644
--- a/module/icp/include/sys/crypto/impl.h
+++ b/module/icp/include/sys/crypto/impl.h
@@ -126,28 +126,26 @@ typedef struct kcf_provider_desc {
 	crypto_provider_id_t		pd_prov_id;
 } kcf_provider_desc_t;
 
-/* atomic operations in linux implicitly form a memory barrier */
-#define	membar_exit()
-
 /*
  * If a component has a reference to a kcf_provider_desc_t,
  * it REFHOLD()s. A new provider descriptor which is referenced only
  * by the providers table has a reference counter of one.
  */
-#define	KCF_PROV_REFHOLD(desc) {		\
-	atomic_add_32(&(desc)->pd_refcnt, 1);	\
-	ASSERT((desc)->pd_refcnt != 0);		\
+#define	KCF_PROV_REFHOLD(desc) {				\
+	int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1);	\
+	ASSERT(newval != 0);					\
 }
 
-#define	KCF_PROV_IREFHOLD(desc) {		\
-	atomic_add_32(&(desc)->pd_irefcnt, 1);	\
-	ASSERT((desc)->pd_irefcnt != 0);	\
+#define	KCF_PROV_IREFHOLD(desc) {				\
+	int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, 1);	\
+	ASSERT(newval != 0);					\
 }
 
 #define	KCF_PROV_IREFRELE(desc) {				\
-	ASSERT((desc)->pd_irefcnt != 0);			\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(desc)->pd_irefcnt, -1) == 0) {	\
+	membar_producer();					\
+	int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, -1);	\
+	ASSERT(newval != -1);					\
+	if (newval == 0) {					\
 		cv_broadcast(&(desc)->pd_remove_cv);		\
 	}							\
 }
@@ -155,9 +153,10 @@ typedef struct kcf_provider_desc {
 #define	KCF_PROV_REFHELD(desc)	((desc)->pd_refcnt >= 1)
 
 #define	KCF_PROV_REFRELE(desc) {				\
-	ASSERT((desc)->pd_refcnt != 0);				\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) {	\
+	membar_producer();					\
+	int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1);	\
+	ASSERT(newval != -1);					\
+	if (newval == 0) {					\
 		kcf_provider_zero_refcnt((desc));		\
 	}							\
 }
@@ -193,9 +192,9 @@ typedef	struct kcf_mech_entry {
  * it REFHOLD()s. A new policy descriptor which is referenced only
  * by the policy table has a reference count of one.
  */
-#define	KCF_POLICY_REFHOLD(desc) {		\
-	atomic_add_32(&(desc)->pd_refcnt, 1);	\
-	ASSERT((desc)->pd_refcnt != 0);		\
+#define	KCF_POLICY_REFHOLD(desc) {				\
+	int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1);	\
+	ASSERT(newval != 0);					\
 }
 
 /*
@@ -203,9 +202,10 @@ typedef	struct kcf_mech_entry {
  * reference is released, the descriptor is freed.
  */
 #define	KCF_POLICY_REFRELE(desc) {				\
-	ASSERT((desc)->pd_refcnt != 0);				\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0)	\
+	membar_producer();					\
+	int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1);	\
+	ASSERT(newval != -1);					\
+	if (newval == 0)					\
 		kcf_policy_free_desc(desc);			\
 }
 
diff --git a/module/icp/include/sys/crypto/sched_impl.h b/module/icp/include/sys/crypto/sched_impl.h
index 1989d5244e2..355c1a87faa 100644
--- a/module/icp/include/sys/crypto/sched_impl.h
+++ b/module/icp/include/sys/crypto/sched_impl.h
@@ -73,9 +73,10 @@ typedef struct kcf_context {
  * context structure is freed along with the global context.
  */
 #define	KCF_CONTEXT_REFRELE(ictx) {				\
-	ASSERT((ictx)->kc_refcnt != 0);				\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(ictx)->kc_refcnt, -1) == 0)	\
+	membar_producer();					\
+	int newval = atomic_add_32_nv(&(ictx)->kc_refcnt, -1);	\
+	ASSERT(newval != -1);					\
+	if (newval == 0)					\
 		kcf_free_context(ictx);				\
 }
 

From fa22ec569c093d5583a7f406ba0a9bb223eae436 Mon Sep 17 00:00:00 2001
From: Mateusz Piotrowski <0mp@FreeBSD.org>
Date: Thu, 15 Sep 2022 23:22:00 +0200
Subject: [PATCH 42/69] Use correct mdoc macros for arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Mateusz Piotrowski <0mp@FreeBSD.org>
Closes #13890
---
 man/man7/zpoolprops.7 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7
index a150f6d4370..2164c126011 100644
--- a/man/man7/zpoolprops.7
+++ b/man/man7/zpoolprops.7
@@ -177,7 +177,7 @@ changed with the
 .Nm zpool Cm set
 command:
 .Bl -tag -width Ds
-.It Sy ashift Ns = Ns Sy ashift
+.It Sy ashift Ns = Ns Ar ashift
 Pool sector size exponent, to the power of
 .Sy 2
 (internally referred to as

From ddb1fd91c0dbf64847235ee65e50e87c43257b05 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 15 Sep 2022 19:21:21 -0400
Subject: [PATCH 43/69] Fix incorrect size given to bqueue_enqueue() call in
 dmu_redact.c

We pass sizeof (struct redact_record *) rather than sizeof (struct
redact_record). Passing the pointer size is wrong.

Coverity caught this in two places.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13885
---
 module/zfs/dmu_redact.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/module/zfs/dmu_redact.c b/module/zfs/dmu_redact.c
index 09ca7d509ce..7afcc123134 100644
--- a/module/zfs/dmu_redact.c
+++ b/module/zfs/dmu_redact.c
@@ -142,7 +142,7 @@ record_merge_enqueue(bqueue_t *q, struct redact_record **build,
 {
 	if (new->eos_marker) {
 		if (*build != NULL)
-			bqueue_enqueue(q, *build, sizeof (*build));
+			bqueue_enqueue(q, *build, sizeof (**build));
 		bqueue_enqueue_flush(q, new, sizeof (*new));
 		return;
 	}
@@ -824,7 +824,7 @@ perform_thread_merge(bqueue_t *q, uint32_t num_threads,
 	avl_destroy(&end_tree);
 	kmem_free(redact_nodes, num_threads * sizeof (*redact_nodes));
 	if (current_record != NULL)
-		bqueue_enqueue(q, current_record, sizeof (current_record));
+		bqueue_enqueue(q, current_record, sizeof (*current_record));
 	return (err);
 }
 

From b24d1c77f7fc53d26ee915b5203a139f13fd9791 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Thu, 15 Sep 2022 19:22:33 -0400
Subject: [PATCH 44/69] Add zfs_btree_verify_intensity kernel module parameter

I see a few issues in the issue tracker that might be aided by being
able to turn this on. We have no module parameter for it, so I would
like to add one.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13874
---
 cmd/zdb/zdb.c      |  2 +-
 man/man4/zfs.4     | 16 ++++++++++++++++
 module/zfs/btree.c |  8 +++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 0fc4f0d0d1b..92df3dd167b 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -121,7 +121,7 @@ extern int zfs_vdev_async_read_max_active;
 extern boolean_t spa_load_verify_dryrun;
 extern boolean_t spa_mode_readable_spacemaps;
 extern int zfs_reconstruct_indirect_combinations_max;
-extern int zfs_btree_verify_intensity;
+extern uint_t zfs_btree_verify_intensity;
 
 static const char cmdname[] = "zdb";
 uint8_t dump_opt[256];
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index cecaf7e7f0a..b2f3e7c61fb 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1354,6 +1354,22 @@ _
 .TE
 .Sy \& * No Requires debug build.
 .
+.It Sy zfs_btree_verify_intensity Ns = Ns Sy 0 Pq uint
+Enables btree verification.
+The following settings are cumulative:
+.TS
+box;
+lbz r l l .
+	Value	Description
+
+	1	Verify height.
+	2	Verify pointers from children to parent.
+	3	Verify element counts.
+	4	Verify element order. (expensive)
+*	5	Verify unused memory is poisoned. (expensive)
+.TE
+.Sy \& * No Requires debug build.
+.
 .It Sy zfs_free_leak_on_eio Ns = Ns Sy 0 Ns | Ns 1 Pq int
 If destroy encounters an
 .Sy EIO
diff --git a/module/zfs/btree.c b/module/zfs/btree.c
index 60b063ed907..f0a9222a430 100644
--- a/module/zfs/btree.c
+++ b/module/zfs/btree.c
@@ -53,7 +53,7 @@ kmem_cache_t *zfs_btree_leaf_cache;
  * (while the asymptotic complexity of the other steps is the same, the
  * importance of the constant factors cannot be denied).
  */
-int zfs_btree_verify_intensity = 0;
+uint_t zfs_btree_verify_intensity = 0;
 
 /*
  * Convenience functions to silence warnings from memcpy/memmove's
@@ -2171,3 +2171,9 @@ zfs_btree_verify(zfs_btree_t *tree)
 		return;
 	zfs_btree_verify_poison(tree);
 }
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, zfs_, btree_verify_intensity, UINT, ZMOD_RW,
+	"Enable btree verification. Levels above 4 require ZFS be built "
+	"with debugging");
+/* END CSTYLED */

From 768eacedef54922962562e601ca2c3366c4bcc4b Mon Sep 17 00:00:00 2001
From: Chunwei Chen <tuxoko@gmail.com>
Date: Fri, 16 Sep 2022 13:36:47 -0700
Subject: [PATCH 45/69] zfs_enter rework

Replace ZFS_ENTER and ZFS_VERIFY_ZP, which have hidden returns, with
functions that return an error code. We want to do this because hidden
returns are not obvious and have caused some fail paths to miss their
unwinding.

This patch changes the common, linux, and freebsd parts. Also fixes
fail path unwinding in zfs_fsync, zpl_fsync, zpl_xattr_{list,get,set}, and
zfs_lookup().

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
Closes #13831
---
 include/os/freebsd/zfs/sys/zfs_znode_impl.h |  35 +-
 include/os/linux/zfs/sys/zfs_znode_impl.h   |  60 ++--
 include/sys/zfs_znode.h                     |  23 ++
 module/os/freebsd/zfs/zfs_ctldir.c          |  14 +-
 module/os/freebsd/zfs/zfs_vfsops.c          |  48 +--
 module/os/freebsd/zfs/zfs_vnops_os.c        | 369 +++++++++++---------
 module/os/linux/zfs/zfs_acl.c               |   5 +-
 module/os/linux/zfs/zfs_ctldir.c            |  44 ++-
 module/os/linux/zfs/zfs_vfsops.c            |  36 +-
 module/os/linux/zfs/zfs_vnops_os.c          | 300 ++++++++--------
 module/os/linux/zfs/zpl_ctldir.c            |  30 +-
 module/os/linux/zfs/zpl_file.c              |  18 +-
 module/os/linux/zfs/zpl_super.c             |   6 +-
 module/os/linux/zfs/zpl_xattr.c             |  21 +-
 module/zfs/zfs_vnops.c                      |  68 ++--
 15 files changed, 591 insertions(+), 486 deletions(-)

diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
index f76a841472f..41a5bb218c1 100644
--- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h
+++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
@@ -121,29 +121,24 @@ typedef struct zfs_soft_state {
 #define	zn_rlimit_fsize(zp, uio) \
     vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
 
-#define	ZFS_ENTER_ERROR(zfsvfs, error) do {			\
-	ZFS_TEARDOWN_ENTER_READ((zfsvfs), FTAG);		\
-	if (__predict_false((zfsvfs)->z_unmounted)) {		\
-		ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);		\
-		return (error);					\
-	}							\
-} while (0)
-
 /* Called on entry to each ZFS vnode and vfs operation  */
-#define	ZFS_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, EIO)
+static inline int
+zfs_enter(zfsvfs_t *zfsvfs, const char *tag)
+{
+	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag);
+	if (__predict_false((zfsvfs)->z_unmounted)) {
+		ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
+		return (SET_ERROR(EIO));
+	}
+	return (0);
+}
 
 /* Must be called before exiting the vop */
-#define	ZFS_EXIT(zfsvfs)	ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG)
-
-#define	ZFS_VERIFY_ZP_ERROR(zp, error) do {			\
-	if (__predict_false((zp)->z_sa_hdl == NULL)) {		\
-		ZFS_EXIT((zp)->z_zfsvfs);			\
-		return (error);					\
-	}							\
-} while (0)
-
-/* Verifies the znode is valid */
-#define	ZFS_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, EIO)
+static inline void
+zfs_exit(zfsvfs_t *zfsvfs, const char *tag)
+{
+	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
+}
 
 /*
  * Macros for dealing with dmu_buf_hold
diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h
index a6fa06a3f1a..52568781011 100644
--- a/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/include/os/linux/zfs/sys/zfs_znode_impl.h
@@ -84,39 +84,41 @@ extern "C" {
 #define	zrele(zp)	iput(ZTOI((zp)))
 
 /* Called on entry to each ZFS inode and vfs operation. */
-#define	ZFS_ENTER_ERROR(zfsvfs, error)				\
-do {								\
-	ZFS_TEARDOWN_ENTER_READ(zfsvfs, FTAG);			\
-	if (unlikely((zfsvfs)->z_unmounted)) {			\
-		ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);		\
-		return (error);					\
-	}							\
-} while (0)
-#define	ZFS_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, EIO)
-#define	ZPL_ENTER(zfsvfs)	ZFS_ENTER_ERROR(zfsvfs, -EIO)
+static inline int
+zfs_enter(zfsvfs_t *zfsvfs, const char *tag)
+{
+	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag);
+	if (unlikely(zfsvfs->z_unmounted)) {
+		ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
+		return (SET_ERROR(EIO));
+	}
+	return (0);
+}
 
 /* Must be called before exiting the operation. */
-#define	ZFS_EXIT(zfsvfs)					\
-do {								\
-	zfs_exit_fs(zfsvfs);					\
-	ZFS_TEARDOWN_EXIT_READ(zfsvfs, FTAG);			\
-} while (0)
+static inline void
+zfs_exit(zfsvfs_t *zfsvfs, const char *tag)
+{
+	zfs_exit_fs(zfsvfs);
+	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
+}
 
-#define	ZPL_EXIT(zfsvfs)					\
-do {								\
-	rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG);		\
-} while (0)
+static inline int
+zpl_enter(zfsvfs_t *zfsvfs, const char *tag)
+{
+	return (-zfs_enter(zfsvfs, tag));
+}
 
-/* Verifies the znode is valid. */
-#define	ZFS_VERIFY_ZP_ERROR(zp, error)				\
-do {								\
-	if (unlikely((zp)->z_sa_hdl == NULL)) {			\
-		ZFS_EXIT(ZTOZSB(zp));				\
-		return (error);					\
-	}							\
-} while (0)
-#define	ZFS_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, EIO)
-#define	ZPL_VERIFY_ZP(zp)	ZFS_VERIFY_ZP_ERROR(zp, -EIO)
+static inline void
+zpl_exit(zfsvfs_t *zfsvfs, const char *tag)
+{
+	ZFS_TEARDOWN_EXIT_READ(zfsvfs, tag);
+}
+
+/* zfs_verify_zp and zfs_enter_verify_zp are defined in zfs_znode.h */
+#define	zpl_verify_zp(zp)	(-zfs_verify_zp(zp))
+#define	zpl_enter_verify_zp(zfsvfs, zp, tag)	\
+	(-zfs_enter_verify_zp(zfsvfs, zp, tag))
 
 /*
  * Macros for dealing with dmu_buf_hold
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index b223c4b3b30..7c906050bc4 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -218,6 +218,29 @@ typedef struct znode {
 	ZNODE_OS_FIELDS;
 } znode_t;
 
+/* Verifies the znode is valid. */
+static inline int
+zfs_verify_zp(znode_t *zp)
+{
+	if (unlikely(zp->z_sa_hdl == NULL))
+		return (SET_ERROR(EIO));
+	return (0);
+}
+
+/* zfs_enter and zfs_verify_zp together */
+static inline int
+zfs_enter_verify_zp(zfsvfs_t *zfsvfs, znode_t *zp, const char *tag)
+{
+	int error;
+	if ((error = zfs_enter(zfsvfs, tag)) != 0)
+		return (error);
+	if ((error = zfs_verify_zp(zp)) != 0) {
+		zfs_exit(zfsvfs, tag);
+		return (error);
+	}
+	return (0);
+}
+
 typedef struct znode_hold {
 	uint64_t	zh_obj;		/* object id */
 	kmutex_t	zh_lock;	/* lock serializing object access */
diff --git a/module/os/freebsd/zfs/zfs_ctldir.c b/module/os/freebsd/zfs/zfs_ctldir.c
index 2c35b74cd3f..4b95b49dc40 100644
--- a/module/os/freebsd/zfs/zfs_ctldir.c
+++ b/module/os/freebsd/zfs/zfs_ctldir.c
@@ -1053,7 +1053,8 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
 		return (error);
 	}
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	for (;;) {
 		uint64_t cookie;
 		uint64_t id;
@@ -1070,7 +1071,7 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
 					*eofp = 1;
 				error = 0;
 			}
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 
@@ -1083,7 +1084,7 @@ zfsctl_snapdir_readdir(struct vop_readdir_args *ap)
 		if (error != 0) {
 			if (error == ENAMETOOLONG)
 				error = 0;
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(error));
 		}
 		zfs_uio_setoffset(&uio, cookie + dots_offset);
@@ -1101,7 +1102,8 @@ zfsctl_snapdir_getattr(struct vop_getattr_args *ap)
 	uint64_t snap_count;
 	int err;
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 	ds = dmu_objset_ds(zfsvfs->z_os);
 	zfsctl_common_getattr(vp, vap);
 	vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
@@ -1111,14 +1113,14 @@ zfsctl_snapdir_getattr(struct vop_getattr_args *ap)
 		err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
 		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
 		if (err != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (err);
 		}
 		vap->va_nlink += snap_count;
 	}
 	vap->va_size = vap->va_nlink;
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 8b60b34d85c..b290c36748c 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -286,7 +286,8 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
 	cmd = cmds >> SUBCMDSHIFT;
 	type = cmds & SUBCMDMASK;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	if (id == -1) {
 		switch (type) {
 		case USRQUOTA:
@@ -385,7 +386,7 @@ zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
 		break;
 	}
 done:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -426,7 +427,8 @@ zfs_sync(vfs_t *vfsp, int waitfor)
 		if (error != 0)
 			return (error);
 
-		ZFS_ENTER(zfsvfs);
+		if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+			return (error);
 		dp = dmu_objset_pool(zfsvfs->z_os);
 
 		/*
@@ -434,14 +436,14 @@ zfs_sync(vfs_t *vfsp, int waitfor)
 		 * filesystems which may exist on a suspended pool.
 		 */
 		if (rebooting && spa_suspended(dp->dp_spa)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 
 		if (zfsvfs->z_log != NULL)
 			zil_commit(zfsvfs->z_log, 0);
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 	} else {
 		/*
 		 * Sync all ZFS filesystems.  This is what happens when you
@@ -1408,10 +1410,12 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
+	int error;
 
 	statp->f_version = STATFS_VERSION;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	dmu_objset_space(zfsvfs->z_os,
 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
@@ -1458,7 +1462,7 @@ zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 
 	statp->f_namemax = MAXNAMELEN - 1;
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -1469,13 +1473,14 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 	znode_t *rootzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0)
 		*vpp = ZTOV(rootzp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	if (error == 0) {
 		error = vn_lock(*vpp, flags);
@@ -1712,7 +1717,8 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
 	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
 		return (EOPNOTSUPP);
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 	err = zfs_zget(zfsvfs, ino, &zp);
 	if (err == 0 && zp->z_unlinked) {
 		vrele(ZTOV(zp));
@@ -1720,7 +1726,7 @@ zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
 	}
 	if (err == 0)
 		*vpp = ZTOV(zp);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	if (err == 0) {
 		err = vn_lock(*vpp, flags);
 		if (err != 0)
@@ -1774,7 +1780,8 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 
 	*vpp = NULL;
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 
 	/*
 	 * On FreeBSD we can get snapshot's mount point or its parent file
@@ -1790,12 +1797,13 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 
 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
 		if (err)
 			return (SET_ERROR(EINVAL));
-		ZFS_ENTER(zfsvfs);
+		if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+			return (err);
 	}
 
 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
@@ -1807,7 +1815,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
 	} else {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1825,7 +1833,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 	if ((fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
 		if (object == ZFSCTL_INO_SNAPDIR) {
 			cn.cn_nameptr = "snapshot";
@@ -1860,7 +1868,7 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 	    (u_longlong_t)fid_gen,
 	    (u_longlong_t)gen_mask);
 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
@@ -1872,12 +1880,12 @@ zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 		dprintf("znode gen (%llu) != fid gen (%llu)\n",
 		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
 		vrele(ZTOV(zp));
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	*vpp = ZTOV(zp);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	err = vn_lock(*vpp, flags);
 	if (err == 0)
 		vnode_create_vobject(*vpp, zp->z_size, curthread);
@@ -1945,7 +1953,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 	/*
 	 * Attempt to re-establish all the active znodes with
 	 * their dbufs.  If a zfs_rezget() fails, then we'll let
-	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
+	 * any potential callers discover that via zfs_enter_verify_zp
 	 * when they try to use their znode.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index f0579626c5a..57889b7390e 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -135,13 +135,13 @@ typedef ulong_t cookie_t;
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1)	A check must be made in each zfs thread for a mounted file system.
- *	This is done avoiding races using ZFS_ENTER(zfsvfs).
- *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
- *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *	This is done avoiding races using zfs_enter(zfsvfs).
+ *	A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
+ *	must be checked with zfs_verify_zp(zp).  Both of these macros
  *	can return EIO from the calling function.
  *
  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
- *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
+ *	(if necessary) and zfs_exit(). This is for 3 reasons:
  *	First, if it's the last reference, the vnode/znode
  *	can be freed, so the zp may point to freed memory.  Second, the last
  *	reference will call zfs_zinactive(), which may induce a lot of work --
@@ -157,7 +157,7 @@ typedef ulong_t cookie_t;
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
- *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
@@ -192,7 +192,7 @@ typedef ulong_t cookie_t;
  *
  * In general, this is how things should be ordered in each vnode op:
  *
- *	ZFS_ENTER(zfsvfs);		// exit if unmounted
+ *	zfs_enter(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
  *	rw_enter(...);			// grab any other locks you need
@@ -210,7 +210,7 @@ typedef ulong_t cookie_t;
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
- *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		zfs_exit(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
@@ -221,7 +221,7 @@ typedef ulong_t cookie_t;
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	VN_RELE(...);			// release held vnodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
- *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	zfs_exit(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 static int
@@ -230,13 +230,14 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
 	(void) cr;
 	znode_t	*zp = VTOZ(*vpp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & FAPPEND) == 0)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
@@ -244,7 +245,7 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr)
 	if (flag & O_SYNC)
 		atomic_inc_32(&zp->z_sync_cnt);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -254,15 +255,16 @@ zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
 	(void) offset, (void) cr;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/* Decrement the synchronous opens in the znode */
 	if ((flag & O_SYNC) && (count == 1))
 		atomic_dec_32(&zp->z_sync_cnt);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -800,8 +802,8 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
 	    const char *, nm);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zdp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
+		return (error);
 
 #if	__FreeBSD_version > 1300124
 	dvp_seqc = vn_seqc_read_notmodify(dvp);
@@ -814,7 +816,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		 * If the xattr property is off, refuse the lookup request.
 		 */
 		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EOPNOTSUPP));
 		}
 
@@ -823,12 +825,12 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 		*vpp = ZTOV(zp);
@@ -841,7 +843,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 			vrele(ZTOV(zp));
 		}
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -856,14 +858,14 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		} else
 #endif
 		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -881,7 +883,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 			vnode_t *zfsctl_vp;
 			int ltype;
 
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			ltype = VOP_ISLOCKED(dvp);
 			VOP_UNLOCK1(dvp);
 			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
@@ -900,7 +902,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		}
 	}
 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
 			return (SET_ERROR(ENOTSUP));
 		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
@@ -918,7 +920,7 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		if (error == 0)
 			*vpp = ZTOV(zp);
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		if (error != 0)
 			break;
 
@@ -936,7 +938,11 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		if ((cnp->cn_flags & ISDOTDOT) == 0)
 			break;
 
-		ZFS_ENTER(zfsvfs);
+		if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
+			vput(ZTOV(zp));
+			*vpp = NULL;
+			return (error);
+		}
 		if (zdp->z_sa_hdl == NULL) {
 			error = SET_ERROR(EIO);
 		} else {
@@ -944,12 +950,12 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 			    &parent, sizeof (parent));
 		}
 		if (error != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			vput(ZTOV(zp));
 			break;
 		}
 		if (zp->z_id == parent) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			break;
 		}
 		vput(ZTOV(zp));
@@ -1066,21 +1072,21 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & AT_XVATTR) {
 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_type)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -1092,7 +1098,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
 
 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	ASSERT3P(zp, ==, NULL);
@@ -1150,7 +1156,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
@@ -1175,7 +1181,7 @@ zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1210,10 +1216,13 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 	int		error;
 
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zp = VTOZ(vp);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_verify_zp(zp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 	zilog = zfsvfs->z_log;
 
 	xattr_obj = 0;
@@ -1271,7 +1280,7 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1303,7 +1312,7 @@ zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 		zil_commit(zilog, 0);
 
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1408,32 +1417,32 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & AT_XVATTR) {
 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_type)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
 	    NULL, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1448,20 +1457,20 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
 
 	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	ASSERT3P(zp, ==, NULL);
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 
@@ -1488,7 +1497,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1520,7 +1529,7 @@ zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -1561,9 +1570,12 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 	dmu_tx_t	*tx;
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
+	if ((error = zfs_verify_zp(zp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 	zilog = zfsvfs->z_log;
 
 
@@ -1588,7 +1600,7 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1607,7 +1619,7 @@ zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1677,12 +1689,12 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	cookie_t	*cooks = NULL;
 	int		flags = 0;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1697,7 +1709,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	 * Check for valid iov_len.
 	 */
 	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -1705,7 +1717,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	 * Quit if directory has been removed (posix)
 	 */
 	if ((*eofp = zp->z_unlinked) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -1930,7 +1942,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	zfs_uio_setoffset(uio, offset);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	if (error != 0 && cookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
@@ -1968,8 +1980,8 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
 
@@ -1981,7 +1993,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
 		    &rdev, 8);
 
 	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1994,7 +2006,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
 	    (vap->va_uid != crgetuid(cr))) {
 		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
 		    skipaclchk, cr))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -2145,7 +2157,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
 		vap->va_blksize = zfsvfs->z_max_blksz;
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -2203,8 +2215,8 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if (mask & AT_NOSET)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
@@ -2218,17 +2230,17 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & AT_XVATTR))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & AT_SIZE && vp->v_type == VDIR) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -2246,7 +2258,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
@@ -2263,27 +2275,27 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if (mask & (AT_ATIME | AT_MTIME)) {
 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EOVERFLOW));
 		}
 	}
 	if (xoap != NULL && (mask & AT_XVATTR)) {
 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EOVERFLOW));
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(EOPNOTSUPP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
 
@@ -2298,7 +2310,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EOPNOTSUPP));
 		}
 	}
@@ -2307,7 +2319,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	aclp = NULL;
 
 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
@@ -2325,7 +2337,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		/* XXX - would it be OK to generate a log record here? */
 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
 		if (err) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (err);
 		}
 	}
@@ -2473,7 +2485,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EPERM));
 		}
 
@@ -2489,7 +2501,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 			err = secpolicy_setid_setsticky_clear(vp, vap,
 			    &oldva, cr);
 			if (err) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (err);
 			}
 			trim_mask |= AT_MODE;
@@ -2521,7 +2533,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
 		if (err) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (err);
 		}
 
@@ -2879,7 +2891,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if (os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -2904,14 +2916,17 @@ zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
 	 * The current code can invalidate the znode without acquiring the
 	 * corresponding vnode lock if the object represented by the znode
 	 * and vnode is no longer valid after a rollback or receive operation.
-	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
+	 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
 	 * that protects the znodes from the invalidation.
 	 */
 	zfsvfs = sdzp->z_zfsvfs;
 	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(sdzp);
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
+		return (error);
+	if ((error = zfs_verify_zp(tdzp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * Re-resolve svp to be certain it still exists and fetch the
@@ -2939,7 +2954,7 @@ zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
 	}
 	*tzpp = tzp;
 out:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3209,9 +3224,12 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
 	sdzp = VTOZ(sdvp);
 	zfsvfs = tdzp->z_zfsvfs;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(tdzp);
-	ZFS_VERIFY_ZP(sdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+		return (error);
+	if ((error = zfs_verify_zp(sdzp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
@@ -3234,10 +3252,17 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
 	}
 
 	szp = VTOZ(*svpp);
-	ZFS_VERIFY_ZP(szp);
+	if ((error = zfs_verify_zp(szp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
-	if (tzp != NULL)
-		ZFS_VERIFY_ZP(tzp);
+	if (tzp != NULL) {
+		if ((error = zfs_verify_zp(tzp)) != 0) {
+			zfs_exit(zfsvfs, FTAG);
+			return (error);
+		}
+	}
 
 	/*
 	 * This is to prevent the creation of links into attribute space
@@ -3412,7 +3437,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
 out:
 	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -3487,24 +3512,24 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
 
 	ASSERT3S(vap->va_type, ==, VLNK);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (len > MAXPATHLEN) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
 	    vap, cr, NULL, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3514,20 +3539,20 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
 	    0 /* projid */)) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 
@@ -3550,7 +3575,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3589,7 +3614,7 @@ zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3617,8 +3642,8 @@ zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (zp->z_is_sa)
 		error = sa_lookup_uio(zp->z_sa_hdl,
@@ -3628,7 +3653,7 @@ zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3661,8 +3686,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 
 	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -3670,11 +3695,14 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (ZTOV(szp)->v_type == VDIR) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
-	ZFS_VERIFY_ZP(szp);
+	if ((error = zfs_verify_zp(szp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * If we are using project inheritance, means if the directory has
@@ -3685,13 +3713,13 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (szp->z_pflags & (ZFS_APPENDONLY |
 	    ZFS_IMMUTABLE | ZFS_READONLY)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
@@ -3699,17 +3727,17 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -3720,19 +3748,19 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 
 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3741,7 +3769,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	 */
 	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3753,7 +3781,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3773,7 +3801,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3804,11 +3832,11 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	uint64_t	off, len;
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (cmd != F_FREESP) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3817,12 +3845,12 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
 	if (bfp->l_len < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3833,7 +3861,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * operates directly on inodes, so we need to check access rights.
 	 */
 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3842,7 +3870,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 
 	error = zfs_freesp(zp, off, len, flag, TRUE);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3910,12 +3938,12 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 	zfid_short_t	*zfid;
 	int		size, i, error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 	    &gen64, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3951,7 +3979,7 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 			zlfid->zf_setgen[i] = 0;
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -3961,6 +3989,7 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 {
 	znode_t *zp;
 	zfsvfs_t *zfsvfs;
+	int error;
 
 	switch (cmd) {
 	case _PC_LINK_MAX:
@@ -3977,10 +4006,10 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 #if 0		/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
-		ZFS_ENTER(zfsvfs);
-		ZFS_VERIFY_ZP(zp);
+		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+			return (error);
 		*valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 #else
 		*valp = 0;
 #endif
@@ -3989,10 +4018,10 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 	case _PC_ACL_NFS4:
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
-		ZFS_ENTER(zfsvfs);
-		ZFS_VERIFY_ZP(zp);
+		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+			return (error);
 		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 
 	case _PC_ACL_PATH_MAX:
@@ -4017,8 +4046,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	int pgsin_b, pgsin_a;
 	int error;
 
-	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
-	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
+	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
+		return (zfs_vm_pagerret_error);
 
 	start = IDX_TO_OFF(ma[0]->pindex);
 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
@@ -4055,7 +4084,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
 		if (lr != NULL)
 			zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (zfs_vm_pagerret_bad);
 	}
 
@@ -4088,7 +4117,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	if (error != 0)
 		return (zfs_vm_pagerret_error);
@@ -4151,8 +4180,8 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 	for (i = 0; i < pcount; i++)
 		rtvals[i] = zfs_vm_pagerret_error;
 
-	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
-	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
+	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
+		return (zfs_vm_pagerret_error);
 
 	off = IDX_TO_OFF(ma[0]->pindex);
 	blksz = zp->z_blksz;
@@ -4267,7 +4296,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (rtvals[0]);
 }
 
@@ -5425,9 +5454,9 @@ zfs_getextattr(struct vop_getextattr_args *ap)
 	if (error != 0)
 		return (error);
 
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 	error = ENOENT;
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	error = zfs_getextattr_impl(ap, zfs_xattr_compat);
@@ -5441,7 +5470,7 @@ zfs_getextattr(struct vop_getextattr_args *ap)
 	}
 
 	rw_exit(&zp->z_xattr_lock);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	if (error == ENOENT)
 		error = SET_ERROR(ENOATTR);
 	return (error);
@@ -5568,8 +5597,8 @@ zfs_deleteextattr(struct vop_deleteextattr_args *ap)
 	if (error != 0)
 		return (error);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
@@ -5583,7 +5612,7 @@ zfs_deleteextattr(struct vop_deleteextattr_args *ap)
 	}
 
 	rw_exit(&zp->z_xattr_lock);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	if (error == ENOENT)
 		error = SET_ERROR(ENOATTR);
 	return (error);
@@ -5756,14 +5785,14 @@ zfs_setextattr(struct vop_setextattr_args *ap)
 	if (error != 0)
 		return (error);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	error = zfs_setextattr_impl(ap, zfs_xattr_compat);
 
 	rw_exit(&zp->z_xattr_lock);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -5960,8 +5989,8 @@ zfs_listextattr(struct vop_listextattr_args *ap)
 	if (error != 0)
 		return (SET_ERROR(error));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	error = zfs_listextattr_impl(ap, zfs_xattr_compat);
@@ -5971,7 +6000,7 @@ zfs_listextattr(struct vop_listextattr_args *ap)
 	}
 
 	rw_exit(&zp->z_xattr_lock);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -6087,8 +6116,8 @@ zfs_vptocnp(struct vop_vptocnp_args *ap)
 	int ltype;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/*
 	 * If we are a snapshot mounted under .zfs, run the operation
@@ -6110,10 +6139,10 @@ zfs_vptocnp(struct vop_vptocnp_args *ap)
 			memcpy(ap->a_buf + *ap->a_buflen, name, len);
 			*ap->a_vpp = ZTOV(dzp);
 		}
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	covered_vp = vp->v_mount->mnt_vnodecovered;
 #if __FreeBSD_version >= 1300045
@@ -6154,15 +6183,15 @@ zfs_deallocate(struct vop_deallocate_args *ap)
 	off_t off, len, file_sz;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
@@ -6175,7 +6204,7 @@ zfs_deallocate(struct vop_deallocate_args *ap)
 	/* Fast path for out-of-range request. */
 	if (len <= 0) {
 		*ap->a_len = 0;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -6188,7 +6217,7 @@ zfs_deallocate(struct vop_deallocate_args *ap)
 		*ap->a_len = 0;
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 #endif
diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c
index a139ee12c4d..4fd071d3cb2 100644
--- a/module/os/linux/zfs/zfs_acl.c
+++ b/module/os/linux/zfs/zfs_acl.c
@@ -2596,9 +2596,10 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 
 slow:
 	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
-	ZFS_ENTER(ZTOZSB(zdp));
+	if ((error = zfs_enter(ZTOZSB(zdp), FTAG)) != 0)
+		return (error);
 	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
-	ZFS_EXIT(ZTOZSB(zdp));
+	zfs_exit(ZTOZSB(zdp), FTAG);
 	return (error);
 }
 
diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c
index 32342d25ce6..4ae0a65370e 100644
--- a/module/os/linux/zfs/zfs_ctldir.c
+++ b/module/os/linux/zfs/zfs_ctldir.c
@@ -673,17 +673,19 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
 	int		i;
+	int		error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsctl_is_snapdir(ip)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (zfsctl_snapdir_fid(ip, fidp));
 	}
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
 
@@ -698,7 +700,7 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = 0;
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -776,7 +778,8 @@ zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	int error = 0;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (strcmp(name, "..") == 0) {
 		*ipp = dip->i_sb->s_root->d_inode;
@@ -793,7 +796,7 @@ zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -810,11 +813,12 @@ zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	uint64_t id;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -823,7 +827,7 @@ zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -844,7 +848,8 @@ zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -902,7 +907,7 @@ zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -922,7 +927,8 @@ zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -951,7 +957,7 @@ zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1076,7 +1082,8 @@ zfsctl_snapshot_mount(struct path *path, int flags)
 		return (SET_ERROR(EISDIR));
 
 	zfsvfs = ITOZSB(ip);
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
@@ -1164,7 +1171,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
 	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(full_path, MAXPATHLEN);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1228,10 +1235,11 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
 	znode_t *dzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsvfs->z_shares_dir == 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOTSUP));
 	}
 
@@ -1240,7 +1248,7 @@ zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
 		zrele(dzp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index d0575fe5e98..251d9e9a40f 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -273,8 +273,10 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 		 * Sync a specific filesystem.
 		 */
 		dsl_pool_t *dp;
+		int error;
 
-		ZFS_ENTER(zfsvfs);
+		if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+			return (error);
 		dp = dmu_objset_pool(zfsvfs->z_os);
 
 		/*
@@ -282,14 +284,14 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 		 * filesystems which may exist on a suspended pool.
 		 */
 		if (spa_suspended(dp->dp_spa)) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 
 		if (zfsvfs->z_log != NULL)
 			zil_commit(zfsvfs->z_log, 0);
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 	} else {
 		/*
 		 * Sync all ZFS filesystems.  This is what happens when you
@@ -1092,7 +1094,8 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp)
 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
 	int err = 0;
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 
 	dmu_objset_space(zfsvfs->z_os,
 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
@@ -1153,7 +1156,7 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp)
 			err = zfs_statfs_project(zfsvfs, zp, statp, bshift);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -1163,13 +1166,14 @@ zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
 	znode_t *rootzp;
 	int error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0)
 		*ipp = ZTOI(rootzp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1247,7 +1251,8 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 		.gfp_mask = GFP_KERNEL,
 	};
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 #if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
 	defined(SHRINK_CONTROL_HAS_NID) && \
@@ -1288,7 +1293,7 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 		*objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
 #endif
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
 	    "pruning, nr_to_scan=%lu objects=%d error=%d\n",
@@ -1745,7 +1750,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
 	}
 
-	ZFS_ENTER(zfsvfs);
+	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (err);
 	/* A zero fid_gen means we are in the .zfs control directories */
 	if (fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
@@ -1761,7 +1767,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 			 */
 			VERIFY3P(igrab(*ipp), !=, NULL);
 		}
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -1769,14 +1775,14 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 
 	dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 
 	/* Don't export xattr stuff */
 	if (zp->z_pflags & ZFS_XATTR) {
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
@@ -1791,7 +1797,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 		dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
 		    fid_gen);
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
@@ -1799,7 +1805,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 	if (*ipp)
 		zfs_znode_update_vfs(ITOZ(*ipp));
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index 0b3f7f2501e..1ff88c121a7 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -82,13 +82,13 @@
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1) A check must be made in each zfs thread for a mounted file system.
- *	This is done avoiding races using ZFS_ENTER(zfsvfs).
- *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
- *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *	This is done avoiding races using zfs_enter(zfsvfs, FTAG).
+ *      A zfs_exit(zfsvfs, FTAG) is needed before all returns.  Any znodes
+ *      must be checked with zfs_verify_zp(zp).  Both of these checks
  *      can return EIO from the calling function.
  *
  *  (2) zrele() should always be the last thing except for zil_commit() (if
- *	necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
+ *	necessary) and zfs_exit(). This is for 3 reasons: First, if it's the
  *	last reference, the vnode/znode can be freed, so the zp may point to
  *	freed memory.  Second, the last reference will call zfs_zinactive(),
  *	which may induce a lot of work -- pushing cached pages (which acquires
@@ -107,7 +107,7 @@
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
- *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
@@ -142,7 +142,7 @@
  *
  * In general, this is how things should be ordered in each vnode op:
  *
- *	ZFS_ENTER(zfsvfs);		// exit if unmounted
+ *	zfs_enter(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
  *	rw_enter(...);			// grab any other locks you need
@@ -160,7 +160,7 @@
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
- *		ZFS_EXIT(zfsvfs);	// finished in zfs
+ *		zfs_exit(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
@@ -171,7 +171,7 @@
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	zrele(...);			// release held znodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
- *	ZFS_EXIT(zfsvfs);		// finished in zfs
+ *	zfs_exit(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 int
@@ -180,14 +180,15 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 	(void) cr;
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/* Honor ZFS_APPENDONLY file attribute */
 	if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & O_APPEND) == 0)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
@@ -195,7 +196,7 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 	if (flag & O_SYNC)
 		atomic_inc_32(&zp->z_sync_cnt);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -205,15 +206,16 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
 	(void) cr;
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	/* Decrement the synchronous opens in the znode */
 	if (flag & O_SYNC)
 		atomic_dec_32(&zp->z_sync_cnt);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -449,8 +451,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		}
 	}
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zdp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
+		return (error);
 
 	*zpp = NULL;
 
@@ -460,12 +462,12 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 
@@ -479,12 +481,12 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 			*zpp = NULL;
 		}
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOTDIR));
 	}
 
@@ -493,13 +495,13 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 	 */
 
 	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -507,7 +509,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
 	if ((error == 0) && (*zpp))
 		zfs_znode_update_vfs(*zpp);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -566,21 +568,21 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -609,7 +611,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 				zfs_acl_ids_free(&acl_ids);
 			if (strcmp(name, "..") == 0)
 				error = SET_ERROR(EISDIR);
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -681,7 +683,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 			}
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_abort(tx);
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
@@ -774,7 +776,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -808,14 +810,14 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	os = zfsvfs->z_os;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
@@ -870,7 +872,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
@@ -894,7 +896,7 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
 		*ipp = ZTOI(zp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -941,8 +943,8 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE) {
@@ -961,7 +963,7 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 	    NULL, realnmp))) {
 		if (realnmp)
 			pn_free(realnmp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1042,7 +1044,7 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 		zrele(zp);
 		if (xzp)
 			zrele(xzp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1131,7 +1133,7 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1188,18 +1190,18 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 	if (dirname == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
@@ -1208,14 +1210,14 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
 	    vsecp, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/*
@@ -1231,21 +1233,21 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
 	    NULL, NULL))) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 
@@ -1277,7 +1279,7 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1323,7 +1325,7 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1359,8 +1361,8 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE)
@@ -1373,7 +1375,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, NULL))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1424,7 +1426,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
 		}
 		dmu_tx_abort(tx);
 		zrele(zp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -1452,7 +1454,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -1491,8 +1493,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 	uint64_t	parent;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0)
@@ -1611,7 +1613,7 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 	if (error == ENOENT)
 		error = 0;
 out:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -1636,9 +1638,10 @@ zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint32_t blksize;
 	u_longlong_t nblocks;
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	mutex_enter(&zp->z_lock);
 
@@ -1673,7 +1676,7 @@ zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
 			    dmu_objset_id(zfsvfs->z_os);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (0);
 }
@@ -1849,8 +1852,8 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	if (mask == 0)
 		return (0);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 	ip = ZTOI(zp);
 
 	/*
@@ -1862,13 +1865,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
-				ZFS_EXIT(zfsvfs);
+				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
 
@@ -1883,7 +1886,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(ENOTSUP));
 		}
 	}
@@ -1899,17 +1902,17 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & ATTR_XVATTR))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -2526,7 +2529,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(tmpxvattr, sizeof (xvattr_t));
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -2661,11 +2664,14 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	if (snm == NULL || tnm == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(sdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_verify_zp(tdzp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
@@ -2673,13 +2679,13 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	 */
 	if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
 	    zfsctl_is_node(ZTOI(tdzp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
@@ -2697,7 +2703,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -2727,7 +2733,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 			 * the rename() function shall return successfully
 			 * and perform no other action."
 			 */
-			ZFS_EXIT(zfsvfs);
+			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 		/*
@@ -2799,7 +2805,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 
 		if (strcmp(snm, "..") == 0)
 			serr = EINVAL;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (serr);
 	}
 	if (terr) {
@@ -2811,7 +2817,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 
 		if (strcmp(tnm, "..") == 0)
 			terr = EINVAL;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (terr);
 	}
 
@@ -2915,7 +2921,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 		zrele(szp);
 		if (tzp)
 			zrele(tzp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -2989,7 +2995,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3032,26 +3038,26 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(dzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 	if (len > MAXPATHLEN) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
 	    vap, cr, NULL, &acl_ids)) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 top:
@@ -3063,21 +3069,21 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
 	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
@@ -3104,7 +3110,7 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3159,7 +3165,7 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
 		zrele(zp);
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3185,8 +3191,8 @@ zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	mutex_enter(&zp->z_lock);
 	if (zp->z_is_sa)
@@ -3196,7 +3202,7 @@ zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 		error = zfs_sa_readlink(zp, uio);
 	mutex_exit(&zp->z_lock);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3241,8 +3247,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(tdzp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+		return (error);
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -3250,11 +3256,14 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (S_ISDIR(sip->i_mode)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
-	ZFS_VERIFY_ZP(szp);
+	if ((error = zfs_verify_zp(szp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	/*
 	 * If we are using project inheritance, means if the directory has
@@ -3265,7 +3274,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
@@ -3274,7 +3283,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * super blocks.
 	 */
 	if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
@@ -3282,17 +3291,17 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
@@ -3305,19 +3314,19 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
 	    cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3327,7 +3336,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 	 */
 	error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
 	if (error) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3349,7 +3358,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/* unmark z_unlinked so zfs_link_create will not reject */
@@ -3391,7 +3400,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
 
 	zfs_znode_update_vfs(tdzp);
 	zfs_znode_update_vfs(szp);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3448,8 +3457,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	int		cnt = 0;
 	struct address_space *mapping;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 
 	ASSERT(PageLocked(pp));
 
@@ -3461,7 +3470,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	/* Page is beyond end of file */
 	if (pgoff >= offset) {
 		unlock_page(pp);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3521,7 +3530,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3549,7 +3558,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 #endif
 		}
 
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3557,7 +3566,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	if (!clear_page_dirty_for_io(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -3592,7 +3601,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 		if (!for_sync)
 			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 
@@ -3643,7 +3652,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -3665,8 +3674,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
 		return (0);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 #ifdef I_DIRTY_TIME
 	/*
@@ -3714,7 +3723,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
 
 	dmu_tx_commit(tx);
 out:
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3831,14 +3840,14 @@ zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
 	if (pl == NULL)
 		return (0);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (err);
 
 	err = zfs_fillpage(ip, pl, nr_pages);
 
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
@@ -3861,28 +3870,29 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
 	(void) addrp;
 	znode_t  *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
 	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((vm_flags & (VM_READ | VM_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EACCES));
 	}
 
 	if (off < 0 || len > MAXOFFSET_T - off) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENXIO));
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -3913,11 +3923,11 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	uint64_t	off, len;
 	int		error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (cmd != F_FREESP) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3926,12 +3936,12 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
 	if (bfp->l_len < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -3942,7 +3952,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 	 * operates directly on inodes, so we need to check access rights.
 	 */
 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3951,7 +3961,7 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
 
 	error = zfs_freesp(zp, off, len, flag, TRUE);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -3966,19 +3976,23 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	zfid_short_t	*zfid;
 	int		size, i, error;
 
-	ZFS_ENTER(zfsvfs);
+	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
 
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_verify_zp(zp)) != 0) {
+		zfs_exit(zfsvfs, FTAG);
+		return (error);
+	}
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 	    &gen64, sizeof (uint64_t))) != 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -3999,7 +4013,7 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c
index ec8f2938598..1a688687ac4 100644
--- a/module/os/linux/zfs/zpl_ctldir.c
+++ b/module/os/linux/zfs/zpl_ctldir.c
@@ -57,7 +57,8 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (!zpl_dir_emit_dots(filp, ctx))
 		goto out;
@@ -78,7 +79,7 @@ zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx)
 		ctx->pos++;
 	}
 out:
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -258,7 +259,8 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	uint64_t id, pos;
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	cookie = spl_fstrans_mark();
 
 	if (!zpl_dir_emit_dots(filp, ctx))
@@ -282,7 +284,7 @@ zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	}
 out:
 	spl_fstrans_unmark(cookie);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	if (error == -ENOENT)
 		return (0);
@@ -401,8 +403,10 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 	(void) request_mask, (void) query_flags;
 	struct inode *ip = path->dentry->d_inode;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	int error;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 #ifdef HAVE_USERNS_IOPS_GETATTR
 #ifdef HAVE_GENERIC_FILLATTR_USERNS
 	generic_fillattr(user_ns, ip, stat);
@@ -422,7 +426,7 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 		    dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
 		    dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
 		if (err != 0) {
-			ZPL_EXIT(zfsvfs);
+			zpl_exit(zfsvfs, FTAG);
 			return (-err);
 		}
 		stat->nlink += snap_count;
@@ -430,7 +434,7 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 
 	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
 	stat->atime = current_time(ip);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (0);
 }
@@ -508,7 +512,8 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	znode_t *dzp;
 	int error = 0;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 	cookie = spl_fstrans_mark();
 
 	if (zfsvfs->z_shares_dir == 0) {
@@ -527,7 +532,7 @@ zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx)
 	iput(ZTOI(dzp));
 out:
 	spl_fstrans_unmark(cookie);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
@@ -564,7 +569,8 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 	znode_t *dzp;
 	int error;
 
-	ZPL_ENTER(zfsvfs);
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	if (zfsvfs->z_shares_dir == 0) {
 #ifdef HAVE_USERNS_IOPS_GETATTR
@@ -578,7 +584,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 #endif
 		stat->nlink = stat->size = 2;
 		stat->atime = current_time(ip);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -596,7 +602,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 		iput(ZTOI(dzp));
 	}
 
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index b0d9f37a3ec..f6bdfd08b83 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -195,9 +195,12 @@ zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	 * zfs_putpage() respectively.
 	 */
 	if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
-		ZPL_ENTER(zfsvfs);
+		if ((error = zpl_enter(zfsvfs, FTAG)) != 0) {
+			atomic_dec_32(&zp->z_sync_writes_cnt);
+			return (error);
+		}
 		zil_commit(zfsvfs->z_log, zp->z_id);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 	}
 
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -752,10 +755,11 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	enum writeback_sync_modes sync_mode;
 	int result;
 
-	ZPL_ENTER(zfsvfs);
+	if ((result = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (result);
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		wbc->sync_mode = WB_SYNC_ALL;
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 	sync_mode = wbc->sync_mode;
 
 	/*
@@ -769,11 +773,11 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	wbc->sync_mode = WB_SYNC_NONE;
 	result = write_cache_pages(mapping, wbc, zpl_putpage, &for_sync);
 	if (sync_mode != wbc->sync_mode) {
-		ZPL_ENTER(zfsvfs);
-		ZPL_VERIFY_ZP(zp);
+		if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+			return (result);
 		if (zfsvfs->z_log != NULL)
 			zil_commit(zfsvfs->z_log, zp->z_id);
-		ZPL_EXIT(zfsvfs);
+		zpl_exit(zfsvfs, FTAG);
 
 		/*
 		 * We need to call write_cache_pages() again (we can't just
diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c
index cf879a2897b..e3945a2a05f 100644
--- a/module/os/linux/zfs/zpl_super.c
+++ b/module/os/linux/zfs/zpl_super.c
@@ -185,7 +185,9 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data)
 static int
 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
 {
-	ZPL_ENTER(zfsvfs);
+	int error;
+	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
+		return (error);
 
 	char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	dmu_objset_name(zfsvfs->z_os, fsname);
@@ -205,7 +207,7 @@ __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
 
 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (0);
 }
diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c
index e7e299dcf1c..a010667adfa 100644
--- a/module/os/linux/zfs/zpl_xattr.c
+++ b/module/os/linux/zfs/zpl_xattr.c
@@ -246,8 +246,8 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out1;
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	if (zfsvfs->z_use_sa && zp->z_is_sa) {
@@ -264,7 +264,8 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 out:
 
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out1:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -435,12 +436,13 @@ zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out;
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 	error = __zpl_xattr_get(ip, name, value, size, cr);
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
@@ -604,8 +606,8 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	ZPL_ENTER(zfsvfs);
-	ZPL_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		goto out1;
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	/*
@@ -658,7 +660,8 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
 		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
 out:
 	rw_exit(&zp->z_xattr_lock);
-	ZPL_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
+out1:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index b02e8283c77..57f03f11627 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -61,21 +61,23 @@ static ulong_t zfs_fsync_sync_cnt = 4;
 int
 zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
+	int error = 0;
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 
 	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
 
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
-		ZFS_ENTER(zfsvfs);
-		ZFS_VERIFY_ZP(zp);
+		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+			goto out;
 		atomic_inc_32(&zp->z_sync_writes_cnt);
 		zil_commit(zfsvfs->z_log, zp->z_id);
 		atomic_dec_32(&zp->z_sync_writes_cnt);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 	}
+out:
 	tsd_set(zfs_fsyncer_key, NULL);
 
-	return (0);
+	return (error);
 }
 
 
@@ -146,12 +148,12 @@ zfs_holey(znode_t *zp, ulong_t cmd, loff_t *off)
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	error = zfs_holey_common(zp, cmd, off);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 #endif /* SEEK_HOLE && SEEK_DATA */
@@ -162,15 +164,15 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (flag & V_ACE_MASK)
 		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
 	else
 		error = zfs_zaccess_rwx(zp, mode, flag, cr);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -201,17 +203,17 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	boolean_t frsync = B_FALSE;
 
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EACCES));
 	}
 
 	/* We don't copy out anything useful for directories. */
 	if (Z_ISDIR(ZTOTYPE(zp))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
@@ -219,7 +221,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 * Validate file offset
 	 */
 	if (zfs_uio_offset(uio) < (offset_t)0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -227,7 +229,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 * Fasttrack empty reads
 	 */
 	if (zfs_uio_resid(uio) == 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
@@ -312,7 +314,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	zfs_rangelock_exit(lr);
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
@@ -404,8 +406,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 		return (0);
 
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
@@ -422,7 +424,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
@@ -434,7 +436,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
 	    (zfs_uio_offset(uio) < zp->z_size))) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
@@ -443,7 +445,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 */
 	offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
 	if (woff < 0) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -455,7 +457,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 * Skip this if uio contains loaned arc_buf.
 	 */
 	if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFAULT));
 	}
 
@@ -490,7 +492,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 
 	if (zn_rlimit_fsize(zp, uio)) {
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFBIG));
 	}
 
@@ -498,7 +500,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 
 	if (woff >= limit) {
 		zfs_rangelock_exit(lr);
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFBIG));
 	}
 
@@ -761,7 +763,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	 */
 	if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
 	    error == EFAULT) {
-		ZFS_EXIT(zfsvfs);
+		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
@@ -773,7 +775,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
 	task_io_account_write(nwritten);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
@@ -784,10 +786,10 @@ zfs_getsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 	int error;
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
@@ -800,15 +802,15 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	zilog_t	*zilog = zfsvfs->z_log;
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
-	ZFS_EXIT(zfsvfs);
+	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 

From 1b6f3368dd5b416753178da06cb19c32798671e6 Mon Sep 17 00:00:00 2001
From: Chunwei Chen <tuxoko@gmail.com>
Date: Fri, 16 Sep 2022 13:43:26 -0700
Subject: [PATCH 46/69] Fix unable to export zpool without nfs-utils

Don't return an error from nfs_disable_share() when NFS is not available,
since nothing could have been shared in the first place.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
Closes #13534
Closes #13800
---
 lib/libshare/os/linux/nfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index 0d63c989d34..c27e5564c1e 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -449,7 +449,7 @@ static int
 nfs_disable_share(sa_share_impl_t impl_share)
 {
 	if (!nfs_available())
-		return (SA_SYSTEM_ERR);
+		return (SA_OK);
 
 	return (nfs_toggle_share(
 	    ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE, ZFS_EXPORTS_DIR, impl_share,

From 8da218a7a2ee0d0c0a0741f1007ebce0bf22584a Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Fri, 16 Sep 2022 16:45:15 -0400
Subject: [PATCH 47/69] Update coverity model

`uu_panic()` needs to be modelled and the definition of `vpanic()` from
the original coverity model was missing
`__coverity_format_string_sink__()`.

We also model `libspl_assertf()` as part of an attempt to eliminate
false positives.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13901
---
 contrib/coverity/model.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/contrib/coverity/model.c b/contrib/coverity/model.c
index ee2d01e7f3c..d27abd03876 100644
--- a/contrib/coverity/model.c
+++ b/contrib/coverity/model.c
@@ -70,9 +70,24 @@ panic(const char *fmt, ...)
 void
 vpanic(const char *fmt, va_list adx)
 {
-	(void) fmt;
 	(void) adx;
 
+	__coverity_format_string_sink__(fmt);
+	__coverity_panic__();
+}
+
+void
+uu_panic(const char *format, ...)
+{
+	__coverity_format_string_sink__(format);
+	__coverity_panic__();
+}
+
+int
+libspl_assertf(const char *file, const char *func, int line,
+    const char *format, ...)
+{
+	__coverity_format_string_sink__(format);
 	__coverity_panic__();
 }
 

From 577d41d3b2e4b37f51270c399c85b2708e21238a Mon Sep 17 00:00:00 2001
From: Ameer Hamza <106930537+ixhamza@users.noreply.github.com>
Date: Sat, 17 Sep 2022 01:52:25 +0500
Subject: [PATCH 48/69] zfs recv hangs if max recordsize is less than received
 recordsize

- Some optimizations for bqueue enqueue/dequeue.
- Added a fix to prevent a deadlock when both bqueue_enqueue_impl()
and bqueue_dequeue() wait for a signal to be triggered.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #13855
---
 include/sys/bqueue.h | 14 +++++++-------
 include/sys/fs/zfs.h |  6 +++---
 module/zfs/bqueue.c  | 23 +++++++++++++----------
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/include/sys/bqueue.h b/include/sys/bqueue.h
index 797aecd791a..b9621966027 100644
--- a/include/sys/bqueue.h
+++ b/include/sys/bqueue.h
@@ -30,22 +30,22 @@ typedef struct bqueue {
 	kmutex_t bq_lock;
 	kcondvar_t bq_add_cv;
 	kcondvar_t bq_pop_cv;
-	uint64_t bq_size;
-	uint64_t bq_maxsize;
-	uint64_t bq_fill_fraction;
+	size_t bq_size;
+	size_t bq_maxsize;
+	uint_t bq_fill_fraction;
 	size_t bq_node_offset;
 } bqueue_t;
 
 typedef struct bqueue_node {
 	list_node_t bqn_node;
-	uint64_t bqn_size;
+	size_t bqn_size;
 } bqueue_node_t;
 
 
-int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t);
+int bqueue_init(bqueue_t *, uint_t, size_t, size_t);
 void bqueue_destroy(bqueue_t *);
-void bqueue_enqueue(bqueue_t *, void *, uint64_t);
-void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t);
+void bqueue_enqueue(bqueue_t *, void *, size_t);
+void bqueue_enqueue_flush(bqueue_t *, void *, size_t);
 void *bqueue_dequeue(bqueue_t *);
 boolean_t bqueue_empty(bqueue_t *);
 
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 8cbd0e6024a..dedee0e7bd5 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1758,9 +1758,9 @@ typedef enum {
  * against the cost of COWing a giant block to modify one byte, and the
  * large latency of reading or writing a large block.
  *
- * Note that although blocks up to 16MB are supported, the recordsize
- * property can not be set larger than zfs_max_recordsize (default 1MB).
- * See the comment near zfs_max_recordsize in dsl_dataset.c for details.
+ * The recordsize property can not be set larger than zfs_max_recordsize
+ * (default 16MB on 64-bit and 1MB on 32-bit). See the comment near
+ * zfs_max_recordsize in dsl_dataset.c for details.
  *
  * Note that although the LSIZE field of the blkptr_t can store sizes up
  * to 32MB, the dnode's dn_datablkszsec can only store sizes up to
diff --git a/module/zfs/bqueue.c b/module/zfs/bqueue.c
index 22539efc4e2..ec5ce4388ec 100644
--- a/module/zfs/bqueue.c
+++ b/module/zfs/bqueue.c
@@ -42,8 +42,7 @@ obj2node(bqueue_t *q, void *data)
  * Return 0 on success, or -1 on failure.
  */
 int
-bqueue_init(bqueue_t *q, uint64_t fill_fraction, uint64_t size,
-    size_t node_offset)
+bqueue_init(bqueue_t *q, uint_t fill_fraction, size_t size, size_t node_offset)
 {
 	if (fill_fraction == 0) {
 		return (-1);
@@ -78,22 +77,26 @@ bqueue_destroy(bqueue_t *q)
 }
 
 static void
-bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
-    boolean_t flush)
+bqueue_enqueue_impl(bqueue_t *q, void *data, size_t item_size, boolean_t flush)
 {
 	ASSERT3U(item_size, >, 0);
 	ASSERT3U(item_size, <=, q->bq_maxsize);
 	mutex_enter(&q->bq_lock);
 	obj2node(q, data)->bqn_size = item_size;
-	while (q->bq_size + item_size > q->bq_maxsize) {
+	while (q->bq_size && q->bq_size + item_size > q->bq_maxsize) {
+		/*
+		 * Wake up bqueue_dequeue() thread if already sleeping in order
+		 * to prevent the deadlock condition
+		 */
+		cv_signal(&q->bq_pop_cv);
 		cv_wait_sig(&q->bq_add_cv, &q->bq_lock);
 	}
 	q->bq_size += item_size;
 	list_insert_tail(&q->bq_list, data);
-	if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
-		cv_signal(&q->bq_pop_cv);
 	if (flush)
 		cv_broadcast(&q->bq_pop_cv);
+	else if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
+		cv_signal(&q->bq_pop_cv);
 	mutex_exit(&q->bq_lock);
 }
 
@@ -103,7 +106,7 @@ bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
  * > 0.
  */
 void
-bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue(bqueue_t *q, void *data, size_t item_size)
 {
 	bqueue_enqueue_impl(q, data, item_size, B_FALSE);
 }
@@ -117,7 +120,7 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
  * destroy the condvar before the enqueuing thread is done.
  */
 void
-bqueue_enqueue_flush(bqueue_t *q, void *data, uint64_t item_size)
+bqueue_enqueue_flush(bqueue_t *q, void *data, size_t item_size)
 {
 	bqueue_enqueue_impl(q, data, item_size, B_TRUE);
 }
@@ -130,7 +133,7 @@ void *
 bqueue_dequeue(bqueue_t *q)
 {
 	void *ret = NULL;
-	uint64_t item_size;
+	size_t item_size;
 	mutex_enter(&q->bq_lock);
 	while (q->bq_size == 0) {
 		cv_wait_sig(&q->bq_pop_cv, &q->bq_lock);

From 6c8e9f09c22446cb8a1415ed1db05231cd659f69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= <nabijaczleweli@nabijaczleweli.xyz>
Date: Fri, 16 Sep 2022 22:59:25 +0200
Subject: [PATCH 49/69] =?UTF-8?q?Handle=20ECKSUM=20as=20new=20EZFS=5FCKSUM?=
 =?UTF-8?q?=20=E2=80=92=20"insufficient=20replicas"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a meaningful error message for ECKSUM to common error messages.

Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Closes #6805
Closes #13808
Closes #13898
---
 include/libzfs.h         | 1 +
 lib/libzfs/libzfs_util.c | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/libzfs.h b/include/libzfs.h
index 92c7bf6d1c9..4fc77612259 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -151,6 +151,7 @@ typedef enum zfs_error {
 	EZFS_REBUILDING,	/* resilvering (sequential reconstrution) */
 	EZFS_VDEV_NOTSUP,	/* ops not supported for this type of vdev */
 	EZFS_NOT_USER_NAMESPACE,	/* a file is not a user namespace */
+	EZFS_CKSUM,		/* insufficient replicas */
 	EZFS_UNKNOWN
 } zfs_error_t;
 
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index cca86d2d782..3067e8d4639 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -170,6 +170,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
 		return (dgettext(TEXT_DOMAIN, "I/O error"));
 	case EZFS_INTR:
 		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_CKSUM:
+		return (dgettext(TEXT_DOMAIN, "insufficient replicas"));
 	case EZFS_ISSPARE:
 		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
 		    "spare"));
@@ -396,6 +398,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
 	case EINTR:
 		zfs_verror(hdl, EZFS_INTR, fmt, ap);
 		return (-1);
+
+	case ECKSUM:
+		zfs_verror(hdl, EZFS_CKSUM, fmt, ap);
+		return (-1);
 	}
 
 	return (0);

From 4df8ccc83dc59c6921a4b8df4cd01f08ead3114a Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Fri, 16 Sep 2022 17:02:54 -0400
Subject: [PATCH 50/69] Fix null pointer dereferences in PAM

Coverity caught these.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13889
---
 contrib/pam_zfs_key/pam_zfs_key.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/contrib/pam_zfs_key/pam_zfs_key.c b/contrib/pam_zfs_key/pam_zfs_key.c
index 6f95d468074..c1001e6b81c 100644
--- a/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/contrib/pam_zfs_key/pam_zfs_key.c
@@ -531,7 +531,6 @@ zfs_key_config_get_dataset(zfs_key_config_t *config)
 		if (zhp == NULL) {
 			pam_syslog(NULL, LOG_ERR, "dataset %s not found",
 			    config->homes_prefix);
-			zfs_close(zhp);
 			return (NULL);
 		}
 
@@ -543,6 +542,10 @@ zfs_key_config_get_dataset(zfs_key_config_t *config)
 		return (dsname);
 	}
 
+	if (config->homes_prefix == NULL) {
+		return (NULL);
+	}
+
 	size_t len = ZFS_MAX_DATASET_NAME_LEN;
 	size_t total_len = strlen(config->homes_prefix) + 1
 	    + strlen(config->username);

From 7dee043af5d9fce99611bca5863bf6ca28b741ba Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 16 Sep 2022 14:22:52 -0700
Subject: [PATCH 51/69] zfs_enter rework followup

The zpl_fadvise() function was recently added and was not included
in the initial patch.  Update it accordingly.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #13831
---
 module/os/linux/zfs/zpl_file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index f6bdfd08b83..25fc6b22329 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -930,8 +930,8 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
 	if (offset < 0 || len < 0)
 		return (-EINVAL);
 
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
+	if ((error = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+		return (error);
 
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
@@ -963,7 +963,7 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
 		break;
 	}
 
-	ZFS_EXIT(zfsvfs);
+	zpl_exit(zfsvfs, FTAG);
 
 	return (error);
 }

From 75e8b5ad847ed7fd9e40ffdf33989b6578469903 Mon Sep 17 00:00:00 2001
From: Tino Reichardt <milky-zfs@mcmilk.de>
Date: Wed, 3 Aug 2022 18:36:41 +0200
Subject: [PATCH 52/69] Fix BLAKE3 tuneable and module loading on Linux and
 FreeBSD

Apply options to BLAKE3 similar to those used for zfs_fletcher_4_impl.

The zfs module parameter on Linux changes from icp_blake3_impl to
zfs_blake3_impl.

You can check and set it on Linux via sysfs like this:
```
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle [fastest] generic sse2 sse41 avx2

[bash]# echo sse2 > /sys/module/zfs/parameters/zfs_blake3_impl
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic [sse2] sse41 avx2
```

The modprobe module parameters may also be used now:
```
[bash]# modprobe zfs zfs_blake3_impl=sse41
[bash]# cat /sys/module/zfs/parameters/zfs_blake3_impl
cycle fastest generic sse2 [sse41] avx2
```

On FreeBSD the BLAKE3 implementation can be set via sysctl like this:
```
[bsd]# sysctl vfs.zfs.blake3_impl
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2
[bsd]# sysctl vfs.zfs.blake3_impl=sse2
vfs.zfs.blake3_impl: cycle [fastest] generic sse2 sse41 avx2 \
  -> cycle fastest generic [sse2] sse41 avx2
```

This commit also changes some BLAKE3 internals:
- blake3_impl_ops_t was renamed to blake3_ops_t
- all functions are named blake3_impl_NAME() now

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
---
 cmd/ztest.c                                |   4 +-
 include/os/freebsd/spl/sys/mod_os.h        |   7 +-
 include/sys/blake3.h                       |  23 +-
 module/icp/algs/blake3/blake3.c            |  14 +-
 module/icp/algs/blake3/blake3_generic.c    |   2 +-
 module/icp/algs/blake3/blake3_impl.c       | 366 +++++++++++++--------
 module/icp/algs/blake3/blake3_impl.h       |  14 +-
 module/icp/algs/blake3/blake3_x86-64.c     |   8 +-
 module/zfs/zfs_chksum.c                    |  20 +-
 tests/zfs-tests/cmd/checksum/blake3_test.c |  12 +-
 10 files changed, 273 insertions(+), 197 deletions(-)

diff --git a/cmd/ztest.c b/cmd/ztest.c
index 847c3a5b06c..0712f286bf6 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -6413,7 +6413,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
 		void *res2 = &zc_res2;
 
 		/* BLAKE3_KEY_LEN = 32 */
-		VERIFY0(blake3_set_impl_name("generic"));
+		VERIFY0(blake3_impl_setname("generic"));
 		templ = abd_checksum_blake3_tmpl_init(&salt);
 		Blake3_InitKeyed(&ctx, salt_ptr);
 		Blake3_Update(&ctx, buf, size);
@@ -6422,7 +6422,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
 		ZIO_CHECKSUM_BSWAP(&zc_ref2);
 		abd_checksum_blake3_tmpl_free(templ);
 
-		VERIFY0(blake3_set_impl_name("cycle"));
+		VERIFY0(blake3_impl_setname("cycle"));
 		while (run_count-- > 0) {
 
 			/* Test current implementation */
diff --git a/include/os/freebsd/spl/sys/mod_os.h b/include/os/freebsd/spl/sys/mod_os.h
index 95a19cc940c..e2815ce9e54 100644
--- a/include/os/freebsd/spl/sys/mod_os.h
+++ b/include/os/freebsd/spl/sys/mod_os.h
@@ -31,10 +31,6 @@
 
 #include <sys/sysctl.h>
 
-#define	EXPORT_SYMBOL(x)
-#define	module_param(a, b, c)
-#define	MODULE_PARM_DESC(a, b)
-
 #define	ZMOD_RW CTLFLAG_RWTUN
 #define	ZMOD_RD CTLFLAG_RDTUN
 
@@ -98,6 +94,9 @@
 #define	fletcher_4_param_set_args(var) \
     CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
 
+#define	blake3_param_set_args(var) \
+    CTLTYPE_STRING, NULL, 0, blake3_param, "A"
+
 #include <sys/kernel.h>
 #define	module_init(fn) \
 static void \
diff --git a/include/sys/blake3.h b/include/sys/blake3.h
index 19500585f38..ad65fc8db7b 100644
--- a/include/sys/blake3.h
+++ b/include/sys/blake3.h
@@ -72,7 +72,7 @@ typedef struct {
 	 */
 	uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
 
-	/* const blake3_impl_ops_t *ops */
+	/* const blake3_ops_t *ops */
 	const void *ops;
 } BLAKE3_CTX;
 
@@ -97,26 +97,23 @@ extern void **blake3_per_cpu_ctx;
 extern void blake3_per_cpu_ctx_init(void);
 extern void blake3_per_cpu_ctx_fini(void);
 
-/* return number of supported implementations */
-extern int blake3_get_impl_count(void);
+/* get count of supported implementations */
+extern uint32_t blake3_impl_getcnt(void);
 
-/* return id of selected implementation */
-extern int blake3_get_impl_id(void);
+/* get id of selected implementation */
+extern uint32_t blake3_impl_getid(void);
 
-/* return name of selected implementation */
-extern const char *blake3_get_impl_name(void);
+/* get name of selected implementation */
+extern const char *blake3_impl_getname(void);
 
 /* setup id as fastest implementation */
-extern void blake3_set_impl_fastest(uint32_t id);
+extern void blake3_impl_set_fastest(uint32_t id);
 
 /* set implementation by id */
-extern void blake3_set_impl_id(uint32_t id);
+extern void blake3_impl_setid(uint32_t id);
 
 /* set implementation by name */
-extern int blake3_set_impl_name(const char *name);
-
-/* set startup implementation */
-extern void blake3_setup_impl(void);
+extern int blake3_impl_setname(const char *name);
 
 #ifdef __cplusplus
 }
diff --git a/module/icp/algs/blake3/blake3.c b/module/icp/algs/blake3/blake3.c
index b9600207b67..5f701859882 100644
--- a/module/icp/algs/blake3/blake3.c
+++ b/module/icp/algs/blake3/blake3.c
@@ -129,7 +129,7 @@ static output_t make_output(const uint32_t input_cv[8],
  * bytes. For that reason, chaining values in the CV stack are represented as
  * bytes.
  */
-static void output_chaining_value(const blake3_impl_ops_t *ops,
+static void output_chaining_value(const blake3_ops_t *ops,
     const output_t *ctx, uint8_t cv[32])
 {
 	uint32_t cv_words[8];
@@ -139,7 +139,7 @@ static void output_chaining_value(const blake3_impl_ops_t *ops,
 	store_cv_words(cv, cv_words);
 }
 
-static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
+static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx,
     uint64_t seek, uint8_t *out, size_t out_len)
 {
 	uint64_t output_block_counter = seek / 64;
@@ -163,7 +163,7 @@ static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
 	}
 }
 
-static void chunk_state_update(const blake3_impl_ops_t *ops,
+static void chunk_state_update(const blake3_ops_t *ops,
     blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
 {
 	if (ctx->buf_len > 0) {
@@ -230,7 +230,7 @@ static size_t left_len(size_t content_len)
  * number of chunks hashed. These chunks are never the root and never empty;
  * those cases use a different codepath.
  */
-static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
+static size_t compress_chunks_parallel(const blake3_ops_t *ops,
     const uint8_t *input, size_t input_len, const uint32_t key[8],
     uint64_t chunk_counter, uint8_t flags, uint8_t *out)
 {
@@ -274,7 +274,7 @@ static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
  * return it as an additional output.) These parents are never the root and
  * never empty; those cases use a different codepath.
  */
-static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
+static size_t compress_parents_parallel(const blake3_ops_t *ops,
     const uint8_t *child_chaining_values, size_t num_chaining_values,
     const uint32_t key[8], uint8_t flags, uint8_t *out)
 {
@@ -320,7 +320,7 @@ static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
  * of implementing this special rule? Because we don't want to limit SIMD or
  * multi-threading parallelism for that update().
  */
-static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
+static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops,
     const uint8_t *input, size_t input_len, const uint32_t key[8],
     uint64_t chunk_counter, uint8_t flags, uint8_t *out)
 {
@@ -406,7 +406,7 @@ static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
  * As with compress_subtree_wide(), this function is not used on inputs of 1
  * chunk or less. That's a different codepath.
  */
-static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
+static void compress_subtree_to_parent_node(const blake3_ops_t *ops,
     const uint8_t *input, size_t input_len, const uint32_t key[8],
     uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
 {
diff --git a/module/icp/algs/blake3/blake3_generic.c b/module/icp/algs/blake3/blake3_generic.c
index 6c1eb33e89c..94a1f108236 100644
--- a/module/icp/algs/blake3/blake3_generic.c
+++ b/module/icp/algs/blake3/blake3_generic.c
@@ -192,7 +192,7 @@ static inline boolean_t blake3_is_generic_supported(void)
 	return (B_TRUE);
 }
 
-const blake3_impl_ops_t blake3_generic_impl = {
+const blake3_ops_t blake3_generic_impl = {
 	.compress_in_place = blake3_compress_in_place_generic,
 	.compress_xof = blake3_compress_xof_generic,
 	.hash_many = blake3_hash_many_generic,
diff --git a/module/icp/algs/blake3/blake3_impl.c b/module/icp/algs/blake3/blake3_impl.c
index 10741c82de7..5276fd88fbb 100644
--- a/module/icp/algs/blake3/blake3_impl.c
+++ b/module/icp/algs/blake3/blake3_impl.c
@@ -28,7 +28,7 @@
 
 #include "blake3_impl.h"
 
-static const blake3_impl_ops_t *const blake3_impls[] = {
+static const blake3_ops_t *const blake3_impls[] = {
 	&blake3_generic_impl,
 #if defined(__aarch64__) || \
 	(defined(__x86_64) && defined(HAVE_SSE2)) || \
@@ -48,160 +48,199 @@ static const blake3_impl_ops_t *const blake3_impls[] = {
 #endif
 };
 
-/* this pointer holds current ops for implementation */
-static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
-
-/* special implementation selections */
+/* Select BLAKE3 implementation */
 #define	IMPL_FASTEST	(UINT32_MAX)
-#define	IMPL_CYCLE	(UINT32_MAX-1)
-#define	IMPL_USER	(UINT32_MAX-2)
-#define	IMPL_PARAM	(UINT32_MAX-3)
+#define	IMPL_CYCLE	(UINT32_MAX - 1)
 
-#define	IMPL_READ(i) (*(volatile uint32_t *) &(i))
-static uint32_t icp_blake3_impl = IMPL_FASTEST;
+#define	IMPL_READ(i)	(*(volatile uint32_t *) &(i))
 
-#define	BLAKE3_IMPL_NAME_MAX	16
+/* Indicate that benchmark has been done */
+static boolean_t blake3_initialized = B_FALSE;
 
-/* id of fastest implementation */
-static uint32_t blake3_fastest_id = 0;
+/* Implementation that contains the fastest methods */
+static blake3_ops_t blake3_fastest_impl = {
+	.name = "fastest"
+};
 
-/* currently used id */
-static uint32_t blake3_current_id = 0;
+/* Hold all supported implementations */
+static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
+static uint32_t blake3_supp_impls_cnt = 0;
 
-/* id of module parameter (-1 == unused) */
-static int blake3_param_id = -1;
+/* Currently selected implementation */
+static uint32_t blake3_impl_chosen = IMPL_FASTEST;
 
-/* return number of supported implementations */
-int
-blake3_get_impl_count(void)
+static struct blake3_impl_selector {
+	const char *name;
+	uint32_t sel;
+} blake3_impl_selectors[] = {
+	{ "cycle",	IMPL_CYCLE },
+	{ "fastest",	IMPL_FASTEST }
+};
+
+/* check the supported implementations */
+static void blake3_impl_init(void)
 {
-	static int impls = 0;
-	int i;
+	int i, c;
 
-	if (impls)
-		return (impls);
+	/* init only once */
+	if (likely(blake3_initialized))
+		return;
 
-	for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
-		if (!blake3_impls[i]->is_supported()) continue;
-		impls++;
+	/* move supported implementations into blake3_supp_impls */
+	for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
+		const blake3_ops_t *impl = blake3_impls[i];
+
+		if (impl->is_supported && impl->is_supported())
+			blake3_supp_impls[c++] = impl;
 	}
+	blake3_supp_impls_cnt = c;
 
-	return (impls);
+	/* first init generic impl, may be changed via set_fastest() */
+	memcpy(&blake3_fastest_impl, blake3_impls[0],
+	    sizeof (blake3_fastest_impl));
+	blake3_initialized = B_TRUE;
 }
 
-/* return id of selected implementation */
-int
-blake3_get_impl_id(void)
+/* get number of supported implementations */
+uint32_t
+blake3_impl_getcnt(void)
 {
-	return (blake3_current_id);
+	blake3_impl_init();
+	return (blake3_supp_impls_cnt);
 }
 
-/* return name of selected implementation */
+/* get id of selected implementation */
+uint32_t
+blake3_impl_getid(void)
+{
+	return (IMPL_READ(blake3_impl_chosen));
+}
+
+/* get name of selected implementation */
 const char *
-blake3_get_impl_name(void)
+blake3_impl_getname(void)
 {
-	return (blake3_selected_impl->name);
+	uint32_t impl = IMPL_READ(blake3_impl_chosen);
+
+	blake3_impl_init();
+	switch (impl) {
+	case IMPL_FASTEST:
+		return ("fastest");
+	case IMPL_CYCLE:
+		return ("cycle");
+	default:
+		return (blake3_supp_impls[impl]->name);
+	}
 }
 
 /* setup id as fastest implementation */
 void
-blake3_set_impl_fastest(uint32_t id)
+blake3_impl_set_fastest(uint32_t id)
 {
-	blake3_fastest_id = id;
+	/* setup fastest impl */
+	memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
+	    sizeof (blake3_fastest_impl));
 }
 
 /* set implementation by id */
 void
-blake3_set_impl_id(uint32_t id)
+blake3_impl_setid(uint32_t id)
 {
-	int i, cid;
-
-	/* select fastest */
-	if (id == IMPL_FASTEST)
-		id = blake3_fastest_id;
-
-	/* select next or first */
-	if (id == IMPL_CYCLE)
-		id = (++blake3_current_id) % blake3_get_impl_count();
-
-	/* 0..N for the real impl */
-	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
-		if (!blake3_impls[i]->is_supported()) continue;
-		if (cid == id) {
-			blake3_current_id = cid;
-			blake3_selected_impl = blake3_impls[i];
-			return;
-		}
-		cid++;
+	blake3_impl_init();
+	switch (id) {
+	case IMPL_FASTEST:
+		atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
+		break;
+	case IMPL_CYCLE:
+		atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
+		break;
+	default:
+		ASSERT3U(id, >=, 0);
+		ASSERT3U(id, <, blake3_supp_impls_cnt);
+		atomic_swap_32(&blake3_impl_chosen, id);
+		break;
 	}
 }
 
 /* set implementation by name */
 int
-blake3_set_impl_name(const char *name)
+blake3_impl_setname(const char *val)
 {
-	int i, cid;
+	uint32_t impl = IMPL_READ(blake3_impl_chosen);
+	size_t val_len;
+	int i, err = -EINVAL;
 
-	if (strcmp(name, "fastest") == 0) {
-		atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
-		blake3_set_impl_id(IMPL_FASTEST);
-		return (0);
-	} else if (strcmp(name, "cycle") == 0) {
-		atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
-		blake3_set_impl_id(IMPL_CYCLE);
-		return (0);
-	}
+	blake3_impl_init();
+	val_len = strlen(val);
+	while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
+		val_len--;
 
-	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
-		if (!blake3_impls[i]->is_supported()) continue;
-		if (strcmp(name, blake3_impls[i]->name) == 0) {
-			if (icp_blake3_impl == IMPL_PARAM) {
-				blake3_param_id = cid;
-				return (0);
-			}
-			blake3_selected_impl = blake3_impls[i];
-			blake3_current_id = cid;
-			return (0);
+	/* check mandatory implementations */
+	for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
+		const char *name = blake3_impl_selectors[i].name;
+
+		if (val_len == strlen(name) &&
+		    strncmp(val, name, val_len) == 0) {
+			impl = blake3_impl_selectors[i].sel;
+			err = 0;
+			break;
 		}
-		cid++;
 	}
 
-	return (-EINVAL);
-}
+	if (err != 0 && blake3_initialized) {
+		/* check all supported implementations */
+		for (i = 0; i < blake3_supp_impls_cnt; i++) {
+			const char *name = blake3_supp_impls[i]->name;
 
-/* setup implementation */
-void
-blake3_setup_impl(void)
-{
-	switch (IMPL_READ(icp_blake3_impl)) {
-	case IMPL_PARAM:
-		blake3_set_impl_id(blake3_param_id);
-		atomic_swap_32(&icp_blake3_impl, IMPL_USER);
-		break;
-	case IMPL_FASTEST:
-		blake3_set_impl_id(IMPL_FASTEST);
-		break;
-	case IMPL_CYCLE:
-		blake3_set_impl_id(IMPL_CYCLE);
-		break;
-	default:
-		blake3_set_impl_id(blake3_current_id);
-		break;
+			if (val_len == strlen(name) &&
+			    strncmp(val, name, val_len) == 0) {
+				impl = i;
+				err = 0;
+				break;
+			}
+		}
 	}
+
+	if (err == 0) {
+		atomic_swap_32(&blake3_impl_chosen, impl);
+	}
+
+	return (err);
 }
 
-/* return selected implementation */
-const blake3_impl_ops_t *
+const blake3_ops_t *
 blake3_impl_get_ops(void)
 {
-	/* each call to ops will cycle */
-	if (icp_blake3_impl == IMPL_CYCLE)
-		blake3_set_impl_id(IMPL_CYCLE);
+	const blake3_ops_t *ops = NULL;
+	uint32_t impl = IMPL_READ(blake3_impl_chosen);
 
-	return (blake3_selected_impl);
+	blake3_impl_init();
+	switch (impl) {
+	case IMPL_FASTEST:
+		ASSERT(blake3_initialized);
+		ops = &blake3_fastest_impl;
+		break;
+	case IMPL_CYCLE:
+		/* Cycle through supported implementations */
+		ASSERT(blake3_initialized);
+		ASSERT3U(blake3_supp_impls_cnt, >, 0);
+		static uint32_t cycle_count = 0;
+		uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
+		ops = blake3_supp_impls[idx];
+		break;
+	default:
+		ASSERT3U(blake3_supp_impls_cnt, >, 0);
+		ASSERT3U(impl, <, blake3_supp_impls_cnt);
+		ops = blake3_supp_impls[impl];
+		break;
+	}
+
+	ASSERT3P(ops, !=, NULL);
+	return (ops);
 }
 
 #if defined(_KERNEL)
+
 void **blake3_per_cpu_ctx;
 
 void
@@ -215,6 +254,9 @@ blake3_per_cpu_ctx_init(void)
 		blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
 		    KM_SLEEP);
 	}
+
+	/* init once in kernel mode */
+	blake3_impl_init();
 }
 
 void
@@ -227,58 +269,94 @@ blake3_per_cpu_ctx_fini(void)
 	memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
 	kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
 }
-#endif
 
-#if defined(_KERNEL) && defined(__linux__)
-static int
-icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
-{
-	char req_name[BLAKE3_IMPL_NAME_MAX];
-	size_t i;
+#define	IMPL_FMT(impl, i)	(((impl) == (i)) ? "[%s] " : "%s ")
 
-	/* sanitize input */
-	i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
-	if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
-		return (-EINVAL);
-
-	strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
-	while (i > 0 && isspace(req_name[i-1]))
-		i--;
-	req_name[i] = '\0';
-
-	atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
-	return (blake3_set_impl_name(req_name));
-}
+#if defined(__linux__)
 
 static int
-icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
+blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
 {
-	int i, cid, cnt = 0;
+	const uint32_t impl = IMPL_READ(blake3_impl_chosen);
 	char *fmt;
+	int cnt = 0;
 
 	/* cycling */
-	fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
-	cnt += sprintf(buffer + cnt, fmt);
+	fmt = IMPL_FMT(impl, IMPL_CYCLE);
+	cnt += sprintf(buffer + cnt, fmt, "cycle");
 
-	/* fastest one */
-	fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
-	cnt += sprintf(buffer + cnt, fmt);
+	/* list fastest */
+	fmt = IMPL_FMT(impl, IMPL_FASTEST);
+	cnt += sprintf(buffer + cnt, fmt, "fastest");
 
-	/* user selected */
-	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
-		if (!blake3_impls[i]->is_supported()) continue;
-		fmt = (icp_blake3_impl == IMPL_USER &&
-		    cid == blake3_current_id) ? "[%s] " : "%s ";
-		cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
-		cid++;
+	/* list all supported implementations */
+	for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+		fmt = IMPL_FMT(impl, i);
+		cnt += sprintf(buffer + cnt, fmt,
+		    blake3_supp_impls[i]->name);
 	}
 
-	buffer[cnt] = 0;
-
 	return (cnt);
 }
 
-module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
-    NULL, 0644);
-MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
+static int
+blake3_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+	(void) unused;
+	return (blake3_impl_setname(val));
+}
+
+#elif defined(__FreeBSD__)
+
+#include <sys/sbuf.h>
+
+static int
+blake3_param(ZFS_MODULE_PARAM_ARGS)
+{
+	int err;
+
+	if (req->newptr == NULL) {
+		const uint32_t impl = IMPL_READ(blake3_impl_chosen);
+		const int init_buflen = 64;
+		const char *fmt;
+		struct sbuf *s;
+
+		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+		/* cycling */
+		fmt = IMPL_FMT(impl, IMPL_CYCLE);
+		(void) sbuf_printf(s, fmt, "cycle");
+
+		/* list fastest */
+		fmt = IMPL_FMT(impl, IMPL_FASTEST);
+		(void) sbuf_printf(s, fmt, "fastest");
+
+		/* list all supported implementations */
+		for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+			fmt = IMPL_FMT(impl, i);
+			(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
+		}
+
+		err = sbuf_finish(s);
+		sbuf_delete(s);
+
+		return (err);
+	}
+
+	char buf[16];
+
+	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+	if (err) {
+		return (err);
+	}
+
+	return (-blake3_impl_setname(buf));
+}
+#endif
+
+#undef IMPL_FMT
+
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
+    blake3_param_set, blake3_param_get, ZMOD_RW, \
+	"Select BLAKE3 implementation.");
 #endif
diff --git a/module/icp/algs/blake3/blake3_impl.h b/module/icp/algs/blake3/blake3_impl.h
index 5254061c737..eef74eaa909 100644
--- a/module/icp/algs/blake3/blake3_impl.h
+++ b/module/icp/algs/blake3/blake3_impl.h
@@ -62,31 +62,31 @@ typedef struct blake3_impl_ops {
 	blake3_is_supported_f is_supported;
 	int degree;
 	const char *name;
-} blake3_impl_ops_t;
+} blake3_ops_t;
 
 /* Return selected BLAKE3 implementation ops */
-extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
+extern const blake3_ops_t *blake3_impl_get_ops(void);
 
-extern const blake3_impl_ops_t blake3_generic_impl;
+extern const blake3_ops_t blake3_generic_impl;
 
 #if defined(__aarch64__) || \
 	(defined(__x86_64) && defined(HAVE_SSE2)) || \
 	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_impl_ops_t blake3_sse2_impl;
+extern const blake3_ops_t blake3_sse2_impl;
 #endif
 
 #if defined(__aarch64__) || \
 	(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
 	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_impl_ops_t blake3_sse41_impl;
+extern const blake3_ops_t blake3_sse41_impl;
 #endif
 
 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern const blake3_impl_ops_t blake3_avx2_impl;
+extern const blake3_ops_t blake3_avx2_impl;
 #endif
 
 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern const blake3_impl_ops_t blake3_avx512_impl;
+extern const blake3_ops_t blake3_avx512_impl;
 #endif
 
 #if defined(__x86_64)
diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c
index aecd29edb16..8139789fd77 100644
--- a/module/icp/algs/blake3/blake3_x86-64.c
+++ b/module/icp/algs/blake3/blake3_x86-64.c
@@ -81,7 +81,7 @@ static boolean_t blake3_is_sse2_supported(void)
 #endif
 }
 
-const blake3_impl_ops_t blake3_sse2_impl = {
+const blake3_ops_t blake3_sse2_impl = {
 	.compress_in_place = blake3_compress_in_place_sse2,
 	.compress_xof = blake3_compress_xof_sse2,
 	.hash_many = blake3_hash_many_sse2,
@@ -147,7 +147,7 @@ static boolean_t blake3_is_sse41_supported(void)
 #endif
 }
 
-const blake3_impl_ops_t blake3_sse41_impl = {
+const blake3_ops_t blake3_sse41_impl = {
 	.compress_in_place = blake3_compress_in_place_sse41,
 	.compress_xof = blake3_compress_xof_sse41,
 	.hash_many = blake3_hash_many_sse41,
@@ -179,7 +179,7 @@ static boolean_t blake3_is_avx2_supported(void)
 	    zfs_avx2_available());
 }
 
-const blake3_impl_ops_t blake3_avx2_impl = {
+const blake3_ops_t blake3_avx2_impl = {
 	.compress_in_place = blake3_compress_in_place_sse41,
 	.compress_xof = blake3_compress_xof_sse41,
 	.hash_many = blake3_hash_many_avx2,
@@ -237,7 +237,7 @@ static boolean_t blake3_is_avx512_supported(void)
 	    zfs_avx512vl_available());
 }
 
-const blake3_impl_ops_t blake3_avx512_impl = {
+const blake3_ops_t blake3_avx512_impl = {
 	.compress_in_place = blake3_compress_in_place_avx512,
 	.compress_xof = blake3_compress_xof_avx512,
 	.hash_many = blake3_hash_many_avx512,
diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c
index b9dc907afa8..74b4cb8d2e6 100644
--- a/module/zfs/zfs_chksum.c
+++ b/module/zfs/zfs_chksum.c
@@ -244,12 +244,13 @@ chksum_benchmark(void)
 #endif
 
 	chksum_stat_t *cs;
-	int cbid = 0, id;
+	int cbid = 0;
 	uint64_t max = 0;
+	uint32_t id, id_save;
 
 	/* space for the benchmark times */
 	chksum_stat_cnt = 4;
-	chksum_stat_cnt += blake3_get_impl_count();
+	chksum_stat_cnt += blake3_impl_getcnt();
 	chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
 	    sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
 
@@ -290,20 +291,24 @@ chksum_benchmark(void)
 	chksum_benchit(cs);
 
 	/* blake3 */
-	for (id = 0; id < blake3_get_impl_count(); id++) {
-		blake3_set_impl_id(id);
+	id_save = blake3_impl_getid();
+	for (id = 0; id < blake3_impl_getcnt(); id++) {
+		blake3_impl_setid(id);
 		cs = &chksum_stat_data[cbid++];
 		cs->init = abd_checksum_blake3_tmpl_init;
 		cs->func = abd_checksum_blake3_native;
 		cs->free = abd_checksum_blake3_tmpl_free;
 		cs->name = "blake3";
-		cs->impl = blake3_get_impl_name();
+		cs->impl = blake3_impl_getname();
 		chksum_benchit(cs);
 		if (cs->bs256k > max) {
 			max = cs->bs256k;
-			blake3_set_impl_fastest(id);
+			blake3_impl_set_fastest(id);
 		}
 	}
+
+	/* restore initial value */
+	blake3_impl_setid(id_save);
 }
 
 void
@@ -329,9 +334,6 @@ chksum_init(void)
 		    chksum_kstat_addr);
 		kstat_install(chksum_kstat);
 	}
-
-	/* setup implementations */
-	blake3_setup_impl();
 }
 
 void
diff --git a/tests/zfs-tests/cmd/checksum/blake3_test.c b/tests/zfs-tests/cmd/checksum/blake3_test.c
index d57d0e047f0..648e1faaaeb 100644
--- a/tests/zfs-tests/cmd/checksum/blake3_test.c
+++ b/tests/zfs-tests/cmd/checksum/blake3_test.c
@@ -497,9 +497,9 @@ main(int argc, char *argv[])
 	}
 
 	(void) printf("Running algorithm correctness tests:\n");
-	for (id = 0; id < blake3_get_impl_count(); id++) {
-		blake3_set_impl_id(id);
-		const char *name = blake3_get_impl_name();
+	for (id = 0; id < blake3_impl_getcnt(); id++) {
+		blake3_impl_setid(id);
+		const char *name = blake3_impl_getname();
 		dprintf("Result for BLAKE3-%s:\n", name);
 		for (i = 0; TestArray[i].hash; i++) {
 			blake3_test_t *cur = &TestArray[i];
@@ -565,9 +565,9 @@ main(int argc, char *argv[])
 	} while (0)
 
 	printf("Running performance tests (hashing 1024 MiB of data):\n");
-	for (id = 0; id < blake3_get_impl_count(); id++) {
-		blake3_set_impl_id(id);
-		const char *name = blake3_get_impl_name();
+	for (id = 0; id < blake3_impl_getcnt(); id++) {
+		blake3_impl_setid(id);
+		const char *name = blake3_impl_getname();
 		BLAKE3_PERF_TEST(name, 256);
 	}
 

From eeca9d27d7f6936f433fedd7a1d37233bdd670cd Mon Sep 17 00:00:00 2001
From: Tino Reichardt <milky-zfs@mcmilk.de>
Date: Sat, 3 Sep 2022 10:40:29 +0200
Subject: [PATCH 53/69] Add zfs_blake3_impl to zfs.4

The zfs module parameter zfs_blake3_impl did not get a manual page entry
when BLAKE3 was added to OpenZFS. This commit adds the required notes
about the parameter to zfs.4.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Co-authored-by: Ryan Moeller <ryan@freqlabs.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
---
 man/man4/zfs.4 | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index b2f3e7c61fb..90a8ca788c7 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1162,6 +1162,20 @@ Selecting any option other than
 results in vector instructions
 from the respective CPU instruction set being used.
 .
+.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
+Select a BLAKE3 implementation.
+.Pp
+Supported selectors are:
+.Sy cycle , fastest , generic , sse2 , sse41 , avx2 , avx512 .
+All except
+.Sy cycle , fastest No and Sy generic
+require instruction set extensions to be available,
+and will only appear if ZFS detects that they are present at runtime.
+If multiple implementations of BLAKE3 are available, the
+.Sy fastest No will be chosen using a micro benchmark.
+You can see the benchmark results by reading this kstat file:
+.Pa /proc/spl/kstat/zfs/chksum_bench .
+.
 .It Sy zfs_free_bpobj_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
 Enable/disable the processing of the free_bpobj object.
 .

From 48cf170d5a9f6610db0f576238e054e727239e82 Mon Sep 17 00:00:00 2001
From: Tino Reichardt <milky-zfs@mcmilk.de>
Date: Wed, 7 Sep 2022 20:33:59 +0200
Subject: [PATCH 54/69] Add PPC cpu feature tests for FreeBSD and Linux

Add needed cpu feature tests for powerpc architecture.

Overview:
zfs_altivec_available() - needed by RAID-Z
zfs_vsx_available()     - needed by BLAKE3
zfs_isa207_available()  - needed by SHA2

Part 1 - Userspace
- use getauxval() for Linux and elf_aux_info() for FreeBSD
- including <sys/auxv.h> directly fails with duplicate definitions
- so we define the needed functions and definitions ourselves

Part 2 - Kernel space FreeBSD
- use exported cpu_features of <powerpc/cpu.h>

Part 3 - Kernel space Linux
- use cpu_has_feature() function of <asm/cpufeature.h>

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
---
 include/os/freebsd/Makefile.am               |  1 +
 include/os/freebsd/spl/sys/simd.h            |  8 +-
 include/os/freebsd/spl/sys/simd_powerpc.h    | 90 ++++++++++++++++++++
 include/os/freebsd/spl/sys/simd_x86.h        | 50 +++++------
 include/os/linux/kernel/linux/simd_powerpc.h | 83 +++++++++---------
 lib/libspl/include/sys/simd.h                | 89 ++++++++++---------
 6 files changed, 204 insertions(+), 117 deletions(-)
 create mode 100644 include/os/freebsd/spl/sys/simd_powerpc.h

diff --git a/include/os/freebsd/Makefile.am b/include/os/freebsd/Makefile.am
index 5ddb7cd710b..3796f20ae7e 100644
--- a/include/os/freebsd/Makefile.am
+++ b/include/os/freebsd/Makefile.am
@@ -50,6 +50,7 @@ noinst_HEADERS = \
 	%D%/spl/sys/sid.h \
 	%D%/spl/sys/sig.h \
 	%D%/spl/sys/simd.h \
+	%D%/spl/sys/simd_powerpc.h \
 	%D%/spl/sys/simd_x86.h \
 	%D%/spl/sys/spl_condvar.h \
 	%D%/spl/sys/string.h \
diff --git a/include/os/freebsd/spl/sys/simd.h b/include/os/freebsd/spl/sys/simd.h
index 53503e83891..3106e4853c7 100644
--- a/include/os/freebsd/spl/sys/simd.h
+++ b/include/os/freebsd/spl/sys/simd.h
@@ -26,13 +26,16 @@
  * $FreeBSD$
  */
 
-
 #ifndef _FREEBSD_SIMD_H
 #define	_FREEBSD_SIMD_H
+
 #if defined(__amd64__) || defined(__i386__)
 #include <sys/simd_x86.h>
-#else
 
+#elif defined(__powerpc__)
+#include <sys/simd_powerpc.h>
+
+#else
 #define	kfpu_allowed()		0
 #define	kfpu_initialize(tsk)	do {} while (0)
 #define	kfpu_begin()		do {} while (0)
@@ -40,4 +43,5 @@
 #define	kfpu_init()		(0)
 #define	kfpu_fini()		do {} while (0)
 #endif
+
 #endif
diff --git a/include/os/freebsd/spl/sys/simd_powerpc.h b/include/os/freebsd/spl/sys/simd_powerpc.h
new file mode 100644
index 00000000000..b90240580c7
--- /dev/null
+++ b/include/os/freebsd/spl/sys/simd_powerpc.h
@@ -0,0 +1,92 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ *	kfpu_allowed()
+ *	kfpu_begin()
+ *	kfpu_end()
+ *	kfpu_init()
+ *	kfpu_fini()
+ *
+ * SIMD support:
+ *
+ * Following functions should be called to determine whether CPU feature
+ * is supported. All functions are usable in kernel and user space.
+ * If a SIMD algorithm is using more than one instruction set
+ * all relevant feature test functions should be called.
+ *
+ * Supported features:
+ *   zfs_altivec_available()
+ *   zfs_vsx_available()
+ *   zfs_isa207_available()
+ */
+
+#ifndef _FREEBSD_SIMD_POWERPC_H
+#define	_FREEBSD_SIMD_POWERPC_H
+
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+#include <machine/pcb.h>
+#include <powerpc/cpu.h>
+
+#define	kfpu_allowed()		1
+#define	kfpu_initialize(tsk)	do {} while (0)
+#define	kfpu_begin()		do {} while (0)
+#define	kfpu_end()		do {} while (0)
+#define	kfpu_init()		(0)
+#define	kfpu_fini()		do {} while (0)
+
+/*
+ * Check if Altivec is available
+ */
+static inline boolean_t
+zfs_altivec_available(void)
+{
+	return ((cpu_features & PPC_FEATURE_HAS_ALTIVEC) != 0);
+}
+
+/*
+ * Check if VSX is available
+ */
+static inline boolean_t
+zfs_vsx_available(void)
+{
+	return ((cpu_features & PPC_FEATURE_HAS_VSX) != 0);
+}
+
+/*
+ * Check if POWER ISA 2.07 is available (SHA2)
+ */
+static inline boolean_t
+zfs_isa207_available(void)
+{
+	return ((cpu_features2 & PPC_FEATURE2_ARCH_2_07) != 0);
+}
+
+#endif /* _FREEBSD_SIMD_POWERPC_H */
diff --git a/include/os/freebsd/spl/sys/simd_x86.h b/include/os/freebsd/spl/sys/simd_x86.h
index 480bfd28973..7a0ca243f76 100644
--- a/include/os/freebsd/spl/sys/simd_x86.h
+++ b/include/os/freebsd/spl/sys/simd_x86.h
@@ -77,7 +77,7 @@ __simd_state_enabled(const uint64_t state)
 	boolean_t has_osxsave;
 	uint64_t xcr0;
 
-	has_osxsave = !!(cpu_feature2 & CPUID2_OSXSAVE);
+	has_osxsave = (cpu_feature2 & CPUID2_OSXSAVE) != 0;
 
 	if (!has_osxsave)
 		return (B_FALSE);
@@ -99,7 +99,7 @@ __simd_state_enabled(const uint64_t state)
 static inline boolean_t
 zfs_sse_available(void)
 {
-	return (!!(cpu_feature & CPUID_SSE));
+	return ((cpu_feature & CPUID_SSE) != 0);
 }
 
 /*
@@ -108,7 +108,7 @@ zfs_sse_available(void)
 static inline boolean_t
 zfs_sse2_available(void)
 {
-	return (!!(cpu_feature & CPUID_SSE2));
+	return ((cpu_feature & CPUID_SSE2) != 0);
 }
 
 /*
@@ -117,7 +117,7 @@ zfs_sse2_available(void)
 static inline boolean_t
 zfs_sse3_available(void)
 {
-	return (!!(cpu_feature2 & CPUID2_SSE3));
+	return ((cpu_feature2 & CPUID2_SSE3) != 0);
 }
 
 /*
@@ -126,7 +126,7 @@ zfs_sse3_available(void)
 static inline boolean_t
 zfs_ssse3_available(void)
 {
-	return (!!(cpu_feature2 & CPUID2_SSSE3));
+	return ((cpu_feature2 & CPUID2_SSSE3) != 0);
 }
 
 /*
@@ -135,7 +135,7 @@ zfs_ssse3_available(void)
 static inline boolean_t
 zfs_sse4_1_available(void)
 {
-	return (!!(cpu_feature2 & CPUID2_SSE41));
+	return ((cpu_feature2 & CPUID2_SSE41) != 0);
 }
 
 /*
@@ -144,7 +144,7 @@ zfs_sse4_1_available(void)
 static inline boolean_t
 zfs_sse4_2_available(void)
 {
-	return (!!(cpu_feature2 & CPUID2_SSE42));
+	return ((cpu_feature2 & CPUID2_SSE42) != 0);
 }
 
 /*
@@ -155,7 +155,7 @@ zfs_avx_available(void)
 {
 	boolean_t has_avx;
 
-	has_avx = !!(cpu_feature2 & CPUID2_AVX);
+	has_avx = (cpu_feature2 & CPUID2_AVX) != 0;
 
 	return (has_avx && __ymm_enabled());
 }
@@ -168,7 +168,7 @@ zfs_avx2_available(void)
 {
 	boolean_t has_avx2;
 
-	has_avx2 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX2);
+	has_avx2 = (cpu_stdext_feature & CPUID_STDEXT_AVX2) != 0;
 
 	return (has_avx2 && __ymm_enabled());
 }
@@ -196,7 +196,7 @@ zfs_avx512f_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -207,8 +207,8 @@ zfs_avx512cd_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512CD);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512CD) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -219,8 +219,8 @@ zfs_avx512er_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512CD);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512CD) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -231,8 +231,8 @@ zfs_avx512pf_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512PF);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512PF) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -243,7 +243,7 @@ zfs_avx512bw_available(void)
 {
 	boolean_t has_avx512 = B_FALSE;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512BW);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512BW) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -254,8 +254,8 @@ zfs_avx512dq_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512DQ);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512DQ) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -266,8 +266,8 @@ zfs_avx512vl_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512VL);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512VL) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -278,8 +278,8 @@ zfs_avx512ifma_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_AVX512IFMA);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_AVX512IFMA) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
@@ -290,8 +290,8 @@ zfs_avx512vbmi_available(void)
 {
 	boolean_t has_avx512;
 
-	has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) &&
-	    !!(cpu_stdext_feature & CPUID_STDEXT_BMI1);
+	has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_BMI1) != 0;
 
 	return (has_avx512 && __zmm_enabled());
 }
diff --git a/include/os/linux/kernel/linux/simd_powerpc.h b/include/os/linux/kernel/linux/simd_powerpc.h
index 764c5dc51f9..2a2f92bc499 100644
--- a/include/os/linux/kernel/linux/simd_powerpc.h
+++ b/include/os/linux/kernel/linux/simd_powerpc.h
@@ -21,6 +21,7 @@
 /*
  * Copyright (C) 2019 Romain Dolbeau
  *           <romain.dolbeau@european-processor-initiative.eu>
+ * Copyright (C) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
  */
 
 /*
@@ -41,7 +42,9 @@
  * all relevant feature test functions should be called.
  *
  * Supported features:
- *	zfs_altivec_available()
+ *   zfs_altivec_available()
+ *   zfs_vsx_available()
+ *   zfs_isa207_available()
  */
 
 #ifndef _LINUX_SIMD_POWERPC_H
@@ -57,73 +60,65 @@
 #include <sys/types.h>
 #include <linux/version.h>
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+#include <asm/cpufeature.h>
+#else
+#include <asm/cputable.h>
+#endif
+
 #define	kfpu_allowed()			1
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
-#define	kfpu_end()				\
-	{					\
-		disable_kernel_vsx();		\
-		disable_kernel_altivec();	\
-		preempt_enable();		\
-	}
 #define	kfpu_begin()				\
 	{					\
 		preempt_disable();		\
 		enable_kernel_altivec();	\
 		enable_kernel_vsx();		\
+		enable_kernel_spe();		\
+	}
+#define	kfpu_end()				\
+	{					\
+		disable_kernel_spe();		\
+		disable_kernel_vsx();		\
+		disable_kernel_altivec();	\
+		preempt_enable();		\
 	}
 #else
 /* seems that before 4.5 no-one bothered */
 #define	kfpu_begin()
 #define	kfpu_end()		preempt_enable()
 #endif
+
 #define	kfpu_init()		0
 #define	kfpu_fini()		((void) 0)
 
-static inline boolean_t
-zfs_vsx_available(void)
-{
-	boolean_t res;
-#if defined(__powerpc64__)
-	u64 msr;
-#else
-	u32 msr;
-#endif
-	kfpu_begin();
-	__asm volatile("mfmsr %0" : "=r"(msr));
-	res = (msr & 0x800000) != 0;
-	kfpu_end();
-	return (res);
-}
-
 /*
  * Check if AltiVec instruction set is available
  */
 static inline boolean_t
 zfs_altivec_available(void)
 {
-	boolean_t res;
-	/* suggested by macallan at netbsd dot org */
-#if defined(__powerpc64__)
-	u64 msr;
-#else
-	u32 msr;
-#endif
-	kfpu_begin();
-	__asm volatile("mfmsr %0" : "=r"(msr));
-	/*
-	 * 64 bits -> need to check bit 38
-	 * Power ISA Version 3.0B
-	 * p944
-	 * 32 bits -> Need to check bit 6
-	 * AltiVec Technology Programming Environments Manual
-	 * p49 (2-9)
-	 * They are the same, as ppc counts 'backward' ...
-	 */
-	res = (msr & 0x2000000) != 0;
-	kfpu_end();
-	return (res);
+	return (cpu_has_feature(CPU_FTR_ALTIVEC));
 }
+
+/*
+ * Check if VSX is available
+ */
+static inline boolean_t
+zfs_vsx_available(void)
+{
+	return (cpu_has_feature(CPU_FTR_VSX));
+}
+
+/*
+ * Check if POWER ISA 2.07 is available (SHA2)
+ */
+static inline boolean_t
+zfs_isa207_available(void)
+{
+	return (cpu_has_feature(CPU_FTR_ARCH_207S));
+}
+
 #endif /* defined(__powerpc) */
 
 #endif /* _LINUX_SIMD_POWERPC_H */
diff --git a/lib/libspl/include/sys/simd.h b/lib/libspl/include/sys/simd.h
index c9d86a0808f..c0099dd7919 100644
--- a/lib/libspl/include/sys/simd.h
+++ b/lib/libspl/include/sys/simd.h
@@ -20,8 +20,8 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
  */
 
 #ifndef _LIBSPL_SYS_SIMD_H
@@ -452,63 +452,60 @@ zfs_avx512vbmi_available(void)
 
 #elif defined(__powerpc__)
 
+/* including <sys/auxv.h> clashes with AT_UID and others */
+extern unsigned long getauxval(unsigned long type);
+#if defined(__FreeBSD__)
+#define	AT_HWCAP	25	/* CPU feature flags. */
+#define	AT_HWCAP2	26	/* CPU feature flags 2. */
+extern int elf_aux_info(int aux, void *buf, int buflen);
+static unsigned long getauxval(unsigned long key)
+{
+	unsigned long val = 0UL;
+
+	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
+		return (0UL);
+
+	return (val);
+}
+#elif defined(__linux__)
+#define	AT_HWCAP	16	/* CPU feature flags. */
+#define	AT_HWCAP2	26	/* CPU feature flags 2. */
+#endif
+
 #define	kfpu_allowed()		1
 #define	kfpu_initialize(tsk)	do {} while (0)
 #define	kfpu_begin()		do {} while (0)
 #define	kfpu_end()		do {} while (0)
 
-/*
- * Check if AltiVec instruction set is available
- * No easy way beyond 'altivec works' :-(
- */
-#include <signal.h>
-#include <setjmp.h>
-
-#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
-static jmp_buf env;
-static void sigillhandler(int x)
-{
-	(void) x;
-	longjmp(env, 1);
-}
-#endif
-
+#define	PPC_FEATURE_HAS_ALTIVEC	0x10000000
 static inline boolean_t
 zfs_altivec_available(void)
 {
-	boolean_t has_altivec = B_FALSE;
-#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
-	sighandler_t savesig;
-	savesig = signal(SIGILL, sigillhandler);
-	if (setjmp(env)) {
-		signal(SIGILL, savesig);
-		has_altivec = B_FALSE;
-	} else {
-		__asm__ __volatile__("vor 0,0,0\n" : : : "v0");
-		signal(SIGILL, savesig);
-		has_altivec = B_TRUE;
-	}
-#endif
-	return (has_altivec);
+	unsigned long hwcap = getauxval(AT_HWCAP);
+
+	return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
 }
+
+#define	PPC_FEATURE_HAS_VSX	0x00000080
 static inline boolean_t
 zfs_vsx_available(void)
 {
-	boolean_t has_vsx = B_FALSE;
-#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
-	sighandler_t savesig;
-	savesig = signal(SIGILL, sigillhandler);
-	if (setjmp(env)) {
-		signal(SIGILL, savesig);
-		has_vsx = B_FALSE;
-	} else {
-		__asm__ __volatile__("xssubsp 0,0,0\n");
-		signal(SIGILL, savesig);
-		has_vsx = B_TRUE;
-	}
-#endif
-	return (has_vsx);
+	unsigned long hwcap = getauxval(AT_HWCAP);
+
+	return (hwcap & PPC_FEATURE_HAS_VSX);
 }
+
+#define	PPC_FEATURE2_ARCH_2_07	0x80000000
+static inline boolean_t
+zfs_isa207_available(void)
+{
+	unsigned long hwcap = getauxval(AT_HWCAP);
+	unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+	return ((hwcap & PPC_FEATURE_HAS_VSX) &&
+	    (hwcap2 & PPC_FEATURE2_ARCH_2_07));
+}
+
 #else
 
 #define	kfpu_allowed()		0

From 9a671fe7ecbc5f6ca07d96869207720a37b088e4 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Tue, 20 Sep 2022 02:17:27 +0200
Subject: [PATCH 55/69] FreeBSD: stop passing LK_INTERLOCK to VOP_LOCK

There is an ongoing effort to eliminate this feature.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #13908
---
 module/os/freebsd/zfs/zfs_ctldir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/module/os/freebsd/zfs/zfs_ctldir.c b/module/os/freebsd/zfs/zfs_ctldir.c
index 4b95b49dc40..42bb7551e9c 100644
--- a/module/os/freebsd/zfs/zfs_ctldir.c
+++ b/module/os/freebsd/zfs/zfs_ctldir.c
@@ -977,12 +977,13 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
 		 */
 		VI_LOCK(*vpp);
 		if (((*vpp)->v_iflag & VI_MOUNT) == 0) {
+			VI_UNLOCK(*vpp);
 			/*
 			 * Upgrade to exclusive lock in order to:
 			 * - avoid race conditions
 			 * - satisfy the contract of mount_snapshot()
 			 */
-			err = VOP_LOCK(*vpp, LK_TRYUPGRADE | LK_INTERLOCK);
+			err = VOP_LOCK(*vpp, LK_TRYUPGRADE);
 			if (err == 0)
 				break;
 		} else {

From 042d43a1ddf114ea72d83fd45cc926724f74f5fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= <mm@FreeBSD.org>
Date: Tue, 20 Sep 2022 02:21:45 +0200
Subject: [PATCH 56/69] FreeBSD: fix static module build broken in 7bb707ffa

param_set_arc_free_target(SYSCTL_HANDLER_ARGS) and
param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) defined in
sysctl_os.c must be made available to arc_os.c.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Martin Matuska <mm@FreeBSD.org>
Closes #13915
---
 include/os/freebsd/zfs/sys/arc_os.h | 34 +++++++++++++++++++++++++++++
 module/os/freebsd/zfs/arc_os.c      |  3 +--
 module/os/freebsd/zfs/sysctl_os.c   |  5 +++--
 3 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 include/os/freebsd/zfs/sys/arc_os.h

diff --git a/include/os/freebsd/zfs/sys/arc_os.h b/include/os/freebsd/zfs/sys/arc_os.h
new file mode 100644
index 00000000000..a95618b91fe
--- /dev/null
+++ b/include/os/freebsd/zfs/sys/arc_os.h
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Martin Matuska
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_ARC_OS_H
+#define	_SYS_ARC_OS_H
+
+int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);
+int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);
+
+#endif
diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c
index f1a3a0fafa9..30e96a889e0 100644
--- a/module/os/freebsd/zfs/arc_os.c
+++ b/module/os/freebsd/zfs/arc_os.c
@@ -27,6 +27,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/zfs_context.h>
 #include <sys/arc.h>
+#include <sys/arc_os.h>
 #include <sys/zfs_refcount.h>
 #include <sys/vdev.h>
 #include <sys/vdev_trim.h>
@@ -72,11 +73,9 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
  * We don't have a tunable for arc_free_target due to the dependency on
  * pagedaemon initialisation.
  */
-int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, free_target,
     param_set_arc_free_target, 0, CTLFLAG_RW,
 	"Desired number of free pages below which ARC triggers reclaim");
-int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
     param_set_arc_no_grow_shift, 0, ZMOD_RW,
 	"log2(fraction of ARC which must be free to allow growing)");
diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c
index cd384c205df..4d908381c40 100644
--- a/module/os/freebsd/zfs/sysctl_os.c
+++ b/module/os/freebsd/zfs/sysctl_os.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/spa_impl.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
+#include <sys/arc_os.h>
 #include <sys/dmu.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
@@ -228,7 +229,7 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
 
 extern uint_t zfs_arc_free_target;
 
-static int
+int
 param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
 {
 	uint_t val;
@@ -261,7 +262,7 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
 	" (LEGACY)");
 /* END CSTYLED */
 
-static int
+int
 param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 {
 	int err, val;

From 891ac937beb959cad94a2ba267e4b56dee930a5e Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 19 Sep 2022 20:30:58 -0400
Subject: [PATCH 57/69] Linux: Fix use-after-free in zfsvfs_create()

Coverity reported that we pass a pointer to zfsvfs to
`dmu_objset_disown()` after freeing zfsvfs in zfsvfs_create_impl() after
a failure in zfsvfs_init().

We have nearly identical duplicate versions of this code for FreeBSD and
Linux, but interestingly, the FreeBSD version of this code differs in
such a way that it does not suffer from this bug. We make the Linux
version match the FreeBSD version to fix this bug.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13883
---
 module/os/linux/zfs/zfs_vfsops.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index 251d9e9a40f..64d6b4616e1 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -784,9 +784,7 @@ zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
 	}
 
 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
-	if (error != 0) {
-		dmu_objset_disown(os, B_TRUE, zfsvfs);
-	}
+
 	return (error);
 }
 
@@ -826,6 +824,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
 
 	error = zfsvfs_init(zfsvfs, os);
 	if (error != 0) {
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
 		*zfvp = NULL;
 		zfsvfs_free(zfsvfs);
 		return (error);

From f272960d52bdc5689078d3cb7cd9e0233cd1a8cd Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 19 Sep 2022 20:32:18 -0400
Subject: [PATCH 58/69] Fix usage of zed_log_msg() and zfs_panic_recover()

Coverity complained about format specifiers not matching their arguments.
In one case, the argument is a constant, so we fix the constant itself. In
another, we were missing an argument (about which Coverity also complained).

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13888
---
 cmd/zed/agents/fmd_api.c    | 2 +-
 cmd/zed/agents/zfs_mod.c    | 2 +-
 cmd/zed/zed_conf.c          | 4 ++--
 cmd/zed/zed_disk_event.c    | 2 +-
 cmd/zed/zed_exec.c          | 2 +-
 module/zfs/spa_checkpoint.c | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cmd/zed/agents/fmd_api.c b/cmd/zed/agents/fmd_api.c
index 9e46e831d51..56c134b731b 100644
--- a/cmd/zed/agents/fmd_api.c
+++ b/cmd/zed/agents/fmd_api.c
@@ -372,7 +372,7 @@ zed_log_fault(nvlist_t *nvl, const char *uuid, const char *code)
 	if (code != NULL)
 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_DIAG_CODE, code);
 	if (nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &byte) == 0)
-		zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FAULT_CERTAINTY, byte);
+		zed_log_msg(LOG_INFO, "\t%s: %hhu", FM_FAULT_CERTAINTY, byte);
 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
 		if (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &strval) == 0)
 			zed_log_msg(LOG_INFO, "\t%s: %s", FM_FMRI_SCHEME,
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index 7364dd2c628..af6de73a1cc 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -364,7 +364,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
 	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
 		zed_log_msg(LOG_INFO, "  not autoreplacing since disk isn't in "
-		    "a bad state (currently %d)", vs->vs_state);
+		    "a bad state (currently %llu)", vs->vs_state);
 		return;
 	}
 
diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c
index 9a39d1a8098..29de27c77c3 100644
--- a/cmd/zed/zed_conf.c
+++ b/cmd/zed/zed_conf.c
@@ -657,7 +657,7 @@ zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[])
 	} else if (n != len) {
 		errno = EIO;
 		zed_log_msg(LOG_WARNING,
-		    "Failed to read state file \"%s\": Read %d of %d bytes",
+		    "Failed to read state file \"%s\": Read %zd of %zd bytes",
 		    zcp->state_file, n, len);
 		return (-1);
 	}
@@ -706,7 +706,7 @@ zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[])
 	if (n != len) {
 		errno = EIO;
 		zed_log_msg(LOG_WARNING,
-		    "Failed to write state file \"%s\": Wrote %d of %d bytes",
+		    "Failed to write state file \"%s\": Wrote %zd of %zd bytes",
 		    zcp->state_file, n, len);
 		return (-1);
 	}
diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c
index 3c8e2fb38c1..db89ecc907b 100644
--- a/cmd/zed/zed_disk_event.c
+++ b/cmd/zed/zed_disk_event.c
@@ -49,7 +49,7 @@ struct udev_monitor *g_mon;
 #define	DEV_BYID_PATH	"/dev/disk/by-id/"
 
 /* 64MB is minimum usable disk for ZFS */
-#define	MINIMUM_SECTORS		131072
+#define	MINIMUM_SECTORS		131072ULL
 
 
 /*
diff --git a/cmd/zed/zed_exec.c b/cmd/zed/zed_exec.c
index 369c4b6950c..51c292d41cc 100644
--- a/cmd/zed/zed_exec.c
+++ b/cmd/zed/zed_exec.c
@@ -263,7 +263,7 @@ _reap_children(void *arg)
 				zed_log_msg(LOG_INFO,
 				    "Finished \"%s\" eid=%llu pid=%d "
 				    "time=%llu.%06us status=0x%X",
-				    node.name, node.eid,
+				    node.name, node.eid, pid,
 				    (unsigned long long) usage.ru_utime.tv_sec,
 				    (unsigned int) usage.ru_utime.tv_usec,
 				    (unsigned int) status);
diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c
index b5b1dfa8a08..a837b1ce97e 100644
--- a/module/zfs/spa_checkpoint.c
+++ b/module/zfs/spa_checkpoint.c
@@ -347,7 +347,7 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
 		if (error != 0) {
 			zfs_panic_recover("zfs: error %lld was returned "
 			    "while incrementally destroying the checkpoint "
-			    "space map of vdev %u\n",
+			    "space map of vdev %llu\n",
 			    (longlong_t)error, vd->vdev_id);
 		}
 		ASSERT0(words_after);

From e8bdc74528c2d0a97e324051e74aeda2e501d1d0 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Mon, 19 Sep 2022 20:33:52 -0400
Subject: [PATCH 59/69] Cleanup: Remove unused uu_pname code

Coverity caught a possible NULL pointer dereference in dead code. We can
delete it all.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Chunwei Chen <david.chen@nutanix.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13900
---
 include/libuutil.h        |  33 -------
 lib/libuutil/Makefile.am  |   1 -
 lib/libuutil/libuutil.abi |  73 --------------
 lib/libuutil/uu_pname.c   | 202 --------------------------------------
 4 files changed, 309 deletions(-)
 delete mode 100644 lib/libuutil/uu_pname.c

diff --git a/include/libuutil.h b/include/libuutil.h
index cb3d366c476..906b49ea5ca 100644
--- a/include/libuutil.h
+++ b/include/libuutil.h
@@ -56,13 +56,6 @@ extern "C" {
 #define	UU_ERROR_SYSTEM		99	/* underlying system error */
 #define	UU_ERROR_UNKNOWN	100	/* error status not known */
 
-/*
- * Standard program exit codes.
- */
-#define	UU_EXIT_OK	(*(uu_exit_ok()))
-#define	UU_EXIT_FATAL	(*(uu_exit_fatal()))
-#define	UU_EXIT_USAGE	(*(uu_exit_usage()))
-
 /*
  * Exit status profiles.
  */
@@ -75,32 +68,6 @@ extern "C" {
 uint32_t uu_error(void);
 const char *uu_strerror(uint32_t);
 
-/*
- * Program notification functions.
- */
-extern void uu_alt_exit(int);
-extern const char *uu_setpname(char *);
-extern const char *uu_getpname(void);
-extern void uu_warn(const char *, ...)
-    __attribute__((format(printf, 1, 2)));
-extern void uu_vwarn(const char *, va_list)
-    __attribute__((format(printf, 1, 0)));
-extern __attribute__((noreturn)) void uu_die(const char *, ...)
-    __attribute__((format(printf, 1, 2)));
-extern __attribute__((noreturn)) void uu_vdie(const char *, va_list)
-    __attribute__((format(printf, 1, 0)));
-extern __attribute__((noreturn)) void uu_xdie(int, const char *, ...)
-    __attribute__((format(printf, 2, 3)));
-extern __attribute__((noreturn)) void uu_vxdie(int, const char *, va_list)
-    __attribute__((format(printf, 2, 0)));
-
-/*
- * Exit status functions (not to be used directly)
- */
-extern int *uu_exit_ok(void);
-extern int *uu_exit_fatal(void);
-extern int *uu_exit_usage(void);
-
 /*
  * Identifier test flags and function.
  */
diff --git a/lib/libuutil/Makefile.am b/lib/libuutil/Makefile.am
index 339f9a06474..b973ce3cca4 100644
--- a/lib/libuutil/Makefile.am
+++ b/lib/libuutil/Makefile.am
@@ -9,7 +9,6 @@ libuutil_la_SOURCES = \
 	%D%/uu_ident.c \
 	%D%/uu_list.c \
 	%D%/uu_misc.c \
-	%D%/uu_pname.c \
 	%D%/uu_string.c
 
 libuutil_la_LIBADD = \
diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi
index 766d8843000..f5186a0837a 100644
--- a/lib/libuutil/libuutil.abi
+++ b/lib/libuutil/libuutil.abi
@@ -1744,79 +1744,6 @@
       <return type-id='48b5725f'/>
     </function-decl>
   </abi-instr>
-  <abi-instr address-size='64' path='uu_pname.c' language='LANG_C99'>
-    <class-decl name='__va_list_tag' size-in-bits='192' is-struct='yes' visibility='default' id='d5027220'>
-      <data-member access='public' layout-offset-in-bits='0'>
-        <var-decl name='gp_offset' type-id='f0981eeb' visibility='default'/>
-      </data-member>
-      <data-member access='public' layout-offset-in-bits='32'>
-        <var-decl name='fp_offset' type-id='f0981eeb' visibility='default'/>
-      </data-member>
-      <data-member access='public' layout-offset-in-bits='64'>
-        <var-decl name='overflow_arg_area' type-id='eaa32e2f' visibility='default'/>
-      </data-member>
-      <data-member access='public' layout-offset-in-bits='128'>
-        <var-decl name='reg_save_area' type-id='eaa32e2f' visibility='default'/>
-      </data-member>
-    </class-decl>
-    <pointer-type-def type-id='d5027220' size-in-bits='64' id='b7f2d5e6'/>
-    <pointer-type-def type-id='95e97e5e' size-in-bits='64' id='7292109c'/>
-    <var-decl name='uu_exit_ok_value' type-id='95e97e5e' mangled-name='uu_exit_ok_value' visibility='default' elf-symbol-id='uu_exit_ok_value'/>
-    <var-decl name='uu_exit_fatal_value' type-id='95e97e5e' mangled-name='uu_exit_fatal_value' visibility='default' elf-symbol-id='uu_exit_fatal_value'/>
-    <var-decl name='uu_exit_usage_value' type-id='95e97e5e' mangled-name='uu_exit_usage_value' visibility='default' elf-symbol-id='uu_exit_usage_value'/>
-    <function-decl name='uu_exit_ok' mangled-name='uu_exit_ok' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_ok'>
-      <return type-id='7292109c'/>
-    </function-decl>
-    <function-decl name='uu_exit_fatal' mangled-name='uu_exit_fatal' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_fatal'>
-      <return type-id='7292109c'/>
-    </function-decl>
-    <function-decl name='uu_exit_usage' mangled-name='uu_exit_usage' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_exit_usage'>
-      <return type-id='7292109c'/>
-    </function-decl>
-    <function-decl name='uu_alt_exit' mangled-name='uu_alt_exit' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_alt_exit'>
-      <parameter type-id='95e97e5e' name='profile'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_vwarn' mangled-name='uu_vwarn' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vwarn'>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter type-id='b7f2d5e6' name='alist'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_warn' mangled-name='uu_warn' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_warn'>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter is-variadic='yes'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_vdie' mangled-name='uu_vdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vdie'>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter type-id='b7f2d5e6' name='alist'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_die' mangled-name='uu_die' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_die'>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter is-variadic='yes'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_vxdie' mangled-name='uu_vxdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_vxdie'>
-      <parameter type-id='95e97e5e' name='status'/>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter type-id='b7f2d5e6' name='alist'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_xdie' mangled-name='uu_xdie' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_xdie'>
-      <parameter type-id='95e97e5e' name='status'/>
-      <parameter type-id='80f4b756' name='format'/>
-      <parameter is-variadic='yes'/>
-      <return type-id='48b5725f'/>
-    </function-decl>
-    <function-decl name='uu_setpname' mangled-name='uu_setpname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_setpname'>
-      <parameter type-id='26a90f95' name='arg0'/>
-      <return type-id='80f4b756'/>
-    </function-decl>
-    <function-decl name='uu_getpname' mangled-name='uu_getpname' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='uu_getpname'>
-      <return type-id='80f4b756'/>
-    </function-decl>
-  </abi-instr>
   <abi-instr address-size='64' path='uu_string.c' language='LANG_C99'>
     <type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
     <enum-decl name='boolean_t' naming-typedef-id='c19b74c3' id='f58c8277'>
diff --git a/lib/libuutil/uu_pname.c b/lib/libuutil/uu_pname.c
deleted file mode 100644
index 37c093731ef..00000000000
--- a/lib/libuutil/uu_pname.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-
-
-#include "libuutil_common.h"
-
-#include <libintl.h>
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <errno.h>
-#include <wchar.h>
-#include <unistd.h>
-
-static const char *pname;
-
-static __attribute__((noreturn)) void
-uu_die_internal(int status, const char *format, va_list alist);
-
-int uu_exit_ok_value = EXIT_SUCCESS;
-int uu_exit_fatal_value = EXIT_FAILURE;
-int uu_exit_usage_value = 2;
-
-int *
-uu_exit_ok(void)
-{
-	return (&uu_exit_ok_value);
-}
-
-int *
-uu_exit_fatal(void)
-{
-	return (&uu_exit_fatal_value);
-}
-
-int *
-uu_exit_usage(void)
-{
-	return (&uu_exit_usage_value);
-}
-
-void
-uu_alt_exit(int profile)
-{
-	switch (profile) {
-	case UU_PROFILE_DEFAULT:
-		uu_exit_ok_value = EXIT_SUCCESS;
-		uu_exit_fatal_value = EXIT_FAILURE;
-		uu_exit_usage_value = 2;
-		break;
-	case UU_PROFILE_LAUNCHER:
-		uu_exit_ok_value = EXIT_SUCCESS;
-		uu_exit_fatal_value = 124;
-		uu_exit_usage_value = 125;
-		break;
-	}
-}
-
-static __attribute__((format(printf, 2, 0))) void
-uu_warn_internal(int err, const char *format, va_list alist)
-{
-	if (pname != NULL)
-		(void) fprintf(stderr, "%s: ", pname);
-
-	if (format != NULL)
-		(void) vfprintf(stderr, format, alist);
-
-	if (strrchr(format, '\n') == NULL)
-		(void) fprintf(stderr, ": %s\n", strerror(err));
-}
-
-void
-uu_vwarn(const char *format, va_list alist)
-{
-	uu_warn_internal(errno, format, alist);
-}
-
-void
-uu_warn(const char *format, ...)
-{
-	va_list alist;
-	va_start(alist, format);
-	uu_warn_internal(errno, format, alist);
-	va_end(alist);
-}
-
-static __attribute__((format(printf, 2, 0))) __attribute__((noreturn)) void
-uu_die_internal(int status, const char *format, va_list alist)
-{
-	uu_warn_internal(errno, format, alist);
-#ifdef DEBUG
-	{
-		char *cp;
-
-		if (!issetugid()) {
-			cp = getenv("UU_DIE_ABORTS");
-			if (cp != NULL && *cp != '\0')
-				abort();
-		}
-	}
-#endif
-	exit(status);
-}
-
-void
-uu_vdie(const char *format, va_list alist)
-{
-	uu_die_internal(UU_EXIT_FATAL, format, alist);
-}
-
-void
-uu_die(const char *format, ...)
-{
-	va_list alist;
-	va_start(alist, format);
-	uu_die_internal(UU_EXIT_FATAL, format, alist);
-	va_end(alist);
-}
-
-void
-uu_vxdie(int status, const char *format, va_list alist)
-{
-	uu_die_internal(status, format, alist);
-}
-
-void
-uu_xdie(int status, const char *format, ...)
-{
-	va_list alist;
-	va_start(alist, format);
-	uu_die_internal(status, format, alist);
-	va_end(alist);
-}
-
-const char *
-uu_setpname(char *arg0)
-{
-	/*
-	 * Having a NULL argv[0], while uncommon, is possible.  It
-	 * makes more sense to handle this event in uu_setpname rather
-	 * than in each of its consumers.
-	 */
-	if (arg0 == NULL) {
-		pname = getexecname();
-		if (pname == NULL)
-			pname = "unknown_command";
-		return (pname);
-	}
-
-	/*
-	 * Guard against '/' at end of command invocation.
-	 */
-	for (;;) {
-		char *p = strrchr(arg0, '/');
-		if (p == NULL) {
-			pname = arg0;
-			break;
-		} else {
-			if (*(p + 1) == '\0') {
-				*p = '\0';
-				continue;
-			}
-
-			pname = p + 1;
-			break;
-		}
-	}
-
-	return (pname);
-}
-
-const char *
-uu_getpname(void)
-{
-	return (pname);
-}

From 9276e202eba330baf253ff7b0f7a631d5915c116 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 20 Sep 2022 17:43:03 -0400
Subject: [PATCH 60/69] FreeBSD: Fix uninitialized pointer read in
 spa_import_rootpool()

The FreeBSD project's Coverity scans found this.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13923
---
 module/os/freebsd/zfs/spa_os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module/os/freebsd/zfs/spa_os.c b/module/os/freebsd/zfs/spa_os.c
index 9bc61a6c8fe..45ea10bb487 100644
--- a/module/os/freebsd/zfs/spa_os.c
+++ b/module/os/freebsd/zfs/spa_os.c
@@ -249,7 +249,7 @@ spa_import_rootpool(const char *name, bool checkpointrewind)
 		mutex_exit(&spa_namespace_lock);
 		fnvlist_free(config);
 		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
-		    pname);
+		    name);
 		return (error);
 	}
 

From 3f400b0f5851e3e5e4f90dd8f5fe083acdeb7ea6 Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 20 Sep 2022 17:50:16 -0400
Subject: [PATCH 61/69] FreeBSD: Cleanup zfs_readdir()

The FreeBSD project's Coverity scans found dead code in `zfs_readdir()`.
Also, the comment above `zfs_readdir()` is out of date.

I fixed the comment and deleted all of the dead code, plus additional
dead code that was found upon review.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13924
---
 include/os/freebsd/Makefile.am          |  1 -
 include/os/freebsd/spl/sys/extdirent.h  | 71 -------------------------
 include/os/freebsd/spl/sys/vnode_impl.h | 13 -----
 module/os/freebsd/zfs/zfs_dir.c         |  1 -
 module/os/freebsd/zfs/zfs_vnops_os.c    | 68 ++++++-----------------
 5 files changed, 16 insertions(+), 138 deletions(-)
 delete mode 100644 include/os/freebsd/spl/sys/extdirent.h

diff --git a/include/os/freebsd/Makefile.am b/include/os/freebsd/Makefile.am
index 3796f20ae7e..a750f52e7d2 100644
--- a/include/os/freebsd/Makefile.am
+++ b/include/os/freebsd/Makefile.am
@@ -21,7 +21,6 @@ noinst_HEADERS = \
 	%D%/spl/sys/dirent.h \
 	%D%/spl/sys/disp.h \
 	%D%/spl/sys/dkio.h \
-	%D%/spl/sys/extdirent.h \
 	%D%/spl/sys/fcntl.h \
 	%D%/spl/sys/file.h \
 	%D%/spl/sys/freebsd_rwlock.h \
diff --git a/include/os/freebsd/spl/sys/extdirent.h b/include/os/freebsd/spl/sys/extdirent.h
deleted file mode 100644
index d6927ae40bb..00000000000
--- a/include/os/freebsd/spl/sys/extdirent.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_EXTDIRENT_H
-#define	_SYS_EXTDIRENT_H
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-#include <sys/types.h>
-#include <sys/dirent.h>
-
-/*
- * Extended file-system independent directory entry.  This style of
- * dirent provides additional informational flag bits for each
- * directory entry.  This dirent will be returned instead of the
- * standard dirent if a VOP_READDIR() requests dirent flags via
- * V_RDDIR_ENTFLAGS, and if the file system supports the flags.
- */
-typedef struct edirent {
-	ino64_t		ed_ino;		/* "inode number" of entry */
-	off64_t		ed_off;		/* offset of disk directory entry */
-	uint32_t	ed_eflags;	/* per-entry flags */
-	unsigned short	ed_reclen;	/* length of this record */
-	char		ed_name[1];	/* name of file */
-} edirent_t;
-
-#define	EDIRENT_RECLEN(namelen)	\
-	((offsetof(edirent_t, ed_name[0]) + 1 + (namelen) + 7) & ~ 7)
-#define	EDIRENT_NAMELEN(reclen)	\
-	((reclen) - (offsetof(edirent_t, ed_name[0])))
-
-/*
- * Extended entry flags
- *	Extended entries include a bitfield of extra information
- *	regarding that entry.
- */
-#define	ED_CASE_CONFLICT  0x10  /* Disconsidering case, entry is not unique */
-
-/*
- * Extended flags accessor function
- */
-#define	ED_CASE_CONFLICTS(x)	((x)->ed_eflags & ED_CASE_CONFLICT)
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_EXTDIRENT_H */
diff --git a/include/os/freebsd/spl/sys/vnode_impl.h b/include/os/freebsd/spl/sys/vnode_impl.h
index 3e698d7ac92..4e04b5e80a0 100644
--- a/include/os/freebsd/spl/sys/vnode_impl.h
+++ b/include/os/freebsd/spl/sys/vnode_impl.h
@@ -44,8 +44,6 @@
 #define	IS_DEVVP(vp)	\
 	((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
 
-#define	V_XATTRDIR	0x0000	/* attribute unnamed directory */
-
 #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
 
 /*
@@ -193,11 +191,6 @@
 #define	MODEMASK	07777		/* mode bits plus permission bits */
 #define	PERMMASK	00777		/* permission bits */
 
-/*
- * VOP_ACCESS flags
- */
-#define	V_ACE_MASK	0x1	/* mask represents  NFSv4 ACE permissions */
-
 /*
  * Flags for vnode operations.
  */
@@ -234,12 +227,6 @@ struct taskq;
 #define	CREATE_XATTR_DIR	0x04	/* Create extended attr dir */
 #define	LOOKUP_HAVE_SYSATTR_DIR	0x08	/* Already created virtual GFS dir */
 
-/*
- * Flags for VOP_READDIR
- */
-#define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
-#define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */
-
 /*
  * Public vnode manipulation functions.
  */
diff --git a/module/os/freebsd/zfs/zfs_dir.c b/module/os/freebsd/zfs/zfs_dir.c
index 6321f0b532a..778e4151656 100644
--- a/module/os/freebsd/zfs/zfs_dir.c
+++ b/module/os/freebsd/zfs/zfs_dir.c
@@ -33,7 +33,6 @@
 #include <sys/resource.h>
 #include <sys/vfs.h>
 #include <sys/vnode.h>
-#include <sys/extdirent.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/uio.h>
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index 57889b7390e..e2222df123f 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -76,7 +76,6 @@
 #include <sys/zfs_quota.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_rlock.h>
-#include <sys/extdirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sched.h>
@@ -1648,10 +1647,11 @@ zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
  *			  and return buffer.
  *		cr	- credentials of caller.
  *		ct	- caller context
- *		flags	- case flags
  *
  *	OUT:	uio	- updated offset and range, buffer filled.
  *		eofp	- set to true if end-of-file detected.
+ *		ncookies- number of entries in cookies
+ *		cookies	- offsets to directory entries
  *
  *	RETURN:	0 on success, error code on failure.
  *
@@ -1669,7 +1669,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 {
 	znode_t		*zp = VTOZ(vp);
 	iovec_t		*iovp;
-	edirent_t	*eodp;
 	dirent64_t	*odp;
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	objset_t	*os;
@@ -1687,7 +1686,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 	uint8_t		type;
 	int		ncooks;
 	cookie_t	*cooks = NULL;
-	int		flags = 0;
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
@@ -1755,7 +1753,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 		outbuf = NULL;
 		odp = (struct dirent64 *)iovp->iov_base;
 	}
-	eodp = (struct edirent *)odp;
 
 	if (ncookies != NULL) {
 		/*
@@ -1824,25 +1821,7 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 		}
 
-		if (flags & V_RDDIR_ACCFILTER) {
-			/*
-			 * If we have no access at all, don't include
-			 * this entry in the returned information
-			 */
-			znode_t	*ezp;
-			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
-				goto skip_entry;
-			if (!zfs_has_access(ezp, cr)) {
-				vrele(ZTOV(ezp));
-				goto skip_entry;
-			}
-			vrele(ZTOV(ezp));
-		}
-
-		if (flags & V_RDDIR_ENTFLAGS)
-			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
-		else
-			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
+		reclen = DIRENT64_RECLEN(strlen(zap.za_name));
 
 		/*
 		 * Will this entry fit in the buffer?
@@ -1857,33 +1836,19 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 			}
 			break;
 		}
-		if (flags & V_RDDIR_ENTFLAGS) {
-			/*
-			 * Add extended flag entry:
-			 */
-			eodp->ed_ino = objnum;
-			eodp->ed_reclen = reclen;
-			/* NOTE: ed_off is the offset for the *next* entry */
-			next = &(eodp->ed_off);
-			eodp->ed_eflags = zap.za_normalization_conflict ?
-			    ED_CASE_CONFLICT : 0;
-			(void) strncpy(eodp->ed_name, zap.za_name,
-			    EDIRENT_NAMELEN(reclen));
-			eodp = (edirent_t *)((intptr_t)eodp + reclen);
-		} else {
-			/*
-			 * Add normal entry:
-			 */
-			odp->d_ino = objnum;
-			odp->d_reclen = reclen;
-			odp->d_namlen = strlen(zap.za_name);
-			/* NOTE: d_off is the offset for the *next* entry. */
-			next = &odp->d_off;
-			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
-			odp->d_type = type;
-			dirent_terminate(odp);
-			odp = (dirent64_t *)((intptr_t)odp + reclen);
-		}
+		/*
+		 * Add normal entry:
+		 */
+		odp->d_ino = objnum;
+		odp->d_reclen = reclen;
+		odp->d_namlen = strlen(zap.za_name);
+		/* NOTE: d_off is the offset for the *next* entry. */
+		next = &odp->d_off;
+		strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
+		odp->d_type = type;
+		dirent_terminate(odp);
+		odp = (dirent64_t *)((intptr_t)odp + reclen);
+
 		outcount += reclen;
 
 		ASSERT3S(outcount, <=, bufsize);
@@ -1893,7 +1858,6 @@ zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
 			dmu_prefetch(os, objnum, 0, 0, 0,
 			    ZIO_PRIORITY_SYNC_READ);
 
-	skip_entry:
 		/*
 		 * Move to the next entry, fill in the previous offset.
 		 */

From c50b3f14d33cd469af47e16f0c6c76f2b4b5158e Mon Sep 17 00:00:00 2001
From: Ameer Hamza <106930537+ixhamza@users.noreply.github.com>
Date: Wed, 21 Sep 2022 03:19:05 +0500
Subject: [PATCH 62/69] Delay ZFS_PROP_SHARESMB property to handle it for
 encrypted raw receive

For encrypted raw receive, objset creation is delayed until a call to
dmu_recv_stream(). ZFS_PROP_SHARESMB property requires objset to be
populated when calling zpl_earlier_version(). To correctly handle the
ZFS_PROP_SHARESMB property for encrypted raw receive, this change
delays setting the property.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #13878
---
 module/zfs/zfs_ioctl.c                            | 15 +++++++++++++++
 .../functional/rsend/send_encrypted_props.ksh     |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 6b9b43271ba..259d68c477d 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -4875,6 +4875,11 @@ extract_delay_props(nvlist_t *props)
 	static const zfs_prop_t delayable[] = {
 		ZFS_PROP_REFQUOTA,
 		ZFS_PROP_KEYLOCATION,
+		/*
+		 * Setting ZFS_PROP_SHARESMB requires the objset type to be
+		 * known, which is not possible prior to receipt of raw sends.
+		 */
+		ZFS_PROP_SHARESMB,
 		0
 	};
 	int i;
@@ -4938,6 +4943,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 	offset_t off, noff;
 	nvlist_t *local_delayprops = NULL;
 	nvlist_t *recv_delayprops = NULL;
+	nvlist_t *inherited_delayprops = NULL;
 	nvlist_t *origprops = NULL; /* existing properties */
 	nvlist_t *origrecvd = NULL; /* existing received properties */
 	boolean_t first_recvd_props = B_FALSE;
@@ -5052,6 +5058,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 		local_delayprops = extract_delay_props(oprops);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 		    oprops, *errors);
+		inherited_delayprops = extract_delay_props(xprops);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
 		    xprops, *errors);
 
@@ -5109,6 +5116,10 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 			    local_delayprops, *errors);
 		}
+		if (inherited_delayprops != NULL && error == 0) {
+			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
+			    inherited_delayprops, *errors);
+		}
 	}
 
 	/*
@@ -5128,6 +5139,10 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
 		nvlist_free(local_delayprops);
 	}
+	if (inherited_delayprops != NULL) {
+		ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0);
+		nvlist_free(inherited_delayprops);
+	}
 	*read_bytes = off - noff;
 
 #ifdef	ZFS_DEBUG
diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
index 793904db91c..c0c7b682def 100755
--- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_props.ksh
@@ -133,6 +133,14 @@ recv_cksum=$(md5digest /$ds/$TESTFILE0)
 log_must test "$recv_cksum" == "$cksum"
 log_must zfs destroy -r $ds
 
+# Test that we can override sharesmb property for encrypted raw stream.
+log_note "Must be able to override sharesmb property for encrypted raw stream"
+ds=$TESTPOOL/recv
+log_must eval "zfs send -w $esnap > $sendfile"
+log_must eval "zfs recv -o sharesmb=on $ds < $sendfile"
+log_must test "$(get_prop 'sharesmb' $ds)" == "on"
+log_must zfs destroy -r $ds
+
 # Test that we can override encryption properties on a properties stream
 # of an unencrypted dataset, turning it into an encryption root.
 log_note "Must be able to receive stream with props as encryption root"

From de6c0d3d8cb279e0dd6a4831d85a9c45047908ba Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 20 Sep 2022 18:20:04 -0400
Subject: [PATCH 63/69] Fix potential NULL pointer dereference in
 zfsdle_vdev_online()

Coverity complained about this.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Chunwei Chen <david.chen@nutanix.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13903
---
 cmd/zed/agents/zfs_mod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index af6de73a1cc..53d9ababded 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -965,7 +965,7 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 	nvlist_t *tgt;
 	int error;
 
-	char *tmp_devname, devname[MAXPATHLEN];
+	char *tmp_devname, devname[MAXPATHLEN] = "";
 	uint64_t guid;
 
 	if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {

From 7c6d94728c9f5deef7e7cc0bf4320385345de5bd Mon Sep 17 00:00:00 2001
From: Richard Yao <richard.yao@alumni.stonybrook.edu>
Date: Tue, 20 Sep 2022 18:20:56 -0400
Subject: [PATCH 64/69] Call va_end() before return in
 zpool_standard_error_fmt()

Commit ecd6cf800b63704be73fb264c3f5b6e0dafc068d by marks in OpenSolaris
at Tue Jun 26 07:44:24 2007 -0700 introduced a bug where we fail to call
`va_end()` before returning.

The man page for va_start() says:

"Each invocation of va_start() must be matched by a corresponding
invocation of va_end() in the same function."

Coverity complained about this.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Chunwei Chen <david.chen@nutanix.com>
Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Closes #13904
---
 lib/libzfs/libzfs_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 3067e8d4639..bc00a8dffd8 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -685,7 +685,7 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 	case ENOSPC:
 	case EDQUOT:
 		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
-		return (-1);
+		break;
 
 	case EAGAIN:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,

From 3e5caef4c5b0cca3a892b92217955178ae8652bc Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 21 Sep 2022 00:21:30 +0200
Subject: [PATCH 65/69] FreeBSD: catch up to 1400068

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #13909
---
 module/os/freebsd/zfs/zfs_vnops_os.c | 41 ++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index e2222df123f..fae390a148d 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -970,13 +970,17 @@ zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
 		case RENAME:
 			if (error == ENOENT) {
 				error = EJUSTRETURN;
+#if __FreeBSD_version < 1400068
 				cnp->cn_flags |= SAVENAME;
+#endif
 				break;
 			}
 			zfs_fallthrough;
 		case DELETE:
+#if __FreeBSD_version < 1400068
 			if (error == 0)
 				cnp->cn_flags |= SAVENAME;
+#endif
 			break;
 		}
 	}
@@ -1326,7 +1330,10 @@ zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
 	cnp->cn_nameptr = __DECONST(char *, name);
 	cnp->cn_namelen = strlen(name);
 	cnp->cn_nameiop = nameiop;
-	cnp->cn_flags = ISLASTCN | SAVENAME;
+	cnp->cn_flags = ISLASTCN;
+#if __FreeBSD_version < 1400068
+	cnp->cn_flags |= SAVENAME;
+#endif
 	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
 	cnp->cn_cred = kcred;
 #if __FreeBSD_version < 1400037
@@ -4590,7 +4597,9 @@ zfs_freebsd_create(struct vop_create_args *ap)
 	znode_t *zp = NULL;
 	int rc, mode;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	vattr_init_mask(vap);
 	mode = vap->va_mode & ALLPERMS;
@@ -4620,7 +4629,9 @@ static int
 zfs_freebsd_remove(struct vop_remove_args *ap)
 {
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
 	    ap->a_cnp->cn_cred));
@@ -4642,7 +4653,9 @@ zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
 	znode_t *zp = NULL;
 	int rc;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
 
 	vattr_init_mask(vap);
 	*ap->a_vpp = NULL;
@@ -4668,7 +4681,9 @@ zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
 }
@@ -4922,8 +4937,10 @@ zfs_freebsd_rename(struct vop_rename_args *ap)
 	vnode_t *tvp = ap->a_tvp;
 	int error;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
 	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
+#endif
 
 	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
 	    ap->a_tcnp, ap->a_fcnp->cn_cred);
@@ -4959,7 +4976,9 @@ zfs_freebsd_symlink(struct vop_symlink_args *ap)
 #endif
 	int rc;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
 	vattr_init_mask(vap);
@@ -5053,7 +5072,9 @@ zfs_freebsd_link(struct vop_link_args *ap)
 	if (tdvp->v_mount != vp->v_mount)
 		return (EXDEV);
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
 	    cnp->cn_nameptr, cnp->cn_cred, 0));
@@ -5325,10 +5346,10 @@ zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
 #endif
 	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
-	vp = nd.ni_vp;
-	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
 
 	if (ap->a_size != NULL) {
 		error = VOP_GETATTR(vp, &va, ap->a_cred);
@@ -5470,12 +5491,10 @@ zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
 	    UIO_SYSSPACE, attrname, xvp);
 #endif
 	error = namei(&nd);
-	vp = nd.ni_vp;
-	if (error != 0) {
-		NDFREE_PNBUF(&nd);
+	if (error != 0)
 		return (SET_ERROR(error));
-	}
 
+	vp = nd.ni_vp;
 	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	NDFREE_PNBUF(&nd);
 
@@ -5615,10 +5634,10 @@ zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
 #endif
 	error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
 	    NULL);
-	vp = nd.ni_vp;
-	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
 
 	VATTR_NULL(&va);
 	va.va_size = 0;
@@ -5802,10 +5821,10 @@ zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
 	    UIO_SYSSPACE, ".", xvp);
 #endif
 	error = namei(&nd);
-	vp = nd.ni_vp;
-	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
+	vp = nd.ni_vp;
+	NDFREE_PNBUF(&nd);
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;

From fbf874a4acd86a118a695fb695fe934e68fc6b6f Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 21 Sep 2022 00:22:32 +0200
Subject: [PATCH 66/69] FreeBSD: handle V_PCATCH

See https://cgit.FreeBSD.org/src/commit/?id=a75d1ddd74312f5dd79bc1e965f7077679659f2e

Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #13910
---
 module/os/freebsd/zfs/zfs_file_os.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c
index fd86a75416e..60c9ff0581e 100644
--- a/module/os/freebsd/zfs/zfs_file_os.c
+++ b/module/os/freebsd/zfs/zfs_file_os.c
@@ -226,7 +226,11 @@ zfs_vop_fsync(vnode_t *vp)
 	struct mount *mp;
 	int error;
 
+#if __FreeBSD_version < 1400068
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+#else
+	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
+#endif
 		goto drop;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_FSYNC(vp, MNT_WAIT, curthread);

From 62e2a2881f6b441c136fb4ccb66ab491a5e6101f Mon Sep 17 00:00:00 2001
From: youzhongyang <youzhong@gmail.com>
Date: Tue, 20 Sep 2022 18:25:21 -0400
Subject: [PATCH 67/69] Fix minor issues in namespace delegation support

get_user_ns() is only done once for each namespace, so put_user_ns()
should be done once too.

Fix two typos in user_namespace/user_namespace_002.ksh and
user_namespace/user_namespace_003.ksh.

Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Youzhong Yang <yyang@mathworks.com>
Closes #13918
---
 module/os/linux/spl/spl-zone.c                                  | 2 +-
 .../tests/functional/user_namespace/user_namespace_002.ksh      | 2 +-
 .../tests/functional/user_namespace/user_namespace_003.ksh      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c
index b8a8b7cd8cd..234ae7f6cd0 100644
--- a/module/os/linux/spl/spl-zone.c
+++ b/module/os/linux/spl/spl-zone.c
@@ -415,8 +415,8 @@ spl_zone_fini(void)
 			    zone_dataset_t, zd_list);
 			list_del(&zd->zd_list);
 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
-			put_user_ns(zds->zds_userns);
 		}
+		put_user_ns(zds->zds_userns);
 		list_del(&zds->zds_list);
 		kmem_free(zds, sizeof (*zds));
 	}
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
index b04898fa81a..cfc478cd359 100755
--- a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh
@@ -85,7 +85,7 @@ fi
 list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
 log_must test -z "$list"
 log_must zfs zone $proc_ns $TESTPOOL/userns
-proc_ns_added="$ns"
+proc_ns_added="$proc_ns"
 
 # 2. 'zfs list'
 list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"
diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
index 2a875d09b6a..6a746c6d33f 100755
--- a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
+++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh
@@ -88,7 +88,7 @@ list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')"
 log_must test -z "$list"
 log_must zfs zone $proc_ns $TESTPOOL/userns
 log_must zfs zone $proc_ns $TESTPOOL/otheruserns
-proc_ns_added="$ns"
+proc_ns_added="$proc_ns"
 
 # 2. 'zfs list'
 list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')"

From 402426c7d81f410fa088c3bd893d4941a97d8332 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 21 Sep 2022 00:32:44 +0200
Subject: [PATCH 68/69] Add membar_sync

Provides the missing full barrier variant to the membar primitive set.

While not used right now, this is probably going to change down the
road.

Name taken from Solaris, to follow the existing routines.

Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #13907
---
 include/os/freebsd/spl/sys/atomic.h | 1 +
 include/os/linux/spl/sys/vmsystm.h  | 1 +
 lib/libspl/atomic.c                 | 6 ++++++
 lib/libspl/include/atomic.h         | 7 +++++++
 4 files changed, 15 insertions(+)

diff --git a/include/os/freebsd/spl/sys/atomic.h b/include/os/freebsd/spl/sys/atomic.h
index 01b13fc9afd..8b9cec15c5e 100644
--- a/include/os/freebsd/spl/sys/atomic.h
+++ b/include/os/freebsd/spl/sys/atomic.h
@@ -59,6 +59,7 @@ extern uint64_t atomic_cas_64(volatile uint64_t *target, uint64_t cmp,
 
 #define	membar_consumer()		atomic_thread_fence_acq()
 #define	membar_producer()		atomic_thread_fence_rel()
+#define	membar_sync()			atomic_thread_fence_seq_cst()
 
 static __inline uint32_t
 atomic_add_32_nv(volatile uint32_t *target, int32_t delta)
diff --git a/include/os/linux/spl/sys/vmsystm.h b/include/os/linux/spl/sys/vmsystm.h
index fcd61e818fa..c6d99fb3183 100644
--- a/include/os/linux/spl/sys/vmsystm.h
+++ b/include/os/linux/spl/sys/vmsystm.h
@@ -46,6 +46,7 @@
 
 #define	membar_consumer()		smp_rmb()
 #define	membar_producer()		smp_wmb()
+#define	membar_sync()			smp_mb()
 
 #define	physmem				zfs_totalram_pages
 
diff --git a/lib/libspl/atomic.c b/lib/libspl/atomic.c
index ba14b113f58..8cc350710ba 100644
--- a/lib/libspl/atomic.c
+++ b/lib/libspl/atomic.c
@@ -381,6 +381,12 @@ membar_exit(void)
 	__atomic_thread_fence(__ATOMIC_SEQ_CST);
 }
 
+void
+membar_sync(void)
+{
+	__atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
 void
 membar_producer(void)
 {
diff --git a/lib/libspl/include/atomic.h b/lib/libspl/include/atomic.h
index 1249d42b604..4ebdbbda986 100644
--- a/lib/libspl/include/atomic.h
+++ b/lib/libspl/include/atomic.h
@@ -313,6 +313,13 @@ extern void membar_enter(void);
  */
 extern void membar_exit(void);
 
+/*
+ * Make all stores and loads emitted prior to the barrier complete before
+ * crossing it, while also making sure stores and loads emitted after the
+ * barrier only start being executed after crossing it.
+ */
+extern void membar_sync(void);
+
 /*
  * Arrange that all stores issued before this point in the code reach
  * global visibility before any stores that follow; useful in producer

From c629f0bf62e351355716f9870d6c2e377584b016 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 21 Sep 2022 00:34:41 +0200
Subject: [PATCH 69/69] Retire ZFS_TEARDOWN_TRY_ENTER_READ

There were never any users and it so happens the operation is not even
supported by rrm locks -- the macros were wrong for Linux and FreeBSD
when not using its RMS locks.

Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Closes #13906
---
 include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 6 ------
 include/os/linux/zfs/sys/zfs_vfsops_os.h   | 3 ---
 2 files changed, 9 deletions(-)

diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index c44f7c6f06b..f765d38dbac 100644
--- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -128,9 +128,6 @@ struct zfsvfs {
 #define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
 	rms_destroy(&(zfsvfs)->z_teardown_lock)
 
-#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
-	rms_try_rlock(&(zfsvfs)->z_teardown_lock)
-
 #define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
 	rms_rlock(&(zfsvfs)->z_teardown_lock);
 
@@ -161,9 +158,6 @@ struct zfsvfs {
 #define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
 	rrm_destroy(&(zfsvfs)->z_teardown_lock)
 
-#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
-	rw_tryenter(&(zfsvfs)->z_teardown_lock, RW_READER)
-
 #define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
 	rrm_enter_read(&(zfsvfs)->z_teardown_lock, tag);
 
diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h
index 697ae2018ec..e320b8de422 100644
--- a/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -143,9 +143,6 @@ struct zfsvfs {
 #define	ZFS_TEARDOWN_DESTROY(zfsvfs)		\
 	rrm_destroy(&(zfsvfs)->z_teardown_lock)
 
-#define	ZFS_TEARDOWN_TRY_ENTER_READ(zfsvfs)	\
-	rw_tryenter(&(zfsvfs)->z_teardown_lock, RW_READER)
-
 #define	ZFS_TEARDOWN_ENTER_READ(zfsvfs, tag)	\
 	rrm_enter_read(&(zfsvfs)->z_teardown_lock, tag);