diff --git a/cddl/lib/libzpool/Makefile b/cddl/lib/libzpool/Makefile
index 0c997e38d37..0c2030faa3d 100644
--- a/cddl/lib/libzpool/Makefile
+++ b/cddl/lib/libzpool/Makefile
@@ -163,6 +163,8 @@ KERNEL_C = \
 	vdev_root.c \
 	vdev_trim.c \
 	zap.c \
+	zap_fat.c \
+	zap_impl.c \
 	zap_leaf.c \
 	zap_micro.c \
 	zcp.c \
diff --git a/sys/conf/files b/sys/conf/files
index 379685d8371..324ee35d490 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -346,6 +346,8 @@ contrib/openzfs/module/zfs/vdev_removal.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_root.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_trim.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap.c		optional zfs compile-with "${ZFS_C}"
+contrib/openzfs/module/zfs/zap_fat.c		optional zfs compile-with "${ZFS_C}"
+contrib/openzfs/module/zfs/zap_impl.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_leaf.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_micro.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zcp.c		optional zfs compile-with "${ZFS_C}"
diff --git a/sys/contrib/openzfs/.github/workflows/README.md b/sys/contrib/openzfs/.github/workflows/README.md
index eef47dae3dc..78774aac52f 100644
--- a/sys/contrib/openzfs/.github/workflows/README.md
+++ b/sys/contrib/openzfs/.github/workflows/README.md
@@ -1,61 +1,96 @@
 
-## The testings are done this way
+## CI overview
+
+The main test pipeline is `zfs-qemu.yml`. Code checking and other
+workflows run independently alongside it.
 
 ```mermaid
 flowchart TB
-subgraph CleanUp and Summary
-  CleanUp+Summary
+subgraph Functional testing
+  Setup[test-config: pick ci_type + OS matrix]
+  Setup --> almalinux
+  Setup --> centos[centos-stream]
+  Setup --> debian
+  Setup --> fedora
+  Setup --> ubuntu
+  Setup --> freebsd
+  almalinux --> Cleanup[cleanup + summary]
+  centos --> Cleanup
+  debian --> Cleanup
+  fedora --> Cleanup
+  ubuntu --> Cleanup
+  freebsd --> Cleanup
 end
 
-subgraph Functional Testings
-  sanity-checks-20.04
-  zloop-checks-20.04
-  functional-testing-20.04-->Part1-20.04
-  functional-testing-20.04-->Part2-20.04
-  functional-testing-20.04-->Part3-20.04
-  functional-testing-20.04-->Part4-20.04
-  functional-testing-22.04-->Part1-22.04
-  functional-testing-22.04-->Part2-22.04
-  functional-testing-22.04-->Part3-22.04
-  functional-testing-22.04-->Part4-22.04
-  sanity-checks-22.04
-  zloop-checks-22.04
-end
-
-subgraph Code Checking + Building
-  Build-Ubuntu-20.04
+subgraph Code checking
+  checkstyle.yaml
   codeql.yml
-  checkstyle.yml
-  Build-Ubuntu-22.04
+  smatch.yml
 end
 
-  Build-Ubuntu-20.04-->sanity-checks-20.04
-  Build-Ubuntu-20.04-->zloop-checks-20.04
-  Build-Ubuntu-20.04-->functional-testing-20.04
-  Build-Ubuntu-22.04-->sanity-checks-22.04
-  Build-Ubuntu-22.04-->zloop-checks-22.04
-  Build-Ubuntu-22.04-->functional-testing-22.04
-
-  sanity-checks-20.04-->CleanUp+Summary
-  Part1-20.04-->CleanUp+Summary
-  Part2-20.04-->CleanUp+Summary
-  Part3-20.04-->CleanUp+Summary
-  Part4-20.04-->CleanUp+Summary
-  Part1-22.04-->CleanUp+Summary
-  Part2-22.04-->CleanUp+Summary
-  Part3-22.04-->CleanUp+Summary
-  Part4-22.04-->CleanUp+Summary
-  sanity-checks-22.04-->CleanUp+Summary
+subgraph Other workflows
+  zfs-arm.yml
+  zloop.yml
+  labels.yml
+end
 ```
 
+Every `qemu-vm` matrix entry runs on a fixed `ubuntu-24.04` host.
+The steps inside one entry are:
 
-1) build zfs modules for Ubuntu 20.04 and 22.04 (~15m)
-2) 2x zloop test (~10m) + 2x sanity test (~25m)
-3) 4x functional testings in parts 1..4 (each ~1h)
-4) cleanup and create summary
-   - content of summary depends on the results of the steps
+1) set up QEMU and boot the guest (~2-4m)
+2) install build dependencies in the guest (~2-4m)
+3) build zfs modules in the guest (~8-12m)
+4) run functional tests (~2-4h)
+5) package and upload per-OS test logs (~10s)
 
-When everything runs fine, the full run should be done in
-about 2 hours.
+A per-OS entry takes about 3 to 4 hours. Once all entries finish, the
+`cleanup` job aggregates the results into a summary.
 
-The codeql.yml and checkstyle.yml are not part in this circle.
+### `ci_type` selection
+
+`test-config` runs `.github/workflows/scripts/generate-ci-type.py` against
+the PR's changed files and picks one of:
+
+| `ci_type` | OS matrix                                  |
+|-----------|--------------------------------------------|
+| `docs`    | empty (documentation-only PRs)             |
+| `quick`   | 6 Linux + 1 FreeBSD                        |
+| `linux`   | all supported Linux distros                |
+| `freebsd` | all supported FreeBSD versions             |
+| default   | cross-platform sample                      |
+
+Pushes to `openzfs/zfs` skip the matrix entirely; only PRs (and pushes to
+forks) build.
+
+Authors can force a specific `ci_type` by adding `ZFS-CI-Type: <type>` to
+the most recent commit message. The `ZTS_OS_OVERRIDE` repository variable
+can also alter the selection. The `workflow_dispatch` trigger accepts
+`fedora_kernel_ver` (Fedora-only run with a chosen kernel) and
+`specific_os` (pin the matrix to one OS).
+
+### Supported guests
+
+Auto-selected:
+
+- Linux: almalinux 8/9/10, centos-stream 9/10, debian 11/12/13,
+  fedora 43/44, ubuntu 22/24/26
+- FreeBSD: 14.4-RELEASE/STABLE, 15.0-RELEASE, 15.1-STABLE, 16.0-CURRENT
+
+Available via `specific_os` or `ZTS_OS_OVERRIDE`:
+
+- archlinux, tumbleweed
+
+### Code checking
+
+- `checkstyle.yaml`: source-style checks
+- `codeql.yml`: CodeQL analysis
+- `smatch.yml`: smatch analysis
+
+### Other workflows
+
+- `zfs-arm.yml`: ARM build on `ubuntu-24.04-arm`
+- `zloop.yml`: host-side zloop
+- `labels.yml`: maintains PR status labels
+- `zfs-qemu-packages.yml`: manually dispatched, builds release RPMs or
+  tests RPM installation from the ZFS yum repo
diff --git a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
index ddcc2b8581f..ce1e1fb8a46 100644
--- a/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
+++ b/sys/contrib/openzfs/.github/workflows/checkstyle.yaml
@@ -12,7 +12,7 @@ jobs:
   checkstyle:
     runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
       with:
         ref: ${{ github.event.pull_request.head.sha }}
     - name: Install dependencies
diff --git a/sys/contrib/openzfs/.github/workflows/codeql.yml b/sys/contrib/openzfs/.github/workflows/codeql.yml
index 689fe71fddc..fbaf53dc61e 100644
--- a/sys/contrib/openzfs/.github/workflows/codeql.yml
+++ b/sys/contrib/openzfs/.github/workflows/codeql.yml
@@ -11,7 +11,7 @@ concurrency:
 jobs:
   analyze:
     name: Analyze
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-24.04
     permissions:
       actions: read
       contents: read
@@ -31,15 +31,15 @@ jobs:
       uses: actions/checkout@v6
 
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v3
+      uses: github/codeql-action/init@v4
       with:
         config-file: .github/codeql-${{ matrix.language }}.yml
         languages: ${{ matrix.language }}
 
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v3
+      uses: github/codeql-action/autobuild@v4
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v3
+      uses: github/codeql-action/analyze@v4
       with:
         category: "/language:${{matrix.language}}"
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
index b1910ab630a..4862cc16139 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
+++ b/sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
@@ -6,6 +6,9 @@
 Output format: "<type> <source>" where source is "manual" (from
 ZFS-CI-Type commit tag) or "auto" (from file change heuristics).
 
+Prints "docs auto" if every changed file is documentation; the qemu
+matrix is skipped in that case.
+
 Prints "quick manual" if:
 - the *last* commit message contains 'ZFS-CI-Type: quick'
 or "quick auto" if (heuristics):
@@ -28,10 +31,24 @@
     r'.*\.gitignore'
 ]))
 
+"""
+Patterns of files that are documentation only.
+"""
+DOCS_ONLY_REGEX = list(map(re.compile, [
+    r'man/.*',
+    r'.*\.md',
+    r'AUTHORS',
+    r'COPYRIGHT',
+    r'LICENSE',
+    r'NOTICE',
+    r'\.gitignore',
+]))
+
 """
 Patterns of files that are considered to trigger full CI.
 """
 FULL_RUN_REGEX = list(map(re.compile, [
+    r'\.github/workflows/.*\.ya?ml',
     r'\.github/workflows/scripts/.*',
     r'cmd.*',
     r'configs/.*',
@@ -116,6 +133,12 @@ def output_type(type, source, reason):
                         f'changed file "{f}" matches pattern "{r.pattern}"'
                         )
 
+    if changed_files and all(
+            any(r.match(f) for r in DOCS_ONLY_REGEX)
+            for f in changed_files):
+        output_type('docs', 'auto',
+                    'all changed files are documentation')
+
     # catch-all
     output_type('quick', 'auto',
                 'no changed file matches full CI patterns')
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
index 5c41a4d6a49..2e83b441588 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
@@ -17,6 +17,8 @@ sudo docker builder prune -a
 unneeded="microsoft-edge-stable|azure-cli|google-cloud|google-chrome-stable|"\
 "temurin|llvm|firefox|mysql-server|snapd|android|dotnet|haskell|ghcup|"\
 "powershell|julia|swift|miniconda|chromium"
+# refresh package index before removing packages
+sudo apt-get -y update
 sudo apt-get -y remove $(dpkg-query -f '${binary:Package}\n' -W | grep -E "'$unneeded'")
 sudo apt-get -y autoremove
 
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
index e63aece389c..7e72030adb9 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
@@ -28,6 +28,7 @@ NIC="virtio"
 # additional options for virt-install
 OPTS[0]=""
 OPTS[1]=""
+ALT_URL=""
 
 case "$OS" in
   almalinux8)
@@ -56,11 +57,22 @@ case "$OS" in
   centos-stream9)
     OSNAME="CentOS Stream 9"
     URL="https://cloud.centos.org/centos/9-stream/x86_64/images/CentOS-Stream-GenericCloud-9-latest.x86_64.qcow2"
+
+    # Sometimes we get HTTP errors for the first link.  Fall back to the
+    # "Composes" repo as an alternative.  The "Composes" repo includes
+    # autogenerated nightly CentOS Stream images.  We have to look up the URL
+    # dynamically since the qcow2 file name has the date in it.
+    ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-9-*.x86_64.qcow2" --spider -np --recursive  --no-verbose \
+              https://composes.stream.centos.org/stream-9/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \
+              awk '/200 OK/{print $(NF-2)}')
     ;;
   centos-stream10)
     OSNAME="CentOS Stream 10"
     OSv="centos-stream9"
     URL="https://cloud.centos.org/centos/10-stream/x86_64/images/CentOS-Stream-GenericCloud-10-latest.x86_64.qcow2"
+    ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-10-*.x86_64.qcow2" --spider -np --recursive  --no-verbose \
+              https://composes.stream.centos.org/stream-10/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \
+              awk '/200 OK/{print $(NF-2)}')
     ;;
   debian11)
     OSNAME="Debian 11"
@@ -78,11 +90,6 @@ case "$OS" in
     OPTS[0]="--boot"
     OPTS[1]="uefi=on"
     ;;
-  fedora42)
-    OSNAME="Fedora 42"
-    OSv="fedora-unknown"
-    URL="https://download.fedoraproject.org/pub/fedora/linux/releases/42/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-42-1.1.x86_64.qcow2"
-    ;;
   fedora43)
     OSNAME="Fedora 43"
     OSv="fedora-unknown"
@@ -93,14 +100,6 @@ case "$OS" in
     OSv="fedora-unknown"
     URL="https://download.fedoraproject.org/pub/fedora/linux/releases/44/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-44-1.7.x86_64.qcow2"
     ;;
-  freebsd13-5r)
-    FreeBSD="13.5-RELEASE"
-    OSNAME="FreeBSD $FreeBSD"
-    OSv="freebsd13.0"
-    URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
-    KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
-    NIC="rtl8139"
-    ;;
   freebsd14-4r)
     FreeBSD="14.4-RELEASE"
     OSNAME="FreeBSD $FreeBSD"
@@ -111,18 +110,10 @@ case "$OS" in
   freebsd15-0r)
     FreeBSD="15.0-RELEASE"
     OSNAME="FreeBSD $FreeBSD"
-    OSv="freebsd15.0"
+    OSv="freebsd14.0"
     URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"
     KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
     ;;
-  freebsd13-5s)
-    FreeBSD="13.5-STABLE"
-    OSNAME="FreeBSD $FreeBSD"
-    OSv="freebsd13.0"
-    URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
-    KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"
-    NIC="rtl8139"
-    ;;
   freebsd14-4s)
     FreeBSD="14.4-STABLE"
     OSNAME="FreeBSD $FreeBSD"
@@ -131,7 +122,7 @@ case "$OS" in
     KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"
     ;;
   freebsd15-1s)
-    FreeBSD="15.1-PRERELEASE"
+    FreeBSD="15.1-STABLE"
     OSNAME="FreeBSD $FreeBSD"
     OSv="freebsd14.0"
     URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"
@@ -160,6 +151,11 @@ case "$OS" in
     OSv="ubuntu24.04"
     URL="$UBMIRROR/noble/current/noble-server-cloudimg-amd64.img"
     ;;
+  ubuntu26)
+    OSNAME="Ubuntu 26.04"
+    OSv="ubuntu24.04"
+    URL="$UBMIRROR/resolute/current/resolute-server-cloudimg-amd64.img"
+    ;;
   *)
     echo "Wrong value for OS variable!"
     exit 111
@@ -173,7 +169,6 @@ echo "ENV=$ENV" >> $ENV
 # result path
 echo 'RESPATH="/var/tmp/test_results"' >> $ENV
 
-# FreeBSD 13 has problems with: e1000 and virtio
 echo "NIC=$NIC" >> $ENV
 
 # freebsd15 -> used in zfs-qemu.yml
@@ -221,6 +216,16 @@ for cmd in 'axel -q -o' 'curl --fail -LSs -o' ; do
   if [ -s "$IMG" ] ; then
     # Successful download
     break
+  else
+    if [ -n "$ALT_URL" ] ; then
+      # Try the $ALT_URL if specified
+      echo "Loading alternative $ALT_URL with $cmd..."
+      time eval "$cmd $IMG $ALT_URL"
+      if [ -s "$IMG" ]; then
+        # Successful ALT_URL download
+        break
+      fi
+    fi
   fi
 done
 
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
index 6a83ef45fd2..d61e97cf423 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
@@ -215,7 +215,7 @@ case "$1" in
   tumbleweed)
     tumbleweed
     ;;
-  ubuntu*)
+  ubuntu22|ubuntu24)
     debian
     echo "##[group]Install Ubuntu specific"
     sudo apt-get install -yq linux-tools-common libtirpc-dev \
@@ -226,6 +226,27 @@ case "$1" in
     # https://github.com/actions/runner-images/issues/9946
     sudo apt-get install -yq build-essential
 
+    echo "##[endgroup]"
+    echo "##[group]Delete Ubuntu OpenZFS modules"
+    for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done
+    echo "##[endgroup]"
+    ;;
+  ubuntu26)
+    debian
+    echo "##[group]Install Ubuntu specific"
+    # Skip linux-modules-extra which is already installed
+    sudo apt-get install -yq linux-tools-common
+    sudo apt-get install -yq libtirpc-dev
+    sudo apt-get install -yq dh-sequence-dkms
+
+    # Need 'build-essential' explicitly for ARM builder
+    # https://github.com/actions/runner-images/issues/9946
+    sudo apt-get install -yq build-essential
+
+    # Replace sudo-rs with sudo for now because the Rust version
+    # does not support -E to preserve the entire environment
+    sudo update-alternatives --set sudo /usr/bin/sudo.ws
+
     echo "##[endgroup]"
     echo "##[group]Delete Ubuntu OpenZFS modules"
     for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done
@@ -267,8 +288,19 @@ case "$1" in
     ;;
   debian*|ubuntu*)
     sudo -E systemctl enable nfs-kernel-server
-    sudo -E systemctl enable qemu-guest-agent
     sudo -E systemctl enable smbd
+
+    # enable usershares (disabled by default on ubuntu 26.04)
+    sudo -E sed -i '/usershare max shares/s/^#//' /etc/samba/smb.conf
+
+    # add systemd drop-in to allow the service to be enabled
+    sudo -E mkdir -p /etc/systemd/system/qemu-guest-agent.service.d/
+    sudo -E tee /etc/systemd/system/qemu-guest-agent.service.d/override.conf <<EOF
+[Install]
+WantedBy=multi-user.target
+EOF
+    sudo -E systemctl daemon-reload
+    sudo -E systemctl enable qemu-guest-agent
     ;;
   *)
     # All other linux distros
@@ -292,7 +324,7 @@ case "$1" in
     echo 'GRUB_SERIAL_COMMAND="serial --speed=115200"' \
       | sudo tee -a /etc/default/grub >/dev/null
     ;;
-  ubuntu24)
+  ubuntu24|ubuntu26)
     GRUB_CFG="/boot/grub/grub.cfg"
     GRUB_MKCONFIG="grub-mkconfig"
     echo 'GRUB_DISABLE_OS_PROBER="false"' \
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps.sh
index 267ae4ad3c7..6e8dd6d7546 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps.sh
@@ -2,9 +2,12 @@
 # 3) Wait for VM to boot from previous step and launch dependencies
 #    script on it.
 #
-# $1: OS name (like 'fedora41')
-# $2: (optional) Experimental kernel version to install on fedora,
-#     like "6.14".
+# qemu-3-deps.sh [--poweroff] OS_NAME [FEDORA_VERSION]
+#
+# --poweroff: Power off the VM after installing dependencies
+# OS_NAME: OS name (like 'fedora41')
+# FEDORA_VERSION: (optional) Experimental kernel version (like "6.14") to
+#     install on Fedora instead of the distribution default.
 ######################################################################
 
 .github/workflows/scripts/qemu-wait-for-vm.sh vm0
@@ -15,8 +18,13 @@
 # we need to update the kernel version in zfs's META file to allow the
 # build to happen.  We update our local copy of META here, since we know
 # it will be rsync'd up in the next step.
-if [ -n "${2:-}" ] ; then
-  sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: 99.99/g' META
+#
+# Look to see if the last argument looks like a kernel version.
+ver="${@: -1}"
+if [[ $ver =~ ^[0-9]+\.[0-9]+ ]] ; then
+  # We got a kernel version, update META to say we support it so we
+  # can test against it.  Quote $ver so it stays a single sed argument.
+  sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: '"$ver"'/g' META
 fi
 
 scp .github/workflows/scripts/qemu-3-deps-vm.sh zfs@vm0:qemu-3-deps-vm.sh
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
index bbfa2ec85b8..dfe70c4f1ef 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
@@ -5,10 +5,12 @@
 #
 # Usage:
 #
-#       qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM]
-#               [--poweroff][--release][--repo][--tarball]
+#       qemu-4-build-vm.sh OS [--custom-branch BRANCH][--enable-debug][--dkms]
+#               [--patch-level NUM][--poweroff][--release][--repo][--tarball]
 #
 # OS:           OS name like 'fedora41'
+# --custom-branch: When building packages, checkout this version of ZFS to
+#                  build, but use the current CI scripts to do it.
 # --enable-debug:  Build RPMs with '--enable-debug' (for testing)
 # --dkms:       Build DKMS RPMs as well
 # --patch-level NUM:    Use a custom patch level number for packages.
@@ -27,8 +29,27 @@ POWEROFF=""
 RELEASE=""
 REPO=""
 TARBALL=""
+CUSTOM_BRANCH=""
+PREV_BRANCH=""
+
+cleanup() {
+  if [ -n "$PREV_BRANCH" ] ; then
+    git checkout $PREV_BRANCH
+  fi
+}
+
 while [[ $# -gt 0 ]]; do
   case $1 in
+    --custom-branch)
+      CUSTOM_BRANCH="$2"
+      # If the user specifies a custom tag/branch to build, and the build
+      # fails, we want to make sure our workflow scripts are restored to the
+      # current (more modern) versions so the subsequent CI steps use those.
+      shift
+      shift
+      PREV_BRANCH=$(git branch --show-current)
+      trap 'cleanup' ERR
+      ;;
     --enable-debug)
       ENABLE_DEBUG=1
       shift
@@ -337,7 +358,7 @@ fi
 #
 # rhel8.10
 # almalinux9.5
-# fedora42
+# fedora44
 source /etc/os-release
  if which hostnamectl &> /dev/null ; then
   # Fedora 42+ use hostnamectl
@@ -367,6 +388,11 @@ if [ -n "$ENABLE_DEBUG" ] ; then
   extra="--enable-debug"
 fi
 
+if [ -n "$CUSTOM_BRANCH" ] ; then
+  git fetch --unshallow
+  git checkout $CUSTOM_BRANCH
+fi
+
 # build
 case "$OS" in
   freebsd*)
@@ -393,6 +419,8 @@ case "$OS" in
     ;;
 esac
 
+cleanup  # restore the saved branch; no-op when PREV_BRANCH is empty
+PREV_BRANCH=""
 
 # building the zfs module was ok
 echo 0 > /var/tmp/build-exitcode.txt
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
index ff3f0a356bb..62917f4cb72 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
@@ -25,8 +25,14 @@ cd lustre-release
 
 # Include Lustre patches to build against master/zfs-2.4.x.  Once these
 # patches are merged we can remove these lines.
+#
+# LU-19539 osd-zfs: use osd_dmu_write() wrapper for xattrs
+# LU-19761 osd-zfs: Build against ZFS 2.4.0
+# LU-19249 build: Compatibility updates for kernel v6.16
+#
 patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download'
-	'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download')
+	'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download'
+	'https://review.whamcloud.com/changes/fs%2Flustre-release~60619/revisions/13/patch?download')
 
 for p in "${patches[@]}" ; do
 	curl $p | base64 -d > patch
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh
index c261cbfca06..a0612e5e0b2 100755
--- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh
+++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh
@@ -79,6 +79,7 @@ function do_builtin_build() {
 
   cd $HOME/linux-$fullver
   ./scripts/config --enable ZFS
+  ./scripts/config --enable ZFS_DEBUG
   yes "" | make oldconfig
   make -j `nproc`
   ) &> /var/tmp/builtin.txt || rc=$?
@@ -185,6 +186,13 @@ case "$OS" in
     sudo mount -o noatime /dev/vdb /var/tmp
     sudo chmod 1777 /var/tmp
     sudo mv -f /tmp/*.txt /var/tmp
+
+    # Allow for longer RCU timeouts due to the heavily virtualized and
+    # potentially oversubscribed nature of the CI environment.
+    rcu_cpu_stall_timeout="/sys/module/rcupdate/parameters/rcu_cpu_stall_timeout"
+    if test -f $rcu_cpu_stall_timeout; then
+        echo 120 | sudo sh -c "cat > '$rcu_cpu_stall_timeout'"
+    fi
     ;;
 esac
 
diff --git a/sys/contrib/openzfs/.github/workflows/smatch.yml b/sys/contrib/openzfs/.github/workflows/smatch.yml
index 305a1f0179b..b6f47d8d41a 100644
--- a/sys/contrib/openzfs/.github/workflows/smatch.yml
+++ b/sys/contrib/openzfs/.github/workflows/smatch.yml
@@ -3,6 +3,14 @@ name: smatch
 on:
   push:
   pull_request:
+    paths-ignore:
+      - 'man/**'
+      - '**.md'
+      - 'AUTHORS'
+      - 'COPYRIGHT'
+      - 'LICENSE'
+      - 'NOTICE'
+      - '.gitignore'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -10,6 +18,7 @@ concurrency:
 
 jobs:
   smatch:
+    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
     runs-on: ubuntu-24.04
     steps:
     - name: Checkout smatch
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-arm.yml b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
index 6039e4736c4..84e1272f713 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-arm.yml
@@ -3,11 +3,30 @@ name: zfs-arm
 on:
   push:
   pull_request:
+    paths-ignore:
+      - 'man/**'
+      - '**.md'
+      - 'AUTHORS'
+      - 'COPYRIGHT'
+      - 'LICENSE'
+      - 'NOTICE'
+      - '.gitignore'
   workflow_dispatch:
+    inputs:
+      gcc_ver:
+        type: string
+        required: false
+        default: ""
+        description: "(optional) install specific GCC version, like '16'"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
 
 jobs:
   zfs-arm:
     name: ZFS ARM build
+    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
     runs-on: ubuntu-24.04-arm
     steps:
     - uses: actions/checkout@v6
@@ -18,6 +37,31 @@ jobs:
       timeout-minutes: 20
       run: |
         sudo apt-get -y remove firefox || true
+
+        # Do we want to test with a custom GCC version?
+        if [ "${{ github.event.inputs.gcc_ver }}" != "" ] ; then
+          ver="${{ github.event.inputs.gcc_ver }}"
+
+          sudo add-apt-repository ppa:ubuntu-toolchain-r/test
+          sudo apt-get update
+
+          echo "GCCs available:"
+          awk '/Package: gcc-/{print $2}'  /var/lib/apt/lists/*ubuntu-toolchain-r*Packages
+
+          sudo apt-get -y install gcc g++ gcc-$ver g++-$ver
+
+          sudo update-alternatives --remove-all gcc 2>&1 || true
+          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$ver 100
+          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$ver 100
+          sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100
+          sudo update-alternatives --set cc /usr/bin/gcc
+          sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100
+          sudo update-alternatives --set c++ /usr/bin/g++
+
+          sudo update-alternatives --set gcc "/usr/bin/gcc-$ver"
+          sudo update-alternatives --set g++ "/usr/bin/g++-$ver"
+        fi
+
         .github/workflows/scripts/qemu-3-deps-vm.sh ubuntu24
 
         # We're running the VM scripts locally on the runner, so need to fix
@@ -28,7 +72,12 @@ jobs:
     - name: Build modules
       timeout-minutes: 30
       run: |
-        .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24
+        # Even though we may have installed a newer GCC, the kernel builds don't
+        # seem to honor it, and instead use the older GCC.  I assume this is
+        # to match up with whatever GCC version was used for the kernel.  Always
+        # specify KERNEL_CC to get around this.  This works when using the
+        # default GCC and with a custom GCC.
+        KERNEL_CC=/usr/bin/gcc .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24
 
         # Quick sanity test since we're not running the full ZTS
         sudo modprobe zfs
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
index c3a7397c6ae..e3333086e62 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
@@ -42,6 +42,11 @@ on:
         required: false
         default: ""
         description: "(optional) repo URL (blank: use http://download.zfsonlinux.org)"
+      custom_branch:
+        type: string
+        required: false
+        default: ""
+        description: "(optional) custom tag/branch to build using current CI (like 'zfs-2.2.9')"
       lookup:
         type: boolean
         required: false
@@ -58,7 +63,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43', 'fedora44']
+        os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora43', 'fedora44']
     runs-on: ubuntu-24.04
     steps:
     - uses: actions/checkout@v6
@@ -94,9 +99,16 @@ jobs:
                 if [ -n "${{ github.event.inputs.patch_level }}" ] ; then
                         EXTRA="--patch-level ${{ github.event.inputs.patch_level }}"
                 fi
+                if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then
+                        EXTRA+=" --custom-branch ${{ github.event.inputs.custom_branch }}"
+                fi
 
                 .github/workflows/scripts/qemu-4-build.sh $EXTRA \
                         --repo --release --dkms --tarball ${{ matrix.os }}
+
+                if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then
+                        echo "Built packages for ${{ github.event.inputs.custom_branch }}"
+                fi
         fi
 
     - name: Prepare artifacts
diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
index 4b4fd27543f..64ffee484a5 100644
--- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
+++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
@@ -14,7 +14,7 @@ on:
         type: string
         required: false
         default: ""
-        description: "(optional) Only run on this specific OS (like 'fedora42' or 'alpine3-23')"
+        description: "(optional) Only run on this specific OS (like 'fedora44' or 'alpine3-23')"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -23,6 +23,7 @@ concurrency:
 jobs:
   test-config:
     name: Setup
+    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
     runs-on: ubuntu-24.04
     outputs:
       test_os: ${{ steps.os.outputs.os }}
@@ -45,24 +46,27 @@ jobs:
           fi
 
           case "$ci_type" in
+          docs)
+            os_selection='[]'
+            ;;
           quick)
-            os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd15-1s", "ubuntu24"]'
+            os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora44", "freebsd15-1s", "ubuntu26"]'
             ;;
           linux)
-            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "ubuntu22", "ubuntu24"]'
+            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora43", "fedora44", "ubuntu22", "ubuntu24", "ubuntu26"]'
             ;;
           freebsd)
-            os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-1s", "freebsd16-0c"]'
+            os_selection='["freebsd14-4r", "freebsd14-4s", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c"]'
             ;;
           *)
             # default list
-            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24"]'
+            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24", "ubuntu26"]'
             ;;
           esac
 
           # Repository-level override for OS selection.
           # Set vars.ZTS_OS_OVERRIDE in repo settings to restrict targets
-          # (e.g. '["debian13"]' or '["debian13", "fedora42"]').
+          # (e.g. '["debian13"]' or '["debian13", "fedora44"]').
           # Manual ZFS-CI-Type in commit messages bypasses the override.
           if [ -n "${{ vars.ZTS_OS_OVERRIDE }}" ] && [ "$ci_source" != "manual" ]; then
             override='${{ vars.ZTS_OS_OVERRIDE }}'
@@ -91,15 +95,19 @@ jobs:
   qemu-vm:
     name: qemu-x86
     needs: [ test-config ]
+    if: >-
+      (github.event_name == 'pull_request' ||
+      github.repository != 'openzfs/zfs') &&
+      needs.test-config.outputs.ci_type != 'docs'
     strategy:
       fail-fast: false
       matrix:
         # rhl:     almalinux8, almalinux9, centos-streamX, fedora4x
-        # debian:  debian12, debian13, ubuntu22, ubuntu24
+        # debian:  debian12, debian13, ubuntu22, ubuntu24, ubuntu26
         # misc:    archlinux, tumbleweed
         # FreeBSD variants of november 2025:
-        # FreeBSD Release: freebsd13-5r, freebsd14-4r, freebsd15-0r
-        # FreeBSD Stable:  freebsd13-5s, freebsd14-4s, freebsd15-1s
+        # FreeBSD Release: freebsd14-4r, freebsd15-0r
+        # FreeBSD Stable:  freebsd14-4s, freebsd15-1s
         # FreeBSD Current: freebsd16-0c
         os: ${{ fromJson(needs.test-config.outputs.test_os) }}
     runs-on: ubuntu-24.04
@@ -153,7 +161,10 @@ jobs:
       run: .github/workflows/scripts/qemu-8-summary.sh '${{ steps.artifact-upload.outputs.artifact-url }}'
 
   cleanup:
-    if: always()
+    if: >-
+      (github.event_name == 'pull_request' ||
+      github.repository != 'openzfs/zfs') &&
+      always()
     name: Cleanup
     runs-on: ubuntu-latest
     needs: [ qemu-vm ]
diff --git a/sys/contrib/openzfs/.github/workflows/zloop.yml b/sys/contrib/openzfs/.github/workflows/zloop.yml
index 7f76a670af9..edd2c391583 100644
--- a/sys/contrib/openzfs/.github/workflows/zloop.yml
+++ b/sys/contrib/openzfs/.github/workflows/zloop.yml
@@ -3,6 +3,14 @@ name: zloop
 on:
   push:
   pull_request:
+    paths-ignore:
+      - 'man/**'
+      - '**.md'
+      - 'AUTHORS'
+      - 'COPYRIGHT'
+      - 'LICENSE'
+      - 'NOTICE'
+      - '.gitignore'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
diff --git a/sys/contrib/openzfs/Makefile.am b/sys/contrib/openzfs/Makefile.am
index 73382f86e6f..c1638aa4288 100644
--- a/sys/contrib/openzfs/Makefile.am
+++ b/sys/contrib/openzfs/Makefile.am
@@ -138,6 +138,7 @@ cstyle:
 		! -path './include/sys/lua/*' \
 		! -path './module/lua/l*.[ch]' \
 		! -path './module/zfs/lz4.c' \
+		! -path './tests/unit/munit.[ch]' \
 		$(cstyle_line)
 
 filter_executable = -exec test -x '{}' \; -print
diff --git a/sys/contrib/openzfs/README.md b/sys/contrib/openzfs/README.md
index fa348a24383..6a77cedb635 100644
--- a/sys/contrib/openzfs/README.md
+++ b/sys/contrib/openzfs/README.md
@@ -52,7 +52,7 @@ All RHEL (and compatible systems: AlmaLinux OS, Rocky Linux, etc) on the **full*
 
 All Ubuntu **LTS** releases are supported.
 
-**Supported Ubuntu releases**: **24.04 “Noble”**, **22.04 “Jammy”**.
+**Supported Ubuntu releases**: **26.04 “Resolute”**, **24.04 “Noble”**, **22.04 “Jammy”**.
 
 ### Debian
 
@@ -68,4 +68,4 @@ Generally, if a distribution is following an LTS kernel, it should work well wit
 
 All FreeBSD releases receiving [security support](https://www.freebsd.org/security/#sup) are supported by OpenZFS.
 
-**Supported FreeBSD releases**: **15.0**, **14.4**, **13.5**.
+**Supported FreeBSD releases**: **15.0**, **14.4**.
diff --git a/sys/contrib/openzfs/cmd/Makefile.am b/sys/contrib/openzfs/cmd/Makefile.am
index 6f8d0c4b1db..6e54be7466a 100644
--- a/sys/contrib/openzfs/cmd/Makefile.am
+++ b/sys/contrib/openzfs/cmd/Makefile.am
@@ -54,7 +54,6 @@ ztest_LDADD = \
 	libnvpair.la
 
 ztest_LDADD += -lm
-ztest_LDFLAGS = -pthread
 
 
 include $(srcdir)/%D%/raidz_test/Makefile.am
diff --git a/sys/contrib/openzfs/cmd/zarcstat.in b/sys/contrib/openzfs/cmd/zarcstat.in
index 8ffd2048116..ad0e12e9fbf 100755
--- a/sys/contrib/openzfs/cmd/zarcstat.in
+++ b/sys/contrib/openzfs/cmd/zarcstat.in
@@ -565,10 +565,10 @@ def init():
 
     update_hdr_intr()
 
-    # check if L2ARC exists
+    # check if L2ARC exists; fall back to l2_size for older kernels that
+    # do not export l2_ndev
     snap_stats()
-    l2_size = cur.get("l2_size")
-    if l2_size:
+    if cur.get("l2_ndev") or cur.get("l2_size"):
         l2exist = True
 
     if desired_cols:
diff --git a/sys/contrib/openzfs/cmd/zarcsummary b/sys/contrib/openzfs/cmd/zarcsummary
index 24a129d9ca7..5b6e35465ea 100755
--- a/sys/contrib/openzfs/cmd/zarcsummary
+++ b/sys/contrib/openzfs/cmd/zarcsummary
@@ -856,7 +856,10 @@ def section_l2arc(kstats_dict):
     # The L2ARC statistics live in the same section as the normal ARC stuff
     arc_stats = isolate_section('arcstats', kstats_dict)
 
-    if arc_stats['l2_size'] == '0':
+    # Skip the section only when no cache device is attached. Fall back to
+    # l2_size for older kernels that do not export l2_ndev.
+    if arc_stats.get('l2_ndev', '0') == '0' and \
+            arc_stats['l2_size'] == '0':
         print('L2ARC not detected, skipping section\n')
         return
 
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
index 68c9696a8aa..05e005d929a 100644
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -2802,18 +2802,18 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,
 	    vd->vdev_children, vdrz->vd_nparity);
 	raidz_row_t *rr = rm->rm_row[0];
 
-	/*
-	 * Account for out of order disks in raidz1.
-	 * For now just reverse them back and adjust for it later.
-	 */
-	if (rr->rr_firstdatacol == 1 && (zio.io_offset & (1ULL << 20))) {
-		uint64_t devidx = rr->rr_col[0].rc_devidx;
-		rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
-		rr->rr_col[1].rc_devidx = devidx;
-	}
-
 	if (!dump_opt['H']) {
 		int last_disk = vd->vdev_children - 1;
+		/*
+		 * Account for out of order disks in raidz1.
+		 * For now just reverse them back and adjust for it later.
+		 */
+		if (rr->rr_firstdatacol == 1 &&
+		    (zio.io_offset & (1ULL << 20))) {
+			uint64_t devidx = rr->rr_col[0].rc_devidx;
+			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
+			rr->rr_col[1].rc_devidx = devidx;
+		}
 		int first_disk = rr->rr_col[0].rc_devidx;
 
 		(void) printf("%12llx", (u_longlong_t)file_offset);
@@ -2843,23 +2843,49 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,
 		static uint64_t next_offset = 0;
 
 		if (next_offset != file_offset) {
-			(void) printf("skip hole\t-\t%llx\n",
-			    (u_longlong_t)((file_offset - next_offset) >>
-			    vd->vdev_ashift));
+			(void) printf("skip hole\t-\t\t%llu\n",
+			    (u_longlong_t)((file_offset - next_offset) / 512));
 		}
 		next_offset = file_offset + BP_GET_LSIZE(bp);
+		uint64_t tmp_offset = file_offset;
+
 
 		for (int c = 0; c < rr->rr_cols; c++) {
+			boolean_t pcol = c < rr->rr_firstdatacol;
 			raidz_col_t *rc = &rr->rr_col[c];
 			char *path = vd->vdev_child[rc->rc_devidx]->vdev_path;
-			// c < rr->rr_firstdatacol
+
 			if (rc->rc_size == 0)
 				continue;
-			(void) printf("%s\t%llu\t%d\n",
+			(void) printf("%s\t\t%llu\t%d",
 			    zfs_basename(path),
 			    (u_longlong_t)(rc->rc_offset +
 			    VDEV_LABEL_START_SIZE)/512,
 			    (int)rc->rc_size/512);
+			if (dump_opt['v']) {
+				char label = pcol ? 'P' : 'D';
+				int num;
+
+				/*
+				 * Number parity columns by column index and
+				 * data columns from the first data column, so
+				 * that each label is unique within the row.
+				 */
+				num = pcol ? c : (c - rr->rr_firstdatacol);
+				printf("\t%c%d", label, num);
+				if (dump_opt['v'] > 1) {
+					unsigned long long off;
+					if (pcol)
+						off = file_offset;
+					else
+						off = tmp_offset;
+					off = off / 512ULL;
+					printf("\t%llu", off);
+				}
+			}
+			if (!pcol)
+				tmp_offset += rc->rc_size;
+			printf("\n");
 		}
 	}
 }
@@ -2989,7 +3015,12 @@ dump_indirect_layout(dnode_t *dn)
 	 * Start layout with a header
 	 */
 	if (dump_opt['H']) {
-		(void) printf("DISK\t\tLBA\t\tCOUNT\n");
+		(void) printf("DISK\t\t\tLBA\tCOUNT");
+		if (dump_opt['v'])
+			(void) printf("\tTYPE");
+		if (dump_opt['v'] > 1)
+			(void) printf("\tOFFSET");
+		printf("\n");
 	} else {
 		char diskhdr[16];
 
@@ -6325,22 +6356,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
     dmu_object_type_t type)
 {
 	int i;
+	boolean_t claimed = B_FALSE;
+	boolean_t ddt_block = B_FALSE;
+	boolean_t brt_block = B_FALSE;
 
 	ASSERT(type < ZDB_OT_TOTAL);
 
 	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
 		return;
 
-	/*
-	 * This flag controls if we will issue a claim for the block while
-	 * counting it, to ensure that all blocks are referenced in space maps.
-	 * We don't issue claims if we're not doing leak tracking, because it's
-	 * expensive if the user isn't interested. We also don't claim the
-	 * second or later occurences of cloned or dedup'd blocks, because we
-	 * already claimed them the first time.
-	 */
-	boolean_t do_claim = !dump_opt['L'];
-
 	spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
 
 	blkptr_t tempbp;
@@ -6371,21 +6395,30 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 		ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);
 
 		/*
-		 * ddt_lookup() can return NULL if this block didn't exist
-		 * in the DDT and creating it would take the DDT over its
-		 * quota. Since we got the block from disk, it must exist in
-		 * the DDT, so this can't happen. However, when unique entries
-		 * are pruned, the dedup bit can be set with no corresponding
-		 * entry in the DDT.
+		 * ddt_lookup() can return NULL when unique entries are pruned
+		 * from the DDT.
 		 */
 		if (dde == NULL) {
 			ddt_exit(ddt);
-			goto skipped;
+			goto ddt_done;
 		}
 
 		/* Get the phys for this variant */
 		ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
 
+		/*
+		 * DDT_PHYS_NONE means the block has the dedup bit set but
+		 * its DVA doesn't match any phys in the entry.  This can
+		 * happen when a DVA was evicted from the DDT and re-added
+		 * on a hash collision.  The block may still have a BRT entry.
+		 */
+		if (v == DDT_PHYS_NONE) {
+			ddt_exit(ddt);
+			goto ddt_done;
+		}
+
+		ddt_block = B_TRUE;
+
 		/*
 		 * This entry may have multiple sets of DVAs. We must claim
 		 * each set the first time we see them in a real block on disk,
@@ -6400,8 +6433,14 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 			dde->dde_io =
 			    (void *)(((uintptr_t)dde->dde_io) | (1 << v));
 
-		/* Consume a reference for this block. */
-		if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
+		/*
+		 * Consume a reference.  If this variant's refcount is already
+		 * zero, the DDT tracking is exhausted — more filesystem
+		 * references exist than the DDT accounts for.
+		 */
+		boolean_t ddt_refcnt_exhausted =
+		    (ddt_phys_refcnt(dde->dde_phys, v) == 0);
+		if (!ddt_refcnt_exhausted)
 			ddt_phys_decref(dde->dde_phys, v);
 
 		/*
@@ -6430,20 +6469,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 			bp = &tempbp;
 		}
 
-		if (seen) {
+		if (seen && !ddt_refcnt_exhausted) {
 			/*
 			 * The second or later time we see this block,
 			 * it's a duplicate and we count it.
 			 */
 			zcb->zcb_dedup_asize += BP_GET_ASIZE(bp);
 			zcb->zcb_dedup_blocks++;
-
-			/* Already claimed, don't do it again. */
-			do_claim = B_FALSE;
+			claimed = B_TRUE;
 		}
 
 		ddt_exit(ddt);
-	} else if (zcb->zcb_brt_is_active &&
+	}
+
+ddt_done:
+	if (!claimed && zcb->zcb_brt_is_active &&
 	    brt_maybe_exists(zcb->zcb_spa, bp)) {
 		/*
 		 * Cloned blocks are special. We need to count them, so we can
@@ -6451,10 +6491,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 		 * only claim them once.
 		 *
 		 * To do this, we keep our own in-memory BRT. For each block
-		 * we haven't seen before, we look it up in the real BRT and
-		 * if its there, we note it and its refcount then proceed as
-		 * normal. If we see the block again, we count it as a clone
-		 * and then give it no further consideration.
+		 * we haven't seen before, we look it up in the real BRT. If
+		 * we see the block again, we count it as a clone.
 		 */
 		zdb_brt_entry_t zbre_search, *zbre;
 		avl_index_t where;
@@ -6462,36 +6500,27 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 		zbre_search.zbre_dva = bp->blk_dva[0];
 		zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
 		if (zbre == NULL) {
-			/* Not seen before; track it */
 			uint64_t refcnt =
 			    brt_entry_get_refcount(zcb->zcb_spa, bp);
 			if (refcnt > 0) {
+				brt_block = B_TRUE;
 				zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
 				    UMEM_NOFAIL);
 				zbre->zbre_dva = bp->blk_dva[0];
 				zbre->zbre_refcount = refcnt;
 				avl_insert(&zcb->zcb_brt, zbre, where);
 			}
-		} else  {
-			/*
-			 * Second or later occurrence, count it and take a
-			 * refcount.
-			 */
-			zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
-			zcb->zcb_clone_blocks++;
-
-			zbre->zbre_refcount--;
-			if (zbre->zbre_refcount == 0) {
-				avl_remove(&zcb->zcb_brt, zbre);
-				umem_free(zbre, sizeof (zdb_brt_entry_t));
+		} else {
+			brt_block = B_TRUE;
+			if (zbre->zbre_refcount > 0) {
+				zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
+				zcb->zcb_clone_blocks++;
+				zbre->zbre_refcount--;
+				claimed = B_TRUE;
 			}
-
-			/* Already claimed, don't do it again. */
-			do_claim = B_FALSE;
 		}
 	}
 
-skipped:
 	for (i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
 		int t = (i & 1) ? type : ZDB_OT_TOTAL;
@@ -6650,12 +6679,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
 #undef BIN
 
 hist_skipped:
-	if (!do_claim)
+	if (claimed || dump_opt['L'])
 		return;
 
-	VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa,
+	int claim_err = zio_wait(zio_claim(NULL, zcb->zcb_spa,
 	    spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL,
-	    ZIO_FLAG_CANFAIL)));
+	    ZIO_FLAG_CANFAIL));
+	if (claim_err != 0) {
+		char blkbuf[BP_SPRINTF_LEN];
+		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
+		(void) printf("block claim error %d%s%s: %s\n",
+		    claim_err, brt_block ? " (BRT)" : "",
+		    ddt_block ? " (DDT)" : "", blkbuf);
+		zcb->zcb_haderrors = 1;
+		zcb->zcb_errors[claim_err]++;
+	}
 }
 
 static void
@@ -7431,10 +7469,66 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
 static boolean_t
 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
 {
-	if (dump_opt['L'])
-		return (B_FALSE);
-
 	boolean_t leaks = B_FALSE;
+
+	/*
+	 * Report leaked BRT entries whose refcount was not fully consumed by
+	 * the traversal.
+	 */
+	if (zcb->zcb_brt_is_active) {
+		void *cookie = NULL;
+		zdb_brt_entry_t *zbre;
+		while ((zbre = avl_destroy_nodes(
+		    &zcb->zcb_brt, &cookie)) != NULL) {
+			if (!dump_opt['L'] && zbre->zbre_refcount != 0) {
+				(void) printf("BRT leak: vdev %llu, "
+				    "offset 0x%llx, refcount %llu\n",
+				    (u_longlong_t)DVA_GET_VDEV(
+				    &zbre->zbre_dva),
+				    (u_longlong_t)DVA_GET_OFFSET(
+				    &zbre->zbre_dva),
+				    (u_longlong_t)zbre->zbre_refcount);
+				leaks = B_TRUE;
+			}
+			umem_free(zbre, sizeof (zdb_brt_entry_t));
+		}
+		avl_destroy(&zcb->zcb_brt);
+	}
+
+	if (dump_opt['L'])
+		return (leaks);
+
+	/*
+	 * Report leaked DDT entries whose refcount was not fully consumed by
+	 * the traversal.  Entries in the DDT ZAP that were never looked up
+	 * are not detected here.
+	 */
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+		ddt_t *ddt = spa->spa_ddt[c];
+		if (ddt == NULL)
+			continue;
+		ddt_enter(ddt);
+		for (ddt_entry_t *dde = avl_first(&ddt->ddt_tree); dde != NULL;
+		    dde = AVL_NEXT(&ddt->ddt_tree, dde)) {
+			for (int p = 0; p < DDT_NPHYS(ddt); p++) {
+				ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
+				uint64_t refcnt = ddt_phys_refcnt(dde->dde_phys,
+				    v);
+				if (refcnt == 0)
+					continue;
+				blkptr_t blk;
+				char blkbuf[BP_SPRINTF_LEN];
+				ddt_bp_create(ddt->ddt_checksum, &dde->dde_key,
+				    dde->dde_phys, v, &blk);
+				snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
+				(void) printf("DDT leak: refcount %llu %s\n",
+				    (u_longlong_t)refcnt, blkbuf);
+				leaks = B_TRUE;
+			}
+		}
+		ddt_exit(ddt);
+	}
+
 	vdev_t *rvd = spa->spa_root_vdev;
 	for (unsigned c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
@@ -10136,7 +10230,7 @@ main(int argc, char **argv)
 	 * Automate cachefile
 	 */
 	if (!spa_config_path_env && !config_path_console && target &&
-	    libzfs_core_init() == 0) {
+	    !dump_opt['l'] && libzfs_core_init() == 0) {
 		char *pname = strdup(target);
 		const char *value;
 		nvlist_t *pnvl = NULL;
@@ -10519,6 +10613,7 @@ main(int argc, char **argv)
 		}
 
 		if (dump_opt['f'] && os != NULL) {
+			dump_opt['v'] = verbose;
 			dump_file_data_layout(os);
 		} else if (dump_opt['B']) {
 			dump_backup(target, objset_id,
diff --git a/sys/contrib/openzfs/cmd/zed/Makefile.am b/sys/contrib/openzfs/cmd/zed/Makefile.am
index 0166d072356..712917401a0 100644
--- a/sys/contrib/openzfs/cmd/zed/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zed/Makefile.am
@@ -41,6 +41,5 @@ zed_LDADD = \
 	libnvpair.la
 
 zed_LDADD += -lrt $(LIBATOMIC_LIBS) $(LIBUDEV_LIBS) $(LIBUUID_LIBS)
-zed_LDFLAGS = -pthread
 
 dist_noinst_DATA += %D%/agents/README.md
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
index 8aabf6d3bf7..ba3672a30a7 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
@@ -350,6 +350,60 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,
 	return (res);
 }
 
+/*
+ * Returns B_TRUE if spare 'a' should be tried before spare 'b' when
+ * replacing a failed vdev with the given characteristics.
+ *
+ * Ordering criteria (most to least significant):
+ *  1. Distributed spare matching the failed vdev's dRAID is preferred
+ *     most (distributed spares rebuild faster than traditional spares).
+ *     Regular spares (no TOP_GUID) come next.  Non-matching distributed
+ *     spares are tried last, as the kernel will reject them anyway.
+ *  2. Matching rotational is preferred over mismatching.
+ *  3. Large enough is preferred over too small.
+ *  4. Smaller size is preferred over bigger (best fit).
+ */
+static boolean_t
+spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
+    uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
+{
+	uint64_t a_top = 0, b_top = 0;
+	(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top);
+	(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top);
+	int a_pri = (a_top == 0) ? 1 :
+	    (a_top == top_guid || top_guid == 0) ? 2 : 0;
+	int b_pri = (b_top == 0) ? 1 :
+	    (b_top == top_guid || top_guid == 0) ? 2 : 0;
+	if (a_pri != b_pri)
+		return (a_pri > b_pri);
+	/* Criterion 2: prefer the spare whose rotational flag matches. */
+	if (have_rotational) {
+		uint64_t a_rotational = 0, b_rotational = 0;
+		(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_VDEV_ROTATIONAL,
+		    &a_rotational);
+		(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_VDEV_ROTATIONAL,
+		    &b_rotational);
+		if ((a_rotational == vdev_rotational) !=
+		    (b_rotational == vdev_rotational))
+			return (a_rotational == vdev_rotational);
+	}
+	/* Criteria 3-4: sizes come from vs_rsize; 0 if stats are absent. */
+	vdev_stat_t *vs;
+	unsigned int c;
+	uint64_t a_size = 0, b_size = 0;
+	if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) == 0)
+		a_size = vs->vs_rsize;
+	if (nvlist_lookup_uint64_array(b, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) == 0)
+		b_size = vs->vs_rsize;
+	boolean_t a_ok = (a_size >= vdev_size);
+	boolean_t b_ok = (b_size >= vdev_size);
+	if (a_ok != b_ok)
+		return (a_ok);
+	return (a_size < b_size);
+}
+
 /*
  * Given a vdev, attempt to replace it with every known spare until one
  * succeeds or we run out of devices to try.
@@ -364,6 +418,10 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	char *dev_name;
 	zprop_source_t source;
 	int ashift;
+	uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0;
+	boolean_t have_vdev_rotational;
+	vdev_stat_t *vs;
+	unsigned int c;
 
 	config = zpool_get_config(zhp, NULL);
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
@@ -377,6 +435,35 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	    &spares, &nspares) != 0)
 		return (B_FALSE);
 
+	/*
+	 * Collect the failed vdev's parameters for optimal replacement.
+	 */
+	have_vdev_rotational = (nvlist_lookup_uint64(vdev,
+	    ZPOOL_CONFIG_VDEV_ROTATIONAL, &vdev_rotational) == 0);
+	if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) == 0)
+		vdev_size = vs->vs_rsize;
+	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid);
+
+	/*
+	 * Build a sorted index array over the spares, so that better
+	 * candidates are tried first.
+	 */
+	uint_t order[nspares];
+	for (s = 0; s < nspares; s++)
+		order[s] = s;
+	for (s = 1; s < nspares; s++) {
+		uint_t key = order[s];
+		int j = (int)s - 1;
+		while (j >= 0 && spare_is_preferred(spares[key],
+		    spares[order[j]], have_vdev_rotational, vdev_rotational,
+		    vdev_size, top_guid)) {
+			order[j + 1] = order[j];
+			j--;
+		}
+		order[j + 1] = key;
+	}
+
 	/*
 	 * lookup "ashift" pool property, we may need it for the replacement
 	 */
@@ -394,25 +481,26 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	 * replace it.
 	 */
 	for (s = 0; s < nspares; s++) {
+		nvlist_t *spare = spares[order[s]];
 		boolean_t rebuild = B_FALSE;
 		const char *spare_name, *type;
 
-		if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+		if (nvlist_lookup_string(spare, ZPOOL_CONFIG_PATH,
 		    &spare_name) != 0)
 			continue;
 
 		/* prefer sequential resilvering for distributed spares */
-		if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE,
+		if ((nvlist_lookup_string(spare, ZPOOL_CONFIG_TYPE,
 		    &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
 			rebuild = B_TRUE;
 
 		/* if set, add the "ashift" pool property to the spare nvlist */
 		if (source != ZPROP_SRC_DEFAULT)
-			(void) nvlist_add_uint64(spares[s],
+			(void) nvlist_add_uint64(spare,
 			    ZPOOL_CONFIG_ASHIFT, ashift);
 
 		(void) nvlist_add_nvlist_array(replacement,
-		    ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spares[s], 1);
+		    ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spare, 1);
 
 		fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'",
 		    dev_name, zfs_basename(spare_name));
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
index 4c21c92bcd2..d448a3df60e 100644
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -9399,6 +9399,18 @@ main(int argc, char **argv)
 		return (1);
 	}
 
+	/*
+	 * Special case '<subcommand> --help|-?'
+	 */
+	if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||
+	    strcmp(argv[2], "-?") == 0)) {
+		int idx;
+		if (find_command_idx(cmdname, &idx) == 0) {
+			current_command = &command_table[idx];
+			usage(B_FALSE);
+		}
+	}
+
 	zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
 
 	libzfs_print_on_error(g_zfs, B_TRUE);
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
index 3ed7babc1ca..05ea5e35446 100644
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -13878,6 +13878,18 @@ main(int argc, char **argv)
 	if (strcmp(cmdname, "help") == 0)
 		return (zpool_do_help(argc, argv));
 
+	/*
+	 * Special case '<subcommand> --help|-?'
+	 */
+	if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||
+	    strcmp(argv[2], "-?") == 0)) {
+		int idx;
+		if (find_command_idx(cmdname, &idx) == 0) {
+			current_command = &command_table[idx];
+			usage(B_FALSE);
+		}
+	}
+
 	if ((g_zfs = libzfs_init()) == NULL) {
 		(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
 		return (1);
diff --git a/sys/contrib/openzfs/cmd/zstream/zstream.c b/sys/contrib/openzfs/cmd/zstream/zstream.c
index f1a2fa75740..da74ab6e1e5 100644
--- a/sys/contrib/openzfs/cmd/zstream/zstream.c
+++ b/sys/contrib/openzfs/cmd/zstream/zstream.c
@@ -29,6 +29,8 @@
 #include <libintl.h>
 #include <stddef.h>
 #include <libzfs.h>
+#include <signal.h>
+#include <sys/backtrace.h>
 #include "zstream.h"
 
 void
@@ -53,9 +55,43 @@ zstream_usage(void)
 	exit(1);
 }
 
+/* Crash handler: call libspl_backtrace() on stderr, then re-raise signo. */
+static void sig_handler(int signo)
+{
+	struct sigaction action;
+	libspl_backtrace(STDERR_FILENO);
+
+	/*
+	 * Restore default action and re-raise signal so SIGSEGV and
+	 * SIGABRT can trigger a core dump.
+	 */
+	action.sa_handler = SIG_DFL;
+	sigemptyset(&action.sa_mask);
+	action.sa_flags = 0;
+	(void) sigaction(signo, &action, NULL);
+	raise(signo);
+}
+
 int
 main(int argc, char *argv[])
 {
+	/*
+	 * Set up signal handlers, so if we crash due to bad data in the stream
+	 * we can get more info. Unlike ztest, we don't bail out if we can't
+	 * set up signal handlers, because zstream is very useful without them.
+	 */
+	struct sigaction action = { .sa_handler = sig_handler };
+	sigemptyset(&action.sa_mask);
+	action.sa_flags = 0;
+	if (sigaction(SIGSEGV, &action, NULL) < 0) {
+		(void) fprintf(stderr, "zstream: cannot catch SIGSEGV: %s\n",
+		    strerror(errno));
+	}
+	if (sigaction(SIGABRT, &action, NULL) < 0) {
+		(void) fprintf(stderr, "zstream: cannot catch SIGABRT: %s\n",
+		    strerror(errno));
+	}
+
 	char *basename = strrchr(argv[0], '/');
 	basename = basename ? (basename + 1) : argv[0];
 	if (argc >= 1 && strcmp(basename, "zstreamdump") == 0)
diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_dump.c b/sys/contrib/openzfs/cmd/zstream/zstream_dump.c
index 6ccc57204c8..7757ee3b175 100644
--- a/sys/contrib/openzfs/cmd/zstream/zstream_dump.c
+++ b/sys/contrib/openzfs/cmd/zstream/zstream_dump.c
@@ -385,6 +385,20 @@ zstream_do_dump(int argc, char *argv[])
 				(void) ssread(buf, sz, &zc);
 				if (ferror(send_stream))
 					perror("fread");
+
+				uint8_t *nv_header = (uint8_t *)buf;
+				boolean_t xdr = nv_header[0] == NV_ENCODE_XDR;
+				boolean_t big_endian = nv_header[1] == 0;
+				const char *nc;
+				if (xdr) {
+					nc = "NV_ENCODE_XDR";
+				} else if (big_endian) {
+					nc = "NV_ENCODE_NATIVE (big-endian)";
+				} else {
+					nc = "NV_ENCODE_NATIVE (little-endian)";
+				}
+				printf("nvlist encoding = %s\n", nc);
+
 				err = nvlist_unpack(buf, sz, &nv, 0);
 				if (err) {
 					perror(strerror(err));
diff --git a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c
index 5092b534a8f..f5abfa98b18 100644
--- a/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c
+++ b/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c
@@ -99,6 +99,7 @@ zstream_do_recompress(int argc, char *argv[])
 		exit(1);
 	}
 
+	zfs_refcount_init();
 	abd_init();
 	fletcher_4_init();
 	zio_init();
@@ -353,6 +354,7 @@ zstream_do_recompress(int argc, char *argv[])
 	zio_fini();
 	zstd_fini();
 	abd_fini();
+	zfs_refcount_fini();
 
 	return (0);
 }
diff --git a/sys/contrib/openzfs/config/Rules.am b/sys/contrib/openzfs/config/Rules.am
index 5117929cac5..c4a9641f58f 100644
--- a/sys/contrib/openzfs/config/Rules.am
+++ b/sys/contrib/openzfs/config/Rules.am
@@ -23,6 +23,7 @@ AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
 AM_CFLAGS += $(DEBUG_CFLAGS)
 AM_CFLAGS += $(ASAN_CFLAGS)
 AM_CFLAGS += $(UBSAN_CFLAGS)
+AM_CFLAGS += $(PTHREAD_CFLAGS)
 AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
 AM_CFLAGS += $(NO_FORMAT_ZERO_LENGTH)
 AM_CFLAGS += $(NO_FORMAT_TRUNCATION)
@@ -57,6 +58,7 @@ endif
 AM_LDFLAGS  = $(DEBUG_LDFLAGS)
 AM_LDFLAGS += $(ASAN_LDFLAGS)
 AM_LDFLAGS += $(UBSAN_LDFLAGS)
+AM_LDFLAGS += $(PTHREAD_LIBS)
 
 if BUILD_FREEBSD
 AM_LDFLAGS += -fstack-protector-strong
diff --git a/sys/contrib/openzfs/config/ax_pthread.m4 b/sys/contrib/openzfs/config/ax_pthread.m4
new file mode 100644
index 00000000000..daea8c5987e
--- /dev/null
+++ b/sys/contrib/openzfs/config/ax_pthread.m4
@@ -0,0 +1,523 @@
+# SPDX-License-Identifier: GPL-3.0-or-later WITH Autoconf-exception-macro
+# ===========================================================================
+#        https://www.gnu.org/software/autoconf-archive/ax_pthread.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro figures out how to build C programs using POSIX threads. It
+#   sets the PTHREAD_LIBS output variable to the threads library and linker
+#   flags, and the PTHREAD_CFLAGS output variable to any special C compiler
+#   flags that are needed. (The user can also force certain compiler
+#   flags/libs to be tested by setting these environment variables.)
+#
+#   Also sets PTHREAD_CC and PTHREAD_CXX to any special C compiler that is
+#   needed for multi-threaded programs (defaults to the value of CC
+#   respectively CXX otherwise). (This is necessary on e.g. AIX to use the
+#   special cc_r/CC_r compiler alias.)
+#
+#   NOTE: You are assumed to not only compile your program with these flags,
+#   but also to link with them as well. For example, you might link with
+#   $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
+#   $PTHREAD_CXX $CXXFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
+#
+#   If you are only building threaded programs, you may wish to use these
+#   variables in your default LIBS, CFLAGS, and CC:
+#
+#     LIBS="$PTHREAD_LIBS $LIBS"
+#     CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+#     CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS"
+#     CC="$PTHREAD_CC"
+#     CXX="$PTHREAD_CXX"
+#
+#   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
+#   has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to
+#   that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
+#
+#   Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
+#   PTHREAD_PRIO_INHERIT symbol is defined when compiling with
+#   PTHREAD_CFLAGS.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a threads library
+#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
+#   is not found. If ACTION-IF-FOUND is not specified, the default action
+#   will define HAVE_PTHREAD.
+#
+#   Please let the authors know if this macro fails on any platform, or if
+#   you have any other suggestions or comments. This macro was based on work
+#   by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
+#   from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
+#   Alejandro Forero Cuervo to the autoconf macro repository. We are also
+#   grateful for the helpful feedback of numerous users.
+#
+#   Updated for Autoconf 2.68 by Daniel Richard G.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
+#   Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
+#   Copyright (c) 2019 Marc Stevens <marc.stevens@cwi.nl>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 31
+
+AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
+AC_DEFUN([AX_PTHREAD], [
+AC_REQUIRE([AC_CANONICAL_HOST])
+AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_PROG_SED])
+AC_LANG_PUSH([C])
+ax_pthread_ok=no
+
+# We used to check for pthread.h first, but this fails if pthread.h
+# requires special compiler flags (e.g. on Tru64 or Sequent).
+# It gets checked for in the link test anyway.
+
+# First of all, check if the user has set any of the PTHREAD_LIBS,
+# etcetera environment variables, and if threads linking works using
+# them:
+if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then
+        ax_pthread_save_CC="$CC"
+        ax_pthread_save_CFLAGS="$CFLAGS"
+        ax_pthread_save_LIBS="$LIBS"
+        AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"])
+        AS_IF([test "x$PTHREAD_CXX" != "x"], [CXX="$PTHREAD_CXX"])
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS])
+        AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes])
+        AC_MSG_RESULT([$ax_pthread_ok])
+        if test "x$ax_pthread_ok" = "xno"; then
+                PTHREAD_LIBS=""
+                PTHREAD_CFLAGS=""
+        fi
+        CC="$ax_pthread_save_CC"
+        CFLAGS="$ax_pthread_save_CFLAGS"
+        LIBS="$ax_pthread_save_LIBS"
+fi
+
+# We must check for the threads library under a number of different
+# names; the ordering is very important because some systems
+# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
+# libraries is broken (non-POSIX).
+
+# Create a list of thread flags to try. Items with a "," contain both
+# C compiler flags (before ",") and linker flags (after ","). Other items
+# starting with a "-" are C compiler flags, and remaining items are
+# library names, except for "none" which indicates that we try without
+# any flags at all, and "pthread-config" which is a program returning
+# the flags for the Pth emulation library.
+
+ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
+
+# The ordering *is* (sometimes) important.  Some notes on the
+# individual items follow:
+
+# pthreads: AIX (must check this before -lpthread)
+# none: in case threads are in libc; should be tried before -Kthread and
+#       other compiler flags to prevent continual compiler warnings
+# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64
+#           (Note: HP C rejects this with "bad form for `-t' option")
+# -pthreads: Solaris/gcc (Note: HP C also rejects)
+# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+#      doesn't hurt to check since this sometimes defines pthreads and
+#      -D_REENTRANT too), HP C (must be checked before -lpthread, which
+#      is present but should not be used directly; and before -mthreads,
+#      because the compiler interprets this as "-mt" + "-hreads")
+# -mthreads: Mingw32/gcc, Lynx/gcc
+# pthread: Linux, etcetera
+# --thread-safe: KAI C++
+# pthread-config: use pthread-config program (for GNU Pth library)
+
+case $host_os in
+
+        freebsd*)
+
+        # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+        # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+
+        ax_pthread_flags="-kthread lthread $ax_pthread_flags"
+        ;;
+
+        hpux*)
+
+        # From the cc(1) man page: "[-mt] Sets various -D flags to enable
+        # multi-threading and also sets -lpthread."
+
+        ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags"
+        ;;
+
+        openedition*)
+
+        # IBM z/OS requires a feature-test macro to be defined in order to
+        # enable POSIX threads at all, so give the user a hint if this is
+        # not set. (We don't define these ourselves, as they can affect
+        # other portions of the system API in unpredictable ways.)
+
+        AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING],
+            [
+#            if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS)
+             AX_PTHREAD_ZOS_MISSING
+#            endif
+            ],
+            [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])])
+        ;;
+
+        solaris*)
+
+        # On Solaris (at least, for some versions), libc contains stubbed
+        # (non-functional) versions of the pthreads routines, so link-based
+        # tests will erroneously succeed. (N.B.: The stubs are missing
+        # pthread_cleanup_push, or rather a function called by this macro,
+        # so we could check for that, but who knows whether they'll stub
+        # that too in a future libc.)  So we'll check first for the
+        # standard Solaris way of linking pthreads (-mt -lpthread).
+
+        ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags"
+        ;;
+esac
+
+# Are we compiling with Clang?
+
+AC_CACHE_CHECK([whether $CC is Clang],
+    [ax_cv_PTHREAD_CLANG],
+    [ax_cv_PTHREAD_CLANG=no
+     # Note that Autoconf sets GCC=yes for Clang as well as GCC
+     if test "x$GCC" = "xyes"; then
+        AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG],
+            [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */
+#            if defined(__clang__) && defined(__llvm__)
+             AX_PTHREAD_CC_IS_CLANG
+#            endif
+            ],
+            [ax_cv_PTHREAD_CLANG=yes])
+     fi
+    ])
+ax_pthread_clang="$ax_cv_PTHREAD_CLANG"
+
+
+# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC)
+
+# Note that for GCC and Clang -pthread generally implies -lpthread,
+# except when -nostdlib is passed.
+# This is problematic using libtool to build C++ shared libraries with pthread:
+# [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460
+# [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333
+# [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555
+# To solve this, first try -pthread together with -lpthread for GCC
+
+AS_IF([test "x$GCC" = "xyes"],
+      [ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags"])
+
+# Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first
+
+AS_IF([test "x$ax_pthread_clang" = "xyes"],
+      [ax_pthread_flags="-pthread,-lpthread -pthread"])
+
+
+# The presence of a feature test macro requesting re-entrant function
+# definitions is, on some systems, a strong hint that pthreads support is
+# correctly enabled
+
+case $host_os in
+        darwin* | hpux* | linux* | osf* | solaris*)
+        ax_pthread_check_macro="_REENTRANT"
+        ;;
+
+        aix*)
+        ax_pthread_check_macro="_THREAD_SAFE"
+        ;;
+
+        *)
+        ax_pthread_check_macro="--"
+        ;;
+esac
+AS_IF([test "x$ax_pthread_check_macro" = "x--"],
+      [ax_pthread_check_cond=0],
+      [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"])
+
+
+if test "x$ax_pthread_ok" = "xno"; then
+for ax_pthread_try_flag in $ax_pthread_flags; do
+
+        case $ax_pthread_try_flag in
+                none)
+                AC_MSG_CHECKING([whether pthreads work without any flags])
+                ;;
+
+                *,*)
+                PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"`
+                PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"`
+                AC_MSG_CHECKING([whether pthreads work with "$PTHREAD_CFLAGS" and "$PTHREAD_LIBS"])
+                ;;
+
+                -*)
+                AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag])
+                PTHREAD_CFLAGS="$ax_pthread_try_flag"
+                ;;
+
+                pthread-config)
+                AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
+                AS_IF([test "x$ax_pthread_config" = "xno"], [continue])
+                PTHREAD_CFLAGS="`pthread-config --cflags`"
+                PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+                ;;
+
+                *)
+                AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag])
+                PTHREAD_LIBS="-l$ax_pthread_try_flag"
+                ;;
+        esac
+
+        ax_pthread_save_CFLAGS="$CFLAGS"
+        ax_pthread_save_LIBS="$LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+
+        # Check for various functions.  We must include pthread.h,
+        # since some functions may be macros.  (On the Sequent, we
+        # need a special flag -Kthread to make this header compile.)
+        # We check for pthread_join because it is in -lpthread on IRIX
+        # while pthread_create is in libc.  We check for pthread_attr_init
+        # due to DEC craziness with -lpthreads.  We check for
+        # pthread_cleanup_push because it is one of the few pthread
+        # functions on Solaris that doesn't have a non-functional libc stub.
+        # We try pthread_create on general principles.
+
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
+#                       if $ax_pthread_check_cond
+#                        error "$ax_pthread_check_macro must be defined"
+#                       endif
+                        static void *some_global = NULL;
+                        static void routine(void *a)
+                          {
+                             /* To avoid any unused-parameter or
+                                unused-but-set-parameter warning.  */
+                             some_global = a;
+                          }
+                        static void *start_routine(void *a) { return a; }],
+                       [pthread_t th; pthread_attr_t attr;
+                        pthread_create(&th, 0, start_routine, 0);
+                        pthread_join(th, 0);
+                        pthread_attr_init(&attr);
+                        pthread_cleanup_push(routine, 0);
+                        pthread_cleanup_pop(0) /* ; */])],
+            [ax_pthread_ok=yes],
+            [])
+
+        CFLAGS="$ax_pthread_save_CFLAGS"
+        LIBS="$ax_pthread_save_LIBS"
+
+        AC_MSG_RESULT([$ax_pthread_ok])
+        AS_IF([test "x$ax_pthread_ok" = "xyes"], [break])
+
+        PTHREAD_LIBS=""
+        PTHREAD_CFLAGS=""
+done
+fi
+
+
+# Clang needs special handling, because older versions handle the -pthread
+# option in a rather... idiosyncratic way
+
+if test "x$ax_pthread_clang" = "xyes"; then
+
+        # Clang takes -pthread; it has never supported any other flag
+
+        # (Note 1: This will need to be revisited if a system that Clang
+        # supports has POSIX threads in a separate library.  This tends not
+        # to be the way of modern systems, but it's conceivable.)
+
+        # (Note 2: On some systems, notably Darwin, -pthread is not needed
+        # to get POSIX threads support; the API is always present and
+        # active.  We could reasonably leave PTHREAD_CFLAGS empty.  But
+        # -pthread does define _REENTRANT, and while the Darwin headers
+        # ignore this macro, third-party headers might not.)
+
+        # However, older versions of Clang make a point of warning the user
+        # that, in an invocation where only linking and no compilation is
+        # taking place, the -pthread option has no effect ("argument unused
+        # during compilation").  They expect -pthread to be passed in only
+        # when source code is being compiled.
+        #
+        # Problem is, this is at odds with the way Automake and most other
+        # C build frameworks function, which is that the same flags used in
+        # compilation (CFLAGS) are also used in linking.  Many systems
+        # supported by AX_PTHREAD require exactly this for POSIX threads
+        # support, and in fact it is often not straightforward to specify a
+        # flag that is used only in the compilation phase and not in
+        # linking.  Such a scenario is extremely rare in practice.
+        #
+        # Even though use of the -pthread flag in linking would only print
+        # a warning, this can be a nuisance for well-run software projects
+        # that build with -Werror.  So if the active version of Clang has
+        # this misfeature, we search for an option to squash it.
+
+        AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread],
+            [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG],
+            [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown
+             # Create an alternate version of $ac_link that compiles and
+             # links in two steps (.c -> .o, .o -> exe) instead of one
+             # (.c -> exe), because the warning occurs only in the second
+             # step
+             ax_pthread_save_ac_link="$ac_link"
+             ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g'
+             ax_pthread_link_step=`AS_ECHO(["$ac_link"]) | sed "$ax_pthread_sed"`
+             ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)"
+             ax_pthread_save_CFLAGS="$CFLAGS"
+             for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do
+                AS_IF([test "x$ax_pthread_try" = "xunknown"], [break])
+                CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS"
+                ac_link="$ax_pthread_save_ac_link"
+                AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
+                    [ac_link="$ax_pthread_2step_ac_link"
+                     AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
+                         [break])
+                    ])
+             done
+             ac_link="$ax_pthread_save_ac_link"
+             CFLAGS="$ax_pthread_save_CFLAGS"
+             AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no])
+             ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try"
+            ])
+
+        case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in
+                no | unknown) ;;
+                *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;;
+        esac
+
+fi # $ax_pthread_clang = yes
+
+
+
+# Various other checks:
+if test "x$ax_pthread_ok" = "xyes"; then
+        ax_pthread_save_CFLAGS="$CFLAGS"
+        ax_pthread_save_LIBS="$LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+
+        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
+        AC_CACHE_CHECK([for joinable pthread attribute],
+            [ax_cv_PTHREAD_JOINABLE_ATTR],
+            [ax_cv_PTHREAD_JOINABLE_ATTR=unknown
+             for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+                 AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
+                                                 [int attr = $ax_pthread_attr; return attr /* ; */])],
+                                [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break],
+                                [])
+             done
+            ])
+        AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \
+               test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \
+               test "x$ax_pthread_joinable_attr_defined" != "xyes"],
+              [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE],
+                                  [$ax_cv_PTHREAD_JOINABLE_ATTR],
+                                  [Define to necessary symbol if this constant
+                                   uses a non-standard name on your system.])
+               ax_pthread_joinable_attr_defined=yes
+              ])
+
+        AC_CACHE_CHECK([whether more special flags are required for pthreads],
+            [ax_cv_PTHREAD_SPECIAL_FLAGS],
+            [ax_cv_PTHREAD_SPECIAL_FLAGS=no
+             case $host_os in
+             solaris*)
+             ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS"
+             ;;
+             esac
+            ])
+        AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \
+               test "x$ax_pthread_special_flags_added" != "xyes"],
+              [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS"
+               ax_pthread_special_flags_added=yes])
+
+        AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
+            [ax_cv_PTHREAD_PRIO_INHERIT],
+            [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
+                                             [[int i = PTHREAD_PRIO_INHERIT;
+                                               return i;]])],
+                            [ax_cv_PTHREAD_PRIO_INHERIT=yes],
+                            [ax_cv_PTHREAD_PRIO_INHERIT=no])
+            ])
+        AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \
+               test "x$ax_pthread_prio_inherit_defined" != "xyes"],
+              [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])
+               ax_pthread_prio_inherit_defined=yes
+              ])
+
+        CFLAGS="$ax_pthread_save_CFLAGS"
+        LIBS="$ax_pthread_save_LIBS"
+
+        # More AIX lossage: compile with *_r variant
+        if test "x$GCC" != "xyes"; then
+            case $host_os in
+                aix*)
+                AS_CASE(["x/$CC"],
+                    [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
+                    [#handle absolute path differently from PATH based program lookup
+                     AS_CASE(["x$CC"],
+                         [x/*],
+                         [
+			   AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])
+			   AS_IF([test "x${CXX}" != "x"], [AS_IF([AS_EXECUTABLE_P([${CXX}_r])],[PTHREAD_CXX="${CXX}_r"])])
+			 ],
+                         [
+			   AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])
+			   AS_IF([test "x${CXX}" != "x"], [AC_CHECK_PROGS([PTHREAD_CXX],[${CXX}_r],[$CXX])])
+			 ]
+                     )
+                    ])
+                ;;
+            esac
+        fi
+fi
+
+test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
+test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX"
+
+AC_SUBST([PTHREAD_LIBS])
+AC_SUBST([PTHREAD_CFLAGS])
+AC_SUBST([PTHREAD_CC])
+AC_SUBST([PTHREAD_CXX])
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test "x$ax_pthread_ok" = "xyes"; then
+        ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
+        :
+else
+        ax_pthread_ok=no
+        $2
+fi
+AC_LANG_POP
+])dnl AX_PTHREAD
diff --git a/sys/contrib/openzfs/config/kernel-fs-parse.m4 b/sys/contrib/openzfs/config/kernel-fs-parse.m4
new file mode 100644
index 00000000000..7a6ffa77238
--- /dev/null
+++ b/sys/contrib/openzfs/config/kernel-fs-parse.m4
@@ -0,0 +1,34 @@
+dnl # SPDX-License-Identifier: CDDL-1.0
+dnl #
+dnl # 5.6 API change
+dnl # Before 5.6, fs_parse() took a struct fs_parameter_description
+dnl # which wraps the parameter specs with name and enum pointers. From 5.6,
+dnl # the description struct was removed and fs_parse() accepts the
+dnl # fs_parameter_spec directly.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FS_PARSE], [
+	ZFS_LINUX_TEST_SRC([fs_parse], [
+		#include <linux/fs_context.h>
+		#include <linux/fs_parser.h>
+	],[
+		static const struct fs_parameter_spec specs[] = {
+			{}
+		};
+		int test __attribute__ ((unused));
+		struct fs_context *fc __attribute__ ((unused)) = NULL;
+		struct fs_parameter param __attribute__ ((unused));
+		struct fs_parse_result result __attribute__ ((unused));
+		test = fs_parse(fc, specs, &param, &result);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FS_PARSE], [
+	AC_MSG_CHECKING([whether fs_parse() takes fs_parameter_spec directly])
+	ZFS_LINUX_TEST_RESULT([fs_parse], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FS_PARSE_TAKES_SPEC, 1,
+		    [fs_parse() takes fs_parameter_spec directly])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index b40e34d373f..55f40767567 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SRC_SECURITY_INODE
 	ZFS_AC_KERNEL_SRC_FS_CONTEXT
+	ZFS_AC_KERNEL_SRC_FS_PARSE
 	ZFS_AC_KERNEL_SRC_SB_DYING
 	ZFS_AC_KERNEL_SRC_SET_NLINK
 	ZFS_AC_KERNEL_SRC_SGET
@@ -153,9 +154,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 			;;
 	esac
 
-	AC_MSG_CHECKING([for available kernel interfaces])
-	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
-	AC_MSG_RESULT([done])
+	ZFS_LINUX_TEST_COMPILE_ALL([kabi], [for available kernel interfaces])
 ])
 
 dnl #
@@ -203,6 +202,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SECURITY_INODE
 	ZFS_AC_KERNEL_FS_CONTEXT
+	ZFS_AC_KERNEL_FS_PARSE
 	ZFS_AC_KERNEL_SB_DYING
 	ZFS_AC_KERNEL_SET_NLINK
 	ZFS_AC_KERNEL_SGET
@@ -753,6 +753,108 @@ AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [
 	], [], [yes])
 ])
 
+dnl #
+dnl # Progress output for ZFS_LINUX_TEST_COMPILE_ALL
+dnl #
+dnl # From clean, we currently have ~250 kernel tests to compile. This can
+dnl # take anywhere from a few seconds to a few minutes while we wait for
+dnl # the module build invocation to complete (see ZFS_LINUX_COMPILE).
+dnl #
+dnl # To show some progress in the main set of tests, we start a background
+dnl # job to monitor the build progress and update the output.
+dnl #
+AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_START], [
+	dnl # normal "checking for..." output
+	AC_MSG_CHECKING([$2])
+
+	dnl # don't start the background job if configure was called with
+	dnl # --silent or --quiet, or if configure's output stream is not
+	dnl # attached to a terminal
+	AS_IF([test "x$silent" != "xyes" && test -t AS_MESSAGE_FD], [
+		dnl # save "checking" message for cleanup later
+		_zfs_linux_test_progress_text="$2"
+
+		dnl # new shell job in background
+		(
+			dnl # ZFS_LINUX_CONFTEST_MAKEFILE adds one line per
+			dnl # test to the top Makefile, so the line count
+			dnl # is our target
+			total=$(wc -l < $1/Makefile)
+			count=0
+
+			dnl # eject if our parent process has gone away. this
+			dnl # is protection against the parent being killed.
+			dnl # (we can't use trap because autoconf generates
+			dnl # that and doesn't provide an easy way to hook it).
+			while kill -0 $$ 2>/dev/null ; do
+
+				dnl # ZFS_LINUX_TEST_COMPILE_ALL has a short
+				dnl # second stage for modpost, where build.log
+				dnl # is recreated. we make some effort to both
+				dnl # detect that and handle it, mostly by
+				dnl # making sure the counter never goes
+				dnl # backwards.
+				if test "$count" -lt "$total" ; then
+					dnl # if build.log went away, then
+					dnl # we never got to do a last count,
+					dnl # so we can assume they're all
+					dnl # finished and just bump the count
+					dnl # to the total
+					if ! test -f $1/build.log ; then
+						count=$total
+					else
+						dnl # look for compilation lines
+						dnl # (CC) for .o files that
+						dnl # are in a dir (so not
+						dnl # whole-of-build artifacts)
+						dnl # and only have a single
+						dnl # period (so not .mod.o
+						dnl # link artifacts)
+						count_n=$(awk '/CC/ && /\/[[^\.]]+\.o$/ { c++ } END { print c }' $1/build.log 2>/dev/null)
+						if test "x$count_n" != "x" && test "$count_n" -gt "$count" ; then
+							dnl # only move forward.
+							dnl # empty output means
+							dnl # awk failed, likely
+							dnl # build.log went away;
+							dnl # keep current count
+							count=$count_n
+						fi
+					fi
+
+					dnl # re-output the entire message with
+					dnl # the new counts
+					printf '\rchecking %s... %d/%d' "$2" "$count" "$total" >&AS_MESSAGE_FD
+				fi
+
+				dnl # yield before loop
+				sleep 0.5
+			done
+		) &
+
+		dnl # save the pid so we can kill it later
+		_zfs_linux_test_progress_pid=$!
+	])
+])
+
+AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE], [
+	dnl # only do cleanup if we actually started the job
+	AS_IF([test "x$_zfs_linux_test_progress_pid" != "x"], [
+		dnl # kill it; no-op if it already died
+		kill $_zfs_linux_test_progress_pid 2>/dev/null
+		dnl # wait for it to really go away and clean it up
+		wait $_zfs_linux_test_progress_pid 2>/dev/null
+		dnl # reprint the original checking line. the control code
+		dnl # is ANSI "erase entire line"
+		printf '\r\033\1332Kchecking %s... ' "$_zfs_linux_test_progress_text" >&AS_MESSAGE_FD
+		dnl # cleanup for next run
+		_zfs_linux_test_progress_pid=
+		_zfs_linux_test_progress_text=
+	])
+
+	dnl # normal final output for screen and config.log
+	AC_MSG_RESULT([$1])
+])
+
 dnl #
 dnl # Perform the compilation of the test cases in two phases.
 dnl #
@@ -771,6 +873,10 @@ dnl # The maximum allowed parallelism can be controlled by setting the
 dnl # TEST_JOBS environment variable.  Otherwise, it default to $(nproc).
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
+	AS_IF([test "x$2" != "x"], [
+		_ZFS_LINUX_TEST_COMPILE_PROGRESS_START([build], [$2])
+	])
+
 	dnl # Phase 1 - Compilation only, final linking is skipped.
 	ZFS_LINUX_TEST_COMPILE([$1], [build])
 
@@ -818,6 +924,10 @@ AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
 			])
 		done
 	])
+
+	AS_IF([test "x$2" != "x"], [
+		_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE([done])
+	])
 ])
 
 dnl #
diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4
index 1e1485c4cf4..7aabad3868b 100644
--- a/sys/contrib/openzfs/config/zfs-build.m4
+++ b/sys/contrib/openzfs/config/zfs-build.m4
@@ -39,6 +39,18 @@ dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics
 dnl # can ensue.)
 dnl #
 AC_DEFUN([ZFS_AC_DEBUG], [
+	dnl #
+	dnl # In the Linux kernel copy-builtin build, assertion/debug support
+	dnl # is selected by CONFIG_ZFS_DEBUG (Kconfig).
+	dnl #
+	AH_BOTTOM([
+#ifdef CONFIG_ZFS
+#undef ZFS_DEBUG
+#ifdef CONFIG_ZFS_DEBUG
+#define ZFS_DEBUG 1
+#endif
+#endif])
+
 	AC_MSG_CHECKING([whether assertion support will be enabled])
 	AC_ARG_ENABLE([debug],
 		[AS_HELP_STRING([--enable-debug],
diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac
index 3757b5e2cac..74e4ab3bdf8 100644
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@@ -54,6 +54,7 @@ AC_PROG_LN_S
 PKG_PROG_PKG_CONFIG
 AM_PROG_AS
 AM_PROG_CC_C_O
+AX_PTHREAD
 AX_CODE_COVERAGE
 _AM_PROG_TAR(pax)
 
diff --git a/sys/contrib/openzfs/contrib/debian/not-installed b/sys/contrib/openzfs/contrib/debian/not-installed
index 9c08da5a6a7..efe17c90c3b 100644
--- a/sys/contrib/openzfs/contrib/debian/not-installed
+++ b/sys/contrib/openzfs/contrib/debian/not-installed
@@ -2,7 +2,6 @@ usr/bin/zarcsummary.py
 usr/share/zfs/zfs-helpers.sh
 etc/default/zfs
 etc/init.d
-etc/sudoers.d
 etc/zfs/vdev_id.conf.alias.example
 etc/zfs/vdev_id.conf.multipath.example
 etc/zfs/vdev_id.conf.sas_direct.example
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
index d5513b7a43f..5477c7dc611 100644
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -840,27 +840,41 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
 		    errno);
 		return (-1);
 	}
-	if (chown(runtime_path, 0, 0) != 0) {
-		pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",
-		    errno);
+	const int runtime_fd = open(runtime_path,
+	    O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
+	if (runtime_fd < 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't open runtime path: %d", errno);
 		return (-1);
 	}
-	if (chmod(runtime_path, S_IRWXU) != 0) {
+	if (fchown(runtime_fd, 0, 0) != 0) {
+		pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",
+		    errno);
+		close(runtime_fd);
+		return (-1);
+	}
+	if (fchmod(runtime_fd, S_IRWXU) != 0) {
 		pam_syslog(pamh, LOG_ERR, "Can't chmod runtime path: %d",
 		    errno);
+		close(runtime_fd);
 		return (-1);
 	}
 
 	char *counter_path;
-	if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
+	if (asprintf(&counter_path, "%u", config->uid) == -1) {
+		close(runtime_fd);
 		return (-1);
+	}
 
-	const int fd = open(counter_path,
+	const int fd = openat(runtime_fd, counter_path,
 	    O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
 	    S_IRUSR | S_IWUSR);
+	int ret = errno;
+
 	free(counter_path);
+	close(runtime_fd);
+
 	if (fd < 0) {
-		pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", errno);
+		pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", ret);
 		return (-1);
 	}
 	if (flock(fd, LOCK_EX) != 0) {
@@ -871,7 +885,6 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
 	char counter[20];
 	char *pos = counter;
 	int remaining = sizeof (counter) - 1;
-	int ret;
 	counter[sizeof (counter) - 1] = 0;
 	while (remaining > 0 && (ret = read(fd, pos, remaining)) > 0) {
 		remaining -= ret;
diff --git a/sys/contrib/openzfs/copy-builtin b/sys/contrib/openzfs/copy-builtin
index 9a430bfb289..d412437f556 100755
--- a/sys/contrib/openzfs/copy-builtin
+++ b/sys/contrib/openzfs/copy-builtin
@@ -43,6 +43,17 @@ config ZFS
 
 	  To compile this file system support as a module, choose M here.
 
+	  If unsure, say N.
+
+config ZFS_DEBUG
+	bool "ZFS debugging"
+	depends on ZFS
+	help
+	  Enable ZFS debugging. This turns on all ASSERT() assertions,
+	  enables additional debug-only code paths, and promotes
+	  compiler warnings to errors. This should only be enabled for
+	  development or troubleshooting.
+
 	  If unsure, say N.
 EOF
 
diff --git a/sys/contrib/openzfs/etc/Makefile.am b/sys/contrib/openzfs/etc/Makefile.am
index 58b3cf563b6..5168c3cde13 100644
--- a/sys/contrib/openzfs/etc/Makefile.am
+++ b/sys/contrib/openzfs/etc/Makefile.am
@@ -1,10 +1,4 @@
 # SPDX-License-Identifier: CDDL-1.0
-sudoersddir = $(sysconfdir)/sudoers.d
-sudoersd_DATA = \
-	%D%/sudoers.d/zfs
-
-dist_noinst_DATA += $(sudoersd_DATA)
-
 
 sysconf_zfsdir = $(sysconfdir)/zfs
 
@@ -88,8 +82,6 @@ systemdgenerator_PROGRAMS = \
 %C%_systemd_system_generators_zfs_mount_generator_LDADD = \
 	libzfs.la
 
-%C%_systemd_system_generators_zfs_mount_generator_LDFLAGS = -pthread
-
 CPPCHECKTARGETS += $(systemdgenerator_PROGRAMS)
 endif
 
diff --git a/sys/contrib/openzfs/etc/sudoers.d/zfs b/sys/contrib/openzfs/etc/sudoers.d/zfs
deleted file mode 100644
index 82a25ba81ec..00000000000
--- a/sys/contrib/openzfs/etc/sudoers.d/zfs
+++ /dev/null
@@ -1,9 +0,0 @@
-##
-## Allow any user to run `zpool iostat/status -c smart` in order
-## to read basic SMART health statistics for a pool.
-##
-## CAUTION: Any syntax error introduced here will break sudo.
-## Editing with 'visudo' is recommended: visudo -f  /etc/sudoers.d/zfs 
-##
-
-# ALL ALL = (root) NOPASSWD: /usr/sbin/smartctl -a /dev/[hsv]d[a-z0-9]*
diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/arc_os.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/arc_os.h
index ad2aba23b90..6334d453f48 100644
--- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/arc_os.h
+++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/arc_os.h
@@ -29,6 +29,5 @@
 #define	_SYS_ARC_OS_H
 
 int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);
-int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);
 
 #endif
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
index c883836c2f8..64361bea90e 100644
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
@@ -30,7 +30,6 @@
 #include <linux/sched.h>
 
 typedef enum {
-	RW_DRIVER	= 2,
 	RW_DEFAULT	= 4,
 	RW_NOLOCKDEP	= 5
 } krw_type_t;
@@ -75,20 +74,35 @@ spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
 {
 	rwp->rw_type = type;
 }
+
+static inline void
+spl_rw_lockdep_off(void)
+{
+	lockdep_off();
+}
+
+static inline void
+spl_rw_lockdep_on(void)
+{
+	lockdep_on();
+}
+
 static inline void
 spl_rw_lockdep_off_maybe(krwlock_t *rwp)		\
 {							\
 	if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\
-		lockdep_off();				\
+		spl_rw_lockdep_off();			\
 }
 static inline void
 spl_rw_lockdep_on_maybe(krwlock_t *rwp)			\
 {							\
 	if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\
-		lockdep_on();				\
+		spl_rw_lockdep_on();			\
 }
 #else  /* CONFIG_LOCKDEP */
 #define	spl_rw_set_type(rwp, type)
+#define	spl_rw_lockdep_off()
+#define	spl_rw_lockdep_on()
 #define	spl_rw_lockdep_off_maybe(rwp)
 #define	spl_rw_lockdep_on_maybe(rwp)
 #endif /* CONFIG_LOCKDEP */
@@ -117,6 +131,56 @@ RW_READ_HELD(krwlock_t *rwp)
  * will be correctly located in the users code which is important
  * for the built in kernel lock analysis tools
  */
+#define	spl_rw_tryenter_impl(rwp, rw) /* CSTYLED */			\
+({									\
+	int _rc_ = 0;							\
+									\
+	switch (rw) {							\
+	case RW_READER:							\
+		_rc_ = down_read_trylock(SEM(rwp));			\
+		break;							\
+	case RW_WRITER:							\
+		if ((_rc_ = down_write_trylock(SEM(rwp))))		\
+			spl_rw_set_owner(rwp);				\
+		break;							\
+	default:							\
+		VERIFY(0);						\
+	}								\
+	_rc_;								\
+})
+
+#define	spl_rw_enter_impl(rwp, rw) /* CSTYLED */			\
+({									\
+	switch (rw) {							\
+	case RW_READER:							\
+		down_read(SEM(rwp));					\
+		break;							\
+	case RW_WRITER:							\
+		down_write(SEM(rwp));					\
+		spl_rw_set_owner(rwp);					\
+		break;							\
+	default:							\
+		VERIFY(0);						\
+	}								\
+})
+
+#define	spl_rw_exit_impl(rwp) /* CSTYLED */				\
+({									\
+	if (RW_WRITE_HELD(rwp)) {					\
+		spl_rw_clear_owner(rwp);				\
+		up_write(SEM(rwp));					\
+	} else {							\
+		ASSERT(RW_READ_HELD(rwp));				\
+		up_read(SEM(rwp));					\
+	}								\
+})
+
+#define	spl_rw_downgrade_impl(rwp) /* CSTYLED */			\
+({									\
+	spl_rw_clear_owner(rwp);					\
+	downgrade_write(SEM(rwp));					\
+})
+
 #define	rw_init(rwp, name, type, arg) /* CSTYLED */			\
 ({									\
 	static struct lock_class_key __key;				\
@@ -140,60 +204,60 @@ RW_READ_HELD(krwlock_t *rwp)
 
 #define	rw_tryenter(rwp, rw) /* CSTYLED */				\
 ({									\
-	int _rc_ = 0;							\
-									\
 	spl_rw_lockdep_off_maybe(rwp);					\
-	switch (rw) {							\
-	case RW_READER:							\
-		_rc_ = down_read_trylock(SEM(rwp));			\
-		break;							\
-	case RW_WRITER:							\
-		if ((_rc_ = down_write_trylock(SEM(rwp))))		\
-			spl_rw_set_owner(rwp);				\
-		break;							\
-	default:							\
-		VERIFY(0);						\
-	}								\
+	int _rc_ = spl_rw_tryenter_impl(rwp, rw);			\
 	spl_rw_lockdep_on_maybe(rwp);					\
 	_rc_;								\
 })
 
+#define	rw_tryenter_nolockdep(rwp, rw) /* CSTYLED */			\
+({									\
+	spl_rw_lockdep_off();						\
+	int _rc_ = spl_rw_tryenter_impl(rwp, rw);			\
+	spl_rw_lockdep_on();						\
+	_rc_;								\
+})
+
 #define	rw_enter(rwp, rw) /* CSTYLED */					\
 ({									\
 	spl_rw_lockdep_off_maybe(rwp);					\
-	switch (rw) {							\
-	case RW_READER:							\
-		down_read(SEM(rwp));					\
-		break;							\
-	case RW_WRITER:							\
-		down_write(SEM(rwp));					\
-		spl_rw_set_owner(rwp);					\
-		break;							\
-	default:							\
-		VERIFY(0);						\
-	}								\
+	spl_rw_enter_impl(rwp, rw);					\
 	spl_rw_lockdep_on_maybe(rwp);					\
 })
 
+#define	rw_enter_nolockdep(rwp, rw) /* CSTYLED */			\
+({									\
+	spl_rw_lockdep_off();						\
+	spl_rw_enter_impl(rwp, rw);					\
+	spl_rw_lockdep_on();						\
+})
+
 #define	rw_exit(rwp) /* CSTYLED */					\
 ({									\
 	spl_rw_lockdep_off_maybe(rwp);					\
-	if (RW_WRITE_HELD(rwp)) {					\
-		spl_rw_clear_owner(rwp);				\
-		up_write(SEM(rwp));					\
-	} else {							\
-		ASSERT(RW_READ_HELD(rwp));				\
-		up_read(SEM(rwp));					\
-	}								\
+	spl_rw_exit_impl(rwp);						\
 	spl_rw_lockdep_on_maybe(rwp);					\
 })
 
+#define	rw_exit_nolockdep(rwp) /* CSTYLED */				\
+({									\
+	spl_rw_lockdep_off();						\
+	spl_rw_exit_impl(rwp);						\
+	spl_rw_lockdep_on();						\
+})
+
 #define	rw_downgrade(rwp) /* CSTYLED */					\
 ({									\
 	spl_rw_lockdep_off_maybe(rwp);					\
-	spl_rw_clear_owner(rwp);					\
-	downgrade_write(SEM(rwp));					\
+	spl_rw_downgrade_impl(rwp);					\
 	spl_rw_lockdep_on_maybe(rwp);					\
 })
 
+#define	rw_downgrade_nolockdep(rwp) /* CSTYLED */			\
+({									\
+	spl_rw_lockdep_off();						\
+	spl_rw_downgrade_impl(rwp);					\
+	spl_rw_lockdep_on();						\
+})
+
 #endif /* _SPL_RWLOCK_H */
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
index 2b3668c6086..d6f025d0942 100644
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -95,8 +95,7 @@ typedef void arc_prune_func_t(uint64_t bytes, void *priv);
 extern uint_t zfs_arc_average_blocksize;
 extern int l2arc_exclude_special;
 
-/* generic arc_done_func_t's which you can use */
-arc_read_done_func_t arc_bcopy_func;
+/* generic arc_read_done_func_t which can be used */
 arc_read_done_func_t arc_getbuf_func;
 
 /* generic arc_prune_func_t wrapper for callbacks */
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
index dbe712e2e73..7fbf5cee4fa 100644
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -832,6 +832,8 @@ typedef struct arc_stats {
 	 * due to ARC_FLAG_UNCACHED being set.
 	 */
 	kstat_named_t arcstat_uncached_evictable_metadata;
+	/* Number of L2ARC devices currently attached across all pools. */
+	kstat_named_t arcstat_l2_ndev;
 	kstat_named_t arcstat_l2_hits;
 	kstat_named_t arcstat_l2_misses;
 	/*
@@ -1103,7 +1105,7 @@ extern arc_sums_t arc_sums;
 extern hrtime_t arc_growtime;
 extern boolean_t arc_warm;
 extern uint_t arc_grow_retry;
-extern uint_t arc_no_grow_shift;
+extern uint_t zfs_arc_no_grow_shift;
 extern uint_t arc_shrink_shift;
 extern kmutex_t arc_prune_mtx;
 extern list_t arc_prune_list;
@@ -1134,6 +1136,7 @@ extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
 extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
 extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
 extern int param_set_l2arc_dwpd_limit(ZFS_MODULE_PARAM_ARGS);
+extern int param_set_arc_no_grow_shift(ZFS_MODULE_PARAM_ARGS);
 extern void l2arc_dwpd_bump_reset(void);
 
 /* used in zdb.c */
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
index fcef32ecf9f..8e877166ada 100644
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -363,6 +363,7 @@ typedef enum {
 /* Small enough to not hog a whole line of printout in zpool(8). */
 #define	ZPROP_MAX_COMMENT	32
 #define	ZPROP_BOOLEAN_NA	2
+#define	ZPROP_BOOLEAN_INHERIT	2
 
 #define	ZPROP_VALUE		"value"
 #define	ZPROP_SOURCE		"source"
@@ -476,6 +477,8 @@ typedef enum {
 	VDEV_PROP_SCHEDULER,
 	VDEV_PROP_FDOMAIN,
 	VDEV_PROP_FGROUP,
+	VDEV_PROP_ALLOC_BIAS,
+	VDEV_PROP_ROTATIONAL,
 	VDEV_NUM_PROPS
 } vdev_prop_t;
 
@@ -491,6 +494,16 @@ typedef enum {
 	VDEV_SCHEDULER_OFF
 } vdev_scheduler_type_t;
 
+/*
+ * Allocation bias for top-level vdevs (alloc_bias property).
+ */
+typedef enum vdev_alloc_bias {
+	VDEV_BIAS_NONE,
+	VDEV_BIAS_LOG,		/* dedicated to ZIL data (SLOG) */
+	VDEV_BIAS_SPECIAL,	/* dedicated to ddt, metadata, and small blks */
+	VDEV_BIAS_DEDUP		/* dedicated to dedup metadata */
+} vdev_alloc_bias_t;
+
 /*
  * Dataset property functions shared between libzfs and kernel.
  */
@@ -919,6 +932,7 @@ typedef struct zpool_load_policy {
 #define	ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH	"vdev_enc_sysfs_path"
 
 #define	ZPOOL_CONFIG_WHOLE_DISK		"whole_disk"
+#define	ZPOOL_CONFIG_VDEV_ROTATIONAL	"rotational"
 #define	ZPOOL_CONFIG_ERRCOUNT		"error_count"
 #define	ZPOOL_CONFIG_NOT_PRESENT	"not_present"
 #define	ZPOOL_CONFIG_SPARES		"spares"
diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h
index faeb96fe965..44a4d4ddf75 100644
--- a/sys/contrib/openzfs/include/sys/metaslab_impl.h
+++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h
@@ -330,7 +330,7 @@ struct metaslab_group {
  *
  * As the space map grows (as a result of the appends) it will
  * eventually become space-inefficient.  When the metaslab's in-core
- * free tree is zfs_condense_pct/100 times the size of the minimal
+ * free tree is zfs_metaslab_condense_pct/100 times the size of the minimal
  * on-disk representation, we rewrite it in its minimized form.  If a
  * metaslab needs to condense then we must set the ms_condensing flag to
  * ensure that allocations are not performed on the metaslab that is
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
index 634594aca12..84e78f5dbc8 100644
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -155,14 +155,6 @@ struct vdev_queue {
 	kmutex_t	vq_lock;
 };
 
-typedef enum vdev_alloc_bias {
-	VDEV_BIAS_NONE,
-	VDEV_BIAS_LOG,		/* dedicated to ZIL data (SLOG) */
-	VDEV_BIAS_SPECIAL,	/* dedicated to ddt, metadata, and small blks */
-	VDEV_BIAS_DEDUP		/* dedicated to dedup metadata */
-} vdev_alloc_bias_t;
-
-
 /*
  * On-disk indirect vdev state.
  *
@@ -600,6 +592,7 @@ extern boolean_t vdev_log_state_valid(vdev_t *vd);
 extern int vdev_load(vdev_t *vd);
 extern int vdev_dtl_load(vdev_t *vd);
 extern void vdev_sync(vdev_t *vd, uint64_t txg);
+extern void vdev_sync_dispatch(vdev_t *vd, uint64_t txg);
 extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
 extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
 extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg);
diff --git a/sys/contrib/openzfs/include/sys/zap.h b/sys/contrib/openzfs/include/sys/zap.h
index 66fbc1385d2..50e7079e014 100644
--- a/sys/contrib/openzfs/include/sys/zap.h
+++ b/sys/contrib/openzfs/include/sys/zap.h
@@ -24,6 +24,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2026, TrueNAS.
  */
 
 #ifndef	_SYS_ZAP_H
@@ -121,13 +122,13 @@ typedef enum zap_flags {
 /*
  * Create a new zapobj with no attributes and return its object number.
  */
-uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
+uint64_t zap_create(objset_t *os, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,
+uint64_t zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
-uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
+uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,
+uint64_t zap_create_norm_dnsize(objset_t *os, int normflags,
     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
     int dnodesize, dmu_tx_t *tx);
 uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
@@ -137,11 +138,22 @@ uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
     zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
     int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
     int dnodesize, dmu_tx_t *tx);
+
+/*
+ * Create a zap object and return a pointer to the newly allocated dnode via
+ * the allocated_dnode argument.  The returned dnode will be held and the
+ * caller is responsible for releasing the hold by calling dnode_rele().
+ */
 uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
     dmu_object_type_t bonustype, int bonuslen, int dnodesize,
     dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx);
 
+/*
+ * Create a new zapobj with no attributes, and add an entry to an existing
+ * zapobj with the given name as key and the object number of the new zapobj as
+ * the value. Returns the object number of the new zapobj.
+ */
 uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
     uint64_t parent_obj, const char *name, dmu_tx_t *tx);
 uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
@@ -157,20 +169,21 @@ void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags,
  * Create a new zapobj with no attributes from the given (unallocated)
  * object number.
  */
-int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
+int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
+int zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
-int zap_create_claim_norm(objset_t *ds, uint64_t obj,
+int zap_create_claim_norm(objset_t *os, uint64_t obj,
     int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
-int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
+int zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj,
     int normflags, dmu_object_type_t ot,
     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
 
 /*
- * The zapobj passed in must be a valid ZAP object for all of the
- * following routines.
+ * All operations on a zapobj take either the objset/objectid pair
+ * that "names" the object, or an existing dnode_t for the object. The
+ * zapobj passed in must be a valid ZAP object.
  */
 
 /*
@@ -178,7 +191,7 @@ int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
  *
  * Frees the object number using dmu_object_free.
  */
-int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
+int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
 
 /*
  * Manipulate attributes.
@@ -207,21 +220,32 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
  * fit will be transferred to 'buf'.  If the entire attribute was not
  * transferred, the call will return EOVERFLOW.
  */
-int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
+int zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf);
+int zap_lookup_by_dnode(dnode_t *dn, const char *name,
     uint64_t integer_size, uint64_t num_integers, void *buf);
 
 /*
  * If rn_len is nonzero, realname will be set to the name of the found
  * entry (which may be different from the requested name if matchtype is
- * not MT_EXACT).
+ * not zero).
  *
  * If normalization_conflictp is not NULL, it will be set if there is
  * another name with the same case/unicode normalized form.
  */
-int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
+int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
     uint64_t integer_size, uint64_t num_integers, void *buf,
     matchtype_t mt, char *realname, int rn_len,
     boolean_t *normalization_conflictp);
+int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf,
+    matchtype_t mt, char *realname, int rn_len,
+    boolean_t *ncp);
+
+/*
+ * The _uint64 variants take an array of uint64_t as the key. The ZAP must
+ * be created with ZAP_FLAG_UINT64_KEY.
+ */
 int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
 int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
@@ -229,20 +253,31 @@ int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
 int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
     uint64_t *actual_num_integers);
-int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
+
+/*
+ * Lookup the attribute with the given name. Returns ENOENT if it does not
+ * exist, 0 if it does. This is like zap_lookup(), but may be more efficient.
+ */
+int zap_contains(objset_t *os, uint64_t zapobj, const char *name);
+int zap_contains_by_dnode(dnode_t *dn, const char *name);
+
+/*
+ * Prefetch the blocks within the ZAP where the given key is stored. The
+ * prefetch IO will occur in the background.
+ */
 int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
-int zap_prefetch_object(objset_t *os, uint64_t zapobj);
+
+/* Prefetch by uint64_t[] key. */
 int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints);
 int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
     int key_numints);
 
-int zap_lookup_by_dnode(dnode_t *dn, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf);
-int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf,
-    matchtype_t mt, char *realname, int rn_len,
-    boolean_t *ncp);
+/*
+ * Prefetch the entire ZAP object. Unlike zap_prefetch(), will block until
+ * the entire object is loaded into the ARC.
+ */
+int zap_prefetch_object(objset_t *os, uint64_t zapobj);
 
 /*
  * Create an attribute with the given name and value.
@@ -250,13 +285,15 @@ int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
  * If an attribute with the given name already exists, the call will
  * fail and return EEXIST.
  */
-int zap_add(objset_t *ds, uint64_t zapobj, const char *key,
+int zap_add(objset_t *os, uint64_t zapobj, const char *key,
     int integer_size, uint64_t num_integers,
     const void *val, dmu_tx_t *tx);
 int zap_add_by_dnode(dnode_t *dn, const char *key,
     int integer_size, uint64_t num_integers,
     const void *val, dmu_tx_t *tx);
-int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
+
+/* Add by uint64_t[] key. */
+int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, int integer_size, uint64_t num_integers,
     const void *val, dmu_tx_t *tx);
 int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
@@ -271,8 +308,12 @@ int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
  * existing attribute's integer size, in which case the attribute's
  * integer size will be updated to the new value.
  */
-int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
+int zap_update(objset_t *os, uint64_t zapobj, const char *name,
     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
+int zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size,
+    uint64_t num_integers, const void *val, dmu_tx_t *tx);
+
+/* Update by uint64_t[] key. */
 int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints,
     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
@@ -287,8 +328,12 @@ int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
  * If the requested attribute does not exist, the call will fail and
  * return ENOENT.
  */
-int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
+int zap_length(objset_t *os, uint64_t zapobj, const char *name,
     uint64_t *integer_size, uint64_t *num_integers);
+int zap_length_by_dnode(dnode_t *dn, const char *name,
+    uint64_t *integer_size, uint64_t *num_integers);
+
+/* Attribute length by uint64_t[] key. */
 int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, uint64_t *integer_size, uint64_t *num_integers);
 int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
@@ -300,10 +345,12 @@ int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
  * If the specified attribute does not exist, the call will fail and
  * return ENOENT.
  */
-int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
-int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
-    matchtype_t mt, dmu_tx_t *tx);
+int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx);
 int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
+int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
+    matchtype_t mt, dmu_tx_t *tx);
+
+/* Remove by uint64_t[] key. */
 int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, dmu_tx_t *tx);
 int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
@@ -313,9 +360,19 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
  * Returns (in *count) the number of attributes in the specified zap
  * object.
  */
-int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
+int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count);
 int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
 
+/*
+ * Lookup an existing uint64 value, add the delta value to it, and
+ * update it with the new value. If the new value is 0, removes the key
+ * entirely.
+ */
+int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
+    dmu_tx_t *tx);
+int zap_increment_by_dnode(dnode_t *dn, const char *name, int64_t delta,
+    dmu_tx_t *tx);
+
 /*
  * Returns (in name) the name of the entry whose (value & mask)
  * (za_first_integer) is value, or ENOENT if not found.  The string
@@ -324,21 +381,8 @@ int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
  */
 int zap_value_search(objset_t *os, uint64_t zapobj,
     uint64_t value, uint64_t mask, char *name, uint64_t namelen);
-
-/*
- * Transfer all the entries from fromobj into intoobj.  Only works on
- * int_size=8 num_integers=1 values.  Fails if there are any duplicated
- * entries.
- */
-int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
-
-/* Same as zap_join, but set the values to 'value'. */
-int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
-    uint64_t value, dmu_tx_t *tx);
-
-/* Same as zap_join, but add together any duplicated entries. */
-int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
-    dmu_tx_t *tx);
+int zap_value_search_by_dnode(dnode_t *dn,
+    uint64_t value, uint64_t mask, char *name, uint64_t namelen);
 
 /*
  * Manipulate entries where the name + value are the "same" (the name is
@@ -347,8 +391,10 @@ int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
 int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
 int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
 int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
-int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
-    dmu_tx_t *tx);
+
+int zap_add_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
+int zap_remove_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
+int zap_lookup_int_by_dnode(dnode_t *dn, uint64_t value);
 
 /* Here the key is an int and the value is a different int. */
 int zap_add_int_key(objset_t *os, uint64_t obj,
@@ -358,22 +404,19 @@ int zap_update_int_key(objset_t *os, uint64_t obj,
 int zap_lookup_int_key(objset_t *os, uint64_t obj,
     uint64_t key, uint64_t *valuep);
 
-int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
-    dmu_tx_t *tx);
+int zap_add_int_key_by_dnode(dnode_t *dn,
+    uint64_t key, uint64_t value, dmu_tx_t *tx);
+int zap_update_int_key_by_dnode(dnode_t *dn,
+    uint64_t key, uint64_t value, dmu_tx_t *tx);
+int zap_lookup_int_key_by_dnode(dnode_t *dn,
+    uint64_t key, uint64_t *valuep);
 
-struct zap;
-struct zap_leaf;
-typedef struct zap_cursor {
-	/* This structure is opaque! */
-	objset_t *zc_objset;
-	struct zap *zc_zap;
-	struct zap_leaf *zc_leaf;
-	uint64_t zc_zapobj;
-	uint64_t zc_serialized;
-	uint64_t zc_hash;
-	uint32_t zc_cd;
-	boolean_t zc_prefetch;
-} zap_cursor_t;
+/*
+ * The interface for listing all the attributes of a zapobj can be
+ * thought of as a cursor moving down a list of the attributes one by
+ * one.  The cookie returned by the zap_cursor_serialize routine is
+ * persistent across system calls (and across reboot, even).
+ */
 
 typedef struct {
 	int za_integer_length;
@@ -389,9 +432,6 @@ typedef struct {
 	char za_name[];
 } zap_attribute_t;
 
-void zap_init(void);
-void zap_fini(void);
-
 /*
  * Alloc and free zap_attribute_t.
  */
@@ -399,22 +439,52 @@ zap_attribute_t *zap_attribute_alloc(void);
 zap_attribute_t *zap_attribute_long_alloc(void);
 void zap_attribute_free(zap_attribute_t *attrp);
 
-/*
- * The interface for listing all the attributes of a zapobj can be
- * thought of as cursor moving down a list of the attributes one by
- * one.  The cookie returned by the zap_cursor_serialize routine is
- * persistent across system calls (and across reboot, even).
- */
+struct zap;
+struct zap_leaf;
+
+typedef struct zap_cursor {
+	/* This structure is opaque! */
+	struct zap *zc_zap;
+	struct zap_leaf *zc_leaf;
+	uint64_t zc_hash;
+	uint32_t zc_cd;
+	boolean_t zc_prefetch;
+	/*
+	 * Legacy fields to maintain source compat with Lustre, which accesses
+	 * them directly. Not to be used in new code!
+	 */
+	objset_t *zc_objset;
+	uint64_t zc_zapobj;
+} zap_cursor_t;
 
 /*
- * Initialize a zap cursor, pointing to the "first" attribute of the
- * zapobj.  You must _fini the cursor when you are done with it.
+ * Initialize a zap cursor, pointing to the "first" attribute of the zapobj.
+ * The entire zapobj will be prefetched. You must call zap_cursor_fini on
+ * the cursor when you are done with it.
  */
-void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
-void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
-    uint64_t zapobj);
+int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
+int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn);
 void zap_cursor_fini(zap_cursor_t *zc);
 
+/*
+ * Initialize a cursor at the beginning, but request that we not prefetch
+ * the entire ZAP object.
+ */
+int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
+    uint64_t zapobj);
+
+/*
+ * Initialize a zap cursor pointing to the position recorded by
+ * zap_cursor_serialize (in the "serialized" argument).  You can also
+ * use a "serialized" argument of 0 to start at the beginning of the
+ * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to
+ * zap_cursor_init(...).)
+ */
+int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,
+    uint64_t zapobj, uint64_t serialized);
+int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
+    uint64_t serialized);
+
 /*
  * Get the attribute currently pointed to by the cursor.  Returns
  * ENOENT if at the end of the attributes.
@@ -435,17 +505,6 @@ void zap_cursor_advance(zap_cursor_t *zc);
  */
 uint64_t zap_cursor_serialize(zap_cursor_t *zc);
 
-/*
- * Initialize a zap cursor pointing to the position recorded by
- * zap_cursor_serialize (in the "serialized" argument).  You can also
- * use a "serialized" argument of 0 to start at the beginning of the
- * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to
- * zap_cursor_init(...).)
- */
-void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
-    uint64_t zapobj, uint64_t serialized);
-
-
 #define	ZAP_HISTOGRAM_SIZE 10
 
 typedef struct zap_stats {
@@ -535,7 +594,12 @@ typedef struct zap_stats {
  * statistics.  This interface shouldn't be relied on unless you really
  * know what you're doing.
  */
-int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
+int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs);
+int zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs);
+
+/* ZAP subsystem setup/teardown */
+void zap_init(void);
+void zap_fini(void);
 
 #ifdef	__cplusplus
 }
diff --git a/sys/contrib/openzfs/include/sys/zap_impl.h b/sys/contrib/openzfs/include/sys/zap_impl.h
index d010c3c305c..ea8963f550f 100644
--- a/sys/contrib/openzfs/include/sys/zap_impl.h
+++ b/sys/contrib/openzfs/include/sys/zap_impl.h
@@ -26,6 +26,7 @@
  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2024, Klara, Inc.
+ * Copyright (c) 2026, TrueNAS.
  */
 
 #ifndef	_SYS_ZAP_IMPL_H
@@ -33,7 +34,6 @@
 
 #include <sys/zap.h>
 #include <sys/zfs_context.h>
-#include <sys/avl.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -62,8 +62,9 @@ typedef struct mzap_phys {
 	uint64_t mz_salt;
 	uint64_t mz_normflags;
 	uint64_t mz_pad[5];
-	mzap_ent_phys_t mz_chunk[1];
+
 	/* actually variable size depending on block size */
+	mzap_ent_phys_t mz_chunk[];
 } mzap_phys_t;
 
 typedef struct mzap_ent {
@@ -170,6 +171,9 @@ typedef struct zap {
 	} zap_u;
 } zap_t;
 
+#define	zap_f	zap_u.zap_fat
+#define	zap_m	zap_u.zap_micro
+
 static inline zap_phys_t *
 zap_f_phys(zap_t *zap)
 {
@@ -182,6 +186,10 @@ zap_m_phys(zap_t *zap)
 	return (zap->zap_dbuf->db_data);
 }
 
+/*
+ * zap_name_t carries the original key and whatever we've derived from it
+ * (normalised form, hash, etc) as we work through completing the operation.
+ */
 typedef struct zap_name {
 	zap_t *zn_zap;
 	int zn_key_intlen;
@@ -196,25 +204,94 @@ typedef struct zap_name {
 	char zn_normbuf[];
 } zap_name_t;
 
-#define	zap_f	zap_u.zap_fat
-#define	zap_m	zap_u.zap_micro
+/*
+ * Allocate a zap_name_t. The longname flag ensures there is enough room to
+ * hold a long filename when the 'longname' pool feature is active.
+ */
+zap_name_t *zap_name_alloc(zap_t *zap, boolean_t longname);
 
+/*
+ * Allocate a zap_name_t for the given key. zap_name_init_str() will be
+ * called to normalise the key and initialise the struct.
+ */
+zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
+
+/*
+ * Allocate a zap_name_t for a uint64 array key.
+ */
+zap_name_t *zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints);
+
+/*
+ * Free a zap_name_t.
+ */
+void zap_name_free(zap_name_t *zn);
+
+/*
+ * Initialise an existing zap_name_t with the normalised form of the key,
+ * computed according to the given matchtype.
+ */
+int zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt);
+
+/*
+ * Compare 'matchname' with the name represented by the zap_name_t, applying
+ * the same normalisation method first. Returns true if the normalised forms
+ * match, false otherwise.
+ */
 boolean_t zap_match(zap_name_t *zn, const char *matchname);
-int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
+
+/*
+ * Compute and return the 64-bit hash for the name, according to the name
+ * type and hash flags.
+ */
+uint64_t zap_hash(zap_name_t *zn);
+
+/*
+ * Return a zap_t for the given on-disk object, locked and ready for use.
+ * The zap_t will be allocated and loaded from disk if it's not already loaded.
+ */
+int zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
     krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
     zap_t **zapp);
-void zap_unlockdir(zap_t *zap, const void *tag);
+int zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
+    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
+    zap_t **zapp);
+
+/* Unlock and release a zap_t. */
+void zap_unlock(zap_t *zap, const void *tag);
+
+/*
+ * Try to upgrade a zap lock from READER to WRITER. If the upgrade is not
+ * possible without blocking, returns 0. If the upgrade happened, returns 1.
+ */
+int zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx);
+
+/*
+ * Upgrade a zap lock from READER to WRITER. If it can't be upgraded
+ * immediately it will block.
+ */
+void zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx);
+
+/* zap_t release function for when associated dbuf is evicted. */
 void zap_evict_sync(void *dbu);
-zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
-void zap_name_free(zap_name_t *zn);
+
+/* Misc internal state & config. */
 int zap_hashbits(zap_t *zap);
 uint32_t zap_maxcd(zap_t *zap);
 uint64_t zap_getflags(zap_t *zap);
 
+/* Microzap implementation. */
+zap_t *mzap_open(dmu_buf_t *db);
+int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
+mzap_ent_t *mze_find(zap_name_t *zn, zfs_btree_index_t *idx);
+boolean_t mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash);
+void mze_destroy(zap_t *zap);
+boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn,
+    mzap_ent_t *mze, zfs_btree_index_t *idx);
+void mzap_addent(zap_name_t *zn, uint64_t value);
+void mzap_byteswap(mzap_phys_t *buf, size_t size);
 uint64_t zap_get_micro_max_size(spa_t *spa);
 
-#define	ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
-
+/* Fatzap implementation. */
 void fzap_byteswap(void *buf, size_t size);
 int fzap_count(zap_t *zap, uint64_t *count);
 int fzap_lookup(zap_name_t *zn,
@@ -223,20 +300,17 @@ int fzap_lookup(zap_name_t *zn,
     uint64_t *actual_num_integers);
 void fzap_prefetch(zap_name_t *zn);
 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
-    const void *val, const void *tag, dmu_tx_t *tx);
-int fzap_update(zap_name_t *zn,
-    int integer_size, uint64_t num_integers, const void *val,
-    const void *tag, dmu_tx_t *tx);
+    const void *val, dmu_tx_t *tx);
+int fzap_update(zap_name_t *zn, int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx);
 int fzap_length(zap_name_t *zn,
     uint64_t *integer_size, uint64_t *num_integers);
 int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
 int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
 void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
 void zap_put_leaf(struct zap_leaf *l);
-
-int fzap_add_cd(zap_name_t *zn,
-    uint64_t integer_size, uint64_t num_integers,
-    const void *val, uint32_t cd, const void *tag, dmu_tx_t *tx);
+int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
+    const void *val, uint32_t cd, dmu_tx_t *tx);
 void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
 
 #ifdef	__cplusplus
diff --git a/sys/contrib/openzfs/include/sys/zio_impl.h b/sys/contrib/openzfs/include/sys/zio_impl.h
index 42147adaf1a..62e7e27da38 100644
--- a/sys/contrib/openzfs/include/sys/zio_impl.h
+++ b/sys/contrib/openzfs/include/sys/zio_impl.h
@@ -139,12 +139,12 @@ enum zio_stage {
 
 	ZIO_STAGE_NOP_WRITE		= 1 << 8,	/* -W---- */
 
-	ZIO_STAGE_BRT_FREE		= 1 << 9,	/* --F--- */
+	ZIO_STAGE_DDT_READ_START	= 1 << 9,	/* R----- */
+	ZIO_STAGE_DDT_READ_DONE		= 1 << 10,	/* R----- */
+	ZIO_STAGE_DDT_WRITE		= 1 << 11,	/* -W---- */
+	ZIO_STAGE_DDT_FREE		= 1 << 12,	/* --F--- */
 
-	ZIO_STAGE_DDT_READ_START	= 1 << 10,	/* R----- */
-	ZIO_STAGE_DDT_READ_DONE		= 1 << 11,	/* R----- */
-	ZIO_STAGE_DDT_WRITE		= 1 << 12,	/* -W---- */
-	ZIO_STAGE_DDT_FREE		= 1 << 13,	/* --F--- */
+	ZIO_STAGE_BRT_FREE		= 1 << 13,	/* --F--- */
 
 	ZIO_STAGE_GANG_ASSEMBLE		= 1 << 14,	/* RWFC-- */
 	ZIO_STAGE_GANG_ISSUE		= 1 << 15,	/* RWFC-- */
@@ -259,8 +259,7 @@ enum zio_stage {
 	ZIO_STAGE_DVA_FREE)
 
 #define	ZIO_DDT_FREE_PIPELINE			\
-	(ZIO_INTERLOCK_STAGES |			\
-	ZIO_STAGE_FREE_BP_INIT |		\
+	(ZIO_FREE_PIPELINE |			\
 	ZIO_STAGE_ISSUE_ASYNC |			\
 	ZIO_STAGE_DDT_FREE)
 
diff --git a/sys/contrib/openzfs/lib/libspl/Makefile.am b/sys/contrib/openzfs/lib/libspl/Makefile.am
index 8b50c65c0e6..4b097297816 100644
--- a/sys/contrib/openzfs/lib/libspl/Makefile.am
+++ b/sys/contrib/openzfs/lib/libspl/Makefile.am
@@ -63,7 +63,3 @@ libspl_la_LIBADD = \
 libspl_la_LIBADD += $(LIBATOMIC_LIBS) $(LIBCLOCK_GETTIME)
 
 libspl_assert_la_LIBADD = $(BACKTRACE_LIBS) $(LIBUNWIND_LIBS)
-
-if BUILD_FREEBSD
-libspl_assert_la_LIBADD += -lpthread
-endif
diff --git a/sys/contrib/openzfs/lib/libzfs/Makefile.am b/sys/contrib/openzfs/lib/libzfs/Makefile.am
index 450c501556e..deae3534749 100644
--- a/sys/contrib/openzfs/lib/libzfs/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfs/Makefile.am
@@ -76,7 +76,7 @@ libzfs_la_LIBADD = \
 
 libzfs_la_LIBADD += -lrt -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
 
-libzfs_la_LDFLAGS = -pthread
+libzfs_la_LDFLAGS = -version-info 7:0:0
 
 if !ASAN_ENABLED
 libzfs_la_LDFLAGS += -Wl,-z,defs
@@ -86,8 +86,6 @@ if BUILD_FREEBSD
 libzfs_la_LIBADD += -lutil -lgeom
 endif
 
-libzfs_la_LDFLAGS += -version-info 7:0:0
-
 pkgconfig_DATA += %D%/libzfs.pc
 
 dist_noinst_DATA += %D%/libzfs.abi %D%/libzfs.suppr
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
index ad28c876630..3f88f2fb83d 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi
@@ -2553,7 +2553,7 @@
     <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
     <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
     <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
-    <class-decl name='libzfs_handle' size-in-bits='18432' is-struct='yes' visibility='default' id='c8a9d9d8'>
+    <class-decl name='libzfs_handle' size-in-bits='18496' is-struct='yes' visibility='default' id='c8a9d9d8'>
       <data-member access='public' layout-offset-in-bits='0'>
         <var-decl name='libzfs_error' type-id='95e97e5e' visibility='default'/>
       </data-member>
@@ -2605,6 +2605,9 @@
       <data-member access='public' layout-offset-in-bits='18112'>
         <var-decl name='zh_mnttab' type-id='f20fbd51' visibility='default'/>
       </data-member>
+      <data-member access='public' layout-offset-in-bits='18432'>
+        <var-decl name='zh_mnttab_cache_enabled' type-id='c19b74c3' visibility='default'/>
+      </data-member>
     </class-decl>
     <class-decl name='zfs_handle' size-in-bits='4928' is-struct='yes' visibility='default' id='f6ee4445'>
       <data-member access='public' layout-offset-in-bits='0'>
@@ -6412,7 +6415,9 @@
       <enumerator name='VDEV_PROP_SCHEDULER' value='55'/>
       <enumerator name='VDEV_PROP_FDOMAIN' value='56'/>
       <enumerator name='VDEV_PROP_FGROUP' value='57'/>
-      <enumerator name='VDEV_NUM_PROPS' value='58'/>
+      <enumerator name='VDEV_PROP_ALLOC_BIAS' value='58'/>
+      <enumerator name='VDEV_PROP_ROTATIONAL' value='59'/>
+      <enumerator name='VDEV_NUM_PROPS' value='60'/>
     </enum-decl>
     <typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
     <class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c b/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c
index eac06f8f5ab..b1a2e17cb7a 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c
@@ -177,6 +177,7 @@ changelist_postfix(prop_changelist_t *clp)
 	char shareopts[ZFS_MAXPROPLEN];
 	boolean_t commit_smb_shares = B_FALSE;
 	boolean_t commit_nfs_shares = B_FALSE;
+	int rc = 0;
 
 	/*
 	 * If CL_GATHER_DONT_UNMOUNT is set, it means we don't want to (un)mount
@@ -266,7 +267,7 @@ changelist_postfix(prop_changelist_t *clp)
 		const enum sa_protocol nfs[] =
 		    {SA_PROTOCOL_NFS, SA_NO_PROTOCOL};
 		if (sharenfs && mounted) {
-			zfs_share(cn->cn_handle, nfs);
+			rc = zfs_share(cn->cn_handle, nfs);
 			commit_nfs_shares = B_TRUE;
 		} else if (cn->cn_shared || clp->cl_waslegacy) {
 			zfs_unshare(cn->cn_handle, NULL, nfs);
@@ -275,7 +276,7 @@ changelist_postfix(prop_changelist_t *clp)
 		const enum sa_protocol smb[] =
 		    {SA_PROTOCOL_SMB, SA_NO_PROTOCOL};
 		if (sharesmb && mounted) {
-			zfs_share(cn->cn_handle, smb);
+			rc = zfs_share(cn->cn_handle, smb);
 			commit_smb_shares = B_TRUE;
 		} else if (cn->cn_shared || clp->cl_waslegacy) {
 			zfs_unshare(cn->cn_handle, NULL, smb);
@@ -291,7 +292,15 @@ changelist_postfix(prop_changelist_t *clp)
 	*p++ = SA_NO_PROTOCOL;
 	zfs_commit_shares(proto);
 
-	return (0);
+	/*
+	 * It's possible rc != 0 since we set a mountpoint or option while
+	 * SMB/NFS was not running.  This is fine, and we should not return
+	 * an error up the stack.
+	 *
+	 * At this point we only want to report mountpoint/shareopts parsing
+	 * errors.
+	 */
+	return (rc == SA_SYNTAX_ERR ? rc : 0);
 }
 
 /*
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index 0b015d8bce6..f82211699f5 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -2031,12 +2031,21 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
 	return (0);
 }
 
+/*
+ * Export the pool from the system.  Setting force overrides the
+ * active-shared-spare check.  The caller must unmount all datasets
+ * in the pool first.
+ */
 int
 zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
 {
 	return (zpool_export_common(zhp, force, B_FALSE, log_str));
 }
 
+/*
+ * Force-export the pool: bypasses the active-shared-spare check, and skips
+ * writing the exported-state labels and updating the cachefile.
+ */
 int
 zpool_export_force(zpool_handle_t *zhp, const char *log_str)
 {
@@ -2574,6 +2583,10 @@ xlate_init_err(int err)
 	return (err);
 }
 
+/*
+ * Start (or cancel/suspend/uninit) the initialize operation on every
+ * leaf vdev of the pool.
+ */
 int
 zpool_initialize_one(zpool_handle_t *zhp, void *data)
 {
@@ -2685,6 +2698,10 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
 	return (err == 0 ? 0 : -1);
 }
 
+/*
+ * Start (or cancel/suspend/uninit) the initialize operation on the listed
+ * vdevs.  Returns once the new state is committed.
+ */
 int
 zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
     nvlist_t *vds)
@@ -2692,6 +2709,9 @@ zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
 	return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE));
 }
 
+/*
+ * Like zpool_initialize(), but waits for each listed vdev to finish.
+ */
 int
 zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
     nvlist_t *vds)
@@ -2746,6 +2766,10 @@ zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res)
 	}
 }
 
+/*
+ * Start (or cancel/suspend) the trim operation on every leaf vdev of
+ * the pool.
+ */
 int
 zpool_trim_one(zpool_handle_t *zhp, void *data)
 {
@@ -3393,6 +3417,11 @@ __zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
 	return (ret);
 }
 
+/*
+ * Look up a vdev in the pool by path, name, or guid.  Returns the
+ * vdev's configuration nvlist, or NULL on no match.  Also, fills
+ * in avail_spare, l2cache, and log if they are non-NULL.
+ */
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
@@ -4637,7 +4666,10 @@ zpool_reopen_one(zpool_handle_t *zhp, void *data)
 	return (0);
 }
 
-/* call into libzfs_core to execute the sync IOCTL per pool */
+/*
+ * Block until every buffered write for the pool has reached the
+ * underlying disks.
+ */
 int
 zpool_sync_one(zpool_handle_t *zhp, void *data)
 {
@@ -4913,6 +4945,10 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
 	return (0);
 }
 
+/*
+ * Format the program name and its command-line arguments into a single
+ * space-separated string.
+ */
 void
 zfs_save_arguments(int argc, char **argv, char *string, int len)
 {
@@ -4925,6 +4961,10 @@ zfs_save_arguments(int argc, char **argv, char *string, int len)
 	}
 }
 
+/*
+ * Append a message to the pool's command-history log, retrievable via
+ * "zpool history".
+ */
 int
 zpool_log_history(libzfs_handle_t *hdl, const char *message)
 {
@@ -5220,6 +5260,11 @@ zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
 	free(mntpnt);
 }
 
+/*
+ * Translate a (dataset object id, file object id) pair into a readable
+ * path.  If the dataset is mounted the result is an absolute filesystem
+ * path; otherwise it is `dataset:path`.
+ */
 void
 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len)
@@ -5227,6 +5272,10 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
 	zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_FALSE);
 }
 
+/*
+ * Translate a (dataset object id, file object id) pair into a
+ * `dataset:path` string.
+ */
 void
 zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len)
@@ -5281,6 +5330,10 @@ zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
 	return (error);
 }
 
+/*
+ * Store a boot configuration map in the bootenv area of each leaf
+ * vdev's labels.
+ */
 int
 zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
 {
@@ -5294,6 +5347,9 @@ zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
 	return (error);
 }
 
+/*
+ * Read the boot configuration map from each leaf vdev's bootenv area.
+ */
 int
 zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)
 {
@@ -5741,6 +5797,9 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
 				return (ENOENT);
 			if (prop == VDEV_PROP_SIT_OUT)
 				return (ENOENT);
+			/* Only valid for top-level vdevs */
+			if (prop == VDEV_PROP_ALLOC_BIAS)
+				return (ENOENT);
 		}
 		if (vdev_prop_index_to_string(prop, intval,
 		    (const char **)&strval) != 0)
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_share.c b/sys/contrib/openzfs/lib/libzfs/libzfs_share.c
index bfac40f17de..98a09f7f331 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_share.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_share.c
@@ -64,6 +64,10 @@ sa_enable_share(const char *zfsname, const char *mountpoint,
 {
 	VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);
 
+	int error = sa_validate_shareopts(shareopts, protocol);
+	if (error != SA_OK)
+		return (error);
+
 	const struct sa_share_impl args =
 	    init_share(zfsname, mountpoint, shareopts);
 	return (fstypes[protocol]->enable_share(&args));
@@ -111,6 +115,10 @@ sa_validate_shareopts(const char *options, enum sa_protocol protocol)
 {
 	VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);
 
+	/* error out on invalid characters */
+	if (strpbrk(options, "\a\b\f\n\r") != NULL)
+		return (SA_SYNTAX_ERR);
+
 	return (fstypes[protocol]->validate_shareopts(options));
 }
 
diff --git a/sys/contrib/openzfs/lib/libzfs_core/Makefile.am b/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
index ec7aa95aa02..751deeeb228 100644
--- a/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzfs_core/Makefile.am
@@ -33,7 +33,7 @@ libzfs_core_la_LIBADD = \
 
 libzfs_core_la_LIBADD += $(LTLIBINTL)
 
-libzfs_core_la_LDFLAGS = -pthread
+libzfs_core_la_LDFLAGS = -version-info 3:0:0
 
 if !ASAN_ENABLED
 libzfs_core_la_LDFLAGS += -Wl,-z,defs
@@ -43,8 +43,6 @@ if BUILD_FREEBSD
 libzfs_core_la_LIBADD += -lutil -lgeom
 endif
 
-libzfs_core_la_LDFLAGS += -version-info 3:0:0
-
 pkgconfig_DATA += %D%/libzfs_core.pc
 
 dist_noinst_DATA += %D%/libzfs_core.abi %D%/libzfs_core.suppr
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
index 8192553072f..22c7ceaa1ba 100644
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -166,6 +166,8 @@ nodist_libzpool_la_SOURCES = \
 	module/zfs/vdev_root.c \
 	module/zfs/vdev_trim.c \
 	module/zfs/zap.c \
+	module/zfs/zap_fat.c \
+	module/zfs/zap_impl.c \
 	module/zfs/zap_leaf.c \
 	module/zfs/zap_micro.c \
 	module/zfs/zcp.c \
@@ -212,7 +214,7 @@ libzpool_la_LIBADD = \
 
 libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -lm
 
-libzpool_la_LDFLAGS = -pthread
+libzpool_la_LDFLAGS = -version-info 7:0:0
 
 if !ASAN_ENABLED
 libzpool_la_LDFLAGS += -Wl,-z,defs
@@ -222,8 +224,6 @@ if BUILD_FREEBSD
 libzpool_la_LIBADD += -lgeom
 endif
 
-libzpool_la_LDFLAGS += -version-info 7:0:0
-
 if TARGET_CPU_POWERPC
 module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec
 module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.l$(OBJEXT): CFLAGS += -maltivec
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
index c1fe65d2ad9..9967d9af739 100644
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -4,6 +4,7 @@
 .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
 .\" Copyright (c) 2019 Datto Inc.
 .\" Copyright (c) 2023, 2024, 2025, Klara, Inc.
+.\" Copyright (c) 2026, Mateusz Piotrowski <0mp@FreeBSD.org>
 .\"
 .\" The contents of this file are subject to the terms of the Common Development
 .\" and Distribution License (the "License").  You may not use this file except
@@ -18,7 +19,7 @@
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
-.Dd September 15, 2025
+.Dd May 8, 2026
 .Dt ZFS 4
 .Os
 .
@@ -389,6 +390,18 @@ this is
 or
 .Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 .
 .
+.It Sy metaslab_df_alloc_threshold Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq u64
+Minimum size which forces the dynamic allocator to change its allocation
+strategy.
+Once the space map cannot satisfy an allocation of this size, it switches to a
+more aggressive strategy (searching by size rather than offset).
+.
+.It Sy metaslab_df_free_pct Ns = Ns Sy 4 Ns % Pq uint
+The minimum free space, in percent, which must be available in a space map to
+continue allocations in a first-fit fashion.
+Once free space drops below this level, allocations switch to a best-fit
+strategy.
+.
 .It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int
 If not searching forward (due to
 .Sy metaslab_df_max_search , metaslab_df_free_pct ,
@@ -445,6 +458,32 @@ This improves performance, especially when there are many metaslabs per vdev
 and the allocation can't actually be satisfied
 (so we would otherwise iterate all metaslabs).
 .
+.It Sy zfs_metaslab_sm_blksz_no_log Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq int
+Block size for the metaslab space maps in pools where the
+.Sy log_spacemap
+feature is disabled.
+Multiple metaslabs are modified per transaction group, so a smaller block size
+lets more, scattered I/O operations be issued.
+Must be a power of 2 greater than
+.Sy 4096 .
+This parameter can only be set at module load time.
+.
+.It Sy zfs_metaslab_sm_blksz_with_log Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int
+Block size for the metaslab space maps in pools where the
+.Sy log_spacemap
+feature is enabled.
+Changes are batched in the per-pool log spacemap and flushed to each metaslab's
+space map only occasionally, so a larger block size is more efficient.
+Must be a power of 2 greater than
+.Sy 4096 .
+This parameter can only be set at module load time.
+.
+.It Sy zfs_metaslab_condense_pct Ns = Ns Sy 200 Ns % Pq uint
+Condense an on-disk space map when its size exceeds this percentage of
+the in-memory representation.
+The minimum is
+.Sy 100 .
+.
 .It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq uint
 When a vdev is added, target this number of metaslabs per top-level vdev.
 .
@@ -768,9 +807,15 @@ See also
 which serves a similar purpose but has a higher priority if nonzero.
 .
 .It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq u64
-Percentage of ARC dnodes to try to scan in response to demand for non-metadata
-when the number of bytes consumed by dnodes exceeds
-.Sy zfs_arc_dnode_limit .
+Percentage used to size dnode prune requests.
+The request size is the larger of two values:
+.Sy zfs_arc_dnode_reduce_percent
+applied to the dnode count above
+.Sy zfs_arc_dnode_limit ,
+or
+.Sy zfs_arc_dnode_reduce_percent
+applied to the total dnode count
+when non-evictable metadata exceeds 3/4 of the metadata target.
 .
 .It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8 KiB Pc Pq uint
 The ARC's buffer hash table is sized based on the assumption of an average
@@ -911,6 +956,19 @@ but that was not proven to be useful.
 Number of missing top-level vdevs which will be allowed during
 pool import (only in read-only mode).
 .
+.It Sy zfs_max_missing_tvds_cachefile Ns = Ns Sy 2 Pq u64
+Number of missing top-level vdevs tolerated when importing a pool
+from a cachefile, before the trusted config is read from the MOS.
+A cachefile can fall out of sync with the on-disk config after a
+device removal that did not rewrite the cachefile, so the default
+of 2 still lets the import reach a copy of the MOS.
+.
+.It Sy zfs_max_missing_tvds_scan Ns = Ns Sy 0 Pq u64
+Number of missing top-level vdevs tolerated when importing a pool
+by scanning device paths, before the trusted config is read from
+the MOS.
+Defaults to 0 because a scan should detect every present device.
+.
 .It Sy zfs_max_nvlist_src_size Ns = Sy 0 Pq u64
 Maximum size in bytes allowed to be passed as
 .Sy zc_nvlist_src_size
@@ -948,8 +1006,6 @@ equivalent to the greater of the number of online CPUs and
 If less than
 .Sy arc_c No >> Sy zfs_arc_no_grow_shift
 free memory is available, the ARC is not allowed to grow.
-This parameter is
-.Fx Ns -specific .
 .
 .It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
 The ARC size is considered to be overflowing if it exceeds the current
diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7
index b45128dd924..b52c6d4b023 100644
--- a/sys/contrib/openzfs/man/man7/vdevprops.7
+++ b/sys/contrib/openzfs/man/man7/vdevprops.7
@@ -142,6 +142,8 @@ See
 .Xr zpool-attach 8 .
 .It Sy trim_support
 Indicates if a leaf device supports trim operations.
+.It Sy rotational
+Indicates whether the device backing this vdev uses rotating media.
 .El
 .Pp
 The following native properties can be used to change the behavior of a vdev.
@@ -183,9 +185,12 @@ output.
 A text comment up to 8192 characters long
 .It Sy bootsize
 The amount of space to reserve for the EFI system partition
-.It Sy failfast
+.It Sy failfast Ns = Ns Sy inherit Ns | Ns Sy on Ns | Ns Sy off
 If this device should propagate BIO errors back to ZFS, used to disable
 failfast.
+.Sy inherit
+causes the vdev to adopt the behavior of its parent vdev,
+recursively up the tree.
 .It Sy sit_out
 Only valid for
 .Sy RAIDZ
@@ -218,6 +223,21 @@ If this device should perform new allocations, used to disable a device
 when it is scheduled for later removal.
 See
 .Xr zpool-remove 8 .
+.It Sy alloc_bias Ns = Ns Sy none Ns | Ns Sy log Ns | Ns Sy special Ns | Ns Sy dedup
+Controls the allocation class for a top-level vdev.
+Changes take effect after an export and import of the pool.
+Changing to/from log is not implemented, since it may lead to data loss in
+case of a log device failure.
+Setting to
+.Sy special
+and
+.Sy dedup
+requires
+.Sy feature@allocation_classes
+to be enabled.
+At least one top-level vdev must remain in the normal
+.Pq Sy none
+class.
 .It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy on Ns | Ns Sy off
 Controls how I/O requests are added to the vdev queue when reading or
 writing to this vdev.
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
index f500e7e8a13..596e1d94e39 100644
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -284,10 +284,15 @@ Decode and display block from an embedded block pointer specified by the
 arguments.
 .It Fl f , -file-layout
 Display the file layout of an object for the disks of a raidz vdev.
+Numeric values in the display are hexadecimal.
 With
 .Fl H ,
 the output is in scripted mode for easy parsing, with all values
-being presented as 512 byte blocks.
+being presented as 512 byte blocks in decimal; with
+.Fl v ,
+the block type (parity or data) is displayed; with
+.Fl vv ,
+the offset into the file for each block is also printed.
 Only a single top-level raidz vdev is supported.
 .It Fl h , -history
 Display pool history similar to
diff --git a/sys/contrib/openzfs/man/man8/zpool-attach.8 b/sys/contrib/openzfs/man/man8/zpool-attach.8
index 04996ed4fa1..8394a5efba6 100644
--- a/sys/contrib/openzfs/man/man8/zpool-attach.8
+++ b/sys/contrib/openzfs/man/man8/zpool-attach.8
@@ -27,7 +27,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd November 8, 2023
+.Dd May 9, 2026
 .Dt ZPOOL-ATTACH 8
 .Os
 .
@@ -132,6 +132,35 @@ Waits until
 has finished resilvering or expanding before returning.
 .El
 .
+.Sh EXAMPLES
+.\" Example 1 is example 5 from zpool.8.
+.\" Make sure to update them bidirectionally
+.Ss Example 1 : No Making a non-mirrored ZFS Storage Pool mirrored
+The following command converts an existing single device
+.Ar sda
+into a mirror by attaching a second device to it,
+.Ar sdb .
+.Dl # Nm zpool Cm attach Ar tank Pa sda sdb
+.
+.Ss Example 2 : No Expanding a RAID-Z vdev with an additional disk
+The following command adds
+.Ar sdg
+to the existing
+.Ar raidz2-0
+vdev in
+.Ar tank ,
+turning a 6-wide RAID-Z2 into a 7-wide RAID-Z2:
+.Dl # Nm zpool Cm attach Ar tank raidz2-0 Pa sdg
+Progress is reported by
+.Nm zpool Cm status .
+The operation requires the
+.Sy raidz_expansion
+pool feature, and
+.Ar sdg
+must be at least as large as the smallest existing disk in the vdev.
+Old blocks keep their original data-to-parity ratio; only blocks written
+after the expansion use the new ratio.
+.
 .Sh SEE ALSO
 .Xr zpool-add 8 ,
 .Xr zpool-detach 8 ,
diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8
index 3753139bdfe..12a11058072 100644
--- a/sys/contrib/openzfs/man/man8/zpool-events.8
+++ b/sys/contrib/openzfs/man/man8/zpool-events.8
@@ -458,12 +458,12 @@ ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W----
 
 ZIO_STAGE_NOP_WRITE:0x00000100:-W----
 
-ZIO_STAGE_BRT_FREE:0x00000200:--F---
+ZIO_STAGE_DDT_READ_START:0x00000200:R-----
+ZIO_STAGE_DDT_READ_DONE:0x00000400:R-----
+ZIO_STAGE_DDT_WRITE:0x00000800:-W----
+ZIO_STAGE_DDT_FREE:0x00001000:--F---
 
-ZIO_STAGE_DDT_READ_START:0x00000400:R-----
-ZIO_STAGE_DDT_READ_DONE:0x00000800:R-----
-ZIO_STAGE_DDT_WRITE:0x00001000:-W----
-ZIO_STAGE_DDT_FREE:0x00002000:--F---
+ZIO_STAGE_BRT_FREE:0x00002000:--F---
 
 ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC--
 ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC--
diff --git a/sys/contrib/openzfs/man/man8/zpool-iostat.8 b/sys/contrib/openzfs/man/man8/zpool-iostat.8
index 4abe0895064..16d469849ee 100644
--- a/sys/contrib/openzfs/man/man8/zpool-iostat.8
+++ b/sys/contrib/openzfs/man/man8/zpool-iostat.8
@@ -109,10 +109,7 @@ environment variable set.
 If a script requires the use of a privileged command, like
 .Xr smartctl 8 ,
 then it's recommended you allow the user access to it in
-.Pa /etc/sudoers
-or add the user to the
-.Pa /etc/sudoers.d/zfs
-file.
+.Pa /etc/sudoers .
 .Pp
 If
 .Fl c
diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8
index 4b07f96bbcb..25dff473c30 100644
--- a/sys/contrib/openzfs/man/man8/zpool.8
+++ b/sys/contrib/openzfs/man/man8/zpool.8
@@ -245,6 +245,7 @@ Invalid command line options were specified.
 .
 .Sh EXAMPLES
 .\" Examples 1, 2, 3, 4, 12, 13 are shared with zpool-create.8.
+.\" Example 5 is shared with zpool-attach.8.
 .\" Examples 6, 14 are shared with zpool-add.8.
 .\" Examples 7, 16 are shared with zpool-list.8.
 .\" Examples 8 are shared with zpool-destroy.8.
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
index 47e739ea4d6..fa4085c84b0 100644
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -4,9 +4,11 @@
 
 ZFS_MODULE_CFLAGS += -std=gnu11 -Wno-declaration-after-statement
 ZFS_MODULE_CFLAGS += -Wmissing-prototypes
-ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@
+ZFS_MODULE_CFLAGS += @KERNEL_NO_FORMAT_ZERO_LENGTH@
 
 ifneq ($(KBUILD_EXTMOD),)
+ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
+ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
 zfs_include = @abs_top_srcdir@/include
 icp_include = @abs_srcdir@/icp/include
 zstd_include = @abs_srcdir@/zstd/include
@@ -16,6 +18,12 @@ ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include
 src = @abs_srcdir@
 obj = @abs_builddir@
 else
+ifeq ($(CONFIG_ZFS_DEBUG),y)
+ZFS_MODULE_CFLAGS += -Werror
+ZFS_MODULE_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
+else
+ZFS_MODULE_CPPFLAGS += -UDEBUG -DNDEBUG
+endif
 zfs_include = $(srctree)/include/zfs
 icp_include = $(src)/icp/include
 zstd_include = $(src)/zstd/include
@@ -28,7 +36,6 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl
 ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
 ZFS_MODULE_CFLAGS += -I$(zfs_include)
 ZFS_MODULE_CPPFLAGS += -D_KERNEL
-ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
 
 # KASAN enables -Werror=frame-larger-than=1024, which
 # breaks oh so many parts of our build.
@@ -408,6 +415,8 @@ ZFS_OBJS := \
 	vdev_root.o \
 	vdev_trim.o \
 	zap.o \
+	zap_fat.o \
+	zap_impl.o \
 	zap_leaf.o \
 	zap_micro.o \
 	zcp.o \
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
index 30cf741b965..a0ddbeb9ae6 100644
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -65,6 +65,12 @@ CFLAGS+= -DZFS_DEBUG -g
 CFLAGS += -DNDEBUG
 .endif
 
+.for _SAN in KASAN KMSAN KUBSAN
+.if defined(WITH_${_SAN}) && ${WITH_${_SAN}} == "true"
+KERN_OPTS_EXTRA+= ${_SAN}
+.endif
+.endfor
+
 .if defined(WITH_GCOV) && ${WITH_GCOV} == "true"
 CFLAGS+=	 -fprofile-arcs -ftest-coverage
 .endif
@@ -345,6 +351,8 @@ SRCS+=	abd.c \
 	vdev_root.c \
 	vdev_trim.c \
 	zap.c \
+	zap_fat.c \
+	zap_impl.c \
 	zap_leaf.c \
 	zap_micro.c \
 	zcp.c \
@@ -475,8 +483,8 @@ CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
 CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
 CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual
 CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
+CFLAGS.zap_impl.c= -Wno-cast-qual
 CFLAGS.zap_leaf.c= -Wno-cast-qual
-CFLAGS.zap_micro.c= -Wno-cast-qual
 CFLAGS.zcp.c= -Wno-cast-qual
 CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
 CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/contrib/openzfs/module/nvpair/nvpair.c b/sys/contrib/openzfs/module/nvpair/nvpair.c
index 07ac102145e..52678bb2bad 100644
--- a/sys/contrib/openzfs/module/nvpair/nvpair.c
+++ b/sys/contrib/openzfs/module/nvpair/nvpair.c
@@ -135,7 +135,8 @@
 #define	NVP_SIZE_CALC(name_len, data_len) \
 	(NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len))
 
-static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);
+static int i_get_value_size(data_type_t type, const void *data, uint_t nelem,
+    size_t max_size);
 static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,
     uint_t nelem, const void *data);
 
@@ -810,8 +811,10 @@ i_validate_nvpair(nvpair_t *nvp)
 	 * verify nvp_type, nvp_value_elem, and also possibly
 	 * verify string values and get the value size.
 	 */
-	size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));
 	size1 = nvp->nvp_size - NVP_VALOFF(nvp);
+	size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp),
+	    size1);
+
 	if (size2 < 0 || size1 != NV_ALIGN(size2))
 		return (EFAULT);
 
@@ -1002,12 +1005,21 @@ nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
  * 	DATA_TYPE_STRING    	and
  *	DATA_TYPE_STRING_ARRAY
  * Is data == NULL then the size of the string(s) is excluded.
+ *
+ * If 'max_size' is non-zero, then don't look beyond 'max_size' number of
+ * bytes when calculating a value size. Note that 'max_size' should include
+ * the NULL terminator byte when calculating string size.  If 'max_size' is 0,
+ * it is ignored.
  */
 static int
-i_get_value_size(data_type_t type, const void *data, uint_t nelem)
+i_get_value_size(data_type_t type, const void *data, uint_t nelem,
+    size_t max_size)
 {
 	uint64_t value_sz;
 
+	if (max_size == 0)
+		max_size = INT32_MAX;
+
 	if (i_validate_type_nelem(type, nelem) != 0)
 		return (-1);
 
@@ -1052,10 +1064,15 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)
 		break;
 #endif
 	case DATA_TYPE_STRING:
-		if (data == NULL)
+		if (data == NULL) {
 			value_sz = 0;
-		else
-			value_sz = strlen(data) + 1;
+		} else {
+			value_sz = strnlen(data, max_size);
+			if (value_sz >= max_size) {
+				return (-1);	/* string not terminated */
+			}
+			value_sz += 1;
+		}
 		break;
 	case DATA_TYPE_BOOLEAN_ARRAY:
 		value_sz = (uint64_t)nelem * sizeof (boolean_t);
@@ -1089,16 +1106,23 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)
 		break;
 	case DATA_TYPE_STRING_ARRAY:
 		value_sz = (uint64_t)nelem * sizeof (uint64_t);
-
 		if (data != NULL) {
 			char *const *strs = data;
 			uint_t i;
+			size_t newsize;
 
 			/* no alignment requirement for strings */
 			for (i = 0; i < nelem; i++) {
 				if (strs[i] == NULL)
 					return (-1);
-				value_sz += strlen(strs[i]) + 1;
+
+				newsize = strnlen(strs[i], max_size);
+
+				if (newsize == max_size)
+					return (-1);	/* not terminated */
+
+				value_sz += newsize + 1; /* +1 for NULL */
+				max_size -= newsize + 1;
 			}
 		}
 		break;
@@ -1163,7 +1187,7 @@ nvlist_add_common(nvlist_t *nvl, const char *name,
 	 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
 	 * is the size of the string(s) included.
 	 */
-	if ((value_sz = i_get_value_size(type, data, nelem)) < 0)
+	if ((value_sz = i_get_value_size(type, data, nelem, 0)) < 0)
 		return (EINVAL);
 
 	if (i_validate_nvpair_value(type, nelem, data) != 0)
@@ -1588,7 +1612,7 @@ nvpair_value_common(const nvpair_t *nvp, data_type_t type, uint_t *nelem,
 #endif
 		if (data == NULL)
 			return (EINVAL);
-		if ((value_sz = i_get_value_size(type, NULL, 1)) < 0)
+		if ((value_sz = i_get_value_size(type, NULL, 1, 0)) < 0)
 			return (EINVAL);
 		memcpy(data, NVP_VALUE(nvp), (size_t)value_sz);
 		if (nelem != NULL)
@@ -3019,7 +3043,8 @@ nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
 	 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
 	 * is the size of the string(s) excluded.
 	 */
-	if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)
+	if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp),
+	    NVP_SIZE(nvp))) < 0)
 		return (EFAULT);
 
 	if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)
@@ -3333,7 +3358,7 @@ nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
 	 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
 	 * is the size of the string(s) excluded.
 	 */
-	if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)
+	if ((value_sz = i_get_value_size(type, NULL, nelem, NVP_SIZE(nvp))) < 0)
 		return (EFAULT);
 
 	/* if there is no data to extract then return */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 02a2870c02b..7cb390cab23 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -72,9 +72,6 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, free_target,
     param_set_arc_free_target, 0, CTLFLAG_RW,
 	"Desired number of free pages below which ARC triggers reclaim");
-ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
-    param_set_arc_no_grow_shift, 0, ZMOD_RW,
-	"log2(fraction of ARC which must be free to allow growing)");
 
 int64_t
 arc_available_memory(void)
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 22498bb721e..447aa5f8300 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -292,7 +292,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 {
 	int err, val;
 
-	val = arc_no_grow_shift;
+	val = zfs_arc_no_grow_shift;
 	err = sysctl_handle_int(oidp, &val, 0, req);
 	if (err != 0 || req->newptr == NULL)
 		return (err);
@@ -300,7 +300,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 	if (val < 0 || val >= arc_shrink_shift)
 		return (EINVAL);
 
-	arc_no_grow_shift = val;
+	zfs_arc_no_grow_shift = val;
 
 	if (arg2 != 0)
 		warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift");
@@ -541,14 +541,14 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log,
 
 /*
  * The in-core space map representation is more compact than its on-disk form.
- * The zfs_condense_pct determines how much more compact the in-core
+ * The zfs_metaslab_condense_pct determines how much more compact the in-core
  * space map representation must be before we compact it on-disk.
  * Values should be greater than or equal to 100.
  */
-extern uint_t zfs_condense_pct;
+extern uint_t zfs_metaslab_condense_pct;
 
-SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
-	CTLFLAG_RWTUN, &zfs_condense_pct, 0,
+SYSCTL_UINT(_vfs_zfs, OID_AUTO, metaslab_condense_pct,
+	CTLFLAG_RWTUN, &zfs_metaslab_condense_pct, 0,
 	"Condense on-disk spacemap when it is more than this many percents"
 	" of in-memory counterpart");
 
@@ -617,18 +617,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval,
 	"Configuration cache file write, retry after failure, interval"
 	" (seconds)");
 
-extern uint64_t zfs_max_missing_tvds_cachefile;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile,
-	CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0,
-	"Allow importing pools with missing top-level vdevs in cache file");
-
-extern uint64_t zfs_max_missing_tvds_scan;
-
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan,
-	CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0,
-	"Allow importing pools with missing top-level vdevs during scan");
-
 /* spa_misc.c */
 
 extern int zfs_flags;
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
index 9fe4042b507..6e340261980 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
@@ -188,6 +188,12 @@ spl_kvmalloc(size_t size, gfp_t lflags)
 		return (ptr);
 	}
 
+	/*
+	 * vmalloc fallback. KM_VMEM may not have been requested originally if
+	 * we've come through spl_kmem_alloc_impl(), so we need to remove
+	 * __GFP_COMP, which is not a valid flag for vmalloc.
+	 */
+	lflags &= ~__GFP_COMP;
 	return (spl_vmalloc(size, lflags));
 }
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
index dbc9aad936b..05f4fb51b4b 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -410,6 +410,24 @@ param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
 	return (0);
 }
 
+int
+param_set_arc_no_grow_shift(const char *buf, zfs_kernel_param_t *kp)
+{
+	unsigned long shift;
+	int rc = kstrtoul(buf, 0, &shift);
+
+	/* Reject input that kstrtoul() could not parse. */
+	if (rc != 0)
+		return (SET_ERROR(rc));
+
+	/* The new value has to stay below arc_shrink_shift. */
+	if (arc_shrink_shift <= shift)
+		return (-SET_ERROR(EINVAL));
+
+	zfs_arc_no_grow_shift = shift;
+	return (0);
+}
+
 int
 param_set_l2arc_dwpd_limit(const char *buf, zfs_kernel_param_t *kp)
 {
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
index 66e10584ab5..7cc19fe5afb 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@@ -931,8 +931,14 @@ vdev_disk_io_rw(zio_t *zio)
 		return (SET_ERROR(EIO));
 	}
 
+	vdev_t *iter = v;
+	while (iter != NULL && iter->vdev_failfast == ZPROP_BOOLEAN_INHERIT)
+		iter = iter->vdev_parent;
+
+	boolean_t failfast = iter ? iter->vdev_failfast == 1 :
+	    vdev_prop_default_numeric(VDEV_PROP_FAILFAST);
 	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
-	    v->vdev_failfast == B_TRUE) {
+	    failfast) {
 		bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
 		    zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
 	}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
index d7b50242992..27f3bbb46f4 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -1689,6 +1689,24 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs)
 	return (0);
 }
 
+/*
+ * Starting at zp, walk z_all_znodes and hand back the first znode whose inode
+ * igrab() can take a reference on; NULL means the walk ran off the end of
+ * the list.  The caller must hold z_znodes_lock across the walk.  The
+ * reference igrab() took is left in place on the returned znode.
+ */
+static znode_t *
+zfs_resume_hold_next_znode(zfsvfs_t *zfsvfs, znode_t *zp)
+{
+	ASSERT(MUTEX_HELD(&zfsvfs->z_znodes_lock));
+
+	while (zp != NULL && igrab(ZTOI(zp)) == NULL)
+		zp = list_next(&zfsvfs->z_all_znodes, zp);
+
+	/* zp is now either a referenced znode or NULL */
+	return (zp);
+}
+
 /*
  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
  * is an invariant across any of the operations that can be performed while the
@@ -1732,13 +1750,23 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 	 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
 	 * and mark it stale.  This prevents a collision if a new
 	 * inode/object is created which must use the same inode
-	 * number.  The stale inode will be be released when the
-	 * VFS prunes the dentry holding the remaining references
-	 * on the stale inode.
+	 * number.  The stale inode will be released when the VFS
+	 * prunes the dentry holding the remaining references on
+	 * the stale inode.
+	 *
+	 * zfs_rezget() takes the per-object znode hold lock.  Pin each znode
+	 * while holding z_znodes_lock, then drop the list lock before calling
+	 * zfs_rezget() to preserve the normal zh_lock -> z_znodes_lock order.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
-	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
-	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
+	zp = zfs_resume_hold_next_znode(zfsvfs,
+	    list_head(&zfsvfs->z_all_znodes));
+	while (zp != NULL) {
+		znode_t *next = zfs_resume_hold_next_znode(zfsvfs,
+		    list_next(&zfsvfs->z_all_znodes, zp));
+
+		mutex_exit(&zfsvfs->z_znodes_lock);
+
 		err2 = zfs_rezget(zp);
 		if (err2) {
 			zpl_d_drop_aliases(ZTOI(zp));
@@ -1747,9 +1775,14 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 
 		/* see comment in zfs_suspend_fs() */
 		if (zp->z_suspended) {
-			zfs_zrele_async(zp);
 			zp->z_suspended = B_FALSE;
+			zfs_zrele_async(zp);
 		}
+
+		zfs_zrele_async(zp);
+
+		mutex_enter(&zfsvfs->z_znodes_lock);
+		zp = next;
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index e65f8123012..d6dad70ae09 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -2434,9 +2434,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 
 	if (attrzp) {
+		/*
+		 * attrzp is zp's hidden xattr directory, so the second
+		 * znode lock acquisition is nested rather than recursive.
+		 */
 		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
-			mutex_enter(&attrzp->z_acl_lock);
-		mutex_enter(&attrzp->z_lock);
+			mutex_enter_nested(&attrzp->z_acl_lock, NESTED_SINGLE);
+		mutex_enter_nested(&attrzp->z_lock, NESTED_SINGLE);
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
 		    sizeof (attrzp->z_pflags));
@@ -4074,18 +4078,32 @@ zfs_inactive(struct inode *ip)
 {
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
+	krwlock_t *zti_lock = &zfsvfs->z_teardown_inactive_lock;
 	uint64_t atime[2];
 	int error;
 	int need_unlock = 0;
+	boolean_t no_lockdep = B_FALSE;
 
 	/* Only read lock if we haven't already write locked, e.g. rollback */
-	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
+	if (!RW_WRITE_HELD(zti_lock)) {
 		need_unlock = 1;
-		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+		/*
+		 * kswapd reaches evict_inode() with fs_reclaim held.  Suppress
+		 * lockdep only for this reclaim-thread acquire/release pair.
+		 */
+		no_lockdep = current_is_reclaim_thread();
+		if (no_lockdep)
+			rw_enter_nolockdep(zti_lock, RW_READER);
+		else
+			rw_enter(zti_lock, RW_READER);
 	}
 	if (zp->z_sa_hdl == NULL) {
-		if (need_unlock)
-			rw_exit(&zfsvfs->z_teardown_inactive_lock);
+		if (need_unlock) {
+			if (no_lockdep)
+				rw_exit_nolockdep(zti_lock);
+			else
+				rw_exit(zti_lock);
+		}
 		return;
 	}
 
@@ -4111,8 +4129,12 @@ zfs_inactive(struct inode *ip)
 	}
 
 	zfs_zinactive(zp);
-	if (need_unlock)
-		rw_exit(&zfsvfs->z_teardown_inactive_lock);
+	if (need_unlock) {
+		if (no_lockdep)
+			rw_exit_nolockdep(zti_lock);
+		else
+			rw_exit(zti_lock);
+	}
 }
 
 /*
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
index 2cd0f17c860..d7194e4f1f7 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
@@ -550,10 +550,11 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
  *
  * Finally, all filesystems get automatic handling for the 'source' option,
  * that is, the "name" of the filesystem (the first column of df(1)'s output).
- * However, this only happens if the handler does not otherwise handle
- * the 'source' option. Since we handle _all_ options because of 'sloppy', we
- * deal with this explicitly by calling into the kernel's helper for this,
- * vfs_parse_fs_param_source(), which sets up fc->source.
+ * However, this only happens if the handler does not otherwise handle the
+ * 'source' option. Since we handle _all_ options because of 'sloppy', we have
+ * to handle it ourselves. Normally we would call vfs_parse_fs_param_source()
+ * to deal with this, but that didn't appear until 5.14, and it's small enough
+ * that we can just handle it ourselves.
  *
  *	source
  *
@@ -565,6 +566,7 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
  */
 
 enum {
+	Opt_source,
 	Opt_exec, Opt_suid, Opt_dev,
 	Opt_atime, Opt_relatime, Opt_strictatime,
 	Opt_saxattr, Opt_dirxattr, Opt_noxattr,
@@ -574,6 +576,8 @@ enum {
 };
 
 static const struct fs_parameter_spec zpl_param_spec[] = {
+	fsparam_string("source",	Opt_source),
+
 	fsparam_flag_no("exec",		Opt_exec),
 	fsparam_flag_no("suid",		Opt_suid),
 	fsparam_flag_no("dev",		Opt_dev),
@@ -609,18 +613,34 @@ static const struct fs_parameter_spec zpl_param_spec[] = {
 	{}
 };
 
+/*
+ * Thin wrapper that hides the two fs_parse() calling conventions.  From 5.6
+ * on, fs_parse() accepts the fs_parameter_spec table directly; before that
+ * it expected a struct fs_parameter_description wrapping the table, so one
+ * is built here when HAVE_FS_PARSE_TAKES_SPEC is not defined.
+ */
+static int
+zpl_fs_parse(struct fs_context *fc, struct fs_parameter *param,
+	struct fs_parse_result *result)
+{
+#ifndef HAVE_FS_PARSE_TAKES_SPEC
+	static const struct fs_parameter_description zpl_param_desc = {
+		.name = "zfs",
+		.specs = zpl_param_spec,
+	};
+	return (fs_parse(fc, &zpl_param_desc, param, result));
+#else
+	return (fs_parse(fc, zpl_param_spec, param, result));
+#endif
+}
+
 static int
 zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	vfs_t *vfs = fc->fs_private;
 
-	/* Handle 'source' explicitly so we don't trip on it as an unknown. */
-	int opt = vfs_parse_fs_param_source(fc, param);
-	if (opt != -ENOPARAM)
-		return (opt);
-
 	struct fs_parse_result result;
-	opt = fs_parse(fc, zpl_param_spec, param, &result);
+	int opt = zpl_fs_parse(fc, param, &result);
 	if (opt == -ENOPARAM) {
 		/*
 		 * Convert unknowns to warnings, to work around the whole
@@ -632,6 +652,16 @@ zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		return (opt);
 
 	switch (opt) {
+	case Opt_source:
+		if (fc->source != NULL) {
+			cmn_err(CE_NOTE,
+			    "ZFS: multiple 'source' options not supported");
+			return (-SET_ERROR(EINVAL));
+		}
+		fc->source = param->string;
+		param->string = NULL;
+		break;
+
 	case Opt_exec:
 		vfs->vfs_exec = !result.negated;
 		vfs->vfs_do_exec = B_TRUE;
@@ -794,7 +824,7 @@ zpl_parse_monolithic(struct fs_context *fc, void *data)
 
 		/* Check if this is one of our options. */
 		struct fs_parse_result result;
-		int opt = fs_parse(fc, zpl_param_spec, &param, &result);
+		int opt = zpl_fs_parse(fc, &param, &result);
 		if (opt >= 0) {
 			/*
 			 * We already know this one of our options, so a
@@ -874,9 +904,14 @@ zpl_get_tree(struct fs_context *fc)
 	if (sb->s_root == NULL) {
 		vfs_t *vfs = fc->fs_private;
 
-		/* Apply readonly flag as mount option */
-		if (fc->sb_flags & SB_RDONLY) {
-			vfs->vfs_readonly = B_TRUE;
+		/*
+		 * If SB_RDONLY was set/cleared from mount options, update
+		 * them in the options struct so we set up the filesystem
+		 * in the proper state.
+		 */
+		if (fc->sb_flags_mask & SB_RDONLY) {
+			vfs->vfs_readonly =
+			    (fc->sb_flags & SB_RDONLY) ? B_TRUE : B_FALSE;
 			vfs->vfs_do_readonly = B_TRUE;
 		}
 
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
index d93282db815..68050c870de 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@@ -701,6 +701,24 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
  * ZFS allows extended user attributes to be disabled administratively
  * by setting the 'xattr=off' property on the dataset.
  */
+
+/*
+ * Write prefix followed by name into buf and NUL-terminate the result.
+ * Linux fs/xattr.c (import_xattr_name) limits the full xattr name to
+ * XATTR_NAME_MAX before handlers run, so XATTR_NAME_MAX + 1 bytes suffice.
+ */
+static inline void
+zpl_xattr_join_name(char *buf, size_t buflen, const char *prefix,
+    size_t prefix_len, const char *name, size_t name_len)
+{
+	ASSERT3U(prefix_len + name_len + 1, <=, buflen);
+
+	char *tail = buf + prefix_len;
+	memcpy(buf, prefix, prefix_len);
+	memcpy(tail, name, name_len);
+	tail[name_len] = '\0';
+}
+
 static int
 __zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
     const char *name, size_t name_len)
@@ -726,9 +744,13 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
 	 * try again without the namespace prefix for compatibility with
 	 * other platforms.
 	 */
-	char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
+	char xattr_name[XATTR_NAME_MAX + 1];
+
+	zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
+	    XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,
+	    name, strlen(name));
+
 	error = zpl_xattr_get(ip, xattr_name, value, size);
-	kmem_strfree(xattr_name);
 	if (error == -ENODATA)
 		error = zpl_xattr_get(ip, name, value, size);
 
@@ -758,8 +780,13 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
 	 *   XATTR_CREATE: fail if xattr already exists
 	 *   XATTR_REPLACE: fail if xattr does not exist
 	 */
-	char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
+	char prefixed_name[XATTR_NAME_MAX + 1];
 	const char *clear_name, *set_name;
+
+	zpl_xattr_join_name(prefixed_name, sizeof (prefixed_name),
+	    XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,
+	    name, strlen(name));
+
 	if (zfs_xattr_compat) {
 		clear_name = prefixed_name;
 		set_name = name;
@@ -776,7 +803,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
 	 * because it already exists.  Stop here.
 	 */
 	if (error == -EEXIST)
-		goto out;
+		return (error);
 	/*
 	 * If XATTR_REPLACE was specified and we succeeded to clear
 	 * an xattr, we don't need to replace anything when setting
@@ -788,10 +815,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
 	/*
 	 * Set the new value with the configured name format.
 	 */
-	error = zpl_xattr_set(ip, set_name, value, size, flags);
-out:
-	kmem_strfree(prefixed_name);
-	return (error);
+	return (zpl_xattr_set(ip, set_name, value, size, flags));
 }
 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
 
@@ -824,17 +848,16 @@ static int
 __zpl_xattr_trusted_get(struct inode *ip, const char *name,
     void *value, size_t size)
 {
-	char *xattr_name;
-	int error;
+	char xattr_name[XATTR_NAME_MAX + 1];
 
 	if (!capable(CAP_SYS_ADMIN))
 		return (-EACCES);
-	/* xattr_resolve_name will do this for us if this is defined */
-	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	kmem_strfree(xattr_name);
 
-	return (error);
+	zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
+	    XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,
+	    name, strlen(name));
+
+	return (zpl_xattr_get(ip, xattr_name, value, size));
 }
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
 
@@ -844,17 +867,16 @@ __zpl_xattr_trusted_set(zidmap_t *user_ns,
     const void *value, size_t size, int flags)
 {
 	(void) user_ns;
-	char *xattr_name;
-	int error;
+	char xattr_name[XATTR_NAME_MAX + 1];
 
 	if (!capable(CAP_SYS_ADMIN))
 		return (-EACCES);
-	/* xattr_resolve_name will do this for us if this is defined */
-	xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	kmem_strfree(xattr_name);
 
-	return (error);
+	zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
+	    XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,
+	    name, strlen(name));
+
+	return (zpl_xattr_set(ip, xattr_name, value, size, flags));
 }
 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
 
@@ -889,14 +911,13 @@ static int
 __zpl_xattr_security_get(struct inode *ip, const char *name,
     void *value, size_t size)
 {
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	kmem_strfree(xattr_name);
+	char xattr_name[XATTR_NAME_MAX + 1];
 
-	return (error);
+	zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
+	    XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,
+	    name, strlen(name));
+
+	return (zpl_xattr_get(ip, xattr_name, value, size));
 }
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
 
@@ -906,14 +927,13 @@ __zpl_xattr_security_set(zidmap_t *user_ns,
     const void *value, size_t size, int flags)
 {
 	(void) user_ns;
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	kmem_strfree(xattr_name);
+	char xattr_name[XATTR_NAME_MAX + 1];
 
-	return (error);
+	zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
+	    XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,
+	    name, strlen(name));
+
+	return (zpl_xattr_set(ip, xattr_name, value, size, flags));
 }
 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
 
diff --git a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
index 0cb9f584acc..41a2313e575 100644
--- a/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
+++ b/sys/contrib/openzfs/module/zcommon/zfs_valstr.c
@@ -238,11 +238,11 @@ _VALSTR_BITFIELD_IMPL(zio_stage,
 	{ 'E', "EN", "ENCRYPT" },
 	{ 'C', "CG", "CHECKSUM_GENERATE" },
 	{ 'N', "NW", "NOP_WRITE" },
-	{ 'B', "BF", "BRT_FREE" },
 	{ 'd', "dS", "DDT_READ_START" },
 	{ 'd', "dD", "DDT_READ_DONE" },
 	{ 'd', "dW", "DDT_WRITE" },
 	{ 'd', "dF", "DDT_FREE" },
+	{ 'B', "BF", "BRT_FREE" },
 	{ 'G', "GA", "GANG_ASSEMBLE" },
 	{ 'G', "GI", "GANG_ISSUE" },
 	{ 'D', "DT", "DVA_THROTTLE" },
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
index ee86fe0c717..09f5c88d8fb 100644
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -374,10 +374,16 @@ vdev_prop_init(void)
 		{ "on",		1},
 		{ NULL }
 	};
+	static const zprop_index_t boolean_inherit_table[] = {
+		{ "off",	0},
+		{ "on",		1},
+		{ "inherit",	ZPROP_BOOLEAN_INHERIT},
+		{ NULL }
+	};
 	static const zprop_index_t boolean_na_table[] = {
 		{ "off",	0},
 		{ "on",		1},
-		{ "-",		2},	/* ZPROP_BOOLEAN_NA */
+		{ "-",		ZPROP_BOOLEAN_NA},
 		{ NULL }
 	};
 
@@ -388,6 +394,14 @@ vdev_prop_init(void)
 		{ NULL }
 	};
 
+	static const zprop_index_t vdev_alloc_bias_table[] = {
+		{ "none",	VDEV_BIAS_NONE },
+		{ "log",	VDEV_BIAS_LOG },
+		{ "special",	VDEV_BIAS_SPECIAL },
+		{ "dedup",	VDEV_BIAS_DEDUP },
+		{ NULL }
+	};
+
 	struct zfs_mod_supported_features *sfeatures =
 	    zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
 
@@ -547,8 +561,8 @@ vdev_prop_init(void)
 
 	/* default index properties */
 	zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
-	    PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
-	    sfeatures);
+	    PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off | inherit", "FAILFAST",
+	    boolean_inherit_table, sfeatures);
 	zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
 	    B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
 	    "SLOW_IO_EVENTS", boolean_table, sfeatures);
@@ -556,6 +570,13 @@ vdev_prop_init(void)
 	    VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
 	    "auto | on | off", "IO_SCHEDULER",
 	    vdevschedulertype_table, sfeatures);
+	zprop_register_index(VDEV_PROP_ALLOC_BIAS, "alloc_bias",
+	    VDEV_BIAS_NONE, PROP_DEFAULT, ZFS_TYPE_VDEV,
+	    "none | log | special | dedup", "ALLOC_BIAS",
+	    vdev_alloc_bias_table, sfeatures);
+	zprop_register_index(VDEV_PROP_ROTATIONAL, "rotational", 0,
+	    PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "ROTATIONAL",
+	    boolean_table, sfeatures);
 
 	/* hidden properties */
 	zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
index c28cb9114dd..3e76884c557 100644
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -398,14 +398,14 @@ uint_t zfs_arc_pc_percent = 0;
 
 /*
  * log2(fraction of ARC which must be free to allow growing).
- * I.e. If there is less than arc_c >> arc_no_grow_shift free memory,
+ * I.e. If there is less than arc_c >> zfs_arc_no_grow_shift free memory,
  * when reading a new block into the ARC, we will evict an equal-sized block
  * from the ARC.
  *
  * This must be less than arc_shrink_shift, so that when we shrink the ARC,
  * we will still not allow it to grow.
  */
-uint_t		arc_no_grow_shift = 5;
+uint_t		zfs_arc_no_grow_shift = 5;
 
 
 /*
@@ -586,6 +586,7 @@ arc_stats_t arc_stats = {
 	{ "uncached_metadata",		KSTAT_DATA_UINT64 },
 	{ "uncached_evictable_data",	KSTAT_DATA_UINT64 },
 	{ "uncached_evictable_metadata", KSTAT_DATA_UINT64 },
+	{ "l2_ndev",			KSTAT_DATA_UINT64 },
 	{ "l2_hits",			KSTAT_DATA_UINT64 },
 	{ "l2_misses",			KSTAT_DATA_UINT64 },
 	{ "l2_prefetch_asize",		KSTAT_DATA_UINT64 },
@@ -4975,7 +4976,7 @@ arc_reap_cb_check(void *arg, zthr_t *zthr)
 		 */
 		arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
 		return (B_TRUE);
-	} else if (free_memory < arc_c >> arc_no_grow_shift) {
+	} else if (free_memory < arc_c >> zfs_arc_no_grow_shift) {
 		arc_no_grow = B_TRUE;
 	} else if (gethrtime() >= arc_growtime) {
 		arc_no_grow = B_FALSE;
@@ -5571,20 +5572,6 @@ arc_buf_access(arc_buf_t *buf)
 	    !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
 }
 
-/* a generic arc_read_done_func_t which you can use */
-void
-arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
-    arc_buf_t *buf, void *arg)
-{
-	(void) zio, (void) zb, (void) bp;
-
-	if (buf == NULL)
-		return;
-
-	memcpy(arg, buf->b_data, arc_buf_size(buf));
-	arc_buf_destroy(buf, arg);
-}
-
 /* a generic arc_read_done_func_t */
 void
 arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
@@ -7440,6 +7427,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
 	    aggsum_value(&arc_sums.arcstat_dnode_size);
 	as->arcstat_bonus_size.value.ui64 =
 	    wmsum_value(&arc_sums.arcstat_bonus_size);
+	as->arcstat_l2_ndev.value.ui64 = l2arc_ndev;
 	as->arcstat_l2_hits.value.ui64 =
 	    wmsum_value(&arc_sums.arcstat_l2_hits);
 	as->arcstat_l2_misses.value.ui64 =
@@ -7654,7 +7642,8 @@ arc_tuning_update(boolean_t verbose)
 	/* Valid range: 1 - N */
 	if (zfs_arc_shrink_shift) {
 		arc_shrink_shift = zfs_arc_shrink_shift;
-		arc_no_grow_shift = MIN(arc_no_grow_shift, arc_shrink_shift -1);
+		zfs_arc_no_grow_shift = MIN(zfs_arc_no_grow_shift,
+		    arc_shrink_shift - 1);
 	}
 
 	/* Valid range: 1 - N ms */
@@ -11683,6 +11672,7 @@ EXPORT_SYMBOL(arc_write);
 EXPORT_SYMBOL(arc_read);
 EXPORT_SYMBOL(arc_buf_info);
 EXPORT_SYMBOL(arc_getbuf_func);
+EXPORT_SYMBOL(arc_buf_destroy);
 EXPORT_SYMBOL(arc_add_prune_callback);
 EXPORT_SYMBOL(arc_remove_prune_callback);
 
@@ -11701,6 +11691,10 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
 	param_get_uint, ZMOD_RW, "log2(fraction of ARC to reclaim)");
 
+ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
+	param_set_arc_no_grow_shift, param_get_uint, ZMOD_RW,
+	"log2(fraction of ARC which must be free to allow growing)");
+
 #ifdef _KERNEL
 ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
 	"Percent of pagecache to reclaim ARC to");
diff --git a/sys/contrib/openzfs/module/zfs/ddt_log.c b/sys/contrib/openzfs/module/zfs/ddt_log.c
index 51ce8b9a084..7e699a9b425 100644
--- a/sys/contrib/openzfs/module/zfs/ddt_log.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_log.c
@@ -221,7 +221,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
 	uint64_t length = nblocks * dlu->dlu_dn->dn_datablksz;
 
 	VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length,
-	    B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp,
+	    B_FALSE, dlu, &dlu->dlu_ndbp, &dlu->dlu_dbp,
 	    DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO));
 
 	dlu->dlu_tx = tx;
@@ -338,7 +338,7 @@ ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu)
 	 */
 	dmu_buf_fill_done(dlu->dlu_dbp[dlu->dlu_block], dlu->dlu_tx, B_FALSE);
 
-	dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, FTAG);
+	dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, dlu);
 
 	ddt->ddt_log_active->ddl_length +=
 	    dlu->dlu_ndbp * (uint64_t)dlu->dlu_dn->dn_datablksz;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
index 4919ead3cea..654afe2f844 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -1859,7 +1859,7 @@ do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)
 	    &cookie)) != NULL) {
 		/*
 		 * os_userused_lock protects against concurrent calls to
-		 * zap_increment_int().  It's needed because zap_increment_int()
+		 * zap_increment().  It's needed because zap_increment()
 		 * is not thread-safe (i.e. not atomic).
 		 */
 		mutex_enter(&os->os_userused_lock);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
index fa18a2056bb..74874bb65d3 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -2901,16 +2901,20 @@ receive_read_record(dmu_recv_cookie_t *drc)
 	{
 		struct drr_object *drro =
 		    &drc->drc_rrd->header.drr_u.drr_object;
-		uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+		uint32_t size;
 		void *buf = NULL;
 		dmu_object_info_t doi;
 
+		size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+		if (size > SPA_MAXBLOCKSIZE)
+			return (SET_ERROR(ERANGE));
+
 		if (size != 0)
-			buf = kmem_zalloc(size, KM_SLEEP);
+			buf = vmem_zalloc(size, KM_SLEEP);
 
 		err = receive_read_payload_and_next_header(drc, size, buf);
 		if (err != 0) {
-			kmem_free(buf, size);
+			vmem_free(buf, size);
 			return (err);
 		}
 		err = dmu_object_info(drc->drc_os, drro->drr_object, &doi);
@@ -2934,7 +2938,11 @@ receive_read_record(dmu_recv_cookie_t *drc)
 	case DRR_WRITE:
 	{
 		struct drr_write *drrw = &drc->drc_rrd->header.drr_u.drr_write;
-		int size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+		uint64_t size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+
+		if (size > SPA_MAXBLOCKSIZE)
+			return (SET_ERROR(ERANGE));
+
 		abd_t *abd = abd_alloc_linear(size, B_FALSE);
 		err = receive_read_payload_and_next_header(drc, size,
 		    abd_to_buf(abd));
@@ -2951,12 +2959,18 @@ receive_read_record(dmu_recv_cookie_t *drc)
 	{
 		struct drr_write_embedded *drrwe =
 		    &drc->drc_rrd->header.drr_u.drr_write_embedded;
-		uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);
-		void *buf = kmem_zalloc(size, KM_SLEEP);
+		uint32_t size;
+		void *buf;
+
+		size = P2ROUNDUP(drrwe->drr_psize, 8);
+		if (size > SPA_MAXBLOCKSIZE)
+			return (SET_ERROR(ERANGE));
+
+		buf = vmem_zalloc(size, KM_SLEEP);
 
 		err = receive_read_payload_and_next_header(drc, size, buf);
 		if (err != 0) {
-			kmem_free(buf, size);
+			vmem_free(buf, size);
 			return (err);
 		}
 
@@ -2985,7 +2999,11 @@ receive_read_record(dmu_recv_cookie_t *drc)
 	case DRR_SPILL:
 	{
 		struct drr_spill *drrs = &drc->drc_rrd->header.drr_u.drr_spill;
-		int size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+		uint64_t size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+
+		if (size > SPA_MAXBLOCKSIZE)
+			return (SET_ERROR(ERANGE));
+
 		abd_t *abd = abd_alloc_linear(size, B_FALSE);
 		err = receive_read_payload_and_next_header(drc, size,
 		    abd_to_buf(abd));
@@ -3136,7 +3154,7 @@ receive_process_record(struct receive_writer_arg *rwa,
 			abd_free(rrd->abd);
 			rrd->abd = NULL;
 		} else if (rrd->payload != NULL) {
-			kmem_free(rrd->payload, rrd->payload_size);
+			vmem_free(rrd->payload, rrd->payload_size);
 			rrd->payload = NULL;
 		}
 		return (0);
@@ -3150,7 +3168,7 @@ receive_process_record(struct receive_writer_arg *rwa,
 				rrd->abd = NULL;
 				rrd->payload = NULL;
 			} else if (rrd->payload != NULL) {
-				kmem_free(rrd->payload, rrd->payload_size);
+				vmem_free(rrd->payload, rrd->payload_size);
 				rrd->payload = NULL;
 			}
 
@@ -3163,7 +3181,7 @@ receive_process_record(struct receive_writer_arg *rwa,
 	{
 		struct drr_object *drro = &rrd->header.drr_u.drr_object;
 		err = receive_object(rwa, drro, rrd->payload);
-		kmem_free(rrd->payload, rrd->payload_size);
+		vmem_free(rrd->payload, rrd->payload_size);
 		rrd->payload = NULL;
 		break;
 	}
@@ -3201,7 +3219,7 @@ receive_process_record(struct receive_writer_arg *rwa,
 		struct drr_write_embedded *drrwe =
 		    &rrd->header.drr_u.drr_write_embedded;
 		err = receive_write_embedded(rwa, drrwe, rrd->payload);
-		kmem_free(rrd->payload, rrd->payload_size);
+		vmem_free(rrd->payload, rrd->payload_size);
 		rrd->payload = NULL;
 		break;
 	}
@@ -3270,7 +3288,7 @@ receive_writer_thread(void *arg)
 			rrd->abd = NULL;
 			rrd->payload = NULL;
 		} else if (rrd->payload != NULL) {
-			kmem_free(rrd->payload, rrd->payload_size);
+			vmem_free(rrd->payload, rrd->payload_size);
 			rrd->payload = NULL;
 		}
 		/*
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
index 4c354722e4f..d931d9432f0 100644
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -2241,6 +2241,37 @@ setup_send_progress(struct dmu_send_params *dspp)
 	return (dssp);
 }
 
+/*
+ * Payloads must be multiples of 8 bytes for historical compatibility, but
+ * XDR-encoded nvlists are sized in multiples of 4 bytes and may need padding.
+ *
+ * Here we do the simplest possible thing and copy the data to a separate
+ * buffer. Not ideal in terms of performance and memory use, but most BEGIN
+ * nvlists are small or absent, the allocation is momentary, and we'll need
+ * to do this at most once per dataset.
+ *
+ * It's OK if there is extra data after a packed nvlist on the receiving
+ * side because packed nvlists have an internal end-of-list marker.
+ *
+ * The new buffer is allocated with kmem_alloc() and can be freed with
+ * fnvlist_pack_free(), like the original.
+ */
+static inline void
+pad_packed_nvlist(char **buffer, size_t *size)
+{
+	size_t size_in = *size;
+	size_t extra_bytes = P2ROUNDUP(size_in, 8) - size_in;
+	if (extra_bytes != 0) {
+		size_t expanded_size = size_in + extra_bytes;
+		char *longbuf = kmem_alloc(expanded_size, KM_SLEEP);
+		memcpy(longbuf, *buffer, size_in);
+		memset(longbuf + size_in, 0, extra_bytes);
+		fnvlist_pack_free(*buffer, size_in);
+		*buffer = longbuf;
+		*size = expanded_size;
+	}
+}
+
 /*
  * Actually do the bulk of the work in a zfs send.
  *
@@ -2474,7 +2505,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
 
 	dsl_pool_rele(dp, tag);
 
-	void *payload = NULL;
+	char *payload = NULL;
 	size_t payload_len = 0;
 	nvlist_t *nvl = fnvlist_alloc();
 
@@ -2548,7 +2579,9 @@ dmu_send_impl(struct dmu_send_params *dspp)
 	}
 
 	if (!nvlist_empty(nvl)) {
-		payload = fnvlist_pack(nvl, &payload_len);
+		VERIFY0(nvlist_pack(nvl, &payload, &payload_len,
+		    NV_ENCODE_XDR, KM_SLEEP));
+		pad_packed_nvlist(&payload, &payload_len);
 		drr->drr_payloadlen = payload_len;
 	}
 
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
index 4ffd75ceace..b0354203d42 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c
@@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
 		} else {
 			dmu_buf_t *db;
 			VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
-			    DB_RF_MUST_SUCCEED, FTAG, &db));
+			    DB_RF_MUST_SUCCEED, tag, &db));
 			dmu_buf_will_fill(db, tx, B_FALSE);
 			VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
 			    SPA_MINBLOCKSIZE), tx));
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
index 2253b868b53..e88de3dbdfd 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -1534,9 +1534,28 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
 }
 
 /* call from syncing context when we actually write/free space for this dd */
-void
-dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
-    int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
+static void dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,
+    int64_t compressed, int64_t uncompressed, int64_t tonew,
+    dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx);
+
+static void
+dsl_dir_lock_enter(dsl_dir_t *dd, boolean_t nested)
+{
+	/*
+	 * lockdep needs an explicit subclass when an ancestor's dd_lock
+	 * is acquired while a descendant's dd_lock is already held.
+	 */
+	if (nested) {
+		mutex_enter_nested(&dd->dd_lock, NESTED_SINGLE);
+	} else {
+		mutex_enter(&dd->dd_lock);
+	}
+}
+
+static void
+dsl_dir_diduse_space_impl(dsl_dir_t *dd, dd_used_t type,
+    int64_t used, int64_t compressed, int64_t uncompressed,
+    boolean_t nested, dmu_tx_t *tx)
 {
 	int64_t accounted_delta;
 
@@ -1554,7 +1573,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
 	 */
 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
 	if (needlock)
-		mutex_enter(&dd->dd_lock);
+		dsl_dir_lock_enter(dd, nested);
 	dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
 	accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
 	ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
@@ -1582,12 +1601,20 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
 		mutex_exit(&dd->dd_lock);
 
 	if (dd->dd_parent != NULL) {
-		dsl_dir_diduse_transfer_space(dd->dd_parent,
+		dsl_dir_diduse_transfer_space_impl(dd->dd_parent,
 		    accounted_delta, compressed, uncompressed,
-		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);
 	}
 }
 
+void
+dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used,
+    int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
+{
+	dsl_dir_diduse_space_impl(dd, type, used, compressed, uncompressed,
+	    B_FALSE, tx);
+}
+
 void
 dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
@@ -1612,10 +1639,10 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
 	mutex_exit(&dd->dd_lock);
 }
 
-void
-dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
+static void
+dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,
     int64_t compressed, int64_t uncompressed, int64_t tonew,
-    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
+    dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx)
 {
 	int64_t accounted_delta;
 
@@ -1625,7 +1652,7 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
 
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 
-	mutex_enter(&dd->dd_lock);
+	dsl_dir_lock_enter(dd, nested);
 	dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
 	accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
 	ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
@@ -1656,12 +1683,21 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
 	mutex_exit(&dd->dd_lock);
 
 	if (dd->dd_parent != NULL) {
-		dsl_dir_diduse_transfer_space(dd->dd_parent,
+		dsl_dir_diduse_transfer_space_impl(dd->dd_parent,
 		    accounted_delta, compressed, uncompressed,
-		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
+		    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);
 	}
 }
 
+void
+dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
+    int64_t compressed, int64_t uncompressed, int64_t tonew,
+    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
+{
+	dsl_dir_diduse_transfer_space_impl(dd, used, compressed,
+	    uncompressed, tonew, oldtype, newtype, B_FALSE, tx);
+}
+
 typedef struct dsl_dir_set_qr_arg {
 	const char *ddsqra_name;
 	zprop_source_t ddsqra_source;
@@ -1828,8 +1864,8 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
 
 	if (dd->dd_parent != NULL) {
 		/* Roll up this additional usage into our ancestors */
-		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
-		    delta, 0, 0, tx);
+		dsl_dir_diduse_space_impl(dd->dd_parent, DD_USED_CHILD_RSRV,
+		    delta, 0, 0, B_TRUE, tx);
 	}
 	mutex_exit(&dd->dd_lock);
 }
@@ -2268,22 +2304,29 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx)
 {
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	inode_timespec_t t;
+
+	ASSERT(dsl_pool_sync_context(dp));
 	gethrestime(&t);
 
 	mutex_enter(&dd->dd_lock);
 	dd->dd_snap_cmtime = t;
-	if (spa_feature_is_enabled(dp->dp_spa,
-	    SPA_FEATURE_EXTENSIBLE_DATASET)) {
-		objset_t *mos = dd->dd_pool->dp_meta_objset;
-		uint64_t ddobj = dd->dd_object;
-		dsl_dir_zapify(dd, tx);
-		VERIFY0(zap_update(mos, ddobj,
-		    DD_FIELD_SNAPSHOTS_CHANGED,
-		    sizeof (uint64_t),
-		    sizeof (inode_timespec_t) / sizeof (uint64_t),
-		    &t, tx));
-	}
 	mutex_exit(&dd->dd_lock);
+
+	if (!spa_feature_is_enabled(dp->dp_spa,
+	    SPA_FEATURE_EXTENSIBLE_DATASET)) {
+		return;
+	}
+
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
+
+	/*
+	 * dsl_dir_zapify() and zap_update() may dirty buffers and recurse
+	 * into space accounting, so do not call them with dd_lock held.
+	 */
+	dsl_dir_zapify(dd, tx);
+	VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_SNAPSHOTS_CHANGED,
+	    sizeof (uint64_t),
+	    sizeof (inode_timespec_t) / sizeof (uint64_t), &t, tx));
 }
 
 void
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index 6f5dfac7b9d..03e13ca96cc 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -1280,6 +1280,7 @@ dsl_errorscrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
 		spa->spa_scan_pass_errorscrub_pause = gethrestime_sec();
 		scn->errorscrub_phys.dep_paused_flags = B_TRUE;
 		dsl_errorscrub_sync_state(scn, tx);
+		zap_cursor_fini(&scn->errorscrub_cursor);
 		spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_PAUSED);
 	} else {
 		ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
diff --git a/sys/contrib/openzfs/module/zfs/gzip.c b/sys/contrib/openzfs/module/zfs/gzip.c
index d183e998456..2dee3e1da78 100644
--- a/sys/contrib/openzfs/module/zfs/gzip.c
+++ b/sys/contrib/openzfs/module/zfs/gzip.c
@@ -96,13 +96,19 @@ zfs_gzip_decompress_buf(void *s_start, void *d_start, size_t s_len,
 	/* check if hardware accelerator can be used */
 	if (qat_dc_use_accel(d_len)) {
 		if (qat_compress(QAT_DECOMPRESS, s_start, s_len,
-		    d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
-			return (0);
+		    d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS) {
+			if ((size_t)dstlen == d_len)
+				return (0);
+		}
+		/* qat_compress() may have updated dstlen; restore capacity */
+		dstlen = d_len;
 		/* if hardware de-compress fail, do it again with software */
 	}
 
 	if (uncompress_func(d_start, &dstlen, s_start, s_len) != Z_OK)
 		return (-1);
+	if ((size_t)dstlen != d_len)
+		return (-1);
 
 	return (0);
 }
diff --git a/sys/contrib/openzfs/module/zfs/lz4_zfs.c b/sys/contrib/openzfs/module/zfs/lz4_zfs.c
index 24ecf0763f9..7218a505f79 100644
--- a/sys/contrib/openzfs/module/zfs/lz4_zfs.c
+++ b/sys/contrib/openzfs/module/zfs/lz4_zfs.c
@@ -89,17 +89,24 @@ zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
 	(void) n;
 	const char *src = s_start;
 	uint32_t bufsiz = BE_IN32(src);
+	int decoded;
 
 	/* invalid compressed buffer size encoded at start */
 	if (bufsiz + sizeof (bufsiz) > s_len)
 		return (1);
 
 	/*
-	 * Returns 0 on success (decompression function returned non-negative)
-	 * and non-zero on failure (decompression function returned negative).
+	 * LZ4_uncompress_unknownOutputSize returns the number of bytes decoded
+	 * on success, or a negative value on failure. An OpenZFS block must
+	 * expand to exactly d_len bytes.
 	 */
-	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
-	    d_start, bufsiz, d_len) < 0);
+	decoded = LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
+	    d_start, bufsiz, d_len);
+	if (decoded < 0)
+		return (1);
+	if (d_len != (size_t)decoded)
+		return (1);
+	return (0);
 }
 
 ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
index 6ea3ecd74fc..2be1f281268 100644
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -82,11 +82,11 @@ int zfs_metaslab_sm_blksz_with_log = (1 << 17);
 
 /*
  * The in-core space map representation is more compact than its on-disk form.
- * The zfs_condense_pct determines how much more compact the in-core
+ * The zfs_metaslab_condense_pct determines how much more compact the in-core
  * space map representation must be before we compact it on-disk.
  * Values should be greater than or equal to 100.
  */
-uint_t zfs_condense_pct = 200;
+uint_t zfs_metaslab_condense_pct = 200;
 
 /*
  * Condensing a metaslab is not guaranteed to actually reduce the amount of
@@ -3826,8 +3826,8 @@ metaslab_group_preload(metaslab_group_t *mg)
  *    increase as a result of writing out the free space range tree.
  *
  * 2. Condense if the on on-disk space map representation is at least
- *    zfs_condense_pct/100 times the size of the optimal representation
- *    (i.e. zfs_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).
+ *    zfs_metaslab_condense_pct/100 times the size of the optimal representation
+ *    (i.e. zfs_metaslab_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).
  *
  * 3. Do not condense if the on-disk size of the space map does not actually
  *    decrease.
@@ -3863,7 +3863,8 @@ metaslab_should_condense(metaslab_t *msp)
 	uint64_t optimal_size = space_map_estimate_optimal_size(sm,
 	    msp->ms_allocatable, SM_NO_VDEVID);
 
-	return (object_size >= (optimal_size * zfs_condense_pct / 100) &&
+	return (object_size >=
+	    (optimal_size * zfs_metaslab_condense_pct / 100) &&
 	    object_size > zfs_metaslab_condense_block_threshold * record_size);
 }
 
@@ -6442,6 +6443,14 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,
 	"When looking in size tree, use largest segment instead of exact fit");
 
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_alloc_threshold, U64, ZMOD_RW,
+	"Minimum size which forces the dynamic allocator to change its "
+	"allocation strategy");
+
+ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_free_pct, UINT, ZMOD_RW,
+	"The minimum free space, in percent, to continue allocations in a "
+	"first-fit fashion");
+
 ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64,
 	ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");
 
@@ -6454,6 +6463,18 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,
 ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,
 	"Normally only consider this many of the best metaslabs in each vdev");
 
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_no_log, INT, ZMOD_RW,
+	"Block size for space map in pools with log space map disabled.  "
+	"Power of 2 greater than 4096.");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_with_log, INT, ZMOD_RW,
+	"Block size for space map in pools with log space map enabled.  "
+	"Power of 2 greater than 4096.");
+
 ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator,
 	param_set_active_allocator, param_get_charp, ZMOD_RW,
 	"SPA active allocator");
+
+ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, condense_pct, UINT, ZMOD_RW,
+	"Condense on-disk spacemap when it is more than this many percents "
+	"of in-memory counterpart");
diff --git a/sys/contrib/openzfs/module/zfs/sa.c b/sys/contrib/openzfs/module/zfs/sa.c
index bd565bb7101..c6b36474b9f 100644
--- a/sys/contrib/openzfs/module/zfs/sa.c
+++ b/sys/contrib/openzfs/module/zfs/sa.c
@@ -1605,8 +1605,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
 
 	bulk = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
 	attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
-	mutex_enter(&hdl->sa_lock);
 	mutex_enter(&zp->z_lock);
+	mutex_enter(&hdl->sa_lock);
 
 	err = sa_lookup_locked(hdl, SA_ZPL_PROJID(zfsvfs), &projid,
 	    sizeof (uint64_t));
@@ -1750,8 +1750,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
 	zp->z_is_sa = B_TRUE;
 
 out:
-	mutex_exit(&zp->z_lock);
 	mutex_exit(&hdl->sa_lock);
+	mutex_exit(&zp->z_lock);
 	kmem_free(attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
 	kmem_free(bulk, sizeof (sa_bulk_attr_t) * ZPL_END);
 	if (dxattr_obj)
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
index eafd4b17620..c6ae91b8d9e 100644
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -8333,12 +8333,20 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
 		return (spa_vdev_exit(spa, newrootvd, txg, error));
 
 	/*
-	 * log, dedup and special vdevs should not be replaced by spares.
+	 * Spares can't replace logs.
 	 */
-	if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
-	    oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
+	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
+		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+	/*
+	 * For special and dedup vdevs a spare must have matching rotational
+	 * characteristics.  A rotating spare replacing a non-rotating vdev
+	 * would silently degrade pool performance, so we reject the mismatch.
+	 */
+	if (newvd->vdev_isspare &&
+	    oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE &&
+	    newvd->vdev_nonrot != oldvd->vdev_nonrot)
 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
-	}
 
 	/*
 	 * A dRAID spare can only replace a child of its parent dRAID vdev.
@@ -11011,6 +11019,10 @@ spa_sync(spa_t *spa, uint64_t txg)
 		ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
 	}
 
+	for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
+	    vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
+		vdev_sync_dispatch(vd, txg);
+
 	spa_sync_rewrite_vdev_config(spa, tx);
 	dmu_tx_commit(tx);
 
@@ -11035,9 +11047,6 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	dsl_pool_sync_done(dp, txg);
 
-	/*
-	 * Update usable space statistics.
-	 */
 	while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
 	    != NULL)
 		vdev_sync_done(vd, txg);
@@ -11811,6 +11820,12 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,
 	"Allow importing pool with up to this number of missing top-level "
 	"vdevs (in read-only mode)");
 
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_cachefile, U64, ZMOD_RW,
+	"Allow importing pools with missing top-level vdevs in cache file");
+
+ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_scan, U64, ZMOD_RW,
+	"Allow importing pools with missing top-level vdevs during scan");
+
 ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT,
 	ZMOD_RW, "Set the livelist condense zthr to pause");
 
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
index 30639d7f4c7..821dfd6faff 100644
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -460,6 +460,7 @@ vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
 	} else if (vd->vdev_leaf_zap != 0) {
 		*objid = vd->vdev_leaf_zap;
 	} else {
+		*objid = 0;
 		return (EINVAL);
 	}
 
@@ -474,8 +475,11 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
 	uint64_t objid;
 	int err;
 
-	if (vdev_prop_get_objid(vd, &objid) != 0)
-		return (EINVAL);
+	if (vdev_prop_get_objid(vd, &objid) != 0) {
+		/* No ZAP: property was never set, return the default. */
+		*value = vdev_prop_default_numeric(prop);
+		return (ENOENT);
+	}
 
 	err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
 	    sizeof (uint64_t), 1, value);
@@ -963,6 +967,20 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	    &vd->vdev_wholedisk) != 0)
 		vd->vdev_wholedisk = -1ULL;
 
+	/*
+	 * Restore the last-known rotational status for leaf vdevs.  vdev_open()
+	 * will overwrite this with the hardware value when the device is
+	 * accessible; the persisted value acts as a fallback for failed or
+	 * missing devices so that spare selection can still match on device
+	 * type even when the original disk is gone.
+	 */
+	if (vd->vdev_ops->vdev_op_leaf) {
+		uint64_t rotational = 0;
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,
+		    &rotational) == 0)
+			vd->vdev_nonrot = !rotational;
+	}
+
 	vic = &vd->vdev_indirect_config;
 
 	ASSERT0(vic->vic_mapping_object);
@@ -1117,6 +1135,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
 	if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))
 		vd->vdev_autosit =
 		    vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);
+	if (ops == &vdev_root_ops)
+		vd->vdev_failfast =
+		    vdev_prop_default_numeric(VDEV_PROP_FAILFAST);
+	else
+		vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;
 
 	/*
 	 * Add ourselves to the parent's list of children.
@@ -3912,10 +3935,9 @@ vdev_load(vdev_t *vd)
 		    vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
 		    1, &failfast);
 		if (error == 0) {
-			vd->vdev_failfast = failfast & 1;
+			vd->vdev_failfast = failfast;
 		} else if (error == ENOENT) {
-			vd->vdev_failfast = vdev_prop_default_numeric(
-			    VDEV_PROP_FAILFAST);
+			vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;
 		} else {
 			vdev_dbgmsg(vd,
 			    "vdev_load: zap_lookup(top_zap=%llu) "
@@ -4224,17 +4246,39 @@ vdev_remove_empty_log(vdev_t *vd, uint64_t txg)
 	dmu_tx_commit(tx);
 }
 
+static void
+metaslab_sync_done_task(void *arg)
+{
+	metaslab_t *msp = arg;
+	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+	metaslab_sync_done(msp, spa_syncing_txg(spa));
+}
+
+void
+vdev_sync_dispatch(vdev_t *vd, uint64_t txg)
+{
+	spa_t *spa = vd->vdev_spa;
+
+	ASSERT(vdev_is_concrete(vd));
+
+	for (metaslab_t *msp = txg_list_head(&vd->vdev_ms_list, TXG_CLEAN(txg));
+	    msp; msp = txg_list_next(&vd->vdev_ms_list, msp, TXG_CLEAN(txg))) {
+		(void) taskq_dispatch(spa->spa_sync_tq,
+		    metaslab_sync_done_task, msp, TQ_SLEEP);
+	}
+}
+
 void
 vdev_sync_done(vdev_t *vd, uint64_t txg)
 {
-	metaslab_t *msp;
 	boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));
 
 	ASSERT(vdev_is_concrete(vd));
 
-	while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
-	    != NULL)
-		metaslab_sync_done(msp, txg);
+	taskq_wait(vd->vdev_spa->spa_sync_tq);
+
+	while (txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)) != NULL)
+		;
 
 	if (reassess) {
 		metaslab_sync_reassess(vd->vdev_mg);
@@ -6093,6 +6137,29 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
 				    strval);
 			}
 			break;
+		case VDEV_PROP_ALLOC_BIAS: {
+			intval = fnvpair_value_uint64(elem);
+			ASSERT3U(intval, !=, VDEV_BIAS_LOG);
+			const char *bias_str =
+			    (intval == VDEV_BIAS_SPECIAL) ?
+			    VDEV_ALLOC_BIAS_SPECIAL :
+			    (intval == VDEV_BIAS_DEDUP) ?
+			    VDEV_ALLOC_BIAS_DEDUP : NULL;
+			if (bias_str == NULL) {
+				(void) zap_remove(mos, objid,
+				    VDEV_TOP_ZAP_ALLOCATION_BIAS, tx);
+			} else {
+				VERIFY0(zap_update(mos, objid,
+				    VDEV_TOP_ZAP_ALLOCATION_BIAS,
+				    1, strlen(bias_str) + 1, bias_str, tx));
+				spa_activate_allocation_classes(spa, tx);
+			}
+			spa_history_log_internal(spa, "vdev set", tx,
+			    "vdev_guid=%llu: alloc_bias=%s",
+			    (u_longlong_t)vdev_guid,
+			    bias_str != NULL ? bias_str : "none");
+			break;
+		}
 		default:
 			/* normalize the property name */
 			propname = vdev_prop_to_name(prop);
@@ -6207,11 +6274,14 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 				error = spa_vdev_alloc(spa, vdev_guid);
 			break;
 		case VDEV_PROP_FAILFAST:
-			if (nvpair_value_uint64(elem, &intval) != 0) {
+			if (nvpair_value_uint64(elem, &intval) != 0 ||
+			    intval > ZPROP_BOOLEAN_INHERIT ||
+			    (intval == ZPROP_BOOLEAN_INHERIT &&
+			    vd->vdev_ops == &vdev_root_ops)) {
 				error = EINVAL;
 				break;
 			}
-			vd->vdev_failfast = intval & 1;
+			vd->vdev_failfast = intval;
 			break;
 		case VDEV_PROP_SIT_OUT:
 			/* Only expose this for a draid or raidz leaf */
@@ -6319,6 +6389,53 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 			}
 			vd->vdev_scheduler = intval;
 			break;
+		case VDEV_PROP_ALLOC_BIAS:
+			if (nvpair_value_uint64(elem, &intval) != 0) {
+				error = EINVAL;
+				break;
+			}
+			if (vd != vd->vdev_top || vd->vdev_top_zap == 0) {
+				error = ENOTSUP;
+				break;
+			}
+			/* Log vdevs are not supported: remove and re-add. */
+			if (vd->vdev_islog) {
+				error = ENOTSUP;
+				break;
+			}
+			/* special/dedup needs allocation_classes feature */
+			if (intval != VDEV_BIAS_NONE &&
+			    ((intval != VDEV_BIAS_SPECIAL &&
+			    intval != VDEV_BIAS_DEDUP) ||
+			    !spa_feature_is_enabled(spa,
+			    SPA_FEATURE_ALLOCATION_CLASSES))) {
+				error = ENOTSUP;
+				break;
+			}
+			/*
+			 * Disallow converting the last normal vdev to
+			 * avoid pool suspension on failed allocations.
+			 */
+			if (intval != VDEV_BIAS_NONE &&
+			    vd->vdev_alloc_bias == VDEV_BIAS_NONE) {
+				vdev_t *rvd = spa->spa_root_vdev;
+				int normal = 0;
+				for (uint64_t c = 0;
+				    c < rvd->vdev_children; c++) {
+					vdev_t *cvd = rvd->vdev_child[c];
+					if (vdev_is_concrete(cvd) &&
+					    cvd->vdev_alloc_bias ==
+					    VDEV_BIAS_NONE &&
+					    !cvd->vdev_noalloc)
+						normal++;
+				}
+				if (normal <= 1) {
+					error = ENOTSUP;
+					break;
+				}
+			}
+			vd->vdev_alloc_bias = (vdev_alloc_bias_t)intval;
+			break;
 		default:
 			/* Most processing is done in vdev_props_set_sync */
 			break;
@@ -6350,7 +6467,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 	spa_t *spa = vd->vdev_spa;
 	objset_t *mos = spa->spa_meta_objset;
 	int err = 0;
-	uint64_t objid;
+	uint64_t objid = 0;
 	uint64_t vdev_guid;
 	nvpair_t *elem = NULL;
 	nvlist_t *nvprops = NULL;
@@ -6369,9 +6486,15 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 
 	nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
 
-	if (vdev_prop_get_objid(vd, &objid) != 0)
-		return (SET_ERROR(EINVAL));
-	ASSERT(objid != 0);
+	/*
+	 * A missing ZAP is normal for spare and L2ARC vdevs, which are
+	 * not part of the main vdev tree and never get ZAPs allocated.
+	 * Many properties are sourced directly from vdev_t fields and
+	 * work fine without one; ZAP-backed properties will return their
+	 * default values.  objid is set to 0 when absent and the few
+	 * cases that call zap_lookup directly guard against this below.
+	 */
+	(void) vdev_prop_get_objid(vd, &objid);
 
 	mutex_enter(&spa->spa_props_lock);
 
@@ -6694,18 +6817,28 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 				break;
 			case VDEV_PROP_FAILFAST:
 				src = ZPROP_SRC_LOCAL;
-				strval = NULL;
 
-				err = zap_lookup(mos, objid, nvpair_name(elem),
-				    sizeof (uint64_t), 1, &intval);
+				if (objid != 0) {
+					err = zap_lookup(mos, objid,
+					    nvpair_name(elem),
+					    sizeof (uint64_t), 1, &intval);
+				} else {
+					err = ENOENT;
+				}
 				if (err == ENOENT) {
-					intval = vdev_prop_default_numeric(
-					    prop);
+					if (vd->vdev_ops == &vdev_root_ops)
+						intval =
+						    vdev_prop_default_numeric(
+						    prop);
+					else
+						intval = ZPROP_BOOLEAN_INHERIT;
 					err = 0;
 				} else if (err) {
 					break;
 				}
-				if (intval == vdev_prop_default_numeric(prop))
+				if (intval == ZPROP_BOOLEAN_INHERIT ||
+				    (vd->vdev_ops == &vdev_root_ops &&
+				    intval == 1))
 					src = ZPROP_SRC_DEFAULT;
 
 				vdev_prop_add_list(outnvl, propname, strval,
@@ -6746,6 +6879,17 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 				vdev_prop_add_list(outnvl, propname, NULL,
 				    boolval, src);
 				break;
+			case VDEV_PROP_ALLOC_BIAS:
+				if (vd == vd->vdev_top) {
+					vdev_prop_add_list(outnvl, propname,
+					    NULL, vd->vdev_alloc_bias,
+					    ZPROP_SRC_NONE);
+				}
+				continue;
+			case VDEV_PROP_ROTATIONAL:
+				vdev_prop_add_list(outnvl, propname, NULL,
+				    !vd->vdev_nonrot, ZPROP_SRC_NONE);
+				continue;
 			case VDEV_PROP_CHECKSUM_N:
 			case VDEV_PROP_CHECKSUM_T:
 			case VDEV_PROP_IO_N:
@@ -6771,6 +6915,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 				/* FALLTHRU */
 			case VDEV_PROP_USERPROP:
 				/* User Properites */
+				if (objid == 0)
+					continue;
 				src = ZPROP_SRC_LOCAL;
 
 				err = zap_length(mos, objid, nvpair_name(elem),
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
index b1371b0349c..e6da5c1707a 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
 	fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
+	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
+	    vd->vdev_top != NULL) {
+		fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID,
+		    vd->vdev_top->vdev_guid);
+	}
 
 	if (vd->vdev_path != NULL)
 		fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
@@ -493,6 +498,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 		    vd->vdev_wholedisk);
 	}
 
+	if (vd->vdev_ops->vdev_op_leaf) {
+		fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,
+		    !vd->vdev_nonrot);
+	}
+
 	if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
 
@@ -502,6 +512,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 	if (flags & VDEV_CONFIG_L2CACHE)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
 
+	if ((flags & VDEV_CONFIG_SPARE) && vd->vdev_asize != 0)
+		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, vd->vdev_asize);
+
 	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
 	    vd == vd->vdev_top) {
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
@@ -1392,6 +1405,7 @@ vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv)
 				    VB_NVLIST);
 				break;
 			}
+			vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
 			fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf);
 		}
 
diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
index 1ec4d0218bb..3c3370290c8 100644
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
@@ -102,14 +102,14 @@
 
 #define	WVR(X) [w##X] "=w" (w##X)
 
-#define	UVR0_(REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)
-#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)
+#define	UVR0_(REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR1_(_1, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR2_(_1, _2, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+w" (w##REG)
+#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+w" (w##REG)
 
 #define	UVR0(r...) UVR0_(r)
 #define	UVR1(r...) UVR1_(r)
@@ -120,7 +120,7 @@
 #define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
 #define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)
 
-#define	UVR(X) [w##X] "+&w" (w##X)
+#define	UVR(X) [w##X] "+w" (w##X)
 
 #define	R_01(REG1, REG2, ...) REG1, REG2
 #define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
diff --git a/sys/contrib/openzfs/module/zfs/zap.c b/sys/contrib/openzfs/module/zfs/zap.c
index b40d765e342..ca7598f489b 100644
--- a/sys/contrib/openzfs/module/zfs/zap.c
+++ b/sys/contrib/openzfs/module/zfs/zap.c
@@ -19,1074 +19,117 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
- * Copyright 2023 Alexander Stetsenko <alex.stetsenko@gmail.com>
- * Copyright (c) 2023, Klara Inc.
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2024, Klara, Inc.
+ * Copyright (c) 2026, TrueNAS.
  */
 
-/*
- * This file contains the top half of the zfs directory structure
- * implementation. The bottom half is in zap_leaf.c.
- *
- * The zdir is an extendable hash data structure. There is a table of
- * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
- * each a constant size and hold a variable number of directory entries.
- * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
- *
- * The pointer table holds a power of 2 number of pointers.
- * (1<<zap_t->zd_data->zd_phys->zd_prefix_len).  The bucket pointed to
- * by the pointer at index i in the table holds entries whose hash value
- * has a zd_prefix_len - bit prefix
- */
-
-#include <sys/spa.h>
+#include <sys/zfs_context.h>
 #include <sys/dmu.h>
 #include <sys/dnode.h>
-#include <sys/zfs_context.h>
-#include <sys/zfs_znode.h>
-#include <sys/fs/zfs.h>
+#include <sys/btree.h>
 #include <sys/zap.h>
 #include <sys/zap_impl.h>
 #include <sys/zap_leaf.h>
 
-/*
- * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
- * (all leaf blocks) when we start iterating over it.
- *
- * For zap_cursor_init(), the callers all intend to iterate through all the
- * entries.  There are a few cases where an error (typically i/o error) could
- * cause it to bail out early.
- *
- * For zap_cursor_init_serialized(), there are callers that do the iteration
- * outside of ZFS.  Typically they would iterate over everything, but we
- * don't have control of that.  E.g. zfs_ioc_snapshot_list_next(),
- * zcp_snapshots_iter(), and other iterators over things in the MOS - these
- * are called by /sbin/zfs and channel programs.  The other example is
- * zfs_readdir() which iterates over directory entries for the getdents()
- * syscall.  /sbin/ls iterates to the end (unless it receives a signal), but
- * userland doesn't have to.
- *
- * Given that the ZAP entries aren't returned in a specific order, the only
- * legitimate use cases for partial iteration would be:
- *
- * 1. Pagination: e.g. you only want to display 100 entries at a time, so you
- *    get the first 100 and then wait for the user to hit "next page", which
- *    they may never do).
- *
- * 2. You want to know if there are more than X entries, without relying on
- *    the zfs-specific implementation of the directory's st_size (which is
- *    the number of entries).
- */
-static int zap_iterate_prefetch = B_TRUE;
-
-/*
- * Enable ZAP shrinking. When enabled, empty sibling leaf blocks will be
- * collapsed into a single block.
- */
-int zap_shrink_enabled = B_TRUE;
-
-int fzap_default_block_shift = 14; /* 16k blocksize */
-
-static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
-static int zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx);
-
-void
-fzap_byteswap(void *vbuf, size_t size)
-{
-	uint64_t block_type = *(uint64_t *)vbuf;
-
-	if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
-		zap_leaf_byteswap(vbuf, size);
-	else {
-		/* it's a ptrtbl block */
-		byteswap_uint64_array(vbuf, size);
-	}
-}
-
-void
-fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
-{
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	zap->zap_ismicro = FALSE;
-
-	zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
-	zap->zap_dbu.dbu_evict_func_async = NULL;
-
-	mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, 0);
-	zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
-
-	zap_phys_t *zp = zap_f_phys(zap);
-	/*
-	 * explicitly zero it since it might be coming from an
-	 * initialized microzap
-	 */
-	memset(zap->zap_dbuf->db_data, 0, zap->zap_dbuf->db_size);
-	zp->zap_block_type = ZBT_HEADER;
-	zp->zap_magic = ZAP_MAGIC;
-
-	zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
-
-	zp->zap_freeblk = 2;		/* block 1 will be the first leaf */
-	zp->zap_num_leafs = 1;
-	zp->zap_num_entries = 0;
-	zp->zap_salt = zap->zap_salt;
-	zp->zap_normflags = zap->zap_normflags;
-	zp->zap_flags = flags;
-
-	/* block 1 will be the first leaf */
-	for (int i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
-		ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;
-
-	/*
-	 * set up block 1 - the first leaf
-	 */
-	dmu_buf_t *db;
-	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
-	dmu_buf_will_dirty(db, tx);
-
-	zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
-	l->l_dbuf = db;
-
-	zap_leaf_init(l, zp->zap_normflags != 0);
-
-	kmem_free(l, sizeof (zap_leaf_t));
-	dmu_buf_rele(db, FTAG);
-}
-
-static int
-zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
-{
-	if (RW_WRITE_HELD(&zap->zap_rwlock))
-		return (1);
-	if (rw_tryupgrade(&zap->zap_rwlock)) {
-		dmu_buf_will_dirty(zap->zap_dbuf, tx);
-		return (1);
-	}
-	return (0);
-}
-
-/*
- * Generic routines for dealing with the pointer & cookie tables.
- */
-
-static int
-zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
-    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
-    dmu_tx_t *tx)
-{
-	uint64_t newblk;
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	int hepb = 1<<(bs-4);
-	/* hepb = half the number of entries in a block */
-
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	ASSERT(tbl->zt_blk != 0);
-	ASSERT(tbl->zt_numblks > 0);
-
-	if (tbl->zt_nextblk != 0) {
-		newblk = tbl->zt_nextblk;
-	} else {
-		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
-		tbl->zt_nextblk = newblk;
-		ASSERT0(tbl->zt_blks_copied);
-		dmu_prefetch_by_dnode(zap->zap_dnode, 0,
-		    tbl->zt_blk << bs, tbl->zt_numblks << bs,
-		    ZIO_PRIORITY_SYNC_READ);
-	}
-
-	/*
-	 * Copy the ptrtbl from the old to new location.
-	 */
-
-	uint64_t b = tbl->zt_blks_copied;
-	dmu_buf_t *db_old;
-	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
-	if (err != 0)
-		return (err);
-
-	/* first half of entries in old[b] go to new[2*b+0] */
-	dmu_buf_t *db_new;
-	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
-	dmu_buf_will_dirty(db_new, tx);
-	transfer_func(db_old->db_data, db_new->db_data, hepb);
-	dmu_buf_rele(db_new, FTAG);
-
-	/* second half of entries in old[b] go to new[2*b+1] */
-	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
-	dmu_buf_will_dirty(db_new, tx);
-	transfer_func((uint64_t *)db_old->db_data + hepb,
-	    db_new->db_data, hepb);
-	dmu_buf_rele(db_new, FTAG);
-
-	dmu_buf_rele(db_old, FTAG);
-
-	tbl->zt_blks_copied++;
-
-	dprintf("copied block %llu of %llu\n",
-	    (u_longlong_t)tbl->zt_blks_copied,
-	    (u_longlong_t)tbl->zt_numblks);
-
-	if (tbl->zt_blks_copied == tbl->zt_numblks) {
-		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
-		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);
-
-		tbl->zt_blk = newblk;
-		tbl->zt_numblks *= 2;
-		tbl->zt_shift++;
-		tbl->zt_nextblk = 0;
-		tbl->zt_blks_copied = 0;
-
-		dprintf("finished; numblocks now %llu (%uk entries)\n",
-		    (u_longlong_t)tbl->zt_numblks, 1<<(tbl->zt_shift-10));
-	}
-
-	return (0);
-}
-
-static int
-zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
-    dmu_tx_t *tx)
-{
-	int bs = FZAP_BLOCK_SHIFT(zap);
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-	ASSERT(tbl->zt_blk != 0);
-
-	dprintf("storing %llx at index %llx\n", (u_longlong_t)val,
-	    (u_longlong_t)idx);
-
-	uint64_t blk = idx >> (bs-3);
-	uint64_t off = idx & ((1<<(bs-3))-1);
-
-	dmu_buf_t *db;
-	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
-	if (err != 0)
-		return (err);
-	dmu_buf_will_dirty(db, tx);
-
-	if (tbl->zt_nextblk != 0) {
-		uint64_t idx2 = idx * 2;
-		uint64_t blk2 = idx2 >> (bs-3);
-		uint64_t off2 = idx2 & ((1<<(bs-3))-1);
-		dmu_buf_t *db2;
-
-		err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
-		    DMU_READ_NO_PREFETCH);
-		if (err != 0) {
-			dmu_buf_rele(db, FTAG);
-			return (err);
-		}
-		dmu_buf_will_dirty(db2, tx);
-		((uint64_t *)db2->db_data)[off2] = val;
-		((uint64_t *)db2->db_data)[off2+1] = val;
-		dmu_buf_rele(db2, FTAG);
-	}
-
-	((uint64_t *)db->db_data)[off] = val;
-	dmu_buf_rele(db, FTAG);
-
-	return (0);
-}
-
-static int
-zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
-{
-	int bs = FZAP_BLOCK_SHIFT(zap);
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	uint64_t blk = idx >> (bs-3);
-	uint64_t off = idx & ((1<<(bs-3))-1);
-
-	dmu_buf_t *db;
-	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
-	if (err != 0)
-		return (err);
-	*valp = ((uint64_t *)db->db_data)[off];
-	dmu_buf_rele(db, FTAG);
-
-	if (tbl->zt_nextblk != 0) {
-		/*
-		 * read the nextblk for the sake of i/o error checking,
-		 * so that zap_table_load() will catch errors for
-		 * zap_table_store.
-		 */
-		blk = (idx*2) >> (bs-3);
-
-		err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-		    (tbl->zt_nextblk + blk) << bs, FTAG, &db,
-		    DMU_READ_NO_PREFETCH);
-		if (err == 0)
-			dmu_buf_rele(db, FTAG);
-	}
-	return (err);
-}
-
-/*
- * Routines for growing the ptrtbl.
- */
-
-static void
-zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n)
-{
-	for (int i = 0; i < n; i++) {
-		uint64_t lb = src[i];
-		dst[2 * i + 0] = lb;
-		dst[2 * i + 1] = lb;
-	}
-}
-
-static int
-zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
-{
-	/*
-	 * The pointer table should never use more hash bits than we
-	 * have (otherwise we'd be using useless zero bits to index it).
-	 * If we are within 2 bits of running out, stop growing, since
-	 * this is already an aberrant condition.
-	 */
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
-		return (SET_ERROR(ENOSPC));
-
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
-		/*
-		 * We are outgrowing the "embedded" ptrtbl (the one
-		 * stored in the header block).  Give it its own entire
-		 * block, which will double the size of the ptrtbl.
-		 */
-		ASSERT3U(zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
-		    ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
-		ASSERT0(zap_f_phys(zap)->zap_ptrtbl.zt_blk);
-
-		uint64_t newblk = zap_allocate_blocks(zap, 1);
-		dmu_buf_t *db_new;
-		int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
-		    DMU_READ_NO_PREFETCH);
-		if (err != 0)
-			return (err);
-		dmu_buf_will_dirty(db_new, tx);
-		zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
-		    db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
-		dmu_buf_rele(db_new, FTAG);
-
-		zap_f_phys(zap)->zap_ptrtbl.zt_blk = newblk;
-		zap_f_phys(zap)->zap_ptrtbl.zt_numblks = 1;
-		zap_f_phys(zap)->zap_ptrtbl.zt_shift++;
-
-		ASSERT3U(1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
-		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks <<
-		    (FZAP_BLOCK_SHIFT(zap)-3));
-
-		return (0);
-	} else {
-		return (zap_table_grow(zap, &zap_f_phys(zap)->zap_ptrtbl,
-		    zap_ptrtbl_transfer, tx));
-	}
-}
-
-static void
-zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
-{
-	dmu_buf_will_dirty(zap->zap_dbuf, tx);
-	mutex_enter(&zap->zap_f.zap_num_entries_mtx);
-	ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta);
-	zap_f_phys(zap)->zap_num_entries += delta;
-	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
-}
+/* zap_create */
 
 static uint64_t
-zap_allocate_blocks(zap_t *zap, int nblocks)
+zap_create_impl(objset_t *os, int normflags, zap_flags_t flags,
+    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
+    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
 {
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	uint64_t newblk = zap_f_phys(zap)->zap_freeblk;
-	zap_f_phys(zap)->zap_freeblk += nblocks;
-	return (newblk);
-}
+	uint64_t obj;
 
-static void
-zap_leaf_evict_sync(void *dbu)
-{
-	zap_leaf_t *l = dbu;
+	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
 
-	rw_destroy(&l->l_rwlock);
-	kmem_free(l, sizeof (zap_leaf_t));
-}
-
-static zap_leaf_t *
-zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
-{
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	uint64_t blkid = zap_allocate_blocks(zap, 1);
-	dmu_buf_t *db = NULL;
-
-	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db,
-	    DMU_READ_NO_PREFETCH));
-
-	/*
-	 * Create the leaf structure and stash it on the dbuf. If zap was
-	 * recent shrunk or truncated, the dbuf might have been sitting in the
-	 * cache waiting to be evicted, and so still have the old leaf attached
-	 * to it. If so, just reuse it.
-	 */
-	zap_leaf_t *l = dmu_buf_get_user(db);
-	if (l == NULL) {
-		l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
-		l->l_blkid = blkid;
-		l->l_dbuf = db;
-		rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL);
-		dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL,
-		    &l->l_dbuf);
-		dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
+	if (allocated_dnode == NULL) {
+		dnode_t *dn;
+		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
+		    indirect_blockshift, bonustype, bonuslen, dnodesize,
+		    &dn, FTAG, tx);
+		mzap_create_impl(dn, normflags, flags, tx);
+		dnode_rele(dn, FTAG);
 	} else {
-		ASSERT3U(l->l_blkid, ==, blkid);
-		ASSERT3P(l->l_dbuf, ==, db);
+		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
+		    indirect_blockshift, bonustype, bonuslen, dnodesize,
+		    allocated_dnode, tag, tx);
+		mzap_create_impl(*allocated_dnode, normflags, flags, tx);
 	}
 
-	rw_enter(&l->l_rwlock, RW_WRITER);
-	dmu_buf_will_dirty(l->l_dbuf, tx);
-
-	zap_leaf_init(l, zap->zap_normflags != 0);
-
-	zap_f_phys(zap)->zap_num_leafs++;
-
-	return (l);
+	return (obj);
 }
 
-int
-fzap_count(zap_t *zap, uint64_t *count)
+uint64_t
+zap_create(objset_t *os, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	ASSERT(!zap->zap_ismicro);
-	mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
-	*count = zap_f_phys(zap)->zap_num_entries;
-	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
-	return (0);
+	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
 }
 
-/*
- * Routines for obtaining zap_leaf_t's
- */
-
-void
-zap_put_leaf(zap_leaf_t *l)
+uint64_t
+zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
 {
-	rw_exit(&l->l_rwlock);
-	dmu_buf_rele(l->l_dbuf, NULL);
+	return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
+	    dnodesize, tx));
 }
 
-static zap_leaf_t *
-zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
+uint64_t
+zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	ASSERT(blkid != 0);
-
-	zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
-	rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL);
-	rw_enter(&l->l_rwlock, RW_WRITER);
-	l->l_blkid = blkid;
-	l->l_bs = highbit64(db->db_size) - 1;
-	l->l_dbuf = db;
-
-	dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
-	zap_leaf_t *winner = dmu_buf_set_user(db, &l->l_dbu);
-
-	rw_exit(&l->l_rwlock);
-	if (winner != NULL) {
-		/* someone else set it first */
-		zap_leaf_evict_sync(&l->l_dbu);
-		l = winner;
-	}
-
-	/*
-	 * lhr_pad was previously used for the next leaf in the leaf
-	 * chain.  There should be no chained leafs (as we have removed
-	 * support for them).
-	 */
-	ASSERT0(zap_leaf_phys(l)->l_hdr.lh_pad1);
-
-	/*
-	 * There should be more hash entries than there can be
-	 * chunks to put in the hash table
-	 */
-	ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
-
-	/* The chunks should begin at the end of the hash table */
-	ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, (zap_leaf_chunk_t *)
-	    &zap_leaf_phys(l)->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
-
-	/* The chunks should end at the end of the block */
-	ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
-	    (uintptr_t)zap_leaf_phys(l), ==, l->l_dbuf->db_size);
-
-	return (l);
+	return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
+	    0, tx));
 }
 
-static int
-zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
-    zap_leaf_t **lp)
+uint64_t
+zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
 {
-	dmu_buf_t *db;
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	/*
-	 * If system crashed just after dmu_free_long_range in zfs_rmnode, we
-	 * would be left with an empty xattr dir in delete queue. blkid=0
-	 * would be passed in when doing zfs_purgedir. If that's the case we
-	 * should just return immediately. The underlying objects should
-	 * already be freed, so this should be perfectly fine.
-	 */
-	if (blkid == 0)
-		return (SET_ERROR(ENOENT));
-
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-	    blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
-	if (err != 0)
-		return (err);
-
-	ASSERT3U(db->db_object, ==, zap->zap_object);
-	ASSERT3U(db->db_offset, ==, blkid << bs);
-	ASSERT3U(db->db_size, ==, 1 << bs);
-	ASSERT(blkid != 0);
-
-	zap_leaf_t *l = dmu_buf_get_user(db);
-
-	if (l == NULL)
-		l = zap_open_leaf(blkid, db);
-
-	rw_enter(&l->l_rwlock, lt);
-	/*
-	 * Must lock before dirtying, otherwise zap_leaf_phys(l) could change,
-	 * causing ASSERT below to fail.
-	 */
-	if (lt == RW_WRITER)
-		dmu_buf_will_dirty(db, tx);
-	ASSERT3U(l->l_blkid, ==, blkid);
-	ASSERT3P(l->l_dbuf, ==, db);
-	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_block_type, ==, ZBT_LEAF);
-	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
-
-	*lp = l;
-	return (0);
+	return (zap_create_impl(os, normflags, 0, ot, 0, 0,
+	    bonustype, bonuslen, dnodesize, NULL, NULL, tx));
 }
 
-static int
-zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
+uint64_t
+zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
+    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
-		ASSERT3U(idx, <,
-		    (1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift));
-		*valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
-		return (0);
-	} else {
-		return (zap_table_load(zap, &zap_f_phys(zap)->zap_ptrtbl,
-		    idx, valp));
-	}
+	return (zap_create_flags_dnsize(os, normflags, flags, ot,
+	    leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
 }
 
-static int
-zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
+uint64_t
+zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
+    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
 {
-	ASSERT(tx != NULL);
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) {
-		ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
-		return (0);
-	} else {
-		return (zap_table_store(zap, &zap_f_phys(zap)->zap_ptrtbl,
-		    idx, blk, tx));
-	}
+	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
+	    indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL,
+	    tx));
 }
 
-static int
-zap_set_idx_range_to_blk(zap_t *zap, uint64_t idx, uint64_t nptrs, uint64_t blk,
-    dmu_tx_t *tx)
+/* zap_create_hold */
+
+uint64_t
+zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
+    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
+    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
 {
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	int epb = bs >> 3; /* entries per block */
-	int err = 0;
-
-	ASSERT(tx != NULL);
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-
-	/*
-	 * Check for i/o errors
-	 */
-	for (int i = 0; i < nptrs; i += epb) {
-		uint64_t blk;
-		err = zap_idx_to_blk(zap, idx + i, &blk);
-		if (err != 0) {
-			return (err);
-		}
-	}
-
-	for (int i = 0; i < nptrs; i++) {
-		err = zap_set_idx_to_blk(zap, idx + i, blk, tx);
-		ASSERT0(err); /* we checked for i/o errors above */
-		if (err != 0)
-			break;
-	}
-
-	return (err);
+	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
+	    indirect_blockshift, bonustype, bonuslen, dnodesize,
+	    allocated_dnode, tag, tx));
 }
 
-#define	ZAP_PREFIX_HASH(pref, pref_len)	((pref) << (64 - (pref_len)))
-
-/*
- * Each leaf has single range of entries (block pointers) in the ZAP ptrtbl.
- * If two leaves are siblings, their ranges are adjecent and contain the same
- * number of entries. In order to find out if a leaf has a sibling, we need to
- * check the range corresponding to the sibling leaf. There is no need to check
- * all entries in the range, we only need to check the frist and the last one.
- */
-static uint64_t
-check_sibling_ptrtbl_range(zap_t *zap, uint64_t prefix, uint64_t prefix_len)
-{
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	uint64_t h = ZAP_PREFIX_HASH(prefix, prefix_len);
-	uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	uint64_t pref_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift - prefix_len;
-	uint64_t nptrs = (1 << pref_diff);
-	uint64_t first;
-	uint64_t last;
-
-	ASSERT3U(idx+nptrs, <=, (1UL << zap_f_phys(zap)->zap_ptrtbl.zt_shift));
-
-	if (zap_idx_to_blk(zap, idx, &first) != 0)
-		return (0);
-
-	if (zap_idx_to_blk(zap, idx + nptrs - 1, &last) != 0)
-		return (0);
-
-	if (first != last)
-		return (0);
-	return (first);
-}
-
-static int
-zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
-{
-	uint64_t blk;
-
-	ASSERT(zap->zap_dbuf == NULL ||
-	    zap_f_phys(zap) == zap->zap_dbuf->db_data);
-
-	/* Reality check for corrupt zap objects (leaf or header). */
-	if ((zap_f_phys(zap)->zap_block_type != ZBT_LEAF &&
-	    zap_f_phys(zap)->zap_block_type != ZBT_HEADER) ||
-	    zap_f_phys(zap)->zap_magic != ZAP_MAGIC) {
-		return (SET_ERROR(EIO));
-	}
-
-	uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	int err = zap_idx_to_blk(zap, idx, &blk);
-	if (err != 0)
-		return (err);
-	err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
-
-	ASSERT(err ||
-	    ZAP_HASH_IDX(h, zap_leaf_phys(*lp)->l_hdr.lh_prefix_len) ==
-	    zap_leaf_phys(*lp)->l_hdr.lh_prefix);
-	return (err);
-}
-
-static int
-zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l,
-    const void *tag, dmu_tx_t *tx, zap_leaf_t **lp)
-{
-	zap_t *zap = zn->zn_zap;
-	uint64_t hash = zn->zn_hash;
-	int err;
-	int old_prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
-
-	ASSERT3U(old_prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-
-	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
-	    zap_leaf_phys(l)->l_hdr.lh_prefix);
-
-	if (zap_tryupgradedir(zap, tx) == 0 ||
-	    old_prefix_len == zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
-		/* We failed to upgrade, or need to grow the pointer table */
-		objset_t *os = zap->zap_objset;
-		uint64_t object = zap->zap_object;
-
-		zap_put_leaf(l);
-		*lp = l = NULL;
-		zap_unlockdir(zap, tag);
-		err = zap_lockdir(os, object, tx, RW_WRITER,
-		    FALSE, FALSE, tag, &zn->zn_zap);
-		zap = zn->zn_zap;
-		if (err != 0)
-			return (err);
-		ASSERT(!zap->zap_ismicro);
-
-		while (old_prefix_len ==
-		    zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
-			err = zap_grow_ptrtbl(zap, tx);
-			if (err != 0)
-				return (err);
-		}
-
-		err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
-		if (err != 0)
-			return (err);
-
-		if (zap_leaf_phys(l)->l_hdr.lh_prefix_len != old_prefix_len) {
-			/* it split while our locks were down */
-			*lp = l;
-			return (0);
-		}
-	}
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	ASSERT3U(old_prefix_len, <, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
-	    zap_leaf_phys(l)->l_hdr.lh_prefix);
-
-	int prefix_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
-	    (old_prefix_len + 1);
-	uint64_t sibling =
-	    (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;
-
-	/* check for i/o errors before doing zap_leaf_split */
-	for (int i = 0; i < (1ULL << prefix_diff); i++) {
-		uint64_t blk;
-		err = zap_idx_to_blk(zap, sibling + i, &blk);
-		if (err != 0)
-			return (err);
-		ASSERT3U(blk, ==, l->l_blkid);
-	}
-
-	zap_leaf_t *nl = zap_create_leaf(zap, tx);
-	zap_leaf_split(l, nl, zap->zap_normflags != 0);
-
-	/* set sibling pointers */
-	for (int i = 0; i < (1ULL << prefix_diff); i++) {
-		err = zap_set_idx_to_blk(zap, sibling + i, nl->l_blkid, tx);
-		ASSERT0(err); /* we checked for i/o errors above */
-	}
-
-	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_prefix_len, >, 0);
-
-	if (hash & (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) {
-		/* we want the sibling */
-		zap_put_leaf(l);
-		*lp = nl;
-	} else {
-		zap_put_leaf(nl);
-		*lp = l;
-	}
-
-	return (0);
-}
-
-static void
-zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l,
-    const void *tag, dmu_tx_t *tx)
-{
-	zap_t *zap = zn->zn_zap;
-	int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
-	int leaffull = (zap_leaf_phys(l)->l_hdr.lh_prefix_len == shift &&
-	    zap_leaf_phys(l)->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);
-
-	zap_put_leaf(l);
-
-	if (leaffull || zap_f_phys(zap)->zap_ptrtbl.zt_nextblk) {
-		/*
-		 * We are in the middle of growing the pointer table, or
-		 * this leaf will soon make us grow it.
-		 */
-		if (zap_tryupgradedir(zap, tx) == 0) {
-			objset_t *os = zap->zap_objset;
-			uint64_t zapobj = zap->zap_object;
-
-			zap_unlockdir(zap, tag);
-			int err = zap_lockdir(os, zapobj, tx,
-			    RW_WRITER, FALSE, FALSE, tag, &zn->zn_zap);
-			zap = zn->zn_zap;
-			if (err != 0)
-				return;
-		}
-
-		/* could have finished growing while our locks were down */
-		if (zap_f_phys(zap)->zap_ptrtbl.zt_shift == shift)
-			(void) zap_grow_ptrtbl(zap, tx);
-	}
-}
-
-static int
-fzap_checkname(zap_name_t *zn)
-{
-	uint32_t maxnamelen = zn->zn_normbuf_len;
-	uint64_t len = (uint64_t)zn->zn_key_orig_numints * zn->zn_key_intlen;
-	/* Only allow directory zap to have longname */
-	if (len > maxnamelen ||
-	    (len > ZAP_MAXNAMELEN &&
-	    zn->zn_zap->zap_dnode->dn_type != DMU_OT_DIRECTORY_CONTENTS))
-		return (SET_ERROR(ENAMETOOLONG));
-	return (0);
-}
-
-static int
-fzap_checksize(uint64_t integer_size, uint64_t num_integers)
-{
-	/* Only integer sizes supported by C */
-	switch (integer_size) {
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		break;
-	default:
-		return (SET_ERROR(EINVAL));
-	}
-
-	if (integer_size * num_integers > ZAP_MAXVALUELEN)
-		return (SET_ERROR(E2BIG));
-
-	return (0);
-}
-
-static int
-fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)
-{
-	int err = fzap_checkname(zn);
-	if (err != 0)
-		return (err);
-	return (fzap_checksize(integer_size, num_integers));
-}
-
-/*
- * Routines for manipulating attributes.
- */
-int
-fzap_lookup(zap_name_t *zn,
-    uint64_t integer_size, uint64_t num_integers, void *buf,
-    char *realname, int rn_len, boolean_t *ncp,
-    uint64_t *actual_num_integers)
-{
-	zap_leaf_t *l;
-	zap_entry_handle_t zeh;
-
-	int err = fzap_checkname(zn);
-	if (err != 0)
-		return (err);
-
-	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
-	if (err != 0)
-		return (err);
-	err = zap_leaf_lookup(l, zn, &zeh);
-	if (err == 0) {
-		if ((err = fzap_checksize(integer_size, num_integers)) != 0) {
-			zap_put_leaf(l);
-			return (err);
-		}
-
-		err = zap_entry_read(&zeh, integer_size, num_integers, buf);
-		if (err == 0 && actual_num_integers != NULL)
-			*actual_num_integers = zeh.zeh_num_integers;
-		(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
-		if (ncp) {
-			*ncp = zap_entry_normalization_conflict(&zeh,
-			    zn, NULL, zn->zn_zap);
-		}
-	}
-
-	zap_put_leaf(l);
-	return (err);
-}
-
-int
-fzap_add_cd(zap_name_t *zn,
-    uint64_t integer_size, uint64_t num_integers,
-    const void *val, uint32_t cd, const void *tag, dmu_tx_t *tx)
-{
-	zap_leaf_t *l;
-	int err;
-	zap_entry_handle_t zeh;
-	zap_t *zap = zn->zn_zap;
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-	ASSERT(!zap->zap_ismicro);
-	ASSERT0(fzap_check(zn, integer_size, num_integers));
-
-	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
-	if (err != 0)
-		return (err);
-retry:
-	err = zap_leaf_lookup(l, zn, &zeh);
-	if (err == 0) {
-		err = SET_ERROR(EEXIST);
-		goto out;
-	}
-	if (err != ENOENT)
-		goto out;
-
-	err = zap_entry_create(l, zn, cd,
-	    integer_size, num_integers, val, &zeh);
-
-	if (err == 0) {
-		zap_increment_num_entries(zap, 1, tx);
-	} else if (err == EAGAIN) {
-		err = zap_expand_leaf(zn, l, tag, tx, &l);
-		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
-		if (err == 0)
-			goto retry;
-	}
-
-out:
-	if (l != NULL) {
-		if (err == ENOSPC)
-			zap_put_leaf(l);
-		else
-			zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
-	}
-	return (err);
-}
-
-int
-fzap_add(zap_name_t *zn,
-    uint64_t integer_size, uint64_t num_integers,
-    const void *val, const void *tag, dmu_tx_t *tx)
-{
-	int err = fzap_check(zn, integer_size, num_integers);
-	if (err != 0)
-		return (err);
-
-	return (fzap_add_cd(zn, integer_size, num_integers,
-	    val, ZAP_NEED_CD, tag, tx));
-}
-
-int
-fzap_update(zap_name_t *zn,
-    int integer_size, uint64_t num_integers, const void *val,
-    const void *tag, dmu_tx_t *tx)
-{
-	zap_leaf_t *l;
-	int err;
-	boolean_t create;
-	zap_entry_handle_t zeh;
-	zap_t *zap = zn->zn_zap;
-
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-	err = fzap_check(zn, integer_size, num_integers);
-	if (err != 0)
-		return (err);
-
-	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
-	if (err != 0)
-		return (err);
-retry:
-	err = zap_leaf_lookup(l, zn, &zeh);
-	create = (err == ENOENT);
-	ASSERT(err == 0 || err == ENOENT);
-
-	if (create) {
-		err = zap_entry_create(l, zn, ZAP_NEED_CD,
-		    integer_size, num_integers, val, &zeh);
-		if (err == 0)
-			zap_increment_num_entries(zap, 1, tx);
-	} else {
-		err = zap_entry_update(&zeh, integer_size, num_integers, val);
-	}
-
-	if (err == EAGAIN) {
-		err = zap_expand_leaf(zn, l, tag, tx, &l);
-		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
-		if (err == 0)
-			goto retry;
-	}
-
-	if (l != NULL) {
-		if (err == ENOSPC)
-			zap_put_leaf(l);
-		else
-			zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
-	}
-	return (err);
-}
-
-int
-fzap_length(zap_name_t *zn,
-    uint64_t *integer_size, uint64_t *num_integers)
-{
-	zap_leaf_t *l;
-	int err;
-	zap_entry_handle_t zeh;
-
-	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
-	if (err != 0)
-		return (err);
-	err = zap_leaf_lookup(l, zn, &zeh);
-	if (err != 0)
-		goto out;
-
-	if (integer_size != NULL)
-		*integer_size = zeh.zeh_integer_size;
-	if (num_integers != NULL)
-		*num_integers = zeh.zeh_num_integers;
-out:
-	zap_put_leaf(l);
-	return (err);
-}
-
-int
-fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
-{
-	zap_leaf_t *l;
-	int err;
-	zap_entry_handle_t zeh;
-
-	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l);
-	if (err != 0)
-		return (err);
-	err = zap_leaf_lookup(l, zn, &zeh);
-	if (err == 0) {
-		zap_entry_remove(&zeh);
-		zap_increment_num_entries(zn->zn_zap, -1, tx);
-
-		if (zap_leaf_phys(l)->l_hdr.lh_nentries == 0 &&
-		    zap_shrink_enabled)
-			return (zap_shrink(zn, l, tx));
-	}
-	zap_put_leaf(l);
-	return (err);
-}
-
-void
-fzap_prefetch(zap_name_t *zn)
-{
-	uint64_t blk;
-	zap_t *zap = zn->zn_zap;
-
-	uint64_t idx = ZAP_HASH_IDX(zn->zn_hash,
-	    zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	if (zap_idx_to_blk(zap, idx, &blk) != 0)
-		return;
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	dmu_prefetch_by_dnode(zap->zap_dnode, 0, blk << bs, 1 << bs,
-	    ZIO_PRIORITY_SYNC_READ);
-}
-
-/*
- * Helper functions for consumers.
- */
+/* zap_create_link */
 
 uint64_t
 zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
@@ -1109,169 +152,727 @@ zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
 	return (new_obj);
 }
 
+/* zap_create_claim */
+
 int
-zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
-    char *name, uint64_t namelen)
+zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	zap_cursor_t zc;
-	int err;
+	return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
+	    0, tx));
+}
 
-	if (mask == 0)
-		mask = -1ULL;
+int
+zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+	return (zap_create_claim_norm_dnsize(os, obj,
+	    0, ot, bonustype, bonuslen, dnodesize, tx)); /* normflags = 0 */
+}
 
-	zap_attribute_t *za = zap_attribute_long_alloc();
-	for (zap_cursor_init(&zc, os, zapobj);
-	    (err = zap_cursor_retrieve(&zc, za)) == 0;
-	    zap_cursor_advance(&zc)) {
-		if ((za->za_first_integer & mask) == (value & mask)) {
-			if (strlcpy(name, za->za_name, namelen) >= namelen)
-				err = SET_ERROR(ENAMETOOLONG);
-			break;
+int
+zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
+    dmu_object_type_t ot,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+	return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
+	    bonuslen, 0, tx));	/* dnodesize = 0 */
+}
+
+int
+zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
+    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int error;
+	/* The object type must be one that byteswaps as a ZAP. */
+	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
+	error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
+	    dnodesize, tx);
+	if (error != 0)
+		return (error);
+	/* Claim succeeded: hold the dnode for mzap_create_impl(). */
+	error = dnode_hold(os, obj, FTAG, &dn);
+	if (error != 0)
+		return (error);
+
+	mzap_create_impl(dn, normflags, 0, tx);
+
+	dnode_rele(dn, FTAG);	/* matches the dnode_hold() above */
+
+	return (0);
+}
+
+/* zap_destroy: the whole job is delegated to dmu_object_free(). */
+
+int
+zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
+{
+	/*
+	 * dmu_object_free will free the object number and free the
+	 * data.  Freeing the data will cause our pageout function to be
+	 * called, which will destroy our data (zap_leaf_t's and zap_t).
+	 */
+
+	return (dmu_object_free(os, zapobj, tx));
+}
+
+/* zap_lookup */
+
+int
+zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf,
+    matchtype_t mt, char *realname, int rn_len,
+    boolean_t *ncp)
+{
+	zap_t *zap;
+
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+
+	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	if (!zap->zap_ismicro) {
+		err = fzap_lookup(zn, integer_size, num_integers, buf,
+		    realname, rn_len, ncp, NULL);
+	} else {
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
+		if (mze == NULL) {
+			err = SET_ERROR(ENOENT);
+		} else {
+			if (num_integers < 1) {
+				err = SET_ERROR(EOVERFLOW);
+			} else if (integer_size != 8) {
+				err = SET_ERROR(EINVAL);
+			} else {
+				*(uint64_t *)buf =
+				    MZE_PHYS(zap, mze)->mze_value;
+				if (realname != NULL)
+					(void) strlcpy(realname,
+					    MZE_PHYS(zap, mze)->mze_name,
+					    rn_len);
+				if (ncp) {
+					*ncp = mzap_normalization_conflict(zap,
+					    zn, mze, &idx);
+				}
+			}
 		}
 	}
-	zap_cursor_fini(&zc);
-	zap_attribute_free(za);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
 	return (err);
 }
 
 int
-zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx)
+zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf)
 {
-	zap_cursor_t zc;
-	int err = 0;
+	return (zap_lookup_norm(os, zapobj, name, integer_size,
+	    num_integers, buf, 0, NULL, 0, NULL));
+}
 
-	zap_attribute_t *za = zap_attribute_long_alloc();
-	for (zap_cursor_init(&zc, os, fromobj);
-	    zap_cursor_retrieve(&zc, za) == 0;
-	    (void) zap_cursor_advance(&zc)) {
-		if (za->za_integer_length != 8 || za->za_num_integers != 1) {
-			err = SET_ERROR(EINVAL);
-			break;
-		}
-		err = zap_add(os, intoobj, za->za_name,
-		    8, 1, &za->za_first_integer, tx);
-		if (err != 0)
-			break;
+int
+zap_lookup_by_dnode(dnode_t *dn, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf)
+{
+	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
+	    num_integers, buf, 0, NULL, 0, NULL)); /* mt 0, no realname/ncp */
+}
+
+int
+zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
+    uint64_t integer_size, uint64_t num_integers, void *buf,
+    matchtype_t mt, char *realname, int rn_len,
+    boolean_t *ncp)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_lookup_norm_by_dnode(dn, name, integer_size,
+	    num_integers, buf, mt, realname, rn_len, ncp);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_lookup_uint64 */
+
+int
+zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
+    uint64_t *actual_num_integers)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);
+		return (SET_ERROR(ENOTSUP));
 	}
-	zap_cursor_fini(&zc);
-	zap_attribute_free(za);
+
+	err = fzap_lookup(zn, integer_size, num_integers, buf,
+	    NULL, 0, NULL, actual_num_integers);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
 	return (err);
 }
 
 int
-zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
-    uint64_t value, dmu_tx_t *tx)
+zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
 {
-	zap_cursor_t zc;
-	int err = 0;
-
-	zap_attribute_t *za = zap_attribute_long_alloc();
-	for (zap_cursor_init(&zc, os, fromobj);
-	    zap_cursor_retrieve(&zc, za) == 0;
-	    (void) zap_cursor_advance(&zc)) {
-		if (za->za_integer_length != 8 || za->za_num_integers != 1) {
-			err = SET_ERROR(EINVAL);
-			break;
-		}
-		err = zap_add(os, intoobj, za->za_name,
-		    8, 1, &value, tx);
-		if (err != 0)
-			break;
-	}
-	zap_cursor_fini(&zc);
-	zap_attribute_free(za);
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_lookup_length_uint64_by_dnode(dn, key, key_numints,
+	    integer_size, num_integers, buf, NULL);
+	dnode_rele(dn, FTAG);
 	return (err);
 }
 
 int
-zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
+{
+	return (zap_lookup_length_uint64_by_dnode(dn, key, key_numints,
+	    integer_size, num_integers, buf, NULL));
+}
+
+/* zap_contains */
+
+int
+zap_contains_by_dnode(dnode_t *dn, const char *name)
+{
+	int err = zap_lookup_norm_by_dnode(dn, name, 0,
+	    0, NULL, 0, NULL, 0, NULL);	/* zero-sized request, NULL buf */
+	if (err == EOVERFLOW || err == EINVAL)
+		err = 0; /* found, but skipped reading the value */
+	return (err);
+}
+
+int
+zap_contains(objset_t *os, uint64_t zapobj, const char *name)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_contains_by_dnode(dn, name);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_prefetch */
+
+static int
+zap_prefetch_by_dnode(dnode_t *dn, const char *name)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err)
+		return (err);
+
+	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	/* NOTE(review): fzap call made without a zap_ismicro check. */
+	fzap_prefetch(zn);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);	/* still 0: err is not reassigned above */
+}
+
+int
+zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_prefetch_by_dnode(dn, name);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_prefetch_uint64 */
+
+int
+zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* Wrap the uint64 key in a zap_name_t for fzap_prefetch(). */
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+
+	fzap_prefetch(zn);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (0);
+}
+
+int
+zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_prefetch_uint64_by_dnode(dn, key, key_numints);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_prefetch_object */
+
+int
+zap_prefetch_object(objset_t *os, uint64_t zapobj)
+{
+	int error;
+	dmu_object_info_t doi;
+	/* Only objects whose type byteswaps as a ZAP are prefetched. */
+	error = dmu_object_info(os, zapobj, &doi);
+	if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
+		error = SET_ERROR(EINVAL);	/* not a ZAP object */
+	if (error == 0)
+		dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset);
+
+	return (error);
+}
+
+/* zap_add */
+
+int
+zap_add_by_dnode(dnode_t *dn, const char *key,
+    int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* intval is read only on the micro path (one 8-byte integer). */
+	const uint64_t *intval = val;
+	zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	if (!zap->zap_ismicro) {	/* fat ZAP: add directly */
+		err = fzap_add(zn, integer_size, num_integers, val, tx);
+	} else if (integer_size != 8 || num_integers != 1 ||
+	    strlen(key) >= MZAP_NAME_LEN ||
+	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
+		err = mzap_upgrade(&zn->zn_zap, tx, 0); /* then add as fat */
+		if (err == 0) {
+			err = fzap_add(zn, integer_size, num_integers, val, tx);
+		}
+	} else {	/* none of the upgrade tests hit */
+		zfs_btree_index_t idx;
+		if (mze_find(zn, &idx) != NULL) {
+			err = SET_ERROR(EEXIST); /* name already present */
+		} else {
+			mzap_addent(zn, *intval);
+		}
+	}
+	ASSERT(zap == zn->zn_zap);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);
+}
+
+int
+zap_add(objset_t *os, uint64_t zapobj, const char *key,
+    int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_add_by_dnode(dn, key, integer_size, num_integers, val, tx);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_add_uint64 */
+
+int
+zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+    int key_numints, int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* NOTE(review): unlike zap_add_by_dnode(), zap is re-read below. */
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	err = fzap_add(zn, integer_size, num_integers, val, tx);
+	zap = zn->zn_zap;	/* fzap_add() may change zap */
+	zap_name_free(zn);
+	if (zap != NULL)	/* may be NULL if fzap_add() failed */
+		zap_unlock(zap, FTAG);
+	return (err);
+}
+
+int
+zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints, int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_add_uint64_by_dnode(dn, key, key_numints,
+	    integer_size, num_integers, val, tx);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_update */
+
+int
+zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size,
+    uint64_t num_integers, const void *val, dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* intval is read only on the micro path (one 8-byte integer). */
+	const uint64_t *intval = val;
+	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	if (!zap->zap_ismicro) {	/* fat ZAP: update directly */
+		err = fzap_update(zn, integer_size, num_integers, val, tx);
+	} else if (integer_size != 8 || num_integers != 1 ||
+	    strlen(name) >= MZAP_NAME_LEN) {
+		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
+		    (u_longlong_t)dn->dn_object, integer_size,
+		    (u_longlong_t)num_integers, name);
+		err = mzap_upgrade(&zn->zn_zap, tx, 0); /* then fzap_update */
+		if (err == 0) {
+			err = fzap_update(zn, integer_size, num_integers,
+			    val, tx);
+		}
+	} else {	/* none of the upgrade tests hit */
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
+		if (mze != NULL) {	/* overwrite in place */
+			MZE_PHYS(zap, mze)->mze_value = *intval;
+		} else {	/* not present: add it */
+			mzap_addent(zn, *intval);
+		}
+	}
+	ASSERT(zap == zn->zn_zap);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);
+}
+
+int
+zap_update(objset_t *os, uint64_t zapobj, const char *name,
+    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);	/* dnode_hold() failed */
+	err = zap_update_by_dnode(dn, name,
+	    integer_size, num_integers, val, tx);
+	dnode_rele(dn, FTAG);	/* drop the hold on every outcome */
+	return (err);
+}
+
+/* zap_update_uint64 */
+
+int
+zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
+    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* Wrap the uint64 key in a zap_name_t for fzap_update(). */
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	err = fzap_update(zn, integer_size, num_integers, val, tx);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);
+}
+
+int
+zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints, int integer_size, uint64_t num_integers, const void *val,
     dmu_tx_t *tx)
 {
-	zap_cursor_t zc;
-	int err = 0;
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_update_uint64_by_dnode(dn, key, key_numints,
+	    integer_size, num_integers, val, tx);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
 
-	zap_attribute_t *za = zap_attribute_long_alloc();
-	for (zap_cursor_init(&zc, os, fromobj);
-	    zap_cursor_retrieve(&zc, za) == 0;
-	    (void) zap_cursor_advance(&zc)) {
-		uint64_t delta = 0;
+/* zap_length */
 
-		if (za->za_integer_length != 8 || za->za_num_integers != 1) {
-			err = SET_ERROR(EINVAL);
-			break;
-		}
+int
+zap_length_by_dnode(dnode_t *dn, const char *name, uint64_t *integer_size,
+    uint64_t *num_integers)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
 
-		err = zap_lookup(os, intoobj, za->za_name, 8, 1, &delta);
-		if (err != 0 && err != ENOENT)
-			break;
-		delta += za->za_first_integer;
-		err = zap_update(os, intoobj, za->za_name, 8, 1, &delta, tx);
-		if (err != 0)
-			break;
+	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);
+		return (SET_ERROR(ENOTSUP));
 	}
-	zap_cursor_fini(&zc);
-	zap_attribute_free(za);
+	if (!zap->zap_ismicro) {
+		err = fzap_length(zn, integer_size, num_integers);
+	} else {
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
+		if (mze == NULL) {
+			err = SET_ERROR(ENOENT);
+		} else {
+			if (integer_size)
+				*integer_size = 8;
+			if (num_integers)
+				*num_integers = 1;
+		}
+	}
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
 	return (err);
 }
 
 int
-zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+zap_length(objset_t *os, uint64_t zapobj, const char *name,
+    uint64_t *integer_size, uint64_t *num_integers)
 {
-	char name[20];
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_length_by_dnode(dn, name, integer_size, num_integers);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
 
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
-	return (zap_add(os, obj, name, 8, 1, &value, tx));
+/* zap_length_uint64 */
+
+int
+zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
+    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	err = fzap_length(zn, integer_size, num_integers);
+	zap_name_free(zn);	/* freed before the lock is dropped */
+	zap_unlock(zap, FTAG);
+	return (err);
 }
 
 int
-zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
 {
-	char name[20];
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_length_uint64_by_dnode(dn, key, key_numints,
+	    integer_size, num_integers);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
 
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
-	return (zap_remove(os, obj, name, tx));
+/* zap_remove */
+
+static int
+zap_remove_norm_by_dnode(dnode_t *dn, const char *name, matchtype_t mt,
+    dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
+	if (err)
+		return (err);
+	/* mt is handed to zap_name_alloc_str() when building the name. */
+	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	if (!zap->zap_ismicro) {
+		err = fzap_remove(zn, tx);
+	} else {
+		zfs_btree_index_t idx;
+		mzap_ent_t *mze = mze_find(zn, &idx);
+		if (mze == NULL) {
+			err = SET_ERROR(ENOENT);
+		} else {	/* found: uncount, zero it, drop from tree */
+			zap->zap_m.zap_num_entries--;
+			memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
+			zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
+		}
+	}
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);
 }
 
 int
-zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value)
+zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
 {
-	char name[20];
-
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
-	return (zap_lookup(os, obj, name, 8, 1, &value));
+	return (zap_remove_norm(os, zapobj, name, 0, tx));
 }
 
 int
-zap_add_int_key(objset_t *os, uint64_t obj,
-    uint64_t key, uint64_t value, dmu_tx_t *tx)
+zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
 {
-	char name[20];
-
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
-	return (zap_add(os, obj, name, 8, 1, &value, tx));
+	return (zap_remove_norm_by_dnode(dn, name, 0, tx));
 }
 
 int
-zap_update_int_key(objset_t *os, uint64_t obj,
-    uint64_t key, uint64_t value, dmu_tx_t *tx)
+zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
+    matchtype_t mt, dmu_tx_t *tx)
 {
-	char name[20];
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_remove_norm_by_dnode(dn, name, mt, tx);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
 
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
-	return (zap_update(os, obj, name, 8, 1, &value, tx));
+/* zap_remove_uint64 */
+
+int
+zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
+    dmu_tx_t *tx)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	/* Wrap the uint64 key in a zap_name_t for fzap_remove(). */
+	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlock(zap, FTAG);	/* unlock before bailing out */
+		return (SET_ERROR(ENOTSUP));
+	}
+	err = fzap_remove(zn, tx);
+	zap_name_free(zn);
+	zap_unlock(zap, FTAG);
+	return (err);
 }
 
 int
-zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
+zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints, dmu_tx_t *tx)
 {
-	char name[20];
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_remove_uint64_by_dnode(dn, key, key_numints, tx);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
 
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
-	return (zap_lookup(os, obj, name, 8, 1, valuep));
+/* zap_count */
+
+int
+zap_count_by_dnode(dnode_t *dn, uint64_t *count)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+	if (!zap->zap_ismicro) {
+		err = fzap_count(zap, count);	/* fat ZAP path */
+	} else {	/* micro: counter is read from the zap_t */
+		*count = zap->zap_m.zap_num_entries;
+	}
+	zap_unlock(zap, FTAG);
+	return (err);
 }
 
 int
-zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
+zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_count_by_dnode(dn, count);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+/* zap_increment */
+
+int
+zap_increment_by_dnode(dnode_t *dn, const char *name, int64_t delta,
     dmu_tx_t *tx)
 {
 	uint64_t value = 0;
@@ -1279,439 +880,428 @@ zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
 	if (delta == 0)
 		return (0);
 
-	int err = zap_lookup(os, obj, name, 8, 1, &value);
+	int err = zap_lookup_by_dnode(dn, name, 8, 1, &value);
 	if (err != 0 && err != ENOENT)
 		return (err);
 	value += delta;
 	if (value == 0)
-		err = zap_remove(os, obj, name, tx);
+		err = zap_remove_by_dnode(dn, name, tx);
 	else
-		err = zap_update(os, obj, name, 8, 1, &value, tx);
+		err = zap_update_by_dnode(dn, name, 8, 1, &value, tx);
 	return (err);
 }
 
 int
-zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+zap_increment(objset_t *os, uint64_t zapobj, const char *name, int64_t delta,
     dmu_tx_t *tx)
 {
-	char name[20];
-
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
-	return (zap_increment(os, obj, name, delta, tx));
-}
-
-/*
- * Routines for iterating over the attributes.
- */
-
-int
-fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
-{
-	int err;
-	zap_entry_handle_t zeh;
-	zap_leaf_t *l;
-
-	/* retrieve the next entry at or after zc_hash/zc_cd */
-	/* if no entry, return ENOENT */
-
-	/*
-	 * If we are reading from the beginning, we're almost certain to
-	 * iterate over the entire ZAP object.  If there are multiple leaf
-	 * blocks (freeblk > 2), prefetch the whole object (up to
-	 * dmu_prefetch_max bytes), so that we read the leaf blocks
-	 * concurrently. (Unless noprefetch was requested via
-	 * zap_cursor_init_noprefetch()).
-	 */
-	if (zc->zc_hash == 0 && zap_iterate_prefetch &&
-	    zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
-		dmu_prefetch_by_dnode(zap->zap_dnode, 0, 0,
-		    zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
-		    ZIO_PRIORITY_ASYNC_READ);
-	}
-
-	if (zc->zc_leaf) {
-		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
-
-		/*
-		 * The leaf was either shrunk or split.
-		 */
-		if ((zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_block_type == 0) ||
-		    (ZAP_HASH_IDX(zc->zc_hash,
-		    zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
-		    zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix)) {
-			zap_put_leaf(zc->zc_leaf);
-			zc->zc_leaf = NULL;
-		}
-	}
-
-again:
-	if (zc->zc_leaf == NULL) {
-		err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
-		    &zc->zc_leaf);
-		if (err != 0)
-			return (err);
-	}
-	l = zc->zc_leaf;
-
-	err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);
-
-	if (err == ENOENT) {
-		if (zap_leaf_phys(l)->l_hdr.lh_prefix_len == 0) {
-			zc->zc_hash = -1ULL;
-			zc->zc_cd = 0;
-		} else {
-			uint64_t nocare = (1ULL <<
-			    (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len)) - 1;
-
-			zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
-			zc->zc_cd = 0;
-
-			if (zc->zc_hash == 0) {
-				zc->zc_hash = -1ULL;
-			} else {
-				zap_put_leaf(zc->zc_leaf);
-				zc->zc_leaf = NULL;
-				goto again;
-			}
-		}
-	}
-
-	if (err == 0) {
-		zc->zc_hash = zeh.zeh_hash;
-		zc->zc_cd = zeh.zeh_cd;
-		za->za_integer_length = zeh.zeh_integer_size;
-		za->za_num_integers = zeh.zeh_num_integers;
-		if (zeh.zeh_num_integers == 0) {
-			za->za_first_integer = 0;
-		} else {
-			err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
-			ASSERT(err == 0 || err == EOVERFLOW);
-		}
-		err = zap_entry_read_name(zap, &zeh,
-		    za->za_name_len, za->za_name);
-		ASSERT0(err);
-
-		za->za_normalization_conflict =
-		    zap_entry_normalization_conflict(&zeh,
-		    NULL, za->za_name, zap);
-	}
-	rw_exit(&zc->zc_leaf->l_rwlock);
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_increment_by_dnode(dn, name, delta, tx);
+	dnode_rele(dn, FTAG);
 	return (err);
 }
 
-static void
-zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
+/* zap_value_search */
+
+static int
+zap_value_search_impl(zap_cursor_t *zc, uint64_t value, uint64_t mask,
+    char *name, uint64_t namelen)
 {
-	uint64_t lastblk = 0;
+	int err;
 
-	/*
-	 * NB: if a leaf has more pointers than an entire ptrtbl block
-	 * can hold, then it'll be accounted for more than once, since
-	 * we won't have lastblk.
-	 */
-	for (int i = 0; i < len; i++) {
-		zap_leaf_t *l;
+	if (mask == 0)
+		mask = -1ULL;
 
-		if (tbl[i] == lastblk)
-			continue;
-		lastblk = tbl[i];
-
-		int err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l);
-		if (err == 0) {
-			zap_leaf_stats(zap, l, zs);
-			zap_put_leaf(l);
+	zap_attribute_t *za = zap_attribute_long_alloc();
+	for (; (err = zap_cursor_retrieve(zc, za)) == 0;
+	    zap_cursor_advance(zc)) {
+		if ((za->za_first_integer & mask) == (value & mask)) {
+			if (strlcpy(name, za->za_name, namelen) >= namelen)
+				err = SET_ERROR(ENAMETOOLONG);
+			break;
 		}
 	}
+	zap_cursor_fini(zc);
+	zap_attribute_free(za);
+	return (err);
+}
+
+int
+zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
+    char *name, uint64_t namelen)
+{
+	zap_cursor_t zc;
+	zap_cursor_init(&zc, os, zapobj);
+	return (zap_value_search_impl(&zc, value, mask, name, namelen));
+}
+
+int
+zap_value_search_by_dnode(dnode_t *dn, uint64_t value, uint64_t mask,
+    char *name, uint64_t namelen)
+{
+	zap_cursor_t zc;
+	zap_cursor_init_by_dnode(&zc, dn);
+	return (zap_value_search_impl(&zc, value, mask, name, namelen));
+}
+
+/* zap_*_int */
+/* Declare char name[20] and fill it with value formatted as "%llx" hex. */
+#define	FORMAT_INT_KEY(name, value)	\
+	char name[20];			\
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)(value));
+
+int
+zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_add(os, obj, name, 8, 1, &value, tx));
+}
+int
+zap_add_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_add_by_dnode(dn, name, 8, 1, &value, tx));
+}
+
+int
+zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_remove(os, obj, name, tx));
+}
+int
+zap_remove_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_remove_by_dnode(dn, name, tx));
+}
+
+int
+zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_lookup(os, obj, name, 8, 1, &value));
+}
+
+int
+zap_lookup_int_by_dnode(dnode_t *dn, uint64_t value)
+{
+	FORMAT_INT_KEY(name, value);
+	return (zap_lookup_by_dnode(dn, name, 8, 1, &value));
+}
+
+/* zap_*_int_key */
+
+int
+zap_add_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_add(os, obj, name, 8, 1, &value, tx));
+}
+int
+zap_add_int_key_by_dnode(dnode_t *dn,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_add_by_dnode(dn, name, 8, 1, &value, tx));
+}
+
+int
+zap_update_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_update(os, obj, name, 8, 1, &value, tx));
+}
+int
+zap_update_int_key_by_dnode(dnode_t *dn,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_update_by_dnode(dn, name, 8, 1, &value, tx));
+}
+
+int
+zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_lookup(os, obj, name, 8, 1, valuep));
+}
+int
+zap_lookup_int_key_by_dnode(dnode_t *dn, uint64_t key, uint64_t *valuep)
+{
+	FORMAT_INT_KEY(name, key);
+	return (zap_lookup_by_dnode(dn, name, 8, 1, valuep));
+}
+
+/* zap_cursor */
+
+static int
+zap_cursor_init_by_dnode_impl(zap_cursor_t *zc, dnode_t *dn,
+    uint64_t serialized, boolean_t prefetch)
+{
+	zc->zc_zap = NULL;
+	zc->zc_leaf = NULL;
+
+	int err = zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
+	    zc, &zc->zc_zap);
+	if (err != 0)
+		return (err);
+
+	zc->zc_prefetch = prefetch;
+	zc->zc_objset = dn->dn_objset;
+	zc->zc_zapobj = dn->dn_object;
+
+	int hb = zap_hashbits(zc->zc_zap);
+	zc->zc_hash = serialized << (64 - hb);
+	zc->zc_cd = serialized >> hb;
+	if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
+		zc->zc_cd = 0;
+
+	/*
+	 * Drop ZAP read lock, but keep the hold, so the holds on the
+	 * underlying dnode and header dbuf are maintained.
+	 */
+	rw_exit(&zc->zc_zap->zap_rwlock);
+
+	return (0);
+}
+
+static int
+zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
+    uint64_t serialized, boolean_t prefetch)
+{
+	dnode_t *dn = NULL;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0) {
+		/* Mark the cursor invalid so fini/retrieve are safe. */
+		zc->zc_zap = NULL;
+		zc->zc_leaf = NULL;
+		return (err);
+	}
+
+	err = zap_cursor_init_by_dnode_impl(zc, dn, serialized, prefetch);
+	/* On success the cursor's ZAP hold keeps the dnode; drop ours. */
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+int
+zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
+{
+	return (zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE));
+}
+
+int
+zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn)
+{
+	return (zap_cursor_init_by_dnode_impl(zc, dn, 0, B_TRUE));
+}
+
+int
+zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
+{
+	return (zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE));
+}
+
+int
+zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
+    uint64_t serialized)
+{
+	return (zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE));
+}
+
+int
+zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
+    uint64_t serialized)
+{
+	return (zap_cursor_init_by_dnode_impl(zc, dn, serialized, B_TRUE));
 }
 
 void
-fzap_get_stats(zap_t *zap, zap_stats_t *zs)
+zap_cursor_fini(zap_cursor_t *zc)
 {
-	int bs = FZAP_BLOCK_SHIFT(zap);
-	zs->zs_blocksize = 1ULL << bs;
-
-	/*
-	 * Set zap_phys_t fields
-	 */
-	zs->zs_num_leafs = zap_f_phys(zap)->zap_num_leafs;
-	zs->zs_num_entries = zap_f_phys(zap)->zap_num_entries;
-	zs->zs_num_blocks = zap_f_phys(zap)->zap_freeblk;
-	zs->zs_block_type = zap_f_phys(zap)->zap_block_type;
-	zs->zs_magic = zap_f_phys(zap)->zap_magic;
-	zs->zs_salt = zap_f_phys(zap)->zap_salt;
-
-	/*
-	 * Set zap_ptrtbl fields
-	 */
-	zs->zs_ptrtbl_len = 1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift;
-	zs->zs_ptrtbl_nextblk = zap_f_phys(zap)->zap_ptrtbl.zt_nextblk;
-	zs->zs_ptrtbl_blks_copied =
-	    zap_f_phys(zap)->zap_ptrtbl.zt_blks_copied;
-	zs->zs_ptrtbl_zt_blk = zap_f_phys(zap)->zap_ptrtbl.zt_blk;
-	zs->zs_ptrtbl_zt_numblks = zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
-	zs->zs_ptrtbl_zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
-
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
-		/* the ptrtbl is entirely in the header block. */
-		zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
-		    1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
-	} else {
-		dmu_prefetch_by_dnode(zap->zap_dnode, 0,
-		    zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
-		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
-		    ZIO_PRIORITY_SYNC_READ);
-
-		for (int b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
-		    b++) {
-			dmu_buf_t *db;
-			int err;
-
-			err = dmu_buf_hold_by_dnode(zap->zap_dnode,
-			    (zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs,
-			    FTAG, &db, DMU_READ_NO_PREFETCH);
-			if (err == 0) {
-				zap_stats_ptrtbl(zap, db->db_data,
-				    1<<(bs-3), zs);
-				dmu_buf_rele(db, FTAG);
-			}
-		}
+	if (zc->zc_leaf) {
+		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
+		zap_put_leaf(zc->zc_leaf);
 	}
+	if (zc->zc_zap) {
+		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
+		zap_unlock(zc->zc_zap, zc);
+	}
+	memset(zc, 0, sizeof (zap_cursor_t));
 }
 
-/*
- * Find last allocated block and update freeblk.
- */
-static void
-zap_trunc(zap_t *zap)
+int
+zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
 {
-	uint64_t nentries;
-	uint64_t lastblk;
+	int err;
 
-	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+	if (zc->zc_zap == NULL)
+		/* zap_cursor_init failed, cursor is invalid */
+		return (SET_ERROR(EIO));
 
-	if (zap_f_phys(zap)->zap_ptrtbl.zt_blk > 0) {
-		/* External ptrtbl */
-		nentries = (1 << zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-		lastblk = zap_f_phys(zap)->zap_ptrtbl.zt_blk +
-		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks - 1;
+	if (zc->zc_hash == -1ULL)
+		return (SET_ERROR(ENOENT));
+
+	rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
+
+	if (!zc->zc_zap->zap_ismicro) {
+		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
 	} else {
-		/* Embedded ptrtbl */
-		nentries = (1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
-		lastblk = 0;
+		zfs_btree_index_t idx;
+		mzap_ent_t mze_tofind;
+
+		mze_tofind.mze_hash = zc->zc_hash >> 32;
+		mze_tofind.mze_cd = zc->zc_cd;
+
+		mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
+		    &mze_tofind, &idx);
+		if (mze == NULL) {
+			mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
+			    &idx, &idx);
+		}
+		if (mze) {
+			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
+			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
+			za->za_normalization_conflict =
+			    mzap_normalization_conflict(zc->zc_zap, NULL,
+			    mze, &idx);
+			za->za_integer_length = 8;
+			za->za_num_integers = 1;
+			za->za_first_integer = mzep->mze_value;
+			(void) strlcpy(za->za_name, mzep->mze_name,
+			    za->za_name_len);
+			zc->zc_hash = (uint64_t)mze->mze_hash << 32;
+			zc->zc_cd = mze->mze_cd;
+			err = 0;
+		} else {
+			zc->zc_hash = -1ULL;
+			err = SET_ERROR(ENOENT);
+		}
 	}
 
-	for (uint64_t idx = 0; idx < nentries; idx++) {
-		uint64_t blk;
-		if (zap_idx_to_blk(zap, idx, &blk) != 0)
-			return;
-		if (blk > lastblk)
-			lastblk = blk;
-	}
-
-	ASSERT3U(lastblk, <, zap_f_phys(zap)->zap_freeblk);
-
-	zap_f_phys(zap)->zap_freeblk = lastblk + 1;
-}
-
-/*
- * ZAP shrinking algorithm.
- *
- * We shrink ZAP recuresively removing empty leaves. We can remove an empty leaf
- * only if it has a sibling. Sibling leaves have the same prefix length and
- * their prefixes differ only by the least significant (sibling) bit. We require
- * both siblings to be empty. This eliminates a need to rehash the non-empty
- * remaining leaf. When we have removed one of two empty sibling, we set ptrtbl
- * entries of the removed leaf to point out to the remaining leaf. Prefix length
- * of the remaining leaf is decremented. As a result, it has a new prefix and it
- * might have a new sibling. So, we repeat the process.
- *
- * Steps:
- * 1. Check if a sibling leaf (sl) exists and it is empty.
- * 2. Release the leaf (l) if it has the sibling bit (slbit) equal to 1.
- * 3. Release the sibling (sl) to derefer it again with WRITER lock.
- * 4. Upgrade zapdir lock to WRITER (once).
- * 5. Derefer released leaves again.
- * 6. If it is needed, recheck whether both leaves are still siblings and empty.
- * 7. Set ptrtbl pointers of the removed leaf (slbit 1) to point out to blkid of
- * the remaining leaf (slbit 0).
- * 8. Free disk block of the removed leaf (dmu_free_range).
- * 9. Decrement prefix_len of the remaining leaf.
- * 10. Repeat the steps.
- */
-static int
-zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
-{
-	zap_t *zap = zn->zn_zap;
-	int64_t zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
-	uint64_t hash = zn->zn_hash;
-	uint64_t prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
-	uint64_t prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
-	boolean_t trunc = B_FALSE;
-	int err = 0;
-
-	ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
-	ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
-	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
-	ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix);
-
-	boolean_t writer = B_FALSE;
-
-	/*
-	 * To avoid deadlock always deref leaves in the same order -
-	 * sibling 0 first, then sibling 1.
-	 */
-	while (prefix_len) {
-		zap_leaf_t *sl;
-		int64_t prefix_diff = zt_shift - prefix_len;
-		uint64_t sl_prefix = prefix ^ 1;
-		uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len);
-		int slbit = prefix & 1;
-
-		ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
-
-		/*
-		 * Check if there is a sibling by reading ptrtbl ptrs.
-		 */
-		if (check_sibling_ptrtbl_range(zap, sl_prefix, prefix_len) == 0)
-			break;
-
-		/*
-		 * sibling 1, unlock it - we haven't yet dereferenced sibling 0.
-		 */
-		if (slbit == 1) {
-			zap_put_leaf(l);
-			l = NULL;
-		}
-
-		/*
-		 * Dereference sibling leaf and check if it is empty.
-		 */
-		if ((err = zap_deref_leaf(zap, sl_hash, tx, RW_READER,
-		    &sl)) != 0)
-			break;
-
-		ASSERT3U(ZAP_HASH_IDX(sl_hash, prefix_len), ==, sl_prefix);
-
-		/*
-		 * Check if we have a sibling and it is empty.
-		 */
-		if (zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len ||
-		    zap_leaf_phys(sl)->l_hdr.lh_nentries != 0) {
-			zap_put_leaf(sl);
-			break;
-		}
-
-		zap_put_leaf(sl);
-
-		/*
-		 * If there two empty sibling, we have work to do, so
-		 * we need to lock ZAP ptrtbl as WRITER.
-		 */
-		if (!writer && (writer = zap_tryupgradedir(zap, tx)) == 0) {
-			/* We failed to upgrade */
-			if (l != NULL) {
-				zap_put_leaf(l);
-				l = NULL;
-			}
-
-			/*
-			 * Usually, the right way to upgrade from a READER lock
-			 * to a WRITER lock is to call zap_unlockdir() and
-			 * zap_lockdir(), but we do not have a tag. Instead,
-			 * we do it in more sophisticated way.
-			 */
-			rw_exit(&zap->zap_rwlock);
-			rw_enter(&zap->zap_rwlock, RW_WRITER);
-			dmu_buf_will_dirty(zap->zap_dbuf, tx);
-
-			zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
-			writer = B_TRUE;
-		}
-
-		/*
-		 * Here we have WRITER lock for ptrtbl.
-		 * Now, we need a WRITER lock for both siblings leaves.
-		 * Also, we have to recheck if the leaves are still siblings
-		 * and still empty.
-		 */
-		if (l == NULL) {
-			/* sibling 0 */
-			if ((err = zap_deref_leaf(zap, (slbit ? sl_hash : hash),
-			    tx, RW_WRITER, &l)) != 0)
-				break;
-
-			/*
-			 * The leaf isn't empty anymore or
-			 * it was shrunk/split while our locks were down.
-			 */
-			if (zap_leaf_phys(l)->l_hdr.lh_nentries != 0 ||
-			    zap_leaf_phys(l)->l_hdr.lh_prefix_len != prefix_len)
-				break;
-		}
-
-		/* sibling 1 */
-		if ((err = zap_deref_leaf(zap, (slbit ? hash : sl_hash), tx,
-		    RW_WRITER, &sl)) != 0)
-			break;
-
-		/*
-		 * The leaf isn't empty anymore or
-		 * it was shrunk/split while our locks were down.
-		 */
-		if (zap_leaf_phys(sl)->l_hdr.lh_nentries != 0 ||
-		    zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len) {
-			zap_put_leaf(sl);
-			break;
-		}
-
-		/* If we have gotten here, we have a leaf to collapse */
-		uint64_t idx = (slbit ? prefix : sl_prefix) << prefix_diff;
-		uint64_t nptrs = (1ULL << prefix_diff);
-		uint64_t sl_blkid = sl->l_blkid;
-
-		/*
-		 * Set ptrtbl entries to point out to the slibling 0 blkid
-		 */
-		if ((err = zap_set_idx_range_to_blk(zap, idx, nptrs, l->l_blkid,
-		    tx)) != 0) {
-			zap_put_leaf(sl);
-			break;
-		}
-
-		/*
-		 * Free sibling 1 disk block.
-		 */
-		int bs = FZAP_BLOCK_SHIFT(zap);
-		if (sl_blkid == zap_f_phys(zap)->zap_freeblk - 1)
-			trunc = B_TRUE;
-
-		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
-		    sl_blkid << bs, 1 << bs, tx);
-		zap_put_leaf(sl);
-
-		zap_f_phys(zap)->zap_num_leafs--;
-
-		/*
-		 * Update prefix and prefix_len.
-		 */
-		zap_leaf_phys(l)->l_hdr.lh_prefix >>= 1;
-		zap_leaf_phys(l)->l_hdr.lh_prefix_len--;
-
-		prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
-		prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
-	}
-
-	if (trunc)
-		zap_trunc(zap);
-
-	if (l != NULL)
-		zap_put_leaf(l);
-
+	rw_exit(&zc->zc_zap->zap_rwlock);
 	return (err);
 }
 
-ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW,
-	"When iterating ZAP object, prefetch it");
+void
+zap_cursor_advance(zap_cursor_t *zc)
+{
+	if (zc->zc_hash == -1ULL)
+		return;
+	zc->zc_cd++;
+}
 
-ZFS_MODULE_PARAM(zfs, , zap_shrink_enabled, INT, ZMOD_RW,
-	"Enable ZAP shrinking");
+uint64_t
+zap_cursor_serialize(zap_cursor_t *zc)
+{
+	if (zc->zc_zap == NULL || zc->zc_hash == -1ULL)
+		return (-1ULL);
+
+	ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap)));
+	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
+
+	/*
+	 * We want to keep the high 32 bits of the cursor zero if we can, so
+	 * that 32-bit programs can access this.  So usually use a small
+	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
+	 * of the cursor.
+	 *
+	 * [ collision differentiator | zap_hashbits()-bit hash value ]
+	 */
+	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
+	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
+}
+
+/* zap_get_stats */
+
+int
+zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs)
+{
+	zap_t *zap;
+	int err =
+	    zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+	if (err != 0)
+		return (err);
+
+	memset(zs, 0, sizeof (zap_stats_t));
+
+	if (zap->zap_ismicro) {
+		zs->zs_blocksize = zap->zap_dbuf->db_size;
+		zs->zs_num_entries = zap->zap_m.zap_num_entries;
+		zs->zs_num_blocks = 1;
+	} else {
+		fzap_get_stats(zap, zs);
+	}
+	zap_unlock(zap, FTAG);
+	return (0);
+}
+
+int
+zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
+{
+	dnode_t *dn;
+	int err = dnode_hold(os, zapobj, FTAG, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_get_stats_by_dnode(dn, zs);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+EXPORT_SYMBOL(zap_create);
+EXPORT_SYMBOL(zap_create_dnsize);
+EXPORT_SYMBOL(zap_create_norm);
+EXPORT_SYMBOL(zap_create_norm_dnsize);
+EXPORT_SYMBOL(zap_create_flags);
+EXPORT_SYMBOL(zap_create_flags_dnsize);
+EXPORT_SYMBOL(zap_create_claim);
+EXPORT_SYMBOL(zap_create_claim_norm);
+EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
+EXPORT_SYMBOL(zap_create_hold);
+EXPORT_SYMBOL(zap_destroy);
+EXPORT_SYMBOL(zap_lookup);
+EXPORT_SYMBOL(zap_lookup_by_dnode);
+EXPORT_SYMBOL(zap_lookup_norm);
+EXPORT_SYMBOL(zap_lookup_uint64);
+EXPORT_SYMBOL(zap_lookup_length_uint64_by_dnode);
+EXPORT_SYMBOL(zap_contains);
+EXPORT_SYMBOL(zap_prefetch);
+EXPORT_SYMBOL(zap_prefetch_uint64);
+EXPORT_SYMBOL(zap_prefetch_object);
+EXPORT_SYMBOL(zap_add);
+EXPORT_SYMBOL(zap_add_by_dnode);
+EXPORT_SYMBOL(zap_add_uint64);
+EXPORT_SYMBOL(zap_add_uint64_by_dnode);
+EXPORT_SYMBOL(zap_update);
+EXPORT_SYMBOL(zap_update_uint64);
+EXPORT_SYMBOL(zap_update_uint64_by_dnode);
+EXPORT_SYMBOL(zap_length);
+EXPORT_SYMBOL(zap_length_uint64);
+EXPORT_SYMBOL(zap_length_uint64_by_dnode);
+EXPORT_SYMBOL(zap_remove);
+EXPORT_SYMBOL(zap_remove_by_dnode);
+EXPORT_SYMBOL(zap_remove_norm);
+EXPORT_SYMBOL(zap_remove_uint64);
+EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
+EXPORT_SYMBOL(zap_count);
+EXPORT_SYMBOL(zap_count_by_dnode);
+EXPORT_SYMBOL(zap_value_search);
+EXPORT_SYMBOL(zap_add_int);
+EXPORT_SYMBOL(zap_remove_int);
+EXPORT_SYMBOL(zap_lookup_int);
+EXPORT_SYMBOL(zap_add_int_key);
+EXPORT_SYMBOL(zap_lookup_int_key);
+EXPORT_SYMBOL(zap_increment);
+EXPORT_SYMBOL(zap_cursor_init);
+EXPORT_SYMBOL(zap_cursor_fini);
+EXPORT_SYMBOL(zap_cursor_retrieve);
+EXPORT_SYMBOL(zap_cursor_advance);
+EXPORT_SYMBOL(zap_cursor_serialize);
+EXPORT_SYMBOL(zap_cursor_init_serialized);
+EXPORT_SYMBOL(zap_get_stats);
diff --git a/sys/contrib/openzfs/module/zfs/zap_fat.c b/sys/contrib/openzfs/module/zfs/zap_fat.c
new file mode 100644
index 00000000000..7b48c6fd5a1
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/zap_fat.c
@@ -0,0 +1,1458 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2023 Alexander Stetsenko <alex.stetsenko@gmail.com>
+ * Copyright (c) 2023, Klara Inc.
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+/*
+ * This file contains the top half of the zfs directory structure
+ * implementation. The bottom half is in zap_leaf.c.
+ *
+ * The zdir is an extendable hash data structure. There is a table of
+ * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
+ * each a constant size and hold a variable number of directory entries.
+ * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
+ *
+ * The pointer table holds a power of 2 number of pointers.
+ * (1<<zap_t->zd_data->zd_phys->zd_prefix_len).  The bucket pointed to
+ * by the pointer at index i in the table holds entries whose hash value
+ * has a zd_prefix_len-bit prefix equal to i.
+ */
+
+#include <sys/spa.h>
+#include <sys/dmu.h>
+#include <sys/dnode.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zap.h>
+#include <sys/zap_impl.h>
+#include <sys/zap_leaf.h>
+
+/*
+ * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
+ * (all leaf blocks) when we start iterating over it.
+ *
+ * For zap_cursor_init(), the callers all intend to iterate through all the
+ * entries.  There are a few cases where an error (typically i/o error) could
+ * cause it to bail out early.
+ *
+ * For zap_cursor_init_serialized(), there are callers that do the iteration
+ * outside of ZFS.  Typically they would iterate over everything, but we
+ * don't have control of that.  E.g. zfs_ioc_snapshot_list_next(),
+ * zcp_snapshots_iter(), and other iterators over things in the MOS - these
+ * are called by /sbin/zfs and channel programs.  The other example is
+ * zfs_readdir() which iterates over directory entries for the getdents()
+ * syscall.  /sbin/ls iterates to the end (unless it receives a signal), but
+ * userland doesn't have to.
+ *
+ * Given that the ZAP entries aren't returned in a specific order, the only
+ * legitimate use cases for partial iteration would be:
+ *
+ * 1. Pagination: e.g. you only want to display 100 entries at a time, so you
+ *    get the first 100 and then wait for the user to hit "next page" (which
+ *    they may never do).
+ *
+ * 2. You want to know if there are more than X entries, without relying on
+ *    the zfs-specific implementation of the directory's st_size (which is
+ *    the number of entries).
+ */
+static int zap_iterate_prefetch = B_TRUE;
+
+/*
+ * Enable ZAP shrinking. When enabled, empty sibling leaf blocks will be
+ * collapsed into a single block.
+ */
+int zap_shrink_enabled = B_TRUE;
+
+int fzap_default_block_shift = 14; /* 16k blocksize */
+
+static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
+static int zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx);
+
+void
+fzap_byteswap(void *vbuf, size_t size)
+{
+	uint64_t block_type = *(uint64_t *)vbuf;
+
+	if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
+		zap_leaf_byteswap(vbuf, size);
+	else {
+		/* it's a ptrtbl block */
+		byteswap_uint64_array(vbuf, size);
+	}
+}
+
+void
+fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
+{
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+	zap->zap_ismicro = FALSE;
+
+	zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
+	zap->zap_dbu.dbu_evict_func_async = NULL;
+
+	mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, 0);
+	zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
+
+	zap_phys_t *zp = zap_f_phys(zap);
+	/*
+	 * explicitly zero it since it might be coming from an
+	 * initialized microzap
+	 */
+	memset(zap->zap_dbuf->db_data, 0, zap->zap_dbuf->db_size);
+	zp->zap_block_type = ZBT_HEADER;
+	zp->zap_magic = ZAP_MAGIC;
+
+	zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
+
+	zp->zap_freeblk = 2;		/* block 1 will be the first leaf */
+	zp->zap_num_leafs = 1;
+	zp->zap_num_entries = 0;
+	zp->zap_salt = zap->zap_salt;
+	zp->zap_normflags = zap->zap_normflags;
+	zp->zap_flags = flags;
+
+	/* block 1 will be the first leaf */
+	for (int i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
+		ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;
+
+	/*
+	 * set up block 1 - the first leaf
+	 */
+	dmu_buf_t *db;
+	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
+	dmu_buf_will_dirty(db, tx);
+
+	zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
+	l->l_dbuf = db;
+
+	zap_leaf_init(l, zp->zap_normflags != 0);
+
+	kmem_free(l, sizeof (zap_leaf_t));
+	dmu_buf_rele(db, FTAG);
+}
+
+/*
+ * Generic routines for dealing with the pointer & cookie tables.
+ */
+
+static int
+zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
+    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
+    dmu_tx_t *tx)
+{
+	uint64_t newblk;
+	int bs = FZAP_BLOCK_SHIFT(zap);
+	int hepb = 1<<(bs-4);
+	/* hepb = half the number of entries in a block */
+
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+	ASSERT(tbl->zt_blk != 0);
+	ASSERT(tbl->zt_numblks > 0);
+
+	if (tbl->zt_nextblk != 0) {
+		newblk = tbl->zt_nextblk;
+	} else {
+		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
+		tbl->zt_nextblk = newblk;
+		ASSERT0(tbl->zt_blks_copied);
+		dmu_prefetch_by_dnode(zap->zap_dnode, 0,
+		    tbl->zt_blk << bs, tbl->zt_numblks << bs,
+		    ZIO_PRIORITY_SYNC_READ);
+	}
+
+	/*
+	 * Copy the ptrtbl from the old to new location.
+	 */
+
+	uint64_t b = tbl->zt_blks_copied;
+	dmu_buf_t *db_old;
+	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
+	if (err != 0)
+		return (err);
+
+	/* first half of entries in old[b] go to new[2*b+0] */
+	dmu_buf_t *db_new;
+	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
+	dmu_buf_will_dirty(db_new, tx);
+	transfer_func(db_old->db_data, db_new->db_data, hepb);
+	dmu_buf_rele(db_new, FTAG);
+
+	/* second half of entries in old[b] go to new[2*b+1] */
+	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
+	dmu_buf_will_dirty(db_new, tx);
+	transfer_func((uint64_t *)db_old->db_data + hepb,
+	    db_new->db_data, hepb);
+	dmu_buf_rele(db_new, FTAG);
+
+	dmu_buf_rele(db_old, FTAG);
+
+	tbl->zt_blks_copied++;
+
+	dprintf("copied block %llu of %llu\n",
+	    (u_longlong_t)tbl->zt_blks_copied,
+	    (u_longlong_t)tbl->zt_numblks);
+
+	if (tbl->zt_blks_copied == tbl->zt_numblks) {
+		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
+		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);
+
+		tbl->zt_blk = newblk;
+		tbl->zt_numblks *= 2;
+		tbl->zt_shift++;
+		tbl->zt_nextblk = 0;
+		tbl->zt_blks_copied = 0;
+
+		dprintf("finished; numblocks now %llu (%uk entries)\n",
+		    (u_longlong_t)tbl->zt_numblks, 1<<(tbl->zt_shift-10));
+	}
+
+	return (0);
+}
+
+/*
+ * Store val in entry idx of the table described by tbl.
+ *
+ * When tbl->zt_nextblk is nonzero, val is also written to entries 2*idx
+ * and 2*idx+1 of the table that starts at zt_nextblk.  Both buffers are
+ * held before either entry is modified, so a failed hold leaves every
+ * table entry untouched.
+ *
+ * Returns 0 on success, or the error from dmu_buf_hold_by_dnode().
+ */
+static int
+zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
+    dmu_tx_t *tx)
+{
+	int bs = FZAP_BLOCK_SHIFT(zap);
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	ASSERT(tbl->zt_blk != 0);
+
+	dprintf("storing %llx at index %llx\n", (u_longlong_t)val,
+	    (u_longlong_t)idx);
+
+	/* entries are 8 bytes wide, hence the (bs-3) shift and mask */
+	uint64_t blk = idx >> (bs-3);
+	uint64_t off = idx & ((1<<(bs-3))-1);
+
+	dmu_buf_t *db;
+	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
+	if (err != 0)
+		return (err);
+	dmu_buf_will_dirty(db, tx);
+
+	if (tbl->zt_nextblk != 0) {
+		/* entry idx maps to the pair 2*idx, 2*idx+1 in the next table */
+		uint64_t idx2 = idx * 2;
+		uint64_t blk2 = idx2 >> (bs-3);
+		uint64_t off2 = idx2 & ((1<<(bs-3))-1);
+		dmu_buf_t *db2;
+
+		err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
+		    DMU_READ_NO_PREFETCH);
+		if (err != 0) {
+			dmu_buf_rele(db, FTAG);
+			return (err);
+		}
+		dmu_buf_will_dirty(db2, tx);
+		((uint64_t *)db2->db_data)[off2] = val;
+		((uint64_t *)db2->db_data)[off2+1] = val;
+		dmu_buf_rele(db2, FTAG);
+	}
+
+	((uint64_t *)db->db_data)[off] = val;
+	dmu_buf_rele(db, FTAG);
+
+	return (0);
+}
+
+/*
+ * Read entry idx of the table described by tbl into *valp.
+ *
+ * Returns 0 on success, or the error from dmu_buf_hold_by_dnode().  When
+ * tbl->zt_nextblk is nonzero the matching block of that table is held as
+ * well, purely so that an i/o error on it is reported here; *valp has
+ * already been filled in from the current table by then.
+ */
+static int
+zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
+{
+	int bs = FZAP_BLOCK_SHIFT(zap);
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+
+	/* entries are 8 bytes wide, hence the (bs-3) shift and mask */
+	uint64_t blk = idx >> (bs-3);
+	uint64_t off = idx & ((1<<(bs-3))-1);
+
+	dmu_buf_t *db;
+	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
+	if (err != 0)
+		return (err);
+	*valp = ((uint64_t *)db->db_data)[off];
+	dmu_buf_rele(db, FTAG);
+
+	if (tbl->zt_nextblk != 0) {
+		/*
+		 * read the nextblk for the sake of i/o error checking,
+		 * so that zap_table_load() will catch errors for
+		 * zap_table_store.
+		 */
+		blk = (idx*2) >> (bs-3);
+
+		err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+		    (tbl->zt_nextblk + blk) << bs, FTAG, &db,
+		    DMU_READ_NO_PREFETCH);
+		if (err == 0)
+			dmu_buf_rele(db, FTAG);
+	}
+	return (err);
+}
+
+/*
+ * Routines for growing the ptrtbl.
+ */
+
+/*
+ * Expand n source entries into 2*n destination entries: each source value
+ * is written to two consecutive destination slots.
+ */
+static void
+zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n)
+{
+	int k = 0;
+
+	while (k < n) {
+		uint64_t ptr = src[k];
+		uint64_t *pair = &dst[2 * k];
+
+		pair[0] = ptr;
+		pair[1] = ptr;
+		k++;
+	}
+}
+
+/*
+ * Double the size of the pointer table.
+ *
+ * An embedded table (zt_numblks == 0) is copied into a newly allocated
+ * block with every entry duplicated; otherwise the work is handed to
+ * zap_table_grow().  Returns ENOSPC once zt_shift is within 2 bits of
+ * zap_hashbits(), the error from dmu_buf_hold_by_dnode(), or whatever
+ * zap_table_grow() returns.
+ */
+static int
+zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
+{
+	/*
+	 * The pointer table should never use more hash bits than we
+	 * have (otherwise we'd be using useless zero bits to index it).
+	 * If we are within 2 bits of running out, stop growing, since
+	 * this is already an aberrant condition.
+	 */
+	if (zap_f_phys(zap)->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
+		return (SET_ERROR(ENOSPC));
+
+	if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
+		/*
+		 * We are outgrowing the "embedded" ptrtbl (the one
+		 * stored in the header block).  Give it its own entire
+		 * block, which will double the size of the ptrtbl.
+		 */
+		ASSERT3U(zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
+		    ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
+		ASSERT0(zap_f_phys(zap)->zap_ptrtbl.zt_blk);
+
+		/*
+		 * NOTE(review): zap_allocate_blocks() has already advanced
+		 * zap_freeblk; if the hold below fails that block number is
+		 * not handed back.
+		 */
+		uint64_t newblk = zap_allocate_blocks(zap, 1);
+		dmu_buf_t *db_new;
+		int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
+		    DMU_READ_NO_PREFETCH);
+		if (err != 0)
+			return (err);
+		dmu_buf_will_dirty(db_new, tx);
+		zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
+		    db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
+		dmu_buf_rele(db_new, FTAG);
+
+		zap_f_phys(zap)->zap_ptrtbl.zt_blk = newblk;
+		zap_f_phys(zap)->zap_ptrtbl.zt_numblks = 1;
+		zap_f_phys(zap)->zap_ptrtbl.zt_shift++;
+
+		/* the one new block must hold exactly 2^zt_shift entries */
+		ASSERT3U(1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
+		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks <<
+		    (FZAP_BLOCK_SHIFT(zap)-3));
+
+		return (0);
+	} else {
+		return (zap_table_grow(zap, &zap_f_phys(zap)->zap_ptrtbl,
+		    zap_ptrtbl_transfer, tx));
+	}
+}
+
+/*
+ * Add delta (which may be negative) to zap_num_entries.  zap_dbuf, the
+ * buffer backing zap_f_phys() (see the assertion in zap_deref_leaf()), is
+ * dirtied first and the update is made under zap_num_entries_mtx.
+ */
+static void
+zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
+{
+	dmu_buf_will_dirty(zap->zap_dbuf, tx);
+	mutex_enter(&zap->zap_f.zap_num_entries_mtx);
+	/* a negative delta must not take the count below zero */
+	ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta);
+	zap_f_phys(zap)->zap_num_entries += delta;
+	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
+}
+
+/*
+ * Reserve nblocks consecutive block numbers by advancing zap_freeblk and
+ * return the first of them.  The zap must be write-locked.
+ */
+static uint64_t
+zap_allocate_blocks(zap_t *zap, int nblocks)
+{
+	uint64_t first;
+
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	first = zap_f_phys(zap)->zap_freeblk;
+	zap_f_phys(zap)->zap_freeblk = first + nblocks;
+
+	return (first);
+}
+
+/*
+ * dbuf-user eviction callback for a leaf: tear down its rwlock and free
+ * the zap_leaf_t itself.
+ */
+static void
+zap_leaf_evict_sync(void *dbu)
+{
+	zap_leaf_t *leaf = (zap_leaf_t *)dbu;
+
+	rw_destroy(&leaf->l_rwlock);
+	kmem_free(leaf, sizeof (*leaf));
+}
+
+/*
+ * Allocate the next free block, attach a zap_leaf_t to its dbuf (or reuse
+ * one that is still attached), and initialize it as an empty leaf.
+ *
+ * The leaf is returned with l_rwlock held as writer and the dbuf hold
+ * (tag NULL) still in place; zap_put_leaf() drops both.  zap_num_leafs is
+ * incremented.
+ */
+static zap_leaf_t *
+zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
+{
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	uint64_t blkid = zap_allocate_blocks(zap, 1);
+	dmu_buf_t *db = NULL;
+
+	VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db,
+	    DMU_READ_NO_PREFETCH));
+
+	/*
+	 * Create the leaf structure and stash it on the dbuf. If zap was
+	 * recently shrunk or truncated, the dbuf might have been sitting in
+	 * the cache waiting to be evicted, and so still have the old leaf
+	 * attached to it. If so, just reuse it.
+	 */
+	zap_leaf_t *l = dmu_buf_get_user(db);
+	if (l == NULL) {
+		l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
+		l->l_blkid = blkid;
+		l->l_dbuf = db;
+		rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL);
+		dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL,
+		    &l->l_dbuf);
+		dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
+	} else {
+		ASSERT3U(l->l_blkid, ==, blkid);
+		ASSERT3P(l->l_dbuf, ==, db);
+	}
+
+	/* lock before dirtying, as zap_get_leaf_byblk() does */
+	rw_enter(&l->l_rwlock, RW_WRITER);
+	dmu_buf_will_dirty(l->l_dbuf, tx);
+
+	zap_leaf_init(l, zap->zap_normflags != 0);
+
+	zap_f_phys(zap)->zap_num_leafs++;
+
+	return (l);
+}
+
+/*
+ * Report the number of entries in a fat zap through *count.  Always
+ * returns 0.
+ */
+int
+fzap_count(zap_t *zap, uint64_t *count)
+{
+	uint64_t nentries;
+
+	ASSERT(!zap->zap_ismicro);
+
+	mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
+	nentries = zap_f_phys(zap)->zap_num_entries;
+	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
+
+	*count = nentries;
+	return (0);
+}
+
+/*
+ * Routines for obtaining zap_leaf_t's
+ */
+
+/*
+ * Release a leaf: drop its rwlock, then drop the dbuf hold (tag NULL)
+ * that was taken when the leaf was obtained.
+ */
+void
+zap_put_leaf(zap_leaf_t *l)
+{
+	dmu_buf_t *db = l->l_dbuf;
+
+	rw_exit(&l->l_rwlock);
+	dmu_buf_rele(db, NULL);
+}
+
+/*
+ * Build the in-core zap_leaf_t for block blkid and attach it to db as the
+ * dbuf user.  If someone else attached one first, ours is discarded and
+ * theirs is returned.  This function does not leave l_rwlock held.
+ */
+static zap_leaf_t *
+zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
+{
+	ASSERT(blkid != 0);
+
+	zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
+	rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL);
+	/* hold the lock while the leaf is filled in and published */
+	rw_enter(&l->l_rwlock, RW_WRITER);
+	l->l_blkid = blkid;
+	l->l_bs = highbit64(db->db_size) - 1;
+	l->l_dbuf = db;
+
+	dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
+	zap_leaf_t *winner = dmu_buf_set_user(db, &l->l_dbu);
+
+	rw_exit(&l->l_rwlock);
+	if (winner != NULL) {
+		/* someone else set it first */
+		zap_leaf_evict_sync(&l->l_dbu);
+		l = winner;
+	}
+
+	/*
+	 * lh_pad1 was previously used for the next leaf in the leaf
+	 * chain.  There should be no chained leafs (as we have removed
+	 * support for them).
+	 */
+	ASSERT0(zap_leaf_phys(l)->l_hdr.lh_pad1);
+
+	/*
+	 * There should be more hash entries than there can be
+	 * chunks to put in the hash table
+	 */
+	ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
+
+	/* The chunks should begin at the end of the hash table */
+	ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, (zap_leaf_chunk_t *)
+	    &zap_leaf_phys(l)->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
+
+	/* The chunks should end at the end of the block */
+	ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
+	    (uintptr_t)zap_leaf_phys(l), ==, l->l_dbuf->db_size);
+
+	return (l);
+}
+
+/*
+ * Hold the dbuf for leaf block blkid, find or create its zap_leaf_t, and
+ * return it in *lp with l_rwlock held in mode lt.  For RW_WRITER the dbuf
+ * is also dirtied in tx.
+ *
+ * Returns ENOENT for blkid 0, otherwise the result of
+ * dmu_buf_hold_by_dnode().  On success the caller releases the leaf with
+ * zap_put_leaf().
+ */
+static int
+zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
+    zap_leaf_t **lp)
+{
+	dmu_buf_t *db;
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+
+	/*
+	 * If system crashed just after dmu_free_long_range in zfs_rmnode, we
+	 * would be left with an empty xattr dir in delete queue. blkid=0
+	 * would be passed in when doing zfs_purgedir. If that's the case we
+	 * should just return immediately. The underlying objects should
+	 * already be freed, so this should be perfectly fine.
+	 */
+	if (blkid == 0)
+		return (SET_ERROR(ENOENT));
+
+	int bs = FZAP_BLOCK_SHIFT(zap);
+	int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+	    blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
+	if (err != 0)
+		return (err);
+
+	ASSERT3U(db->db_object, ==, zap->zap_object);
+	ASSERT3U(db->db_offset, ==, blkid << bs);
+	ASSERT3U(db->db_size, ==, 1 << bs);
+	ASSERT(blkid != 0);
+
+	/* reuse the leaf already attached to the dbuf, if any */
+	zap_leaf_t *l = dmu_buf_get_user(db);
+
+	if (l == NULL)
+		l = zap_open_leaf(blkid, db);
+
+	rw_enter(&l->l_rwlock, lt);
+	/*
+	 * Must lock before dirtying, otherwise zap_leaf_phys(l) could change,
+	 * causing ASSERT below to fail.
+	 */
+	if (lt == RW_WRITER)
+		dmu_buf_will_dirty(db, tx);
+	ASSERT3U(l->l_blkid, ==, blkid);
+	ASSERT3P(l->l_dbuf, ==, db);
+	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_block_type, ==, ZBT_LEAF);
+	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
+
+	*lp = l;
+	return (0);
+}
+
+/*
+ * Translate pointer-table index idx into a leaf block number in *valp.
+ * An embedded table is read directly from the header; an external one
+ * goes through zap_table_load(), whose error (if any) is returned.
+ */
+static int
+zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
+{
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+
+	zap_table_phys_t *ptrtbl = &zap_f_phys(zap)->zap_ptrtbl;
+
+	if (ptrtbl->zt_numblks != 0)
+		return (zap_table_load(zap, ptrtbl, idx, valp));
+
+	ASSERT3U(idx, <, (1ULL << ptrtbl->zt_shift));
+	*valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
+	return (0);
+}
+
+/*
+ * Point pointer-table entry idx at leaf block blk.  An embedded table
+ * (zt_blk == 0) is updated in place in the header; an external one goes
+ * through zap_table_store(), whose error (if any) is returned.
+ */
+static int
+zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
+{
+	ASSERT(tx != NULL);
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	zap_table_phys_t *ptrtbl = &zap_f_phys(zap)->zap_ptrtbl;
+
+	if (ptrtbl->zt_blk != 0)
+		return (zap_table_store(zap, ptrtbl, idx, blk, tx));
+
+	ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
+	return (0);
+}
+
+/*
+ * Point the nptrs consecutive ptrtbl entries starting at idx at leaf
+ * block blk.
+ *
+ * The range is first probed through zap_idx_to_blk() so that an i/o
+ * error is reported before any entry has been changed.  Returns 0 on
+ * success, or the first error encountered.
+ */
+static int
+zap_set_idx_range_to_blk(zap_t *zap, uint64_t idx, uint64_t nptrs, uint64_t blk,
+    dmu_tx_t *tx)
+{
+	int bs = FZAP_BLOCK_SHIFT(zap);
+	/*
+	 * Probe stride.  NOTE(review): this was labelled "entries per
+	 * block", but bs >> 3 is far smaller than the 1 << (bs - 3)
+	 * eight-byte entries a block holds.  The value is kept because a
+	 * short stride only makes the probe more thorough.
+	 */
+	int stride = bs >> 3;
+	int err = 0;
+
+	ASSERT(tx != NULL);
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	/*
+	 * Check for i/o errors.  The counter is 64-bit so that it cannot
+	 * overflow before reaching nptrs.
+	 */
+	for (uint64_t i = 0; i < nptrs; i += stride) {
+		uint64_t probe;	/* value is discarded; only err matters */
+
+		err = zap_idx_to_blk(zap, idx + i, &probe);
+		if (err != 0)
+			return (err);
+	}
+
+	for (uint64_t i = 0; i < nptrs; i++) {
+		err = zap_set_idx_to_blk(zap, idx + i, blk, tx);
+		ASSERT0(err); /* we checked for i/o errors above */
+		if (err != 0)
+			break;
+	}
+
+	return (err);
+}
+
+/*
+ * ZAP_PREFIX_HASH places a pref_len-bit prefix in the top bits of a 64-bit
+ * hash; pref_len must be nonzero, since a shift by 64 is undefined.
+ * ZAP_HASH_IDX extracts the top n bits of hash, yielding 0 when n is 0.
+ */
+#define	ZAP_PREFIX_HASH(pref, pref_len)	((pref) << (64 - (pref_len)))
+#define	ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
+
+/*
+ * Each leaf has a single range of entries (block pointers) in the ZAP ptrtbl.
+ * If two leaves are siblings, their ranges are adjacent and contain the same
+ * number of entries. In order to find out if a leaf has a sibling, we need to
+ * check the range corresponding to the sibling leaf. There is no need to check
+ * all entries in the range, we only need to check the first and the last one.
+ *
+ * Returns the block number shared by the first and last entry of the range
+ * for (prefix, prefix_len), or 0 if they differ or either cannot be read.
+ * prefix_len must be nonzero (see ZAP_PREFIX_HASH).
+ */
+static uint64_t
+check_sibling_ptrtbl_range(zap_t *zap, uint64_t prefix, uint64_t prefix_len)
+{
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+
+	uint64_t zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+	uint64_t h = ZAP_PREFIX_HASH(prefix, prefix_len);
+	uint64_t idx = ZAP_HASH_IDX(h, zt_shift);
+	uint64_t pref_diff = zt_shift - prefix_len;
+	/* 64-bit shift: pref_diff may exceed the width of int */
+	uint64_t nptrs = (1ULL << pref_diff);
+	uint64_t first;
+	uint64_t last;
+
+	ASSERT3U(idx + nptrs, <=, (1ULL << zt_shift));
+
+	if (zap_idx_to_blk(zap, idx, &first) != 0)
+		return (0);
+
+	if (zap_idx_to_blk(zap, idx + nptrs - 1, &last) != 0)
+		return (0);
+
+	if (first != last)
+		return (0);
+	return (first);
+}
+
+/*
+ * Look up the leaf responsible for hash h and return it in *lp, locked in
+ * mode lt.  Returns EIO if the header's block type or magic is wrong,
+ * otherwise any error from zap_idx_to_blk() or zap_get_leaf_byblk().
+ */
+static int
+zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
+{
+	uint64_t blk;
+
+	ASSERT(zap->zap_dbuf == NULL ||
+	    zap_f_phys(zap) == zap->zap_dbuf->db_data);
+
+	/* Reality check for corrupt zap objects (leaf or header). */
+	if ((zap_f_phys(zap)->zap_block_type != ZBT_LEAF &&
+	    zap_f_phys(zap)->zap_block_type != ZBT_HEADER) ||
+	    zap_f_phys(zap)->zap_magic != ZAP_MAGIC) {
+		return (SET_ERROR(EIO));
+	}
+
+	/* the top zt_shift bits of the hash index the pointer table */
+	uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+	int err = zap_idx_to_blk(zap, idx, &blk);
+	if (err != 0)
+		return (err);
+	err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
+
+	/* the leaf we found must cover this hash */
+	ASSERT(err ||
+	    ZAP_HASH_IDX(h, zap_leaf_phys(*lp)->l_hdr.lh_prefix_len) ==
+	    zap_leaf_phys(*lp)->l_hdr.lh_prefix);
+	return (err);
+}
+
+/*
+ * Split leaf l to make room for the entry named by zn.
+ *
+ * The zap lock is upgraded to writer, and the pointer table is grown
+ * first when the leaf already uses every ptrtbl bit.  On success *lp is
+ * the write-locked leaf that now covers zn's hash (which may be l, its
+ * new sibling, or a leaf that split while the locks were dropped).
+ *
+ * On failure *lp is either NULL or a leaf that is still locked; in the
+ * latter case the caller must release it.
+ */
+static int
+zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
+{
+	zap_t *zap = zn->zn_zap;
+	uint64_t hash = zn->zn_hash;
+	int err;
+	int old_prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
+
+	ASSERT3U(old_prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+
+	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
+	    zap_leaf_phys(l)->l_hdr.lh_prefix);
+
+	if (zap_lock_try_upgrade(zap, tx) == 0 ||
+	    old_prefix_len == zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
+		/* We failed to upgrade, or need to grow the pointer table */
+		zap_put_leaf(l);
+		*lp = l = NULL;
+
+		zap_lock_upgrade(zap, tx);
+
+		while (old_prefix_len ==
+		    zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
+			err = zap_grow_ptrtbl(zap, tx);
+			if (err != 0)
+				return (err);
+		}
+
+		err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
+		if (err != 0)
+			return (err);
+
+		if (zap_leaf_phys(l)->l_hdr.lh_prefix_len != old_prefix_len) {
+			/* it split while our locks were down */
+			*lp = l;
+			return (0);
+		}
+	}
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+	ASSERT3U(old_prefix_len, <, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
+	    zap_leaf_phys(l)->l_hdr.lh_prefix);
+
+	/* first ptrtbl index of the half that will belong to the sibling */
+	int prefix_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
+	    (old_prefix_len + 1);
+	uint64_t sibling =
+	    (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;
+
+	/* check for i/o errors before doing zap_leaf_split */
+	for (uint64_t i = 0; i < (1ULL << prefix_diff); i++) {
+		uint64_t blk;
+		err = zap_idx_to_blk(zap, sibling + i, &blk);
+		if (err != 0) {
+			/*
+			 * l may have been re-acquired above while *lp was
+			 * reset to NULL; hand it back so that the caller
+			 * releases it instead of leaking the lock and hold.
+			 */
+			*lp = l;
+			return (err);
+		}
+		ASSERT3U(blk, ==, l->l_blkid);
+	}
+
+	zap_leaf_t *nl = zap_create_leaf(zap, tx);
+	zap_leaf_split(l, nl, zap->zap_normflags != 0);
+
+	/* set sibling pointers */
+	for (uint64_t i = 0; i < (1ULL << prefix_diff); i++) {
+		err = zap_set_idx_to_blk(zap, sibling + i, nl->l_blkid, tx);
+		ASSERT0(err); /* we checked for i/o errors above */
+	}
+
+	ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_prefix_len, >, 0);
+
+	/* the newest prefix bit of the hash selects which half we keep */
+	if (hash & (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) {
+		/* we want the sibling */
+		zap_put_leaf(l);
+		*lp = nl;
+	} else {
+		zap_put_leaf(nl);
+		*lp = l;
+	}
+
+	return (0);
+}
+
+/*
+ * Release leaf l and, if the pointer table is mid-grow (zt_nextblk set)
+ * or l is nearly full while already using every ptrtbl bit, upgrade the
+ * zap lock and take one growth step.  Any error from zap_grow_ptrtbl()
+ * is deliberately ignored.
+ */
+static void
+zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
+{
+	zap_t *zap = zn->zn_zap;
+	int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+	/* sample the leaf before it is released below */
+	int leaffull = (zap_leaf_phys(l)->l_hdr.lh_prefix_len == shift &&
+	    zap_leaf_phys(l)->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);
+
+	zap_put_leaf(l);
+
+	if (leaffull || zap_f_phys(zap)->zap_ptrtbl.zt_nextblk) {
+		/*
+		 * We are in the middle of growing the pointer table, or
+		 * this leaf will soon make us grow it.
+		 */
+		zap_lock_upgrade(zap, tx);
+
+		/* could have finished growing while our locks were down */
+		if (zap_f_phys(zap)->zap_ptrtbl.zt_shift == shift)
+			(void) zap_grow_ptrtbl(zap, tx);
+	}
+}
+
+/*
+ * Validate the key length of zn.  Returns ENAMETOOLONG if the key does
+ * not fit in zn_normbuf_len, or if it is longer than ZAP_MAXNAMELEN and
+ * the zap is not a directory; 0 otherwise.
+ */
+static int
+fzap_checkname(zap_name_t *zn)
+{
+	uint32_t limit = zn->zn_normbuf_len;
+	uint64_t keylen = (uint64_t)zn->zn_key_orig_numints *
+	    zn->zn_key_intlen;
+
+	if (keylen > limit)
+		return (SET_ERROR(ENAMETOOLONG));
+
+	if (keylen <= ZAP_MAXNAMELEN)
+		return (0);
+
+	/* Only allow directory zap to have longname */
+	if (zn->zn_zap->zap_dnode->dn_type != DMU_OT_DIRECTORY_CONTENTS)
+		return (SET_ERROR(ENAMETOOLONG));
+
+	return (0);
+}
+
+/*
+ * Validate a value's shape.  Returns EINVAL unless integer_size is 1, 2,
+ * 4 or 8, and E2BIG if the total size exceeds ZAP_MAXVALUELEN; 0
+ * otherwise.
+ */
+static int
+fzap_checksize(uint64_t integer_size, uint64_t num_integers)
+{
+	/* Only integer sizes supported by C */
+	if (integer_size != 1 && integer_size != 2 &&
+	    integer_size != 4 && integer_size != 8)
+		return (SET_ERROR(EINVAL));
+
+	uint64_t total = integer_size * num_integers;
+	if (total > ZAP_MAXVALUELEN)
+		return (SET_ERROR(E2BIG));
+
+	return (0);
+}
+
+/*
+ * Run the name check and then the value-size check, returning the first
+ * error found or 0.
+ */
+static int
+fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)
+{
+	int err = fzap_checkname(zn);
+
+	if (err == 0)
+		err = fzap_checksize(integer_size, num_integers);
+	return (err);
+}
+
+/*
+ * Routines for manipulating attributes.
+ */
+/*
+ * Look up zn and copy its value into buf.
+ *
+ * On a hit the entry's name is also copied to realname, *ncp (if given)
+ * receives the normalization-conflict flag, and *actual_num_integers (if
+ * given and the read succeeded) receives the stored integer count.
+ * Returns the first error from the name check, leaf lookup, size check
+ * or value read.
+ */
+int
+fzap_lookup(zap_name_t *zn,
+    uint64_t integer_size, uint64_t num_integers, void *buf,
+    char *realname, int rn_len, boolean_t *ncp,
+    uint64_t *actual_num_integers)
+{
+	zap_entry_handle_t zeh;
+	zap_leaf_t *leaf;
+	int err;
+
+	err = fzap_checkname(zn);
+	if (err != 0)
+		return (err);
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &leaf);
+	if (err != 0)
+		return (err);
+
+	err = zap_leaf_lookup(leaf, zn, &zeh);
+	if (err != 0)
+		goto out;
+
+	err = fzap_checksize(integer_size, num_integers);
+	if (err != 0)
+		goto out;
+
+	err = zap_entry_read(&zeh, integer_size, num_integers, buf);
+	if (err == 0 && actual_num_integers != NULL)
+		*actual_num_integers = zeh.zeh_num_integers;
+
+	/* name and conflict flag are reported even if the read failed */
+	(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
+	if (ncp) {
+		*ncp = zap_entry_normalization_conflict(&zeh,
+		    zn, NULL, zn->zn_zap);
+	}
+
+out:
+	zap_put_leaf(leaf);
+	return (err);
+}
+
+/*
+ * Add a new entry for zn with collision differentiator cd.
+ *
+ * Returns EEXIST if the name is already present.  When the leaf is full
+ * (zap_entry_create() returns EAGAIN) it is expanded and the insert is
+ * retried.  The caller is expected to have validated the arguments with
+ * fzap_check(); that is only asserted here.
+ */
+int
+fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
+    const void *val, uint32_t cd, dmu_tx_t *tx)
+{
+	zap_leaf_t *l;
+	int err;
+	zap_entry_handle_t zeh;
+	zap_t *zap = zn->zn_zap;
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	ASSERT(!zap->zap_ismicro);
+	ASSERT0(fzap_check(zn, integer_size, num_integers));
+
+	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
+	if (err != 0)
+		return (err);
+retry:
+	err = zap_leaf_lookup(l, zn, &zeh);
+	if (err == 0) {
+		err = SET_ERROR(EEXIST);
+		goto out;
+	}
+	if (err != ENOENT)
+		goto out;
+
+	err = zap_entry_create(l, zn, cd,
+	    integer_size, num_integers, val, &zeh);
+
+	if (err == 0) {
+		zap_increment_num_entries(zap, 1, tx);
+	} else if (err == EAGAIN) {
+		err = zap_expand_leaf(zn, l, tx, &l);
+		if (err == 0)
+			goto retry;
+	}
+
+out:
+	/* zap_expand_leaf() may have failed after setting l to NULL */
+	if (l != NULL) {
+		if (err == ENOSPC)
+			zap_put_leaf(l);
+		else
+			zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+	}
+	return (err);
+}
+
+/*
+ * Validate the arguments, then add a new entry for zn, letting the leaf
+ * code pick the collision differentiator (ZAP_NEED_CD).
+ */
+int
+fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	int err = fzap_check(zn, integer_size, num_integers);
+
+	if (err == 0) {
+		err = fzap_add_cd(zn, integer_size, num_integers,
+		    val, ZAP_NEED_CD, tx);
+	}
+	return (err);
+}
+
+/*
+ * Set the value of zn, creating the entry if it does not exist.
+ *
+ * When the leaf is full (EAGAIN from create or update) it is expanded
+ * and the operation is retried.  Returns the error from fzap_check(),
+ * the leaf lookup, or the create/update/expand step.
+ */
+int
+fzap_update(zap_name_t *zn, int integer_size, uint64_t num_integers,
+    const void *val, dmu_tx_t *tx)
+{
+	zap_leaf_t *l;
+	int err;
+	boolean_t create;
+	zap_entry_handle_t zeh;
+	zap_t *zap = zn->zn_zap;
+
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	err = fzap_check(zn, integer_size, num_integers);
+	if (err != 0)
+		return (err);
+
+	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
+	if (err != 0)
+		return (err);
+retry:
+	err = zap_leaf_lookup(l, zn, &zeh);
+	create = (err == ENOENT);
+	ASSERT(err == 0 || err == ENOENT);
+
+	if (create) {
+		err = zap_entry_create(l, zn, ZAP_NEED_CD,
+		    integer_size, num_integers, val, &zeh);
+		if (err == 0)
+			zap_increment_num_entries(zap, 1, tx);
+	} else {
+		err = zap_entry_update(&zeh, integer_size, num_integers, val);
+	}
+
+	if (err == EAGAIN) {
+		err = zap_expand_leaf(zn, l, tx, &l);
+		if (err == 0)
+			goto retry;
+	}
+
+	/* zap_expand_leaf() may have failed after setting l to NULL */
+	if (l != NULL) {
+		if (err == ENOSPC)
+			zap_put_leaf(l);
+		else
+			zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+	}
+	return (err);
+}
+
+/*
+ * Report the integer size and integer count stored for zn.  Either output
+ * pointer may be NULL.  Returns the error from the leaf dereference or
+ * lookup.
+ */
+int
+fzap_length(zap_name_t *zn,
+    uint64_t *integer_size, uint64_t *num_integers)
+{
+	zap_entry_handle_t zeh;
+	zap_leaf_t *leaf;
+	int err;
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &leaf);
+	if (err != 0)
+		return (err);
+
+	err = zap_leaf_lookup(leaf, zn, &zeh);
+	if (err == 0) {
+		if (integer_size != NULL)
+			*integer_size = zeh.zeh_integer_size;
+		if (num_integers != NULL)
+			*num_integers = zeh.zeh_num_integers;
+	}
+
+	zap_put_leaf(leaf);
+	return (err);
+}
+
+/*
+ * Remove the entry for zn.  If that empties the leaf and
+ * zap_shrink_enabled is set, the leaf is handed to zap_shrink(), which
+ * releases it; otherwise it is released here.  Returns the error from
+ * the leaf dereference or lookup, or the result of zap_shrink().
+ */
+int
+fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
+{
+	zap_entry_handle_t zeh;
+	zap_leaf_t *leaf;
+	int err;
+
+	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &leaf);
+	if (err != 0)
+		return (err);
+
+	err = zap_leaf_lookup(leaf, zn, &zeh);
+	if (err != 0) {
+		zap_put_leaf(leaf);
+		return (err);
+	}
+
+	zap_entry_remove(&zeh);
+	zap_increment_num_entries(zn->zn_zap, -1, tx);
+
+	if (zap_leaf_phys(leaf)->l_hdr.lh_nentries == 0 &&
+	    zap_shrink_enabled)
+		return (zap_shrink(zn, leaf, tx));
+
+	zap_put_leaf(leaf);
+	return (0);
+}
+
+/*
+ * Issue a prefetch for the leaf block that covers zn's hash.  Nothing is
+ * done if the pointer-table lookup fails.
+ */
+void
+fzap_prefetch(zap_name_t *zn)
+{
+	zap_t *zap = zn->zn_zap;
+	uint64_t leafblk;
+	uint64_t idx;
+
+	idx = ZAP_HASH_IDX(zn->zn_hash,
+	    zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+
+	if (zap_idx_to_blk(zap, idx, &leafblk) == 0) {
+		int bs = FZAP_BLOCK_SHIFT(zap);
+
+		dmu_prefetch_by_dnode(zap->zap_dnode, 0, leafblk << bs,
+		    1 << bs, ZIO_PRIORITY_SYNC_READ);
+	}
+}
+
+/*
+ * Routines for iterating over the attributes.
+ */
+
+/*
+ * Fill za with the entry at or after the cursor position (zc_hash,
+ * zc_cd) and move the cursor onto that entry.
+ *
+ * The cursor caches its current leaf in zc->zc_leaf.  On return the
+ * leaf's rwlock has been dropped, but the leaf stays attached to the
+ * cursor.  Returns ENOENT when there are no further entries, or the
+ * error from dereferencing a leaf.
+ */
+int
+fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
+{
+	int err;
+	zap_entry_handle_t zeh;
+	zap_leaf_t *l;
+
+	/* retrieve the next entry at or after zc_hash/zc_cd */
+	/* if no entry, return ENOENT */
+
+	/*
+	 * If we are reading from the beginning, we're almost certain to
+	 * iterate over the entire ZAP object.  If there are multiple leaf
+	 * blocks (freeblk > 2), prefetch the whole object (up to
+	 * dmu_prefetch_max bytes), so that we read the leaf blocks
+	 * concurrently. (Unless noprefetch was requested via
+	 * zap_cursor_init_noprefetch()).
+	 */
+	if (zc->zc_hash == 0 && zap_iterate_prefetch &&
+	    zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
+		dmu_prefetch_by_dnode(zap->zap_dnode, 0, 0,
+		    zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+
+	if (zc->zc_leaf) {
+		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
+
+		/*
+		 * The leaf was either shrunk or split.
+		 */
+		if ((zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_block_type == 0) ||
+		    (ZAP_HASH_IDX(zc->zc_hash,
+		    zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
+		    zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix)) {
+			zap_put_leaf(zc->zc_leaf);
+			zc->zc_leaf = NULL;
+		}
+	}
+
+again:
+	if (zc->zc_leaf == NULL) {
+		err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
+		    &zc->zc_leaf);
+		if (err != 0)
+			return (err);
+	}
+	l = zc->zc_leaf;
+
+	err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);
+
+	if (err == ENOENT) {
+		if (zap_leaf_phys(l)->l_hdr.lh_prefix_len == 0) {
+			/* a single leaf covers everything: we are done */
+			zc->zc_hash = -1ULL;
+			zc->zc_cd = 0;
+		} else {
+			/* hash bits below this leaf's prefix */
+			uint64_t nocare = (1ULL <<
+			    (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len)) - 1;
+
+			/* first hash past the range this leaf covers */
+			zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
+			zc->zc_cd = 0;
+
+			if (zc->zc_hash == 0) {
+				/* wrapped around: that was the last leaf */
+				zc->zc_hash = -1ULL;
+			} else {
+				zap_put_leaf(zc->zc_leaf);
+				zc->zc_leaf = NULL;
+				goto again;
+			}
+		}
+	}
+
+	if (err == 0) {
+		zc->zc_hash = zeh.zeh_hash;
+		zc->zc_cd = zeh.zeh_cd;
+		za->za_integer_length = zeh.zeh_integer_size;
+		za->za_num_integers = zeh.zeh_num_integers;
+		if (zeh.zeh_num_integers == 0) {
+			za->za_first_integer = 0;
+		} else {
+			err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
+			ASSERT(err == 0 || err == EOVERFLOW);
+		}
+		/* err is overwritten here, discarding any EOVERFLOW above */
+		err = zap_entry_read_name(zap, &zeh,
+		    za->za_name_len, za->za_name);
+		ASSERT0(err);
+
+		za->za_normalization_conflict =
+		    zap_entry_normalization_conflict(&zeh,
+		    NULL, za->za_name, zap);
+	}
+	/* drop only the lock; the dbuf hold stays with the cursor */
+	rw_exit(&zc->zc_leaf->l_rwlock);
+	return (err);
+}
+
+/*
+ * Accumulate leaf statistics into zs for the len pointer-table entries
+ * in tbl.  Runs of identical entries are visited once; leaves that
+ * cannot be fetched are skipped.
+ */
+static void
+zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
+{
+	uint64_t prev = 0;
+
+	/*
+	 * NB: if a leaf has more pointers than an entire ptrtbl block
+	 * can hold, then it'll be accounted for more than once, since
+	 * we won't have lastblk.
+	 */
+	for (int i = 0; i < len; i++) {
+		uint64_t blkid = tbl[i];
+		zap_leaf_t *leaf;
+
+		if (blkid == prev)
+			continue;
+		prev = blkid;
+
+		if (zap_get_leaf_byblk(zap, blkid, NULL, RW_READER,
+		    &leaf) != 0)
+			continue;
+
+		zap_leaf_stats(zap, leaf, zs);
+		zap_put_leaf(leaf);
+	}
+}
+
+/*
+ * Fill zs with the header and pointer-table fields of a fat zap, then
+ * fold in per-leaf statistics by walking the pointer table.  Pointer
+ * table blocks that cannot be held are silently skipped.
+ */
+void
+fzap_get_stats(zap_t *zap, zap_stats_t *zs)
+{
+	int bs = FZAP_BLOCK_SHIFT(zap);
+	zs->zs_blocksize = 1ULL << bs;
+
+	/*
+	 * Set zap_phys_t fields
+	 */
+	zs->zs_num_leafs = zap_f_phys(zap)->zap_num_leafs;
+	zs->zs_num_entries = zap_f_phys(zap)->zap_num_entries;
+	zs->zs_num_blocks = zap_f_phys(zap)->zap_freeblk;
+	zs->zs_block_type = zap_f_phys(zap)->zap_block_type;
+	zs->zs_magic = zap_f_phys(zap)->zap_magic;
+	zs->zs_salt = zap_f_phys(zap)->zap_salt;
+
+	/*
+	 * Set zap_ptrtbl fields
+	 */
+	zs->zs_ptrtbl_len = 1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+	zs->zs_ptrtbl_nextblk = zap_f_phys(zap)->zap_ptrtbl.zt_nextblk;
+	zs->zs_ptrtbl_blks_copied =
+	    zap_f_phys(zap)->zap_ptrtbl.zt_blks_copied;
+	zs->zs_ptrtbl_zt_blk = zap_f_phys(zap)->zap_ptrtbl.zt_blk;
+	zs->zs_ptrtbl_zt_numblks = zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
+	zs->zs_ptrtbl_zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+
+	if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
+		/* the ptrtbl is entirely in the header block. */
+		zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
+		    1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
+	} else {
+		/* start reading the whole external table before walking it */
+		dmu_prefetch_by_dnode(zap->zap_dnode, 0,
+		    zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
+		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
+		    ZIO_PRIORITY_SYNC_READ);
+
+		for (int b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
+		    b++) {
+			dmu_buf_t *db;
+			int err;
+
+			err = dmu_buf_hold_by_dnode(zap->zap_dnode,
+			    (zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs,
+			    FTAG, &db, DMU_READ_NO_PREFETCH);
+			if (err == 0) {
+				/* each block holds 1 << (bs-3) entries */
+				zap_stats_ptrtbl(zap, db->db_data,
+				    1<<(bs-3), zs);
+				dmu_buf_rele(db, FTAG);
+			}
+		}
+	}
+}
+
+/*
+ * Find last allocated block and update freeblk.
+ *
+ * The last block is the larger of the end of an external pointer table
+ * and the highest leaf block any pointer-table entry refers to.  If an
+ * entry cannot be read, freeblk is left unchanged.
+ */
+static void
+zap_trunc(zap_t *zap)
+{
+	uint64_t nentries;
+	uint64_t lastblk;
+
+	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
+
+	if (zap_f_phys(zap)->zap_ptrtbl.zt_blk > 0) {
+		/* External ptrtbl */
+		/* 64-bit shift: zt_shift may exceed the width of int */
+		nentries = (1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+		lastblk = zap_f_phys(zap)->zap_ptrtbl.zt_blk +
+		    zap_f_phys(zap)->zap_ptrtbl.zt_numblks - 1;
+	} else {
+		/* Embedded ptrtbl */
+		nentries = (1ULL << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
+		lastblk = 0;
+	}
+
+	for (uint64_t idx = 0; idx < nentries; idx++) {
+		uint64_t blk;
+		if (zap_idx_to_blk(zap, idx, &blk) != 0)
+			return;
+		if (blk > lastblk)
+			lastblk = blk;
+	}
+
+	ASSERT3U(lastblk, <, zap_f_phys(zap)->zap_freeblk);
+
+	zap_f_phys(zap)->zap_freeblk = lastblk + 1;
+}
+
+/*
+ * ZAP shrinking algorithm.
+ *
+ * We shrink the ZAP by recursively removing empty leaves. We can remove an
+ * empty leaf only if it has a sibling. Sibling leaves have the same prefix
+ * length and their prefixes differ only by the least significant (sibling)
+ * bit. We require both siblings to be empty. This eliminates the need to
+ * rehash the non-empty remaining leaf. When we have removed one of two empty
+ * siblings, we set the ptrtbl entries of the removed leaf to point to the
+ * remaining leaf. The prefix length of the remaining leaf is decremented. As
+ * a result, it has a new prefix and it might have a new sibling. So, we
+ * repeat the process.
+ *
+ * Steps:
+ * 1. Check if a sibling leaf (sl) exists and it is empty.
+ * 2. Release the leaf (l) if it has the sibling bit (slbit) equal to 1.
+ * 3. Release the sibling (sl) to dereference it again with WRITER lock.
+ * 4. Upgrade zapdir lock to WRITER (once).
+ * 5. Dereference released leaves again.
+ * 6. If it is needed, recheck whether both leaves are still siblings and empty.
+ * 7. Set ptrtbl pointers of the removed leaf (slbit 1) to point to the blkid
+ * of the remaining leaf (slbit 0).
+ * 8. Free disk block of the removed leaf (dmu_free_range).
+ * 9. Decrement prefix_len of the remaining leaf.
+ * 10. Repeat the steps.
+ *
+ * The caller passes in l locked; it is always released before returning.
+ * Returns 0, or the error from a failed leaf dereference or ptrtbl update.
+ */
+static int
+zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
+{
+	zap_t *zap = zn->zn_zap;
+	int64_t zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+	uint64_t hash = zn->zn_hash;
+	uint64_t prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
+	uint64_t prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
+	boolean_t trunc = B_FALSE;
+	int err = 0;
+
+	ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
+	ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
+	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
+	ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix);
+
+	boolean_t writer = B_FALSE;
+
+	/*
+	 * To avoid deadlock always deref leaves in the same order -
+	 * sibling 0 first, then sibling 1.
+	 */
+	while (prefix_len) {
+		zap_leaf_t *sl;
+		int64_t prefix_diff = zt_shift - prefix_len;
+		uint64_t sl_prefix = prefix ^ 1;
+		uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len);
+		int slbit = prefix & 1;
+
+		ASSERT0(zap_leaf_phys(l)->l_hdr.lh_nentries);
+
+		/*
+		 * Check if there is a sibling by reading ptrtbl ptrs.
+		 */
+		if (check_sibling_ptrtbl_range(zap, sl_prefix, prefix_len) == 0)
+			break;
+
+		/*
+		 * sibling 1, unlock it - we haven't yet dereferenced sibling 0.
+		 */
+		if (slbit == 1) {
+			zap_put_leaf(l);
+			l = NULL;
+		}
+
+		/*
+		 * Dereference sibling leaf and check if it is empty.
+		 */
+		if ((err = zap_deref_leaf(zap, sl_hash, tx, RW_READER,
+		    &sl)) != 0)
+			break;
+
+		ASSERT3U(ZAP_HASH_IDX(sl_hash, prefix_len), ==, sl_prefix);
+
+		/*
+		 * Check if we have a sibling and it is empty.
+		 */
+		if (zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len ||
+		    zap_leaf_phys(sl)->l_hdr.lh_nentries != 0) {
+			zap_put_leaf(sl);
+			break;
+		}
+
+		zap_put_leaf(sl);
+
+		/*
+		 * If there are two empty siblings, we have work to do, so
+		 * we need to lock ZAP ptrtbl as WRITER.
+		 */
+		if (!writer && (writer = zap_lock_try_upgrade(zap, tx)) == 0) {
+			/* We failed to upgrade */
+			if (l != NULL) {
+				zap_put_leaf(l);
+				l = NULL;
+			}
+
+			zap_lock_upgrade(zap, tx);
+
+			/*
+			 * The ptrtbl may have changed size while our locks
+			 * were down.  Refresh zt_shift and everything
+			 * derived from it, so that idx and nptrs below
+			 * describe the current table.
+			 */
+			zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
+			prefix_diff = zt_shift - prefix_len;
+			writer = B_TRUE;
+		}
+
+		/*
+		 * Here we have WRITER lock for ptrtbl.
+		 * Now, we need a WRITER lock for both siblings leaves.
+		 * Also, we have to recheck if the leaves are still siblings
+		 * and still empty.
+		 */
+		if (l == NULL) {
+			/* sibling 0 */
+			if ((err = zap_deref_leaf(zap, (slbit ? sl_hash : hash),
+			    tx, RW_WRITER, &l)) != 0)
+				break;
+
+			/*
+			 * The leaf isn't empty anymore or
+			 * it was shrunk/split while our locks were down.
+			 */
+			if (zap_leaf_phys(l)->l_hdr.lh_nentries != 0 ||
+			    zap_leaf_phys(l)->l_hdr.lh_prefix_len != prefix_len)
+				break;
+		}
+
+		/* sibling 1 */
+		if ((err = zap_deref_leaf(zap, (slbit ? hash : sl_hash), tx,
+		    RW_WRITER, &sl)) != 0)
+			break;
+
+		/*
+		 * The leaf isn't empty anymore or
+		 * it was shrunk/split while our locks were down.
+		 */
+		if (zap_leaf_phys(sl)->l_hdr.lh_nentries != 0 ||
+		    zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len) {
+			zap_put_leaf(sl);
+			break;
+		}
+
+		/* If we have gotten here, we have a leaf to collapse */
+		uint64_t idx = (slbit ? prefix : sl_prefix) << prefix_diff;
+		uint64_t nptrs = (1ULL << prefix_diff);
+		uint64_t sl_blkid = sl->l_blkid;
+
+		/*
+		 * Set ptrtbl entries to point to the sibling 0 blkid
+		 */
+		if ((err = zap_set_idx_range_to_blk(zap, idx, nptrs, l->l_blkid,
+		    tx)) != 0) {
+			zap_put_leaf(sl);
+			break;
+		}
+
+		/*
+		 * Free sibling 1 disk block.
+		 */
+		int bs = FZAP_BLOCK_SHIFT(zap);
+		/* freeing the last allocated block lets freeblk move back */
+		if (sl_blkid == zap_f_phys(zap)->zap_freeblk - 1)
+			trunc = B_TRUE;
+
+		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
+		    sl_blkid << bs, 1 << bs, tx);
+		zap_put_leaf(sl);
+
+		zap_f_phys(zap)->zap_num_leafs--;
+
+		/*
+		 * Update prefix and prefix_len.
+		 */
+		zap_leaf_phys(l)->l_hdr.lh_prefix >>= 1;
+		zap_leaf_phys(l)->l_hdr.lh_prefix_len--;
+
+		prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
+		prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
+	}
+
+	if (trunc)
+		zap_trunc(zap);
+
+	if (l != NULL)
+		zap_put_leaf(l);
+
+	return (err);
+}
+
+/*
+ * Tunables used in this file: zap_iterate_prefetch is tested in
+ * fzap_cursor_retrieve() before prefetching the whole object, and
+ * zap_shrink_enabled is tested in fzap_remove() before calling
+ * zap_shrink().
+ */
+ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW,
+	"When iterating ZAP object, prefetch it");
+
+ZFS_MODULE_PARAM(zfs, , zap_shrink_enabled, INT, ZMOD_RW,
+	"Enable ZAP shrinking");
diff --git a/sys/contrib/openzfs/module/zfs/zap_impl.c b/sys/contrib/openzfs/module/zfs/zap_impl.c
new file mode 100644
index 00000000000..0c2ba1cdbfe
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/zap_impl.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2024, Klara, Inc.
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dmu.h>
+#include <sys/dnode.h>
+#include <sys/dsl_dataset.h>
+#include <sys/zap.h>
+#include <sys/zap_impl.h>
+
+static kmem_cache_t *zap_name_cache;
+static kmem_cache_t *zap_attr_cache;
+static kmem_cache_t *zap_name_long_cache;
+static kmem_cache_t *zap_attr_long_cache;
+
+/* Setup/teardown caches. Part of the public interface in zap.h. */
+void
+zap_init(void)
+{
+	zap_name_cache = kmem_cache_create("zap_name",
+	    sizeof (zap_name_t) + ZAP_MAXNAMELEN, 0, NULL, NULL,
+	    NULL, NULL, NULL, 0);
+
+	zap_attr_cache = kmem_cache_create("zap_attr_cache",
+	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN,  0, NULL,
+	    NULL, NULL, NULL, NULL, 0);
+
+	zap_name_long_cache = kmem_cache_create("zap_name_long",
+	    sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, NULL,
+	    NULL, NULL, NULL, 0);
+
+	zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache",
+	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW,  0, NULL,
+	    NULL, NULL, NULL, NULL, 0);
+}
+
+void
+zap_fini(void)
+{
+	kmem_cache_destroy(zap_name_cache);
+	kmem_cache_destroy(zap_attr_cache);
+	kmem_cache_destroy(zap_name_long_cache);
+	kmem_cache_destroy(zap_attr_long_cache);
+}
+
+static int
+zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags,
+    size_t outlen)
+{
+	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
+
+	size_t inlen = strlen(name) + 1;
+
+	int err = 0;
+	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
+	    normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
+	    U8_UNICODE_LATEST, &err);
+
+	return (err);
+}
+
+zap_name_t *
+zap_name_alloc(zap_t *zap, boolean_t longname)
+{
+	kmem_cache_t *cache = longname ? zap_name_long_cache : zap_name_cache;
+	zap_name_t *zn = kmem_cache_alloc(cache, KM_SLEEP);
+
+	zn->zn_zap = zap;
+	zn->zn_normbuf_len = longname ? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
+	return (zn);
+}
+
+zap_name_t *
+zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
+{
+	size_t key_len = strlen(key) + 1;
+	zap_name_t *zn = zap_name_alloc(zap, (key_len > ZAP_MAXNAMELEN));
+	if (zap_name_init_str(zn, key, mt) != 0) {
+		zap_name_free(zn);
+		return (NULL);
+	}
+	return (zn);
+}
+
+zap_name_t *
+zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
+{
+	zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
+
+	ASSERT0(zap->zap_normflags);
+	zn->zn_zap = zap;
+	zn->zn_key_intlen = sizeof (*key);
+	zn->zn_key_orig = zn->zn_key_norm = key;
+	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
+	zn->zn_matchtype = 0;
+	zn->zn_normbuf_len = ZAP_MAXNAMELEN;
+
+	zn->zn_hash = zap_hash(zn);
+	return (zn);
+}
+
+void
+zap_name_free(zap_name_t *zn)
+{
+	if (zn->zn_normbuf_len == ZAP_MAXNAMELEN) {
+		kmem_cache_free(zap_name_cache, zn);
+	} else {
+		ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW);
+		kmem_cache_free(zap_name_long_cache, zn);
+	}
+}
+
+int
+zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
+{
+	zap_t *zap = zn->zn_zap;
+	size_t key_len = strlen(key) + 1;
+
+	/* Make sure zn is allocated for longname if key is long */
+	IMPLY(key_len > ZAP_MAXNAMELEN,
+	    zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW);
+
+	zn->zn_key_intlen = sizeof (*key);
+	zn->zn_key_orig = key;
+	zn->zn_key_orig_numints = key_len;
+	zn->zn_matchtype = mt;
+	zn->zn_normflags = zap->zap_normflags;
+
+	/*
+	 * If we're dealing with a case sensitive lookup on a mixed or
+	 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
+	 * will fold case to all caps overriding the lookup request.
+	 */
+	if (mt & MT_MATCH_CASE)
+		zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;
+
+	if (zap->zap_normflags) {
+		/*
+		 * We *must* use zap_normflags because this normalization is
+		 * what the hash is computed from.
+		 */
+		if (zap_normalize(zap, key, zn->zn_normbuf,
+		    zap->zap_normflags, zn->zn_normbuf_len) != 0)
+			return (SET_ERROR(ENOTSUP));
+		zn->zn_key_norm = zn->zn_normbuf;
+		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
+	} else {
+		if (mt != 0)
+			return (SET_ERROR(ENOTSUP));
+		zn->zn_key_norm = zn->zn_key_orig;
+		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
+	}
+
+	zn->zn_hash = zap_hash(zn);
+
+	if (zap->zap_normflags != zn->zn_normflags) {
+		/*
+		 * We *must* use zn_normflags because this normalization is
+		 * what the matching is based on.  (Not the hash!)
+		 */
+		if (zap_normalize(zap, key, zn->zn_normbuf,
+		    zn->zn_normflags, zn->zn_normbuf_len) != 0)
+			return (SET_ERROR(ENOTSUP));
+		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
+	}
+
+	return (0);
+}
+
+boolean_t
+zap_match(zap_name_t *zn, const char *matchname)
+{
+	boolean_t res = B_FALSE;
+	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
+
+	if (zn->zn_matchtype & MT_NORMALIZE) {
+		size_t namelen = zn->zn_normbuf_len;
+		char normbuf[ZAP_MAXNAMELEN];
+		char *norm = normbuf;
+
+		/*
+		 * Cannot allocate this on-stack as it exceeds the stack-limit
+		 * of 1024.
+		 */
+		if (namelen > ZAP_MAXNAMELEN)
+			norm = kmem_alloc(namelen, KM_SLEEP);
+
+		if (zap_normalize(zn->zn_zap, matchname, norm,
+		    zn->zn_normflags, namelen) != 0) {
+			res = B_FALSE;
+		} else {
+			res = (strcmp(zn->zn_key_norm, norm) == 0);
+		}
+		if (norm != normbuf)
+			kmem_free(norm, namelen);
+	} else {
+		res = (strcmp(zn->zn_key_orig, matchname) == 0);
+	}
+	return (res);
+}
+
+uint64_t
+zap_hash(zap_name_t *zn)
+{
+	zap_t *zap = zn->zn_zap;
+	uint64_t h = 0;
+
+	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
+		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
+		h = *(uint64_t *)zn->zn_key_orig;
+	} else {
+		h = zap->zap_salt;
+		ASSERT(h != 0);
+		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+
+		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
+			const uint64_t *wp = zn->zn_key_norm;
+
+			ASSERT(zn->zn_key_intlen == 8);
+			for (int i = 0; i < zn->zn_key_norm_numints;
+			    wp++, i++) {
+				uint64_t word = *wp;
+
+				for (int j = 0; j < 8; j++) {
+					h = (h >> 8) ^
+					    zfs_crc64_table[(h ^ word) & 0xFF];
+					word >>= NBBY;
+				}
+			}
+		} else {
+			const uint8_t *cp = zn->zn_key_norm;
+
+			/*
+			 * We previously stored the terminating null on
+			 * disk, but didn't hash it, so we need to
+			 * continue to not hash it.  (The
+			 * zn_key_*_numints includes the terminating
+			 * null for non-binary keys.)
+			 */
+			int len = zn->zn_key_norm_numints - 1;
+
+			ASSERT(zn->zn_key_intlen == 1);
+			for (int i = 0; i < len; cp++, i++) {
+				h = (h >> 8) ^
+				    zfs_crc64_table[(h ^ *cp) & 0xFF];
+			}
+		}
+	}
+	/*
+	 * Don't use all 64 bits, since we need some in the cookie for
+	 * the collision differentiator.  We MUST use the high bits,
+	 * since those are the ones that we first pay attention to when
+	 * choosing the bucket.
+	 */
+	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
+
+	return (h);
+}
+
+static int
+zap_lock_impl(dnode_t *dn, dmu_buf_t *db, dmu_tx_t *tx,
+    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
+{
+	ASSERT0(db->db_offset);
+	objset_t *os = dmu_buf_get_objset(db);
+	uint64_t obj = db->db_object;
+
+	*zapp = NULL;
+
+	if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP)
+		return (SET_ERROR(EINVAL));
+
+	zap_t *zap = dmu_buf_get_user(db);
+	if (zap == NULL) {
+		zap = mzap_open(db);
+		if (zap == NULL) {
+			/*
+			 * mzap_open() didn't like what it saw on-disk.
+			 * Check for corruption!
+			 */
+			return (SET_ERROR(EIO));
+		}
+	}
+
+	/*
+	 * We're checking zap_ismicro without the lock held, in order to
+	 * tell what type of lock we want.  Once we have some sort of
+	 * lock, see if it really is the right type.  In practice this
+	 * can only be different if it was upgraded from micro to fat,
+	 * and micro wanted WRITER but fat only needs READER.
+	 */
+	krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
+	rw_enter(&zap->zap_rwlock, lt);
+	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
+		/* it was upgraded, now we only need reader */
+		ASSERT(lt == RW_WRITER);
+		ASSERT(RW_READER ==
+		    ((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
+		rw_downgrade(&zap->zap_rwlock);
+		lt = RW_READER;
+	}
+
+	zap->zap_objset = os;
+	zap->zap_dnode = dn;
+
+	if (lt == RW_WRITER)
+		dmu_buf_will_dirty(db, tx);
+
+	ASSERT3P(zap->zap_dbuf, ==, db);
+
+	ASSERT(!zap->zap_ismicro ||
+	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
+	if (zap->zap_ismicro && tx && adding &&
+	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
+		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
+		if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) {
+			dprintf("upgrading obj %llu: num_entries=%u\n",
+			    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
+			*zapp = zap;
+			int err = mzap_upgrade(zapp, tx, 0);
+			if (err != 0)
+				rw_exit(&zap->zap_rwlock);
+			return (err);
+		}
+		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
+		zap->zap_m.zap_num_chunks =
+		    db->db_size / MZAP_ENT_LEN - 1;
+
+		if (newsz > SPA_OLD_MAXBLOCKSIZE) {
+			dsl_dataset_t *ds = dmu_objset_ds(os);
+			if (!dsl_dataset_feature_is_active(ds,
+			    SPA_FEATURE_LARGE_MICROZAP)) {
+				/*
+				 * A microzap just grew beyond the old limit
+				 * for the first time, so we have to ensure the
+				 * feature flag is activated.
+				 * zap_get_micro_max_size() won't let us get
+				 * here if the feature is not enabled, so we
+				 * don't need any other checks beforehand.
+				 *
+				 * Since we're in open context, we can't
+				 * activate the feature directly, so we instead
+				 * flag it on the dataset for next sync.
+				 */
+				dsl_dataset_dirty(ds, tx);
+				mutex_enter(&ds->ds_lock);
+				ds->ds_feature_activation
+				    [SPA_FEATURE_LARGE_MICROZAP] =
+				    (void *)B_TRUE;
+				mutex_exit(&ds->ds_lock);
+			}
+		}
+	}
+
+	*zapp = zap;
+	return (0);
+}
+
+int
+zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
+    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
+    zap_t **zapp)
+{
+	dmu_buf_t *db;
+	int err;
+
+	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
+	if (err != 0)
+		return (err);
+	err = zap_lock_impl(dn, db, tx, lti, fatreader, adding, zapp);
+	if (err != 0)
+		dmu_buf_rele(db, tag);
+	else
+		VERIFY(dnode_add_ref(dn, tag));
+	return (err);
+}
+
+int
+zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
+    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
+    zap_t **zapp)
+{
+	dnode_t *dn;
+	int err;
+
+	err = dnode_hold(os, obj, tag, &dn);
+	if (err != 0)
+		return (err);
+	err = zap_lock_by_dnode(dn, tx, lti, fatreader, adding, tag, zapp);
+	dnode_rele(dn, tag);
+	return (err);
+}
+
+void
+zap_unlock(zap_t *zap, const void *tag)
+{
+	rw_exit(&zap->zap_rwlock);
+	dnode_rele(zap->zap_dnode, tag);
+	dmu_buf_rele(zap->zap_dbuf, tag);
+}
+
+int
+zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx)
+{
+	if (RW_WRITE_HELD(&zap->zap_rwlock))
+		/* Already have writer, nothing to do. */
+		return (1);
+
+	/* Try to upgrade the lock in-place. */
+	if (rw_tryupgrade(&zap->zap_rwlock)) {
+		/*
+		 * Got it, mark buffer dirty, since we only do that in
+		 * zap_lock_impl() for writer.
+		 */
+		dmu_buf_will_dirty(zap->zap_dbuf, tx);
+		return (1);
+	}
+
+	return (0);
+}
+
+void
+zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx)
+{
+	if (zap_lock_try_upgrade(zap, tx))
+		return;
+
+	/*
+	 * It's safe to drop the lock here because we still have a hold on
+	 * zap_dbuf, which prevents the dbuf being evicted and the zap_t being
+	 * deallocated.
+	 */
+	rw_exit(&zap->zap_rwlock);
+
+	rw_enter(&zap->zap_rwlock, RW_WRITER);
+	dmu_buf_will_dirty(zap->zap_dbuf, tx);
+}
+
+void
+zap_evict_sync(void *dbu)
+{
+	zap_t *zap = dbu;
+
+	rw_destroy(&zap->zap_rwlock);
+
+	if (zap->zap_ismicro)
+		mze_destroy(zap);
+	else
+		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
+
+	kmem_free(zap, sizeof (zap_t));
+}
+
+uint64_t
+zap_getflags(zap_t *zap)
+{
+	if (zap->zap_ismicro)
+		return (0);
+	return (zap_f_phys(zap)->zap_flags);
+}
+
+int
+zap_hashbits(zap_t *zap)
+{
+	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
+		return (48);
+	else
+		return (28);
+}
+
+uint32_t
+zap_maxcd(zap_t *zap)
+{
+	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
+		return ((1<<16)-1);
+	else
+		return (-1U);
+}
+
+/* DMU byteswap callback for DMU_BSWAP_ZAP, see dmu_ot_byteswap. */
+void
+zap_byteswap(void *buf, size_t size)
+{
+	uint64_t block_type = *(uint64_t *)buf;
+
+	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
+		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
+		mzap_byteswap(buf, size);
+	} else {
+		fzap_byteswap(buf, size);
+	}
+}
+
+/*
+ * Cursor attribute allocator/free. Part of the public interface in zap.h,
+ * in this file to get access to the kmem caches.
+ */
+static zap_attribute_t *
+zap_attribute_alloc_impl(boolean_t longname)
+{
+	zap_attribute_t *za;
+
+	za = kmem_cache_alloc((longname)? zap_attr_long_cache : zap_attr_cache,
+	    KM_SLEEP);
+	za->za_name_len = (longname)? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
+	return (za);
+}
+
+zap_attribute_t *
+zap_attribute_alloc(void)
+{
+	return (zap_attribute_alloc_impl(B_FALSE));
+}
+
+zap_attribute_t *
+zap_attribute_long_alloc(void)
+{
+	return (zap_attribute_alloc_impl(B_TRUE));
+}
+
+void
+zap_attribute_free(zap_attribute_t *za)
+{
+	if (za->za_name_len == ZAP_MAXNAMELEN) {
+		kmem_cache_free(zap_attr_cache, za);
+	} else {
+		ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW);
+		kmem_cache_free(zap_attr_long_cache, za);
+	}
+}
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
index 4e343ebf5d1..a7c9c9c03b4 100644
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -81,284 +81,7 @@ zap_get_micro_max_size(spa_t *spa)
 	return (SPA_OLD_MAXBLOCKSIZE);
 }
 
-static int mzap_upgrade(zap_t **zapp,
-    const void *tag, dmu_tx_t *tx, zap_flags_t flags);
-
-uint64_t
-zap_getflags(zap_t *zap)
-{
-	if (zap->zap_ismicro)
-		return (0);
-	return (zap_f_phys(zap)->zap_flags);
-}
-
-int
-zap_hashbits(zap_t *zap)
-{
-	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
-		return (48);
-	else
-		return (28);
-}
-
-uint32_t
-zap_maxcd(zap_t *zap)
-{
-	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
-		return ((1<<16)-1);
-	else
-		return (-1U);
-}
-
-static uint64_t
-zap_hash(zap_name_t *zn)
-{
-	zap_t *zap = zn->zn_zap;
-	uint64_t h = 0;
-
-	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
-		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
-		h = *(uint64_t *)zn->zn_key_orig;
-	} else {
-		h = zap->zap_salt;
-		ASSERT(h != 0);
-		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
-
-		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
-			const uint64_t *wp = zn->zn_key_norm;
-
-			ASSERT(zn->zn_key_intlen == 8);
-			for (int i = 0; i < zn->zn_key_norm_numints;
-			    wp++, i++) {
-				uint64_t word = *wp;
-
-				for (int j = 0; j < 8; j++) {
-					h = (h >> 8) ^
-					    zfs_crc64_table[(h ^ word) & 0xFF];
-					word >>= NBBY;
-				}
-			}
-		} else {
-			const uint8_t *cp = zn->zn_key_norm;
-
-			/*
-			 * We previously stored the terminating null on
-			 * disk, but didn't hash it, so we need to
-			 * continue to not hash it.  (The
-			 * zn_key_*_numints includes the terminating
-			 * null for non-binary keys.)
-			 */
-			int len = zn->zn_key_norm_numints - 1;
-
-			ASSERT(zn->zn_key_intlen == 1);
-			for (int i = 0; i < len; cp++, i++) {
-				h = (h >> 8) ^
-				    zfs_crc64_table[(h ^ *cp) & 0xFF];
-			}
-		}
-	}
-	/*
-	 * Don't use all 64 bits, since we need some in the cookie for
-	 * the collision differentiator.  We MUST use the high bits,
-	 * since those are the ones that we first pay attention to when
-	 * choosing the bucket.
-	 */
-	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
-
-	return (h);
-}
-
-static int
-zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags,
-    size_t outlen)
-{
-	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
-
-	size_t inlen = strlen(name) + 1;
-
-	int err = 0;
-	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
-	    normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
-	    U8_UNICODE_LATEST, &err);
-
-	return (err);
-}
-
-boolean_t
-zap_match(zap_name_t *zn, const char *matchname)
-{
-	boolean_t res = B_FALSE;
-	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
-
-	if (zn->zn_matchtype & MT_NORMALIZE) {
-		size_t namelen = zn->zn_normbuf_len;
-		char normbuf[ZAP_MAXNAMELEN];
-		char *norm = normbuf;
-
-		/*
-		 * Cannot allocate this on-stack as it exceed the stack-limit of
-		 * 1024.
-		 */
-		if (namelen > ZAP_MAXNAMELEN)
-			norm = kmem_alloc(namelen, KM_SLEEP);
-
-		if (zap_normalize(zn->zn_zap, matchname, norm,
-		    zn->zn_normflags, namelen) != 0) {
-			res = B_FALSE;
-		} else {
-			res = (strcmp(zn->zn_key_norm, norm) == 0);
-		}
-		if (norm != normbuf)
-			kmem_free(norm, namelen);
-	} else {
-		res = (strcmp(zn->zn_key_orig, matchname) == 0);
-	}
-	return (res);
-}
-
-static kmem_cache_t *zap_name_cache;
-static kmem_cache_t *zap_attr_cache;
-static kmem_cache_t *zap_name_long_cache;
-static kmem_cache_t *zap_attr_long_cache;
-
 void
-zap_init(void)
-{
-	zap_name_cache = kmem_cache_create("zap_name",
-	    sizeof (zap_name_t) + ZAP_MAXNAMELEN, 0, NULL, NULL,
-	    NULL, NULL, NULL, 0);
-
-	zap_attr_cache = kmem_cache_create("zap_attr_cache",
-	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN,  0, NULL,
-	    NULL, NULL, NULL, NULL, 0);
-
-	zap_name_long_cache = kmem_cache_create("zap_name_long",
-	    sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, NULL,
-	    NULL, NULL, NULL, 0);
-
-	zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache",
-	    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW,  0, NULL,
-	    NULL, NULL, NULL, NULL, 0);
-}
-
-void
-zap_fini(void)
-{
-	kmem_cache_destroy(zap_name_cache);
-	kmem_cache_destroy(zap_attr_cache);
-	kmem_cache_destroy(zap_name_long_cache);
-	kmem_cache_destroy(zap_attr_long_cache);
-}
-
-static zap_name_t *
-zap_name_alloc(zap_t *zap, boolean_t longname)
-{
-	kmem_cache_t *cache = longname ? zap_name_long_cache : zap_name_cache;
-	zap_name_t *zn = kmem_cache_alloc(cache, KM_SLEEP);
-
-	zn->zn_zap = zap;
-	zn->zn_normbuf_len = longname ? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
-	return (zn);
-}
-
-void
-zap_name_free(zap_name_t *zn)
-{
-	if (zn->zn_normbuf_len == ZAP_MAXNAMELEN) {
-		kmem_cache_free(zap_name_cache, zn);
-	} else {
-		ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW);
-		kmem_cache_free(zap_name_long_cache, zn);
-	}
-}
-
-static int
-zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
-{
-	zap_t *zap = zn->zn_zap;
-	size_t key_len = strlen(key) + 1;
-
-	/* Make sure zn is allocated for longname if key is long */
-	IMPLY(key_len > ZAP_MAXNAMELEN,
-	    zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW);
-
-	zn->zn_key_intlen = sizeof (*key);
-	zn->zn_key_orig = key;
-	zn->zn_key_orig_numints = key_len;
-	zn->zn_matchtype = mt;
-	zn->zn_normflags = zap->zap_normflags;
-
-	/*
-	 * If we're dealing with a case sensitive lookup on a mixed or
-	 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
-	 * will fold case to all caps overriding the lookup request.
-	 */
-	if (mt & MT_MATCH_CASE)
-		zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;
-
-	if (zap->zap_normflags) {
-		/*
-		 * We *must* use zap_normflags because this normalization is
-		 * what the hash is computed from.
-		 */
-		if (zap_normalize(zap, key, zn->zn_normbuf,
-		    zap->zap_normflags, zn->zn_normbuf_len) != 0)
-			return (SET_ERROR(ENOTSUP));
-		zn->zn_key_norm = zn->zn_normbuf;
-		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
-	} else {
-		if (mt != 0)
-			return (SET_ERROR(ENOTSUP));
-		zn->zn_key_norm = zn->zn_key_orig;
-		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
-	}
-
-	zn->zn_hash = zap_hash(zn);
-
-	if (zap->zap_normflags != zn->zn_normflags) {
-		/*
-		 * We *must* use zn_normflags because this normalization is
-		 * what the matching is based on.  (Not the hash!)
-		 */
-		if (zap_normalize(zap, key, zn->zn_normbuf,
-		    zn->zn_normflags, zn->zn_normbuf_len) != 0)
-			return (SET_ERROR(ENOTSUP));
-		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
-	}
-
-	return (0);
-}
-
-zap_name_t *
-zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
-{
-	size_t key_len = strlen(key) + 1;
-	zap_name_t *zn = zap_name_alloc(zap, (key_len > ZAP_MAXNAMELEN));
-	if (zap_name_init_str(zn, key, mt) != 0) {
-		zap_name_free(zn);
-		return (NULL);
-	}
-	return (zn);
-}
-
-static zap_name_t *
-zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
-{
-	zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
-
-	ASSERT0(zap->zap_normflags);
-	zn->zn_zap = zap;
-	zn->zn_key_intlen = sizeof (*key);
-	zn->zn_key_orig = zn->zn_key_norm = key;
-	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
-	zn->zn_matchtype = 0;
-	zn->zn_normbuf_len = ZAP_MAXNAMELEN;
-
-	zn->zn_hash = zap_hash(zn);
-	return (zn);
-}
-
-static void
 mzap_byteswap(mzap_phys_t *buf, size_t size)
 {
 	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
@@ -373,19 +96,6 @@ mzap_byteswap(mzap_phys_t *buf, size_t size)
 	}
 }
 
-void
-zap_byteswap(void *buf, size_t size)
-{
-	uint64_t block_type = *(uint64_t *)buf;
-
-	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
-		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
-		mzap_byteswap(buf, size);
-	} else {
-		fzap_byteswap(buf, size);
-	}
-}
-
 __attribute__((always_inline)) inline
 static int
 mze_compare(const void *arg1, const void *arg2)
@@ -417,7 +127,7 @@ mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
 	zfs_btree_add(&zap->zap_m.zap_tree, &mze);
 }
 
-static mzap_ent_t *
+mzap_ent_t *
 mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
 {
 	mzap_ent_t mze_tofind;
@@ -482,7 +192,7 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
  * Check if the current entry keeps the colliding entries under the fatzap leaf
  * size.
  */
-static boolean_t
+boolean_t
 mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
 {
 	zap_t *zap = zn->zn_zap;
@@ -508,14 +218,14 @@ mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
 	return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
 }
 
-static void
+void
 mze_destroy(zap_t *zap)
 {
 	zfs_btree_clear(&zap->zap_m.zap_tree);
 	zfs_btree_destroy(&zap->zap_m.zap_tree);
 }
 
-static zap_t *
+zap_t *
 mzap_open(dmu_buf_t *db)
 {
 	zap_t *winner;
@@ -545,9 +255,8 @@ mzap_open(dmu_buf_t *db)
 	}
 
 	/*
-	 * Make sure that zap_ismicro is set before we let others see
-	 * it, because zap_lockdir() checks zap_ismicro without the lock
-	 * held.
+	 * Make sure that zap_ismicro is set before we let others see it,
+	 * because zap_lock() checks zap_ismicro without the lock held.
 	 */
 	dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
 	winner = dmu_buf_set_user(db, &zap->zap_dbu);
@@ -614,163 +323,8 @@ mzap_open(dmu_buf_t *db)
 	return (winner);
 }
 
-/*
- * This routine "consumes" the caller's hold on the dbuf, which must
- * have the specified tag.
- */
-static int
-zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
-    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
-{
-	ASSERT0(db->db_offset);
-	objset_t *os = dmu_buf_get_objset(db);
-	uint64_t obj = db->db_object;
-
-	*zapp = NULL;
-
-	if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP)
-		return (SET_ERROR(EINVAL));
-
-	zap_t *zap = dmu_buf_get_user(db);
-	if (zap == NULL) {
-		zap = mzap_open(db);
-		if (zap == NULL) {
-			/*
-			 * mzap_open() didn't like what it saw on-disk.
-			 * Check for corruption!
-			 */
-			return (SET_ERROR(EIO));
-		}
-	}
-
-	/*
-	 * We're checking zap_ismicro without the lock held, in order to
-	 * tell what type of lock we want.  Once we have some sort of
-	 * lock, see if it really is the right type.  In practice this
-	 * can only be different if it was upgraded from micro to fat,
-	 * and micro wanted WRITER but fat only needs READER.
-	 */
-	krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
-	rw_enter(&zap->zap_rwlock, lt);
-	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
-		/* it was upgraded, now we only need reader */
-		ASSERT(lt == RW_WRITER);
-		ASSERT(RW_READER ==
-		    ((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
-		rw_downgrade(&zap->zap_rwlock);
-		lt = RW_READER;
-	}
-
-	zap->zap_objset = os;
-	zap->zap_dnode = dn;
-
-	if (lt == RW_WRITER)
-		dmu_buf_will_dirty(db, tx);
-
-	ASSERT3P(zap->zap_dbuf, ==, db);
-
-	ASSERT(!zap->zap_ismicro ||
-	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
-	if (zap->zap_ismicro && tx && adding &&
-	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
-		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
-		if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) {
-			dprintf("upgrading obj %llu: num_entries=%u\n",
-			    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
-			*zapp = zap;
-			int err = mzap_upgrade(zapp, tag, tx, 0);
-			if (err != 0)
-				rw_exit(&zap->zap_rwlock);
-			return (err);
-		}
-		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
-		zap->zap_m.zap_num_chunks =
-		    db->db_size / MZAP_ENT_LEN - 1;
-
-		if (newsz > SPA_OLD_MAXBLOCKSIZE) {
-			dsl_dataset_t *ds = dmu_objset_ds(os);
-			if (!dsl_dataset_feature_is_active(ds,
-			    SPA_FEATURE_LARGE_MICROZAP)) {
-				/*
-				 * A microzap just grew beyond the old limit
-				 * for the first time, so we have to ensure the
-				 * feature flag is activated.
-				 * zap_get_micro_max_size() won't let us get
-				 * here if the feature is not enabled, so we
-				 * don't need any other checks beforehand.
-				 *
-				 * Since we're in open context, we can't
-				 * activate the feature directly, so we instead
-				 * flag it on the dataset for next sync.
-				 */
-				dsl_dataset_dirty(ds, tx);
-				mutex_enter(&ds->ds_lock);
-				ds->ds_feature_activation
-				    [SPA_FEATURE_LARGE_MICROZAP] =
-				    (void *)B_TRUE;
-				mutex_exit(&ds->ds_lock);
-			}
-		}
-	}
-
-	*zapp = zap;
-	return (0);
-}
-
-static int
-zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
-    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
-    zap_t **zapp)
-{
-	dmu_buf_t *db;
-	int err;
-
-	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
-	if (err != 0)
-		return (err);
-	err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
-	if (err != 0)
-		dmu_buf_rele(db, tag);
-	else
-		VERIFY(dnode_add_ref(dn, tag));
-	return (err);
-}
-
 int
-zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
-    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
-    zap_t **zapp)
-{
-	dnode_t *dn;
-	dmu_buf_t *db;
-	int err;
-
-	err = dnode_hold(os, obj, tag, &dn);
-	if (err != 0)
-		return (err);
-	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
-	if (err != 0) {
-		dnode_rele(dn, tag);
-		return (err);
-	}
-	err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
-	if (err != 0) {
-		dmu_buf_rele(db, tag);
-		dnode_rele(dn, tag);
-	}
-	return (err);
-}
-
-void
-zap_unlockdir(zap_t *zap, const void *tag)
-{
-	rw_exit(&zap->zap_rwlock);
-	dnode_rele(zap->zap_dnode, tag);
-	dmu_buf_rele(zap->zap_dbuf, tag);
-}
-
-static int
-mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
+mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
 {
 	int err = 0;
 	zap_t *zap = *zapp;
@@ -808,8 +362,7 @@ mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
 		zap_name_init_str(zn, mze->mze_name, 0);
 		/* If we fail here, we would end up losing entries */
 		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
-		    tag, tx));
-		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
+		    tx));
 	}
 	zap_name_free(zn);
 	vmem_free(mzp, sz);
@@ -851,227 +404,20 @@ mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
 	if (flags != 0) {
 		zap_t *zap;
 		/* Only fat zap supports flags; upgrade immediately. */
-		VERIFY(dnode_add_ref(dn, FTAG));
-		VERIFY0(zap_lockdir_impl(dn, db, FTAG, tx, RW_WRITER,
-		    B_FALSE, B_FALSE, &zap));
-		VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
-		zap_unlockdir(zap, FTAG);
-	} else {
-		dmu_buf_rele(db, FTAG);
-	}
-}
-
-static uint64_t
-zap_create_impl(objset_t *os, int normflags, zap_flags_t flags,
-    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
-    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
-{
-	uint64_t obj;
-
-	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
-
-	if (allocated_dnode == NULL) {
-		dnode_t *dn;
-		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
-		    indirect_blockshift, bonustype, bonuslen, dnodesize,
-		    &dn, FTAG, tx);
-		mzap_create_impl(dn, normflags, flags, tx);
-		dnode_rele(dn, FTAG);
-	} else {
-		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
-		    indirect_blockshift, bonustype, bonuslen, dnodesize,
-		    allocated_dnode, tag, tx);
-		mzap_create_impl(*allocated_dnode, normflags, flags, tx);
+		VERIFY0(zap_lock_by_dnode(dn, tx,
+		    RW_WRITER, B_FALSE, B_FALSE, FTAG, &zap));
+		VERIFY0(mzap_upgrade(&zap, tx, flags));
+		zap_unlock(zap, FTAG);
 	}
 
-	return (obj);
-}
-
-int
-zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
-	    0, tx));
-}
-
-int
-zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
-{
-	return (zap_create_claim_norm_dnsize(os, obj,
-	    0, ot, bonustype, bonuslen, dnodesize, tx));
-}
-
-int
-zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
-    dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
-	    bonuslen, 0, tx));
-}
-
-int
-zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
-    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
-    int dnodesize, dmu_tx_t *tx)
-{
-	dnode_t *dn;
-	int error;
-
-	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
-	error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
-	    dnodesize, tx);
-	if (error != 0)
-		return (error);
-
-	error = dnode_hold(os, obj, FTAG, &dn);
-	if (error != 0)
-		return (error);
-
-	mzap_create_impl(dn, normflags, 0, tx);
-
-	dnode_rele(dn, FTAG);
-
-	return (0);
-}
-
-uint64_t
-zap_create(objset_t *os, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
-}
-
-uint64_t
-zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
-{
-	return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
-	    dnodesize, tx));
-}
-
-uint64_t
-zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
-	    0, tx));
-}
-
-uint64_t
-zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
-{
-	return (zap_create_impl(os, normflags, 0, ot, 0, 0,
-	    bonustype, bonuslen, dnodesize, NULL, NULL, tx));
-}
-
-uint64_t
-zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
-    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-	return (zap_create_flags_dnsize(os, normflags, flags, ot,
-	    leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
-}
-
-uint64_t
-zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
-    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
-{
-	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
-	    indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL,
-	    tx));
-}
-
-/*
- * Create a zap object and return a pointer to the newly allocated dnode via
- * the allocated_dnode argument.  The returned dnode will be held and the
- * caller is responsible for releasing the hold by calling dnode_rele().
- */
-uint64_t
-zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
-    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
-    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
-{
-	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
-	    indirect_blockshift, bonustype, bonuslen, dnodesize,
-	    allocated_dnode, tag, tx));
-}
-
-int
-zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
-{
-	/*
-	 * dmu_object_free will free the object number and free the
-	 * data.  Freeing the data will cause our pageout function to be
-	 * called, which will destroy our data (zap_leaf_t's and zap_t).
-	 */
-
-	return (dmu_object_free(os, zapobj, tx));
-}
-
-void
-zap_evict_sync(void *dbu)
-{
-	zap_t *zap = dbu;
-
-	rw_destroy(&zap->zap_rwlock);
-
-	if (zap->zap_ismicro)
-		mze_destroy(zap);
-	else
-		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
-
-	kmem_free(zap, sizeof (zap_t));
-}
-
-int
-zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	if (!zap->zap_ismicro) {
-		err = fzap_count(zap, count);
-	} else {
-		*count = zap->zap_m.zap_num_entries;
-	}
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_count_by_dnode(dnode_t *dn, uint64_t *count)
-{
-	zap_t *zap;
-
-	int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
-	    FTAG, &zap);
-	if (err != 0)
-		return (err);
-	if (!zap->zap_ismicro) {
-		err = fzap_count(zap, count);
-	} else {
-		*count = zap->zap_m.zap_num_entries;
-	}
-	zap_unlockdir(zap, FTAG);
-	return (err);
+	dmu_buf_rele(db, FTAG);
 }
 
 /*
  * zn may be NULL; if not specified, it will be computed if needed.
  * See also the comment above zap_entry_normalization_conflict().
  */
-static boolean_t
+boolean_t
 mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
     zfs_btree_index_t *idx)
 {
@@ -1119,340 +465,7 @@ mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
 	return (B_FALSE);
 }
 
-/*
- * Routines for manipulating attributes.
- */
-
-int
-zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf)
-{
-	return (zap_lookup_norm(os, zapobj, name, integer_size,
-	    num_integers, buf, 0, NULL, 0, NULL));
-}
-
-static int
-zap_lookup_impl(zap_t *zap, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf,
-    matchtype_t mt, char *realname, int rn_len,
-    boolean_t *ncp)
-{
-	int err = 0;
-
-	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
-	if (zn == NULL)
-		return (SET_ERROR(ENOTSUP));
-
-	if (!zap->zap_ismicro) {
-		err = fzap_lookup(zn, integer_size, num_integers, buf,
-		    realname, rn_len, ncp, NULL);
-	} else {
-		zfs_btree_index_t idx;
-		mzap_ent_t *mze = mze_find(zn, &idx);
-		if (mze == NULL) {
-			err = SET_ERROR(ENOENT);
-		} else {
-			if (num_integers < 1) {
-				err = SET_ERROR(EOVERFLOW);
-			} else if (integer_size != 8) {
-				err = SET_ERROR(EINVAL);
-			} else {
-				*(uint64_t *)buf =
-				    MZE_PHYS(zap, mze)->mze_value;
-				if (realname != NULL)
-					(void) strlcpy(realname,
-					    MZE_PHYS(zap, mze)->mze_name,
-					    rn_len);
-				if (ncp) {
-					*ncp = mzap_normalization_conflict(zap,
-					    zn, mze, &idx);
-				}
-			}
-		}
-	}
-	zap_name_free(zn);
-	return (err);
-}
-
-int
-zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf,
-    matchtype_t mt, char *realname, int rn_len,
-    boolean_t *ncp)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_lookup_impl(zap, name, integer_size,
-	    num_integers, buf, mt, realname, rn_len, ncp);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
-{
-	zap_t *zap;
-	int err;
-	zap_name_t *zn;
-
-	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err)
-		return (err);
-	zn = zap_name_alloc_str(zap, name, 0);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	fzap_prefetch(zn);
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_prefetch_object(objset_t *os, uint64_t zapobj)
-{
-	int error;
-	dmu_object_info_t doi;
-
-	error = dmu_object_info(os, zapobj, &doi);
-	if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
-		error = SET_ERROR(EINVAL);
-	if (error == 0)
-		dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset);
-
-	return (error);
-}
-
-int
-zap_lookup_by_dnode(dnode_t *dn, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf)
-{
-	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
-	    num_integers, buf, 0, NULL, 0, NULL));
-}
-
-int
-zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
-    uint64_t integer_size, uint64_t num_integers, void *buf,
-    matchtype_t mt, char *realname, int rn_len,
-    boolean_t *ncp)
-{
-	zap_t *zap;
-
-	int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
-	    FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_lookup_impl(zap, name, integer_size,
-	    num_integers, buf, mt, realname, rn_len, ncp);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-static int
-zap_prefetch_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints)
-{
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	fzap_prefetch(zn);
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (0);
-}
-
-int
-zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_prefetch_uint64_impl(zap, key, key_numints);
-	/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_prefetch_uint64_impl(zap, key, key_numints);
-	/* zap_prefetch_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-static int
-zap_lookup_length_uint64_impl(zap_t *zap, const uint64_t *key,
-    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
-    uint64_t *actual_num_integers)
-{
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-
-	int err = fzap_lookup(zn, integer_size, num_integers, buf,
-	    NULL, 0, NULL, actual_num_integers);
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_lookup_length_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, buf, NULL);
-	/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
-    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_lookup_length_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, buf, NULL);
-	/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
-    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
-    uint64_t *actual_num_integers)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_lookup_length_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, buf, actual_num_integers);
-	/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_contains(objset_t *os, uint64_t zapobj, const char *name)
-{
-	int err = zap_lookup_norm(os, zapobj, name, 0,
-	    0, NULL, 0, NULL, 0, NULL);
-	if (err == EOVERFLOW || err == EINVAL)
-		err = 0; /* found, but skipped reading the value */
-	return (err);
-}
-
-int
-zap_length(objset_t *os, uint64_t zapobj, const char *name,
-    uint64_t *integer_size, uint64_t *num_integers)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-	if (!zap->zap_ismicro) {
-		err = fzap_length(zn, integer_size, num_integers);
-	} else {
-		zfs_btree_index_t idx;
-		mzap_ent_t *mze = mze_find(zn, &idx);
-		if (mze == NULL) {
-			err = SET_ERROR(ENOENT);
-		} else {
-			if (integer_size)
-				*integer_size = 8;
-			if (num_integers)
-				*num_integers = 1;
-		}
-	}
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-	err = fzap_length(zn, integer_size, num_integers);
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
-    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
-{
-	zap_t *zap;
-
-	int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
-	    FTAG, &zap);
-	if (err != 0)
-		return (err);
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-	err = fzap_length(zn, integer_size, num_integers);
-	zap_name_free(zn);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-static void
+void
 mzap_addent(zap_name_t *zn, uint64_t value)
 {
 	zap_t *zap = zn->zn_zap;
@@ -1495,612 +508,6 @@ mzap_addent(zap_name_t *zn, uint64_t value)
 	cmn_err(CE_PANIC, "out of entries!");
 }
 
-static int
-zap_add_impl(zap_t *zap, const char *key,
-    int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx, const void *tag)
-{
-	const uint64_t *intval = val;
-	int err = 0;
-
-	zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
-	if (zn == NULL) {
-		zap_unlockdir(zap, tag);
-		return (SET_ERROR(ENOTSUP));
-	}
-	if (!zap->zap_ismicro) {
-		err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
-		zap = zn->zn_zap;	/* fzap_add() may change zap */
-	} else if (integer_size != 8 || num_integers != 1 ||
-	    strlen(key) >= MZAP_NAME_LEN ||
-	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
-		err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
-		if (err == 0) {
-			err = fzap_add(zn, integer_size, num_integers, val,
-			    tag, tx);
-		}
-		zap = zn->zn_zap;	/* fzap_add() may change zap */
-	} else {
-		zfs_btree_index_t idx;
-		if (mze_find(zn, &idx) != NULL) {
-			err = SET_ERROR(EEXIST);
-		} else {
-			mzap_addent(zn, *intval);
-		}
-	}
-	ASSERT(zap == zn->zn_zap);
-	zap_name_free(zn);
-	if (zap != NULL)	/* may be NULL if fzap_add() failed */
-		zap_unlockdir(zap, tag);
-	return (err);
-}
-
-int
-zap_add(objset_t *os, uint64_t zapobj, const char *key,
-    int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-	int err;
-
-	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
-	/* zap_add_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_add_by_dnode(dnode_t *dn, const char *key,
-    int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-	int err;
-
-	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
-	/* zap_add_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-static int
-zap_add_uint64_impl(zap_t *zap, const uint64_t *key,
-    int key_numints, int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx, const void *tag)
-{
-	int err;
-
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, tag);
-		return (SET_ERROR(ENOTSUP));
-	}
-	err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
-	zap = zn->zn_zap;	/* fzap_add() may change zap */
-	zap_name_free(zn);
-	if (zap != NULL)	/* may be NULL if fzap_add() failed */
-		zap_unlockdir(zap, tag);
-	return (err);
-}
-
-int
-zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints, int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_add_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, val, tx, FTAG);
-	/* zap_add_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
-    int key_numints, int integer_size, uint64_t num_integers,
-    const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_add_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, val, tx, FTAG);
-	/* zap_add_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_update(objset_t *os, uint64_t zapobj, const char *name,
-    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-	const uint64_t *intval = val;
-
-	int err =
-	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
-	if (zn == NULL) {
-		zap_unlockdir(zap, FTAG);
-		return (SET_ERROR(ENOTSUP));
-	}
-	if (!zap->zap_ismicro) {
-		err = fzap_update(zn, integer_size, num_integers, val,
-		    FTAG, tx);
-		zap = zn->zn_zap;	/* fzap_update() may change zap */
-	} else if (integer_size != 8 || num_integers != 1 ||
-	    strlen(name) >= MZAP_NAME_LEN) {
-		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
-		    (u_longlong_t)zapobj, integer_size,
-		    (u_longlong_t)num_integers, name);
-		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
-		if (err == 0) {
-			err = fzap_update(zn, integer_size, num_integers,
-			    val, FTAG, tx);
-		}
-		zap = zn->zn_zap;	/* fzap_update() may change zap */
-	} else {
-		zfs_btree_index_t idx;
-		mzap_ent_t *mze = mze_find(zn, &idx);
-		if (mze != NULL) {
-			MZE_PHYS(zap, mze)->mze_value = *intval;
-		} else {
-			mzap_addent(zn, *intval);
-		}
-	}
-	ASSERT(zap == zn->zn_zap);
-	zap_name_free(zn);
-	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
-		zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-static int
-zap_update_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
-    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx,
-    const void *tag)
-{
-	int err;
-
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, tag);
-		return (SET_ERROR(ENOTSUP));
-	}
-	err = fzap_update(zn, integer_size, num_integers, val, tag, tx);
-	zap = zn->zn_zap;	/* fzap_update() may change zap */
-	zap_name_free(zn);
-	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
-		zap_unlockdir(zap, tag);
-	return (err);
-}
-
-int
-zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints, int integer_size, uint64_t num_integers, const void *val,
-    dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_update_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, val, tx, FTAG);
-	/* zap_update_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
-    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_update_uint64_impl(zap, key, key_numints,
-	    integer_size, num_integers, val, tx, FTAG);
-	/* zap_update_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
-{
-	return (zap_remove_norm(os, zapobj, name, 0, tx));
-}
-
-static int
-zap_remove_impl(zap_t *zap, const char *name,
-    matchtype_t mt, dmu_tx_t *tx)
-{
-	int err = 0;
-
-	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
-	if (zn == NULL)
-		return (SET_ERROR(ENOTSUP));
-	if (!zap->zap_ismicro) {
-		err = fzap_remove(zn, tx);
-	} else {
-		zfs_btree_index_t idx;
-		mzap_ent_t *mze = mze_find(zn, &idx);
-		if (mze == NULL) {
-			err = SET_ERROR(ENOENT);
-		} else {
-			zap->zap_m.zap_num_entries--;
-			memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
-			zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
-		}
-	}
-	zap_name_free(zn);
-	return (err);
-}
-
-int
-zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
-    matchtype_t mt, dmu_tx_t *tx)
-{
-	zap_t *zap;
-	int err;
-
-	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
-	if (err)
-		return (err);
-	err = zap_remove_impl(zap, name, mt, tx);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-int
-zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
-{
-	zap_t *zap;
-	int err;
-
-	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
-	if (err)
-		return (err);
-	err = zap_remove_impl(zap, name, 0, tx);
-	zap_unlockdir(zap, FTAG);
-	return (err);
-}
-
-static int
-zap_remove_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
-    dmu_tx_t *tx, const void *tag)
-{
-	int err;
-
-	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
-	if (zn == NULL) {
-		zap_unlockdir(zap, tag);
-		return (SET_ERROR(ENOTSUP));
-	}
-	err = fzap_remove(zn, tx);
-	zap_name_free(zn);
-	zap_unlockdir(zap, tag);
-	return (err);
-}
-
-int
-zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
-    int key_numints, dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
-	/* zap_remove_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-int
-zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
-    dmu_tx_t *tx)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-	err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
-	/* zap_remove_uint64_impl() calls zap_unlockdir() */
-	return (err);
-}
-
-
-static zap_attribute_t *
-zap_attribute_alloc_impl(boolean_t longname)
-{
-	zap_attribute_t *za;
-
-	za = kmem_cache_alloc((longname)? zap_attr_long_cache : zap_attr_cache,
-	    KM_SLEEP);
-	za->za_name_len = (longname)? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
-	return (za);
-}
-
-zap_attribute_t *
-zap_attribute_alloc(void)
-{
-	return (zap_attribute_alloc_impl(B_FALSE));
-}
-
-zap_attribute_t *
-zap_attribute_long_alloc(void)
-{
-	return (zap_attribute_alloc_impl(B_TRUE));
-}
-
-void
-zap_attribute_free(zap_attribute_t *za)
-{
-	if (za->za_name_len == ZAP_MAXNAMELEN) {
-		kmem_cache_free(zap_attr_cache, za);
-	} else {
-		ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW);
-		kmem_cache_free(zap_attr_long_cache, za);
-	}
-}
-
-/*
- * Routines for iterating over the attributes.
- */
-
-static void
-zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
-    uint64_t serialized, boolean_t prefetch)
-{
-	zc->zc_objset = os;
-	zc->zc_zap = NULL;
-	zc->zc_leaf = NULL;
-	zc->zc_zapobj = zapobj;
-	zc->zc_serialized = serialized;
-	zc->zc_hash = 0;
-	zc->zc_cd = 0;
-	zc->zc_prefetch = prefetch;
-}
-void
-zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
-    uint64_t serialized)
-{
-	zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE);
-}
-
-/*
- * Initialize a cursor at the beginning of the ZAP object.  The entire
- * ZAP object will be prefetched.
- */
-void
-zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
-{
-	zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE);
-}
-
-/*
- * Initialize a cursor at the beginning, but request that we not prefetch
- * the entire ZAP object.
- */
-void
-zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
-{
-	zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE);
-}
-
-void
-zap_cursor_fini(zap_cursor_t *zc)
-{
-	if (zc->zc_zap) {
-		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
-		zap_unlockdir(zc->zc_zap, NULL);
-		zc->zc_zap = NULL;
-	}
-	if (zc->zc_leaf) {
-		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
-		zap_put_leaf(zc->zc_leaf);
-		zc->zc_leaf = NULL;
-	}
-	zc->zc_objset = NULL;
-}
-
-uint64_t
-zap_cursor_serialize(zap_cursor_t *zc)
-{
-	if (zc->zc_hash == -1ULL)
-		return (-1ULL);
-	if (zc->zc_zap == NULL)
-		return (zc->zc_serialized);
-	ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap)));
-	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
-
-	/*
-	 * We want to keep the high 32 bits of the cursor zero if we can, so
-	 * that 32-bit programs can access this.  So usually use a small
-	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
-	 * of the cursor.
-	 *
-	 * [ collision differentiator | zap_hashbits()-bit hash value ]
-	 */
-	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
-	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
-}
-
-int
-zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
-{
-	int err;
-
-	if (zc->zc_hash == -1ULL)
-		return (SET_ERROR(ENOENT));
-
-	if (zc->zc_zap == NULL) {
-		int hb;
-		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
-		    RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
-		if (err != 0)
-			return (err);
-
-		/*
-		 * To support zap_cursor_init_serialized, advance, retrieve,
-		 * we must add to the existing zc_cd, which may already
-		 * be 1 due to the zap_cursor_advance.
-		 */
-		ASSERT0(zc->zc_hash);
-		hb = zap_hashbits(zc->zc_zap);
-		zc->zc_hash = zc->zc_serialized << (64 - hb);
-		zc->zc_cd += zc->zc_serialized >> hb;
-		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
-			zc->zc_cd = 0;
-	} else {
-		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
-	}
-	if (!zc->zc_zap->zap_ismicro) {
-		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
-	} else {
-		zfs_btree_index_t idx;
-		mzap_ent_t mze_tofind;
-
-		mze_tofind.mze_hash = zc->zc_hash >> 32;
-		mze_tofind.mze_cd = zc->zc_cd;
-
-		mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
-		    &mze_tofind, &idx);
-		if (mze == NULL) {
-			mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
-			    &idx, &idx);
-		}
-		if (mze) {
-			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
-			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
-			za->za_normalization_conflict =
-			    mzap_normalization_conflict(zc->zc_zap, NULL,
-			    mze, &idx);
-			za->za_integer_length = 8;
-			za->za_num_integers = 1;
-			za->za_first_integer = mzep->mze_value;
-			(void) strlcpy(za->za_name, mzep->mze_name,
-			    za->za_name_len);
-			zc->zc_hash = (uint64_t)mze->mze_hash << 32;
-			zc->zc_cd = mze->mze_cd;
-			err = 0;
-		} else {
-			zc->zc_hash = -1ULL;
-			err = SET_ERROR(ENOENT);
-		}
-	}
-	rw_exit(&zc->zc_zap->zap_rwlock);
-	return (err);
-}
-
-void
-zap_cursor_advance(zap_cursor_t *zc)
-{
-	if (zc->zc_hash == -1ULL)
-		return;
-	zc->zc_cd++;
-}
-
-int
-zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
-{
-	zap_t *zap;
-
-	int err =
-	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
-	if (err != 0)
-		return (err);
-
-	memset(zs, 0, sizeof (zap_stats_t));
-
-	if (zap->zap_ismicro) {
-		zs->zs_blocksize = zap->zap_dbuf->db_size;
-		zs->zs_num_entries = zap->zap_m.zap_num_entries;
-		zs->zs_num_blocks = 1;
-	} else {
-		fzap_get_stats(zap, zs);
-	}
-	zap_unlockdir(zap, FTAG);
-	return (0);
-}
-
-#if defined(_KERNEL)
-EXPORT_SYMBOL(zap_create);
-EXPORT_SYMBOL(zap_create_dnsize);
-EXPORT_SYMBOL(zap_create_norm);
-EXPORT_SYMBOL(zap_create_norm_dnsize);
-EXPORT_SYMBOL(zap_create_flags);
-EXPORT_SYMBOL(zap_create_flags_dnsize);
-EXPORT_SYMBOL(zap_create_claim);
-EXPORT_SYMBOL(zap_create_claim_norm);
-EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
-EXPORT_SYMBOL(zap_create_hold);
-EXPORT_SYMBOL(zap_destroy);
-EXPORT_SYMBOL(zap_lookup);
-EXPORT_SYMBOL(zap_lookup_by_dnode);
-EXPORT_SYMBOL(zap_lookup_norm);
-EXPORT_SYMBOL(zap_lookup_uint64);
-EXPORT_SYMBOL(zap_lookup_length_uint64_by_dnode);
-EXPORT_SYMBOL(zap_contains);
-EXPORT_SYMBOL(zap_prefetch);
-EXPORT_SYMBOL(zap_prefetch_uint64);
-EXPORT_SYMBOL(zap_prefetch_object);
-EXPORT_SYMBOL(zap_add);
-EXPORT_SYMBOL(zap_add_by_dnode);
-EXPORT_SYMBOL(zap_add_uint64);
-EXPORT_SYMBOL(zap_add_uint64_by_dnode);
-EXPORT_SYMBOL(zap_update);
-EXPORT_SYMBOL(zap_update_uint64);
-EXPORT_SYMBOL(zap_update_uint64_by_dnode);
-EXPORT_SYMBOL(zap_length);
-EXPORT_SYMBOL(zap_length_uint64);
-EXPORT_SYMBOL(zap_length_uint64_by_dnode);
-EXPORT_SYMBOL(zap_remove);
-EXPORT_SYMBOL(zap_remove_by_dnode);
-EXPORT_SYMBOL(zap_remove_norm);
-EXPORT_SYMBOL(zap_remove_uint64);
-EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
-EXPORT_SYMBOL(zap_count);
-EXPORT_SYMBOL(zap_count_by_dnode);
-EXPORT_SYMBOL(zap_value_search);
-EXPORT_SYMBOL(zap_join);
-EXPORT_SYMBOL(zap_join_increment);
-EXPORT_SYMBOL(zap_add_int);
-EXPORT_SYMBOL(zap_remove_int);
-EXPORT_SYMBOL(zap_lookup_int);
-EXPORT_SYMBOL(zap_increment_int);
-EXPORT_SYMBOL(zap_add_int_key);
-EXPORT_SYMBOL(zap_lookup_int_key);
-EXPORT_SYMBOL(zap_increment);
-EXPORT_SYMBOL(zap_cursor_init);
-EXPORT_SYMBOL(zap_cursor_fini);
-EXPORT_SYMBOL(zap_cursor_retrieve);
-EXPORT_SYMBOL(zap_cursor_advance);
-EXPORT_SYMBOL(zap_cursor_serialize);
-EXPORT_SYMBOL(zap_cursor_init_serialized);
-EXPORT_SYMBOL(zap_get_stats);
-
 ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW,
 	"Maximum micro ZAP size before converting to a fat ZAP, "
 	    "in bytes (max 1M)");
-#endif
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index fe98e7db073..a23f397e698 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1088,6 +1088,23 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 	    ZFS_DELEG_PERM_CREATE, cr));
 }
 
+/*
+ * Policy for dataset set property operations.  Individual properties are
+ * checked by zfs_check_settable(); zfs_secpolicy_recv() is additionally
+ * required when setting properties as part of a receive.
+ */
+static int
+zfs_secpolicy_setprops(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+	boolean_t received = zc->zc_cookie;
+	int error;
+
+	if (received && (error = zfs_secpolicy_recv(zc, innvl, cr)))
+		return (error);
+
+	return (zfs_secpolicy_read(zc, innvl, cr));
+}
+
 int
 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 {
@@ -3456,12 +3473,15 @@ zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 
 	ASSERT(spa_writeable(spa));
 
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
 		spa_close(spa, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
 	error = vdev_prop_set(vd, innvl, outnvl);
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
 
 	spa_close(spa, FTAG);
 
@@ -3500,12 +3520,15 @@ zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
 		return (error);
 
+	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
 		spa_close(spa, FTAG);
 		return (SET_ERROR(ENOENT));
 	}
 
 	error = vdev_prop_get(vd, innvl, outnvl);
+	spa_config_exit(spa, SCL_CONFIG, FTAG);
 
 	spa_close(spa, FTAG);
 
@@ -4120,7 +4143,6 @@ static int
 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	(void) unused, (void) outnvl;
-	const char *message;
 	char *poolname;
 	spa_t *spa;
 	int error;
@@ -4141,7 +4163,7 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 	if (error != 0)
 		return (error);
 
-	message = fnvlist_lookup_string(innvl, "message");
+	const char *message = fnvlist_lookup_string(innvl, "message");
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 		spa_close(spa, FTAG);
@@ -6647,21 +6669,27 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
  * outputs:
  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
  * zc_cookie	zap cursor
+ *
+ * The zc_nvlist_dst output array is limited to 1000 entries.
  */
 static int
 zfs_ioc_userspace_many(zfs_cmd_t *zc)
 {
+	const size_t batch_limit = 1000 * sizeof (zfs_useracct_t);
+	uint64_t bufsize = MIN(zc->zc_nvlist_dst_size, batch_limit);
 	zfsvfs_t *zfsvfs;
-	int bufsize = zc->zc_nvlist_dst_size;
 
-	if (bufsize <= 0)
+	if (bufsize < sizeof (zfs_useracct_t)) {
+		zc->zc_nvlist_dst_size = sizeof (zfs_useracct_t);
 		return (SET_ERROR(ENOMEM));
+	}
 
 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 	if (error != 0)
 		return (error);
 
 	void *buf = vmem_alloc(bufsize, KM_SLEEP);
+	zc->zc_nvlist_dst_size = bufsize;
 
 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
 	    buf, &zc->zc_nvlist_dst_size, &zc->zc_guid);
@@ -7152,7 +7180,7 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
 	dsl_pool_t *dp;
 	dsl_dataset_t *new, *old;
 	const char *firstsnap;
-	uint64_t used, comp, uncomp;
+	uint64_t used = 0, comp = 0, uncomp = 0;
 
 	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
 
@@ -8045,7 +8073,7 @@ zfs_ioctl_init(void)
 	    zfs_ioc_send, zfs_secpolicy_send);
 
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
-	    zfs_secpolicy_none);
+	    zfs_secpolicy_setprops);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
 	    zfs_secpolicy_destroy);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
diff --git a/sys/contrib/openzfs/module/zfs/zfs_quota.c b/sys/contrib/openzfs/module/zfs/zfs_quota.c
index 85b7a549b9a..0b51f8669cb 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_quota.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_quota.c
@@ -86,10 +86,14 @@ zpl_get_file_info(dmu_object_type_t bonustype, const void *data,
 		sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
 		swap = B_TRUE;
 	}
-	VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
+
+	if (unlikely(sa.sa_magic != SA_MAGIC))
+		return (SET_ERROR(EINVAL));
 
 	int hdrsize = sa_hdrsize(&sa);
-	VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
+
+	if (unlikely(hdrsize < sizeof (sa_hdr_phys_t)))
+		return (SET_ERROR(EINVAL));
 
 	uintptr_t data_after_hdr = (uintptr_t)data + hdrsize;
 	zoi->zfi_user = *((uint64_t *)(data_after_hdr + SA_UID_OFFSET));
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
index 0fa58d5ccb6..433d27dd2d1 100644
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -499,7 +499,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
 	for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
 		uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
 		int reclen;
-		char *lrp, *end;
+		char *lrp = NULL, *end = NULL;
 		arc_buf_t *abuf = NULL;
 
 		if (blk_seq > claim_blk_seq)
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
index 94b44561bd9..4b7c13dd1e9 100644
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -3830,7 +3830,6 @@ zio_ddt_write(zio_t *zio)
 
 	int p = DDT_PHYS_FOR_COPIES(ddt, zp->zp_copies);
 	ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
-	ddt_univ_phys_t *ddp = dde->dde_phys;
 
 	/*
 	 * In the common cases, at this point we have a regular BP with no
@@ -3861,14 +3860,6 @@ zio_ddt_write(zio_t *zio)
 	 * end of the chain and letting the sequence play out.
 	 */
 
-	/*
-	 * Number of DVAs in the DDT entry. If the BP is encrypted we ignore
-	 * the third one as normal.
-	 */
-	int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp));
-	IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0);
-	boolean_t is_ganged = ddt_phys_is_gang(ddp, v);
-
 	/* Number of DVAs requested by the IO. */
 	uint8_t need_dvas = zp->zp_copies;
 	/* Number of DVAs in outstanding writes for this dde. */
@@ -3883,6 +3874,21 @@ zio_ddt_write(zio_t *zio)
 	if (dde_io != NULL)
 		mutex_enter(&dde_io->dde_io_lock);
 
+	/*
+	 * Number of DVAs in the DDT entry. If the BP is encrypted we ignore
+	 * the third one as normal.
+	 *
+	 * Must be computed after taking dde_io_lock (if held) to avoid
+	 * racing with ddt_phys_unextend() in the zio_ddt_child_write_done()
+	 * error path, which can zero DVAs under dde_io_lock. Without the
+	 * lock, a stale have_dvas causes ddt_bp_fill() to copy a zeroed
+	 * DVA into the BP, producing a hole that reads back as zeros.
+	 */
+	ddt_univ_phys_t *ddp = dde->dde_phys;
+	int have_dvas = ddt_phys_dva_count(ddp, v, BP_IS_ENCRYPTED(bp));
+	IMPLY(have_dvas == 0, ddt_phys_birth(ddp, v) == 0);
+	boolean_t is_ganged = ddt_phys_is_gang(ddp, v);
+
 	if (dde_io == NULL || dde_io->dde_lead_zio[p] == NULL) {
 		/*
 		 * No IO outstanding, so we only need to worry about ourselves.
@@ -4168,14 +4174,21 @@ zio_ddt_free(zio_t *zio)
 	}
 	ddt_exit(ddt);
 
-	/*
-	 * When no entry was found, it must have been pruned,
-	 * so we can free it now instead of decrementing the
-	 * refcount in the DDT.
-	 */
-	if (!dde) {
+	if (dde) {
+		/*
+		 * DDT entry found and the refcount has been decremented.
+		 * Stop the pipeline — there is nothing more to do right now.
+		 */
+		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+	} else {
+		/*
+		 * No DDT entry; the block must have been pruned from the
+		 * table.  Clear the DEDUP bit so it is treated as a normal
+		 * block from here on.  BRT_FREE and DVA_FREE follow in the
+		 * pipeline and will handle any cloned references and the
+		 * actual block free respectively.
+		 */
 		BP_SET_DEDUP(bp, 0);
-		zio->io_pipeline |= ZIO_STAGE_DVA_FREE;
 	}
 
 	return (zio);
@@ -5925,11 +5938,11 @@ static zio_pipe_stage_t *zio_pipeline[] = {
 	zio_encrypt,
 	zio_checksum_generate,
 	zio_nop_write,
-	zio_brt_free,
 	zio_ddt_read_start,
 	zio_ddt_read_done,
 	zio_ddt_write,
 	zio_ddt_free,
+	zio_brt_free,
 	zio_gang_assemble,
 	zio_gang_issue,
 	zio_dva_throttle,
diff --git a/sys/contrib/openzfs/module/zstd/zfs_zstd.c b/sys/contrib/openzfs/module/zstd/zfs_zstd.c
index 88820ab4430..f1f27d74397 100644
--- a/sys/contrib/openzfs/module/zstd/zfs_zstd.c
+++ b/sys/contrib/openzfs/module/zstd/zfs_zstd.c
@@ -693,6 +693,15 @@ zfs_zstd_decompress_level_buf(void *s_start, void *d_start, size_t s_len,
 		return (1);
 	}
 
+	/*
+	 * An OpenZFS compressed block must expand to exactly d_len bytes.
+	 * ZSTD_decompressDCtx returns the decompressed size on success.
+	 */
+	if (result != d_len) {
+		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
+		return (1);
+	}
+
 	if (level) {
 		*level = curlevel;
 	}
diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
index 48ed7bf2eb7..71923a7808e 100644
--- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in
+++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in
@@ -525,7 +525,6 @@ systemctl --system daemon-reload >/dev/null || true
 %config(noreplace) %{_sysconfdir}/%{name}/zed.d/*
 %config(noreplace) %{_sysconfdir}/%{name}/zpool.d/*
 %config(noreplace) %{_sysconfdir}/%{name}/vdev_id.conf.*.example
-%attr(440, root, root) %config(noreplace) %{_sysconfdir}/sudoers.d/*
 
 %config(noreplace) %{_bashcompletiondir}/zfs
 %config(noreplace) %{_bashcompletiondir}/zpool
diff --git a/sys/contrib/openzfs/scripts/Makefile.am b/sys/contrib/openzfs/scripts/Makefile.am
index df2fae42fce..ed18a81b375 100644
--- a/sys/contrib/openzfs/scripts/Makefile.am
+++ b/sys/contrib/openzfs/scripts/Makefile.am
@@ -28,6 +28,7 @@ dist_noinst_SCRIPTS += $(scripts_scripts)
 endif
 
 dist_noinst_DATA += \
+	%D%/coverage_report.pl \
 	%D%/cstyle.pl \
 	%D%/update_authors.pl
 
diff --git a/sys/contrib/openzfs/scripts/coverage_report.pl b/sys/contrib/openzfs/scripts/coverage_report.pl
new file mode 100755
index 00000000000..ba8dec7a8d9
--- /dev/null
+++ b/sys/contrib/openzfs/scripts/coverage_report.pl
@@ -0,0 +1,392 @@
+#!/usr/bin/env perl
+
+# SPDX-License-Identifier: MIT
+#
+# Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
+# Copyright (c) 2026, TrueNAS.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+#
+# usage: coverage_report.pl tests/unit/test_zap.info
+#        coverage_report.pl < tests/unit/test_zap.info
+#
+# This program takes an lcov/geninfo coverage tracefile and shows a summary
+# of line, branch and function coverage for each file. It's focused on the
+# specific needs of OpenZFS' unit test suite (see tests/unit/README.md) but
+# it should be adaptable to any place where lcov's HTML output is too heavy
+# or difficult to use (eg build/CI logs).
+#
+# The heart of this program is a small parser for the tracefile format as
+# described in geninfo(1). The rest is concerned with constructing a useful
+# colorised table output.
+#
+
+#
+# Typical output:
+#
+# Coverage: test_zap       | By line         | By branch       | By function
+#                          | Rate% Total Hit | Rate% Total Hit | Rate% Total Hit
+# module/zfs/u8_textprep.c | 42.0%   802 337 | 33.5%   510 171 | 50.0%    12   6
+# module/zfs/zap.c         | 52.1%   687 358 | 45.2%   250 113 | 41.1%    90  37
+# module/zfs/zap_fat.c     | 87.8%   665 584 | 58.5%   446 261 | 94.6%    37  35
+# module/zfs/zap_impl.c    | 81.9%   232 190 | 60.3%   146  88 | 92.0%    25  23
+# module/zfs/zap_leaf.c    | 86.7%   466 404 | 69.0%   216 149 | 95.7%    23  22
+# module/zfs/zap_micro.c   | 76.5%   238 182 | 54.2%   142  77 | 92.9%    14  13
+#
+
+use 5.010;
+use warnings;
+use strict;
+use Cwd qw(getcwd);
+use Term::ANSIColor qw(colored);
+
+# Setup for color output. Perl has included Term::ANSIColor since 5.6 (~2000),
+# but RGB support didn't arrive until v4 in 5.17.8 (~2012). We disable colors
+# outright on versions < 4, or if output is not attached to a terminal.
+my $use_colors = -t \*STDOUT && $Term::ANSIColor::VERSION >= 4;
+
+# Palette setup. If Term::ANSIColor and the terminal advertise 24-bit color
+# support, then we set up a pleasant red -> green gradient for the coverage
+# percentages. If not, we scale those colors down to the older RGB-240 colors
+# (0-5 for each component), which is still quite nice.
+my @palette = !$use_colors ? () : map {
+	state $has_truecolor =
+	    $Term::ANSIColor::VERSION >= 5 && $ENV{COLORTERM};
+	my @rgb = map { hex } m/../g;
+	if ($has_truecolor) {
+		sprintf 'r%dg%db%d', @rgb;
+	} else {
+		sprintf 'rgb%d%d%d', map { $_ * 6 / 255 } @rgb;
+	}
+} (
+	# Catppuccin Latte
+	# https://catppuccin.com/palette/
+	'd20f39',	# Red
+	'e64553',	# Maroon
+	'fe640b',	# Peach
+	'df8e1d',	# Yellow
+	'40a02b',	# Green
+	'179299',	# Teal
+);
+
+# Test name, from the TN: field if present.
+my $test_name = '';
+
+# Per-file data, initially sourced from the tracefile, then augmented
+my %filedata;
+
+# Tracking for the longest (stringified) value for each key. These are used
+# later when computing the output table column width.
+my %len;
+sub bump_len {
+	my ($k, $x) = @_;
+	my $l = length "".$x;
+	$len{$k} = $l if ($len{$k} // 0) < $l;
+}
+
+###
+# Parse the tracefile into per-file data records.
+
+# Current working directory. Expected to be the build root. Used to remove
+# the leading part of the source filenames, so it's not the end of the world
+# if it's wrong.
+my $cwd = getcwd;
+
+# Loop over the input
+while (my $line = <>) {
+	state $data = {};
+	chomp $line;
+
+	# skip comments
+	next if $line =~ m/^#/;
+
+	if ($line eq 'end_of_record') {
+		# end of this file, prep for next
+		$data = {};
+		next;
+	}
+
+	# everything else should be a KEY:VALUE line
+	my ($k, $v) = $line =~ m/^([A-Z]+):(.*)$/;
+	unless (defined $k) {
+		say "W: $.: malformed line: $line";
+		next;
+	}
+
+	if ($k eq 'TN') {
+		# TN:test_zap
+
+		# Test name. This is actually per-record (a tracefile can
+		# carry multiple test results) but we only ever generate
+		# them for a single test, so we don't make any effort to
+		# notice or track changes.
+		$test_name = $v;
+		next;
+	}
+
+	if ($k eq 'SF') {
+		# SF:/home/robn/code/zfs-unit/module/zfs/zap.c
+
+		# Source file. Value is the name, and the rest of the record
+		# applies to it.
+
+		# Remove the leading build root name.
+		my $path = $v;
+		$path =~ s{^$cwd/*}{};
+
+		# If we haven't seen this file before, create a new data
+		# record for it.
+		$filedata{$v} //= { path => $path };
+		$data = $filedata{$v};
+
+		# Increase path column width if necessary.
+		bump_len('path', $path);
+		next;
+	}
+
+	# Handle the counter keys. These are single values for the entire
+	# record in the file. L, FN and BR are Line, Function and Branch,
+	# F and H are found (ie total) and hit (ie was executed).
+	if (grep { $_ eq $k } qw(LF LH FNF FNH BRF BRH)) {
+		$data->{lc $k} = $v;
+		bump_len(lc $k, $v);
+		next;
+	}
+
+	# Older versions of lcov may not emit absolute found/hit counters. To
+	# handle this, we maintain our own counters from other events recorded
+	# in the info file, which we use if we don't get an absolute count.
+
+	if ($k eq 'DA') {
+		# DA:<line number>,<execution count>[,<checksum>]
+		# DA:463,0
+		# DA:469,153
+		my ($l, $h) = split ',', $v;
+
+		# One DA: record per actual code line (vs comment or other
+		# non-executable line), so we count records, not line number.
+		$data->{_lf}++;
+
+		# Only increment the hit count if the line was executed.
+		$data->{_lh}++ if $h > 0;
+		next;
+	}
+
+	if ($k eq 'FN') {
+		# FN:<start line>,[<end line>,]<function name>
+		# FN:283,zap_lookup_by_dnode
+
+		# One FN record per function
+		$data->{_fnf}++;
+		next;
+	}
+	if ($k eq 'FNDA') {
+		# FNDA:<execution count>,<function name>
+		# FNDA:0,zap_lookup
+		# FNDA:78,zap_lookup_by_dnode
+
+		# Only count a hit if there was at least one execution.
+		my ($c) = split ',', $v;
+		$data->{_fnh}++ if 0+$c > 0;
+		next;
+	}
+
+	if ($k eq 'BRDA') {
+		# BRDA:<line_number>,[<exception>]<block>,<branch>,<taken>
+		# BRDA:365,0,0,-
+		# BRDA:365,0,1,-
+		my ($l, $b, $br, $c) = split ',', $v;
+
+		# One BRDA: record per branch
+		$data->{_brf}++;
+
+		# <taken> is the number of times the branch arm was taken, or
+		# '-' if never considered (eg surrounding block was never
+		# entered). Both count as 0 for our purposes.
+		$c = 0 if $c eq '-';
+
+		# Only count a hit if there was at least one execution.
+		$data->{_brh}++ if 0+$c > 0;
+		next;
+	}
+}
+
+###
+# Synthesize missing counters
+
+for my $file (keys %filedata) {
+	my $data = $filedata{$file};
+
+	for my $k (qw(lf lh fnf fnh brf brh)) {
+		# Get our own count, if one exists.
+		my $v = delete $data->{"_$k"} // 0;
+
+		# If we didn't find a count in the info file, use our own.
+		# Note that this will also set legitimately unseen values to
+		# 0 (eg a source file with no branches). That's actually what
+		# we want.
+		unless (exists $data->{$k}) {
+			$data->{$k} = $v;
+			bump_len($k, $v);
+		}
+	}
+}
+
+###
+# Synthesize the "rate" percentage field from the "found" and "hit" fields.
+
+sub rate {
+	my ($data, $k, $kf, $kh) = @_;
+	my $rate = sprintf '%.01f%%',
+	    $data->{$kf} ? (100 * $data->{$kh} / $data->{$kf}) : 0;
+	$data->{$k} = $rate;
+	bump_len($k, $rate);
+}
+
+for my $file (keys %filedata) {
+	my $data = $filedata{$file};
+	rate($data, 'lr', 'lf', 'lh');
+	rate($data, 'brr', 'brf', 'brh');
+	rate($data, 'fnr', 'fnf', 'fnh');
+}
+
+###
+# Set up the header "rows".
+
+# We reuse our data record structure a little because outputting these needs to
+# consider and sometimes contribute to column width.
+
+# The top row spans multiple columns. The pad functions below have extra tools
+# to handle the math.
+my $h1data = {
+	path => 'Coverage'.($test_name ? ": $test_name" : ''),
+	l => 'By line',
+	br => 'By branch',
+	fn => 'By function',
+};
+bump_len('path', $h1data->{path});
+
+# The second row is the actual header for each data column, and so may push
+# the column widths out if necessary.
+my $h2data = {
+	lr  => 'Rate%', lf  => 'Total', lh  => 'Hit',
+	brr => 'Rate%', brf => 'Total', brh => 'Hit',
+	fnr => 'Rate%', fnf => 'Total', fnh => 'Hit',
+};
+bump_len($_, $h2data->{$_}) for keys %$h2data;
+
+###
+# Table layout
+
+# Internal helper for padr() and padl() below. The idea is to compute the
+# effective column width, and the string we want to place in it. If it would
+# fit exactly, we return the string. If not, the passed-in function is called
+# with the string, its length and the column width, and it will place it
+# (by adding padding on either side).
+#
+# Most calls take a single column key, which makes it very simple - take
+# the max width for that column (from %len, set by bump_len()), and the value
+# of that key in this column, and that's all of it.
+#
+# For the top heading row (h1data above), a list of column keys can be passed
+# in. In this case, the string will be constructed as a space-separated list
+# of all the keys that have a value in the data row. The column width is the
+# sum of max column widths for all columns that have a max column width, plus
+# one for each space separator. This allows us to provide a separate string
+# to appear in the space, with the amount of space computed from the columns
+# underneath it.
+#
+sub _pad {
+	my ($fn, $data, @k) = @_;
+	my $str = join ' ', map { $data->{$_} // () } @k;
+	my $strlen = length $str;
+	my $colwidth = -1;
+	$colwidth += ($len{$_} // -1)+1 for @k;
+	return $strlen == $colwidth ? $str : $fn->($str, $strlen, $colwidth);
+}
+
+# Return the value of the named fields, with space-padding added to the right.
+sub padr {
+	_pad(sub {
+		my ($str, $strlen, $colwidth) = @_;
+		$str . (' ' x ($colwidth - $strlen));
+	}, @_);
+}
+
+# Return the value of the named fields, with space-padding added to the left.
+sub padl {
+	_pad(sub {
+		my ($str, $strlen, $colwidth) = @_;
+		(' ' x ($colwidth - $strlen)) . $str;
+	}, @_);
+}
+
+# Return the given % string, wrapped in terminal control codes that will give
+# it an appropriate color from the palette.
+sub colorpct {
+	my ($pct) = @_;
+
+	# If colors are disabled, return the string as-is.
+	return $pct unless $use_colors;
+
+	my ($n) = $pct =~ m/([0-9\.]+)/;
+
+	# scale 0-100 into palette range
+	my $s = int(($#palette / 100) * $n);
+	my $c = $palette[$s];
+
+	return colored([$c], $pct);
+}
+
+my @rows;
+
+# Layout the first header row
+push @rows, [
+	padr($h1data, 'path'),
+	'|', padr($h1data, 'l', 'lr', 'lf', 'lh'),
+	'|', padr($h1data, 'br', 'brr', 'brf', 'brh'),
+	'|', padr($h1data, 'fn', 'fnr', 'fnf', 'fnh'),
+];
+
+# Layout the second header row
+push @rows, [
+	padr($h2data, 'path'),
+	'|', padr($h2data, 'lr'), padl($h2data, 'lf'), padl($h2data, 'lh'),
+	'|', padr($h2data, 'brr'), padl($h2data, 'brf'), padl($h2data, 'brh'),
+	'|', padr($h2data, 'fnr'), padl($h2data, 'fnf'), padl($h2data, 'fnh'),
+];
+
+# Layout the data rows, padding and colorising as appropriate.
+for my $file (sort keys %filedata) {
+	my $data = $filedata{$file};
+
+	push @rows, [
+	    padr($data, 'path'),
+	    '|', colorpct(padl($data, 'lr')),
+	    padl($data, 'lf'), padl($data, 'lh'),
+	    '|', colorpct(padl($data, 'brr')),
+	    padl($data, 'brf'), padl($data, 'brh'),
+	    '|', colorpct(padl($data, 'fnr')),
+	    padl($data, 'fnf'), padl($data, 'fnh'),
+	];
+}
+
+# And print them all out!
+say "@$_" for @rows;
diff --git a/sys/contrib/openzfs/tests/Makefile.am b/sys/contrib/openzfs/tests/Makefile.am
index b007a3d7e5f..2002ced658c 100644
--- a/sys/contrib/openzfs/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/Makefile.am
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: CDDL-1.0
+include $(srcdir)/%D%/unit/Makefile.am
 include $(srcdir)/%D%/zfs-tests/Makefile.am
 
 
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
index 4c7e4e85ec0..0dda8fdfa36 100644
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -37,7 +37,8 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
     'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
     'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
     'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
-    'alloc_class_013_pos', 'alloc_class_016_pos']
+    'alloc_class_013_pos', 'alloc_class_014_pos', 'alloc_class_015_neg',
+    'alloc_class_016_pos']
 tags = ['functional', 'alloc_class']
 
 [tests/functional/append]
@@ -172,9 +173,10 @@ tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
     'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
     'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
     'zdb_display_block', 'zdb_encrypted', 'zdb_encrypted_raw',
-    'zdb_label_checksum', 'zdb_object_range_neg', 'zdb_object_range_pos',
-    'zdb_objset_id', 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2',
-    'zdb_backup', 'zdb_tunables']
+    'zdb_file_layout_001', 'zdb_file_layout_002', 'zdb_file_layout_003',
+    'zdb_file_layout_neg', 'zdb_label_checksum', 'zdb_object_range_neg',
+    'zdb_object_range_pos', 'zdb_objset_id', 'zdb_decompress_zstd',
+    'zdb_recover', 'zdb_recover_2', 'zdb_backup', 'zdb_tunables']
 pre =
 post =
 tags = ['functional', 'cli_root', 'zdb']
@@ -269,8 +271,8 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
     'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
     'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg',
     'zfs_mount_012_pos', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted',
-    'zfs_mount_remount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
-    'zfs_mount_test_race', 'zfs_mount_recursive']
+    'zfs_mount_remount', 'zfs_mount_ro_rw', 'zfs_mount_all_fail',
+    'zfs_mount_all_mountpoints', 'zfs_mount_test_race', 'zfs_mount_recursive']
 tags = ['functional', 'cli_root', 'zfs_mount']
 
 [tests/functional/cli_root/zfs_program]
@@ -571,8 +573,8 @@ tags = ['functional', 'cli_root', 'zpool_scrub']
 
 [tests/functional/cli_root/zpool_set]
 tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
-    'zpool_set_ashift', 'zpool_set_features', 'vdev_set_001_pos',
-    'user_property_001_pos', 'user_property_002_neg',
+    'zpool_set_ashift', 'zpool_set_features', 'zpool_set_inherit',
+    'vdev_set_001_pos', 'user_property_001_pos', 'user_property_002_neg',
     'zpool_set_clear_userprop','vdev_set_scheduler']
 tags = ['functional', 'cli_root', 'zpool_set']
 
@@ -715,10 +717,11 @@ post =
 tags = ['functional', 'deadman']
 
 [tests/functional/dedup]
-tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_fdt_pacing',
-    'dedup_legacy_create', 'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
-    'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_prune_leak',
-    'dedup_zap_shrink']
+tests = ['dedup_bclone', 'dedup_bclone_pruned', 'dedup_fdt_create',
+    'dedup_fdt_import',
+    'dedup_fdt_pacing', 'dedup_legacy_create', 'dedup_legacy_import',
+    'dedup_legacy_fdt_upgrade', 'dedup_legacy_fdt_mixed', 'dedup_quota',
+    'dedup_prune', 'dedup_prune_leak', 'dedup_zap_shrink']
 pre =
 post =
 tags = ['functional', 'dedup']
@@ -1023,6 +1026,15 @@ tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
     'scrub_mirror_003_pos', 'scrub_mirror_004_pos']
 tags = ['functional', 'scrub_mirror']
 
+[tests/functional/send_xdr_encoding]
+tests = ['xdr_bookmark_raw', 'xdr_bookmark_raw_with_write',
+    'xdr_incr_from_bookmark', 'xdr_incr_from_redacted', 'xdr_raw',
+    'xdr_redacted_full', 'xdr_redacted_received',
+    'xdr_redacted_received_raw', 'xdr_replication', 'xdr_resume',
+    'xdr_resume_bookmark_raw', 'xdr_resume_bookmark_raw_with_write',
+    'xdr_resume_raw', 'xdr_resume_redacted']
+tags = ['functional', 'send_xdr_encoding']
+
 [tests/functional/slog]
 tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
     'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
@@ -1099,7 +1111,7 @@ tags = ['functional', 'vdev_disk']
 [tests/functional/vdev_zaps]
 tests = ['vdev_zaps_001_pos', 'vdev_zaps_002_pos', 'vdev_zaps_003_pos',
     'vdev_zaps_004_pos', 'vdev_zaps_005_pos', 'vdev_zaps_006_pos',
-    'vdev_zaps_007_pos']
+    'vdev_zaps_007_pos', 'vdev_zaps_008_pos']
 tags = ['functional', 'vdev_zaps']
 
 [tests/functional/write_dirs]
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
index 11bda60a9ca..009d984f2b9 100644
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -118,7 +118,8 @@ tags = ['functional', 'fallocate']
 tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
     'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
     'auto_spare_002_pos', 'auto_spare_double', 'auto_spare_multiple',
-    'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
+    'auto_spare_ashift', 'auto_spare_rotational', 'auto_spare_shared',
+    'decrypt_fault',
     'decompress_fault', 'fault_limits', 'scrub_after_resilver',
     'suspend_on_probe_errors', 'suspend_resume_single', 'suspend_draid_fgroups',
     'zpool_status_-s']
diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run
index 936f2bcc32b..788c9b39531 100644
--- a/sys/contrib/openzfs/tests/runfiles/sanity.run
+++ b/sys/contrib/openzfs/tests/runfiles/sanity.run
@@ -156,7 +156,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
     'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
     'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg',
     'zfs_mount_012_pos', 'zfs_mount_encrypted', 'zfs_mount_remount',
-    'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
+    'zfs_mount_ro_rw', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
     'zfs_mount_test_race', 'zfs_mount_recursive']
 tags = ['functional', 'cli_root', 'zfs_mount']
 
@@ -353,12 +353,11 @@ tags = ['functional', 'cli_root', 'zpool_scrub']
 
 [tests/functional/cli_root/zpool_set]
 tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
-    'zpool_set_ashift', 'zpool_set_features']
+    'zpool_set_ashift', 'zpool_set_features', 'zpool_set_inherit']
 tags = ['functional', 'cli_root', 'zpool_set']
 
 [tests/functional/cli_root/zpool_split]
-tests = ['zpool_split_cliargs', 'zpool_split_devices',
-    'zpool_split_props', 'zpool_split_vdevs', 'zpool_split_indirect']
+tests = ['zpool_split_cliargs', 'zpool_split_devices', 'zpool_split_indirect']
 tags = ['functional', 'cli_root', 'zpool_split']
 
 [tests/functional/cli_root/zpool_status]
@@ -439,12 +438,6 @@ tags = ['functional', 'features', 'large_dnode']
 tests = ['gang_blocks_001_pos']
 tags = ['functional', 'gang_blocks']
 
-[tests/functional/grow]
-pre =
-post =
-tests = ['grow_pool_001_pos', 'grow_replicas_001_pos']
-tags = ['functional', 'grow']
-
 [tests/functional/history]
 tests = ['history_004_pos', 'history_005_neg', 'history_007_pos',
     'history_009_pos']
@@ -502,12 +495,6 @@ tags = ['functional', 'nestedfs']
 tests = ['nopwrite_sync', 'nopwrite_volume']
 tags = ['functional', 'nopwrite']
 
-[tests/functional/pool_checkpoint]
-tests = ['checkpoint_conf_change', 'checkpoint_discard_many',
-    'checkpoint_removal', 'checkpoint_sm_scale', 'checkpoint_twice']
-tags = ['functional', 'pool_checkpoint']
-timeout = 1800
-
 [tests/functional/poolversion]
 tests = ['poolversion_001_pos', 'poolversion_002_pos']
 tags = ['functional', 'poolversion']
@@ -557,13 +544,11 @@ tags = ['functional', 'reservation']
 
 [tests/functional/rsend]
 tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
-    'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'rsend_005_pos',
-    'rsend_006_pos', 'rsend_009_pos', 'rsend_010_pos', 'rsend_011_pos',
-    'rsend_014_pos', 'rsend_016_neg', 'rsend-exclude_001_pos',
-    'rsend-exclude_002_pos', 'send-c_verify_contents',
-    'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup',
-    'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props',
-    'send_encrypted_freeobjects',
+    'rsend_002_pos', 'rsend_003_pos', 'rsend_009_pos', 'rsend_010_pos',
+    'rsend_011_pos', 'rsend_016_neg', 'rsend-exclude_001_pos',
+    'rsend-exclude_002_pos', 'send-c_volume', 'send-c_zstreamdump',
+    'send-c_recv_dedup', 'send-L_toggle', 'send_encrypted_hierarchy',
+    'send_encrypted_props', 'send_encrypted_freeobjects',
     'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds',
     'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset',
     'send_invalid']
@@ -644,9 +629,3 @@ tags = ['functional', 'zvol', 'zvol_swap']
 [tests/functional/zpool_influxdb]
 tests = ['zpool_influxdb']
 tags = ['functional', 'zpool_influxdb']
-
-[tests/functional/pyzfs]
-tests = ['pyzfs_unittest']
-pre =
-post =
-tags = ['functional', 'pyzfs']
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
index 29d2760ccb8..a80112d914e 100755
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -253,8 +253,9 @@ maybe = {
     'renameat2/setup': ['SKIP', renameat2_reason],
     'reservation/reservation_008_pos': ['FAIL', 7741],
     'reservation/reservation_018_pos': ['FAIL', 5642],
+    'send_xdr_encoding/xdr_bookmark_raw_with_write': ['FAIL', 18491],
+    'send_xdr_encoding/xdr_resume_bookmark_raw_with_write': ['FAIL', 18491],
     'snapshot/clone_001_pos': ['FAIL', known_reason],
-    'snapshot/snapshot_006_pos': ['FAIL', known_reason],
     'snapshot/snapshot_009_pos': ['FAIL', 7961],
     'snapshot/snapshot_010_pos': ['FAIL', 7961],
     'snapused/snapused_004_pos': ['FAIL', 5513],
@@ -277,7 +278,6 @@ if sys.platform.startswith('freebsd'):
         'pool_checkpoint/checkpoint_big_rewind': ['FAIL', 12622],
         'pool_checkpoint/checkpoint_indirect': ['FAIL', 12623],
         'resilver/resilver_restart_001': ['FAIL', known_reason],
-        'snapshot/snapshot_002_pos': ['FAIL', 14831],
         'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', 16668],
         'bclone/bclone_crossfs_corner_cases': ['SKIP', cfr_cross_reason],
         'bclone/bclone_crossfs_corner_cases_limited':
diff --git a/sys/contrib/openzfs/tests/unit/.gitignore b/sys/contrib/openzfs/tests/unit/.gitignore
new file mode 100644
index 00000000000..12a60a65666
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/.gitignore
@@ -0,0 +1,4 @@
+/test_*.info
+/test_*_coverage
+
+/test_zap
diff --git a/sys/contrib/openzfs/tests/unit/Makefile.am b/sys/contrib/openzfs/tests/unit/Makefile.am
new file mode 100644
index 00000000000..80fe7311c46
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/Makefile.am
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: CDDL-1.0
+
+# libunit.la includes munit and any additional tools that apply to all tests
+libunit_la_CFLAGS = $(AM_CFLAGS)
+
+noinst_LTLIBRARIES += libunit.la
+libunit_la_SOURCES = \
+	%D%/mock_dmu.c \
+	%D%/mock_dmu.h \
+	%D%/munit.c \
+	%D%/munit.h \
+	%D%/unit.c \
+	%D%/unit.h
+
+
+# all test binaries
+UNIT_TESTS = \
+	%D%/test_zap
+noinst_PROGRAMS = $(UNIT_TESTS)
+
+
+%C%_test_zap_CFLAGS = $(AM_CFLAGS)
+
+nodist_%C%_test_zap_SOURCES = \
+	module/zfs/zap.c \
+	module/zfs/zap_fat.c \
+	module/zfs/zap_impl.c \
+	module/zfs/zap_micro.c \
+	module/zfs/zap_leaf.c \
+	module/zfs/u8_textprep.c
+
+%C%_test_zap_SOURCES = \
+	%D%/test_zap.c
+
+%C%_test_zap_LDADD = \
+	libspl.la \
+	libbtree.la \
+	libunit.la
+
+
+# test run and coverage targets below
+PHONY += unit unit-coverage unit-coverage-html
+
+_unit_run_%: %D%/%
+	@echo "  UNITTEST $<" ; $< $(TOPT)
+
+# note: any changes in switches to lcov or genhtml must be carefully checked
+#       on 1.x and 2.x; the current option set is carefully chosen to allow
+#       both to work sensibly
+
+# .info is marked PRECIOUS, because it's usually only created as an intermediate
+# from one of the unit phony targets, but once it exists there's no point
+# remaking it until and unless the test binary itself changes
+.PRECIOUS: %D%/%.info
+%D%/%.info: %D%/%
+	@-${RM} $@
+	@${LCOV} --quiet --quiet --zerocounters --directory $(top_srcdir)
+	@echo "  UNITTEST $<" ; $< $(TOPT)
+	@${LCOV} --quiet --quiet --capture  \
+		--test-name $(notdir $<) \
+		--directory $(top_srcdir) \
+		--output-file $@ \
+		--rc lcov_branch_coverage=1 \
+		--rc geninfo_unexecuted_blocks=1 \
+		$(addprefix --include $(abs_top_builddir)/, $(call \
+		    $(join $(join nodist_%C%_, $(notdir $<)), _SOURCES))) \
+		2>/dev/null
+
+_unit_coverage_%: %D%/%.info
+	@scripts/coverage_report.pl $<
+
+# render a captured .info file into a browsable HTML report directory
+_unit_coverage_html_%: %D%/%.info
+	@-${RM} -r $(subst .info,_coverage, $<)
+	@${GENHTML} --quiet --quiet \
+		--rc lcov_branch_coverage=1 --rc check_data_consistency=0 \
+		--output-directory $(subst .info,_coverage, $<) \
+		$< \
+		2>/dev/null
+	@echo "coverage results:" \
+		"file://$(realpath %D%)/$(subst .info,_coverage,$(notdir $<))/index.html"
+
+CLEAN_LOCAL += unit-clean-local
+unit-clean-local:
+	-${RM} -r %D%/*.info %D%/*_coverage/
+
+_UNIT_ALL_TARGETS = $(notdir $(UNIT_TESTS))
+_UNIT_FIND_TARGET = \
+	$(foreach cmd, $(UNIT_TESTS), \
+		$(if $(filter $(join test_, $(1)), $(notdir $(cmd))), \
+			$(notdir $(cmd))))
+
+_UNIT_TARGETS = $(if $(T), \
+	$(call _UNIT_FIND_TARGET, $(T)), $(call _UNIT_ALL_TARGETS))
+
+unit: $(addprefix _unit_run_, $(_UNIT_TARGETS))
+	@$(if $^, true, echo "ERROR: couldn't find unit test: $(T)" && false)
+
+if CODE_COVERAGE_ENABLED
+unit-coverage: $(addprefix _unit_coverage_, $(_UNIT_TARGETS))
+	@$(if $^, true, echo "ERROR: couldn't find unit test: $(T)" && false)
+unit-coverage-html: $(addprefix _unit_coverage_html_, $(_UNIT_TARGETS))
+	@$(if $^, true, echo "ERROR: couldn't find unit test: $(T)" && false)
+else
+unit-coverage:
+	@echo "unit test coverage not enabled."
+	@echo "re-run configure with --enable-code-coverage"
+	@false
+unit-coverage-html: unit-coverage
+endif
diff --git a/sys/contrib/openzfs/tests/unit/README.md b/sys/contrib/openzfs/tests/unit/README.md
new file mode 100644
index 00000000000..6a4ee095af2
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/README.md
@@ -0,0 +1,217 @@
+# Unit tests
+
+> [!NOTE]
+>
+> This document is a draft. It will be updated as we gain experience writing
+> and running unit tests.
+
+This directory contains a unit testing framework for OpenZFS, and a collection
+of unit tests.
+
+## Building and running
+
+The unit tests are built by default as part of the regular userspace build, so
+you probably don’t have to do anything else.
+
+The easiest way to run the tests is to run `make unit`, which will run all the
+available tests.
+
+```
+$ make unit
+  UNITTEST tests/unit/test_zap
+Running test suite with seed 0x9d36890b...
+zap.mock_microzap_sanity             [ OK    ] [ 0.00001088 / 0.00000939 CPU ]
+zap.mock_fatzap_sanity               [ OK    ] [ 0.00004281 / 0.00004257 CPU ]
+zap.zap_basic
+  type=micro                         [ OK    ] [ 0.00001899 / 0.00001893 CPU ]
+  type=fat                           [ OK    ] [ 0.00004174 / 0.00004135 CPU ]
+4 of 4 (100%) tests successful, 0 (0%) test skipped.
+```
+
+Running a single test binary is possible with the `T=` param to `make unit`.
+
+```
+$ make unit T=zap
+  UNITTEST tests/unit/test_zap
+  ...
+```
+
+The test binaries are just normal programs in `./tests/unit`, and can be run
+directly. This is useful for debugging with `gdb`.
+
+```
+$ ./tests/unit/test_zap
+Running test suite with seed 0x18e131ac...
+...
+```
+
+The test framework provides various options for controlling how the tests are
+run. Add the `--help` switch for more info. If using the make rule, options can
+be passed via the `TOPT=` param.
+
+### Building just for tests
+
+Recommended “minimum” build for just the unit tests, with additional debug to
+assist with understanding issues.
+
+```
+./configure \
+	--with-config=user \
+	--enable-debug --enable-debuginfo \
+	--disable-sysvinit --disable-systemd --disable-pam --disable-pyzfs
+make -j$(nproc)
+```
+
+TODO: add `--with-config=unit` that disables _everything_ not needed for the
+tests
+
+### Generating a coverage report
+
+If `configure` was run with `--enable-code-coverage`, then two additional build
+targets are available that will run the requested tests and produce a report.
+
+The `unit-coverage` target runs `scripts/coverage_report.pl` to produce a
+coverage summary directly in text immediately after the test output, and is
+good for inclusion in log files and other build system output.
+
+```
+$ make unit-coverage T=zap
+  UNITTEST tests/unit/test_zap
+Running test suite with seed 0xf51efca9...
+zap.mock_microzap_sanity             [ OK    ] [ 0.00000941 / 0.00000834 CPU ]
+zap.mock_fatzap_sanity               [ OK    ] [ 0.00005782 / 0.00005766 CPU ]
+...
+zap.cursor_release_one
+  type=micro                         [ OK    ] [ 0.00001705 / 0.00001681 CPU ]
+  type=fat                           [ OK    ] [ 0.00004748 / 0.00004738 CPU ]
+30 of 30 (100%) tests successful, 0 (0%) test skipped.
+Coverage: test_zap       | By line         | By branch       | By function
+                         | Rate% Total Hit | Rate% Total Hit | Rate% Total Hit
+module/zfs/u8_textprep.c |  0.0%   802   0 |  0.0%   510   0 |  0.0%    12   0
+module/zfs/zap.c         | 33.9%   610 207 | 31.1%   238  74 | 23.0%    74  17
+module/zfs/zap_fat.c     | 47.1%   665 313 | 29.8%   446 133 | 62.2%    37  23
+module/zfs/zap_impl.c    | 57.8%   232 134 | 39.7%   146  58 | 72.0%    25  18
+module/zfs/zap_leaf.c    | 60.9%   466 284 | 41.2%   216  89 | 78.3%    23  18
+module/zfs/zap_micro.c   | 68.9%   238 164 | 41.5%   142  59 | 92.9%    14  13
+```
+
+The `unit-coverage-html` target will use `lcov` and `genhtml` to generate an
+interactive HTML report that also can show the specific source lines that are
+covered.
+
+```
+$ make unit-coverage-html T=zap
+  UNITTEST tests/unit/test_zap
+Running test suite with seed 0x485bf2e2...
+zap.mock_microzap_sanity             [ OK    ] [ 0.00000935 / 0.00000794 CPU ]
+zap.mock_fatzap_sanity               [ OK    ] [ 0.00006050 / 0.00006025 CPU ]
+...
+zap.cursor_release_one
+  type=micro                         [ OK    ] [ 0.00001785 / 0.00001767 CPU ]
+  type=fat                           [ OK    ] [ 0.00005262 / 0.00005250 CPU ]
+30 of 30 (100%) tests successful, 0 (0%) test skipped.
+coverage results:
+file:///home/robn/code/zfs-unit/tests/unit/tests/unit/test_zap_coverage/index.ht
+ml
+```
+
+Currently the coverage data will only be regenerated when the test binary
+itself changes. To force it, use `make unit-clean-local` to remove the coverage
+data.
+
+## Guidance for test writers
+
+### Top five
+
+* Only bring in the source files under test.
+* Use mocks to create the test scenario, then interrogate them to understand
+the result.
+* Prefer more smaller tests over fewer bigger ones.
+* Use coverage reports to guide test development.
+* Do the simplest possible thing.
+
+### Test structure
+
+Tests should be as simple and as readable as possible. When a test fails, we
+want to avoid the possibility that it could be the test itself at fault rather
+than the system under test.
+
+* Aim for one source file per subsystem or source concept (eg ZAP).
+* Aim for one test function per API call or logical behaviour
+  * Each “version” or “mode” of an API call or behaviour is a separate test
+  * Don’t test more than one thing in the same test; a test shouldn’t rely on
+    state or results from an earlier test
+* Use test parameters for “class”- or “vtable”-type APIs, where each
+  implementation should respond to API calls the same way
+
+### Build system
+
+The build setup in `tests/unit/Makefile.am` is very similar to the other
+userspace builds; however, it has a couple of differences to make the run and
+coverage targets work more smoothly.
+
+* Name the test program `test_foo`. Almost always, you will have one source
+  file with the actual tests in it, called `test_foo.c`.
+* Add the program to `UNIT_TESTS`. `noinst_PROGRAMS` will be populated from it,
+  but this gives a specific name the run and coverage targets can use to
+  resolve the `T=` parameter to a specific test.
+* List the source files under test in `nodist_%C%_test_foo_SOURCES`, and the
+  source files for the test itself in `%C%_test_foo_SOURCES`. This is
+  important, as the coverage targets use `nodist_%C%_ ... _SOURCES` as the list
+  of objects to include in the coverage output.
+
+### Mocks
+
+A “mock” struct is a fake version of some data structure that the subsystem
+under test will accept and use as though it was a real one.
+
+* Make mock structs opaque. All uses from the test suite should be through
+  specific named accessor functions.
+* Name a mock struct for the struct it is mimicking, prefixed with `mock_`. eg
+  `mock_dnode_t` is the mock for `dnode_t`.
+* Access functions should be named for the struct, eg the function to create a
+  `mock_dnode_t` is `mock_dnode_t *mock_dnode_create(...)`.
+* `mock_*` functions should always use the mock type name in its signature,
+  never the original.
+* The mock object should always be directly castable to its real type and
+  vice-versa, ie a `mock_dnode_t *` is always usable wherever a `dnode_t *`
+  is (within the domain of the subsystem under test).
+
+This guidance pushes the programmer towards being explicit at the possible
+expense of concision. This is in service of keeping the tests reliable; in
+particular, if mocks require explicit casting to use, then there’s far less
+chance of either a mock or a real object being used incorrectly in the test,
+which can be confusing.
+
+### Unit testing framework
+
+[µnit](https://nemequ.github.io/munit/) (aka munit) is the unit test framework.
+It is a relatively niche choice, and arguably abandoned by upstream, but is
+well constructed with a thoughtful feature set and some useful properties:
+
+* Just two source files we can easily carry in the repo.
+* Portable, including to Windows.
+* Each test is run in a forked process, so a test failure will not corrupt the
+  rest of the test suite run
+* Parameterised tests.
+* A large suite of assertions and other useful functions that make it easy to
+  integrate with.
+
+All OpenZFS unit tests are ultimately targeting munit, so it's expected that
+they will use various features as needed. However, we also supply our own
+facilities to extend those in useful ways.
+
+#### Local extensions
+
+`unit.h` provides a handful of macros. The majority of these are aliases for
+the much longer munit names for the same function, eg `unit_true(n)` is an
+alias for `munit_assert_true(n)`, `unit_eq(a,b)` is an alias for
+`munit_assert_uint64(a, ==, b)`, and so on. These are there so that the
+assertions do not dominate the test visually, as we want it to be easier to
+focus on the details.
+
+Similarly, the `UNIT_TEST` and `UNIT_PARAM` macros exist to help with test
+definition, as the casts are a little complicated.
+
+The goal is to keep this set relatively small, but all of munit is there for
+use, so do extend it if necessary.
diff --git a/sys/contrib/openzfs/tests/unit/mock_dmu.c b/sys/contrib/openzfs/tests/unit/mock_dmu.c
new file mode 100644
index 00000000000..ae035498da6
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/mock_dmu.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/zfs_context.h>
+#include <sys/dmu.h>
+#include <sys/dmu_tx.h>
+#include <sys/dnode.h>
+#include <sys/dsl_dataset.h>
+#include <sys/spa.h>
+#include <sys/zfeature.h>
+
+#include "mock_dmu.h"
+#include "unit.h"
+
+/*
+ * A mock dbuf. A real dmu_buf_t (first for casting), the attached user data
+ * pointer and the owning mock dnode. Block data is stored in a separate
+ * allocation so that the struct address remains stable across block resizes.
+ */
+struct mock_dbuf {
+	dmu_buf_t		mdb_db;
+	dmu_buf_user_t		*mdb_user;
+	mock_dnode_t		*mdb_owner;
+	void			*mdb_data;
+};
+typedef struct mock_dbuf mock_dbuf_t;
+
+/*
+ * A mock dnode. A real dnode_t (must be first for casting) with dn_type and
+ * dn_object set, plus a sparse array of mock_dbuf_t pointers by block id.
+ */
+struct mock_dnode {
+	dnode_t			mdn_dn;
+	uint64_t		mdn_refcount;	/* starts at 1 on create */
+	size_t			mdn_blksize;	/* bytes per block */
+	size_t			mdn_nblocks;	/* length of mdn_blocks */
+	mock_dbuf_t		**mdn_blocks;	/* entry NULL if unallocated */
+};
+
+/*
+ * A mock transaction. We only allocate and zero it, nothing currently uses
+ * any of its internals.
+ */
+struct mock_dmu_tx {
+	dmu_tx_t		mtx_tx;
+};
+
+/* Mock dnode */
+
+static mock_dbuf_t *
+mock_dnode_block_alloc(mock_dnode_t *mdn, uint64_t blkid)
+{
+	mock_dbuf_t *mdb = kmem_zalloc(sizeof (mock_dbuf_t), KM_SLEEP);
+	mdb->mdb_data = kmem_zalloc(mdn->mdn_blksize, KM_SLEEP);
+
+	mdb->mdb_db.db_object = mdn->mdn_dn.dn_object;
+	mdb->mdb_db.db_offset = blkid * mdn->mdn_blksize;
+	mdb->mdb_db.db_size   = mdn->mdn_blksize;
+	mdb->mdb_db.db_data   = mdb->mdb_data;
+	mdb->mdb_owner = mdn;
+
+	return (mdb);
+}
+
+/* Grow the dbuf array if needed, then return (or create) the dbuf for blkid. */
+static mock_dbuf_t *
+mock_dnode_block_get(mock_dnode_t *mdn, uint64_t blkid)
+{
+	if (blkid >= mdn->mdn_nblocks) {
+		size_t new_n = blkid + 1;
+		mock_dbuf_t **new_blocks =
+		    kmem_zalloc(new_n * sizeof (mock_dbuf_t *), KM_SLEEP);
+		if (mdn->mdn_blocks != NULL) {
+			memcpy(new_blocks, mdn->mdn_blocks,
+			    mdn->mdn_nblocks * sizeof (mock_dbuf_t *));
+			kmem_free(mdn->mdn_blocks,
+			    mdn->mdn_nblocks * sizeof (mock_dbuf_t *));
+		}
+		mdn->mdn_blocks = new_blocks;
+		mdn->mdn_nblocks = new_n;
+	}
+
+	mock_dbuf_t *mdb = mdn->mdn_blocks[blkid];
+	if (mdb == NULL) {
+		mdb = mock_dnode_block_alloc(mdn, blkid);
+		mdn->mdn_blocks[blkid] = mdb;
+	}
+	return (mdb);
+}
+
+mock_dnode_t *
+mock_dnode_create(size_t blksize, dmu_object_type_t type)
+{
+	ASSERT(IS_P2ALIGNED(blksize, 512));
+
+	mock_dnode_t *mdn = kmem_zalloc(sizeof (mock_dnode_t), KM_SLEEP);
+	mdn->mdn_refcount = 1;
+	mdn->mdn_dn.dn_type = type;
+	mdn->mdn_dn.dn_object = 1;	/* arbitrary non-zero object number */
+	mdn->mdn_blksize = blksize;
+
+	return (mdn);
+}
+
+void
+mock_dnode_destroy(mock_dnode_t *mdn)
+{
+	for (size_t i = 0; i < mdn->mdn_nblocks; i++) {
+		mock_dbuf_t *mdb = mdn->mdn_blocks[i];
+		if (mdb == NULL)
+			continue;
+
+		/*
+		 * Call the sync evict callback if one is set, mimicking the
+		 * real DMU when a buffer's refcount drops to zero.
+		 */
+		if (mdb->mdb_user != NULL &&
+		    mdb->mdb_user->dbu_evict_func_sync != NULL)
+			mdb->mdb_user->dbu_evict_func_sync(mdb->mdb_user);
+
+		kmem_free(mdb->mdb_data, mdb->mdb_db.db_size);
+		kmem_free(mdb, sizeof (mock_dbuf_t));
+	}
+
+	kmem_free(mdn->mdn_blocks,
+	    mdn->mdn_nblocks * sizeof (mock_dbuf_t *));
+	kmem_free(mdn, sizeof (mock_dnode_t));
+}
+
+size_t
+mock_dnode_block_count(mock_dnode_t *mdn)
+{
+	return (mdn->mdn_nblocks);
+}
+
+const void *
+mock_dnode_block_data(mock_dnode_t *mdn, uint64_t blkid)
+{
+	if (blkid >= mdn->mdn_nblocks || mdn->mdn_blocks[blkid] == NULL)
+		return (NULL);	/* out of range, or a hole never held */
+	return (mdn->mdn_blocks[blkid]->mdb_db.db_data);
+}
+
+uint64_t
+mock_dnode_refcount(mock_dnode_t *mdn)
+{
+	return (mdn->mdn_refcount);
+}
+
+/* Mock transaction */
+
+mock_dmu_tx_t *
+mock_tx_create(void)
+{
+	return (kmem_zalloc(sizeof (mock_dmu_tx_t), KM_SLEEP));
+}
+
+void
+mock_tx_destroy(mock_dmu_tx_t *tx)
+{
+	kmem_free(tx, sizeof (mock_dmu_tx_t));
+}
+
+/* DMU stubs, either no-op or light access to mock dnode internals. */
+
+int
+dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset, const void *tag,
+    dmu_buf_t **dbp, dmu_flags_t flags)
+{
+	(void) tag; (void) flags;
+
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	uint64_t blkid = offset / mdn->mdn_blksize;
+	mock_dbuf_t *mdb = mock_dnode_block_get(mdn, blkid);
+
+	*dbp = &mdb->mdb_db;
+	return (0);
+}
+
+void
+dmu_buf_rele(dmu_buf_t *db, const void *tag)
+{
+	(void) db; (void) tag;
+}
+
+void *
+dmu_buf_get_user(dmu_buf_t *db)
+{
+	mock_dbuf_t *mdb = (mock_dbuf_t *)db;
+	return (mdb->mdb_user);
+}
+
+void *
+dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *new_user)
+{
+	mock_dbuf_t *mdb = (mock_dbuf_t *)db;
+	if (mdb->mdb_user != NULL)
+		return (mdb->mdb_user);	/* existing user wins */
+	mdb->mdb_user = new_user;
+	return (NULL);			/* new_user wins */
+}
+
+void
+dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx)
+{
+	(void) db; (void) tx;
+}
+
+objset_t *
+dmu_buf_get_objset(dmu_buf_t *db)
+{
+	mock_dbuf_t *mdb = (mock_dbuf_t *)db;
+
+	/*
+	 * We return the mock_dnode_t pointer cast to objset_t so that
+	 * dmu_object_set_blocksize() below can recover the dnode without
+	 * needing a separate objset structure.
+	 */
+	return ((objset_t *)mdb->mdb_owner);
+}
+
+int
+dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
+    int ibs, dmu_tx_t *tx)
+{
+	(void) object; (void) ibs; (void) tx;
+
+	/* os is a mock_dnode_t (see dmu_buf_get_objset() above). */
+	mock_dnode_t *mdn = (mock_dnode_t *)os;
+
+	/*
+	 * Resize block 0's data buffer in place so the struct address stays
+	 * stable.
+	 */
+	mock_dbuf_t *mdb = mdn->mdn_blocks[0];
+	void *new_data = kmem_zalloc(size, KM_SLEEP);
+	memcpy(new_data, mdb->mdb_data,
+	    MIN(size, (size_t)mdb->mdb_db.db_size));
+	kmem_free(mdb->mdb_data, mdb->mdb_db.db_size);
+
+	mdb->mdb_data = new_data;
+	mdb->mdb_db.db_size = size;
+	mdb->mdb_db.db_data = new_data;
+	mdn->mdn_blksize = size;
+
+	return (0);
+}
+
+boolean_t
+dnode_add_ref(dnode_t *dn, const void *tag)
+{
+	(void) tag;
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	if (mdn->mdn_refcount == 0)
+		return (B_FALSE);
+	mdn->mdn_refcount++;
+	return (B_TRUE);
+}
+
+void
+dnode_rele(dnode_t *dn, const void *tag)
+{
+	(void) tag;
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	unit_gt(mdn->mdn_refcount, 0);
+	mdn->mdn_refcount--;
+}
+
+/*
+ * Misc other stubs. Not strictly DMU mocks, and might move elsewhere later,
+ * but for now this is all we need for our limited test set.
+ */
+
+spa_t *
+dmu_objset_spa(objset_t *os)
+{
+	(void) os;
+	return (NULL);
+}
+
+int
+dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t size, dmu_tx_t *tx)
+{
+	(void) os; (void) object; (void) offset; (void) size; (void) tx;
+	return (0);
+}
+
+void
+dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
+    uint64_t len, zio_priority_t pri)
+{
+	(void) dn; (void) level; (void) offset; (void) len; (void) pri;
+}
+
+dsl_dataset_t *
+dmu_objset_ds(objset_t *os)
+{
+	(void) os;
+	return (NULL);
+}
+
+boolean_t
+dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f)
+{
+	(void) ds; (void) f;
+	return (B_FALSE);
+}
+
+void
+dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	(void) ds; (void) tx;
+}
+
+boolean_t
+spa_feature_is_enabled(spa_t *spa, spa_feature_t f)
+{
+	(void) spa; (void) f;
+	return (B_FALSE);
+}
+
+int
+spa_maxblocksize(spa_t *spa)
+{
+	(void) spa;
+	return (SPA_OLD_MAXBLOCKSIZE);
+}
+
+const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
+
+void
+byteswap_uint64_array(void *buf, size_t size)
+{
+	(void) buf; (void) size;
+}
+
+/*
+ * Various objset+object calls; returning error, as they need to use
+ * _by_dnode() variants to get the mock.
+ */
+int
+dnode_hold(objset_t *os, uint64_t object, const void *tag, dnode_t **dnp)
+{
+	(void) os; (void) object; (void) tag; (void) dnp;
+	return (EIO);
+}
+
+int
+dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+	(void) os; (void) object; (void) tx;
+	return (EIO);
+}
+
+uint64_t
+dmu_object_alloc_hold(objset_t *os, dmu_object_type_t ot,
+    int blocksize, int indirect_blockshift, dmu_object_type_t bonustype,
+    int bonuslen, int dnodesize, dnode_t **allocated_dnode,
+    const void *tag, dmu_tx_t *tx)
+{
+	(void) os; (void) ot; (void) blocksize; (void) indirect_blockshift;
+	(void) bonustype; (void) bonuslen; (void) dnodesize;
+	(void) allocated_dnode; (void) tag; (void) tx;
+	return (EIO);	/* NOTE(review): an object id, not an errno? */
+}
+
+int
+dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+    int blocksize, dmu_object_type_t bonus_type, int bonus_len,
+    int dnodesize, dmu_tx_t *tx)
+{
+	(void) os; (void) object; (void) ot; (void) blocksize;
+	(void) bonus_type; (void) bonus_len; (void) dnodesize; (void) tx;
+	return (EIO);
+}
+
+int
+dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi)
+{
+	(void) os; (void) object; (void) doi;
+	return (EIO);
+}
+
+int
+dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset,
+    uint64_t len)
+{
+	(void) os; (void) object; (void) offset; (void) len;
+	return (EIO);
+}
diff --git a/sys/contrib/openzfs/tests/unit/mock_dmu.h b/sys/contrib/openzfs/tests/unit/mock_dmu.h
new file mode 100644
index 00000000000..2ac82c18b7a
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/mock_dmu.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+#ifndef _MOCK_DMU_H
+#define	_MOCK_DMU_H
+
+/*
+ * In-memory mock of the core DMU types for unit testing.
+ *
+ * Provides mock_dnode_t carrying a flat array of fixed-size blocks.
+ */
+
+#include <sys/types.h>
+
+typedef struct mock_dnode mock_dnode_t;
+typedef struct mock_dmu_tx mock_dmu_tx_t;
+
+/* Create a mock dnode with the given block size and object type. */
+mock_dnode_t *mock_dnode_create(size_t blksize, dmu_object_type_t type);
+
+/* Free a mock dnode and all its blocks. */
+void mock_dnode_destroy(mock_dnode_t *mdn);
+
+/* Returns the current number of blocks underlying this dnode. */
+size_t mock_dnode_block_count(mock_dnode_t *mdn);
+
+/* Returns a pointer to the data under the given block id. */
+const void *mock_dnode_block_data(mock_dnode_t *mdn, uint64_t blkid);
+
+/* Returns the current dnode ref (hold) count. */
+uint64_t mock_dnode_refcount(mock_dnode_t *mdn);
+
+/* Create/destroy a mock transaction handle. */
+mock_dmu_tx_t *mock_tx_create(void);
+void mock_tx_destroy(mock_dmu_tx_t *tx);
+
+#endif /* _MOCK_DMU_H */
diff --git a/sys/contrib/openzfs/tests/unit/munit.c b/sys/contrib/openzfs/tests/unit/munit.c
new file mode 100644
index 00000000000..73d32728e8c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/munit.c
@@ -0,0 +1,2458 @@
+// SPDX-License-Identifier: MIT
+/* µnit Testing Framework
+ * Copyright (c) 2013-2018 Evan Nemerson <evan@nemerson.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*** Configuration ***/
+
+/* This is just where the output from the test goes.  It's really just
+ * meant to let you choose stdout or stderr, but if anyone really wants
+ * to direct it to a file let me know, it would be fairly easy to
+ * support. */
+#if !defined(MUNIT_OUTPUT_FILE)
+#  define MUNIT_OUTPUT_FILE stdout
+#endif
+
+/* This is a bit more useful; it tells µnit how to format the seconds in
+ * timed tests.  If your tests run for longer you might want to reduce
+ * it, and if your computer is really fast and your tests are tiny you
+ * can increase it. */
+#if !defined(MUNIT_TEST_TIME_FORMAT)
+#  define MUNIT_TEST_TIME_FORMAT "0.8f"
+#endif
+
+/* If you have long test names you might want to consider bumping
+ * this.  The result information takes 43 characters. */
+#if !defined(MUNIT_TEST_NAME_LEN)
+#  define MUNIT_TEST_NAME_LEN 37
+#endif
+
+/* If you don't like the timing information, you can disable it by
+ * defining MUNIT_DISABLE_TIMING. */
+#if !defined(MUNIT_DISABLE_TIMING)
+#  define MUNIT_ENABLE_TIMING
+#endif
+
+/* OpenZFS: claim no strerror_r, causing munit to use its own internal
+ * fallback. There are two versions of strerror_r (XSI and GNU), subtly
+ * different, and some glibc versions have warn_unused_result set on the
+ * prototype. munit is not prepared for this variance, so better just to
+ * let it do its own thing. -- robn, 2026-05-21 */
+#if !defined(MUNIT_NO_STRERROR_R)
+#  define MUNIT_NO_STRERROR_R
+#endif
+
+/*** End configuration ***/
+
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#  undef _POSIX_C_SOURCE
+#endif
+#if !defined(_POSIX_C_SOURCE)
+#  define _POSIX_C_SOURCE 200809L
+#endif
+
+/* Solaris freaks out if you try to use a POSIX or SUS standard without
+ * the "right" C standard. */
+#if defined(_XOPEN_SOURCE)
+#  undef _XOPEN_SOURCE
+#endif
+
+#if defined(__STDC_VERSION__)
+#  if __STDC_VERSION__ >= 201112L
+#    define _XOPEN_SOURCE 700
+#  elif __STDC_VERSION__ >= 199901L
+#    define _XOPEN_SOURCE 600
+#  endif
+#endif
+
+/* Because, according to Microsoft, POSIX is deprecated.  You've got
+ * to appreciate the chutzpah. */
+#if defined(_MSC_VER) && !defined(_CRT_NONSTDC_NO_DEPRECATE)
+#  define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+#  include <stdbool.h>
+#elif defined(_WIN32)
+/* https://msdn.microsoft.com/en-us/library/tf4dy80a.aspx */
+#endif
+
+#include <limits.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <setjmp.h>
+
+#if !defined(MUNIT_NO_NL_LANGINFO) && !defined(_WIN32)
+#  define MUNIT_NL_LANGINFO
+#  include <locale.h>
+#  include <langinfo.h>
+#  include <strings.h>
+#endif
+
+#if !defined(_WIN32)
+#  include <unistd.h>
+#  include <sys/types.h>
+#  include <sys/wait.h>
+#else
+#  include <windows.h>
+#  include <io.h>
+#  include <fcntl.h>
+#  if !defined(STDERR_FILENO)
+#    define STDERR_FILENO _fileno(stderr)
+#  endif
+#endif
+
+#include "munit.h"
+
+#define MUNIT_STRINGIFY(x) #x
+#define MUNIT_XSTRINGIFY(x) MUNIT_STRINGIFY(x)
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_CC) ||  \
+  defined(__IBMCPP__)
+#  define MUNIT_THREAD_LOCAL __thread
+#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201102L)) ||          \
+  defined(_Thread_local)
+#  define MUNIT_THREAD_LOCAL _Thread_local
+#elif defined(_WIN32)
+#  define MUNIT_THREAD_LOCAL __declspec(thread)
+#endif
+
+/* MSVC 12.0 will emit a warning at /W4 for code like 'do { ... }
+ * while (0)', or 'do { ... } while (1)'.  I'm pretty sure nobody
+ * at Microsoft compiles with /W4. */
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)
+#  pragma warning(disable : 4127)
+#endif
+
+#if defined(_WIN32) || defined(__EMSCRIPTEN__)
+#  define MUNIT_NO_FORK
+#endif
+
+#if defined(__EMSCRIPTEN__)
+#  define MUNIT_NO_BUFFER
+#endif
+
+/*** Logging ***/
+
+static MunitLogLevel munit_log_level_visible = MUNIT_LOG_INFO;
+static MunitLogLevel munit_log_level_fatal = MUNIT_LOG_ERROR;
+
+#if defined(MUNIT_THREAD_LOCAL)
+static MUNIT_THREAD_LOCAL munit_bool munit_error_jmp_buf_valid = 0;
+static MUNIT_THREAD_LOCAL jmp_buf munit_error_jmp_buf;
+#endif
+
+/* At certain warning levels, mingw will trigger warnings about
+ * suggesting the format attribute, which we've explicitly *not* set
+ * because it will then choke on our attempts to use the MS-specific
+ * I64 modifier for size_t (which we have to use since MSVC doesn't
+ * support the C99 z modifier). */
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
+#endif
+
+/* Shared log writer: prints "<Level>: [<file>:<line>: ]<message>\n" to fp.
+ * Levels below munit_log_level_visible are dropped; an unrecognised level is
+ * reported through munit_logf_ex() and nothing else is written. */
+MUNIT_PRINTF(5, 0)
+static void munit_logf_exv(MunitLogLevel level, FILE *fp, const char *filename,
+                           int line, const char *format, va_list ap) {
+  const char *label = NULL;
+
+  if (level < munit_log_level_visible)
+    return;
+  if (level == MUNIT_LOG_DEBUG)
+    label = "Debug";
+  else if (level == MUNIT_LOG_INFO)
+    label = "Info";
+  else if (level == MUNIT_LOG_WARNING)
+    label = "Warning";
+  else if (level == MUNIT_LOG_ERROR)
+    label = "Error";
+  if (label == NULL) {
+    munit_logf_ex(MUNIT_LOG_ERROR, filename, line, "Invalid log level (%d)",
+                  level);
+    return;
+  }
+
+  fputs(label, fp);
+  fputs(": ", fp);
+  if (filename != NULL)
+    fprintf(fp, "%s:%d: ", filename, line);
+  vfprintf(fp, format, ap);
+  fputc('\n', fp);
+}
+
+/* printf-style front end to munit_logf_exv(); adds no file/line prefix. */
+MUNIT_PRINTF(3, 4)
+static void munit_logf_internal(MunitLogLevel level, FILE *fp,
+                                const char *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  munit_logf_exv(level, fp, NULL, 0, format, ap);
+  va_end(ap);
+}
+
+static void munit_log_internal(MunitLogLevel level, FILE *fp,
+                               const char *message) {
+  munit_logf_internal(level, fp, "%s", message); /* message passed as data */
+}
+
+/* Public logging entry point: hands the message to munit_logf_exv() on stderr.
+ * At or above munit_log_level_fatal it does not return (longjmp or abort). */
+void munit_logf_ex(MunitLogLevel level, const char *filename, int line,
+                   const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  munit_logf_exv(level, stderr, filename, line, format, args);
+  va_end(args);
+  if (level < munit_log_level_fatal)
+    return;
+#if defined(MUNIT_THREAD_LOCAL)
+  if (munit_error_jmp_buf_valid)
+    longjmp(munit_error_jmp_buf, 1);
+#endif
+  abort();
+}
+
+/* Passes the message to munit_logf_exv() at MUNIT_LOG_ERROR on stderr, then
+ * longjmps to munit_error_jmp_buf when it is marked valid, else abort()s. */
+void munit_errorf_ex(const char *filename, int line, const char *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  munit_logf_exv(MUNIT_LOG_ERROR, stderr, filename, line, format, ap);
+  va_end(ap);
+#if defined(MUNIT_THREAD_LOCAL)
+  if (munit_error_jmp_buf_valid)
+    longjmp(munit_error_jmp_buf, 1);
+#endif
+  abort();
+}
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#  pragma GCC diagnostic pop
+#endif
+
+#if !defined(MUNIT_STRERROR_LEN)
+#  define MUNIT_STRERROR_LEN 80
+#endif
+
+/* Logs "<msg>: <error text> (<errno>)" for the current errno at the given
+ * level on fp, via strerror() or, where enabled, strerror_r()/strerror_s(). */
+static void munit_log_errno(MunitLogLevel level, FILE *fp, const char *msg) {
+#if defined(MUNIT_NO_STRERROR_R) ||                                            \
+  (defined(__MINGW32__) && !defined(MINGW_HAS_SECURE_API))
+  munit_logf_internal(level, fp, "%s: %s (%d)", msg, strerror(errno), errno);
+#else
+  char munit_error_str[MUNIT_STRERROR_LEN];
+  munit_error_str[0] = '\0';
+#  if !defined(_WIN32)
+  strerror_r(errno, munit_error_str, MUNIT_STRERROR_LEN);
+#  else
+  strerror_s(munit_error_str, MUNIT_STRERROR_LEN, errno);
+#  endif
+  munit_logf_internal(level, fp, "%s: %s (%d)", msg, munit_error_str, errno);
+#endif
+}
+
+/*** Memory allocation ***/
+
+/* Allocates size zero-filled bytes with calloc().  A size of 0 yields NULL
+ * without touching the allocator.  If calloc() fails, the failure is reported
+ * through munit_logf_ex() at MUNIT_LOG_ERROR with the caller's filename and
+ * line; should that call return, NULL is returned. */
+void *munit_malloc_ex(const char *filename, int line, size_t size) {
+  void *mem = (size == 0) ? NULL : calloc(1, size);
+
+  if (MUNIT_UNLIKELY(size != 0 && mem == NULL)) {
+    munit_logf_ex(MUNIT_LOG_ERROR, filename, line,
+                  "Failed to allocate %" MUNIT_SIZE_MODIFIER "u bytes.", size);
+  }
+
+  return mem;
+}
+
+/*** Timer code ***/
+
+#if defined(MUNIT_ENABLE_TIMING)
+
+#  define psnip_uint64_t munit_uint64_t
+#  define psnip_uint32_t munit_uint32_t
+
+/* Code copied from portable-snippets
+ * <https://github.com/nemequ/portable-snippets/>.  If you need to
+ * change something, please do it there so we can keep the code in
+ * sync. */
+
+/* Clocks (v1)
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
+ * Created by Evan Nemerson <evan@nemerson.com>
+ *
+ *   To the extent possible under law, the authors have waived all
+ *   copyright and related or neighboring rights to this code.  For
+ *   details, see the Creative Commons Zero 1.0 Universal license at
+ *   https://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+#  if !defined(PSNIP_CLOCK_H)
+#    define PSNIP_CLOCK_H
+
+#    if !defined(psnip_uint64_t)
+#      include "../exact-int/exact-int.h"
+#    endif
+
+#    if !defined(PSNIP_CLOCK_STATIC_INLINE)
+#      if defined(__GNUC__)
+#        define PSNIP_CLOCK__COMPILER_ATTRIBUTES __attribute__((__unused__))
+#      else
+#        define PSNIP_CLOCK__COMPILER_ATTRIBUTES
+#      endif
+
+#      define PSNIP_CLOCK__FUNCTION PSNIP_CLOCK__COMPILER_ATTRIBUTES static
+#    endif
+
+enum PsnipClockType {
+  /* This clock provides the current time, in units since 1970-01-01
+   * 00:00:00 UTC not including leap seconds.  In other words, UNIX
+   * time.  Keep in mind that this clock doesn't account for leap
+   * seconds, and can go backwards (think NTP adjustments). */
+  PSNIP_CLOCK_TYPE_WALL = 1, /* read by psnip_clock_wall_get_time() */
+  /* The CPU time is a clock which increases only when the current
+   * process is active (i.e., it doesn't increment while blocking on
+   * I/O). */
+  PSNIP_CLOCK_TYPE_CPU = 2, /* read by psnip_clock_cpu_get_time() */
+  /* Monotonic time is always running (unlike CPU time), but it only
+   * ever moves forward unless you reboot the system.  Things like NTP
+   * adjustments have no effect on this clock. */
+  PSNIP_CLOCK_TYPE_MONOTONIC = 3 /* read by psnip_clock_monotonic_get_time() */
+};
+
+struct PsnipClockTimespec {
+  psnip_uint64_t seconds;     /* whole seconds */
+  psnip_uint64_t nanoseconds; /* sub-second remainder, in nanoseconds */
+};
+
+/* Methods we support: */
+
+#    define PSNIP_CLOCK_METHOD_CLOCK_GETTIME 1
+#    define PSNIP_CLOCK_METHOD_TIME 2
+#    define PSNIP_CLOCK_METHOD_GETTIMEOFDAY 3
+#    define PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER 4
+#    define PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME 5
+#    define PSNIP_CLOCK_METHOD_CLOCK 6
+#    define PSNIP_CLOCK_METHOD_GETPROCESSTIMES 7
+#    define PSNIP_CLOCK_METHOD_GETRUSAGE 8
+#    define PSNIP_CLOCK_METHOD_GETSYSTEMTIMEPRECISEASFILETIME 9
+#    define PSNIP_CLOCK_METHOD_GETTICKCOUNT64 10
+
+#    include <assert.h>
+
+#    if defined(HEDLEY_UNREACHABLE)
+#      define PSNIP_CLOCK_UNREACHABLE() HEDLEY_UNREACHABLE()
+#    else
+#      define PSNIP_CLOCK_UNREACHABLE() assert(0)
+#    endif
+
+/* Choose an implementation */
+
+/* #undef PSNIP_CLOCK_WALL_METHOD */
+/* #undef PSNIP_CLOCK_CPU_METHOD */
+/* #undef PSNIP_CLOCK_MONOTONIC_METHOD */
+
+/* We want to be able to detect the libc implementation, so we include
+   <limits.h> (<features.h> isn't available everywhere). */
+
+#    if defined(__unix__) || defined(__unix) || defined(__linux__)
+#      include <limits.h>
+#      include <unistd.h>
+#    endif
+
+#    if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
+/* These are known to work without librt.  If you know of others
+ * please let us know so we can add them. */
+#      if (defined(__GLIBC__) &&                                               \
+           (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17))) ||    \
+        (defined(__FreeBSD__))
+#        define PSNIP_CLOCK_HAVE_CLOCK_GETTIME
+#      elif !defined(PSNIP_CLOCK_NO_LIBRT)
+#        define PSNIP_CLOCK_HAVE_CLOCK_GETTIME
+#      endif
+#    endif
+
+#    if defined(_WIN32)
+#      if !defined(PSNIP_CLOCK_CPU_METHOD)
+#        define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_GETPROCESSTIMES
+#      endif
+#      if !defined(PSNIP_CLOCK_MONOTONIC_METHOD)
+#        define PSNIP_CLOCK_MONOTONIC_METHOD                                   \
+          PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER
+#      endif
+#    endif
+
+#    if defined(__MACH__) && !defined(__gnu_hurd__)
+#      if !defined(PSNIP_CLOCK_MONOTONIC_METHOD)
+#        define PSNIP_CLOCK_MONOTONIC_METHOD                                   \
+          PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME
+#      endif
+#    endif
+
+#    if defined(PSNIP_CLOCK_HAVE_CLOCK_GETTIME)
+#      include <time.h>
+#      if !defined(PSNIP_CLOCK_WALL_METHOD)
+#        if defined(CLOCK_REALTIME_PRECISE)
+#          define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_WALL CLOCK_REALTIME_PRECISE
+#        elif !defined(__sun)
+#          define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_WALL CLOCK_REALTIME
+#        endif
+#      endif
+#      if !defined(PSNIP_CLOCK_CPU_METHOD)
+#        if defined(_POSIX_CPUTIME) || defined(CLOCK_PROCESS_CPUTIME_ID)
+#          define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_CPU CLOCK_PROCESS_CPUTIME_ID
+#        elif defined(CLOCK_VIRTUAL)
+#          define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_CPU CLOCK_VIRTUAL
+#        endif
+#      endif
+#      if !defined(PSNIP_CLOCK_MONOTONIC_METHOD)
+#        if defined(CLOCK_MONOTONIC_RAW)
+#          define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC
+#        elif defined(CLOCK_MONOTONIC_PRECISE)
+#          define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC_PRECISE
+#        elif defined(_POSIX_MONOTONIC_CLOCK) || defined(CLOCK_MONOTONIC)
+#          define PSNIP_CLOCK_MONOTONIC_METHOD PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+#          define PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC CLOCK_MONOTONIC
+#        endif
+#      endif
+#    endif
+
+#    if defined(_POSIX_VERSION) && (_POSIX_VERSION >= 200112L)
+#      if !defined(PSNIP_CLOCK_WALL_METHOD)
+#        define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_GETTIMEOFDAY
+#      endif
+#    endif
+
+#    if !defined(PSNIP_CLOCK_WALL_METHOD)
+#      define PSNIP_CLOCK_WALL_METHOD PSNIP_CLOCK_METHOD_TIME
+#    endif
+
+#    if !defined(PSNIP_CLOCK_CPU_METHOD)
+#      define PSNIP_CLOCK_CPU_METHOD PSNIP_CLOCK_METHOD_CLOCK
+#    endif
+
+/* Primarily here for testing. */
+#    if !defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                              \
+      defined(PSNIP_CLOCK_REQUIRE_MONOTONIC)
+#      error No monotonic clock found.
+#    endif
+
+/* Implementations */
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) ||      \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) ||       \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) ||  \
+      (defined(PSNIP_CLOCK_CPU_METHOD) &&                                      \
+       (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) ||                \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) ||               \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK)) ||          \
+      (defined(PSNIP_CLOCK_CPU_METHOD) &&                                      \
+       (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_TIME)) ||                 \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME)) ||                \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_TIME))
+#      include <time.h>
+#    endif
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY)) ||       \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY)) ||        \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY))
+#      include <sys/time.h>
+#    endif
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) ||    \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) ||     \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD ==                                        \
+        PSNIP_CLOCK_METHOD_GETPROCESSTIMES)) ||                                \
+      (defined(PSNIP_CLOCK_CPU_METHOD) &&                                      \
+       (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64)) ||       \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64)) ||      \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64))
+#      include <windows.h>
+#    endif
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE)) ||          \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE)) ||           \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE))
+#      include <sys/time.h>
+#      include <sys/resource.h>
+#    endif
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME)) || \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME)) ||  \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD ==                                        \
+        PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME))
+#      include <CoreServices/CoreServices.h>
+#      include <mach/mach.h>
+#      include <mach/mach_time.h>
+#    endif
+
+/*** Implementations ***/
+
+#    define PSNIP_CLOCK_NSEC_PER_SEC ((psnip_uint32_t)(1000000000ULL))
+
+#    if (defined(PSNIP_CLOCK_CPU_METHOD) &&                                    \
+         (PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) ||      \
+      (defined(PSNIP_CLOCK_WALL_METHOD) &&                                     \
+       (PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME)) ||       \
+      (defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                                \
+       (PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME))
+/* Ticks per second of clk_id per clock_getres(); 0 when the call fails or the
+ * reported resolution has no nanosecond part (which would divide by zero). */
+PSNIP_CLOCK__FUNCTION psnip_uint32_t
+psnip_clock__clock_getres(clockid_t clk_id) {
+  struct timespec res;
+
+  if (clock_getres(clk_id, &res) != 0 || res.tv_nsec <= 0)
+    return 0;
+
+  return (psnip_uint32_t)(PSNIP_CLOCK_NSEC_PER_SEC /
+                          (psnip_uint64_t)res.tv_nsec);
+}
+
+/* Reads clk_id with clock_gettime() into *res.  Returns 0 on success, or -10
+ * (leaving *res untouched) when clock_gettime() fails. */
+PSNIP_CLOCK__FUNCTION int
+psnip_clock__clock_gettime(clockid_t clk_id, struct PsnipClockTimespec *res) {
+  struct timespec now;
+  const int rc = clock_gettime(clk_id, &now);
+  if (rc == 0) {
+    res->seconds = (psnip_uint64_t)(now.tv_sec);
+    res->nanoseconds = (psnip_uint64_t)(now.tv_nsec);
+  }
+  return (rc == 0) ? 0 : -10;
+}
+#    endif
+
+PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_wall_get_precision(void) {
+#    if !defined(PSNIP_CLOCK_WALL_METHOD)
+  return 0; /* ticks per second; 0 = no wall clock method selected */
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_WALL);
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY
+  return 1000000; /* this method is read in microseconds */
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME
+  return 1; /* this method supplies whole seconds only */
+#    else
+  return 0; /* selected method has no precision listed here */
+#    endif
+}
+
+/* Reads the wall clock into *res using the method chosen at compile time.
+ * Returns 0 on success, -2 if no usable method exists, -6/-10 on failure. */
+PSNIP_CLOCK__FUNCTION int
+psnip_clock_wall_get_time(struct PsnipClockTimespec *res) {
+#    if !defined(PSNIP_CLOCK_WALL_METHOD)
+  (void)res;
+  return -2;
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_WALL, res);
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_TIME
+  res->seconds = time(NULL);
+  res->nanoseconds = 0; /* this method supplies whole seconds only */
+#    elif defined(PSNIP_CLOCK_WALL_METHOD) &&                                  \
+      PSNIP_CLOCK_WALL_METHOD == PSNIP_CLOCK_METHOD_GETTIMEOFDAY
+  struct timeval tv;
+
+  if (gettimeofday(&tv, NULL) != 0)
+    return -6;
+
+  res->seconds = (psnip_uint64_t)tv.tv_sec;
+  res->nanoseconds = (psnip_uint64_t)tv.tv_usec * 1000; /* us -> ns */
+#    else
+  (void)res;
+  return -2;
+#    endif
+
+  return 0;
+}
+
+PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_cpu_get_precision(void) {
+#    if !defined(PSNIP_CLOCK_CPU_METHOD)
+  return 0; /* ticks per second; 0 = no CPU clock method selected */
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_CPU);
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK
+  return CLOCKS_PER_SEC; /* clock() ticks per second */
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES
+  return PSNIP_CLOCK_NSEC_PER_SEC / 100; /* one tick per 100 ns */
+#    else
+  return 0; /* e.g. GETRUSAGE: no precision is listed for it here */
+#    endif
+}
+
+/* Stores the process CPU time in *res using the method chosen at compile
+ * time.  Returns 0 on success, -2 when no usable method is configured, or a
+ * method-specific negative code when the underlying call fails. */
+PSNIP_CLOCK__FUNCTION int
+psnip_clock_cpu_get_time(struct PsnipClockTimespec *res) {
+#    if !defined(PSNIP_CLOCK_CPU_METHOD)
+  (void)res;
+  return -2;
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_CPU, res);
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_CLOCK
+  clock_t t = clock();
+  if (t == ((clock_t)-1))
+    return -5;
+  res->seconds = t / CLOCKS_PER_SEC;
+  res->nanoseconds =
+    (t % CLOCKS_PER_SEC) * (PSNIP_CLOCK_NSEC_PER_SEC / CLOCKS_PER_SEC);
+#    elif defined(PSNIP_CLOCK_CPU_METHOD) &&                                   \
+      PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETPROCESSTIMES
+  FILETIME CreationTime, ExitTime, KernelTime, UserTime;
+  LARGE_INTEGER date;
+
+  if (!GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime,
+                       &KernelTime, &UserTime))
+    return -7;
+
+  /* UserTime is an amount of CPU time in 100 ns units rather than a calendar
+   * date, so it is converted directly with no FILETIME epoch adjustment. */
+  date.HighPart = (LONG)UserTime.dwHighDateTime;
+  date.LowPart = UserTime.dwLowDateTime;
+  res->seconds = (psnip_uint64_t)(date.QuadPart / 10000000);
+  res->nanoseconds = (psnip_uint64_t)(date.QuadPart % 10000000) * 100;
+#    elif PSNIP_CLOCK_CPU_METHOD == PSNIP_CLOCK_METHOD_GETRUSAGE
+  struct rusage usage;
+  if (getrusage(RUSAGE_SELF, &usage) != 0)
+    return -8;
+
+  res->seconds = usage.ru_utime.tv_sec;
+  res->nanoseconds = usage.ru_utime.tv_usec * 1000; /* us -> ns */
+#    else
+  (void)res;
+  return -2;
+#    endif
+
+  return 0;
+}
+
+PSNIP_CLOCK__FUNCTION psnip_uint32_t psnip_clock_monotonic_get_precision(void) {
+#    if !defined(PSNIP_CLOCK_MONOTONIC_METHOD)
+  return 0; /* ticks per second; 0 = no monotonic method selected */
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_getres(PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC);
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME
+  static mach_timebase_info_data_t tbi = {
+    0, /* denom == 0 below means "timebase not queried yet" */
+  };
+  if (tbi.denom == 0)
+    mach_timebase_info(&tbi);
+  return (psnip_uint32_t)(tbi.numer / tbi.denom);
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64
+  return 1000; /* this method is read in milliseconds */
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD ==                                          \
+        PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER
+  LARGE_INTEGER Frequency;
+  QueryPerformanceFrequency(&Frequency);
+  return (psnip_uint32_t)((Frequency.QuadPart > PSNIP_CLOCK_NSEC_PER_SEC)
+                            ? PSNIP_CLOCK_NSEC_PER_SEC /* capped at 1e9 */
+                            : Frequency.QuadPart);
+#    else
+  return 0; /* selected method has no precision listed here */
+#    endif
+}
+
+/* Reads the monotonic clock into *res.  Returns 0 on success, -2 when no
+ * method is configured, or a method-specific negative code on failure. */
+PSNIP_CLOCK__FUNCTION int
+psnip_clock_monotonic_get_time(struct PsnipClockTimespec *res) {
+#    if !defined(PSNIP_CLOCK_MONOTONIC_METHOD)
+  (void)res;
+  return -2;
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_CLOCK_GETTIME
+  return psnip_clock__clock_gettime(PSNIP_CLOCK_CLOCK_GETTIME_MONOTONIC, res);
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_MACH_ABSOLUTE_TIME
+  psnip_uint64_t nsec = mach_absolute_time();
+  static mach_timebase_info_data_t tbi = {
+    0,
+  };
+  if (tbi.denom == 0)
+    mach_timebase_info(&tbi);
+  nsec *= ((psnip_uint64_t)tbi.numer) / ((psnip_uint64_t)tbi.denom);
+  res->seconds = nsec / PSNIP_CLOCK_NSEC_PER_SEC;
+  res->nanoseconds = nsec % PSNIP_CLOCK_NSEC_PER_SEC;
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD ==                                          \
+        PSNIP_CLOCK_METHOD_QUERYPERFORMANCECOUNTER
+  LARGE_INTEGER t, f;
+  if (QueryPerformanceCounter(&t) == 0)
+    return -12;
+  QueryPerformanceFrequency(&f);
+  res->seconds = (psnip_uint64_t)(t.QuadPart / f.QuadPart);
+  res->nanoseconds = (psnip_uint64_t)(t.QuadPart % f.QuadPart);
+  if (f.QuadPart > PSNIP_CLOCK_NSEC_PER_SEC)
+    res->nanoseconds /= (psnip_uint64_t)f.QuadPart / PSNIP_CLOCK_NSEC_PER_SEC;
+  else
+    res->nanoseconds *= PSNIP_CLOCK_NSEC_PER_SEC / (psnip_uint64_t)f.QuadPart;
+#    elif defined(PSNIP_CLOCK_MONOTONIC_METHOD) &&                             \
+      PSNIP_CLOCK_MONOTONIC_METHOD == PSNIP_CLOCK_METHOD_GETTICKCOUNT64
+  const ULONGLONG msec = GetTickCount64();
+  res->seconds = msec / 1000;
+  res->nanoseconds = (msec % 1000) * 1000000; /* leftover ms -> ns */
+#    else
+  return -2;
+#    endif
+  return 0;
+}
+
+/* Reports how many ticks per second the requested clock provides.  For
+ * example, a clock with millisecond precision would return 1000, and a
+ * clock with 1 second precision (such as the time() function) would
+ * return 1.
+ *
+ * If the requested clock isn't available, it will return 0.
+ * Hopefully this will be rare, but if it happens to you please let us
+ * know so we can work on finding a way to support your system.
+ *
+ * Note that different clocks on the same system often have different
+ * precisions.
+ *
+ * An unrecognised clock_type reaches PSNIP_CLOCK_UNREACHABLE().
+ */
+PSNIP_CLOCK__FUNCTION psnip_uint32_t
+psnip_clock_get_precision(enum PsnipClockType clock_type) {
+  if (clock_type == PSNIP_CLOCK_TYPE_MONOTONIC)
+    return psnip_clock_monotonic_get_precision();
+  if (clock_type == PSNIP_CLOCK_TYPE_CPU)
+    return psnip_clock_cpu_get_precision();
+  if (clock_type == PSNIP_CLOCK_TYPE_WALL)
+    return psnip_clock_wall_get_precision();
+
+  PSNIP_CLOCK_UNREACHABLE();
+  return 0;
+}
+
+/* Set the provided timespec to the requested time.  Returns 0 on
+ * success, or a negative value on failure; an unrecognised clock_type
+ * yields -1.  res must not be NULL (checked with assert()).
+ */
+PSNIP_CLOCK__FUNCTION int psnip_clock_get_time(enum PsnipClockType clock_type,
+                                               struct PsnipClockTimespec *res) {
+  assert(res != NULL);
+
+  if (clock_type == PSNIP_CLOCK_TYPE_MONOTONIC)
+    return psnip_clock_monotonic_get_time(res);
+  if (clock_type == PSNIP_CLOCK_TYPE_CPU)
+    return psnip_clock_cpu_get_time(res);
+  if (clock_type == PSNIP_CLOCK_TYPE_WALL)
+    return psnip_clock_wall_get_time(res);
+
+  return -1;
+}
+
+#  endif /* !defined(PSNIP_CLOCK_H) */
+
+/* Elapsed nanoseconds from *start to *end, combining both timespec fields. */
+static psnip_uint64_t munit_clock_get_elapsed(struct PsnipClockTimespec *start,
+                                              struct PsnipClockTimespec *end) {
+  const psnip_uint64_t whole =
+    (end->seconds - start->seconds) * PSNIP_CLOCK_NSEC_PER_SEC;
+  return (end->nanoseconds >= start->nanoseconds)
+           ? whole + (end->nanoseconds - start->nanoseconds)
+           : whole - (start->nanoseconds - end->nanoseconds);
+}
+
+#else
+#  include <time.h>
+#endif /* defined(MUNIT_ENABLE_TIMING) */
+
+/*** PRNG stuff ***/
+
+/* This is (unless I screwed up, which is entirely possible) the
+ * version of PCG with 32-bit state.  It was chosen because it has a
+ * small enough state that we should reliably be able to use CAS
+ * instead of requiring a lock for thread-safety.
+ *
+ * If I did screw up, I probably will not bother changing it unless
+ * there is a significant bias.  It's really not important this be
+ * particularly strong, as long as it is fairly random it's much more
+ * important that it be reproducible, so bug reports have a better
+ * chance of being reproducible. */
+
+/* Pick an atomics flavour.  HAVE_STDATOMIC is defined for C11 or newer
+ * when atomics are not disabled (__STDC_NO_ATOMICS__), the target is not
+ * Emscripten, and the compiler either does not define __GNUC_MINOR__ or
+ * reports a GNU C version newer than 4.8.  Otherwise, on clang,
+ * HAVE_CLANG_ATOMICS is defined when the c_atomic extension exists. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) &&              \
+  !defined(__STDC_NO_ATOMICS__) && !defined(__EMSCRIPTEN__) &&                 \
+  (!defined(__GNUC_MINOR__) || (__GNUC__ > 4) ||                               \
+   (__GNUC__ == 4 && __GNUC_MINOR__ > 8))
+#  define HAVE_STDATOMIC
+#elif defined(__clang__)
+#  if __has_extension(c_atomic)
+#    define HAVE_CLANG_ATOMICS
+#  endif
+#endif
+
+/* Workaround for http://llvm.org/bugs/show_bug.cgi?id=26911 */
+/* clang on Windows never uses <stdatomic.h>; when __c2__ is defined the
+ * clang builtin atomics are turned off as well. */
+#if defined(__clang__) && defined(_WIN32)
+#  undef HAVE_STDATOMIC
+#  if defined(__c2__)
+#    undef HAVE_CLANG_ATOMICS
+#  endif
+#endif
+
+/* Storage type for the shared PRNG state.  OpenMP builds use a plain
+ * uint32_t, C11 and clang builds use an _Atomic uint32_t, and the remaining
+ * builds use a volatile integer (LONG on Windows, uint32_t elsewhere). */
+#if defined(_OPENMP)
+#  define ATOMIC_UINT32_T uint32_t
+#elif defined(HAVE_STDATOMIC)
+#  include <stdatomic.h>
+#  define ATOMIC_UINT32_T _Atomic uint32_t
+#elif defined(HAVE_CLANG_ATOMICS)
+#  define ATOMIC_UINT32_T _Atomic uint32_t
+#elif defined(_WIN32)
+#  define ATOMIC_UINT32_T volatile LONG
+#else
+#  define ATOMIC_UINT32_T volatile uint32_t
+#endif
+
+/* Global PRNG state; starts at 42 until munit_rand_seed() replaces it. */
+static ATOMIC_UINT32_T munit_rand_state = 42;
+
+/* Store / load / compare-and-swap primitives for the PRNG state.
+ *
+ *   munit_atomic_store(dest, value)  writes value to *dest.
+ *   munit_atomic_load(src)           yields the current value of *src.
+ *   munit_atomic_cas(dest, expected, value)
+ *       replaces *dest with value if *dest equals *expected and yields
+ *       non-zero on success, zero otherwise.  The callers retry in a loop
+ *       until it succeeds.
+ *
+ * One implementation is selected per build, in the order tested below. */
+#if defined(_OPENMP)
+/* OpenMP: every access goes through the same named critical section. */
+static inline void munit_atomic_store(ATOMIC_UINT32_T *dest,
+                                      ATOMIC_UINT32_T value) {
+#  pragma omp critical(munit_atomics)
+  *dest = value;
+}
+
+static inline uint32_t munit_atomic_load(ATOMIC_UINT32_T *src) {
+  /* Same type as the state so that values above INT_MAX are not pushed
+   * through a signed temporary. */
+  uint32_t ret;
+#  pragma omp critical(munit_atomics)
+  ret = *src;
+  return ret;
+}
+
+static inline uint32_t munit_atomic_cas(ATOMIC_UINT32_T *dest,
+                                        ATOMIC_UINT32_T *expected,
+                                        ATOMIC_UINT32_T desired) {
+  munit_bool ret;
+
+#  pragma omp critical(munit_atomics)
+  {
+    if (*dest == *expected) {
+      *dest = desired;
+      ret = 1;
+    } else {
+      ret = 0;
+    }
+  }
+
+  return ret;
+}
+#elif defined(HAVE_STDATOMIC)
+/* C11 <stdatomic.h>. */
+#  define munit_atomic_store(dest, value) atomic_store(dest, value)
+#  define munit_atomic_load(src) atomic_load(src)
+#  define munit_atomic_cas(dest, expected, value)                              \
+    atomic_compare_exchange_weak(dest, expected, value)
+#elif defined(HAVE_CLANG_ATOMICS)
+/* clang __c11_atomic_* builtins. */
+#  define munit_atomic_store(dest, value)                                      \
+    __c11_atomic_store(dest, value, __ATOMIC_SEQ_CST)
+#  define munit_atomic_load(src) __c11_atomic_load(src, __ATOMIC_SEQ_CST)
+#  define munit_atomic_cas(dest, expected, value)                              \
+    __c11_atomic_compare_exchange_weak(dest, expected, value,                  \
+                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+/* The version test is parenthesised so the whole comparison is guarded by
+ * defined(__GNUC__) instead of relying on && binding tighter than ||. */
+#elif defined(__GNUC__) &&                                                     \
+  ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
+/* GNU C 4.7 or newer: __atomic_* builtins. */
+#  define munit_atomic_store(dest, value)                                      \
+    __atomic_store_n(dest, value, __ATOMIC_SEQ_CST)
+#  define munit_atomic_load(src) __atomic_load_n(src, __ATOMIC_SEQ_CST)
+#  define munit_atomic_cas(dest, expected, value)                              \
+    __atomic_compare_exchange_n(dest, expected, value, 1, __ATOMIC_SEQ_CST,    \
+                                __ATOMIC_SEQ_CST)
+#elif defined(__GNUC__) && (__GNUC__ >= 4)
+/* Older GNU C: plain store/load, __sync builtin for the swap. */
+#  define munit_atomic_store(dest, value)                                      \
+    do {                                                                       \
+      *(dest) = (value);                                                       \
+    } while (0)
+#  define munit_atomic_load(src) (*(src))
+#  define munit_atomic_cas(dest, expected, value)                              \
+    __sync_bool_compare_and_swap((dest), *(expected), (value))
+#elif defined(_WIN32) /* Untested */
+#  define munit_atomic_store(dest, value)                                      \
+    do {                                                                       \
+      *(dest) = (value);                                                       \
+    } while (0)
+#  define munit_atomic_load(src) (*(src))
+/* InterlockedCompareExchange returns the value *dest held before the call,
+ * not a success flag, so compare it with the expected value to get the
+ * boolean the retry loops test. */
+#  define munit_atomic_cas(dest, expected, value)                              \
+    (InterlockedCompareExchange((dest), (LONG)(value),                         \
+                                (LONG)(*(expected))) == (LONG)(*(expected)))
+#else
+#  warning No atomic implementation, PRNG will not be thread-safe
+#  define munit_atomic_store(dest, value)                                      \
+    do {                                                                       \
+      *(dest) = (value);                                                       \
+    } while (0)
+#  define munit_atomic_load(src) (*(src))
+static inline munit_bool munit_atomic_cas(ATOMIC_UINT32_T *dest,
+                                          ATOMIC_UINT32_T *expected,
+                                          ATOMIC_UINT32_T desired) {
+  if (*dest == *expected) {
+    *dest = desired;
+    return 1;
+  } else {
+    return 0;
+  }
+}
+#endif
+
+/* Multiplier and increment used by the state-transition step below. */
+#define MUNIT_PRNG_MULTIPLIER (747796405U)
+#define MUNIT_PRNG_INCREMENT (1729U)
+
+/* Advance the generator: return the state that follows `state`. */
+static munit_uint32_t munit_rand_next_state(munit_uint32_t state) {
+  const munit_uint32_t scaled = state * MUNIT_PRNG_MULTIPLIER;
+
+  return scaled + MUNIT_PRNG_INCREMENT;
+}
+
+/* Turn a generator state into an output word: xor the state with itself
+ * shifted right by a state-dependent amount, multiply, then fold the high
+ * bits back in. */
+static munit_uint32_t munit_rand_from_state(munit_uint32_t state) {
+  const munit_uint32_t shift = (state >> 28) + 4;
+  munit_uint32_t word = (state >> shift) ^ state;
+
+  word *= (277803737U);
+  return word ^ (word >> 22);
+}
+
+/* Reset the global PRNG state from `seed`: the stored state is the
+ * successor of (seed + MUNIT_PRNG_INCREMENT). */
+void munit_rand_seed(munit_uint32_t seed) {
+  const munit_uint32_t initial =
+    munit_rand_next_state(seed + MUNIT_PRNG_INCREMENT);
+
+  munit_atomic_store(&munit_rand_state, initial);
+}
+
+/* Produce a seed from the current time: the wall clock's nanosecond field
+ * when timing support is compiled in, time(NULL) otherwise.  The raw value
+ * is run through one PRNG step before being returned. */
+static munit_uint32_t munit_rand_generate_seed(void) {
+  munit_uint32_t entropy;
+#if defined(MUNIT_ENABLE_TIMING)
+  struct PsnipClockTimespec now = {0};
+
+  psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &now);
+  entropy = (munit_uint32_t)now.nanoseconds;
+#else
+  entropy = (munit_uint32_t)time(NULL);
+#endif
+
+  return munit_rand_from_state(
+    munit_rand_next_state(entropy + MUNIT_PRNG_INCREMENT));
+}
+
+/* Draw one 32-bit value from a caller-owned state: the output is derived
+ * from the current state, and *state is advanced to its successor. */
+static munit_uint32_t munit_rand_state_uint32(munit_uint32_t *state) {
+  const munit_uint32_t current = *state;
+  const munit_uint32_t output = munit_rand_from_state(current);
+
+  *state = munit_rand_next_state(current);
+  return output;
+}
+
+/* Draw one 32-bit value from the global state.  The state is advanced with
+ * a compare-and-swap that is retried until it succeeds. */
+munit_uint32_t munit_rand_uint32(void) {
+  munit_uint32_t seen, next;
+
+  for (;;) {
+    seen = munit_atomic_load(&munit_rand_state);
+    next = munit_rand_next_state(seen);
+    if (munit_atomic_cas(&munit_rand_state, &seen, next))
+      break;
+  }
+
+  return munit_rand_from_state(seen);
+}
+
+/* Fill `size` bytes at `data` from a caller-owned state.  Whole 32-bit
+ * words are copied first; a trailing partial word takes the leading bytes
+ * of one extra draw. */
+static void munit_rand_state_memory(munit_uint32_t *state, size_t size,
+                                    munit_uint8_t *data) {
+  const size_t word = sizeof(munit_uint32_t);
+  const size_t words = size / word;
+  const size_t tail = size % word;
+  munit_uint8_t *cursor = data;
+  munit_uint32_t chunk;
+  size_t i;
+
+  for (i = 0; i < words; i++) {
+    chunk = munit_rand_state_uint32(state);
+    memcpy(cursor, &chunk, word);
+    cursor += word;
+  }
+
+  if (tail != 0) {
+    chunk = munit_rand_state_uint32(state);
+    memcpy(cursor, &chunk, tail);
+  }
+}
+
+/* Fill `size` bytes at `data` from the global PRNG.  The buffer is
+ * regenerated from a fresh snapshot of the state each time the
+ * compare-and-swap that publishes the advanced state fails. */
+void munit_rand_memory(size_t size, munit_uint8_t *data) {
+  munit_uint32_t seen, next;
+
+  for (;;) {
+    next = seen = munit_atomic_load(&munit_rand_state);
+    munit_rand_state_memory(&next, size, data);
+    if (munit_atomic_cas(&munit_rand_state, &seen, next))
+      break;
+  }
+}
+
+/* Draw a value in [0, max] (inclusive) from a caller-owned state.
+ *
+ * Every raw draw is xored with `salt`.  Draws below a threshold are
+ * rejected so that the remaining values split evenly across the
+ * (max + 1) possible results.
+ *
+ * The threshold is computed for the modulus actually used, max + 1.
+ * Computing it from `max` itself divided by zero when max == 0 and gave
+ * the wrong rejection bound for every other max. */
+static munit_uint32_t munit_rand_state_at_most(munit_uint32_t *state,
+                                               munit_uint32_t salt,
+                                               munit_uint32_t max) {
+  munit_uint32_t span;
+  munit_uint32_t min;
+  munit_uint32_t x;
+
+  /* The full 32-bit range needs no reduction (and max + 1 would wrap). */
+  if (max == (~((munit_uint32_t)0U)))
+    return munit_rand_state_uint32(state) ^ salt;
+
+  /* Number of distinct results; at least 1, so never a zero divisor. */
+  span = max + 1U;
+
+  /* We want (UINT32_MAX + 1) % span, which in unsigned arithmetic is the
+   * same as (UINT32_MAX + 1 - span) % span = -span % span. We compute
+   * -span using not to avoid compiler warnings.
+   */
+  min = (~span + 1U) % span;
+
+  do {
+    x = munit_rand_state_uint32(state) ^ salt;
+  } while (x < min);
+
+  return x % span;
+}
+
+/* Draw a salted value in [0, max] from the global PRNG, retrying from a
+ * fresh snapshot whenever publishing the advanced state fails. */
+static munit_uint32_t munit_rand_at_most(munit_uint32_t salt,
+                                         munit_uint32_t max) {
+  munit_uint32_t seen, next;
+  munit_uint32_t picked;
+
+  for (;;) {
+    next = seen = munit_atomic_load(&munit_rand_state);
+    picked = munit_rand_state_at_most(&next, salt, max);
+    if (munit_atomic_cas(&munit_rand_state, &seen, next))
+      break;
+  }
+
+  return picked;
+}
+
+/* Return a random int between min and max, inclusive.  The bounds are
+ * swapped when passed in the wrong order. */
+int munit_rand_int_range(int min, int max) {
+  munit_uint64_t range;
+  munit_uint32_t offset;
+
+  if (min > max)
+    return munit_rand_int_range(max, min);
+
+  range = (munit_uint64_t)max - (munit_uint64_t)min;
+  if (range > (~((munit_uint32_t)0U)))
+    range = (~((munit_uint32_t)0U));
+
+  offset = munit_rand_at_most(0, (munit_uint32_t)range);
+
+  /* Add in unsigned arithmetic: for wide ranges the offset does not fit
+   * in an int and `min + (int)offset` could overflow, which is undefined
+   * for signed integers.  The unsigned sum wraps, and the true result
+   * lies in [min, max]; converting an out-of-range unsigned value back to
+   * int is implementation-defined, though it wraps on common compilers. */
+  return (int)((unsigned int)min + (unsigned int)offset);
+}
+
+/* Return one 32-bit draw from the global PRNG divided by 2^32. */
+double munit_rand_double(void) {
+  munit_uint32_t seen, next;
+  double value = 0.0;
+
+  for (;;) {
+    next = seen = munit_atomic_load(&munit_rand_state);
+
+    /* See http://mumble.net/~campbell/tmp/random_real.c for how to do
+     * this right.  Patches welcome if you feel that this is too
+     * biased. */
+    value = munit_rand_state_uint32(&next) / ((~((munit_uint32_t)0U)) + 1.0);
+
+    if (munit_atomic_cas(&munit_rand_state, &seen, next))
+      break;
+  }
+
+  return value;
+}
+
+/*** Test suite handling ***/
+
+/* Result counters.  Used both per test (filled by munit_test_runner_exec,
+ * one count per iteration) and per run (MunitTestRunner.report, one count
+ * per test).  The member order matters: the struct is initialised
+ * positionally and copied byte-for-byte through a pipe from the forked
+ * child. */
+typedef struct {
+  unsigned int successful; /* results equal to MUNIT_OK */
+  unsigned int skipped;    /* results equal to MUNIT_SKIP */
+  unsigned int failed;     /* results equal to MUNIT_FAIL */
+  unsigned int errored;    /* MUNIT_ERROR results and runner-side errors */
+#if defined(MUNIT_ENABLE_TIMING)
+  /* Elapsed time summed over successful iterations, in nanoseconds. */
+  munit_uint64_t cpu_clock;
+  munit_uint64_t wall_clock;
+#endif
+} MunitReport;
+
+/* State shared by the functions that walk a suite and run its tests. */
+typedef struct {
+  const char *prefix; /* NOTE(review): not read in this part of the file */
+  const MunitSuite *suite; /* root suite to run */
+  const char **tests; /* NULL-terminated test names to run; NULL = all */
+  munit_uint32_t seed; /* PRNG seed re-applied before each test */
+  unsigned int iterations; /* 0 = use the suite's own iteration count */
+  MunitParameter *parameters; /* name/value pairs matched against each
+                               * test's declared parameters */
+  munit_bool single_parameter_mode; /* pick one random value per wildcard
+                                     * parameter instead of trying all */
+  void *user_data; /* passed to setup, or to the test if no setup exists */
+  MunitReport report; /* totals across all tests run so far */
+  munit_bool colorize; /* wrap result strings in ANSI colour escapes */
+  munit_bool fork; /* run each test in a forked child process */
+  munit_bool show_stderr; /* replay captured stderr even on success */
+  munit_bool fatal_failures; /* stop once any test has failed or errored */
+} MunitTestRunner;
+
+/* Look up `key` in an array of parameters terminated by an entry whose
+ * name is NULL.  Returns the value of the first entry with a matching
+ * name, or NULL if `params` is NULL or no entry matches. */
+const char *munit_parameters_get(const MunitParameter params[],
+                                 const char *key) {
+  const MunitParameter *entry = params;
+
+  if (entry == NULL)
+    return NULL;
+
+  while (entry->name != NULL) {
+    if (strcmp(entry->name, key) == 0)
+      return entry->value;
+    entry++;
+  }
+
+  return NULL;
+}
+
+#if defined(MUNIT_ENABLE_TIMING)
+/* Print a nanosecond count to `fp` as seconds, formatted with
+ * MUNIT_TEST_TIME_FORMAT. */
+static void munit_print_time(FILE *fp, munit_uint64_t nanoseconds) {
+  const double seconds =
+    ((double)nanoseconds) / ((double)PSNIP_CLOCK_NSEC_PER_SEC);
+
+  fprintf(fp, "%" MUNIT_TEST_TIME_FORMAT, seconds);
+}
+#endif
+
+/* Add a parameter to an array of parameters.
+ *
+ * *params is a heap array (or NULL) currently holding *params_size
+ * entries.  It is grown to hold the new entry plus a terminating entry
+ * whose name and value are NULL, and *params_size is incremented.  `name`
+ * and `value` are stored as given, not copied.
+ *
+ * Returns MUNIT_OK on success.  If memory cannot be allocated, returns
+ * MUNIT_ERROR and leaves *params and *params_size untouched, so the caller
+ * still owns (and can free) the existing array. */
+static MunitResult munit_parameters_add(size_t *params_size,
+                                        MunitParameter **params, char *name,
+                                        char *value) {
+  /* Keep the old pointer until realloc succeeds; assigning the result
+   * straight to *params would lose, and leak, the array on failure. */
+  MunitParameter *grown =
+    realloc(*params, sizeof(MunitParameter) * (*params_size + 2));
+  if (grown == NULL)
+    return MUNIT_ERROR;
+  *params = grown;
+
+  (*params)[*params_size].name = name;
+  (*params)[*params_size].value = value;
+  (*params_size)++;
+  (*params)[*params_size].name = NULL;
+  (*params)[*params_size].value = NULL;
+
+  return MUNIT_OK;
+}
+
+/* Concatenate two strings, but just return one of the components
+ * unaltered if the other is NULL or "".
+ *
+ * If `len` is not NULL it receives the length of the returned string (0
+ * when NULL is returned).  Returns NULL when both components are NULL or
+ * "", or when memory for the joined string cannot be allocated.  A joined
+ * result is heap-allocated; release any result with
+ * munit_maybe_free_concat(), passing the same prefix and suffix. */
+static char *munit_maybe_concat(size_t *len, char *prefix, char *suffix) {
+  char *res;
+  size_t res_l;
+  const size_t prefix_l = prefix != NULL ? strlen(prefix) : 0;
+  const size_t suffix_l = suffix != NULL ? strlen(suffix) : 0;
+  if (prefix_l == 0 && suffix_l == 0) {
+    res = NULL;
+    res_l = 0;
+  } else if (prefix_l == 0 && suffix_l != 0) {
+    res = suffix;
+    res_l = suffix_l;
+  } else if (prefix_l != 0 && suffix_l == 0) {
+    res = prefix;
+    res_l = prefix_l;
+  } else {
+    res_l = prefix_l + suffix_l;
+    res = malloc(res_l + 1);
+    if (res != NULL) {
+      memcpy(res, prefix, prefix_l);
+      memcpy(res + prefix_l, suffix, suffix_l);
+      res[res_l] = 0;
+    } else {
+      /* Allocation failed: report "no string" instead of writing
+       * through a NULL pointer. */
+      res_l = 0;
+    }
+  }
+
+  if (len != NULL)
+    *len = res_l;
+
+  return res;
+}
+
+/* Possibly free a string returned by munit_maybe_concat.  Nothing is
+ * freed when `s` is one of the two components that were passed in. */
+static void munit_maybe_free_concat(char *s, const char *prefix,
+                                    const char *suffix) {
+  if (s == prefix || s == suffix)
+    return;
+
+  free(s);
+}
+
+/* Cheap string hash function, just used to salt the PRNG.  Starts from
+ * 5381 and, for each character, computes hash * 33 + character. */
+static munit_uint32_t munit_str_hash(const char *name) {
+  munit_uint32_t hash = 5381U;
+  const char *cursor = name;
+
+  while (*cursor != '\0') {
+    hash = (munit_uint32_t)(hash << 5) + hash + (munit_uint32_t)*cursor;
+    cursor++;
+  }
+
+  return hash;
+}
+
+/* Copy everything readable from descriptor `from` to descriptor `to`, in
+ * chunks of up to 1024 bytes.  Copying stops when read() returns zero or
+ * an error.  A failed write abandons the current chunk and moves on to the
+ * next read. */
+static void munit_splice(int from, int to) {
+  munit_uint8_t chunk[1024];
+#if !defined(_WIN32)
+  ssize_t got;
+  ssize_t done;
+  ssize_t put;
+#else
+  int got;
+  int done;
+  int put;
+#endif
+
+  for (;;) {
+    got = read(from, chunk, sizeof(chunk));
+    if (got <= 0)
+      return;
+
+    /* Short writes are continued from where they left off. */
+    for (done = 0; done < got; done += put) {
+#if !defined(_WIN32)
+      put = write(to, chunk + done, (size_t)(got - done));
+#else
+      put = write(to, chunk + done, (unsigned int)(got - done));
+#endif
+      if (put < 0)
+        break;
+    }
+  }
+}
+
+/* This is the part that should be handled in the child process.
+ *
+ * Seeds the PRNG, then runs setup / test / tear_down once per iteration,
+ * counting each outcome in `report`.  The first iteration that is not
+ * MUNIT_OK ends the loop.  Returns the result of the last iteration run. */
+static MunitResult munit_test_runner_exec(MunitTestRunner *runner,
+                                          const MunitTest *test,
+                                          const MunitParameter params[],
+                                          MunitReport *report) {
+  MunitResult outcome = MUNIT_FAIL;
+  unsigned int rounds = runner->iterations;
+  unsigned int done = 0;
+#if defined(MUNIT_ENABLE_TIMING)
+  struct PsnipClockTimespec wall_start = {0};
+  struct PsnipClockTimespec wall_stop = {0};
+  struct PsnipClockTimespec cpu_start = {0};
+  struct PsnipClockTimespec cpu_stop = {0};
+#endif
+
+  /* A test may insist on one iteration; otherwise a runner value of 0
+   * falls back to the suite's iteration count. */
+  if ((test->options & MUNIT_TEST_OPTION_SINGLE_ITERATION) ==
+      MUNIT_TEST_OPTION_SINGLE_ITERATION) {
+    rounds = 1;
+  } else if (rounds == 0) {
+    rounds = runner->suite->iterations;
+  }
+
+  munit_rand_seed(runner->seed);
+
+  do {
+    void *fixture;
+
+    if (test->setup != NULL)
+      fixture = test->setup(params, runner->user_data);
+    else
+      fixture = runner->user_data;
+
+    /* Only the test body itself is timed. */
+#if defined(MUNIT_ENABLE_TIMING)
+    psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &wall_start);
+    psnip_clock_get_time(PSNIP_CLOCK_TYPE_CPU, &cpu_start);
+#endif
+
+    outcome = test->test(params, fixture);
+
+#if defined(MUNIT_ENABLE_TIMING)
+    psnip_clock_get_time(PSNIP_CLOCK_TYPE_WALL, &wall_stop);
+    psnip_clock_get_time(PSNIP_CLOCK_TYPE_CPU, &cpu_stop);
+#endif
+
+    if (test->tear_down != NULL)
+      test->tear_down(fixture);
+
+    if (MUNIT_UNLIKELY(outcome != MUNIT_OK)) {
+      if (outcome == MUNIT_SKIP)
+        report->skipped++;
+      else if (outcome == MUNIT_FAIL)
+        report->failed++;
+      else if (outcome == MUNIT_ERROR)
+        report->errored++;
+      break;
+    }
+
+    report->successful++;
+#if defined(MUNIT_ENABLE_TIMING)
+    report->wall_clock += munit_clock_get_elapsed(&wall_start, &wall_stop);
+    report->cpu_clock += munit_clock_get_elapsed(&cpu_start, &cpu_stop);
+#endif
+  } while (++done < rounds);
+
+  return outcome;
+}
+
+/* Status text printed between "[ " and " ]" for each test.  Defining
+ * MUNIT_EMOTICON selects two-character emoticons; otherwise
+ * five-character labels padded with spaces are used. */
+#if defined(MUNIT_EMOTICON)
+#  define MUNIT_RESULT_STRING_OK ":)"
+#  define MUNIT_RESULT_STRING_SKIP ":|"
+#  define MUNIT_RESULT_STRING_FAIL ":("
+#  define MUNIT_RESULT_STRING_ERROR ":o"
+#  define MUNIT_RESULT_STRING_TODO ":/"
+#else
+#  define MUNIT_RESULT_STRING_OK "OK   "
+#  define MUNIT_RESULT_STRING_SKIP "SKIP "
+#  define MUNIT_RESULT_STRING_FAIL "FAIL "
+#  define MUNIT_RESULT_STRING_ERROR "ERROR"
+#  define MUNIT_RESULT_STRING_TODO "TODO "
+#endif
+
+/* Write `string` to MUNIT_OUTPUT_FILE.  When the runner has colour
+ * enabled, the text is wrapped in an ANSI foreground-colour escape built
+ * from the digit `color`, followed by a reset to the default colour. */
+static void munit_test_runner_print_color(const MunitTestRunner *runner,
+                                          const char *string, char color) {
+  if (!runner->colorize) {
+    fputs(string, MUNIT_OUTPUT_FILE);
+    return;
+  }
+
+  fprintf(MUNIT_OUTPUT_FILE, "\x1b[3%cm%s\x1b[39m", color, string);
+}
+
+#if !defined(MUNIT_NO_BUFFER)
+/* Point STDERR_FILENO at the descriptor behind `stderr_buf`.  Returns a
+ * duplicate of the previous stderr descriptor for
+ * munit_restore_stderr(), or -1 when `stderr_buf` is NULL (in which case
+ * nothing is changed).  Exits the process if `stderr_buf` has no
+ * descriptor. */
+static int munit_replace_stderr(FILE *stderr_buf) {
+  int saved_fd;
+  int buf_fd;
+
+  if (stderr_buf == NULL)
+    return -1;
+
+  saved_fd = dup(STDERR_FILENO);
+
+  buf_fd = fileno(stderr_buf);
+  if (MUNIT_UNLIKELY(buf_fd == -1)) {
+    exit(EXIT_FAILURE);
+  }
+
+  dup2(buf_fd, STDERR_FILENO);
+
+  return saved_fd;
+}
+
+/* Undo munit_replace_stderr(): put the saved descriptor back on
+ * STDERR_FILENO and close the saved copy.  Does nothing for -1. */
+static void munit_restore_stderr(int orig_stderr) {
+  if (orig_stderr == -1)
+    return;
+
+  dup2(orig_stderr, STDERR_FILENO);
+  close(orig_stderr);
+}
+#endif /* !defined(MUNIT_NO_BUFFER) */
+
+/* Run a test with the specified parameters.
+ *
+ * Prints the parameter list (if any), runs the test either in a forked
+ * child or in this process with stderr redirected to a temporary file,
+ * prints the bracketed result (plus timing when enabled), updates the
+ * runner's totals, and replays the captured stderr when the test failed,
+ * errored, or the runner asks for stderr to be shown. */
+static void
+munit_test_runner_run_test_with_params(MunitTestRunner *runner,
+                                       const MunitTest *test,
+                                       const MunitParameter params[]) {
+  MunitResult result = MUNIT_OK;
+  /* Per-test counters; filled by munit_test_runner_exec (directly, or in
+   * the child and sent back through a pipe). */
+  MunitReport report = {0, 0, 0, 0,
+#if defined(MUNIT_ENABLE_TIMING)
+                        0, 0
+#endif
+  };
+  unsigned int output_l;
+  munit_bool first;
+  const MunitParameter *param;
+  FILE *stderr_buf;
+#if !defined(MUNIT_NO_FORK)
+  int pipefd[2];
+  pid_t fork_pid;
+  ssize_t bytes_written = 0;
+  ssize_t write_res;
+  ssize_t bytes_read = 0;
+  ssize_t read_res;
+  int status = 0;
+  pid_t changed_pid;
+#endif
+
+  /* Print "  name=value, name=value" and pad with spaces up to
+   * MUNIT_TEST_NAME_LEN columns so the result brackets line up. */
+  if (params != NULL) {
+    output_l = 2;
+    fputs("  ", MUNIT_OUTPUT_FILE);
+    first = 1;
+    for (param = params; param != NULL && param->name != NULL; param++) {
+      if (!first) {
+        fputs(", ", MUNIT_OUTPUT_FILE);
+        output_l += 2;
+      } else {
+        first = 0;
+      }
+
+      output_l += (unsigned int)fprintf(MUNIT_OUTPUT_FILE, "%s=%s", param->name,
+                                        param->value);
+    }
+    while (output_l++ < MUNIT_TEST_NAME_LEN) {
+      fputc(' ', MUNIT_OUTPUT_FILE);
+    }
+  }
+
+  fflush(MUNIT_OUTPUT_FILE);
+
+  /* Temporary file that collects everything the test writes to stderr. */
+  stderr_buf = NULL;
+#if !defined(_WIN32) || defined(__MINGW32__)
+  stderr_buf = tmpfile();
+#else
+  tmpfile_s(&stderr_buf);
+#endif
+  if (stderr_buf == NULL) {
+    munit_log_errno(MUNIT_LOG_ERROR, stderr,
+                    "unable to create buffer for stderr");
+    /* NOTE(review): `report` is still all zero here, so for a non-TODO
+     * test no status string is printed and the runner totals are not
+     * updated -- confirm whether that is intended. */
+    result = MUNIT_ERROR;
+    goto print_result;
+  }
+
+#if !defined(MUNIT_NO_FORK)
+  if (runner->fork) {
+    pipefd[0] = -1;
+    pipefd[1] = -1;
+    if (pipe(pipefd) != 0) {
+      munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to create pipe");
+      result = MUNIT_ERROR;
+      goto print_result;
+    }
+
+    fork_pid = fork();
+    if (fork_pid == 0) {
+      /* Child: run the test, then send `report` to the parent. */
+      int orig_stderr;
+
+      close(pipefd[0]);
+
+      orig_stderr = munit_replace_stderr(stderr_buf);
+      munit_test_runner_exec(runner, test, params, &report);
+
+      /* Note that we don't restore stderr.  This is so we can buffer
+       * things written to stderr later on (such as by
+       * asan/tsan/ubsan, valgrind, etc.) */
+      close(orig_stderr);
+
+      /* Write the whole struct, resuming after short writes. */
+      do {
+        write_res =
+          write(pipefd[1], ((munit_uint8_t *)(&report)) + bytes_written,
+                sizeof(report) - (size_t)bytes_written);
+        if (write_res < 0) {
+          if (stderr_buf != NULL) {
+            munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to write to pipe");
+          }
+          exit(EXIT_FAILURE);
+        }
+        bytes_written += write_res;
+      } while ((size_t)bytes_written < sizeof(report));
+
+      if (stderr_buf != NULL)
+        fclose(stderr_buf);
+      close(pipefd[1]);
+
+      exit(EXIT_SUCCESS);
+    } else if (fork_pid == -1) {
+      /* fork() failed: count the test as errored. */
+      close(pipefd[0]);
+      close(pipefd[1]);
+      if (stderr_buf != NULL) {
+        munit_log_errno(MUNIT_LOG_ERROR, stderr, "unable to fork");
+      }
+      report.errored++;
+      result = MUNIT_ERROR;
+    } else {
+      /* Parent: read the child's report; stop at end-of-file or on a
+       * read error. */
+      close(pipefd[1]);
+      do {
+        read_res = read(pipefd[0], ((munit_uint8_t *)(&report)) + bytes_read,
+                        sizeof(report) - (size_t)bytes_read);
+        if (read_res < 1)
+          break;
+        bytes_read += read_res;
+      } while (bytes_read < (ssize_t)sizeof(report));
+
+      changed_pid = waitpid(fork_pid, &status, 0);
+
+      if (MUNIT_LIKELY(changed_pid == fork_pid) &&
+          MUNIT_LIKELY(WIFEXITED(status))) {
+        /* The child exited on its own.  A short report, or a non-zero
+         * exit status, counts as an error. */
+        if (bytes_read != sizeof(report)) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf,
+                              "child exited unexpectedly with status %d",
+                              WEXITSTATUS(status));
+          report.errored++;
+        } else if (WEXITSTATUS(status) != EXIT_SUCCESS) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf,
+                              "child exited with status %d",
+                              WEXITSTATUS(status));
+          report.errored++;
+        }
+      } else {
+        /* The child did not exit normally (or waitpid failed): log the
+         * signal if there was one, and count an error either way. */
+        if (WIFSIGNALED(status)) {
+#  if defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 700)
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf,
+                              "child killed by signal %d (%s)",
+                              WTERMSIG(status), strsignal(WTERMSIG(status)));
+#  else
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf,
+                              "child killed by signal %d", WTERMSIG(status));
+#  endif
+        } else if (WIFSTOPPED(status)) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr_buf,
+                              "child stopped by signal %d", WSTOPSIG(status));
+        }
+        report.errored++;
+      }
+
+      close(pipefd[0]);
+      waitpid(fork_pid, NULL, 0);
+    }
+  } else
+#endif
+  {
+    /* In-process path: run the test here with stderr redirected. */
+#if !defined(MUNIT_NO_BUFFER)
+    const volatile int orig_stderr = munit_replace_stderr(stderr_buf);
+#endif
+
+#if defined(MUNIT_THREAD_LOCAL)
+    /* A non-zero return from setjmp means control came back through
+     * munit_error_jmp_buf; that is counted as a failure. */
+    if (MUNIT_UNLIKELY(setjmp(munit_error_jmp_buf) != 0)) {
+      result = MUNIT_FAIL;
+      report.failed++;
+    } else {
+      munit_error_jmp_buf_valid = 1;
+      result = munit_test_runner_exec(runner, test, params, &report);
+    }
+#else
+    result = munit_test_runner_exec(runner, test, params, &report);
+#endif
+
+#if !defined(MUNIT_NO_BUFFER)
+    munit_restore_stderr(orig_stderr);
+#endif
+
+    /* Here just so that the label is used on Windows and we don't get
+     * a warning */
+    goto print_result;
+  }
+
+print_result:
+
+  /* Decide the status from the per-test counters, print it, and bump the
+   * matching runner total.  For a TODO test the sense is inverted: any
+   * failure, error or skip is the expected outcome, success is an error. */
+  fputs("[ ", MUNIT_OUTPUT_FILE);
+  if ((test->options & MUNIT_TEST_OPTION_TODO) == MUNIT_TEST_OPTION_TODO) {
+    if (report.failed != 0 || report.errored != 0 || report.skipped != 0) {
+      munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_TODO, '3');
+      result = MUNIT_OK;
+    } else {
+      munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_ERROR, '1');
+      if (MUNIT_LIKELY(stderr_buf != NULL))
+        munit_log_internal(MUNIT_LOG_ERROR, stderr_buf,
+                           "Test marked TODO, but was successful.");
+      runner->report.failed++;
+      result = MUNIT_ERROR;
+    }
+  } else if (report.failed > 0) {
+    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_FAIL, '1');
+    runner->report.failed++;
+    result = MUNIT_FAIL;
+  } else if (report.errored > 0) {
+    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_ERROR, '1');
+    runner->report.errored++;
+    result = MUNIT_ERROR;
+  } else if (report.skipped > 0) {
+    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_SKIP, '3');
+    runner->report.skipped++;
+    result = MUNIT_SKIP;
+  } else if (report.successful > 1) {
+    /* Several successful iterations: print the per-iteration average,
+     * then the totals on a second line. */
+    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_OK, '2');
+#if defined(MUNIT_ENABLE_TIMING)
+    fputs(" ] [ ", MUNIT_OUTPUT_FILE);
+    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock / report.successful);
+    fputs(" / ", MUNIT_OUTPUT_FILE);
+    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock / report.successful);
+    fprintf(MUNIT_OUTPUT_FILE,
+            " CPU ]\n  %-" MUNIT_XSTRINGIFY(MUNIT_TEST_NAME_LEN) "s Total: [ ",
+            "");
+    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock);
+    fputs(" / ", MUNIT_OUTPUT_FILE);
+    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock);
+    fputs(" CPU", MUNIT_OUTPUT_FILE);
+#endif
+    runner->report.successful++;
+    result = MUNIT_OK;
+  } else if (report.successful > 0) {
+    /* Exactly one successful iteration: print its times only. */
+    munit_test_runner_print_color(runner, MUNIT_RESULT_STRING_OK, '2');
+#if defined(MUNIT_ENABLE_TIMING)
+    fputs(" ] [ ", MUNIT_OUTPUT_FILE);
+    munit_print_time(MUNIT_OUTPUT_FILE, report.wall_clock);
+    fputs(" / ", MUNIT_OUTPUT_FILE);
+    munit_print_time(MUNIT_OUTPUT_FILE, report.cpu_clock);
+    fputs(" CPU", MUNIT_OUTPUT_FILE);
+#endif
+    runner->report.successful++;
+    result = MUNIT_OK;
+  }
+  fputs(" ]\n", MUNIT_OUTPUT_FILE);
+
+  /* Replay the captured stderr for failures and errors (or always, when
+   * show_stderr is set), then discard the buffer. */
+  if (stderr_buf != NULL) {
+    if (result == MUNIT_FAIL || result == MUNIT_ERROR || runner->show_stderr) {
+      fflush(MUNIT_OUTPUT_FILE);
+
+      rewind(stderr_buf);
+      munit_splice(fileno(stderr_buf), STDERR_FILENO);
+
+      fflush(stderr);
+    }
+
+    fclose(stderr_buf);
+  }
+}
+
+/* Run `test` once for every value of the wildcard parameter `p`,
+ * recursing into the following entries so that every combination of the
+ * remaining wildcard parameters is covered.
+ *
+ * `params` is the complete parameter array handed to the test; `p` points
+ * at the entry inside it whose value is being varied.  `test_name` is only
+ * passed along to the recursive calls.  Stops early once a failure or
+ * error has been recorded and the runner has fatal_failures set. */
+static void munit_test_runner_run_test_wild(MunitTestRunner *runner,
+                                            const MunitTest *test,
+                                            const char *test_name,
+                                            MunitParameter *params,
+                                            MunitParameter *p) {
+  const MunitParameterEnum *pe;
+  char **values;
+  MunitParameter *next;
+
+  /* Names are compared by pointer: the wildcard entries are created with
+   * the name pointers taken from test->parameters. */
+  for (pe = test->parameters; pe != NULL && pe->name != NULL; pe++) {
+    if (p->name == pe->name)
+      break;
+  }
+
+  /* An unsuccessful search stops on the terminating entry (name == NULL),
+   * not on a NULL pointer, so check for that too; also refuse to walk a
+   * missing value list. */
+  if (pe == NULL || pe->name == NULL || pe->values == NULL)
+    return;
+
+  for (values = pe->values; *values != NULL; values++) {
+    next = p + 1;
+    p->value = *values;
+    if (next->name == NULL) {
+      /* Last wildcard: the combination is complete, run it. */
+      munit_test_runner_run_test_with_params(runner, test, params);
+    } else {
+      munit_test_runner_run_test_wild(runner, test, test_name, params, next);
+    }
+    if (runner->fatal_failures &&
+        (runner->report.failed != 0 || runner->report.errored != 0))
+      break;
+  }
+}
+
+/* Run a single test, with every combination of parameters
+ * requested.
+ *
+ * Prints the test name (prefix + test->name), then builds the parameter
+ * array from three sources: values given on the command line, a random
+ * pick per parameter when single_parameter_mode is set, and wildcard
+ * placeholders that are expanded into every combination. */
+static void munit_test_runner_run_test(MunitTestRunner *runner,
+                                       const MunitTest *test,
+                                       const char *prefix) {
+  char *test_name =
+    munit_maybe_concat(NULL, (char *)prefix, (char *)test->name);
+  /* The array of parameters to pass to
+   * munit_test_runner_run_test_with_params */
+  MunitParameter *params = NULL;
+  size_t params_l = 0;
+  /* Wildcard parameters are parameters which have possible values
+   * specified in the test, but no specific value was passed to the
+   * CLI.  That means we want to run the test once for every
+   * possible combination of parameter values or, if --single was
+   * passed to the CLI, a single time with a random set of
+   * parameters. */
+  MunitParameter *wild_params = NULL;
+  size_t wild_params_l = 0;
+  const MunitParameterEnum *pe;
+  const MunitParameter *cli_p;
+  munit_bool filled;
+  unsigned int possible;
+  char **vals;
+  size_t first_wild;
+  const MunitParameter *wp;
+  int pidx;
+
+  munit_rand_seed(runner->seed);
+
+  fprintf(MUNIT_OUTPUT_FILE, "%-" MUNIT_XSTRINGIFY(MUNIT_TEST_NAME_LEN) "s",
+          test_name);
+
+  if (test->parameters == NULL) {
+    /* No parameters.  Simple, nice. */
+    munit_test_runner_run_test_with_params(runner, test, NULL);
+  } else {
+    fputc('\n', MUNIT_OUTPUT_FILE);
+
+    for (pe = test->parameters; pe != NULL && pe->name != NULL; pe++) {
+      /* Did we receive a value for this parameter from the CLI? */
+      filled = 0;
+      for (cli_p = runner->parameters; cli_p != NULL && cli_p->name != NULL;
+           cli_p++) {
+        if (strcmp(cli_p->name, pe->name) == 0) {
+          if (MUNIT_UNLIKELY(munit_parameters_add(&params_l, &params, pe->name,
+                                                  cli_p->value) != MUNIT_OK))
+            goto cleanup;
+          filled = 1;
+          break;
+        }
+      }
+      if (filled)
+        continue;
+
+      /* Nothing from CLI, is the enum NULL/empty?  We're not a
+       * fuzzer… */
+      if (pe->values == NULL || pe->values[0] == NULL)
+        continue;
+
+      /* If --single was passed to the CLI, choose a value from the
+       * list of possibilities randomly. */
+      if (runner->single_parameter_mode) {
+        possible = 0;
+        for (vals = pe->values; *vals != NULL; vals++)
+          possible++;
+        /* We want the tests to be reproducible, even if you're only
+         * running a single test, but we don't want every test with
+         * the same number of parameters to choose the same parameter
+         * number, so use the test name as a primitive salt. */
+        pidx = (int)munit_rand_at_most(munit_str_hash(test_name), possible - 1);
+        if (MUNIT_UNLIKELY(munit_parameters_add(&params_l, &params, pe->name,
+                                                pe->values[pidx]) != MUNIT_OK))
+          goto cleanup;
+      } else {
+        /* We want to try every permutation.  Put in a placeholder
+         * entry, we'll iterate through them later. */
+        if (MUNIT_UNLIKELY(munit_parameters_add(&wild_params_l, &wild_params,
+                                                pe->name, NULL) != MUNIT_OK))
+          goto cleanup;
+      }
+    }
+
+    if (wild_params_l != 0) {
+      /* Append one entry per wildcard after the fixed parameters; the
+       * wildcard expansion then rewrites their values in place. */
+      first_wild = params_l;
+      for (wp = wild_params; wp != NULL && wp->name != NULL; wp++) {
+        for (pe = test->parameters; pe != NULL && pe->name != NULL; pe++) {
+          /* A parameter without a value list is never a wildcard.  Skip
+           * it instead of ending the scan, otherwise wildcards declared
+           * after it would never get their entry. */
+          if (pe->values == NULL)
+            continue;
+          if (strcmp(wp->name, pe->name) == 0) {
+            if (MUNIT_UNLIKELY(munit_parameters_add(&params_l, &params,
+                                                    pe->name,
+                                                    pe->values[0]) != MUNIT_OK))
+              goto cleanup;
+          }
+        }
+      }
+
+      munit_test_runner_run_test_wild(runner, test, test_name, params,
+                                      params + first_wild);
+    } else {
+      munit_test_runner_run_test_with_params(runner, test, params);
+    }
+
+  cleanup:
+    free(params);
+    free(wild_params);
+  }
+
+  munit_maybe_free_concat(test_name, prefix, test->name);
+}
+
+/* Non-zero once the runner must stop: fatal_failures is set and at least
+ * one test has failed or errored. */
+static int munit_test_runner_must_stop(const MunitTestRunner *runner) {
+  return runner->fatal_failures &&
+         (runner->report.failed != 0 || runner->report.errored != 0);
+}
+
+/* Recurse through the suite and run all the tests.  If a list of
+ * tests to run was provided on the command line, run only those
+ * tests.
+ *
+ * `prefix` is the accumulated prefix of the parent suites (or NULL); this
+ * suite's own prefix is appended to it to form the names of its tests.
+ * With fatal_failures set, nothing further is run after the first test
+ * that fails or errors. */
+static void munit_test_runner_run_suite(MunitTestRunner *runner,
+                                        const MunitSuite *suite,
+                                        const char *prefix) {
+  size_t pre_l;
+  char *pre = munit_maybe_concat(&pre_l, (char *)prefix, (char *)suite->prefix);
+  const MunitTest *test;
+  const char **test_name;
+  const MunitSuite *child_suite;
+
+  /* Run the tests. */
+  for (test = suite->tests; test != NULL && test->test != NULL; test++) {
+    if (runner->tests != NULL) { /* Specific tests were requested on the CLI */
+      for (test_name = runner->tests; test_name != NULL && *test_name != NULL;
+           test_name++) {
+        /* The requested name must start with this suite's prefix, and
+         * whatever follows the prefix must be a leading part of the
+         * test's name. */
+        if ((pre_l == 0 || strncmp(pre, *test_name, pre_l) == 0) &&
+            strncmp(test->name, *test_name + pre_l,
+                    strlen(*test_name + pre_l)) == 0) {
+          munit_test_runner_run_test(runner, test, pre);
+          if (munit_test_runner_must_stop(runner))
+            goto cleanup;
+        }
+      }
+    } else { /* Run all tests */
+      munit_test_runner_run_test(runner, test, pre);
+      /* Honour fatal_failures here as well, not only once the whole
+       * suite has finished. */
+      if (munit_test_runner_must_stop(runner))
+        goto cleanup;
+    }
+  }
+
+  if (munit_test_runner_must_stop(runner))
+    goto cleanup;
+
+  /* Run any child suites. */
+  for (child_suite = suite->suites;
+       child_suite != NULL && child_suite->prefix != NULL; child_suite++) {
+    munit_test_runner_run_suite(runner, child_suite, pre);
+    /* Do not start the next child once one has hit a fatal failure. */
+    if (munit_test_runner_must_stop(runner))
+      goto cleanup;
+  }
+
+cleanup:
+
+  munit_maybe_free_concat(pre, prefix, suite->prefix);
+}
+
+/* Run everything reachable from the runner's root suite, starting with no
+ * name prefix. */
+static void munit_test_runner_run(MunitTestRunner *runner) {
+  const MunitSuite *const root = runner->suite;
+
+  munit_test_runner_run_suite(runner, root, NULL);
+}
+
+/* Print the usage text, the framework name and version, and the help of
+ * every custom argument to stdout.
+ *
+ * argc:      unused.
+ * argv:      only argv[0] is read, as the program name in the usage line.
+ * user_data: passed through unchanged to each custom write_help callback.
+ * arguments: optional array of custom arguments, terminated by an entry
+ *            whose name is NULL; may be NULL. */
+static void munit_print_help(int argc, char *const *argv, void *user_data,
+                             const MunitArgument arguments[]) {
+  const MunitArgument *arg;
+  (void)argc;
+
+  printf("USAGE: %s [OPTIONS...] [TEST...]\n\n", argv[0]);
+  puts(
+    " --seed SEED\n"
+    "           Value used to seed the PRNG.  Must be a 32-bit integer in "
+    "decimal\n"
+    "           notation with no separators (commas, decimals, spaces, "
+    "etc.), or\n"
+    "           hexadecimal prefixed by \"0x\".\n"
+    " --iterations N\n"
+    "           Run each test N times.  0 means the default number.\n"
+    " --param name value\n"
+    "           A parameter key/value pair which will be passed to any test "
+    "which\n"
+    "           takes a parameter of that name.  If not provided, the test "
+    "will be\n"
+    "           run once for each possible parameter value.\n"
+    " --list    Write a list of all available tests.\n"
+    " --list-params\n"
+    "           Write a list of all available tests and their possible "
+    "parameters.\n"
+    " --single  Run each parameterized test in a single configuration "
+    "instead of\n"
+    "           every possible combination\n"
+    " --log-visible debug|info|warning|error\n"
+    " --log-fatal debug|info|warning|error\n"
+    "           Set the level at which messages of different severities are "
+    "visible,\n"
+    "           or cause the test to terminate.\n"
+#if !defined(MUNIT_NO_FORK)
+    " --no-fork Do not execute tests in a child process.  If this option is "
+    "supplied\n"
+    "           and a test crashes (including by failing an assertion), no "
+    "further\n"
+    "           tests will be performed.\n"
+#endif
+    " --fatal-failures\n"
+    "           Stop executing tests as soon as a failure is found.\n"
+    " --show-stderr\n"
+    "           Show data written to stderr by the tests, even if the test "
+    "succeeds.\n"
+    " --color auto|always|never\n"
+    "           Colorize (or don't) the output.\n"
+    /* 12345678901234567890123456789012345678901234567890123456789012345678901234567890
+     */
+    " --help    Print this help message and exit.\n");
+#if defined(MUNIT_NL_LANGINFO)
+  setlocale(LC_ALL, "");
+  fputs((strcasecmp("UTF-8", nl_langinfo(CODESET)) == 0) ? "µnit" : "munit",
+        stdout);
+#else
+  /* fputs, not puts: the version printed next must stay on the same line,
+   * exactly as in the langinfo branch above. */
+  fputs("munit", stdout);
+#endif
+  printf(" %d.%d.%d\n"
+         "Full documentation at: https://nemequ.github.io/munit/\n",
+         (MUNIT_CURRENT_VERSION >> 16) & 0xff,
+         (MUNIT_CURRENT_VERSION >> 8) & 0xff,
+         (MUNIT_CURRENT_VERSION >> 0) & 0xff);
+  /* Let every custom argument append its own help text. */
+  for (arg = arguments; arg != NULL && arg->name != NULL; arg++)
+    arg->write_help(arg, user_data);
+}
+
+/* Look up a custom argument by exact name.
+ *
+ * arguments: array terminated by an entry whose name is NULL; may be NULL.
+ * name:      name to search for (compared with strcmp).
+ *
+ * Returns the first matching entry, or NULL when there is none. */
+static const MunitArgument *
+munit_arguments_find(const MunitArgument arguments[], const char *name) {
+  const MunitArgument *candidate = arguments;
+
+  if (candidate == NULL)
+    return NULL;
+
+  while (candidate->name != NULL) {
+    if (strcmp(candidate->name, name) == 0)
+      return candidate;
+    candidate++;
+  }
+
+  return NULL;
+}
+
+/* Print the name of every test in `suite` and, recursively, in its child
+ * suites, one per line on stdout.
+ *
+ * suite:       suite to list.
+ * show_params: when non-zero, each test name is followed by one
+ *              " - name: v1, v2, ..." line per declared parameter; a
+ *              parameter without a value list is shown as "Any".
+ * prefix:      name prefix inherited from the parent suite, or NULL. */
+static void munit_suite_list_tests(const MunitSuite *suite,
+                                   munit_bool show_params, const char *prefix) {
+  size_t full_prefix_len;
+  char *full_prefix =
+    munit_maybe_concat(&full_prefix_len, (char *)prefix, (char *)suite->prefix);
+  const MunitTest *entry;
+  const MunitParameterEnum *param;
+  char **value;
+  const MunitSuite *sub;
+
+  for (entry = suite->tests; entry != NULL && entry->name != NULL; entry++) {
+    if (full_prefix != NULL)
+      fputs(full_prefix, stdout);
+    puts(entry->name);
+
+    if (!show_params)
+      continue;
+
+    for (param = entry->parameters; param != NULL && param->name != NULL;
+         param++) {
+      fprintf(stdout, " - %s: ", param->name);
+
+      if (param->values == NULL) {
+        puts("Any");
+        continue;
+      }
+
+      for (value = param->values; *value != NULL; value++) {
+        /* Separator goes before every value except the first one. */
+        if (value != param->values)
+          fputs(", ", stdout);
+        fputs(*value, stdout);
+      }
+      putc('\n', stdout);
+    }
+  }
+
+  for (sub = suite->suites; sub != NULL && sub->prefix != NULL; sub++) {
+    munit_suite_list_tests(sub, show_params, full_prefix);
+  }
+
+  munit_maybe_free_concat(full_prefix, prefix, suite->prefix);
+}
+
+/* Decide whether colored output should be used by default on `stream`.
+ *
+ * Outside Windows the answer is simply whether the stream is a terminal.
+ * On Windows the stream must be a terminal and the ANSICON environment
+ * variable must be present (non-empty size via getenv_s, or non-NULL via
+ * getenv on MinGW). */
+static munit_bool munit_stream_supports_ansi(FILE *stream) {
+#if !defined(_WIN32)
+  return isatty(fileno(stream));
+#else
+
+#  if !defined(__MINGW32__)
+  /* Receives the size getenv_s reports for ANSICON; stays 0 if unset. */
+  size_t ansicon_size = 0;
+#  endif
+
+  if (isatty(fileno(stream))) {
+#  if !defined(__MINGW32__)
+    /* NULL buffer of size 0: only the required size is queried. */
+    getenv_s(&ansicon_size, NULL, 0, "ANSICON");
+    return ansicon_size != 0;
+#  else
+    return getenv("ANSICON") != NULL;
+#  endif
+  }
+  return 0;
+#endif
+}
+
+/* Parse the command line, run (or list) the tests of `suite` and print a
+ * summary.
+ *
+ * suite:     root suite to run.
+ * user_data: stored in the runner and handed to custom argument callbacks.
+ * argc/argv: command line; argv[0] is skipped.  Arguments starting with
+ *            "--" are options, everything else is collected as a test
+ *            name to run.
+ * arguments: optional array of custom arguments (terminated by an entry
+ *            with a NULL name) consulted for options not built in; may be
+ *            NULL.
+ *
+ * Returns EXIT_SUCCESS when the run finished with no failed and no errored
+ * tests, or after --help, --list or --list-params.  Returns EXIT_FAILURE
+ * on an invalid or unknown option, on allocation failure, when a custom
+ * parse_argument callback returns zero, or when any test failed or
+ * errored. */
+int munit_suite_main_custom(const MunitSuite *suite, void *user_data, int argc,
+                            char *const *argv,
+                            const MunitArgument arguments[]) {
+  int result = EXIT_FAILURE;
+  MunitTestRunner runner;
+  size_t parameters_size = 0;
+  size_t tests_size = 0;
+  int arg;
+
+  char *envptr;
+  unsigned long ts;
+  char *endptr;
+  unsigned long long iterations;
+  MunitLogLevel level;
+  const MunitArgument *argument;
+  MunitParameter *runner_parameters;
+  const char **runner_tests;
+  unsigned int tests_run;
+  unsigned int tests_total;
+
+  runner.prefix = NULL;
+  runner.suite = NULL;
+  runner.tests = NULL;
+  runner.seed = 0;
+  runner.iterations = 0;
+  runner.parameters = NULL;
+  runner.single_parameter_mode = 0;
+  runner.user_data = NULL;
+
+  runner.report.successful = 0;
+  runner.report.skipped = 0;
+  runner.report.failed = 0;
+  runner.report.errored = 0;
+#if defined(MUNIT_ENABLE_TIMING)
+  runner.report.cpu_clock = 0;
+  runner.report.wall_clock = 0;
+#endif
+
+  runner.colorize = 0;
+#if !defined(_WIN32)
+  runner.fork = 1;
+#else
+  runner.fork = 0;
+#endif
+  runner.show_stderr = 0;
+  runner.fatal_failures = 0;
+  runner.suite = suite;
+  runner.user_data = user_data;
+  runner.seed = munit_rand_generate_seed();
+  runner.colorize = munit_stream_supports_ansi(MUNIT_OUTPUT_FILE);
+
+  for (arg = 1; arg < argc; arg++) {
+    if (strncmp("--", argv[arg], 2) == 0) {
+      if (strcmp("seed", argv[arg] + 2) == 0) {
+        if (arg + 1 >= argc) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "%s requires an argument", argv[arg]);
+          goto cleanup;
+        }
+
+        envptr = argv[arg + 1];
+        ts = strtoul(argv[arg + 1], &envptr, 0);
+        /* Reject trailing garbage and values that do not fit in 32 bits. */
+        if (*envptr != '\0' || ts > (~((munit_uint32_t)0U))) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "invalid value ('%s') passed to %s",
+                              argv[arg + 1], argv[arg]);
+          goto cleanup;
+        }
+        runner.seed = (munit_uint32_t)ts;
+
+        arg++;
+      } else if (strcmp("iterations", argv[arg] + 2) == 0) {
+        if (arg + 1 >= argc) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "%s requires an argument", argv[arg]);
+          goto cleanup;
+        }
+
+        endptr = argv[arg + 1];
+        iterations = strtoul(argv[arg + 1], &endptr, 0);
+        if (*endptr != '\0' || iterations > UINT_MAX) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "invalid value ('%s') passed to %s",
+                              argv[arg + 1], argv[arg]);
+          goto cleanup;
+        }
+
+        runner.iterations = (unsigned int)iterations;
+
+        arg++;
+      } else if (strcmp("param", argv[arg] + 2) == 0) {
+        if (arg + 2 >= argc) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "%s requires two arguments", argv[arg]);
+          goto cleanup;
+        }
+
+        /* Grow through a temporary so the existing array is still owned by
+         * the runner, and freed in cleanup, if realloc fails.  One extra
+         * slot is kept for the NULL terminator entry. */
+        runner_parameters =
+          realloc(runner.parameters,
+                  sizeof(MunitParameter) * (parameters_size + 2));
+        if (runner_parameters == NULL) {
+          munit_log_internal(MUNIT_LOG_ERROR, stderr,
+                             "failed to allocate memory");
+          goto cleanup;
+        }
+        runner.parameters = runner_parameters;
+        runner.parameters[parameters_size].name = (char *)argv[arg + 1];
+        runner.parameters[parameters_size].value = (char *)argv[arg + 2];
+        parameters_size++;
+        runner.parameters[parameters_size].name = NULL;
+        runner.parameters[parameters_size].value = NULL;
+        arg += 2;
+      } else if (strcmp("color", argv[arg] + 2) == 0) {
+        if (arg + 1 >= argc) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "%s requires an argument", argv[arg]);
+          goto cleanup;
+        }
+
+        if (strcmp(argv[arg + 1], "always") == 0)
+          runner.colorize = 1;
+        else if (strcmp(argv[arg + 1], "never") == 0)
+          runner.colorize = 0;
+        else if (strcmp(argv[arg + 1], "auto") == 0)
+          runner.colorize = munit_stream_supports_ansi(MUNIT_OUTPUT_FILE);
+        else {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "invalid value ('%s') passed to %s",
+                              argv[arg + 1], argv[arg]);
+          goto cleanup;
+        }
+
+        arg++;
+      } else if (strcmp("help", argv[arg] + 2) == 0) {
+        munit_print_help(argc, argv, user_data, arguments);
+        result = EXIT_SUCCESS;
+        goto cleanup;
+      } else if (strcmp("single", argv[arg] + 2) == 0) {
+        runner.single_parameter_mode = 1;
+      } else if (strcmp("show-stderr", argv[arg] + 2) == 0) {
+        runner.show_stderr = 1;
+#if !defined(_WIN32)
+      } else if (strcmp("no-fork", argv[arg] + 2) == 0) {
+        runner.fork = 0;
+#endif
+      } else if (strcmp("fatal-failures", argv[arg] + 2) == 0) {
+        runner.fatal_failures = 1;
+      } else if (strcmp("log-visible", argv[arg] + 2) == 0 ||
+                 strcmp("log-fatal", argv[arg] + 2) == 0) {
+        if (arg + 1 >= argc) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "%s requires an argument", argv[arg]);
+          goto cleanup;
+        }
+
+        if (strcmp(argv[arg + 1], "debug") == 0)
+          level = MUNIT_LOG_DEBUG;
+        else if (strcmp(argv[arg + 1], "info") == 0)
+          level = MUNIT_LOG_INFO;
+        else if (strcmp(argv[arg + 1], "warning") == 0)
+          level = MUNIT_LOG_WARNING;
+        else if (strcmp(argv[arg + 1], "error") == 0)
+          level = MUNIT_LOG_ERROR;
+        else {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "invalid value ('%s') passed to %s",
+                              argv[arg + 1], argv[arg]);
+          goto cleanup;
+        }
+
+        /* Both options share the parsing above; pick the target here. */
+        if (strcmp("log-visible", argv[arg] + 2) == 0)
+          munit_log_level_visible = level;
+        else
+          munit_log_level_fatal = level;
+
+        arg++;
+      } else if (strcmp("list", argv[arg] + 2) == 0) {
+        munit_suite_list_tests(suite, 0, NULL);
+        result = EXIT_SUCCESS;
+        goto cleanup;
+      } else if (strcmp("list-params", argv[arg] + 2) == 0) {
+        munit_suite_list_tests(suite, 1, NULL);
+        result = EXIT_SUCCESS;
+        goto cleanup;
+      } else {
+        /* Not a built-in option: try the caller-supplied arguments. */
+        argument = munit_arguments_find(arguments, argv[arg] + 2);
+        if (argument == NULL) {
+          munit_logf_internal(MUNIT_LOG_ERROR, stderr,
+                              "unknown argument ('%s')", argv[arg]);
+          goto cleanup;
+        }
+
+        /* The callback receives &arg, presumably so it can consume
+         * following argv entries -- confirm against the callback docs. */
+        if (!argument->parse_argument(suite, user_data, &arg, argc, argv))
+          goto cleanup;
+      }
+    } else {
+      /* Positional argument: remember it as a test name to run.  One extra
+       * slot is kept for the NULL terminator. */
+      runner_tests =
+        realloc((void *)runner.tests, sizeof(char *) * (tests_size + 2));
+      if (runner_tests == NULL) {
+        munit_log_internal(MUNIT_LOG_ERROR, stderr,
+                           "failed to allocate memory");
+        goto cleanup;
+      }
+      runner.tests = runner_tests;
+      runner.tests[tests_size++] = argv[arg];
+      runner.tests[tests_size] = NULL;
+    }
+  }
+
+  fflush(stderr);
+  fprintf(MUNIT_OUTPUT_FILE,
+          "Running test suite with seed 0x%08" PRIx32 "...\n", runner.seed);
+
+  munit_test_runner_run(&runner);
+
+  tests_run =
+    runner.report.successful + runner.report.failed + runner.report.errored;
+  tests_total = tests_run + runner.report.skipped;
+  if (tests_run == 0) {
+    fprintf(stderr, "No tests run, %d (100%%) skipped.\n",
+            runner.report.skipped);
+  } else {
+    fprintf(MUNIT_OUTPUT_FILE,
+            "%d of %d (%0.0f%%) tests successful, %d (%0.0f%%) test skipped.\n",
+            runner.report.successful, tests_run,
+            (((double)runner.report.successful) / ((double)tests_run)) * 100.0,
+            runner.report.skipped,
+            (((double)runner.report.skipped) / ((double)tests_total)) * 100.0);
+  }
+
+  if (runner.report.failed == 0 && runner.report.errored == 0) {
+    result = EXIT_SUCCESS;
+  }
+
+cleanup:
+  free(runner.parameters);
+  free((void *)runner.tests);
+
+  return result;
+}
+
+/* Convenience wrapper around munit_suite_main_custom() for callers that
+ * define no custom command-line arguments (passes NULL for them).
+ * Returns whatever munit_suite_main_custom() returns. */
+int munit_suite_main(const MunitSuite *suite, void *user_data, int argc,
+                     char *const *argv) {
+  return munit_suite_main_custom(suite, user_data, argc, argv, NULL);
+}
+
+/* Lookup table mapping a nibble value (0-15) to its lowercase hex digit.
+ * Only ever read, hence const. */
+static const uint8_t hexchars[] = "0123456789abcdef";
+
+/* Write the low 32 bits of `addr` to `dest` as eight lowercase hex digits,
+ * most significant byte first.  No terminator is written.
+ *
+ * Returns the position just past the last digit written. */
+static uint8_t *hexdump_addr(uint8_t *dest, size_t addr) {
+  unsigned int shift = 32;
+  uint8_t byte;
+
+  while (shift != 0) {
+    shift -= 8; /* 24, 16, 8, 0 */
+    byte = (uint8_t)((addr >> shift) & 0xff);
+
+    dest[0] = hexchars[byte >> 4];
+    dest[1] = hexchars[byte & 0xf];
+    dest += 2;
+  }
+
+  return dest;
+}
+
+/* Write the ASCII column of a hexdump row to `dest`: a '|', one character
+ * per input byte (the byte itself when it lies in 0x20..0x7e, '.'
+ * otherwise), and a closing '|'.
+ *
+ * Returns the position just past the closing '|'. */
+static uint8_t *asciidump(uint8_t *dest, const uint8_t *data, size_t datalen) {
+  size_t idx = 0;
+
+  *dest++ = '|';
+
+  while (idx < datalen) {
+    const uint8_t ch = data[idx++];
+
+    *dest++ = (ch < 0x20 || ch > 0x7e) ? '.' : ch;
+  }
+
+  *dest++ = '|';
+
+  return dest;
+}
+
+/* Write one half of a hexdump row to `dest`: each of the `datalen` bytes
+ * as two hex digits followed by a space, then blank padding (three spaces
+ * per missing byte) up to a width of eight bytes.
+ *
+ * Returns the position just past the last character written. */
+static uint8_t *hexdump8(uint8_t *dest, const uint8_t *data, size_t datalen) {
+  size_t done = 0;
+
+  while (done < datalen) {
+    const uint8_t byte = data[done++];
+
+    dest[0] = hexchars[byte >> 4];
+    dest[1] = hexchars[byte & 0xf];
+    dest[2] = ' ';
+    dest += 3;
+  }
+
+  if (done < 8) {
+    const size_t pad = (8 - done) * 3;
+
+    memset(dest, ' ', pad);
+    dest += pad;
+  }
+
+  return dest;
+}
+
+/* Write the hex part of a full hexdump row to `dest`: two eight-byte
+ * halves, each rendered by hexdump8() and each followed by one extra
+ * space.  Bytes missing from a short row are rendered as padding.
+ *
+ * Returns the position just past the last character written. */
+static uint8_t *hexdump16(uint8_t *dest, const uint8_t *data, size_t datalen) {
+  const size_t first_half = datalen < 8 ? datalen : 8;
+
+  dest = hexdump8(dest, data, first_half);
+  *dest++ = ' ';
+
+  if (datalen < 8) {
+    /* Nothing left for the second half: emit padding only. */
+    dest = hexdump8(dest, NULL, 0);
+  } else {
+    dest = hexdump8(dest, data + 8, datalen - 8);
+  }
+  *dest++ = ' ';
+
+  return dest;
+}
+
+/* Render one complete hexdump row into `dest`: the address, two spaces,
+ * the hex columns and the ASCII column.  No newline is appended.
+ *
+ * Returns the position just past the last character written. */
+static uint8_t *hexdump_line(uint8_t *dest, const uint8_t *data, size_t datalen,
+                             size_t addr) {
+  uint8_t *cursor = hexdump_addr(dest, addr);
+
+  cursor[0] = ' ';
+  cursor[1] = ' ';
+  cursor += 2;
+
+  cursor = hexdump16(cursor, data, datalen);
+
+  return asciidump(cursor, data, datalen);
+}
+
+/* Write a hexdump of the `datalen` bytes at `data` to `fp`, sixteen bytes
+ * per row.  A full row identical to the sixteen bytes before it is not
+ * printed; the first such row in a run is replaced by a single "*" line.
+ * The dump ends with a line holding `datalen` formatted as an address.
+ *
+ * Nothing is written when `datalen` is 0.
+ *
+ * Returns 0 on success, -1 as soon as an fwrite comes up short. */
+int munit_hexdump(FILE *fp, const void *data, size_t datalen) {
+  const uint8_t *const bytes = (const uint8_t *)data;
+  const uint8_t *row;
+  uint8_t line[128], *end;
+  size_t pos, row_len, out_len;
+  int collapsed = 0;
+
+  if (datalen == 0) {
+    return 0;
+  }
+
+  for (pos = 0; pos < datalen; pos += 16) {
+    row = bytes + pos;
+    row_len = datalen - pos;
+
+    if (row_len >= 16) {
+      row_len = 16;
+
+      /* Only full rows after the first can repeat their predecessor. */
+      if (pos > 0) {
+        if (memcmp(row - 16, row, 16) == 0) {
+          if (!collapsed) {
+            collapsed = 1;
+
+            if (fwrite("*\n", 1, 2, fp) < 2) {
+              return -1;
+            }
+          }
+
+          continue;
+        }
+
+        collapsed = 0;
+      }
+    }
+
+    end = hexdump_line(line, row, row_len, pos);
+    *end++ = '\n';
+    out_len = (size_t)(end - line);
+
+    if (fwrite(line, 1, out_len, fp) < out_len) {
+      return -1;
+    }
+  }
+
+  /* Trailer: the total length, formatted like a row address. */
+  end = hexdump_addr(line, datalen);
+  *end++ = '\n';
+  out_len = (size_t)(end - line);
+
+  if (fwrite(line, 1, out_len, fp) < out_len) {
+    return -1;
+  }
+
+  return 0;
+}
+
+/* Write a row-by-row diff of two buffers to `fp` in hexdump form.
+ *
+ * The buffers are walked in sixteen-byte rows.  Rows that have the same
+ * length and content on both sides are skipped.  For every other row the
+ * row of `a` is printed prefixed with "----" and the row of `b` prefixed
+ * with "++++" (a side with no bytes left at that offset is omitted).
+ * When both sides have bytes in the row, a marker line follows with "^^"
+ * under each differing byte in the hex columns and "^" under it in the
+ * ASCII column.
+ *
+ * a, alen: first buffer and its length.
+ * b, blen: second buffer and its length.
+ *
+ * Returns 0 on success, -1 as soon as an fwrite comes up short. */
+int munit_hexdump_diff(FILE *fp, const void *a, size_t alen, const void *b,
+                       size_t blen) {
+  size_t offset = 0, k, i, len, ncomp, maxlen, adoff;
+  uint8_t buf[128], *p;
+  const uint8_t mk[2] = {'-', '+'};
+  struct datasource {
+    const uint8_t *data;
+    size_t datalen;
+    const uint8_t *s; /* start of the current row, NULL when exhausted */
+    size_t n;         /* bytes in the current row, 0 when exhausted */
+  } ds[] = {{a, alen, NULL, 0}, {b, blen, NULL, 0}}, *dp;
+
+  maxlen = alen < blen ? blen : alen;
+
+  for (; offset < maxlen; offset += 16) {
+    /* Locate the current row in each buffer. */
+    for (k = 0; k < 2; ++k) {
+      dp = &ds[k];
+
+      if (offset < dp->datalen) {
+        dp->s = (const uint8_t *)dp->data + offset;
+        dp->n = dp->datalen - offset;
+
+        if (dp->n > 16) {
+          dp->n = 16;
+        }
+      } else {
+        dp->s = NULL;
+        dp->n = 0;
+      }
+    }
+
+    if (ds[0].n == ds[1].n && memcmp(ds[0].s, ds[1].s, ds[0].n) == 0) {
+      continue;
+    }
+
+    /* Print the row from each side that still has data. */
+    for (k = 0; k < 2; ++k) {
+      dp = &ds[k];
+
+      if (!dp->n) {
+        continue;
+      }
+
+      p = buf;
+      *p++ = mk[k];
+      *p++ = mk[k];
+      *p++ = mk[k];
+      *p++ = mk[k];
+
+      p = hexdump_line(p, dp->s, dp->n, offset);
+      *p++ = '\n';
+
+      len = (size_t)(p - buf);
+
+      if (fwrite(buf, 1, len, fp) < len) {
+        return -1;
+      }
+    }
+
+    /* A marker line only makes sense when both sides have bytes. */
+    if (!ds[0].n || !ds[1].n) {
+      continue;
+    }
+
+    ncomp = ds[0].n < ds[1].n ? ds[0].n : ds[1].n;
+
+    /* 4 marker characters + 8 address digits + 2 spaces precede the hex. */
+    p = buf + 4 + 10;
+
+    memset(buf, ' ', 4 + 78);
+
+    /* Reset per row: a value left over from an earlier row would otherwise
+     * decide the length of this row's marker line. */
+    adoff = 0;
+
+    for (i = 0; i < ncomp; ++i) {
+      if (ds[0].s[i] == ds[1].s[i]) {
+        *p++ = ' ';
+        *p++ = ' ';
+      } else {
+        /* Column of byte i inside the ASCII part of the row. */
+        adoff = 4 + 10 + 51 + i;
+        *(buf + adoff) = '^';
+
+        *p++ = '^';
+        *p++ = '^';
+      }
+
+      *p++ = ' ';
+
+      /* Extra gap between the two eight-byte halves. */
+      if (i == 7) {
+        *p++ = ' ';
+      }
+    }
+
+    /* End the line at the last ASCII marker, or at the end of the hex
+     * markers when this row has no differing byte. */
+    if (adoff) {
+      len = adoff + 1;
+    } else {
+      len = (size_t)(p - buf);
+    }
+
+    buf[len++] = '\n';
+
+    if (fwrite(buf, 1, len, fp) < len) {
+      return -1;
+    }
+  }
+
+  return 0;
+}
diff --git a/sys/contrib/openzfs/tests/unit/munit.h b/sys/contrib/openzfs/tests/unit/munit.h
new file mode 100644
index 00000000000..b10d10ee0a5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/munit.h
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: MIT
+/* µnit Testing Framework
+ * Copyright (c) 2013-2017 Evan Nemerson <evan@nemerson.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MUNIT_H
+#define MUNIT_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+
+/* Pack a version triple into a single integer: `major` shifted left by 16
+ * bits, `minor` by 8 bits, `revision` in the low bits. */
+#define MUNIT_VERSION(major, minor, revision)                                  \
+  (((major) << 16) | ((minor) << 8) | (revision))
+
+/* Version of this copy of the framework (0.4.1), packed as above. */
+#define MUNIT_CURRENT_VERSION MUNIT_VERSION(0, 4, 1)
+
+/* Fixed-width integer aliases.  MSVC releases with _MSC_VER below 1600 get
+ * the compiler's __intN types (presumably because they do not ship
+ * <stdint.h> -- confirm); everything else uses the <stdint.h> types. */
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+#  define munit_int8_t __int8
+#  define munit_uint8_t unsigned __int8
+#  define munit_int16_t __int16
+#  define munit_uint16_t unsigned __int16
+#  define munit_int32_t __int32
+#  define munit_uint32_t unsigned __int32
+#  define munit_int64_t __int64
+#  define munit_uint64_t unsigned __int64
+#else
+#  include <stdint.h>
+#  define munit_int8_t int8_t
+#  define munit_uint8_t uint8_t
+#  define munit_int16_t int16_t
+#  define munit_uint16_t uint16_t
+#  define munit_int32_t int32_t
+#  define munit_uint32_t uint32_t
+#  define munit_int64_t int64_t
+#  define munit_uint64_t uint64_t
+#endif
+
+/* printf conversion macros for the fixed-width types.  MSVC releases with
+ * _MSC_VER below 1800 get fallback definitions (each one only if not
+ * already defined), using the "I64" length modifier for the 64-bit
+ * variants; everything else takes them from <inttypes.h>. */
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+#  if !defined(PRIi8)
+#    define PRIi8 "i"
+#  endif
+#  if !defined(PRIi16)
+#    define PRIi16 "i"
+#  endif
+#  if !defined(PRIi32)
+#    define PRIi32 "i"
+#  endif
+#  if !defined(PRIi64)
+#    define PRIi64 "I64i"
+#  endif
+#  if !defined(PRId8)
+#    define PRId8 "d"
+#  endif
+#  if !defined(PRId16)
+#    define PRId16 "d"
+#  endif
+#  if !defined(PRId32)
+#    define PRId32 "d"
+#  endif
+#  if !defined(PRId64)
+#    define PRId64 "I64d"
+#  endif
+#  if !defined(PRIx8)
+#    define PRIx8 "x"
+#  endif
+#  if !defined(PRIx16)
+#    define PRIx16 "x"
+#  endif
+#  if !defined(PRIx32)
+#    define PRIx32 "x"
+#  endif
+#  if !defined(PRIx64)
+#    define PRIx64 "I64x"
+#  endif
+#  if !defined(PRIu8)
+#    define PRIu8 "u"
+#  endif
+#  if !defined(PRIu16)
+#    define PRIu16 "u"
+#  endif
+#  if !defined(PRIu32)
+#    define PRIu32 "u"
+#  endif
+#  if !defined(PRIu64)
+#    define PRIu64 "I64u"
+#  endif
+#else
+#  include <inttypes.h>
+#endif
+
+/* Boolean type used by the framework.  A definition supplied by the
+ * includer wins; otherwise `bool` is used when it exists as a macro, then
+ * `_Bool` when compiling as C99 or later, and plain `int` as the last
+ * resort. */
+#if !defined(munit_bool)
+#  if defined(bool)
+#    define munit_bool bool
+#  elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+#    define munit_bool _Bool
+#  else
+#    define munit_bool int
+#  endif
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Branch-prediction hints and an "unused" attribute.  With __GNUC__
+ * defined they map to __builtin_expect and __attribute__((__unused__));
+ * elsewhere the hints evaluate to the bare expression and MUNIT_UNUSED
+ * expands to nothing. */
+#if defined(__GNUC__)
+#  define MUNIT_LIKELY(expr) (__builtin_expect((expr), 1))
+#  define MUNIT_UNLIKELY(expr) (__builtin_expect((expr), 0))
+#  define MUNIT_UNUSED __attribute__((__unused__))
+#else
+#  define MUNIT_LIKELY(expr) (expr)
+#  define MUNIT_UNLIKELY(expr) (expr)
+#  define MUNIT_UNUSED
+#endif
+
+/* printf length modifiers for size_t, char and short arguments, spliced
+ * into the format strings of the assertion macros.  Outside Windows these
+ * are "z", "hh" and "h".  On Windows the size modifier is "I64" for
+ * 64-bit x86 targets and empty otherwise, and the char/short modifiers
+ * are empty. */
+#if !defined(_WIN32)
+#  define MUNIT_SIZE_MODIFIER "z"
+#  define MUNIT_CHAR_MODIFIER "hh"
+#  define MUNIT_SHORT_MODIFIER "h"
+#else
+#  if defined(_M_X64) || defined(__amd64__)
+#    define MUNIT_SIZE_MODIFIER "I64"
+#  else
+#    define MUNIT_SIZE_MODIFIER ""
+#  endif
+#  define MUNIT_CHAR_MODIFIER ""
+#  define MUNIT_SHORT_MODIFIER ""
+#endif
+
+/* Marks a function that never returns.  Uses _Noreturn for C11 and later,
+ * otherwise the GNU attribute, otherwise the MSVC declspec, and expands to
+ * nothing when none of those is available. */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#  define MUNIT_NO_RETURN _Noreturn
+#elif defined(__GNUC__)
+#  define MUNIT_NO_RETURN __attribute__((__noreturn__))
+#elif defined(_MSC_VER)
+#  define MUNIT_NO_RETURN __declspec(noreturn)
+#else
+#  define MUNIT_NO_RETURN
+#endif
+
+/* Push/pop pair that disables MSVC warning 4127 for the code in between
+ * (_MSC_VER 1500 and newer); both expand to nothing elsewhere.  The
+ * assertion macros wrap their `while (0)` with this pair. */
+#if defined(_MSC_VER) && (_MSC_VER >= 1500)
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_                                       \
+    __pragma(warning(push)) __pragma(warning(disable : 4127))
+#  define MUNIT_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
+#else
+#  define MUNIT_PUSH_DISABLE_MSVC_C4127_
+#  define MUNIT_POP_DISABLE_MSVC_C4127_
+#endif
+
+/* Severity of a log message, in increasing numeric order from DEBUG to
+ * ERROR. */
+typedef enum {
+  MUNIT_LOG_DEBUG,
+  MUNIT_LOG_INFO,
+  MUNIT_LOG_WARNING,
+  MUNIT_LOG_ERROR
+} MunitLogLevel;
+
+/* Declares that a function takes a printf-style format string at
+ * parameter `string_index`, with the variadic arguments starting at
+ * `first_to_check`.  Only active for GNU-compatible compilers other than
+ * MinGW; expands to nothing elsewhere. */
+#if defined(__GNUC__) && !defined(__MINGW32__)
+#  define MUNIT_PRINTF(string_index, first_to_check)                           \
+    __attribute__((format(printf, string_index, first_to_check)))
+#else
+#  define MUNIT_PRINTF(string_index, first_to_check)
+#endif
+
+/* Log a printf-formatted message at `level`, tagged with the given source
+ * file name and line (only the declaration is visible here). */
+MUNIT_PRINTF(4, 5)
+void munit_logf_ex(MunitLogLevel level, const char *filename, int line,
+                   const char *format, ...);
+
+/* Like munit_logf_ex, with the call site's __FILE__ and __LINE__ filled
+ * in.  At least one argument must follow `format`. */
+#define munit_logf(level, format, ...)                                         \
+  munit_logf_ex(level, __FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Log a plain string; it is passed through "%s", so it is never
+ * interpreted as a format. */
+#define munit_log(level, msg) munit_logf(level, "%s", msg)
+
+/* Report a printf-formatted error tagged with the given source file name
+ * and line.  Declared as never returning (only the declaration is visible
+ * here). */
+MUNIT_NO_RETURN
+MUNIT_PRINTF(3, 4)
+void munit_errorf_ex(const char *filename, int line, const char *format, ...);
+
+/* Like munit_errorf_ex, with the call site's __FILE__ and __LINE__ filled
+ * in.  At least one argument must follow `format`. */
+#define munit_errorf(format, ...)                                              \
+  munit_errorf_ex(__FILE__, __LINE__, format, __VA_ARGS__)
+
+/* Report a plain string as an error; it is passed through "%s", so it is
+ * never interpreted as a format. */
+#define munit_error(msg) munit_errorf("%s", msg)
+
+/* Fail with "assertion failed: <expr>" (via munit_error) when `expr`
+ * evaluates to false.  `expr` is evaluated once. */
+#define munit_assert(expr)                                                     \
+  do {                                                                         \
+    if (!MUNIT_LIKELY(expr)) {                                                 \
+      munit_error("assertion failed: " #expr);                                 \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Same check as munit_assert, with the failure message worded as
+ * "<expr> is not true". */
+#define munit_assert_true(expr)                                                \
+  do {                                                                         \
+    if (!MUNIT_LIKELY(expr)) {                                                 \
+      munit_error("assertion failed: " #expr " is not true");                  \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Fail with "<expr> is not false" when `expr` evaluates to true.  `expr`
+ * is evaluated once. */
+#define munit_assert_false(expr)                                               \
+  do {                                                                         \
+    if (!MUNIT_LIKELY(!(expr))) {                                              \
+      munit_error("assertion failed: " #expr " is not false");                 \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* Generic typed comparison assertion.
+ *
+ * Both operands are first converted to type T and stored in temporaries,
+ * so each is evaluated exactly once, then compared with the operator `op`.
+ * On failure the message shows the source text of the comparison followed
+ * by both values, each printed with the conversion "%" fmt wrapped in the
+ * `prefix` and `suffix` string literals. */
+#define munit_assert_type_full(prefix, suffix, T, fmt, a, op, b)               \
+  do {                                                                         \
+    T munit_tmp_a_ = (a);                                                      \
+    T munit_tmp_b_ = (b);                                                      \
+    if (!(munit_tmp_a_ op munit_tmp_b_)) {                                     \
+      munit_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix        \
+                   " %s " prefix "%" fmt suffix ")",                           \
+                   #a, #op, #b, munit_tmp_a_, #op, munit_tmp_b_);              \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+/* munit_assert_type_full with no decoration around the printed values. */
+#define munit_assert_type(T, fmt, a, op, b)                                    \
+  munit_assert_type_full("", "", T, fmt, a, op, b)
+
+/* Comparison assertions for the built-in C integer types, used as
+ * munit_assert_int(a, ==, b).  The char variants print their values as
+ * two-digit hex inside '\x..' quotes; the others print decimal. */
+#define munit_assert_char(a, op, b)                                            \
+  munit_assert_type_full("'\\x", "'", char, "02" MUNIT_CHAR_MODIFIER "x", a,   \
+                         op, b)
+#define munit_assert_uchar(a, op, b)                                           \
+  munit_assert_type_full("'\\x", "'", unsigned char,                           \
+                         "02" MUNIT_CHAR_MODIFIER "x", a, op, b)
+#define munit_assert_short(a, op, b)                                           \
+  munit_assert_type(short, MUNIT_SHORT_MODIFIER "d", a, op, b)
+#define munit_assert_ushort(a, op, b)                                          \
+  munit_assert_type(unsigned short, MUNIT_SHORT_MODIFIER "u", a, op, b)
+#define munit_assert_int(a, op, b) munit_assert_type(int, "d", a, op, b)
+#define munit_assert_uint(a, op, b)                                            \
+  munit_assert_type(unsigned int, "u", a, op, b)
+#define munit_assert_long(a, op, b) munit_assert_type(long int, "ld", a, op, b)
+#define munit_assert_ulong(a, op, b)                                           \
+  munit_assert_type(unsigned long int, "lu", a, op, b)
+#define munit_assert_llong(a, op, b)                                           \
+  munit_assert_type(long long int, "lld", a, op, b)
+#define munit_assert_ullong(a, op, b)                                          \
+  munit_assert_type(unsigned long long int, "llu", a, op, b)
+
+/* Comparison assertions for size_t / ssize_t (printed with the
+ * platform's size length modifier), float, double and pointers.
+ * NOTE(review): ssize_t is not provided by any header included in the
+ * visible part of this file -- the includer presumably has to make it
+ * available; confirm. */
+#define munit_assert_size(a, op, b)                                            \
+  munit_assert_type(size_t, MUNIT_SIZE_MODIFIER "u", a, op, b)
+#define munit_assert_ssize(a, op, b)                                           \
+  munit_assert_type(ssize_t, MUNIT_SIZE_MODIFIER "d", a, op, b)
+
+#define munit_assert_float(a, op, b) munit_assert_type(float, "f", a, op, b)
+#define munit_assert_double(a, op, b) munit_assert_type(double, "g", a, op, b)
+#define munit_assert_ptr(a, op, b)                                             \
+  munit_assert_type(const void *, "p", a, op, b)
+
+/* Comparison assertions for the fixed-width integer aliases, printed with
+ * the matching PRIi / PRIu conversion macro. */
+#define munit_assert_int8(a, op, b)                                            \
+  munit_assert_type(munit_int8_t, PRIi8, a, op, b)
+#define munit_assert_uint8(a, op, b)                                           \
+  munit_assert_type(munit_uint8_t, PRIu8, a, op, b)
+#define munit_assert_int16(a, op, b)                                           \
+  munit_assert_type(munit_int16_t, PRIi16, a, op, b)
+#define munit_assert_uint16(a, op, b)                                          \
+  munit_assert_type(munit_uint16_t, PRIu16, a, op, b)
+#define munit_assert_int32(a, op, b)                                           \
+  munit_assert_type(munit_int32_t, PRIi32, a, op, b)
+#define munit_assert_uint32(a, op, b)                                          \
+  munit_assert_type(munit_uint32_t, PRIu32, a, op, b)
+#define munit_assert_int64(a, op, b)                                           \
+  munit_assert_type(munit_int64_t, PRIi64, a, op, b)
+#define munit_assert_uint64(a, op, b)                                          \
+  munit_assert_type(munit_uint64_t, PRIu64, a, op, b)
+
+/* Comparison assertion for ptrdiff_t values, printed with "%td". */
+#define munit_assert_ptrdiff(a, op, b)                                         \
+  munit_assert_type(ptrdiff_t, "td", a, op, b)
+
+/* Comparison assertion for values of the enum type T, printed with "%d". */
+#define munit_assert_enum(T, a, op, b) munit_assert_type(T, "d", a, op, b)
+
+/* Assert that two doubles differ by at most 10^-precision.
+ *
+ * `precision` is token-pasted onto "1e-" and stringized into the "%0.Ng"
+ * conversions of the failure message, so it has to be written as a plain
+ * digit sequence at the call site; a variable or expression will not
+ * work.  `a` and `b` are each evaluated once. */
+#define munit_assert_double_equal(a, b, precision)                             \
+  do {                                                                         \
+    const double munit_tmp_a_ = (a);                                           \
+    const double munit_tmp_b_ = (b);                                           \
+    const double munit_tmp_diff_ = ((munit_tmp_a_ - munit_tmp_b_) < 0)         \
+                                     ? -(munit_tmp_a_ - munit_tmp_b_)          \
+                                     : (munit_tmp_a_ - munit_tmp_b_);          \
+    if (MUNIT_UNLIKELY(munit_tmp_diff_ > 1e-##precision)) {                    \
+      munit_errorf("assertion failed: %s == %s (%0." #precision                \
+                   "g == %0." #precision "g)",                                 \
+                   #a, #b, munit_tmp_a_, munit_tmp_b_);                        \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#include <string.h>
+/* Assert that two NUL-terminated strings compare equal with strcmp.
+ * On failure a hexdump diff of both strings (without their terminators)
+ * is written to stderr before the error is reported.  Both arguments are
+ * evaluated once and are passed to strcmp unchecked, so neither may be
+ * NULL. */
+#define munit_assert_string_equal(a, b)                                        \
+  do {                                                                         \
+    const char *munit_tmp_a_ = (a);                                            \
+    const char *munit_tmp_b_ = (b);                                            \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) != 0)) {             \
+      munit_hexdump_diff(stderr, munit_tmp_a_, strlen(munit_tmp_a_),           \
+                         munit_tmp_b_, strlen(munit_tmp_b_));                  \
+      munit_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")", #a, \
+                   #b, munit_tmp_a_, munit_tmp_b_);                            \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#define munit_assert_string_not_equal(a, b)                                    \
+  do { /* a, b: C strings; munit_errorf() unless strcmp(a, b) != 0 */          \
+    const char *munit_tmp_a_ = (a);                                            \
+    const char *munit_tmp_b_ = (b);                                            \
+    if (MUNIT_UNLIKELY(strcmp(munit_tmp_a_, munit_tmp_b_) == 0)) {             \
+      munit_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")", #a, \
+                   #b, munit_tmp_a_, munit_tmp_b_);                            \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#define munit_assert_memory_equal(size, a, b)                                  \
+  do { /* size, a and b are each evaluated exactly once */                     \
+    const unsigned char *munit_tmp_a_ = (const unsigned char *)(a);            \
+    const unsigned char *munit_tmp_b_ = (const unsigned char *)(b);            \
+    const size_t munit_tmp_size_ = (size);                                     \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_)) != \
+        0) {                                                                   \
+      size_t munit_tmp_pos_;                                                   \
+      for (munit_tmp_pos_ = 0; munit_tmp_pos_ < munit_tmp_size_;               \
+           munit_tmp_pos_++) {                                                 \
+        if (munit_tmp_a_[munit_tmp_pos_] != munit_tmp_b_[munit_tmp_pos_]) {    \
+          munit_hexdump_diff(stderr, munit_tmp_a_, munit_tmp_size_,            \
+                             munit_tmp_b_, munit_tmp_size_);                   \
+          munit_errorf("assertion failed: memory %s == %s, at offset "         \
+                       "%" MUNIT_SIZE_MODIFIER "u", #a, #b, munit_tmp_pos_);   \
+          break;                                                               \
+        }                                                                      \
+      }                                                                        \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#define munit_assert_memn_equal(a, a_size, b, b_size)                          \
+  do { /* error unless sizes match and (if nonzero) bytes are equal */         \
+    const unsigned char *munit_tmp_a_ = (const unsigned char *)(a);            \
+    const unsigned char *munit_tmp_b_ = (const unsigned char *)(b);            \
+    const size_t munit_tmp_a_size_ = (a_size);                                 \
+    const size_t munit_tmp_b_size_ = (b_size);                                 \
+    if (MUNIT_UNLIKELY(munit_tmp_a_size_ != munit_tmp_b_size_) ||              \
+        MUNIT_UNLIKELY(munit_tmp_a_size_ && memcmp(munit_tmp_a_, munit_tmp_b_, \
+                                                   munit_tmp_a_size_)) != 0) { \
+      munit_hexdump_diff(stderr, munit_tmp_a_, munit_tmp_a_size_,              \
+                         munit_tmp_b_, munit_tmp_b_size_);                     \
+      munit_errorf("assertion failed: memory %s == %s", #a, #b);               \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#define munit_assert_memory_not_equal(size, a, b)                              \
+  do { /* error when the first `size` bytes of a and b are identical */        \
+    const unsigned char *munit_tmp_a_ = (const unsigned char *)(a);            \
+    const unsigned char *munit_tmp_b_ = (const unsigned char *)(b);            \
+    const size_t munit_tmp_size_ = (size);                                     \
+    if (MUNIT_UNLIKELY(memcmp(munit_tmp_a_, munit_tmp_b_, munit_tmp_size_)) == \
+        0) {                                                                   \
+      munit_errorf("assertion failed: memory %s != %s (%" MUNIT_SIZE_MODIFIER  \
+                   "u bytes)", #a, #b, munit_tmp_size_);                       \
+    }                                                                          \
+    MUNIT_PUSH_DISABLE_MSVC_C4127_                                             \
+  } while (0) MUNIT_POP_DISABLE_MSVC_C4127_
+
+#define munit_assert_ptr_equal(a, b) munit_assert_ptr(a, ==, b)
+#define munit_assert_ptr_not_equal(a, b) munit_assert_ptr(a, !=, b)
+#define munit_assert_null(ptr) munit_assert_ptr(ptr, ==, NULL)
+#define munit_assert_not_null(ptr) munit_assert_ptr(ptr, !=, NULL)
+#define munit_assert_ptr_null(ptr) munit_assert_null(ptr) /* same check */
+#define munit_assert_ptr_not_null(ptr) munit_assert_not_null(ptr) /* ditto */
+
+/*** Memory allocation ***/
+
+void *munit_malloc_ex(const char *filename, int line, size_t size);
+/* Allocate `size` bytes, passing the call site's file and line along. */
+#define munit_malloc(size) munit_malloc_ex(__FILE__, __LINE__, (size))
+/* Allocate room for one object of `type` and cast the result to `type *`. */
+#define munit_new(type) ((type *)munit_malloc(sizeof(type)))
+/* NOTE(review): the product nmemb * size is not checked for overflow. */
+#define munit_calloc(nmemb, size) munit_malloc((nmemb) * (size))
+/* Allocate room for `nmemb` objects of `type` via munit_calloc(). */
+#define munit_newa(type, nmemb) ((type *)munit_calloc((nmemb), sizeof(type)))
+
+/*** Random number generation ***/
+
+void munit_rand_seed(munit_uint32_t seed);
+munit_uint32_t munit_rand_uint32(void);
+int munit_rand_int_range(int min, int max); /* TODO confirm: inclusive? */
+double munit_rand_double(void);             /* TODO confirm: value range */
+void munit_rand_memory(size_t size, munit_uint8_t *buffer); /* size first */
+
+/*** Tests and Suites ***/
+
+typedef enum {
+  /* Test successful */
+  MUNIT_OK,
+  /* Test failed */
+  MUNIT_FAIL,
+  /* Test was skipped */
+  MUNIT_SKIP,
+  /* Test failed due to circumstances not intended to be tested
+   * (things like network errors, invalid parameter value, failure to
+   * allocate memory in the test harness, etc.). */
+  MUNIT_ERROR
+} MunitResult; /* return type of every MunitTestFunc */
+
+typedef struct {
+  char *name;
+  char **values; /* presumably a NULL-terminated list of candidates (confirm) */
+} MunitParameterEnum;
+
+typedef struct {
+  char *name;
+  char *value; /* the value paired with `name` */
+} MunitParameter;
+/* Presumably returns the value stored under `key`, or NULL if absent. */
+const char *munit_parameters_get(const MunitParameter params[],
+                                 const char *key);
+
+typedef enum {
+  MUNIT_TEST_OPTION_NONE = 0,
+  MUNIT_TEST_OPTION_SINGLE_ITERATION = 1 << 0,
+  MUNIT_TEST_OPTION_TODO = 1 << 1
+} MunitTestOptions; /* each non-zero option is a distinct bit */
+
+typedef MunitResult (*MunitTestFunc)(const MunitParameter params[],
+                                     void *user_data_or_fixture);
+typedef void *(*MunitTestSetup)(const MunitParameter params[], void *user_data);
+typedef void (*MunitTestTearDown)(void *fixture);
+
+typedef struct {
+  const char *name;               /* munit_void_test() uses "/" #func */
+  MunitTestFunc test;             /* the test body */
+  MunitTestSetup setup;           /* NULL in munit_void_test() entries */
+  MunitTestTearDown tear_down;    /* NULL in munit_void_test() entries */
+  MunitTestOptions options;
+  MunitParameterEnum *parameters; /* NULL in munit_void_test() entries */
+} MunitTest;
+
+typedef enum { MUNIT_SUITE_OPTION_NONE = 0 } MunitSuiteOptions;
+
+typedef struct MunitSuite_ MunitSuite;
+
+struct MunitSuite_ {
+  const char *prefix;       /* presumably prepended to test names (confirm) */
+  const MunitTest *tests;   /* test array; its end marker is not visible here */
+  const MunitSuite *suites; /* child suites; the type refers to itself */
+  unsigned int iterations;
+  MunitSuiteOptions options;
+};
+
+int munit_suite_main(const MunitSuite *suite, void *user_data, int argc,
+                     char *const *argv);
+
+/* Note: I'm not very happy with this API; it's likely to change if I
+ * figure out something better.  Suggestions welcome. */
+
+typedef struct MunitArgument_ MunitArgument;
+
+struct MunitArgument_ {
+  char *name;
+  munit_bool (*parse_argument)(const MunitSuite *suite, void *user_data,
+                               int *arg, int argc, char *const *argv);
+  void (*write_help)(const MunitArgument *argument, void *user_data);
+};
+/* Takes the munit_suite_main() parameters plus a MunitArgument array. */
+int munit_suite_main_custom(const MunitSuite *suite, void *user_data, int argc,
+                            char *const *argv, const MunitArgument arguments[]);
+
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES) /* opt-in unprefixed spellings */
+
+#  define assert_true(expr) munit_assert_true(expr)
+#  define assert_false(expr) munit_assert_false(expr)
+#  define assert_char(a, op, b) munit_assert_char(a, op, b)
+#  define assert_uchar(a, op, b) munit_assert_uchar(a, op, b)
+#  define assert_short(a, op, b) munit_assert_short(a, op, b)
+#  define assert_ushort(a, op, b) munit_assert_ushort(a, op, b)
+#  define assert_int(a, op, b) munit_assert_int(a, op, b)
+#  define assert_uint(a, op, b) munit_assert_uint(a, op, b)
+#  define assert_long(a, op, b) munit_assert_long(a, op, b)
+#  define assert_ulong(a, op, b) munit_assert_ulong(a, op, b)
+#  define assert_llong(a, op, b) munit_assert_llong(a, op, b)
+#  define assert_ullong(a, op, b) munit_assert_ullong(a, op, b)
+#  define assert_size(a, op, b) munit_assert_size(a, op, b)
+#  define assert_ssize(a, op, b) munit_assert_ssize(a, op, b)
+#  define assert_float(a, op, b) munit_assert_float(a, op, b)
+#  define assert_double(a, op, b) munit_assert_double(a, op, b)
+#  define assert_ptr(a, op, b) munit_assert_ptr(a, op, b)
+
+#  define assert_int8(a, op, b) munit_assert_int8(a, op, b)
+#  define assert_uint8(a, op, b) munit_assert_uint8(a, op, b)
+#  define assert_int16(a, op, b) munit_assert_int16(a, op, b)
+#  define assert_uint16(a, op, b) munit_assert_uint16(a, op, b)
+#  define assert_int32(a, op, b) munit_assert_int32(a, op, b)
+#  define assert_uint32(a, op, b) munit_assert_uint32(a, op, b)
+#  define assert_int64(a, op, b) munit_assert_int64(a, op, b)
+#  define assert_uint64(a, op, b) munit_assert_uint64(a, op, b)
+
+#  define assert_ptrdiff(a, op, b) munit_assert_ptrdiff(a, op, b)
+
+#  define assert_enum(T, a, op, b) munit_assert_enum(T, a, op, b)
+
+#  define assert_double_equal(a, b, precision)                                 \
+    munit_assert_double_equal(a, b, precision)
+#  define assert_string_equal(a, b) munit_assert_string_equal(a, b)
+#  define assert_string_not_equal(a, b) munit_assert_string_not_equal(a, b)
+#  define assert_memory_equal(size, a, b) munit_assert_memory_equal(size, a, b)
+#  define assert_memn_equal(a, a_size, b, b_size)                              \
+    munit_assert_memn_equal(a, a_size, b, b_size)
+#  define assert_memory_not_equal(size, a, b)                                  \
+    munit_assert_memory_not_equal(size, a, b)
+#  define assert_ptr_equal(a, b) munit_assert_ptr_equal(a, b)
+#  define assert_ptr_not_equal(a, b) munit_assert_ptr_not_equal(a, b)
+#  define assert_ptr_null(ptr) munit_assert_ptr_null(ptr)
+#  define assert_ptr_not_null(ptr) munit_assert_not_null(ptr)
+
+#  define assert_null(ptr) munit_assert_null(ptr)
+#  define assert_not_null(ptr) munit_assert_not_null(ptr)
+
+#endif /* defined(MUNIT_ENABLE_ASSERT_ALIASES) */
+
+#define munit_void_test_decl(func)                                             \
+  void func(void);                                                             \
+  /* wrap_<func>: adapts func to MunitTestFunc and always returns MUNIT_OK. */ \
+  static inline MunitResult wrap_##func(const MunitParameter params[],         \
+                                        void *fixture) {                       \
+    (void)params;                                                              \
+    (void)fixture;                                                             \
+                                                                               \
+    func();                                                                    \
+    return MUNIT_OK;                                                           \
+  }
+/* MunitTest initializer for a function declared with munit_void_test_decl. */
+#define munit_void_test(func)                                                  \
+  {"/" #func, wrap_##func, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}
+/* All-NULL MunitTest entry; presumably terminates a test array (confirm). */
+#define munit_test_end() {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}
+
+int munit_hexdump(FILE *fp, const void *data, size_t datalen);
+/* The equality assert macros call this with stderr before reporting. */
+int munit_hexdump_diff(FILE *fp, const void *a, size_t alen, const void *b,
+                       size_t blen);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* !defined(MUNIT_H) */
+
+#if defined(MUNIT_ENABLE_ASSERT_ALIASES) /* not inside the MUNIT_H guard */
+#if defined(assert)
+#  undef assert
+#endif
+#define assert(expr) munit_assert(expr) /* replaces any previous assert */
+#endif
diff --git a/sys/contrib/openzfs/tests/unit/test_zap.c b/sys/contrib/openzfs/tests/unit/test_zap.c
new file mode 100644
index 00000000000..c64de7d75c4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/test_zap.c
@@ -0,0 +1,1170 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+#include <stdbool.h>
+
+#include <sys/zap.h>
+#include <sys/btree.h>
+typedef struct spa spa_t;	/* forward decl for zap_impl.h */
+#include <sys/zap_impl.h>
+
+#include "mock_dmu.h"
+#include "unit.h"
+
+/* ========== */
+
+/*
+ * zfs_crc64_table normally lives in (and is filled by) arc.c. It is defined
+ * and populated here instead so that this test can link without arc.c.
+ */
+uint64_t zfs_crc64_table[256];
+
+static void
+mock_crc64_init(void)
+{
+	for (int byte = 0; byte < 256; byte++) {
+		uint64_t crc = byte;	/* entry for this byte value */
+		for (int bit = 0; bit < 8; bit++)
+			crc = (crc >> 1) ^ ((crc & 1) ? ZFS_CRC64_POLY : 0);
+		zfs_crc64_table[byte] = crc;
+	}
+}
+
+/* Misc utility functions. */
+/* rd64: read the uint64_t stored `off` bytes past `ptr`, with no byteswap. */
+#define	rd64(ptr, off)	(*(const uint64_t *)((const char *)(ptr) + (off)))
+
+/* ========== */
+
+/* ZAP-specific mocks and other test helpers. */
+
+/* Build an empty microzap on top of a freshly created mock dnode. */
+static dnode_t *
+mock_zap_create_microzap(void)
+{
+	/*
+	 * DMU_OTN_ZAP_DATA is used so that DMU_OT_BYTESWAP() yields
+	 * DMU_BSWAP_ZAP without looking at dmu_ot[], which the mock does
+	 * not currently provide.
+	 */
+	dnode_t *dn = (dnode_t *)mock_dnode_create(512, DMU_OTN_ZAP_DATA);
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+	mzap_create_impl(dn, 0, 0, tx);
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	return (dn);
+}
+
+/* Create a fatzap backed by a mock dnode. */
+static dnode_t *
+mock_zap_create_fatzap(void)
+{
+	/*
+	 * Only microzaps can be created directly and they only hold u64s, so
+	 * add (then remove) a u16 to force the upgrade; both must succeed.
+	 */
+	dnode_t *dn = mock_zap_create_microzap();
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+	uint16_t v = 0;
+	unit_ok(zap_add_by_dnode(dn, "_upgrade", sizeof (v), 1, &v, tx));
+	unit_ok(zap_remove_by_dnode(dn, "_upgrade", tx));
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	return (dn);
+}
+
+static bool
+mock_zap_is_microzap(dnode_t *dn)
+{
+	/* Block 0 of a microzap starts with ZBT_MICRO. */
+	const void *hdr = mock_dnode_block_data((mock_dnode_t *)dn, 0);
+	return (ZBT_MICRO == rd64(hdr, 0));
+}
+
+static bool
+mock_zap_is_fatzap(dnode_t *dn)
+{
+	/* Block 0 of a fatzap starts with ZBT_HEADER, then ZAP_MAGIC. */
+	const void *hdr = mock_dnode_block_data((mock_dnode_t *)dn, 0);
+	return (ZBT_HEADER == rd64(hdr, 0) && ZAP_MAGIC == rd64(hdr, 8));
+}
+
+static void
+mock_zap_destroy(dnode_t *dn)
+{
+	/* The refcount must be exactly 1 when the mock dnode is torn down. */
+	unit_eq(mock_dnode_refcount((mock_dnode_t *)dn), 1);
+	mock_dnode_destroy((mock_dnode_t *)dn);
+}
+
+/* Build the kind of ZAP ("micro" or "fat") that test param `key` names. */
+static dnode_t *
+mock_zap_create_params(const MunitParameter params[], const char *key)
+{
+	const char *type = munit_parameters_get(params, key);
+	if (type == NULL)
+		munit_error("mock_zap_create_params: missing type param");
+	if (strcmp(type, "micro") == 0)
+		return (mock_zap_create_microzap());
+	if (strcmp(type, "fat") == 0)
+		return (mock_zap_create_fatzap());
+	munit_errorf("mock_zap_create_params: invalid type '%s'", type);
+	__builtin_unreachable();	/* munit_error*() must not return */
+}
+
+/*
+ * Check that the stored ZAP still has the type ("micro" or "fat") named by
+ * test param `key`. Tests call this at the end as a sanity check that the
+ * ZAP was not unexpectedly upgraded along the way.
+ */
+static bool
+mock_zap_is_params(dnode_t *dn, const MunitParameter params[],
+    const char *key)
+{
+	const char *type = munit_parameters_get(params, key);
+	/* The munit_error*() calls below are expected not to return. */
+	if (type == NULL)
+		munit_error("mock_zap_is_params: missing type param");
+	if (strcmp(type, "micro") == 0)
+		return (mock_zap_is_microzap(dn));
+	if (strcmp(type, "fat") == 0)
+		return (mock_zap_is_fatzap(dn));
+	munit_errorf("mock_zap_is_params: invalid type '%s'", type);
+	__builtin_unreachable();
+}
+
+/* ========== */
+
+/*
+ * Sanity checks for mock ZAPs. Ensures that the mock_zap_create_* functions
+ * really do create the right kind of ZAPs, since many of the tests need to
+ * run against both kinds to confirm that they all work the same way.
+ */
+static MunitResult
+test_mock_microzap_sanity(const MunitParameter params[], void *data)
+{
+	(void) params; (void) data;
+
+	dnode_t *zap = mock_zap_create_microzap();
+	unit_true(mock_zap_is_microzap(zap));
+	mock_zap_destroy(zap);
+
+	return (MUNIT_OK);
+}
+
+static MunitResult
+test_mock_fatzap_sanity(const MunitParameter params[], void *data)
+{
+	(void) params; (void) data;
+
+	dnode_t *zap = mock_zap_create_fatzap();
+	unit_true(mock_zap_is_fatzap(zap));
+	mock_zap_destroy(zap);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/*
+ * Smoke test covering add, lookup and remove in one go. They are grouped
+ * because every other test depends on these three primitives working.
+ */
+static MunitResult
+test_zap_basic(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Store three entries, one of which holds zero. */
+	uint64_t hello = 42;
+	uint64_t world = 99;
+	uint64_t zero = 0;
+
+	unit_ok(zap_add_by_dnode(dn, "hello",
+	    sizeof (hello), 1, &hello, tx));
+	unit_ok(zap_add_by_dnode(dn, "world",
+	    sizeof (world), 1, &world, tx));
+	unit_ok(zap_add_by_dnode(dn, "zero",
+	    sizeof (zero), 1, &zero, tx));
+
+	/* Each entry reads back with the value it was stored with. */
+	uint64_t got = 0;
+	unit_ok(zap_lookup_by_dnode(dn, "hello",
+	    sizeof (got), 1, &got));
+	unit_eq(got, 42);
+
+	unit_ok(zap_lookup_by_dnode(dn, "world",
+	    sizeof (got), 1, &got));
+	unit_eq(got, 99);
+
+	unit_ok(zap_lookup_by_dnode(dn, "zero",
+	    sizeof (got), 1, &got));
+	unit_eq(got, 0);
+
+	/* A key that was never added reports ENOENT. */
+	unit_err(zap_lookup_by_dnode(dn, "nope",
+	    sizeof (got), 1, &got), ENOENT);
+
+	/* Once removed, a key can no longer be looked up. */
+	unit_ok(zap_remove_by_dnode(dn, "world", tx));
+	unit_err(zap_lookup_by_dnode(dn, "world",
+	    sizeof (got), 1, &got), ENOENT);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/*
+ * "Core" ZAP API tests. Covers the most basic functionality upon which
+ * everything else is built.
+ *
+ * Note that to avoid microzap upgrade here, we only use short keys and
+ * single-uint64 values.
+ */
+
+/* zap_add: inserts new entries and refuses to overwrite existing ones. */
+static MunitResult
+test_zap_add(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* After adding "a" it can be looked up by that name. */
+	uint64_t in_a = 1, out_a = 0;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (in_a), 1, &in_a, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (out_a), 1, &out_a));
+	unit_eq(out_a, 1);
+
+	/* The same holds for a second key, "b". */
+	uint64_t in_b = 2, out_b = 0;
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (in_b), 1, &in_b, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "b", sizeof (out_b), 1, &out_b));
+	unit_eq(out_b, 2);
+
+	/* Adding "b" did not disturb "a". */
+	out_a = 0;
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (out_a), 1, &out_a));
+	unit_eq(out_a, 1);
+
+	/* A second add of "a" is rejected with EEXIST. */
+	unit_err(zap_add_by_dnode(dn, "a",
+	    sizeof (in_a), 1, &in_a, tx), EEXIST);
+
+	/* It is rejected even when the new value differs. */
+	in_a = 2;
+	unit_err(zap_add_by_dnode(dn, "a",
+	    sizeof (in_a), 1, &in_a, tx), EEXIST);
+
+	/* The rejected adds left the original value in place. */
+	out_a = 0;
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (out_a), 1, &out_a));
+	unit_eq(out_a, 1);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_update: inserts a missing entry or overwrites an existing one. */
+static MunitResult
+test_zap_update(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Updating a key that does not exist yet creates it. */
+	uint64_t in = 1, out = 0;
+	unit_ok(zap_update_by_dnode(dn, "a", sizeof (in), 1, &in, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (out), 1, &out));
+	unit_eq(out, 1);
+
+	/* Updating it again succeeds and replaces the value. */
+	in = 2;
+	unit_ok(zap_update_by_dnode(dn, "a", sizeof (in), 1, &in, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (out), 1, &out));
+	unit_eq(out, 2);
+
+	/* Still exactly one entry: the update did not add a duplicate. */
+	uint64_t entries = 0;
+	unit_ok(zap_count_by_dnode(dn, &entries));
+	unit_eq(entries, 1);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_remove: deletes an existing entry and leaves the others alone. */
+static MunitResult
+test_zap_remove(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Nothing has been added yet, so removing "a" reports ENOENT. */
+	unit_err(zap_remove_by_dnode(dn, "a", tx), ENOENT);
+
+	/* Populate two keys. */
+	uint64_t in_a = 1, in_b = 2;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (in_a), 1, &in_a, tx));
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (in_b), 1, &in_b, tx));
+
+	/* Now that "a" exists, removing it succeeds. */
+	unit_ok(zap_remove_by_dnode(dn, "a", tx));
+
+	/* The removed key can no longer be looked up. */
+	uint64_t out_a = 0;
+	unit_err(
+	    zap_lookup_by_dnode(dn, "a", sizeof (out_a), 1, &out_a), ENOENT);
+
+	/* The other key is untouched and still holds its value. */
+	uint64_t out_b = 0;
+	unit_ok(zap_lookup_by_dnode(dn, "b", sizeof (out_b), 1, &out_b));
+	unit_eq(out_b, 2);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_count: reports the number of entries currently stored. */
+static MunitResult
+test_zap_count(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* A freshly created ZAP reports no entries. */
+	uint64_t entries = 0;
+	unit_ok(zap_count_by_dnode(dn, &entries));
+	unit_eq(entries, 0);
+
+	/* Two adds later, the count is 2. */
+	uint64_t one = 1;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (one), 1, &one, tx));
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (one), 1, &one, tx));
+	unit_ok(zap_count_by_dnode(dn, &entries));
+	unit_eq(entries, 2);
+
+	/* Removing one of them brings the count back down to 1. */
+	unit_ok(zap_remove_by_dnode(dn, "a", tx));
+	unit_ok(zap_count_by_dnode(dn, &entries));
+	unit_eq(entries, 1);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_contains: tells whether a key exists, without fetching its value. */
+static MunitResult
+test_zap_contains(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	uint64_t one = 1;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (one), 1, &one, tx));
+	unit_ok(zap_contains_by_dnode(dn, "a"));
+	unit_err(zap_contains_by_dnode(dn, "b"), ENOENT);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_length: reports integer size and count without reading the value. */
+static MunitResult
+test_zap_length(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* One uint64 is stored: expect integer_size=8, num_integers=1. */
+	uint64_t val = 42;
+	unit_ok(zap_add_by_dnode(dn, "u64",
+	    sizeof (val), 1, &val, tx));
+
+	uint64_t int_size = 0, num_ints = 0;
+	unit_ok(zap_length_by_dnode(dn, "u64", &int_size, &num_ints));
+	unit_eq(int_size, 8);
+	unit_eq(num_ints, 1);
+
+	/* An unknown key reports ENOENT. */
+	unit_err(zap_length_by_dnode(dn, "nope", &int_size, &num_ints), ENOENT);
+
+	/* Each output pointer is optional; the other is still filled in. */
+	int_size = 0; num_ints = 0;
+	unit_ok(zap_length_by_dnode(dn, "u64", NULL, &num_ints));
+	unit_ok(zap_length_by_dnode(dn, "u64", &int_size, NULL));
+	unit_eq(int_size, 8);
+	unit_eq(num_ints, 1);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_increment: applies a signed delta to a stored integer. */
+static MunitResult
+test_zap_increment(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	uint64_t cur = 0;
+
+	/* Incrementing a key that does not exist creates it at the delta. */
+	unit_ok(zap_increment_by_dnode(dn, "a", 5, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (cur), 1, &cur));
+	unit_eq(cur, 5);
+
+	/* A second increment adds to the stored value. */
+	unit_ok(zap_increment_by_dnode(dn, "a", 3, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (cur), 1, &cur));
+	unit_eq(cur, 8);
+
+	/* A negative delta subtracts. */
+	unit_ok(zap_increment_by_dnode(dn, "a", -2, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (cur), 1, &cur));
+	unit_eq(cur, 6);
+
+	/* A delta of zero keeps the stored value as it was. */
+	cur = 0;
+	unit_ok(zap_increment_by_dnode(dn, "a", 0, tx));
+	unit_ok(zap_lookup_by_dnode(dn, "a", sizeof (cur), 1, &cur));
+	unit_eq(cur, 6);
+
+	/* When the value reaches zero the entry disappears. */
+	unit_ok(zap_increment_by_dnode(dn, "a", -6, tx));
+	unit_err(zap_lookup_by_dnode(dn, "a",
+	    sizeof (cur), 1, &cur), ENOENT);
+
+	/* A zero delta on a missing key succeeds and creates nothing. */
+	unit_ok(zap_increment_by_dnode(dn, "a", 0, tx));
+	unit_err(zap_lookup_by_dnode(dn, "a",
+	    sizeof (cur), 1, &cur), ENOENT);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/*
+ * zap_add_int/zap_remove_int/zap_lookup_int: entries that consist of a
+ * single uint64_t, which is stringified to form the key.
+ */
+static MunitResult
+test_zap_int(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Store two ints. */
+	unit_ok(zap_add_int_by_dnode(dn, 5, tx));
+	unit_ok(zap_add_int_by_dnode(dn, 17, tx));
+
+	/* Both can be found. */
+	unit_ok(zap_lookup_int_by_dnode(dn, 5));
+	unit_ok(zap_lookup_int_by_dnode(dn, 17));
+
+	/* An int that was never stored cannot. */
+	unit_err(zap_lookup_int_by_dnode(dn, 23), ENOENT);
+
+	/* Storing an int that is already present is rejected. */
+	unit_err(zap_add_int_by_dnode(dn, 17, tx), EEXIST);
+
+	/* After removal the int is gone. */
+	unit_ok(zap_remove_int_by_dnode(dn, 17, tx));
+	unit_err(zap_lookup_int_by_dnode(dn, 17), ENOENT);
+
+	/* And it can be added back. */
+	unit_ok(zap_add_int_by_dnode(dn, 17, tx));
+	unit_ok(zap_lookup_int_by_dnode(dn, 17));
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_*_int_key: like zap_*_int, but with separate value. */
+static MunitResult
+test_zap_int_keys(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Add some ints. */
+	unit_ok(zap_add_int_key_by_dnode(dn, 5, 17, tx));
+	unit_ok(zap_add_int_key_by_dnode(dn, 23, 35, tx));
+
+	/* Confirm they're there. */
+	uint64_t r = 0;
+	unit_ok(zap_lookup_int_key_by_dnode(dn, 5, &r));
+	unit_eq(r, 17);
+	unit_ok(zap_lookup_int_key_by_dnode(dn, 23, &r));
+	unit_eq(r, 35);
+
+	/* But not something we didn't add. */
+	unit_err(zap_lookup_int_key_by_dnode(dn, 79, &r), ENOENT);
+
+	/* Adding an existing key fails, but updating it replaces the value. */
+	unit_err(zap_add_int_key_by_dnode(dn, 23, 51, tx), EEXIST);
+	unit_ok(zap_update_int_key_by_dnode(dn, 23, 67, tx));
+	unit_ok(zap_lookup_int_key_by_dnode(dn, 23, &r));
+	unit_eq(r, 67);
+
+	/* Removing it works, and then it can't be found. */
+	unit_ok(zap_remove_int_by_dnode(dn, 23, tx));
+	unit_err(zap_lookup_int_key_by_dnode(dn, 23, &r), ENOENT);
+
+	/* And it can be added back. */
+	unit_ok(zap_add_int_key_by_dnode(dn, 23, 11, tx));
+	unit_ok(zap_lookup_int_key_by_dnode(dn, 23, &r));
+	unit_eq(r, 11);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/*
+ * Separate stats tests for each ZAP type, since they are about internals and
+ * so can and will produce different results.
+ */
+
+static MunitResult
+test_microzap_stats(const MunitParameter params[], void *data)
+{
+	(void) params; (void) data;
+
+	dnode_t *dn = mock_zap_create_microzap();
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	zap_stats_t stats;
+	uint64_t one = 1;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (one), 1, &one, tx));
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (one), 1, &one, tx));
+	unit_ok(zap_get_stats_by_dnode(dn, &stats));
+
+	/* Two entries went in above. */
+	unit_eq(stats.zs_num_entries, 2);
+
+	/* A microzap never occupies more than one block. */
+	unit_eq(stats.zs_num_blocks, 1);
+
+	/* 512 is the block size mock_zap_create_microzap() asks for. */
+	unit_eq(stats.zs_blocksize, 512);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_microzap(dn));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+static MunitResult
+test_fatzap_stats(const MunitParameter params[], void *data)
+{
+	(void) params; (void) data;
+
+	dnode_t *dn = mock_zap_create_fatzap();
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	zap_stats_t stats;
+	uint64_t one = 1;
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (one), 1, &one, tx));
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (one), 1, &one, tx));
+	unit_ok(zap_get_stats_by_dnode(dn, &stats));
+
+	/* Two entries went in above. */
+	unit_eq(stats.zs_num_entries, 2);
+
+	/* Expect the header block plus a single leaf block. */
+	unit_eq(stats.zs_num_blocks, 2);
+
+	/* The fatzap block size follows the fzap_default_block_shift tunable. */
+	unit_eq(stats.zs_blocksize, 1 << fzap_default_block_shift);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_fatzap(dn));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/* Cursor tests. */
+
+/*
+ * Basic cursor test. Add a bunch of keys+values to a ZAP, read them back
+ * via cursor, confirm they're all there and nothing else is.
+ */
+static MunitResult
+test_cursor(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* For each ASCII letter as key, add a unique value to the ZAP. */
+	for (int i = 0; i < 26; i++) {
+		char c = (char)i + 'a';
+		char k[2] = { c, '\0' };
+		uint64_t v = (uint64_t)c * 11;
+		unit_ok(zap_add_by_dnode(dn, k, sizeof (uint64_t), 1, &v, tx));
+	}
+
+	/* Sanity check; confirm they're all there by count. */
+	uint64_t count = 0;
+	unit_ok(zap_count_by_dnode(dn, &count));
+	unit_eq(count, 26);
+
+	zap_cursor_t zc;
+	zap_attribute_t *za = zap_attribute_alloc();
+
+	unit_ok(zap_cursor_init_by_dnode(&zc, dn));
+
+	/*
+	 * Cursors don't guarantee an order, so we run over them all,
+	 * confirm the key matches the value, and then set a bit for each
+	 * one we've seen. By the end, we should have seen them all.
+	 */
+	uint64_t seen = 0;
+	for (int i = 0; i < 26; i++) {
+		unit_ok(zap_cursor_retrieve(&zc, za));
+
+		/* Confirm attribute has the right details for the value. */
+		unit_eq(za->za_integer_length, sizeof (uint64_t));
+		unit_eq(za->za_num_integers, 1);
+
+		/*
+		 * And the right key in za_name. Note that we don't check
+		 * za_name_len, which is the length of a buffer that can
+		 * definitely hold the key, not the key length itself.
+		 */
+		char c = za->za_name[0];
+		unit_true(c >= 'a' && c <= 'z');
+		unit_zero(za->za_name[1]);
+
+		/* Check the value in the attribute. */
+		uint64_t v = (uint64_t)c * 11;
+		unit_eq(za->za_first_integer, v);
+
+		/*
+		 * Also do a direct lookup and confirm the value matches
+		 * the value from the attribute.
+		 */
+		char k[2] = { c, '\0' };
+		uint64_t result = 0;
+		unit_ok(zap_lookup_by_dnode(dn, k,
+		    sizeof (uint64_t), 1, &result));
+		unit_eq(result, v);
+
+		/* Remember this key; shift in 64 bits to match `seen`. */
+		seen |= 1ULL << (c - 'a');
+
+		zap_cursor_advance(&zc);
+	}
+
+	/* There should be no more keys in the ZAP. */
+	unit_err(zap_cursor_retrieve(&zc, za), ENOENT);
+
+	/* Bits 0-25 should be set if we've seen them all. */
+	unit_eq(seen, (1ULL << 26) - 1);
+
+	zap_attribute_free(za);
+	zap_cursor_fini(&zc);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/*
+ * Cursor serialize test. Add a bunch of items, use the cursor to read half of
+ * them back, then serialize the cursor. Reload the cursor from the serialized
+ * state and confirm that we pick up where we left off. Then do it again to
+ * ensure it doesn't rely on any internal state.
+ */
+static MunitResult
+test_cursor_serialize(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* For each ASCII letter as key, add a unique value to the ZAP. */
+	for (int i = 0; i < 26; i++) {
+		char c = (char)i + 'a';
+		char k[2] = { c, '\0' };
+		uint64_t v = (uint64_t)c * 11;
+		unit_ok(zap_add_by_dnode(dn, k, sizeof (uint64_t), 1, &v, tx));
+	}
+
+	/* Sanity check; confirm they're all there by count. */
+	uint64_t count = 0;
+	unit_ok(zap_count_by_dnode(dn, &count));
+	unit_eq(count, 26);
+
+	/*
+	 * Like test_cursor above, we'll walk over the ZAP and set bits
+	 * for each key we see.
+	 */
+	zap_cursor_t zc;
+	zap_attribute_t *za = zap_attribute_alloc();
+	uint64_t seen = 0;
+
+	unit_ok(zap_cursor_init_by_dnode(&zc, dn));
+	for (int i = 0; i < 13; i++) {
+		unit_ok(zap_cursor_retrieve(&zc, za));
+
+		char c = za->za_name[0];
+		unit_true(c >= 'a' && c <= 'z');
+
+		/* This one is good, set its bit (64-bit shift, as 'seen'). */
+		seen |= 1ULL << (c - 'a');
+
+		zap_cursor_advance(&zc);
+	}
+
+	/* Serialise and then terminate the cursor. */
+	uint64_t cookie = zap_cursor_serialize(&zc);
+	zap_cursor_fini(&zc);
+
+	/*
+	 * Record the bits we saw in the first iteration; we'll use this
+	 * when we reload the cursor a second time below.
+	 */
+	uint64_t orig_seen = seen;
+
+	/* Reinitialise the cursor from the cookie. */
+	unit_ok(zap_cursor_init_serialized_by_dnode(&zc, dn, cookie));
+
+	/* Loop over the remaining entries and track them. */
+	for (int i = 0; i < 13; i++) {
+		unit_ok(zap_cursor_retrieve(&zc, za));
+
+		char c = za->za_name[0];
+		unit_true(c >= 'a' && c <= 'z');
+
+		/* This one is good, set the bit to remember this fact. */
+		seen |= 1ULL << (c - 'a');
+
+		zap_cursor_advance(&zc);
+	}
+
+	/* There should be no more keys in the ZAP. */
+	unit_err(zap_cursor_retrieve(&zc, za), ENOENT);
+
+	/* Bits 0-25 should be set if we've seen them all. */
+	unit_eq(seen, (1ULL << 26) - 1);
+
+	/* Cursor done. */
+	zap_cursor_fini(&zc);
+
+	/*
+	 * Restore the seen state to before when we reinitialised the saved
+	 * cursor.
+	 */
+	seen = orig_seen;
+
+	/*
+	 * Do it all again a second time. This is making sure that the saved
+	 * cursor is usable even after it has been "used".
+	 */
+	unit_ok(zap_cursor_init_serialized_by_dnode(&zc, dn, cookie));
+	for (int i = 0; i < 13; i++) {
+		unit_ok(zap_cursor_retrieve(&zc, za));
+
+		char c = za->za_name[0];
+		unit_true(c >= 'a' && c <= 'z');
+
+		seen |= 1ULL << (c - 'a');
+
+		zap_cursor_advance(&zc);
+	}
+
+	unit_err(zap_cursor_retrieve(&zc, za), ENOENT);
+	unit_eq(seen, (1ULL << 26) - 1);
+
+	zap_attribute_free(za);
+	zap_cursor_fini(&zc);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/*
+ * The following tests confirm that the cursor is properly cleaning up dnode
+ * holds taken (or not) across the lifetime of the cursor. The test is not
+ * about how or when it takes holds, only that the dnode refcount is the
+ * same before zap_cursor_init() as after zap_cursor_fini().
+ */
+static MunitResult
+test_cursor_release_unused(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	const uint64_t holds_before = mock_dnode_refcount(mdn);
+
+	/* Open a cursor and close it again without ever using it. */
+	zap_cursor_t cursor;
+	unit_ok(zap_cursor_init_by_dnode(&cursor, dn));
+	zap_cursor_fini(&cursor);
+
+	unit_eq(holds_before, mock_dnode_refcount(mdn));
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+static MunitResult
+test_cursor_release_advance(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	const uint64_t holds_before = mock_dnode_refcount(mdn);
+
+	/* Advance a fresh cursor without retrieving anything first. */
+	zap_cursor_t cursor;
+	unit_ok(zap_cursor_init_by_dnode(&cursor, dn));
+	zap_cursor_advance(&cursor);
+	zap_cursor_fini(&cursor);
+
+	unit_eq(holds_before, mock_dnode_refcount(mdn));
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+static MunitResult
+test_cursor_release_empty(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	const uint64_t holds_before = mock_dnode_refcount(mdn);
+
+	/* Nothing was added, so the first retrieve must report ENOENT. */
+	zap_cursor_t cursor;
+	zap_attribute_t *attr = zap_attribute_alloc();
+	unit_ok(zap_cursor_init_by_dnode(&cursor, dn));
+	unit_err(zap_cursor_retrieve(&cursor, attr), ENOENT);
+
+	zap_attribute_free(attr);
+	zap_cursor_fini(&cursor);
+
+	/* The failed retrieve must not leave a hold behind. */
+	unit_eq(holds_before, mock_dnode_refcount(mdn));
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+static MunitResult
+test_cursor_release_one(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	mock_dnode_t *mdn = (mock_dnode_t *)dn;
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+	uint64_t val = 1;
+
+	/* Two entries, so a single retrieve stops part-way through. */
+	unit_ok(zap_add_by_dnode(dn, "a", sizeof (val), 1, &val, tx));
+	unit_ok(zap_add_by_dnode(dn, "b", sizeof (val), 1, &val, tx));
+
+	/* Baseline taken after the adds, immediately before the cursor. */
+	const uint64_t holds_before = mock_dnode_refcount(mdn);
+	zap_cursor_t cursor;
+	zap_attribute_t *attr = zap_attribute_alloc();
+
+	unit_ok(zap_cursor_init_by_dnode(&cursor, dn));
+	unit_ok(zap_cursor_retrieve(&cursor, attr));
+	zap_attribute_free(attr);
+	zap_cursor_fini(&cursor);
+
+	unit_eq(holds_before, mock_dnode_refcount(mdn));
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/* zap_value_search: find key with given uint64 value. */
+static MunitResult
+test_zap_value_search(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/* Three keys, each holding a distinct small integer. */
+	uint64_t one = 1, two = 2, three = 3;
+	unit_ok(zap_add_by_dnode(dn, "one", sizeof (one), 1, &one, tx));
+	unit_ok(zap_add_by_dnode(dn, "two", sizeof (two), 1, &two, tx));
+	unit_ok(zap_add_by_dnode(dn, "three", sizeof (three), 1, &three, tx));
+
+	char found[ZAP_MAXNAMELEN];
+
+	/* A stored value resolves back to the key that holds it. */
+	unit_ok(zap_value_search_by_dnode(dn, 2, 0, found, sizeof (found)));
+	unit_str_eq(found, "two");
+
+	/* A value nobody stored is reported as ENOENT. */
+	unit_err(zap_value_search_by_dnode(dn, 10, 0,
+	    found, sizeof (found)), ENOENT);
+
+	/* A two-byte buffer cannot hold "three". */
+	unit_err(zap_value_search_by_dnode(dn, 3, 0, found, 2), ENAMETOOLONG);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* zap_value_search: value masks */
+static MunitResult
+test_zap_value_search_mask(const MunitParameter params[], void *data)
+{
+	(void) data;
+
+	dnode_t *dn = mock_zap_create_params(params, "type");
+	dmu_tx_t *tx = (dmu_tx_t *)mock_tx_create();
+
+	/*
+	 * Add a set of values. These all have the same bottom 16 bits, with
+	 * different upper 48 bits, segmented so we can mask them in different
+	 * and interesting ways.
+	 */
+	uint64_t v1 = 0x000000000000f0f0ull;
+	uint64_t v2 = 0x00000000fffff0f0ull;
+	uint64_t v3 = 0x0000ffff0000f0f0ull;
+	uint64_t v4 = 0xffff00000000f0f0ull;
+
+	/*
+	 * Generate four random keys. We do this because zap_value_search() is
+	 * implemented with a simple cursor walk, so will always return the
+	 * first match in hash order, which with fixed keys will always give
+	 * exactly the same results. Using random keys ensures the test values
+	 * are encountered in different orders between test runs, giving us
+	 * better coverage when there are multiple matches.
+	 */
+
+	char k1[9], k2[9], k3[9], k4[9];
+	unit_rand_str(k1, sizeof (k1));
+	unit_rand_str(k2, sizeof (k2));
+	unit_rand_str(k3, sizeof (k3));
+	unit_rand_str(k4, sizeof (k4));
+
+	unit_ok(zap_add_by_dnode(dn, k1, sizeof (uint64_t), 1, &v1, tx));
+	unit_ok(zap_add_by_dnode(dn, k2, sizeof (uint64_t), 1, &v2, tx));
+	unit_ok(zap_add_by_dnode(dn, k3, sizeof (uint64_t), 1, &v3, tx));
+	unit_ok(zap_add_by_dnode(dn, k4, sizeof (uint64_t), 1, &v4, tx));
+
+	char name[ZAP_MAXNAMELEN];
+
+	/* A 0 mask is equivalent to an all-ones mask, i.e. an exact match. */
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xf0f0, 0, name, sizeof (name)));
+	unit_str_eq(name, k1);
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xf0f0, 0xffffffffffffffffull, name, sizeof (name)));
+	unit_str_eq(name, k1);
+
+	/* Low 16 bits are the same in all four, so any key may match. */
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xf0f0, 0xffff, name, sizeof (name)));
+
+	/* Low 32 bits: k1/k3/k4 share one value, k2 has its own. */
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0x0000f0f0, 0xffffffff, name, sizeof (name)));
+	unit_true(strcmp(name, k1) == 0 || strcmp(name, k3) == 0 ||
+	    strcmp(name, k4) == 0);
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xfffff0f0, 0xffffffff, name, sizeof (name)));
+	unit_str_eq(name, k2);
+
+	/* Low 48 bits: k1/k4 share a value; k2 and k3 are each unique. */
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0x00000000f0f0ull, 0xffffffffffffull, name, sizeof (name)));
+	unit_true(strcmp(name, k1) == 0 || strcmp(name, k4) == 0);
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0x0000fffff0f0ull, 0xffffffffffffull, name, sizeof (name)));
+	unit_str_eq(name, k2);
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xffff0000f0f0ull, 0xffffffffffffull, name, sizeof (name)));
+	unit_str_eq(name, k3);
+
+	/* No such value is stored, but masked to bits 16-31 it matches v2. */
+	unit_ok(zap_value_search_by_dnode(dn,
+	    0xffffffff, 0xffff0000, name, sizeof (name)));
+	unit_str_eq(name, k2);
+
+	mock_tx_destroy((mock_dmu_tx_t *)tx);
+	unit_true(mock_zap_is_params(dn, params, "type"));
+	mock_zap_destroy(dn);
+
+	return (MUNIT_OK);
+}
+
+/* ========== */
+
+/* Test suite definition and boilerplate. */
+
+#define	UNIT_PARAM_ZAP_TYPES(p)	\
+	UNIT_PARAM((p), "micro", "fat")
+/* Values offered for the "type" parameter of parameterised tests. */
+static const MunitParameterEnum zap_type_params[] = {
+	UNIT_PARAM_ZAP_TYPES("type"),
+	{ 0 },	/* end of list */
+};
+/* Declare a test that is given zap_type_params as its parameters. */
+#define	UNIT_TEST_ZAP_TYPES(name, func)	\
+	UNIT_TEST(name, func, zap_type_params)
+
+static const MunitTest zap_tests[] = {
+	UNIT_TEST("mock_microzap_sanity",	test_mock_microzap_sanity),
+	UNIT_TEST("mock_fatzap_sanity",		test_mock_fatzap_sanity),
+
+	UNIT_TEST_ZAP_TYPES("zap_basic",	test_zap_basic),
+
+	UNIT_TEST_ZAP_TYPES("zap_add",		test_zap_add),
+	UNIT_TEST_ZAP_TYPES("zap_update",	test_zap_update),
+	UNIT_TEST_ZAP_TYPES("zap_remove",	test_zap_remove),
+	UNIT_TEST_ZAP_TYPES("zap_count",	test_zap_count),
+	UNIT_TEST_ZAP_TYPES("zap_contains",	test_zap_contains),
+	UNIT_TEST_ZAP_TYPES("zap_length",	test_zap_length),
+
+	UNIT_TEST_ZAP_TYPES("zap_increment",	test_zap_increment),
+
+	UNIT_TEST_ZAP_TYPES("zap_int",		test_zap_int),
+	UNIT_TEST_ZAP_TYPES("zap_int_keys",	test_zap_int_keys),
+	/* Stats tests are registered per ZAP type, without parameters. */
+	UNIT_TEST("microzap_stats",		test_microzap_stats),
+	UNIT_TEST("fatzap_stats",		test_fatzap_stats),
+	/* Cursor iteration, serialisation and hold release. */
+	UNIT_TEST_ZAP_TYPES("cursor",		test_cursor),
+	UNIT_TEST_ZAP_TYPES("cursor_serialize",	test_cursor_serialize),
+
+	UNIT_TEST_ZAP_TYPES(
+	    "cursor_release_unused",	test_cursor_release_unused),
+	UNIT_TEST_ZAP_TYPES(
+	    "cursor_release_advance",	test_cursor_release_advance),
+	UNIT_TEST_ZAP_TYPES(
+	    "cursor_release_empty",	test_cursor_release_empty),
+	UNIT_TEST_ZAP_TYPES(
+	    "cursor_release_one",	test_cursor_release_one),
+
+	UNIT_TEST_ZAP_TYPES(
+	    "zap_value_search",		test_zap_value_search),
+	UNIT_TEST_ZAP_TYPES(
+	    "zap_value_search_mask",	test_zap_value_search_mask),
+
+	{ 0 },	/* end of list */
+};
+
+static const MunitSuite zap_test_suite = {
+	"zap.",				/* prefix put before each test name */
+	zap_tests,			/* tests in this suite */
+	NULL,				/* no nested suites */
+	1,				/* iterations */
+	MUNIT_SUITE_OPTION_NONE,	/* options */
+};
+
+int
+main(int argc, char **argv)
+{
+	int status;
+
+	/* One-time initialisation, done before the suite runs. */
+	mock_crc64_init();
+	zap_init();
+	status = munit_suite_main(&zap_test_suite, NULL, argc, argv);
+	zap_fini();
+
+	return (status);
+}
diff --git a/sys/contrib/openzfs/tests/unit/unit.c b/sys/contrib/openzfs/tests/unit/unit.c
new file mode 100644
index 00000000000..3dd2e7de5d5
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/unit.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+/* Core stubs, applicable to all test suites. */
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/zfs_debug.h>
+
+#include "munit.h"
+#include "unit.h"
+
+/*
+ * SET_ERROR() expands to __set_error() in debug builds. In production it's
+ * only an under-the-hood tracing aid, so for unit tests a no-op is fine.
+ */
+void
+__set_error(const char *file, const char *func, int line, int err)
+{
+	(void) file; (void) func; (void) line; (void) err;
+}
+
+/* Plumb logging and debug into munit for convenience. */
+
+/* dprintf() checks zfs_flags and calls __dprintf() in debug builds. */
+int zfs_dbgmsg_enable = 1;
+int zfs_flags = ZFS_DEBUG_DPRINTF;
+
+/* Format the message and hand it to munit at MUNIT_LOG_DEBUG level. */
+void
+__dprintf(boolean_t dprint, const char *file, const char *func,
+    int line, const char *fmt, ...)
+{
+	const char *tag = dprint ? "dprintf: " : "";
+	char msg[1024];
+	va_list args;
+
+	va_start(args, fmt);
+	(void) vsnprintf(msg, sizeof (msg), fmt, args);
+	va_end(args);
+	munit_logf_ex(MUNIT_LOG_DEBUG, NULL, 0, "%s%s:%d [%s]: %s",
+	    tag, file, line, func, msg);
+}
+
+/* Log cmn_err() to MUNIT_LOG_INFO or WARNING, abort test on CE_PANIC. */
+void
+cmn_err(int ce, const char *fmt, ...)
+{
+	if (ce == CE_IGNORE)
+		return;
+
+	char buf[1024];
+
+	va_list ap;
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof (buf), fmt, ap);
+	va_end(ap);
+
+	switch (ce) {
+	case CE_WARN:
+		munit_logf_ex(MUNIT_LOG_WARNING, NULL, 0, "%s", buf);
+		break;
+	case CE_PANIC:
+		munit_errorf_ex(NULL, 0, "PANIC: %s", buf);
+		break;
+	default:
+		munit_logf_ex(MUNIT_LOG_INFO, NULL, 0, "%s", buf);
+		break;
+	}
+}
+
+/* helpers to generate useful random data */
+uint64_t
+unit_rand_uint64(void)
+{
+	/* Draw hi then lo in separate statements so the order is fixed. */
+	uint64_t hi = munit_rand_uint32();
+	uint64_t lo = munit_rand_uint32();
+	return ((hi << 32) | lo);
+}
+
+char *
+unit_rand_str(char *buf, size_t bufsz)
+{
+	for (size_t i = 0; i < bufsz; i++)	/* bufsz == 0: no writes */
+		buf[i] = (i + 1 == bufsz) ? '\0' :
+		    munit_rand_int_range('a', 'z');
+	return (buf);
+}
diff --git a/sys/contrib/openzfs/tests/unit/unit.h b/sys/contrib/openzfs/tests/unit/unit.h
new file mode 100644
index 00000000000..a8c23da4118
--- /dev/null
+++ b/sys/contrib/openzfs/tests/unit/unit.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: CDDL-1.0
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2026, TrueNAS.
+ */
+
+#ifndef UNIT_H
+#define	UNIT_H
+
+#include "munit.h"
+
+/* test/suite definition helpers */
+
+/* single element in a MunitTest array; params is NULL when omitted */
+#define	_UNIT_TEST(name, func, params, ...)				\
+	{ (name), (func), NULL, NULL, MUNIT_TEST_OPTION_NONE,	\
+	(MunitParameterEnum*)(params)  }
+#define	UNIT_TEST(name, func, ...)				\
+	_UNIT_TEST(name, func, ##__VA_ARGS__, NULL)
+
+/* single element in a MunitParameterEnum array; value list is NULL-ended */
+#define	UNIT_PARAM(name, ...)	\
+	{ (char *)(name), (char **)(const char *[]) { __VA_ARGS__, NULL } }
+
+/* shortcut for truthy tests */
+#define	unit_true(a)	munit_assert_true(a)
+#define	unit_false(a)	munit_assert_false(a)
+
+/* shortcut for zero test (operand is checked as a uint64_t) */
+#define	unit_zero(a)	munit_assert_uint64((a), ==, 0)
+
+/* shortcuts for integer comparisons (operands are checked as uint64_t) */
+#define	_unit_op(a, op, b)	munit_assert_uint64((a), op, (b))
+
+#define	unit_eq(a, b)	_unit_op((a), ==, (b))
+#define	unit_ne(a, b)	_unit_op((a), !=, (b))
+#define	unit_le(a, b)	_unit_op((a), <=, (b))
+#define	unit_ge(a, b)	_unit_op((a), >=, (b))
+#define	unit_lt(a, b)	_unit_op((a), <,  (b))
+#define	unit_gt(a, b)	_unit_op((a), >,  (b))
+
+/* shortcuts for string comparisons */
+#define	unit_str_eq(a, b)	munit_assert_string_equal(a, b)
+#define	unit_str_ne(a, b)	munit_assert_string_not_equal(a, b)
+
+/* shortcuts for error-returning function calls (result checked as int) */
+#define	unit_ok(a)	munit_assert_int((a), ==, 0)
+#define	unit_err(a, e)	munit_assert_int((a), ==, (e))
+
+/* helpers to generate useful random data */
+extern uint64_t unit_rand_uint64(void);
+extern char *unit_rand_str(char *buf, size_t bufsz);
+
+#endif /* UNIT_H */
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
index 9f92310985e..3275c1358aa 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am
@@ -35,7 +35,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/crypto_test
 %C%_crypto_test_LDADD = libzpool.la
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/clone_after_trunc
-%C%_clone_after_trunc_LDADD = -lpthread
 
 if WANT_DEVNAME2DEVID
 scripts_zfs_tests_bin_PROGRAMS += %D%/devname2devid
@@ -71,7 +70,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/libzfs_mnttab_cache_check
 	libzfs.la
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/manipulate_user_buffer
-%C%_manipulate_user_buffer_LDADD = -lpthread
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree
 %C%_mkfile_LDADD = $(LTLIBINTL)
@@ -80,7 +78,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree
 scripts_zfs_tests_bin_PROGRAMS += \
 	%D%/mmap_exec %D%/mmap_ftruncate %D%/mmap_seek \
 	%D%/mmap_sync %D%/mmapwrite %D%/readmmap %D%/mmap_write_sync
-%C%_mmapwrite_LDADD = -lpthread
 
 if WANT_MMAP_LIBAIO
 scripts_zfs_tests_bin_PROGRAMS += %D%/mmap_libaio
@@ -95,7 +92,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/nvlist_to_lua
 	libnvpair.la
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/rm_lnkcnt_zero_file
-%C%_rm_lnkcnt_zero_file_LDADD = -lpthread
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/send_doall
 %C%_send_doall_LDADD = \
@@ -107,7 +103,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/stride_dd
 %C%_stride_dd_LDADD = -lrt
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/threadsappend
-%C%_threadsappend_LDADD = -lpthread
 
 scripts_zfs_tests_bin_PROGRAMS += %D%/ereports
 %C%_ereports_LDADD = \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c
index 4ef249bbd4a..8f7e36d9efa 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_input_check.c
@@ -85,7 +85,6 @@ static const zfs_ioc_t ioc_skip[] = {
 	ZFS_IOC_DSOBJ_TO_DSNAME,
 	ZFS_IOC_OBJ_TO_PATH,
 	ZFS_IOC_POOL_SET_PROPS,
-	ZFS_IOC_POOL_GET_PROPS,
 	ZFS_IOC_SET_FSACL,
 	ZFS_IOC_GET_FSACL,
 	ZFS_IOC_SHARE,
@@ -125,11 +124,136 @@ static const zfs_ioc_t ioc_skip[] = {
 		lzc_ioctl_test(ioc, name, req, opt, err, wild);	\
 	} while (0)
 
+#define	IOC_INPUT_TEST_INJECT(ioc, name, innvl)			\
+	do {							\
+		active_test = __func__ + 5;	/* drop "test_" */	\
+		lzc_ioctl_run_impl(ioc, name, innvl, 0, B_TRUE);	\
+	} while (0)
+
+/*
+ * Given a zfs_cmd_t containing an already packed nvlist in zc->zc_nvlist_src,
+ * and its original innvl, look in innvl for the last string nvpair, or last
+ * string array nvpair, and remove the string terminator.  The idea is to
+ * corrupt the nvlist string value so that anyone doing a strlen() on it will
+ * read past the end of the packed nvlist buffer and trigger a crash.
+ */
+static void
+do_bad_string(zfs_cmd_t *zc, nvlist_t *innvl)
+{
+	nvpair_t *elem = NULL;
+	nvpair_t *lastseen = NULL;
+	const char *str = NULL;
+	const char **arr;
+	uint_t n;
+	char *off;
+	char *packed;
+	uint64_t size, off_size;
+
+	while ((elem = nvlist_next_nvpair(innvl, elem)) != NULL) {
+		if ((nvpair_type(elem) == DATA_TYPE_STRING) ||
+		    (nvpair_type(elem) == DATA_TYPE_STRING_ARRAY))
+			lastseen = elem;
+	}
+
+	if (lastseen == NULL)
+		return;	/* No strings */
+
+	/*
+	 * Lookup either the last string, or the last string in the last
+	 * string array in the nvlist.  We will use this to corrupt from the
+	 * string to the end of the nvlist buffer.  Any attempts to strlen this
+	 * string should run past the end of the packed buffer.
+	 */
+	if (nvpair_value_string(lastseen, &str) != 0 &&
+	    nvpair_value_string_array(lastseen, &arr, &n) == 0 && n > 0)
+		str = arr[n - 1];
+	if (str == NULL || (off_size = strlen(str)) == 0)
+		return;	/* Nothing usable to corrupt */
+
+	/*
+	 * Overwrite everything from the last character of the string (and so
+	 * its NUL terminator too) to the end of the packed nvlist buffer.
+	 */
+	packed = (char *)(uintptr_t)zc->zc_nvlist_src;
+	size = zc->zc_nvlist_src_size;
+
+	off = memmem(packed, size, str, off_size);
+	if (off == NULL)
+		return;	/* String not found in the packed buffer */
+
+	memset(&off[off_size - 1], '!', (packed + size) -
+	    (&off[off_size - 1]));
+}
+
+/*
+ * For each byte in the packed nvlist in zc, corrupt a single byte, then
+ * try doing the ioctl.  This tests how well the kernel handles fuzzed nvlists.
+ *
+ * NOTE - make sure you are doing this with a "safe" ioctl!  You don't want to
+ * run this on an ioctl that can potentially corrupt data (like a zpool create).
+ */
+static void
+do_fuzz(int zfs_fd, zfs_ioc_t ioc, zfs_cmd_t *zc)
+{
+	uint64_t size;
+	uint64_t i;
+	unsigned char old = 0;
+	unsigned char *pos;
+	zfs_cmd_t orig_zc = *zc;
+
+	pos = (unsigned char *)(uintptr_t)zc->zc_nvlist_src;
+	size = zc->zc_nvlist_src_size;
+
+	/*
+	 * Fuzz each byte in the packed nvlist, one byte at a time, and do the
+	 * ioctl.  If the kernel doesn't crash, then the test passed.
+	 */
+	for (i = 0; i < size; i++) {
+		/* Restore the previously corrupted byte */
+		if (i > 0)
+			pos[i-1] = old;
+
+		old = pos[i];
+
+		/* Corrupt the new byte */
+		pos[i]++;
+
+		/*
+		 * Do the ioctl and ignore the return code.  We just want to
+		 * see if the kernel panics.
+		 */
+		lzc_ioctl_fd(zfs_fd, ioc, zc);
+
+		/*
+		 * Restore 'zc' with original fields since the ioctl may
+		 * have modified them.
+		 */
+		*zc = orig_zc;
+	}
+	/* Restore last byte */
+	if (i > 0)
+		pos[i - 1] = old;
+
+	/*
+	 * Try fuzzing the packed nvlist size field.  Test it with one byte
+	 * bigger and one byte smaller than the current value.
+	 */
+	zc->zc_nvlist_src_size--;
+	lzc_ioctl_fd(zfs_fd, ioc, zc);
+
+	zc->zc_nvlist_src_size += 2;
+	lzc_ioctl_fd(zfs_fd, ioc, zc);
+
+	/* Restore to normal */
+	zc->zc_nvlist_src_size -= 1;
+}
+
 /*
  * run a zfs ioctl command, verify expected results and log failures
  */
 static void
-lzc_ioctl_run(zfs_ioc_t ioc, const char *name, nvlist_t *innvl, int expected)
+lzc_ioctl_run_impl(zfs_ioc_t ioc, const char *name, nvlist_t *innvl,
+    int expected, boolean_t do_corrupt)
 {
 	zfs_cmd_t zc = {"\0"};
 	char *packed = NULL;
@@ -160,10 +284,30 @@ lzc_ioctl_run(zfs_ioc_t ioc, const char *name, nvlist_t *innvl, int expected)
 	zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
 	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)malloc(zc.zc_nvlist_dst_size);
 
+	if (do_corrupt) {
+		/*
+		 * Try changing bytes in the packed nvlist to see if it will
+		 * panic the kernel when you do the ioctl.
+		 */
+		do_fuzz(zfs_fd, ioc, &zc);
+
+		/*
+		 * Corrupt the last string in the packed nvlist so it has no
+		 * NULL terminator.
+		 */
+		do_bad_string(&zc, innvl);
+
+	}
+
 	if (lzc_ioctl_fd(zfs_fd, ioc, &zc) != 0)
 		error = errno;
 
-	if (error != expected) {
+	/*
+	 * If we're corrupting the nvlist we don't care about the specific
+	 * error code that gets returned, as it could be one of many.  We only
+	 * care if it panics the kernel.
+	 */
+	if (!do_corrupt && error != expected) {
 		unexpected_failures = B_TRUE;
 		(void) fprintf(stderr, "%s: Unexpected result with %s, "
 		    "error %d (expecting %d)\n",
@@ -174,6 +318,12 @@ lzc_ioctl_run(zfs_ioc_t ioc, const char *name, nvlist_t *innvl, int expected)
 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
 }
 
+static void
+lzc_ioctl_run(zfs_ioc_t ioc, const char *name, nvlist_t *innvl, int expected)
+{
+	lzc_ioctl_run_impl(ioc, name, innvl, expected, B_FALSE);
+}
+
 /*
  * Test each ioc for the following ioctl input errors:
  *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
@@ -310,6 +460,7 @@ test_log_history(const char *pool)
 	fnvlist_add_string(required, "message", "input check");
 
 	IOC_INPUT_TEST(ZFS_IOC_LOG_HISTORY, pool, required, NULL, 0);
+	IOC_INPUT_TEST_INJECT(ZFS_IOC_LOG_HISTORY, pool, required);
 
 	nvlist_free(required);
 }
@@ -791,6 +942,20 @@ test_set_bootenv(const char *pool)
 	nvlist_free(required);
 }
 
+static void
+test_zpool_get(const char *pool)
+{
+	/* Ask for a single named property via the optional names array. */
+	const char *props[] = { ZPOOL_DEDUPCACHED_PROP_NAME };
+	const uint_t nprops = sizeof (props) / sizeof (props[0]);
+	nvlist_t *opts = fnvlist_alloc();
+
+	fnvlist_add_string_array(opts, ZPOOL_GET_PROPS_NAMES, props, nprops);
+	IOC_INPUT_TEST(ZFS_IOC_POOL_GET_PROPS, pool, NULL, opts, 0);
+	IOC_INPUT_TEST_INJECT(ZFS_IOC_POOL_GET_PROPS, pool, opts);
+	nvlist_free(opts);
+}
+
 static void
 zfs_ioc_input_tests(const char *pool)
 {
@@ -885,6 +1050,7 @@ zfs_ioc_input_tests(const char *pool)
 
 	test_scrub(pool);
 
+	test_zpool_get(pool);
 	/*
 	 * cleanup
 	 */
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
index c4bcfea5595..c7931ca95e2 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -376,6 +376,8 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
 	functional/rsend/rsend.kshlib \
 	functional/scrub_mirror/default.cfg \
 	functional/scrub_mirror/scrub_mirror_common.kshlib \
+	functional/send_xdr_encoding/send_xdr_encoding.cfg \
+	functional/send_xdr_encoding/send_xdr_encoding.kshlib \
 	functional/slog/slog.cfg \
 	functional/slog/slog.kshlib \
 	functional/snapshot/snapshot.cfg \
@@ -434,6 +436,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/alloc_class/alloc_class_011_neg.ksh \
 	functional/alloc_class/alloc_class_012_pos.ksh \
 	functional/alloc_class/alloc_class_013_pos.ksh \
+	functional/alloc_class/alloc_class_014_pos.ksh \
+	functional/alloc_class/alloc_class_015_neg.ksh \
 	functional/alloc_class/alloc_class_016_pos.ksh \
 	functional/alloc_class/cleanup.ksh \
 	functional/alloc_class/setup.ksh \
@@ -648,6 +652,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/cli_root/zdb/zdb_encrypted.ksh \
 	functional/cli_root/zdb/zdb_encrypted_raw.ksh \
 	functional/cli_root/zdb/zdb_label_checksum.ksh \
+	functional/cli_root/zdb/zdb_file_layout_001.ksh \
+	functional/cli_root/zdb/zdb_file_layout_002.ksh \
+	functional/cli_root/zdb/zdb_file_layout_003.ksh \
+	functional/cli_root/zdb/zdb_file_layout_neg.ksh \
 	functional/cli_root/zdb/zdb_object_range_neg.ksh \
 	functional/cli_root/zdb/zdb_object_range_pos.ksh \
 	functional/cli_root/zdb/zdb_objset_id.ksh \
@@ -806,6 +814,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh \
 	functional/cli_root/zfs_mount/zfs_mount_recursive.ksh \
 	functional/cli_root/zfs_mount/zfs_mount_remount.ksh \
+	functional/cli_root/zfs_mount/zfs_mount_ro_rw.ksh \
 	functional/cli_root/zfs_mount/zfs_mount_test_race.ksh \
 	functional/cli_root/zfs_mount/zfs_multi_mount.ksh \
 	functional/cli_root/zfs_program/cleanup.ksh \
@@ -1295,6 +1304,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/cli_root/zpool_set/zpool_set_002_neg.ksh \
 	functional/cli_root/zpool_set/zpool_set_003_neg.ksh \
 	functional/cli_root/zpool_set/zpool_set_ashift.ksh \
+	functional/cli_root/zpool_set/zpool_set_inherit.ksh \
 	functional/cli_root/zpool_set/user_property_001_pos.ksh \
 	functional/cli_root/zpool_set/user_property_002_neg.ksh \
 	functional/cli_root/zpool_set/zpool_set_features.ksh \
@@ -1495,6 +1505,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/deadman/deadman_zio.ksh \
 	functional/dedup/cleanup.ksh \
 	functional/dedup/setup.ksh \
+	functional/dedup/dedup_bclone.ksh \
+	functional/dedup/dedup_bclone_pruned.ksh \
 	functional/dedup/dedup_fdt_create.ksh \
 	functional/dedup/dedup_fdt_import.ksh \
 	functional/dedup/dedup_fdt_pacing.ksh \
@@ -1608,6 +1620,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/fault/auto_spare_001_pos.ksh \
 	functional/fault/auto_spare_002_pos.ksh \
 	functional/fault/auto_spare_ashift.ksh \
+	functional/fault/auto_spare_rotational.ksh \
 	functional/fault/auto_spare_double.ksh \
 	functional/fault/auto_spare_multiple.ksh \
 	functional/fault/auto_spare_shared.ksh \
@@ -2123,6 +2136,22 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/scrub_mirror/scrub_mirror_003_pos.ksh \
 	functional/scrub_mirror/scrub_mirror_004_pos.ksh \
 	functional/scrub_mirror/setup.ksh \
+	functional/send_xdr_encoding/cleanup.ksh \
+	functional/send_xdr_encoding/setup.ksh \
+	functional/send_xdr_encoding/xdr_bookmark_raw.ksh \
+	functional/send_xdr_encoding/xdr_bookmark_raw_with_write.ksh \
+	functional/send_xdr_encoding/xdr_incr_from_bookmark.ksh \
+	functional/send_xdr_encoding/xdr_incr_from_redacted.ksh \
+	functional/send_xdr_encoding/xdr_raw.ksh \
+	functional/send_xdr_encoding/xdr_redacted_full.ksh \
+	functional/send_xdr_encoding/xdr_redacted_received.ksh \
+	functional/send_xdr_encoding/xdr_redacted_received_raw.ksh \
+	functional/send_xdr_encoding/xdr_replication.ksh \
+	functional/send_xdr_encoding/xdr_resume.ksh \
+	functional/send_xdr_encoding/xdr_resume_bookmark_raw.ksh \
+	functional/send_xdr_encoding/xdr_resume_bookmark_raw_with_write.ksh \
+	functional/send_xdr_encoding/xdr_resume_raw.ksh \
+	functional/send_xdr_encoding/xdr_resume_redacted.ksh \
 	functional/slog/cleanup.ksh \
 	functional/slog/setup.ksh \
 	functional/slog/slog_001_pos.ksh \
@@ -2264,6 +2293,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/vdev_zaps/vdev_zaps_005_pos.ksh \
 	functional/vdev_zaps/vdev_zaps_006_pos.ksh \
 	functional/vdev_zaps/vdev_zaps_007_pos.ksh \
+	functional/vdev_zaps/vdev_zaps_008_pos.ksh \
 	functional/write_dirs/cleanup.ksh \
 	functional/write_dirs/setup.ksh \
 	functional/write_dirs/write_dirs_001_pos.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
index be281c62404..649a6ec601c 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
@@ -67,3 +67,37 @@ function display_status
 
 	return $ret
 }
+
+#
+# Verify the file identified by the input <inode> is written on a special vdev.
+# An L0 block DVA whose vdev id is below the $ZPOOL_DISKS word count fails it.
+#
+function file_in_special_vdev # <dataset> <inode>
+{
+	typeset dataset="$1"
+	typeset inum="$2"
+	typeset num_normal=$(echo $ZPOOL_DISKS | wc -w)
+	num_normal=${num_normal##* }
+
+	zdb -dddddd $dataset $inum | awk -v d=$num_normal '{
+# find DVAs from string "offset level dva" only for L0 (data) blocks
+if (match($0,"L0 [0-9]+")) {
+   dvas[0]=$3
+   dvas[1]=$4
+   dvas[2]=$5
+   for (i = 0; i < 3; ++i) {
+      if (match(dvas[i],"([^:]+):.*")) {
+         dva = substr(dvas[i], RSTART, RLENGTH);
+         # parse DVA from string "vdev:offset:asize"
+         if (split(dva,arr,":") != 3) {
+            print "Error parsing DVA: <" dva ">";
+            exit 1;
+         }
+         # verify vdev is "special"
+         if (arr[1] < d) {
+            exit 1;
+         }
+      }
+   }
+}}'
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh
index 743a717b2e8..3d463b37611 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_012_pos.ksh
@@ -25,41 +25,6 @@
 
 verify_runnable "global"
 
-#
-# Verify the file identified by the input <inode> is written on a special vdev
-# According to the pool layout used in this test vdev_id 3 and 4 are special
-# XXX: move this function to libtest.shlib once we get "Vdev Properties"
-#
-function file_in_special_vdev # <dataset> <inode>
-{
-	typeset dataset="$1"
-	typeset inum="$2"
-	typeset num_normal=$(echo $ZPOOL_DISKS | wc -w)
-	num_normal=${num_normal##* }
-
-	zdb -dddddd $dataset $inum | awk -v d=$num_normal '{
-# find DVAs from string "offset level dva" only for L0 (data) blocks
-if (match($0,"L0 [0-9]+")) {
-   dvas[0]=$3
-   dvas[1]=$4
-   dvas[2]=$5
-   for (i = 0; i < 3; ++i) {
-      if (match(dvas[i],"([^:]+):.*")) {
-         dva = substr(dvas[i], RSTART, RLENGTH);
-         # parse DVA from string "vdev:offset:asize"
-         if (split(dva,arr,":") != 3) {
-            print "Error parsing DVA: <" dva ">";
-            exit 1;
-         }
-         # verify vdev is "special"
-         if (arr[1] < d) {
-            exit 1;
-         }
-      }
-   }
-}}'
-}
-
 #
 # Check that device removal works for special class vdevs
 #
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh
new file mode 100755
index 00000000000..27c55bc5906
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_pos.ksh
@@ -0,0 +1,109 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
+
+#
+# DESCRIPTION:
+#	The alloc_bias vdev property is readable and settable on top-level vdevs.
+#
+# STRATEGY:
+#	1. Create a pool with one normal mirror and one special mirror.
+#	2. Verify alloc_bias getter returns "none" for normal and "special"
+#	   for the special mirror.
+#	3. Verify alloc_bias is not reported for leaf (child) vdevs.
+#	4. Set alloc_bias=none on the special vdev; verify getter returns "none".
+#	5. Export and import the pool; verify no "special" section in status.
+#	6. Set alloc_bias=dedup on the same vdev; verify getter returns "dedup".
+#	7. Export and import the pool; verify "dedup" section appears in status.
+#	8. Set alloc_bias=special; verify getter returns "special".
+#	9. Export and import; verify "special" section appears again.
+#
+
+verify_runnable "global"
+
+claim="alloc_bias vdev property is readable and settable on top-level vdevs"
+
+log_assert $claim
+log_onexit cleanup
+
+log_must disk_setup
+
+# One normal mirror (always stays normal) and one special mirror.
+# The normal mirror ensures the pool always has normal-class vdevs
+# regardless of what we do to the second mirror.
+log_must zpool create $TESTPOOL \
+    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
+    special mirror $CLASS_DISK0 $CLASS_DISK1
+
+# Find the special vdev name (mirror-N) from zpool status.
+TVDEV=$(zpool status $TESTPOOL | \
+    awk '/special/{found=1} found && /mirror-/{print $1; exit}')
+log_note "Special vdev: $TVDEV"
+[[ -n "$TVDEV" ]] || log_fail "Could not determine special vdev name"
+
+# Verify initial alloc_bias values.
+BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL mirror-0)
+[[ "$BIAS" == "none" ]] || \
+    log_fail "Normal mirror alloc_bias: expected none, got $BIAS"
+
+BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV)
+[[ "$BIAS" == "special" ]] || \
+    log_fail "Special mirror alloc_bias: expected special, got $BIAS"
+
+# Verify alloc_bias is not reported for a leaf vdev.
+LEAF_OUT=$(zpool get -H -o name,value alloc_bias $TESTPOOL \
+    $ZPOOL_DISK0 2>&1)
+[[ -z "$LEAF_OUT" ]] || \
+    log_fail "alloc_bias reported for leaf vdev, got: $LEAF_OUT"
+
+# --- special -> none, verify after export/import ---
+log_must zpool set alloc_bias=none $TESTPOOL $TVDEV
+BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV)
+[[ "$BIAS" == "none" ]] || \
+    log_fail "After set none: alloc_bias expected none, got $BIAS"
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL
+zpool status $TESTPOOL | grep -q "special" && \
+    log_fail "special still shown after alloc_bias=none + reimport"
+
+# --- none -> dedup, verify after export/import ---
+log_must zpool set alloc_bias=dedup $TESTPOOL $TVDEV
+BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV)
+[[ "$BIAS" == "dedup" ]] || \
+    log_fail "After set dedup alloc_bias expected dedup, got $BIAS"
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL
+zpool status $TESTPOOL | grep -q "dedup" || \
+    log_fail "dedup not shown after alloc_bias=dedup + reimport"
+
+# --- dedup -> special, verify after export/import ---
+log_must zpool set alloc_bias=special $TESTPOOL $TVDEV
+BIAS=$(zpool get -H -o value alloc_bias $TESTPOOL $TVDEV)
+[[ "$BIAS" == "special" ]] || \
+    log_fail "After set special alloc_bias expected special, got $BIAS"
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $TEST_BASE_DIR -s $TESTPOOL
+zpool status $TESTPOOL | grep -q "special" || \
+    log_fail "special not shown after alloc_bias=special + reimport"
+
+log_must zpool destroy -f $TESTPOOL
+log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh
new file mode 100755
index 00000000000..43740690b3c
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_neg.ksh
@@ -0,0 +1,91 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
+
+#
+# DESCRIPTION:
+#	Setting the alloc_bias vdev property to invalid values or on
+#	unsupported vdev types fails.
+#
+# STRATEGY:
+#	1. Create a pool with a normal mirror and a log vdev.
+#	2. Verify setting alloc_bias on a leaf vdev fails.
+#	3. Verify setting alloc_bias=log fails.
+#	4. Verify setting alloc_bias to an unknown value fails.
+#	5. Verify setting alloc_bias on a log vdev fails.
+#	6. Verify setting alloc_bias=special fails when allocation_classes
+#	   feature is not enabled.
+#	7. Verify converting the last normal vdev fails.
+#
+
+verify_runnable "global"
+
+claim="Setting alloc_bias to invalid values or on unsupported vdevs fails"
+
+log_assert $claim
+log_onexit cleanup
+
+log_must disk_setup
+
+# Create a pool with a normal mirror and a log vdev.
+log_must zpool create $TESTPOOL \
+    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
+    log $CLASS_DISK0
+
+NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}')
+log_note "Normal vdev: $NORMAL_VDEV"
+
+# Setting alloc_bias on a leaf vdev must fail.
+log_mustnot zpool set alloc_bias=special $TESTPOOL $ZPOOL_DISK0
+
+# Setting alloc_bias=log must fail (log vdevs must be removed and re-added).
+log_mustnot zpool set alloc_bias=log $TESTPOOL $NORMAL_VDEV
+
+# Setting alloc_bias to an unknown value must fail.
+log_mustnot zpool set alloc_bias=bogus $TESTPOOL $NORMAL_VDEV
+
+# Setting alloc_bias on a log vdev must fail.
+# CLASS_DISK0 is a single-disk (non-mirror) top-level log vdev.
+log_mustnot zpool set alloc_bias=special $TESTPOOL $CLASS_DISK0
+
+log_must zpool destroy -f $TESTPOOL
+
+# Verify setting alloc_bias=special fails when allocation_classes is disabled.
+# Create a pool with the allocation_classes feature explicitly disabled.
+log_must zpool create -o feature@allocation_classes=disabled $TESTPOOL \
+    mirror $ZPOOL_DISK0 $ZPOOL_DISK1
+
+NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}')
+log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV
+log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV
+
+log_must zpool destroy -f $TESTPOOL
+
+# Verify that converting the last normal-class top-level vdev fails.
+# A pool must always retain at least one normal vdev.
+log_must zpool create $TESTPOOL \
+    mirror $ZPOOL_DISK0 $ZPOOL_DISK1 \
+    special mirror $CLASS_DISK0 $CLASS_DISK1
+
+NORMAL_VDEV=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}')
+log_mustnot zpool set alloc_bias=special $TESTPOOL $NORMAL_VDEV
+log_mustnot zpool set alloc_bias=dedup $TESTPOOL $NORMAL_VDEV
+
+log_must zpool destroy -f $TESTPOOL
+log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh
index 01e9cf49dc8..cda4b0ee953 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_corner_cases.ksh
@@ -51,4 +51,7 @@ log_must zfs set recordsize=$RECORDSIZE $TESTDSTFS
 
 bclone_corner_cases_test $TESTSRCDIR $TESTDSTDIR
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh
index e1b583813f1..0d2c0f6e16c 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_crossfs_data.ksh
@@ -50,4 +50,7 @@ for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \
     bclone_test random $filesize false $TESTSRCDIR $TESTDSTDIR
 done
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh
index d18a1bd2490..619fc3e4216 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_corner_cases.ksh
@@ -45,4 +45,7 @@ log_must zfs set recordsize=$RECORDSIZE $TESTSRCFS
 
 bclone_corner_cases_test $TESTSRCDIR $TESTSRCDIR
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh
index 45551e04646..f1f80a9c059 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/bclone/bclone_samefs_data.ksh
@@ -46,4 +46,7 @@ for filesize in 1 107 113 511 512 513 4095 4096 4097 131071 131072 131073 \
     bclone_test random $filesize false $TESTSRCDIR $TESTSRCDIR
 done
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh
index b407d4c541d..d4b7f01e8ba 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_after_device_removal.ksh
@@ -57,5 +57,9 @@ log_must zfs create $TESTPOOL/$TESTFS
 log_must clonefile -f /$TESTPOOL/file /$TESTPOOL/$TESTFS/file
 log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=16M count=2
 log_must zfs destroy -r $TESTPOOL/$TESTFS
+wait_freeing $TESTPOOL
+sync_pool $TESTPOOL
+
+log_must zdb -b $TESTPOOL
 
 log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh
index 4c652923545..7c183234922 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh
@@ -83,5 +83,8 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2
 # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1).
 log_must [ "$blocks" = "$(seq -s " " 0 1021 | sed 's/ $//')" ]
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL"
 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh
index 2e854d7e543..ad24c1f06ba 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh
@@ -126,4 +126,7 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \
 # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1).
 log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ]
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh
index eb1464ff4d4..6b9ea354226 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh
@@ -128,4 +128,7 @@ typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \
 # FreeBSD's seq(1) leaves a trailing space, remove it with sed(1).
 log_must [ "$blocks" = "$(seq -s " " 0 2047 | sed 's/ $//')" ]
 
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
 log_pass $claim
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_001.ksh
new file mode 100755
index 00000000000..f9c9555b84b
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_001.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Datto, Inc. All rights reserved.
+# Copyright (c) 2026, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -fHv <dataset> <objnum> will display block
+# layouts for the object.
+#
+# Strategery:
+# 1. Create a RAIDZ1 pool, set compression to none
+# 2. Create a file filled with random data
+# 3. Get the inode number of the file
+# 4. Run zdb -fHv <pool>/ <inum> & extract file
+# 5. Compare real file and extracted file.
+
+DATA=/$TESTPOOL1/random.bin
+BLOCKS=$(( $RANDOM % 16 + 1 ))	# 1..16; 0 blocks would make the cmp vacuous
+COMPARE=/tmp/compare.$$
+
+function cleanup
+{
+    destroy_pool $TESTPOOL1
+    rm -f $TESTDIR/file?.bin $COMPARE
+}
+
+log_assert "Verify zdb -fHv displays correct offsets"
+log_onexit cleanup
+
+# 1. Create a RAIDZ1 pool
+log_must mkdir -p $TESTDIR
+for file in 1 2 3 4 5
+do
+    rm -f $TESTDIR/file${file}.bin
+    touch $TESTDIR/file${file}.bin
+    log_must truncate -s 128m $TESTDIR/file${file}.bin
+done
+
+log_must zpool create -O compression=off -O recordsize=16K $TESTPOOL1 raidz1 $TESTDIR/file[12345].bin
+zfs get compression,recordsize $TESTPOOL1
+# 2. Create a file with random data
+log_must rm -f $DATA
+log_must dd if=/dev/urandom of=${DATA} bs=16k count=${BLOCKS} > /dev/null 2>&1
+log_must zpool sync $TESTPOOL1
+
+# 3. Get the inode number of the file
+INUM=$(ls -li $DATA | cut -f1 -d ' ')
+
+# 4. Extract the contents of the file using dd
+rm -f $COMPARE
+log_must touch ${COMPARE}
+log_must zdb -fHv $TESTPOOL1/ ${INUM} |  grep 'D.$' |
+    while read file offset count rest
+    do
+	log_must sh -c "dd if=$TESTDIR/${file} bs=512 skip=${offset} count=${count} >> ${COMPARE}"
+    done
+
+# 5. Compare files
+log_must cmp  ${COMPARE} ${DATA}
+
+log_pass "'zdb -fHv' works as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_002.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_002.ksh
new file mode 100755
index 00000000000..455ec6ccb21
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_002.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Datto, Inc. All rights reserved.
+# Copyright (c) 2026, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -fHv <dataset> <objnum> will display block
+# layouts for the object.
+#
+# Strategery:
+# 1. Create a RAIDZ2 pool, set compression to none
+# 2. Create a file filled with random data
+# 3. Get the inode number of the file
+# 4. Run zdb -fHv <pool>/ <inum> & extract file
+# 5. Compare real file and extracted file.
+
+DATA=/$TESTPOOL1/random.bin
+BLOCKS=$(( $RANDOM % 16 + 1 ))	# 1..16; 0 blocks would make the cmp vacuous
+COMPARE=/tmp/compare.$$
+
+function cleanup
+{
+    destroy_pool $TESTPOOL1
+    rm -f $TESTDIR/file?.bin $COMPARE
+}
+
+log_assert "Verify zdb -fHv displays correct offsets"
+log_onexit cleanup
+
+# 1. Create a RAIDZ2 pool
+log_must mkdir -p $TESTDIR
+for file in 1 2 3 4 5 6
+do
+    rm -f $TESTDIR/file${file}.bin
+    touch $TESTDIR/file${file}.bin
+    log_must truncate -s 128m $TESTDIR/file${file}.bin
+done
+
+log_must zpool create -O compression=off -O recordsize=16K $TESTPOOL1 raidz2 $TESTDIR/file[123456].bin
+zfs get compression,recordsize $TESTPOOL1
+# 2. Create a file with random data
+log_must rm -f $DATA
+log_must dd if=/dev/urandom of=${DATA} bs=16k count=${BLOCKS} > /dev/null 2>&1
+log_must zpool sync $TESTPOOL1
+
+# 3. Get the inode number of the file
+INUM=$(ls -li $DATA | cut -f1 -d ' ')
+
+# 4. Extract the contents of the file using dd
+rm -f $COMPARE
+log_must touch ${COMPARE}
+log_must zdb -fHv $TESTPOOL1/ ${INUM} |  grep 'D.$' |
+    while read file offset count rest
+    do
+	log_must sh -c "dd if=$TESTDIR/${file} bs=512 skip=${offset} count=${count} >> ${COMPARE}"
+    done
+
+# 5. Compare files
+log_must cmp  ${COMPARE} ${DATA}
+
+log_pass "'zdb -fHv' works as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_003.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_003.ksh
new file mode 100755
index 00000000000..7673b3488c7
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_003.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Datto, Inc. All rights reserved.
+# Copyright (c) 2026, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# zdb -fHv <dataset> <objnum> will display block
+# layouts for the object.
+#
+# Strategery:
+# 1. Create a RAIDZ3 pool, set compression to none
+# 2. Create a file filled with random data
+# 3. Get the inode number of the file
+# 4. Run zdb -fHv <pool>/ <inum> & extract file
+# 5. Compare real file and extracted file.
+
+DATA=/$TESTPOOL1/random.bin
+BLOCKS=$(( $RANDOM % 16 + 1 ))	# 1..16; 0 blocks would make the cmp vacuous
+COMPARE=/tmp/compare.$$
+
+function cleanup
+{
+    destroy_pool $TESTPOOL1
+    rm -f $TESTDIR/file?.bin $COMPARE
+}
+
+log_assert "Verify zdb -fHv displays correct offsets"
+log_onexit cleanup
+
+# 1. Create a RAIDZ3 pool
+log_must mkdir -p $TESTDIR
+for file in 1 2 3 4 5 6 7
+do
+    rm -f $TESTDIR/file${file}.bin
+    touch $TESTDIR/file${file}.bin
+    log_must truncate -s 128m $TESTDIR/file${file}.bin
+done
+
+log_must zpool create -O compression=off -O recordsize=16K $TESTPOOL1 raidz3 $TESTDIR/file[1234567].bin
+zfs get compression,recordsize $TESTPOOL1
+# 2. Create a file with random data
+log_must rm -f $DATA
+log_must dd if=/dev/urandom of=${DATA} bs=16k count=${BLOCKS} > /dev/null 2>&1
+log_must zpool sync $TESTPOOL1
+
+# 3. Get the inode number of the file
+INUM=$(ls -li $DATA | cut -f1 -d ' ')
+
+# 4. Extract the contents of the file using dd
+rm -f $COMPARE
+log_must touch ${COMPARE}
+log_must zdb -fHv $TESTPOOL1/ ${INUM} |  grep 'D.$' |
+    while read file offset count rest
+    do
+	log_must sh -c "dd if=$TESTDIR/${file} bs=512 skip=${offset} count=${count} >> ${COMPARE}"
+    done
+
+# 5. Compare files
+log_must cmp  ${COMPARE} ${DATA}
+
+log_pass "'zdb -fHv' works as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_neg.ksh
new file mode 100755
index 00000000000..124bdb6b6b3
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_file_layout_neg.ksh
@@ -0,0 +1,57 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Datto, Inc. All rights reserved.
+# Copyright (c) 2026, Klara Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Ensure zdb -f only works on raidz
+#
+# Strategery:
+# 1. Create a pool with one disk
+# 2. Create a file
+# 3. Get the inode number of the file
+# 4. Run zdb -f
+# 5. Confirm failure status
+
+function cleanup
+{
+    destroy_pool $TESTPOOL1
+    rm -f $TESTDIR/file1.bin
+}
+
+log_assert "Verify zdb -f fails on non-raidz pool"
+log_onexit cleanup
+
+# 1. Create a pool with one disk
+log_must mkdir -p $TESTDIR
+touch $TESTDIR/file1.bin
+log_must truncate -s 128m $TESTDIR/file1.bin
+log_must zpool create -f $TESTPOOL1 $TESTDIR/file1.bin
+
+# 2. Create a file
+log_must touch /$TESTPOOL1/file.txt
+
+# 3. Get the inode number of the file
+INUM=$(ls -li /$TESTPOOL1/file.txt | cut -f1 -d ' ')
+
+# 4. Run zdb -f
+log_mustnot zdb -f $TESTPOOL1/ $INUM
+
+log_pass "'zdb -f' fails on non-raidz as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
index 08795a7ea25..5d7ceb97112 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
@@ -27,6 +27,8 @@
 
 #
 # Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+# Copyright (c) 2026, TrueNAS.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -131,3 +133,129 @@ function verify_mount_display
 	done
 	return 0
 }
+
+# Helper functions to call the system mount(8) with various options
+function mount_default # <dataset mountpoint | mountpoint>
+{
+	# Mount with no explicit ro/rw option; all arguments are passed
+	# through to mount(8) unchanged.
+	#
+	# The same options are used on FreeBSD and Linux, so unlike the
+	# ro/rw helpers no per-platform branch is needed here.
+	typeset opts="-t zfs"
+
+	mount $opts "$@"
+	return $?
+}
+
+function mount_ro # <dataset mountpoint | mountpoint>
+{
+	typeset opts=
+	if is_freebsd; then
+		opts="-t zfs -r"
+	else
+		opts="-t zfs -o ro"
+	fi
+
+	mount $opts "$@"
+	return $?
+}
+
+function mount_rw # <dataset mountpoint | mountpoint>
+{
+	typeset opts=
+	if is_freebsd; then
+		opts="-t zfs -w"
+	else
+		opts="-t zfs -o rw"
+	fi
+
+	mount $opts "$@"
+	return $?
+}
+
+function remount_ro # <dataset mountpoint | mountpoint>
+{
+	typeset opts=
+	if is_freebsd; then
+		opts="-t zfs -ur"
+	else
+		opts="-o remount,ro"
+	fi
+
+	mount $opts "$@"
+	return $?
+}
+
+function remount_rw # <dataset mountpoint | mountpoint>
+{
+	typeset opts=
+	if is_freebsd; then
+		opts="-t zfs -uw"
+	else
+		opts="-o remount,rw"
+	fi
+
+	mount $opts "$@"
+	return $?
+}
+
+#
+# Verify that $mountpoint is mounted readonly
+# This is preferred over "log_mustnot touch $fs" because we actually want to
+# verify the error returned is EROFS
+#
+function mount_is_ro # mountpoint
+{
+	typeset mountpoint="$1"
+
+	file_write -o create -f "$mountpoint/file.dat"
+	typeset ret=$?	# 30 is EROFS; typeset keeps ret out of the global scope
+	if [[ $ret != 30 ]]; then
+		log_fail "Writing to $mountpoint did not return EROFS ($ret)."
+	fi
+}
+
+function mount_is_rw # mountpoint
+{
+	typeset mountpoint="$1"
+	log_must touch $mountpoint/file.dat
+}
+
+# Get the read-only/read-write option for $mountpoint
+# Prints either "ro" or "rw", or nothing if $mountpoint is not in the mount
+# table, or is not a ZFS mount.
+function mount_get_ro_rw # mountpoint
+{
+	typeset mountpoint="$1"
+
+	if is_freebsd; then
+		# tank/hello  /tank/hello  zfs  rw,nfsv4acls  0 0
+		mount -p | \
+		awk -v mountpoint="$mountpoint" '
+		    $2 != mountpoint || $3 != "zfs" { next }
+		    $4 ~ /(^|,)ro(,|$)/ { print "ro" }
+		    $4 ~ /(^|,)rw(,|$)/ { print "rw" }'
+	else
+		# tank/hello  /tank/hello  zfs  rw,relatime,xattr,noacl,casesensitive  0 0
+		awk -v mountpoint="$mountpoint" '
+		    $2 != mountpoint || $3 != "zfs" { next }
+		    $4 ~ /(^|,)ro(,|$)/ { print "ro" }
+		    $4 ~ /(^|,)rw(,|$)/ { print "rw" }' /proc/mounts
+	fi
+}
+
+# Verify that $mountpoint is mounted with a "read-only" option
+function mount_has_ro_option # mountpoint
+{
+	typeset ropt=$(mount_get_ro_rw "$1")
+	log_must test "$ropt" == "ro"	# quoted: empty when not a ZFS mount
+}
+
+# Verify that $mountpoint is mounted with a "read-write" option
+function mount_has_rw_option # mountpoint
+{
+	typeset ropt=$(mount_get_ro_rw "$1")
+	log_must test "$ropt" == "rw"	# quoted: empty when not a ZFS mount
+}
+
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh
index c54128f7b9e..a16d17a1229 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_remount.ksh
@@ -23,6 +23,7 @@
 
 #
 # Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+# Copyright (c) 2026, TrueNAS.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -54,54 +55,6 @@ function cleanup
 	return 0
 }
 
-if is_freebsd; then
-	typeset RO="-t zfs -ur"
-	typeset RW="-t zfs -uw"
-else
-	typeset RO="-o remount,ro"
-	typeset RW="-o remount,rw"
-fi
-
-#
-# Verify the $filesystem is mounted readonly
-# This is preferred over "log_mustnot touch $fs" because we actually want to
-# verify the error returned is EROFS
-#
-function readonlyfs # filesystem
-{
-	typeset filesystem="$1"
-
-	file_write -o create -f $filesystem/file.dat
-	ret=$?
-	if [[ $ret != 30 ]]; then
-		log_fail "Writing to $filesystem did not return EROFS ($ret)."
-	fi
-}
-
-#
-# Verify $dataset is mounted with $option
-#
-function checkmount # dataset option
-{
-	typeset dataset="$1"
-	typeset option="$2"
-	typeset options=""
-
-	if is_freebsd; then
-		options=$(mount -p | awk -v ds="$dataset" '$1 == ds { print $4 }')
-	else
-		options=$(awk -v ds="$dataset" '$1 == ds { print $4 }' /proc/mounts)
-	fi
-	if [[ "$options" == '' ]]; then
-		log_fail "Dataset $dataset is not mounted"
-	elif [[ ! -z "${options##*$option*}" ]]; then
-		log_fail "Dataset $dataset is not mounted with expected "\
-		    "option $option ($options)"
-	else
-		log_note "Dataset $dataset is mounted with option $option"
-	fi
-}
-
 log_assert "Verify remount functionality on both filesystem and snapshots"
 
 log_onexit cleanup
@@ -117,35 +70,35 @@ MNTPSNAP="$TESTDIR/zfs_snap_mount"
 log_must mkdir -p $MNTPSNAP
 
 # 2. Verify we can (re)mount the dataset readonly/read-write
-log_must touch $MNTPFS/file.dat
-checkmount $TESTFS 'rw'
-log_must mount $RO $TESTFS $MNTPFS
-readonlyfs $MNTPFS
-checkmount $TESTFS 'ro'
-log_must mount $RW $TESTFS $MNTPFS
-log_must touch $MNTPFS/file.dat
-checkmount $TESTFS 'rw'
+mount_is_rw $MNTPFS
+mount_has_rw_option $MNTPFS
+log_must remount_ro $TESTFS $MNTPFS
+mount_is_ro $MNTPFS
+mount_has_ro_option $MNTPFS
+log_must remount_rw $TESTFS $MNTPFS
+mount_is_rw $MNTPFS
+mount_has_rw_option $MNTPFS
 
 if is_linux; then
 	# 3. Verify we can (re)mount the snapshot readonly
-	log_must mount -t zfs $TESTSNAP $MNTPSNAP
-	readonlyfs $MNTPSNAP
-	checkmount $TESTSNAP 'ro'
-	log_must mount $RO $TESTSNAP $MNTPSNAP
-	readonlyfs $MNTPSNAP
-	checkmount $TESTSNAP 'ro'
+	log_must mount_default $TESTSNAP $MNTPSNAP
+	mount_is_ro $MNTPSNAP
+	mount_has_ro_option $MNTPSNAP
+	log_must remount_ro $TESTSNAP $MNTPSNAP
+	mount_is_ro $MNTPSNAP
+	mount_has_ro_option $MNTPSNAP
 	log_must umount $MNTPSNAP
 fi
 
 # 4. Verify we can't remount a snapshot read-write
 # The "mount -o rw" command will succeed but the snapshot is mounted readonly.
 # The "mount -o remount,rw" command must fail with an explicit error.
-log_must mount -t zfs -o rw $TESTSNAP $MNTPSNAP
-readonlyfs $MNTPSNAP
-checkmount $TESTSNAP 'ro'
-log_mustnot mount $RW $TESTSNAP $MNTPSNAP
-readonlyfs $MNTPSNAP
-checkmount $TESTSNAP 'ro'
+log_must mount_rw $TESTSNAP $MNTPSNAP
+mount_is_ro $MNTPSNAP
+mount_has_ro_option $MNTPSNAP
+log_mustnot remount_rw $TESTSNAP $MNTPSNAP
+mount_is_ro $MNTPSNAP
+mount_has_ro_option $MNTPSNAP
 log_must umount $MNTPSNAP
 
 # 5. Verify we can remount a dataset readonly and unmount it with
@@ -153,8 +106,8 @@ log_must umount $MNTPSNAP
 log_must eval "echo 'password' | zfs create -o sync=disabled \
     -o encryption=on -o keyformat=passphrase $TESTFS/crypt"
 CRYPT_MNTPFS="$(get_prop mountpoint $TESTFS/crypt)"
-log_must touch $CRYPT_MNTPFS/file.dat
-log_must mount $RO $TESTFS/crypt $CRYPT_MNTPFS
+mount_is_rw $CRYPT_MNTPFS
+log_must remount_ro $TESTFS/crypt $CRYPT_MNTPFS
 log_must umount -f $CRYPT_MNTPFS
 sync_pool $TESTPOOL
 
@@ -163,10 +116,10 @@ log_must zpool export $TESTPOOL
 log_must zpool import -o readonly=on $TESTPOOL
 
 # 7. Verify we can't remount its filesystem read-write
-readonlyfs $MNTPFS
-checkmount $TESTFS 'ro'
-log_mustnot mount $RW $MNTPFS
-readonlyfs $MNTPFS
-checkmount $TESTFS 'ro'
+mount_is_ro $MNTPFS
+mount_has_ro_option $MNTPFS
+log_mustnot remount_rw $MNTPFS
+mount_is_ro $MNTPFS
+mount_has_ro_option $MNTPFS
 
 log_pass "Both filesystem and snapshots can be remounted correctly."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_ro_rw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_ro_rw.ksh
new file mode 100755
index 00000000000..15e78e6fd88
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_ro_rw.ksh
@@ -0,0 +1,130 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+#
+# we set up and mount multiple times, with these combinations:
+# - readonly property: on, off
+# - mount method: mount(8) (mountpoint=legacy), zfs-mount(8) (mountpoint=path)
+# - mount option: [none], ro, rw
+#
+# after each mount, we check whether we ended up mounting read-only or
+# read-write, and note the result. once we've done them all, we compare the
+# result set to the "correct" set for this platform (by observation). the
+# test passes if they match, and fails if they don't.
+#
+#        readonly     |         on          |         off         |
+#        mount method |  legacy  |   path   |  legacy  |   path   |
+#        mount option | -- ro rw | -- ro rw | -- ro rw | -- ro rw |
+typeset -a rs_linux=(   rw ro rw   ro ro rw   rw ro rw   rw ro rw )
+typeset -a rs_freebsd=( ro ro ro   ro ro rw   rw ro rw   rw ro rw )
+
+if is_linux ; then	# bind rs_wanted (a nameref) to this platform's table
+	typeset -n rs_wanted=rs_linux
+elif is_freebsd ; then
+	typeset -n rs_wanted=rs_freebsd
+else
+	log_unsupported "no result set defined for this platform"
+fi
+
+verify_runnable "both"
+
+testfs=$TESTPOOL/$TESTFS
+testmnt=$TESTDIR/mountpoint
+
+function cleanup # put $testfs properties back and drop the scratch mountpoint
+{
+	log_must zfs inherit -S canmount $testfs
+	log_must zfs inherit readonly $testfs
+	log_must zfs inherit mountpoint $testfs
+	log_must rm -rf $testmnt
+}
+
+log_assert "Verify combinations of readonly/readwrite produce correct mount."
+
+log_onexit cleanup
+
+
+# setup
+log_must datasetexists $testfs
+log_must zfs set canmount=noauto $testfs
+umount $testfs
+
+
+typeset -a rs=()
+# one result (ro or rw) is appended to rs per combination, in table order
+for readonly in on off ; do
+	for method in legacy path ; do
+		for option in default ro rw ; do
+
+			log_must zfs set readonly=$readonly $testfs
+
+			if [[ $method == 'legacy' ]] ; then
+				log_must zfs set mountpoint=legacy $testfs
+			else
+				log_must zfs set mountpoint=$testmnt $testfs
+			fi
+
+			# recreate the mountpoint. even if it wasn't mounted,
+			# changing the mountpoint property can remove it
+			log_must mkdir -p $testmnt
+
+			# issue the mount with the wanted method and option
+			case $method in
+			legacy)
+				case $option in
+				default) log_must mount_default $testfs $testmnt ;;
+				ro)      log_must mount_ro $testfs $testmnt ;;
+				rw)      log_must mount_rw $testfs $testmnt ;;
+				esac
+			;;
+			path)
+				case $option in
+				default)  log_must zfs mount $testfs ;;
+				ro)       log_must zfs mount -o ro $testfs ;;
+				rw)       log_must zfs mount -o rw $testfs ;;
+				esac
+			;;
+			esac
+			# record how the filesystem actually ended up mounted
+			result=$(mount_get_ro_rw $testmnt)
+			rs+=($result)
+			log_note "result: $result"
+
+			log_must umount $testfs
+		done
+	done
+done
+
+log_note "results: ${rs[@]}"
+log_note "wanted:  ${rs_wanted[@]}"
+
+log_must test "${rs[*]}" == "${rs_wanted[*]}"
+
+log_pass "All mounts correct for this platform."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh
index 0d2a39be6b5..c8a69c09aac 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_metaslab_leak.ksh
@@ -31,7 +31,7 @@ verify_runnable "global"
 
 function cleanup
 {
-	zpool destroy $TESTPOOL
+	destroy_pool $TESTPOOL
 	rm $tmp
 }
 
@@ -58,7 +58,7 @@ log_must eval "zdb -m --allocated-map $TESTPOOL > $tmp"
 log_must zpool destroy $TESTPOOL
 
 log_must zpool create $TESTPOOL $DISKS
-log_must zpool export $TESTPOOL
+log_must_busy zpool export $TESTPOOL
 log_must eval "zhack metaslab leak $TESTPOOL < $tmp"
 log_must zpool import $TESTPOOL
 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh
index 93c320da6fd..f08e4fb6472 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh
@@ -23,11 +23,15 @@
 
 #
 # Copyright (c) 2024, Klara Inc.
+# Copyright (c) 2026, Christos Longros <chris.longros@gmail.com>
 #
 
 #
-# TODO: this only checks that the set of valid device fault types. It should
-#       check all the other options, and that they work, and everything really.
+# This covers the device, label, object, delay and panic injection modes:
+# every valid value is accepted and unknown values are rejected.
+# A final pass also confirms that a registered injection actually
+# executes by watching the inject counter advance after triggering
+# the desired injected error.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -39,6 +43,7 @@ log_assert "Check zinject parameters."
 log_onexit cleanup
 
 DISK1=${DISKS%% *}
+TESTFILE=/$TESTPOOL/zinject_args.file
 
 function cleanup
 {
@@ -56,8 +61,139 @@ function test_device_fault
 	zinject -c all
 }
 
+function test_device_fault_neg # unknown -e or -T values must be rejected
+{
+	log_mustnot eval "zinject -d $DISK1 -e bogus -T read $TESTPOOL"
+	log_mustnot eval "zinject -d $DISK1 -e io -T bogus $TESTPOOL"
+	zinject -c all
+}
+
+function test_label_fault
+{
+	# label regions this test expects zinject -L to accept
+	for l in nvlist pad1 pad2 uber; do
+		log_must eval \
+		    "zinject -d $DISK1 -e io -L $l $TESTPOOL"
+	done
+	zinject -c all
+}
+
+function test_label_fault_neg # an unknown -L label name must be rejected
+{
+	log_mustnot eval "zinject -d $DISK1 -e io -L bogus $TESTPOOL"
+	zinject -c all
+}
+
+function test_object_fault # every -t object type is accepted with -e io
+{
+	log_must dd if=/dev/urandom of=$TESTFILE bs=128k count=1
+	log_must zpool sync $TESTPOOL
+	# data and dnode faults are registered against the file itself
+	for t in data dnode; do
+		log_must eval "zinject -t $t -e io -f 0.001 $TESTFILE"
+	done
+	zinject -c all
+	# the remaining object types are registered against the pool
+	for t in mos mosdir metaslab config bpobj spacemap errlog; do
+		log_must eval "zinject -t $t -e io -f 0.001 $TESTPOOL"
+	done
+	zinject -c all
+}
+
+function test_object_fault_neg # bad -t / -e values must be rejected
+{
+	log_mustnot eval "zinject -t bogus -e io $TESTPOOL"
+	log_mustnot eval "zinject -t data -e bogus $TESTFILE"
+	# -t data only accepts checksum or io as the error type.
+	log_mustnot eval "zinject -t data -e nxio $TESTFILE"
+	zinject -c all
+}
+
+function test_delay_fault # well-formed -D specs, with and without -T
+{
+	log_must eval "zinject -d $DISK1 -D 10:1 $TESTPOOL"
+	log_must eval "zinject -d $DISK1 -D 25:2 -T read $TESTPOOL"
+	log_must eval "zinject -d $DISK1 -D 25:2 -T write $TESTPOOL"
+	zinject -c all
+}
+
+function test_delay_fault_neg # malformed -D specs (0:1, 10, foo) must fail
+{
+	log_mustnot eval "zinject -d $DISK1 -D 0:1 $TESTPOOL"
+	log_mustnot eval "zinject -d $DISK1 -D 10 $TESTPOOL"
+	log_mustnot eval "zinject -d $DISK1 -D foo $TESTPOOL"
+	zinject -c all
+}
+
+function test_panic_fault # -p is accepted and shows up in the handler list
+{
+	# An unmatched function tag so zio_handle_panic_injection() never fires.
+	log_must eval "zinject -p zfs_test_no_such_fn $TESTPOOL"
+	log_must eval "zinject -p zfs_test_no_such_fn $TESTPOOL 1"
+	zinject | grep -q zfs_test_no_such_fn || \
+	    log_fail "panic function was not registered"
+	zinject -c all
+}
+
+function test_panic_fault_neg # -p combined with -d, -t or -f must be rejected
+{
+	log_mustnot eval "zinject -p f -d $DISK1 $TESTPOOL"
+	log_mustnot eval "zinject -p f -t data $TESTFILE"
+	log_mustnot eval "zinject -p f -f 50 $TESTPOOL"
+	zinject -c all
+}
+
+# Print the last column (the inject counter) of the first handler row.
+function inject_count
+{
+	zinject | awk '/^ *[0-9]/ { print $NF; exit }'
+}
+
+function verify_injection # registered handlers must actually fire
+{
+	typeset cnt
+	# write a test file with caching off (presumably so reads hit disk)
+	log_must zfs set primarycache=none $TESTPOOL
+	log_must dd if=/dev/urandom of=$TESTFILE bs=128k count=1
+	log_must zpool sync $TESTPOOL
+	# read fault on $DISK1: read the file back and check the counter
+	log_must eval "zinject -d $DISK1 -e io -T read -f 100 $TESTPOOL"
+	dd if=$TESTFILE of=/dev/null bs=128k count=1 >/dev/null 2>&1 || true
+	cnt=$(inject_count)
+	[[ -n $cnt && $cnt -gt 0 ]] || \
+	    log_fail "device-fault injection did not execute (inject=$cnt)"
+	zinject -c all
+	# checksum fault on the file's data: same read-back check
+	log_must eval "zinject -t data -e checksum -f 100 $TESTFILE"
+	dd if=$TESTFILE of=/dev/null bs=128k count=1 >/dev/null 2>&1 || true
+	cnt=$(inject_count)
+	[[ -n $cnt && $cnt -gt 0 ]] || \
+	    log_fail "object-fault injection did not execute (inject=$cnt)"
+	zinject -c all
+	# write delay on $DISK1: rewrite and sync, then check the counter
+	log_must eval "zinject -d $DISK1 -D 5:1 -T write $TESTPOOL"
+	log_must dd if=/dev/urandom of=$TESTFILE bs=128k count=1
+	log_must zpool sync $TESTPOOL
+	cnt=$(inject_count)
+	[[ -n $cnt && $cnt -gt 0 ]] || \
+	    log_fail "delay injection did not execute (inject=$cnt)"
+	zinject -c all
+	# put primarycache back to its inherited value
+	log_must zfs inherit primarycache $TESTPOOL
+}
+
 default_mirror_setup_noexit $DISKS
 
 test_device_fault
+test_device_fault_neg
+test_label_fault
+test_label_fault_neg
+test_object_fault
+test_object_fault_neg
+test_delay_fault
+test_delay_fault_neg
+test_panic_fault
+test_panic_fault_neg
+verify_injection
 
 log_pass "zinject parameters work as expected."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh
index 530661a686a..92c97aacd84 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh
@@ -82,7 +82,7 @@ log_must zpool reopen $TESTPOOL1
 
 typeset expandsize=$(get_pool_prop expandsize $TESTPOOL1)
 log_note "pool expandsize: $expandsize"
-if [[ "$zpool_expandsize" = "-" ]]; then
+if [[ "$expandsize" = "-" ]]; then
 	log_fail "pool $TESTPOOL1 did not detect any " \
 	    "expandsize after reopen"
 fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
index 79992227169..be17821ba1a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
@@ -66,6 +66,7 @@ typeset -a properties=(
     trim_bytes
     removing
     allocating
+    rotational
     failfast
     checksum_n
     checksum_t
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
index 60088e6dd97..be3344326e9 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_parallel_pos.ksh
@@ -114,7 +114,7 @@ wait
 parallel_time=$SECONDS
 log_note "asyncronously imported 4 pools in $parallel_time seconds"
 
-log_must test $parallel_time -lt $(($sequential_time / 2))
+log_must test $parallel_time -lt $(($sequential_time * 3 / 4))
 
 #
 # export pools with import delay injectors
@@ -133,6 +133,6 @@ log_must zpool import -a -d $DEVICE_DIR -f
 parallel_time=$SECONDS
 log_note "asyncronously imported 4 pools in $parallel_time seconds"
 
-log_must test $parallel_time -lt $(($sequential_time / 2))
+log_must test $parallel_time -lt $(($sequential_time * 3 / 4))
 
 log_pass "Pool imports occur in parallel"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_inherit.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_inherit.ksh
new file mode 100755
index 00000000000..2694e3278d9
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_inherit.ksh
@@ -0,0 +1,115 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2026, Klara, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+#
+# zpool set can set the failfast property to 'inherit'
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Verify that we can set 'failfast' to various values, including inherit
+# 3. Verify that the root vdev cannot be set to inherit
+#
+
+verify_runnable "global"
+
+function cleanup # destroy the test pool and remove its backing files
+{
+	destroy_pool $TESTPOOL1
+	rm -f $FILEVDEV1 $FILEVDEV2 $FILEVDEV3
+}
+
+function get_failfast # vdev... - print failfast for the vdev(s) in $TESTPOOL1
+{
+	zpool get -H -o value failfast $TESTPOOL1 "$@"
+}
+
+log_onexit cleanup
+
+log_assert "zpool set can configure 'failfast' property to inherit"
+FILEVDEV1="$TEST_BASE_DIR/zpool_set_inherit1.$$.dat"
+FILEVDEV2="$TEST_BASE_DIR/zpool_set_inherit2.$$.dat"
+FILEVDEV3="$TEST_BASE_DIR/zpool_set_inherit3.$$.dat"
+
+log_must truncate -s $MINVDEVSIZE $FILEVDEV1
+log_must truncate -s $MINVDEVSIZE $FILEVDEV2
+log_must truncate -s $MINVDEVSIZE $FILEVDEV3
+
+log_must zpool create -f $TESTPOOL1 $FILEVDEV1 mirror $FILEVDEV2 $FILEVDEV3
+failfast=$(get_failfast $FILEVDEV1)
+[[ "$failfast" == "inherit" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=on $TESTPOOL1 $FILEVDEV1
+failfast=$(get_failfast $FILEVDEV1)
+[[ "$failfast" == "on" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=off $TESTPOOL1 $FILEVDEV1
+failfast=$(get_failfast $FILEVDEV1)
+[[ "$failfast" == "off" ]] || log_fail "incorrect failfast value: $failfast"
+log_must zpool set failfast=inherit $TESTPOOL1 $FILEVDEV1
+log_must test "$(get_failfast $FILEVDEV1)" == "inherit"	# read it back
+
+failfast=$(get_failfast $FILEVDEV2)
+[[ "$failfast" == "inherit" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=on $TESTPOOL1 $FILEVDEV2
+failfast=$(get_failfast $FILEVDEV2)
+[[ "$failfast" == "on" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=off $TESTPOOL1 $FILEVDEV2
+failfast=$(get_failfast $FILEVDEV2)
+[[ "$failfast" == "off" ]] || log_fail "incorrect failfast value: $failfast"
+log_must zpool set failfast=inherit $TESTPOOL1 $FILEVDEV2
+log_must test "$(get_failfast $FILEVDEV2)" == "inherit"	# read it back
+
+failfast=$(get_failfast mirror-1)
+[[ "$failfast" == "inherit" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=on $TESTPOOL1 mirror-1
+failfast=$(get_failfast mirror-1)
+[[ "$failfast" == "on" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=off $TESTPOOL1 mirror-1
+failfast=$(get_failfast mirror-1)
+[[ "$failfast" == "off" ]] || log_fail "incorrect failfast value: $failfast"
+log_must zpool set failfast=inherit $TESTPOOL1 mirror-1
+log_must test "$(get_failfast mirror-1)" == "inherit"	# read it back
+
+failfast=$(get_failfast root)
+[[ "$failfast" == "on" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_must zpool set failfast=off $TESTPOOL1 root
+failfast=$(get_failfast root)
+[[ "$failfast" == "off" ]] || log_fail "incorrect failfast value: $failfast"
+
+log_mustnot zpool set failfast=inherit $TESTPOOL1 root
+
+
+log_pass "zpool set can configure 'failfast' property to inherit"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
index e37f8e44c1a..a36649bc263 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh
@@ -73,7 +73,7 @@ log_must mkdir "$TESTDIR"
 log_must truncate -s $LARGESIZE "$LARGEFILE"
 log_must zpool create -O compression=off $TESTPOOL "$LARGEFILE"
 log_must mkfile $(( floor(LARGESIZE * 0.80) )) /$TESTPOOL/file
-sync_all_pools
+sync_pool $TESTPOOL
 
 new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}')
 log_must test $new_size -le $LARGESIZE
@@ -93,12 +93,8 @@ log_must test $new_size -gt $((4 * floor(LARGESIZE * 0.70) ))
 # Perform a partial trim, we expect it to skip most of the new metaslabs
 # which have never been used and therefore do not need be trimmed.
 log_must set_tunable64 TRIM_METASLAB_SKIP 1
-log_must zpool trim $TESTPOOL
-log_must set_tunable64 TRIM_METASLAB_SKIP 0
-
-while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do
-	sleep 0.5
-done
+log_must zpool trim -w $TESTPOOL
+sync_pool $TESTPOOL true
 
 new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}')
 log_must test $new_size -gt $LARGESIZE
@@ -106,11 +102,9 @@ log_must test $new_size -gt $LARGESIZE
 # Perform a full trim, all metaslabs will be trimmed the pool vdev
 # size will be reduced but not down to its original size due to the
 # space usage of the new metaslabs.
-log_must zpool trim $TESTPOOL
-
-while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do
-	sleep 0.5
-done
+log_must set_tunable64 TRIM_METASLAB_SKIP 0
+log_must zpool trim -w $TESTPOOL
+sync_pool $TESTPOOL true
 
 new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}')
 log_must test $new_size -le $(( 2 * LARGESIZE))
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh
index a2b3464b2bf..b1c12f1306a 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh
@@ -38,9 +38,8 @@
 #
 # STRATEGY:
 # 1. Set the interval to 1 and count to 4.
-# 2. Sleep for 5 seconds.
-# 3. Verify that the output has 4 records.
-# 4. Set interval to 0.5 and count to 1 to test floating point intervals.
+# 2. Verify that the output has 4 records.
+# 3. Set interval to 0.5 and count to 1 to test floating point intervals.
 
 verify_runnable "both"
 
@@ -61,8 +60,7 @@ if ! is_global_zone ; then
 	TESTPOOL=${TESTPOOL%%/*}
 fi
 
-log_must eval "zpool iostat $TESTPOOL 1 4 > $tmpfile 2>&1 &"
-log_must sleep 5
+log_must eval "zpool iostat $TESTPOOL 1 4 > $tmpfile 2>&1"
 stat_count=$(grep -c $TESTPOOL $tmpfile)
 
 if [[ $stat_count -ne 4 ]]; then
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone.ksh
new file mode 100755
index 00000000000..57f54d93ad4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone.ksh
@@ -0,0 +1,120 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+#
+# DESCRIPTION:
+#	Verify that block cloning interacts correctly with dedup when the DDT
+#	entry for the block is still present.  In this case brt_pending_apply_vdev()
+#	calls ddt_addref() which succeeds, so the extra reference is tracked in
+#	the DDT rather than in the BRT.
+#
+# STRATEGY:
+#	1. Create a pool with block_cloning enabled and dedup=on
+#	2. Write a file (4 blocks, unique DDT entries, refcnt=1)
+#	3. Clone the file - ddt_addref() bumps DDT refcnt to 2, entries move
+#	   from unique to duplicate table; no BRT entries are created
+#	4. Write a third copy via dd - DDT refcnt becomes 3
+#	5. Delete files in sequence, verifying DDT counts and zdb -b at each step
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+log_assert "Block cloning with live DDT entries uses ddt_addref, not BRT"
+
+# Flush DDT log every TXG so entries appear in the ZAP immediately.
+log_must save_tunable DEDUP_LOG_TXG_MAX
+log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
+
+function cleanup
+{
+	# Drop the pool if the test got far enough to create it, then
+	# restore the tunable saved before the test changed it.
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	log_must restore_tunable DEDUP_LOG_TXG_MAX
+}
+
+log_onexit cleanup
+
+# we disable compression so our writes create predictable results on disk
+# Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
+log_must zpool create -f \
+    -o feature@block_cloning=enabled \
+    -O dedup=on \
+    -O compression=off \
+    -O xattr=sa \
+    $TESTPOOL $DISKS
+
+log_must zfs create -o recordsize=128k $TESTPOOL/$TESTFS
+typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+
+# Write unique data: 4 blocks, each gets a DDT entry with refcnt=1.
+log_must dd if=/dev/urandom of=$mountpoint/file1 bs=128k count=4
+sync_pool $TESTPOOL
+
+log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'"
+
+# Clone file1.  The extra reference goes into the DDT rather than the BRT.
+# The entries move from unique (refcnt=1) to duplicate (refcnt=2).
+log_must clonefile -f $mountpoint/file1 $mountpoint/clone1
+sync_pool $TESTPOOL
+
+log_must eval \
+    "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'"
+log_must zdb -b $TESTPOOL
+
+# Write a third copy via dd — DDT refcnt becomes 3.
+log_must dd if=$mountpoint/file1 of=$mountpoint/file2 bs=128k
+sync_pool $TESTPOOL
+
+log_must eval \
+    "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'"
+log_must zdb -b $TESTPOOL
+
+# Delete the clone — DDT refcnt drops to 2, still duplicate.
+log_must rm $mountpoint/clone1
+sync_pool $TESTPOOL
+
+log_must eval \
+    "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'"
+log_must zdb -b $TESTPOOL
+
+# Delete file2 — DDT refcnt drops to 1, entries move back to unique.
+log_must rm $mountpoint/file2
+sync_pool $TESTPOOL
+
+log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'"
+log_must zdb -b $TESTPOOL
+
+# Delete the original — DDT empty, blocks freed.
+log_must rm $mountpoint/file1
+sync_pool $TESTPOOL
+
+log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'"
+log_must zdb -b $TESTPOOL
+
+log_pass "Block cloning with live DDT entries uses ddt_addref, not BRT"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone_pruned.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone_pruned.ksh
new file mode 100755
index 00000000000..d01d09ac12e
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_bclone_pruned.ksh
@@ -0,0 +1,152 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+#
+# DESCRIPTION:
+#	Verify that block cloning works correctly when the DDT entry for a
+#	dedup block has been pruned.  When a block has the DEDUP bit set but
+#	no DDT entry (because it was pruned), cloning it must create a BRT
+#	entry to track the extra reference.  Freeing the original must
+#	consult the BRT rather than proceeding directly to a DVA free,
+#	otherwise the block is freed while the clone still references it.
+#
+# STRATEGY:
+#	1. Create a pool with both dedup and block_cloning enabled
+#	2. Write a file with dedup=on so blocks get DEDUP bit set in their BPs
+#	3. Prune the DDT to remove those entries (blocks remain, DEDUP bit
+#	   stays set in block pointers)
+#	4. Clone the file - brt_pending_apply_vdev() must fall back to BRT
+#	   since ddt_addref() returns B_FALSE for pruned entries
+#	5. Write a second copy via dd - same hash, new physical blocks, new
+#	   DDT entries at different DVAs from the BRT-tracked blocks
+#	6. Delete the clone first - must go through BRT, not DDT, even though
+#	   a matching DDT entry now exists for the same hash
+#	7. Delete the dd copy - DDT entries freed normally
+#	8. Delete the original - no DDT entry, no BRT entry, DVA freed
+#	9. Verify reference counts with zdb -b at each step
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+log_assert "Block cloning of dedup blocks with pruned DDT entries uses BRT"
+
+# Flush DDT log every TXG so entries appear in the ZAP immediately,
+# making ddtprune effective and test behavior predictable.
+log_must save_tunable DEDUP_LOG_TXG_MAX
+log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
+log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
+log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
+
+function cleanup # drop the pool, then restore both saved tunables
+{
+	poolexists $TESTPOOL && destroy_pool $TESTPOOL
+	typeset tunable
+	for tunable in DEDUP_LOG_TXG_MAX DEDUP_LOG_FLUSH_ENTRIES_MIN ; do
+		log_must restore_tunable $tunable
+	done
+}
+
+log_onexit cleanup
+
+log_must zpool create -f -o feature@block_cloning=enabled $TESTPOOL $DISKS
+
+log_must zfs create -o dedup=sha256 -o recordsize=128k $TESTPOOL/$TESTFS
+typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+
+# Write unique data: each block gets a DDT entry with refcnt=1.
+log_must dd if=/dev/urandom of=$mountpoint/file1 bs=128k count=8
+
+sync_pool $TESTPOOL
+
+# Verify DDT has entries before pruning.
+typeset entries=$(zpool status -D $TESTPOOL | \
+    grep "dedup: DDT entries" | awk '{print $4}')
+log_must test "$entries" -eq 8
+
+# Sleep 1s so the DDT entries are at least 1 second old.  ddtprune uses
+# an age-based cutoff and will silently skip entries that are too fresh.
+sleep 1
+
+# Prune all unique (refcnt=1) entries.  The blocks remain on disk and the
+# block pointers in file1 still have the DEDUP bit set, but there is no
+# longer a DDT entry for them.
+log_must zpool ddtprune -p 100 $TESTPOOL
+sync_pool $TESTPOOL
+
+# Confirm the prune actually removed all entries.
+entries=$(zpool status -D $TESTPOOL | \
+    grep "dedup: DDT entries" | awk '{print $4}')
+[[ -z "$entries" || "$entries" -eq 0 ]] || \
+    log_fail "DDT entries not pruned: $entries remain"
+
+# Clone file1.  brt_pending_apply_vdev() will see the DEDUP bit, call
+# ddt_addref(), receive B_FALSE (no DDT entry), and fall through to
+# create BRT entries instead.
+log_must clonefile -f $mountpoint/file1 $mountpoint/clone1
+sync_pool $TESTPOOL
+
+# BRT entries exist; reference counts must be consistent.
+log_must zdb -b $TESTPOOL
+
+# Write a second copy via dd.  Since the DDT was pruned, dedup can't find
+# an existing entry and writes new physical blocks at new DVAs, creating
+# fresh DDT entries with refcnt=1.  The BRT-tracked blocks (file1/clone1)
+# are at the old DVAs and are unaffected.
+log_must dd if=$mountpoint/file1 of=$mountpoint/file2 bs=128k
+sync_pool $TESTPOOL
+
+# Eight new unique DDT entries (file2's blocks); BRT still holds refs for
+# file1/clone1's old blocks.
+typeset entries=$(zpool status -D $TESTPOOL | \
+    grep "dedup: DDT entries" | awk '{print $4}')
+log_must test "$entries" -eq 8
+log_must zdb -b $TESTPOOL
+
+# Delete the clone first.  Its blocks carry the DEDUP bit and the same
+# hash as file2's DDT entries, but the DVAs differ — the free must go
+# through BRT, not DDT, leaving file2's DDT entries intact.
+log_must rm $mountpoint/clone1
+sync_pool $TESTPOOL
+
+entries=$(zpool status -D $TESTPOOL | \
+    grep "dedup: DDT entries" | awk '{print $4}')
+log_must test "$entries" -eq 8
+log_must zdb -b $TESTPOOL
+
+# Delete file2.  DDT entries freed; file1's BRT-tracked blocks unaffected.
+log_must rm $mountpoint/file2
+sync_pool $TESTPOOL
+log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'"
+log_must zdb -b $TESTPOOL
+
+# Delete the original.  No DDT entry, no BRT entry; DVA freed directly.
+log_must rm $mountpoint/file1
+sync_pool $TESTPOOL
+log_must zdb -b $TESTPOOL
+
+log_pass "Block cloning of dedup blocks with pruned DDT entries uses BRT"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh
index 1a82e5d30a1..11e2461d936 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh
@@ -44,14 +44,12 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with fast dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with fast dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=enabled \
     -O dedup=on \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -81,7 +79,7 @@ obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | awk '{ print $NF }')
 log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-sha256-zap- | wc -l) -eq 1
 
 # copy the file
-log_must cp /$TESTPOOL/file1 /$TESTPOOL/file2
+log_must dd if=/$TESTPOOL/file1 of=/$TESTPOOL/file2 bs=128k
 log_must zpool sync
 
 # now four entries in the duplicate table
@@ -104,4 +102,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'"
 # logical table now destroyed; containing object destroyed
 log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 0
 
+log_must zdb -b $TESTPOOL
+
 log_pass "basic dedup (FDT) operations work"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh
index 5f6eb7c3400..1885daf4489 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh
@@ -44,14 +44,12 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with fast dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with fast dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=enabled \
     -O dedup=on \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -117,4 +115,6 @@ obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | awk '{ print $NF }')
 # with only one ZAP inside
 log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-sha256-zap- | wc -l) -eq 1
 
+log_must zdb -b $TESTPOOL
+
 log_pass "dedup (FDT) retains version after import"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh
index 8028e4f0884..2bebed6965f 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_fdt_pacing.ksh
@@ -46,11 +46,9 @@ function cleanup
 
 log_onexit cleanup
 
-# Create a pool with fast dedup enabled. We disable block cloning to ensure
-# it doesn't get in the way of dedup.
+# Create a pool with fast dedup enabled.
 log_must zpool create -f \
     -o feature@fast_dedup=enabled \
-    -o feature@block_cloning=disabled \
     $TESTPOOL $DISKS
 
 # Create a filesystem with a small recordsize so that we get more DDT entries,
@@ -107,4 +105,6 @@ log_entries3=$(get_ddt_log_entries)
 # Verify there are 256 entries in the unique table.
 log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=256'"
 
+log_must zdb -b $TESTPOOL
+
 log_pass "dedup (FDT) paces out log entries appropriately"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh
index 3348614cb74..cc9a8694724 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh
@@ -37,14 +37,12 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with legacy dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with legacy dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=disabled \
     -O dedup=on \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -70,7 +68,7 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'"
 log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1
 
 # copy the file
-log_must cp /$TESTPOOL/file1 /$TESTPOOL/file2
+log_must dd if=/$TESTPOOL/file1 of=/$TESTPOOL/file2 bs=128k
 log_must zpool sync
 
 # now four entries in the duplicate table
@@ -93,4 +91,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'All DDTs are empty'"
 # logical table now destroyed; all DDT ZAPs removed
 log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 0
 
+log_must zdb -b $TESTPOOL
+
 log_pass "basic dedup (legacy) operations work"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh
index c962efaa7c5..03acaf09b39 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh
@@ -45,13 +45,11 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with legacy dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with legacy dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=disabled \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -102,4 +100,6 @@ log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-blake3 | wc -l) -eq 1
 obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-blake3 | awk '{ print $NF }')
 log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-.*-zap- | wc -l) -eq 1
 
+log_must zdb -b $TESTPOOL
+
 log_pass "legacy and FDT dedup tables on the same pool can happily coexist"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh
index 94f009fc0d0..2b610af1ebf 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh
@@ -45,14 +45,12 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with legacy dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with legacy dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=disabled \
     -O dedup=on \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -84,7 +82,7 @@ log_must zpool set feature@fast_dedup=enabled $TESTPOOL
 log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "enabled"
 
 # copy the file
-log_must cp /$TESTPOOL/file1 /$TESTPOOL/file2
+log_must dd if=/$TESTPOOL/file1 of=/$TESTPOOL/file2 bs=128k
 log_must zpool sync
 
 # feature should still be enabled
@@ -127,4 +125,6 @@ obj=$(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | awk '{ print $NF }')
 # with one ZAP inside
 log_must test $(zdb -dddd $TESTPOOL $obj | grep DDT-sha256-zap- | wc -l) -eq 1
 
+log_must zdb -b $TESTPOOL
+
 log_pass "legacy dedup tables work after upgrade; new dedup tables created as FDT"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh
index 9f6b1ef12a9..c137f7b9499 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh
@@ -37,14 +37,12 @@ function cleanup
 
 log_onexit cleanup
 
-# create a pool with legacy dedup enabled. we disable block cloning to ensure
-# it doesn't get in the way of dedup, and we disable compression so our writes
+# create a pool with legacy dedup enabled. we disable compression so our writes
 # create predictable results on disk
 # Use 'xattr=sa' to prevent selinux xattrs influencing our accounting
 log_must zpool create -f \
     -o feature@fast_dedup=disabled \
     -O dedup=on \
-    -o feature@block_cloning=disabled \
     -O compression=off \
     -O xattr=sa \
     $TESTPOOL $DISKS
@@ -102,4 +100,6 @@ log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'"
 # should be just one DDT ZAP in the MOS
 log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1
 
+log_must zdb -b $TESTPOOL
+
 log_pass "dedup (legacy) retains version after import"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh
index 6b4937cc4a2..d80fbe9795d 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_prune.ksh
@@ -69,12 +69,12 @@ function ddt_entries
 
 log_onexit cleanup
 
-log_must zpool create -f -o feature@block_cloning=disabled $TESTPOOL $DISKS
+log_must zpool create -f $TESTPOOL $DISKS
 
 log_must zfs create -o recordsize=512 -o dedup=on $TESTPOOL/$TESTFS
 typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)
 log_must dd if=/dev/urandom of=$mountpoint/f1 bs=512k count=1
-log_must cp $mountpoint/f1 $mountpoint/f2
+log_must dd if=$mountpoint/f1 of=$mountpoint/f2 bs=512k
 sync_pool $TESTPOOL
 entries=$(ddt_entries)
 log_note "ddt entries before: $entries"
@@ -95,5 +95,6 @@ new_entries=$(ddt_entries)
 [[ "$((entries / 4))" -eq "$new_entries" ]] || \
 	log_fail "DDT entries did not shrink enough: $entries -> $new_entries"
 
+log_must zdb -b $TESTPOOL
 
 log_pass "DDT pruning correctly removes non-duplicate entries"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh
index 597bad253ec..41586204333 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_zap_shrink.ksh
@@ -83,4 +83,6 @@ log_must zpool import $TESTPOOL
 nleafs=$(zdb -dddd $TESTPOOL "$zap_obj" | grep "Leaf blocks:" | awk -F\: '{print($2);}')
 log_must test $nleafs -lt $nleafs_old
 
+log_must zdb -b $TESTPOOL
+
 log_pass "ZAP object shrank after removing entries."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/devices/devices_common.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/devices/devices_common.kshlib
index 8024067ac9e..3298b49fec7 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/devices/devices_common.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/devices/devices_common.kshlib
@@ -54,9 +54,9 @@ function create_dev_file
 			# %t - major device type in hex
 			# %T - minor device type in hex
 			#
-			major=$(stat --dereference --format="%t" "$devstr")
-			minor=$(stat --dereference --format="%T" "$devstr")
-			log_must mknod $filename b "0x${major}" "0x${minor}"
+			major=$(printf '%d' 0x$(stat -L -c "%t" "$devstr"))
+			minor=$(printf '%d' 0x$(stat -L -c "%T" "$devstr"))
+			log_must mknod $filename b "${major}" "${minor}"
 			;;
 		*)
 			#
@@ -83,9 +83,9 @@ function create_dev_file
 			# %t - major device type in hex
 			# %T - minor device type in hex
 			#
-			major=$(stat --format="%t" /dev/null)
-			minor=$(stat --format="%T" /dev/null)
-			log_must mknod $filename c "0x${major}" "0x${minor}"
+			major=$(printf '%d' 0x$(stat -c "%t" /dev/null))
+			minor=$(printf '%d' 0x$(stat -c "%T" /dev/null))
+			log_must mknod $filename c "${major}" "${minor}"
 			;;
 		FreeBSD)
 			#
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
index 023f5b58a6e..529a6a8c3fe 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_multiple.ksh
@@ -84,8 +84,8 @@ for type in "mirror" "raidz" "raidz2" "raidz3" "draid2:1s"; do
 		log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
 		    $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
 		    spare $SPARE_DEV1
-		SPARE1=$SPARE_DEV1
-		SPARE2="draid2-0-0"
+		SPARE1="draid2-0-0"
+		SPARE2=$SPARE_DEV1
 	elif [ "$type" = "mirror" ]; then
 		# 1. Create a 3-way mirror pool with two hot spares
 		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
@@ -167,8 +167,8 @@ for type in "mirror" "raidz2" "raidz3" "draid2:1s"; do
 		log_must zpool create -f $TESTPOOL $type $SAFE_DEV1 \
 		    $SAFE_DEV2 $FAULT_DEV1 $FAULT_DEV2 $SAFE_DEV3 $SAFE_DEV4 \
 		    spare $SPARE_DEV1
-		SPARE1=$SPARE_DEV1
-		SPARE2="draid2-0-0"
+		SPARE1="draid2-0-0"
+		SPARE2=$SPARE_DEV1
 	elif [ "$type" = "mirror" ]; then
 		# 1. Create a 3-way mirror pool with two hot spares
 		truncate -s $MINVDEVSIZE $DATA_DEVS $SPARE_DEVS
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_rotational.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_rotational.ksh
new file mode 100755
index 00000000000..5378979a8bb
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/auto_spare_rotational.ksh
@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# ZED prefers the smallest sufficient spare when replacing a faulted
+# special vdev, regardless of spare list order.
+#
+# The 'rotational' property is persisted in the pool config for all leaf
+# vdevs so that spare selection can match device type even after the
+# original disk is gone.  ZED sorts spares to prefer a matching rotational
+# property and, among equally-matching spares, the smallest sufficient one.
+#
+# STRATEGY:
+# 1. Create a pool with a normal mirror, a special mirror, and two file
+#    spares of different sizes.  List the larger spare first so that the
+#    sorted order contradicts the list order.
+# 2. Fault a member of the special mirror; verify ZED activates the
+#    smaller sufficient spare, leaving the larger spare available.
+#
+
+verify_runnable "both"
+
+NORM1="$TEST_BASE_DIR/rotational-norm1"
+NORM2="$TEST_BASE_DIR/rotational-norm2"
+SPEC1="$TEST_BASE_DIR/rotational-spec1"
+SPEC2="$TEST_BASE_DIR/rotational-spec2"
+SPARE_SMALL="$TEST_BASE_DIR/rotational-spare-small"
+SPARE_LARGE="$TEST_BASE_DIR/rotational-spare-large"
+
+LARGE_SIZE=$((MINVDEVSIZE * 2))
+
+function cleanup
+{
+	log_must zinject -c all		# drop the fault injected on $SPEC1
+	destroy_pool $TESTPOOL		# pool first, then its backing files
+	rm -f $NORM1 $NORM2 $SPEC1 $SPEC2 $SPARE_SMALL $SPARE_LARGE
+}
+
+log_assert "ZED selects smallest sufficient spare for a faulted special vdev"
+log_onexit cleanup
+
+zed_events_drain
+
+log_must truncate -s $MINVDEVSIZE $NORM1 $NORM2 $SPEC1 $SPEC2 $SPARE_SMALL
+log_must truncate -s $LARGE_SIZE $SPARE_LARGE
+
+# SPARE_LARGE is listed first so that size-preference sorting is what
+# causes SPARE_SMALL to be selected, not merely list order.
+log_must zpool create -f $TESTPOOL \
+    mirror $NORM1 $NORM2 \
+    special mirror $SPEC1 $SPEC2 \
+    spare $SPARE_LARGE $SPARE_SMALL
+
+log_must zinject -d $SPEC1 -e io -T all -f 100 $TESTPOOL
+log_must zpool scrub $TESTPOOL
+
+log_note "Wait for ZED to auto-spare the special vdev"
+log_must wait_vdev_state $TESTPOOL $SPEC1 "FAULTED" 60
+log_must wait_hotspare_state $TESTPOOL $SPARE_SMALL "INUSE"
+
+# The larger spare must not have been activated.
+log_must wait_hotspare_state $TESTPOOL $SPARE_LARGE "AVAIL"
+
+log_must check_state $TESTPOOL "" "DEGRADED"
+
+log_pass "ZED activated the smallest sufficient spare for the special vdev"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
index 2b5a28b0620..ae8a4b2a648 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib
@@ -435,32 +435,38 @@ function verify_draid_pool
 
 	log_note "verify_draid_pool $pool $replace_mode"
 	log_must zpool scrub -w $pool
+	sync_pool $pool true
 
-	typeset -i cksum=$(cksum_pool $pool)
+	typeset status=$(zpool status -p $pool)
+	typeset -i cksum=$(echo "$status" | awk '
+	    !NF { isvdev = 0 }
+	    isvdev { errors += $NF }
+	    /CKSUM$/ { isvdev = 1 }
+	    END { print errors }')
 
 	if [[ "$replace_mode" = "healing" ]]; then
 		if [[ $cksum -gt 0 ]]; then
-			log_must zpool status -v $pool
+			log_note "$status"
 			log_fail "Unexpected CKSUM errors found for $pool ($cksum)"
 		fi
 
 		if ! check_pool_status $pool "scan" "repaired 0B"; then
-			log_must zpool status -v $pool
+			log_note "$status"
 			log_fail "Unexpected repair IO found for $pool ($cksum)"
 		fi
 	elif [[ "$replace_mode" = "sequential" ]]; then
 		if [[ $cksum -gt 0 ]]; then
-			log_must zpool status -v $pool
+			log_note "$status"
 			log_fail "Unexpected CKSUM errors found for $pool ($cksum)"
 		fi
 	elif [[ "$replace_mode" = "damaged" ]]; then
 		if [[ $cksum -lt 1 ]]; then
-			log_must zpool status -v $pool
+			log_note "$status"
 			log_fail "Expected CKSUM errors missing for $pool ($cksum)"
 		fi
 
 		if check_pool_status $pool "scan" "repaired 0B"; then
-			log_must zpool status -v $pool
+			log_note "$status"
 			log_fail "Expected repair IO missing for $pool ($cksum)"
 		fi
 	else
@@ -468,12 +474,12 @@ function verify_draid_pool
 	fi
 
 	if ! check_pool_status $pool "scan" "with 0 errors"; then
-		log_must zpool status -v $pool
+		log_note "$status"
 		log_fail "Unexpected repair errors found for $pool"
 	fi
 
 	if ! check_pool_status $pool "errors" "No known data errors"; then
-		log_must zpool status -v $pool
+		log_note "$status"
 		log_fail "Unexpected data errors found for $pool"
 	fi
 }
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh
index b94841aed15..cdd2d201e1a 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/removal/removal_with_export.ksh
@@ -29,7 +29,7 @@ log_onexit default_cleanup_noexit
 function callback
 {
 	test_removal_with_operation_kill
-	log_must zpool export $TESTPOOL
+	log_must_busy zpool export $TESTPOOL
 
 	#
 	# We are concurrently starting dd processes that will
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/cleanup.ksh
new file mode 100755
index 00000000000..8261885e651
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/cleanup.ksh
@@ -0,0 +1,27 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+destroy_pool $POOL
+destroy_pool $POOL2
+
+log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.cfg b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.cfg
new file mode 100644
index 00000000000..e4999a3ca29
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.cfg
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+read -r DISK1 DISK2 _ <<<"$DISKS"
+export DISK1 DISK2
+
+export POOL=$TESTPOOL
+export POOL2=$TESTPOOL2
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
new file mode 100644
index 00000000000..8e36b748439
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
@@ -0,0 +1,71 @@
+#!/bin/ksh
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.cfg
+
+#
+# Verify that the DRR_BEGIN records in the given send stream encode their
+# nvlist payloads with NV_ENCODE_XDR (and not NV_ENCODE_NATIVE).
+#
+# DRR_BEGIN records that carry an nvlist payload (raw sends, redacted sends,
+# resumed sends, and combinations thereof) must encode that payload with
+# NV_ENCODE_XDR so the resulting stream can be portably consumed across
+# endianness. Encoding the payload with NV_ENCODE_NATIVE produces a stream
+# that is unreadable on a receiver of the opposite endianness.
+#
+# zstream dump prints a single "nvlist encoding = ..." line per DRR_BEGIN
+# record that carries an nvlist payload. The possible values are:
+#
+#     NV_ENCODE_XDR
+#     NV_ENCODE_NATIVE (big-endian)
+#     NV_ENCODE_NATIVE (little-endian)
+#
+# Every test in this suite generates a stream whose DRR_BEGIN record
+# carries an nvlist payload, so the pass criterion is:
+#
+#   - At least one NV_ENCODE_XDR line appears, AND
+#   - No NV_ENCODE_NATIVE line appears.
+#
+# Requiring at least one XDR line catches the case where zstream dump
+# itself fails before producing any encoding output. Asserting on dump
+# content rather than dump exit status means a partial dump can still
+# fail the test on an NV_ENCODE_NATIVE seen before the failure point.
+#
+function verify_xdr_nvlist_encoding
+{
+	typeset stream=$1
+	typeset dump
+
+	# Bail out early when the stream file does not exist.
+	[[ -f "$stream" ]] ||
+	    log_fail "verify_xdr_nvlist_encoding: stream not found: $stream"
+	# stderr is dropped; the dump is judged on its content alone.
+	dump=$(zstream dump "$stream" 2>/dev/null)
+	# A single native-encoded payload is enough to fail.
+	echo "$dump" | grep -q 'NV_ENCODE_NATIVE' &&
+	    log_fail "verify_xdr_nvlist_encoding: " \
+	    "NV_ENCODE_NATIVE found in $stream"
+	# At least one XDR-encoded payload must have been reported.
+	echo "$dump" | grep -q 'NV_ENCODE_XDR' ||
+	    log_fail "verify_xdr_nvlist_encoding: " \
+	    "no NV_ENCODE_XDR found in $stream"
+}
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/setup.ksh
new file mode 100755
index 00000000000..609acba3a22
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/setup.ksh
@@ -0,0 +1,29 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+verify_disk_count "$DISKS" 2
+
+create_pool $POOL $DISK1
+create_pool $POOL2 $DISK2
+
+log_pass
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw.ksh
new file mode 100755
index 00000000000..9ba10d9e605
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw.ksh
@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A raw incremental send from a redaction bookmark on an encrypted dataset
+# (zfs send -w -i ds#book ds@snap) carries both BEGINNV_REDACT_FROM_SNAPS
+# and crypt_keydata in its DRR_BEGIN nvlist payload. Verify that this
+# combined payload is XDR-encoded and the stream can be received.
+#
+# Strategy:
+# 1. Create an encrypted source dataset with a redaction bookmark and a
+#    later snapshot.
+# 2. Establish a raw base on the receiver via zfs send -w of the bookmark's
+#    source snapshot.
+# 3. zfs send -w -i sendfs#book sendfs@s1 to a file.
+# 4. Verify that the resulting stream is XDR-encoded.
+# 5. Verify that the zfs receive succeeds.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_bookmark_raw_src"
+clonefs="$POOL/xdr_bookmark_raw_clone"
+recvfs="$POOL2/xdr_bookmark_raw_recv"
+keyfile="/$POOL/xdr_bookmark_raw.key"
+full_stream="/$POOL/xdr_bookmark_raw_full.zsend"
+incr_stream="/$POOL/xdr_bookmark_raw_incr.zsend"
+
+function cleanup
+{	# remove the datasets this test created, then its scratch files
+	if datasetexists $sendfs; then destroy_dataset $sendfs -R; fi
+	if datasetexists $recvfs; then destroy_dataset $recvfs -R; fi
+	rm -f $keyfile $full_stream $incr_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a raw incremental from a redaction bookmark is " \
+    "XDR-encoded and receivable"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+# The clone inherits encryption from $sendfs.
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Take @s1 with no intervening writes. See xdr_bookmark_raw_with_write.ksh
+# for a variant that includes a post-redact write; that variant exercises
+# a known kernel-side issue (#18491) and may flake.
+log_must zfs snapshot $sendfs@s1
+
+# Establish a raw base on the receiver.
+log_must eval "zfs send -w $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $recvfs < $full_stream"
+
+# Raw incremental from the redaction bookmark. This is the test focus.
+log_must eval "zfs send -w -i $sendfs#redaction-bookmark $sendfs@s1 > \
+    $incr_stream"
+verify_xdr_nvlist_encoding $incr_stream
+log_must eval "zfs receive $recvfs < $incr_stream"
+
+log_pass "BEGIN nvlist of a raw incremental from a redaction bookmark is " \
+    "XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw_with_write.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw_with_write.ksh
new file mode 100755
index 00000000000..c58735f04d4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_bookmark_raw_with_write.ksh
@@ -0,0 +1,107 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# This is the post-redact-write variant of xdr_bookmark_raw, separated out
+# because of a known issue (#18491) that causes it to fail roughly 30% of
+# the time. It's included here as a test for issue #18491 until the exact
+# source of that problem can be pinned down more specifically.
+#
+# Known issue: openzfs/zfs#18491
+#
+# On a freshly-created pool, `zfs send -w -i ds#book ds@snap` intermittently
+# fails with EACCES whenever there is data-modifying activity between the
+# `zfs redact` that created the bookmark and the subsequent send. This EACCES
+# is surfaced to userspace as the misleading message "dataset key must be
+# loaded," although the key remains loaded throughout.
+#
+# The reproducer script included in the issue report typically triggers the
+# problem within about 10 iterations on a fresh pool. Disk-sync mitigations
+# (zpool sync, with or without `-f`, with or without sleep, single or doubled,
+# applied at any reasonable point) do not avert the problem. CI runs that
+# include the test in this file reproduce the failure regularly (though
+# intermittently) across multiple distributions. xdr_bookmark_raw.ksh and
+# xdr_resume_bookmark_raw.ksh omit the post-redact write (which is not
+# essential to the test) and therefore run reliably.
+#
+# When this test fails, the failure marker is the libzfs warning
+# "dataset key must be loaded" on stderr from the `zfs send -w -i`
+# line below (the one that produces the incremental stream), and a
+# non-zero exit from that send. The test does not attempt to distinguish
+# the known-issue failure from other possible failures.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_bookmark_raw_with_write_src"
+clonefs="$POOL/xdr_bookmark_raw_with_write_clone"
+recvfs="$POOL2/xdr_bookmark_raw_with_write_recv"
+keyfile="/$POOL/xdr_bookmark_raw_with_write.key"
+full_stream="/$POOL/xdr_bookmark_raw_with_write_full.zsend"
+incr_stream="/$POOL/xdr_bookmark_raw_with_write_incr.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $keyfile $full_stream $incr_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a raw incremental from a redaction bookmark, " \
+    "with a post-redact write, is XDR-encoded and receivable " \
+    "(known to flake; see openzfs/zfs#18491)"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+# The clone inherits encryption from $sendfs.
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Post-redact write: the trigger for openzfs/zfs#18491.
+log_must dd if=/dev/urandom of=/$sendfs/f3 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s1
+
+# Establish a raw base on the receiver.
+log_must eval "zfs send -w $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $recvfs < $full_stream"
+
+# The next line is what races. On failure it exits with EACCES rendered
+# as "dataset key must be loaded".
+log_must eval "zfs send -w -i $sendfs#redaction-bookmark $sendfs@s1 > \
+    $incr_stream"
+verify_xdr_nvlist_encoding $incr_stream
+log_must eval "zfs receive $recvfs < $incr_stream"
+
+log_pass "BEGIN nvlist of a raw incremental from a redaction bookmark, " \
+    "with a post-redact write, is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_bookmark.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_bookmark.ksh
new file mode 100755
index 00000000000..ab04f6aa603
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_bookmark.ksh
@@ -0,0 +1,88 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# An incremental send from a redaction bookmark (zfs send -i ds#book ds@snap)
+# carries BEGINNV_REDACT_FROM_SNAPS in its DRR_BEGIN nvlist payload (via the
+# from_rl path). Verify that this payload is XDR-encoded and the stream can
+# be received.
+#
+# Strategy:
+# 1. Create a source dataset with a redaction bookmark.
+# 2. Send a redacted full stream from that bookmark's source snapshot
+#    and receive it into a second pool as a base.
+# 3. Add data and a new snapshot on the source.
+# 4. zfs send -i sendfs#redaction-bookmark sendfs@snap to a file.
+# 5. Verify XDR encoding in the resulting stream.
+# 6. Verify that zfs receive of the stream succeeds.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_incr_from_bookmark_src"
+clonefs="$POOL/xdr_incr_from_bookmark_clone"
+recvfs="$POOL2/xdr_incr_from_bookmark_recv"
+full_stream="/$POOL/xdr_incr_from_bookmark_full.zsend"
+incr_stream="/$POOL/xdr_incr_from_bookmark_incr.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $full_stream $incr_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of an incremental send from a redaction bookmark " \
+    "is XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Establish a base on the receiver.
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $recvfs < $full_stream"
+
+# Add a new snapshot on the source for the incremental.
+log_must dd if=/dev/urandom of=/$sendfs/f3 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s1
+
+# Generate an incremental send from the redaction bookmark. This fires
+# BEGINNV_REDACT_FROM_SNAPS via the from_rl path because the from-side
+# is a redaction bookmark.
+log_must eval "zfs send -i $sendfs#redaction-bookmark $sendfs@s1 > $incr_stream"
+verify_xdr_nvlist_encoding $incr_stream
+log_must eval "zfs receive $recvfs < $incr_stream"
+
+log_pass "BEGIN nvlist of an incremental send from a redaction bookmark " \
+    "is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_redacted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_redacted.ksh
new file mode 100755
index 00000000000..fc4d34c4346
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_incr_from_redacted.ksh
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# An incremental send whose from-side is a snapshot of a previously-redacted
+# dataset carries BEGINNV_REDACT_FROM_SNAPS in its DRR_BEGIN nvlist payload
+# via a different code path than incrementals from a redaction bookmark
+# (the dspp->numfromredactsnaps path). Verify that this payload is
+# XDR-encoded and that the stream can be received.
+#
+# Strategy:
+# 1. Produce a redacted dataset on a receiver via a redacted full send,
+#    leaving the receiver with a snapshot that has the
+#    SPA_FEATURE_REDACTED_DATASETS feature active.
+# 2. Establish the same base on a tertiary destination so we have somewhere
+#    to apply the incremental.
+# 3. Create a new snapshot of the receiver-side redacted dataset.
+# 4. zfs send -i mid@s0 mid@s1 to a file.
+# 5. Verify that the stream is XDR encoded.
+# 6. Verify that we can zfs receive the incremental onto the tertiary base.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_incr_from_redacted_src"
+clonefs="$POOL/xdr_incr_from_redacted_clone"
+midfs="$POOL2/xdr_incr_from_redacted_mid"
+tertiary="$POOL/xdr_incr_from_redacted_tertiary"
+full_stream="/$POOL/xdr_incr_from_redacted_full.zsend"
+incr_stream="/$POOL/xdr_incr_from_redacted_incr.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $midfs && destroy_dataset $midfs -R
+	datasetexists $tertiary && destroy_dataset $tertiary -R
+	rm -f $full_stream $incr_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of an incremental from a previously-redacted " \
+    "snapshot is XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Produce two receivers of the redacted full send: one we will re-send from
+# (mid) and one we will receive the incremental into (tertiary).
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $midfs < $full_stream"
+log_must eval "zfs receive $tertiary < $full_stream"
+
+# Create a fresh snapshot of the redacted receiver. The data has not changed
+# (and cannot be modified without mounting), but the snapshot alone is enough
+# to drive an incremental send that emits BEGINNV_REDACT_FROM_SNAPS.
+log_must zfs snapshot $midfs@s1
+
+# Create an incremental send from the redacted from-side. This fires
+# BEGINNV_REDACT_FROM_SNAPS via the dspp->numfromredactsnaps path because
+# $midfs@s0 has the SPA_FEATURE_REDACTED_DATASETS feature active.
+log_must eval "zfs send -i $midfs@s0 $midfs@s1 > $incr_stream"
+verify_xdr_nvlist_encoding $incr_stream
+log_must eval "zfs receive $tertiary < $incr_stream"
+
+log_pass "BEGIN nvlist of an incremental from a previously-redacted snapshot " \
+    "is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_raw.ksh
new file mode 100755
index 00000000000..c3a196650c6
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_raw.ksh
@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A raw send of an encrypted dataset (zfs send -w) carries a "crypt_keydata"
+# nested nvlist in its DRR_BEGIN nvlist payload. Verify that this payload is
+# XDR-encoded and that the stream can be received.
+#
+# Strategy:
+# 1. Create an encrypted dataset with one snapshot.
+# 2. zfs send -w to a file.
+# 3. Verify that the stream is XDR-encoded.
+# 4. Verify that zfs receive succeeds.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_raw_src"
+recvfs="$POOL2/xdr_raw_recv"
+keyfile="/$POOL/xdr_raw.key"
+stream="/$POOL/xdr_raw.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -r
+	datasetexists $recvfs && destroy_dataset $recvfs -r
+	rm -f $keyfile $stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a raw send of an encrypted dataset is " \
+    "XDR-encoded and receivable"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s1
+
+log_must eval "zfs send -w $sendfs@s1 > $stream"
+
+verify_xdr_nvlist_encoding $stream
+log_must eval "zfs receive $recvfs < $stream"
+
+log_pass "BEGIN nvlist of a raw send of an encrypted dataset is " \
+    "XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_full.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_full.ksh
new file mode 100755
index 00000000000..2bad9bebdaa
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_full.ksh
@@ -0,0 +1,72 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A redacted send (zfs send --redact <bookmark>) carries BEGINNV_REDACT_SNAPS
+# in its DRR_BEGIN nvlist payload. Verify that this payload is XDR-encoded and
+# the stream can be received.
+#
+# Strategy:
+# 1. Create a source dataset and a divergent clone.
+# 2. Create a redaction bookmark on the source snapshot relative to the
+#    clone snapshot.
+# 3. zfs send --redact <bookmark> sendfs@snap to a file.
+# 4. verify_xdr_nvlist_encoding on the stream.
+# 5. Verify that zfs receive succeeds.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_redacted_full_src"
+clonefs="$POOL/xdr_redacted_full_clone"
+recvfs="$POOL2/xdr_redacted_full_recv"
+stream="/$POOL/xdr_redacted_full.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a redacted send is XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $stream"
+verify_xdr_nvlist_encoding $stream
+log_must eval "zfs receive $recvfs < $stream"
+
+log_pass "BEGIN nvlist of a redacted send is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received.ksh
new file mode 100755
index 00000000000..a18b1f40594
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received.ksh
@@ -0,0 +1,84 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# Sending a snapshot from a previously-redacted dataset (one with the
+# SPA_FEATURE_REDACTED_DATASETS feature active, e.g., one that was received
+# from a redacted send) carries BEGINNV_REDACT_SNAPS in its DRR_BEGIN
+# nvlist payload via a different code path than the producer-side --redact
+# flag. Verify that this payload is XDR-encoded and that the stream can be
+# received.
+#
+# Strategy:
+# 1. Produce a redacted dataset on a receiver via a redacted full send.
+# 2. zfs send the received-redacted snapshot to a file.
+# 3. Verify XDR encoding on the new stream.
+# 4. Verify that a zfs receive of the new stream succeeds.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_redacted_received_src"
+clonefs="$POOL/xdr_redacted_received_clone"
+midfs="$POOL2/xdr_redacted_received_mid"
+recvfs="$POOL2/xdr_redacted_received_recv"
+full_stream="/$POOL/xdr_redacted_received_full.zsend"
+resend_stream="/$POOL/xdr_redacted_received_resend.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $midfs && destroy_dataset $midfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $full_stream $resend_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a send from a previously-redacted dataset is " \
+    "XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Produce a previously-redacted dataset on the receiver.
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $midfs < $full_stream"
+
+# Send the received-redacted snapshot. This fires BEGINNV_REDACT_SNAPS via
+# the SPA_FEATURE_REDACTED_DATASETS code path on to_ds.
+log_must eval "zfs send $midfs@s0 > $resend_stream"
+verify_xdr_nvlist_encoding $resend_stream
+log_must eval "zfs receive $recvfs < $resend_stream"
+
+log_pass "BEGIN nvlist of a send from a previously-redacted dataset is " \
+    "XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received_raw.ksh
new file mode 100755
index 00000000000..2efcba32b9f
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_redacted_received_raw.ksh
@@ -0,0 +1,97 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# zfs send explicitly disallows the source-side combination of -w and
+# --redact. However, the same nvlist combination (BEGINNV_REDACT_SNAPS
+# together with crypt_keydata) can still be reached by:
+#
+#   1. Sending a redacted (non-raw) stream from an unencrypted source.
+#   2. Receiving it with receiver-side encryption.
+#   3. Re-sending the now-encrypted-and-redacted dataset with -w.
+#
+# The final stream's DRR_BEGIN nvlist contains both the redact-snaps array
+# (via the SPA_FEATURE_REDACTED_DATASETS code path on to_ds) and
+# crypt_keydata (via DMU_BACKUP_FEATURE_RAW). Verify that this combined
+# payload is XDR-encoded and that the stream can be received.
+#
+# Strategy:
+# 1. Create an unencrypted source dataset with a redaction bookmark.
+# 2. zfs send --redact <book> sendfs@snap to a file (no -w).
+# 3. zfs receive into a new dataset with -o encryption=on (receiver-side
+#    encryption).
+# 4. zfs send -w the received dataset to a second stream file.
+# 5. Verify that this second stream is XDR-encoded.
+# 6. Verify that the second stream can be zfs received successfully.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_redacted_received_raw_src"
+clonefs="$POOL/xdr_redacted_received_raw_clone"
+midfs="$POOL2/xdr_redacted_received_raw_mid"
+recvfs="$POOL2/xdr_redacted_received_raw_recv"
+keyfile="/$POOL/xdr_redacted_received_raw.key"
+full_stream="/$POOL/xdr_redacted_received_raw_full.zsend"
+resend_stream="/$POOL/xdr_redacted_received_raw_resend.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $midfs && destroy_dataset $midfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $keyfile $full_stream $resend_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a raw send of a received-redacted dataset is " \
+    "XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=8 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Redacted send (non-raw) into a receiver that establishes its own encryption.
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $full_stream"
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must eval "zfs receive -o encryption=on -o keyformat=passphrase " \
+    "-o keylocation=file://$keyfile $midfs < $full_stream"
+
+# Re-send the received stream as a raw (encrypted) stream. The DRR_BEGIN
+# nvlist now carries both BEGINNV_REDACT_SNAPS data and crypt_keydata
+# (DMU_BACKUP_FEATURE_RAW).
+log_must eval "zfs send -w $midfs@s0 > $resend_stream"
+verify_xdr_nvlist_encoding $resend_stream
+log_must eval "zfs receive $recvfs < $resend_stream"
+
+log_pass "BEGIN nvlist of a raw send of a received-redacted dataset is " \
+    "XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_replication.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_replication.ksh
new file mode 100755
index 00000000000..22d0bf20410
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_replication.ksh
@@ -0,0 +1,90 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A replication send (zfs send -R) may emit two distinct categories of
+# DRR_BEGIN record:
+#
+#   1. A wrapper BEGIN of type DMU_COMPOUNDSTREAM, generated in libzfs
+#      (lib/libzfs/libzfs_sendrecv.c), whose nvlist describes the package
+#      stream. This BEGIN has always been XDR-encoded and is not affected
+#      by the kernel-side encoding changes introduced in PR #18372.
+#
+#   2. One inner BEGIN record per dataset whose contents are included,
+#      generated in the kernel (module/zfs/dmu_send.c). These are the BEGIN
+#      records whose encoding the kernel-side change consolidates to XDR.
+#
+# All other tests in this suite exercise category (2). This test exercises
+# both categories together: it verifies that no BEGIN record produced
+# anywhere on the userspace+kernel send path is encoded with NV_ENCODE_NATIVE,
+# so a future regression in either layer would be caught.
+#
+# Strategy:
+# 1. Create an unencrypted parent dataset and an encrypted child filesystem
+#    underneath it, with some data in each. The encrypted child is what
+#    causes the kernel-side inner BEGIN to actually carry an nvlist payload
+#    (crypt_keydata) rather than passing through silently.
+# 2. Snapshot recursively.
+# 3. zfs send -wR parent@snap to a file. The resulting stream contains a
+#    libzfs-generated wrapper BEGIN with its compound-stream nvlist plus
+#    one kernel-generated inner BEGIN per dataset; the child's inner BEGIN
+#    carries crypt_keydata.
+# 4. Verify the encoding for the whole stream — this checks every BEGIN
+#    nvlist line that zstream dump emits, so it covers both the wrapper
+#    and the encrypted child's inner record.
+# 5. Verify that the stream can be zfs received successfully.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_replication_src"
+childfs="$POOL/xdr_replication_src/child"
+recvfs="$POOL2/xdr_replication_recv"
+keyfile="/$POOL/xdr_replication.key"
+stream="/$POOL/xdr_replication.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -R
+	datasetexists $recvfs && destroy_dataset $recvfs -R
+	rm -f $keyfile $stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlists in a recursive replication stream (wrapper and inner) are XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $childfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=4 status=none
+log_must dd if=/dev/urandom of=/$childfs/f1 bs=128k count=4 status=none
+log_must zfs snapshot -r $sendfs@s0
+
+log_must eval "zfs send -wR $sendfs@s0 > $stream"
+verify_xdr_nvlist_encoding $stream
+log_must eval "zfs receive $recvfs < $stream"
+
+log_pass "BEGIN nvlists in a recursive replication stream (wrapper and inner) are XDR-encoded and receivable"
+
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume.ksh
new file mode 100755
index 00000000000..e98de4c47f4
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume.ksh
@@ -0,0 +1,73 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A token-resumed send (zfs send -t <token>) carries BEGINNV_RESUME_OBJECT
+# and BEGINNV_RESUME_OFFSET in its DRR_BEGIN nvlist payload. Verify that
+# this payload is XDR-encoded and that the resumed stream can be received.
+#
+# Strategy:
+# 1. Create a small dataset with one snapshot.
+# 2. zfs send the snapshot to a file, truncate it, then attempt receive
+#    so that a resume token is left behind.
+# 3. zfs send -t <token> to produce the resumed stream.
+# 4. Verify that the resumed stream is XDR-encoded.
+# 5. Verify that zfs receive -s on the resumed stream is successful.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_resume_src"
+recvfs="$POOL2/xdr_resume_recv"
+full_stream="/$POOL/xdr_resume_full.zsend"
+resumed_stream="/$POOL/xdr_resume_resumed.zsend"
+
+function cleanup
+{
+	datasetexists $sendfs && destroy_dataset $sendfs -r
+	datasetexists $recvfs && destroy_dataset $recvfs -r
+	rm -f $full_stream $resumed_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a token-resumed send is XDR-encoded and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=8 status=none
+log_must zfs snapshot $sendfs@s1
+
+log_must eval "zfs send $sendfs@s1 > $full_stream"
+mess_send_file $full_stream
+log_mustnot eval "zfs receive -s $recvfs < $full_stream"
+
+token=$(get_prop receive_resume_token $recvfs)
+[[ -n "$token" && "$token" != "-" ]] || \
+    log_fail "no resume token left behind by partial receive"
+log_must eval "zfs send -t $token > $resumed_stream"
+
+verify_xdr_nvlist_encoding $resumed_stream
+log_must eval "zfs receive -s $recvfs < $resumed_stream"
+
+log_pass "BEGIN nvlist of a token-resumed send is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw.ksh
new file mode 100755
index 00000000000..6645315fcd7
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw.ksh
@@ -0,0 +1,103 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# The most populated DRR_BEGIN nvlist in the kernel: a token-resumed raw
+# incremental from a redaction bookmark carries BEGINNV_REDACT_FROM_SNAPS,
+# crypt_keydata, and BEGINNV_RESUME_{OBJECT,OFFSET}. Verify that this
+# combined payload is XDR-encoded and the resumed stream can be received.
+#
+# Strategy:
+# 1. Create an encrypted source with a redaction bookmark and a later
+#    snapshot, mirroring xdr_bookmark_raw.
+# 2. Establish a raw base on the receiver.
+# 3. zfs send -w -i sendfs#book sendfs@s1 to a file, truncate it, then
+#    attempt receive so that a resume token is left behind.
+# 4. zfs send -t <token> to produce the resumed stream.
+# 5. Verify that the resumed stream is XDR-encoded.
+# 6. Verify that zfs receive -s of the resumed stream is successful.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_resume_bookmark_raw_src"
+clonefs="$POOL/xdr_resume_bookmark_raw_clone"
+recvfs="$POOL2/xdr_resume_bookmark_raw_recv"
+keyfile="/$POOL/xdr_resume_bookmark_raw.key"
+full_stream="/$POOL/xdr_resume_bookmark_raw_full.zsend"
+incr_stream="/$POOL/xdr_resume_bookmark_raw_incr.zsend"
+resumed_stream="/$POOL/xdr_resume_bookmark_raw_resumed.zsend"
+
+function cleanup # log_onexit hook: remove datasets, key and stream files
+{
+	if datasetexists $sendfs; then destroy_dataset $sendfs -R; fi
+	if datasetexists $recvfs; then destroy_dataset $recvfs -R; fi
+	rm -f $keyfile $full_stream $incr_stream $resumed_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a token-resumed raw incremental from a redaction " \
+    "bookmark is XDR-encoded and receivable"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=16 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=16 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=16 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Take @s1 with no intervening write. See xdr_resume_bookmark_raw_with_write.ksh
+# for a variant that includes a post-redact write; that variant exercises
+# a known kernel-side issue (#18491) and may flake.
+log_must zfs snapshot $sendfs@s1
+
+# Establish a raw base on the receiver.
+log_must eval "zfs send -w $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $recvfs < $full_stream"
+
+# Truncate-and-resume on the raw incremental from the redaction bookmark.
+log_must eval "zfs send -w -i $sendfs#redaction-bookmark $sendfs@s1 > \
+    $incr_stream"
+mess_send_file $incr_stream
+log_mustnot eval "zfs receive -s $recvfs < $incr_stream"
+
+token=$(get_prop receive_resume_token $recvfs)
+[[ -n "$token" && "$token" != "-" ]] || \
+    log_fail "no resume token left behind by partial receive"
+log_must eval "zfs send -t $token > $resumed_stream"
+
+verify_xdr_nvlist_encoding $resumed_stream
+log_must eval "zfs receive -s $recvfs < $resumed_stream"
+
+log_pass "BEGIN nvlist of a token-resumed raw incremental from a redaction " \
+    "bookmark is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw_with_write.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw_with_write.ksh
new file mode 100755
index 00000000000..6c0b6b5b4ec
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_bookmark_raw_with_write.ksh
@@ -0,0 +1,116 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# This is the post-redact-write variant of xdr_resume_bookmark_raw,
+# separated out because of a known issue (#18491) that causes it to fail
+# roughly 30% of the time. It is kept here as a reproducer for issue #18491
+# until the root cause of that problem has been pinned down.
+#
+# Known issue: openzfs/zfs#18491
+#
+# On a freshly-created pool, `zfs send -w -i ds#book ds@snap` intermittently
+# fails with EACCES whenever there is data-modifying activity between the
+# `zfs redact` that created the bookmark and the subsequent send. This EACCES
+# is surfaced to userspace as the misleading message "dataset key must be
+# loaded," although the key remains loaded throughout.
+#
+# The reproducer script included in the issue report typically triggers the
+# problem within about 10 iterations on a fresh pool. Disk-sync mitigations
+# (zpool sync, with or without `-f`, with or without sleep, single or doubled,
+# applied at any reasonable point) do not avert the problem. CI runs that
+# include the test in this file reproduce the failure regularly (though
+# intermittently) across multiple distributions. xdr_resume_bookmark_raw.ksh
+# removes the post-redact write (which is not essential to the test) and
+# therefore runs reliably.
+#
+# When this test fails because of the known issue, the failure marker is
+# the libzfs warning "dataset key must be loaded" on stderr from the
+# `zfs send -w -i` command below (the one that produces the stream we then
+# truncate), and a non-zero exit from that send. The test does not attempt
+# to distinguish the known-issue failure from other possible failures.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_resume_bookmark_raw_with_write_src"
+clonefs="$POOL/xdr_resume_bookmark_raw_with_write_clone"
+recvfs="$POOL2/xdr_resume_bookmark_raw_with_write_recv"
+keyfile="/$POOL/xdr_resume_bookmark_raw_with_write.key"
+full_stream="/$POOL/xdr_resume_bookmark_raw_with_write_full.zsend"
+incr_stream="/$POOL/xdr_resume_bookmark_raw_with_write_incr.zsend"
+resumed_stream="/$POOL/xdr_resume_bookmark_raw_with_write_resumed.zsend"
+
+function cleanup # log_onexit hook: remove datasets, key and stream files
+{
+	if datasetexists $sendfs; then destroy_dataset $sendfs -R; fi
+	if datasetexists $recvfs; then destroy_dataset $recvfs -R; fi
+	rm -f $keyfile $full_stream $incr_stream $resumed_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a token-resumed raw incremental from a redaction " \
+    "bookmark, with a post-redact write, is XDR-encoded and receivable " \
+    "(known to flake; see openzfs/zfs#18491)"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=16 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=16 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+# Post-redact write: the trigger for openzfs/zfs#18491.
+log_must dd if=/dev/urandom of=/$sendfs/f3 bs=128k count=16 status=none
+log_must zfs snapshot $sendfs@s1
+
+# Establish a raw base on the receiver.
+log_must eval "zfs send -w $sendfs@s0 > $full_stream"
+log_must eval "zfs receive $recvfs < $full_stream"
+
+# The next command is the one that races. On failure it returns EACCES,
+# which libzfs reports as "dataset key must be loaded".
+log_must eval "zfs send -w -i $sendfs#redaction-bookmark $sendfs@s1 > \
+    $incr_stream"
+mess_send_file $incr_stream
+log_mustnot eval "zfs receive -s $recvfs < $incr_stream"
+
+token=$(get_prop receive_resume_token $recvfs)
+[[ -n "$token" && "$token" != "-" ]] || \
+    log_fail "no resume token left behind by partial receive"
+log_must eval "zfs send -t $token > $resumed_stream"
+
+verify_xdr_nvlist_encoding $resumed_stream
+log_must eval "zfs receive -s $recvfs < $resumed_stream"
+
+log_pass "BEGIN nvlist of a token-resumed raw incremental from a redaction " \
+    "bookmark, with a post-redact write, is XDR-encoded and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_raw.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_raw.ksh
new file mode 100755
index 00000000000..a96df10b945
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_raw.ksh
@@ -0,0 +1,79 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A resumed raw send (zfs send -t <token> for a raw stream of an encrypted
+# dataset) carries both BEGINNV_RESUME_{OBJECT,OFFSET} and the "crypt_keydata"
+# nested nvlist in its DRR_BEGIN nvlist payload. Verify that this combined
+# payload is XDR-encoded and the resumed stream can be received.
+#
+# Strategy:
+# 1. Create an encrypted dataset with one snapshot.
+# 2. zfs send -w to a file, truncate it, then attempt to zfs receive the
+#    stream so that a resume token is left behind.
+# 3. zfs send -t <token> to produce the resumed raw stream.
+# 4. Verify that the resumed stream is XDR-encoded.
+# 5. Verify that zfs receive -s receives the resumed stream successfully.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_resume_raw_src"
+recvfs="$POOL2/xdr_resume_raw_recv"
+keyfile="/$POOL/xdr_resume_raw.key"
+full_stream="/$POOL/xdr_resume_raw_full.zsend"
+resumed_stream="/$POOL/xdr_resume_raw_resumed.zsend"
+
+function cleanup # log_onexit hook: remove datasets, key and stream files
+{
+	if datasetexists $sendfs; then destroy_dataset $sendfs -r; fi
+	if datasetexists $recvfs; then destroy_dataset $recvfs -r; fi
+	rm -f $keyfile $full_stream $resumed_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a token-resumed raw send is XDR-encoded " \
+    "and receivable"
+
+log_must eval "echo 'thisisapassphrase' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=16 status=none
+log_must zfs snapshot $sendfs@s1
+
+log_must eval "zfs send -w $sendfs@s1 > $full_stream"
+mess_send_file $full_stream
+log_mustnot eval "zfs receive -s $recvfs < $full_stream"
+
+token=$(get_prop receive_resume_token $recvfs)
+[[ -n "$token" && "$token" != "-" ]] || \
+    log_fail "no resume token left behind by partial receive"
+log_must eval "zfs send -t $token > $resumed_stream"
+
+verify_xdr_nvlist_encoding $resumed_stream
+log_must eval "zfs receive -s $recvfs < $resumed_stream"
+
+log_pass "BEGIN nvlist of a token-resumed raw send is XDR-encoded " \
+    "and receivable"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_redacted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_redacted.ksh
new file mode 100755
index 00000000000..6cee3e51a3d
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/send_xdr_encoding/xdr_resume_redacted.ksh
@@ -0,0 +1,86 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2026 by Garth Snyder. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/send_xdr_encoding/send_xdr_encoding.kshlib
+
+#
+# Description:
+# A resumed redacted send (zfs send -t <token> for a redacted stream)
+# carries both BEGINNV_REDACT_SNAPS and BEGINNV_RESUME_{OBJECT,OFFSET} in
+# its DRR_BEGIN nvlist payload. Verify that this combined payload is
+# XDR-encoded and the resumed stream can be received.
+#
+# Strategy:
+# 1. Create a source dataset with a redaction bookmark.
+# 2. zfs send --redact <book> sendfs@snap to a file, truncate it, then
+#    attempt zfs receive so that a resume token is left behind.
+# 3. zfs send -t <token> to produce a resumed redacted stream.
+# 4. Verify that the resumed stream is XDR-encoded.
+# 5. Verify that zfs receive -s of the resumed stream is successful.
+#
+
+verify_runnable "both"
+
+sendfs="$POOL/xdr_resume_redacted_src"
+clonefs="$POOL/xdr_resume_redacted_clone"
+recvfs="$POOL2/xdr_resume_redacted_recv"
+full_stream="/$POOL/xdr_resume_redacted_full.zsend"
+resumed_stream="/$POOL/xdr_resume_redacted_resumed.zsend"
+
+function cleanup # log_onexit hook: remove test datasets and stream files
+{
+	if datasetexists $sendfs; then destroy_dataset $sendfs -R; fi
+	if datasetexists $recvfs; then destroy_dataset $recvfs -R; fi
+	rm -f $full_stream $resumed_stream
+}
+log_onexit cleanup
+
+log_assert "BEGIN nvlist of a token-resumed redacted send is XDR-encoded " \
+    "and receivable"
+
+log_must zfs create $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/f1 bs=128k count=16 status=none
+log_must dd if=/dev/urandom of=/$sendfs/f2 bs=128k count=16 status=none
+log_must zfs snapshot $sendfs@s0
+
+log_must zfs clone $sendfs@s0 $clonefs
+log_must dd if=/dev/urandom of=/$clonefs/f1 bs=128k count=16 conv=notrunc \
+    status=none
+log_must zfs snapshot $clonefs@s
+
+log_must zfs redact $sendfs@s0 redaction-bookmark $clonefs@s
+
+log_must eval "zfs send --redact redaction-bookmark $sendfs@s0 > $full_stream"
+mess_send_file $full_stream
+log_mustnot eval "zfs receive -s $recvfs < $full_stream"
+
+token=$(get_prop receive_resume_token $recvfs)
+[[ -n "$token" && "$token" != "-" ]] || \
+    log_fail "no resume token left behind by partial receive"
+log_must eval "zfs send -t $token > $resumed_stream"
+
+verify_xdr_nvlist_encoding $resumed_stream
+log_must eval "zfs receive -s $recvfs < $resumed_stream"
+
+log_pass "BEGIN nvlist of a token-resumed redacted send is XDR-encoded " \
+    "and receivable"
+
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh
index ffc4e96f5a0..0f3b1a84d83 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_002_pos.ksh
@@ -64,7 +64,7 @@ function cleanup
 log_assert "Verify an archive of a file system is identical to " \
     "an archive of its snapshot."
 
-SNAPSHOT_TARDIR="$(mktemp -t -d zfstests_snapshot_002.XXXXXX)"
+SNAPSHOT_TARDIR="$(mktemp -d "$TEST_BASE_DIR/zfstests_snapshot_002.XXXXXX")"
 log_onexit cleanup
 
 typeset -i COUNT=21
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh
index 20d53eb5012..db8d820bdd1 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_006_pos.ksh
@@ -73,7 +73,7 @@ function cleanup
 log_assert "Verify that an archive of a dataset is identical to " \
    "an archive of the dataset's snapshot."
 
-SNAPSHOT_TARDIR="$(mktemp -t -d zfstests_snapshot_006.XXXXXX)"
+SNAPSHOT_TARDIR="$(mktemp -d "$TEST_BASE_DIR/zfstests_snapshot_006.XXXXXX")"
 log_onexit cleanup
 
 typeset -i COUNT=21
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/stat/statx_dioalign.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/stat/statx_dioalign.ksh
index ab749b5f793..ea10e492503 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/stat/statx_dioalign.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/stat/statx_dioalign.ksh
@@ -89,7 +89,8 @@ typeset -i PAGE_SIZE=$(getconf PAGE_SIZE)
 # Set recordsize to 128K, and make a 64K file (so only one block) for the
 # sizing tests below.
 log_must zfs set recordsize=128K $TESTDS
-log_must dd if=/dev/urandom of=$TESTFILE bs=64k count=1
+log_must rm -f $TESTFILE
+log_must stride_dd -i /dev/urandom -o $TESTFILE -b 65536 -c 1
 log_must zpool sync
 
 # when DIO is disabled via tunable, statx will not return the dioalign result
@@ -141,7 +142,7 @@ done
 # Now we extend the file into its second block. This effectively locks in its
 # block size, which will always be returned regardless of recordsize changes.
 log_must zfs set recordsize=128K $TESTDS
-log_must dd if=/dev/urandom of=$TESTFILE bs=192K count=1
+log_must stride_dd -i /dev/urandom -o $TESTFILE -b 196608 -c 1
 log_must zpool sync
 
 # Confirm that no matter how we change the recordsize, the alignment remains at
@@ -167,14 +168,14 @@ log_must rm -f $TESTFILE
 log_must touch $TESTFILE
 log_must zpool sync
 assert_dioalign $TESTFILE $PAGE_SIZE 16384
-log_must dd if=/dev/urandom of=$TESTFILE bs=16384 count=16 oflag=direct
+log_must stride_dd -i /dev/urandom -o $TESTFILE -b 16384 -c 16 -D
 
 # same again, but writing with incorrect alignment, which should fail.
 log_must rm -f $TESTFILE
 log_must touch $TESTFILE
 log_must zpool sync
 assert_dioalign $TESTFILE $PAGE_SIZE 16384
-log_mustnot dd if=/dev/urandom of=$TESTFILE bs=1024 count=256 oflag=direct
+log_mustnot stride_dd -i /dev/urandom -o $TESTFILE -b 1024 -c 256 -D
 
 # same again, but without strict, which should succeed.
 log_must set_tunable32 DIO_STRICT 0
@@ -182,6 +183,6 @@ log_must rm -f $TESTFILE
 log_must touch $TESTFILE
 log_must zpool sync
 assert_dioalign $TESTFILE $PAGE_SIZE 16384
-log_must dd if=/dev/urandom of=$TESTFILE bs=1024 count=256 oflag=direct
+log_must stride_dd -i /dev/urandom -o $TESTFILE -b 1024 -c 256 -D
 
 log_pass $CLAIM
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_008_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_008_pos.ksh
new file mode 100755
index 00000000000..c5ad282eb8a
--- /dev/null
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/vdev_zaps/vdev_zaps_008_pos.ksh
@@ -0,0 +1,90 @@
+#!/bin/ksh -p
+# SPDX-License-Identifier: CDDL-1.0
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2026, TrueNAS.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Verify that the 'rotational' vdev property is readable on spare and
+# L2ARC vdevs, which have no per-vdev ZAP, and that its value persists
+# across export/import when the spare device is absent.
+#
+# STRATEGY:
+# 1. Create a pool with a mirror, a spare, and an L2ARC device.
+# 2. Verify 'rotational' is readable on leaf, virtual (mirror), spare,
+#    and L2ARC vdevs.
+# 3. Export the pool, remove the spare file, re-import, and verify that
+#    'rotational' still reports the same value for the missing spare,
+#    proving the value comes from the persisted config.
+#
+
+verify_runnable "global"
+
+SPARE="$TEST_BASE_DIR/vz008-spare"
+L2C="$TEST_BASE_DIR/vz008-l2c"
+VDEV1="$TEST_BASE_DIR/vz008-vdev1"
+VDEV2="$TEST_BASE_DIR/vz008-vdev2"
+
+function cleanup # log_onexit hook: destroy test pool, remove vdev files
+{
+	destroy_pool $TESTPOOL
+	rm -f $VDEV1 $VDEV2 $SPARE $L2C
+}
+
+log_assert "'rotational' is readable on ZAP-less vdevs and persists absent"
+log_onexit cleanup
+
+log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $SPARE $L2C
+
+log_must zpool create -f $TESTPOOL \
+    mirror $VDEV1 $VDEV2 \
+    cache $L2C \
+    spare $SPARE
+
+# Leaf vdev should report rotational.
+NR=$(zpool get -H -o value rotational $TESTPOOL $VDEV1)
+[[ "$NR" == "on" || "$NR" == "off" ]] ||
+    log_fail "leaf $VDEV1: expected on/off, got '$NR'"
+
+# Virtual (mirror) vdev should report rotational.
+MIRROR=$(zpool list -v -H $TESTPOOL | awk '$1 ~ /^mirror/ {print $1; exit}')
+NR=$(zpool get -H -o value rotational $TESTPOOL "$MIRROR")
+[[ "$NR" == "on" || "$NR" == "off" ]] ||
+    log_fail "mirror: expected on/off, got '$NR'"
+
+# Spare vdev should report rotational even though it has no ZAP.
+NR=$(zpool get -H -o value rotational $TESTPOOL $SPARE)
+[[ "$NR" == "on" || "$NR" == "off" ]] ||
+    log_fail "spare $SPARE: expected on/off, got '$NR'"
+
+# L2ARC vdev should report rotational even though it has no ZAP.
+NR=$(zpool get -H -o value rotational $TESTPOOL $L2C)
+[[ "$NR" == "on" || "$NR" == "off" ]] ||
+    log_fail "L2ARC $L2C: expected on/off, got '$NR'"
+
+# The value must persist across export/import when the spare is absent.
+# Remove the spare file before re-import so that vdev_open() cannot read
+# the hardware value and the only source is the persisted config.
+NR_BEFORE=$(zpool get -H -o value rotational $TESTPOOL $SPARE)
+log_must zpool export $TESTPOOL
+log_must rm -f $SPARE
+log_must zpool import -d $TEST_BASE_DIR $TESTPOOL
+NR_AFTER=$(zpool get -H -o value rotational $TESTPOOL $SPARE)
+[[ "$NR_BEFORE" == "$NR_AFTER" ]] ||
+    log_fail "spare rotational changed across import: $NR_BEFORE -> $NR_AFTER"
+
+log_pass "'rotational' readable on spare/L2ARC vdevs and persists when absent"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
index 9047f14bc81..0f18f2e5733 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh
@@ -58,7 +58,7 @@ biggest_zvol_size_possible=$(largest_volsize_from_pool $TESTPOOL)
 typeset -f each_zvol_size=$(( floor($biggest_zvol_size_possible * 0.9 / \
 	$num_zvols )))
 
-typeset tmpdir="$(mktemp -t -d zvol_stress_fio_state.XXXXXX)"
+typeset tmpdir="$(mktemp -d "$TEST_BASE_DIR/zvol_stress_fio_state.XXXXXX")"
 
 log_must save_tunable VOL_USE_BLK_MQ
 log_must save_tunable VOL_REQUEST_SYNC
diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile
index 8fd023005b5..44a63977f28 100644
--- a/sys/modules/zfs/Makefile
+++ b/sys/modules/zfs/Makefile
@@ -323,6 +323,8 @@ SRCS+=	abd.c \
 	vdev_root.c \
 	vdev_trim.c \
 	zap.c \
+	zap_fat.c \
+	zap_impl.c \
 	zap_leaf.c \
 	zap_micro.c \
 	zcp.c \
@@ -439,6 +441,7 @@ CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual
 CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
 CFLAGS.zap_leaf.c= -Wno-cast-qual
 CFLAGS.zap_micro.c= -Wno-cast-qual
+CFLAGS.zap_impl.c= -Wno-cast-qual
 CFLAGS.zcp.c= -Wno-cast-qual
 CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
 CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index b4e4c1e4d29..2086bd330e1 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -258,6 +258,9 @@
 /* fs_context exists */
 /* #undef HAVE_FS_CONTEXT */
 
+/* fs_parse() takes fs_parameter_spec directly */
+/* #undef HAVE_FS_PARSE_TAKES_SPEC */
+
 /* yes */
 /* #undef HAVE_GENERIC_FADVISE */
 
@@ -579,6 +582,12 @@
 /* proc_ops structure exists */
 /* #undef HAVE_PROC_OPS_STRUCT */
 
+/* Define if you have POSIX threads libraries and header files. */
+#define HAVE_PTHREAD 1
+
+/* Have PTHREAD_PRIO_INHERIT. */
+#define HAVE_PTHREAD_PRIO_INHERIT 1
+
 /* If available, contains the Python version number currently in use. */
 /* #undef HAVE_PYTHON */
 
@@ -881,6 +890,10 @@
 /* make_request_fn() return type */
 /* #undef MAKE_REQUEST_FN_RET */
 
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
 /* The size of 'off_t', as computed by sizeof. */
 /* #undef SIZEOF_OFF_T */
 
@@ -914,7 +927,7 @@
 /* #undef ZFS_DEVICE_MINOR */
 
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.4.99-566-FreeBSD_ga12c6ed62"
+#define ZFS_META_ALIAS "zfs-2.4.99-695-FreeBSD_ga170134fe"
 
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
@@ -944,7 +957,7 @@
 #define ZFS_META_NAME "zfs"
 
 /* Define the project release. */
-#define ZFS_META_RELEASE "566-FreeBSD_ga12c6ed62"
+#define ZFS_META_RELEASE "695-FreeBSD_ga170134fe"
 
 /* Define the project version. */
 #define ZFS_META_VERSION "2.4.99"
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 113c3d7a5bb..f783cf01c72 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define	ZFS_META_GITREV "zfs-2.4.99-566-ga12c6ed62-dirty"
+#define	ZFS_META_GITREV "zfs-2.4.99-695-ga170134fe"