zfs: merge openzfs/zfs@a170134fe

Notable upstream pull request merges:
 #18372 eaaea55b6 Consistently encode DRR_BEGIN packed nvlist payloads with
                  NV_ENCODE_XDR
 #18410 891e379d0 Fix failfast default and usage
 #18470 a2d053329 zdb: Add some more file layout output, triggered by -v
 #18472 d50f5b6d0 dsl_dir: avoid dd_lock during snapshots_changed updates
 #18493 d65015938 Vdev allocation bias/class change
 #18497 8fdc86675 zfs: annotate nested dd_lock in reservation sync
                  accounting
 #18494 956deba27 zdb: detect BRT and DDT leaks during block traversal
 #18499 c7cfe0805 zarcstat: detect attached L2ARC device with no data
 #18503 439b802e7 sa: fix sa_add_projid lock ordering
 #18508 968f4db03 zpool-attach.8: add EXAMPLES section
 #18513 45dddc452 zfs.4: Fix documentation of zfs_arc_dnode_reduce_percent
 #18516 8ff64005a zap: split implementation out into more files
 #18520 181e1b522 Fix double free for blocks cloned after DDT prune
 #18535 -multiple zstream: fix crashes when refcount tracking enabled
 #18536 -multiple refcount tag fixups
 #18541 a65ed7afd zpool/zfs: accept --help and -? after a subcommand
 #18544 6fb72fda0 zio_ddt_write: compute have_dvas after taking dde_io_lock
 #18546 -multiple zap: internal locking uplift
 #18550 40a87651d zap_impl: use flex array field for mzap_phys_t.mz_chunks
 #18551 -multiple zap: make the _by_dnode() op variants be the primary
                  implementation
 #18570 112b0131b zpl_xattr: stop heap-allocating prefixed xattr names
 #18578 4bc8c39b6 zed: Prefer dRAID distributed spares to regular ones
 #18596 e30ab5fa4 FreeBSD: Make it possible to build openzfs.ko with
                  sanitizers
 #18597 472ddca11 zed: Prefer spares with matching rotational and size
 #18599 c90dc2808 enforce exact decompressed length for lz4, gzip, and zstd
 #18603 -multiple zap: add zap_cursor_init_by_dnode; cursor unit tests;
                  mock dnode refcounts
 #18604 59dc88602 nvpair: Check for un-terminated strings in packed nvlist
 #18606 ef6f26145 When reading a vdev label skip libzfs_core_init()
 #18613 0aa4088dc sharenfs: Check for invalid characters
 #18615 80fb85b80 Fix the integer type in zfs_ioc_userspace_many()
 #18616 e199f6d98 Fix uninitialized variable warning in vdev_prop_get()
 #18617 7de42602c Extend dataset zfs_ioc_set_prop() secpolicy
 #18622 5fea0c838 Parallelize metaslab_sync_done() calls
 #18623 cab50d5ad Add additional verification of size fields and strings
 #18630 -multiple zap: misc function removal / uplift / tests
 #18633 a8ef128da Fix uninitialized variable warning in zil_parse()

Obtained from:	OpenZFS
OpenZFS commit:	a170134feb

This commit is contained in:

Martin Matuska

2026-06-06 22:48:32 +02:00

parent 4a299ef19c a170134feb

commit d949721745

196 changed files with 14745 additions and 4032 deletions

									
										cddl/lib/libzpool/Makefile
									
		+2
		
												View File
												
				@@ -163,6 +163,8 @@ KERNEL_C = \

					vdev_root.c \

					vdev_trim.c \

					zap.c \

					zap_fat.c \

					zap_impl.c \

					zap_leaf.c \

					zap_micro.c \

					zcp.c \

sys/conf/files

View File

@@ -346,6 +346,8 @@ contrib/openzfs/module/zfs/vdev_removal.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_root.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_trim.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_fat.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_impl.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_leaf.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zap_micro.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/zcp.c		optional zfs compile-with "${ZFS_C}"

									
										sys/contrib/openzfs/.github/workflows/README.md
									
Vendored

		+82
		-47
	
												View File
												
				@@ -1,61 +1,96 @@

				## The testings are done this way

				## CI overview

				The main test pipeline is `zfs-qemu.yml`. Code checking and other

				workflows run independently alongside it.

				```mermaid

				flowchart TB

				subgraph CleanUp and Summary

				  CleanUp+Summary

				subgraph Functional testing

				  Setup[test-config: pick ci_type + OS matrix]

				  Setup --> almalinux

				  Setup --> centos[centos-stream]

				  Setup --> debian

				  Setup --> fedora

				  Setup --> ubuntu

				  Setup --> freebsd

				  almalinux --> Cleanup[cleanup + summary]

				  centos --> Cleanup

				  debian --> Cleanup

				  fedora --> Cleanup

				  ubuntu --> Cleanup

				  freebsd --> Cleanup

				end

				subgraph Functional Testings

				  sanity-checks-20.04

				  zloop-checks-20.04

				  functional-testing-20.04-->Part1-20.04

				  functional-testing-20.04-->Part2-20.04

				  functional-testing-20.04-->Part3-20.04

				  functional-testing-20.04-->Part4-20.04

				  functional-testing-22.04-->Part1-22.04

				  functional-testing-22.04-->Part2-22.04

				  functional-testing-22.04-->Part3-22.04

				  functional-testing-22.04-->Part4-22.04

				  sanity-checks-22.04

				  zloop-checks-22.04

				end

				subgraph Code Checking + Building

				  Build-Ubuntu-20.04

				subgraph Code checking

				  checkstyle.yaml

				  codeql.yml

				  checkstyle.yml

				  Build-Ubuntu-22.04

				  smatch.yml

				end

				  Build-Ubuntu-20.04-->sanity-checks-20.04

				  Build-Ubuntu-20.04-->zloop-checks-20.04

				  Build-Ubuntu-20.04-->functional-testing-20.04

				  Build-Ubuntu-22.04-->sanity-checks-22.04

				  Build-Ubuntu-22.04-->zloop-checks-22.04

				  Build-Ubuntu-22.04-->functional-testing-22.04

				  sanity-checks-20.04-->CleanUp+Summary

				  Part1-20.04-->CleanUp+Summary

				  Part2-20.04-->CleanUp+Summary

				  Part3-20.04-->CleanUp+Summary

				  Part4-20.04-->CleanUp+Summary

				  Part1-22.04-->CleanUp+Summary

				  Part2-22.04-->CleanUp+Summary

				  Part3-22.04-->CleanUp+Summary

				  Part4-22.04-->CleanUp+Summary

				  sanity-checks-22.04-->CleanUp+Summary

				subgraph Other workflows

				  zfs-arm.yml

				  zloop.yml

				  labels.yml

				end

				```

				Every `qemu-vm` matrix entry runs on a fixed `ubuntu-24.04` host.

				The steps inside one entry are:

				1) build zfs modules for Ubuntu 20.04 and 22.04 (~15m)

				2) 2x zloop test (~10m) + 2x sanity test (~25m)

				3) 4x functional testings in parts 1..4 (each ~1h)

				4) cleanup and create summary

				   - content of summary depends on the results of the steps

				1) set up QEMU and boot the guest (~2-4m)

				2) install build dependencies in the guest (~2-4m)

				3) build zfs modules in the guest (~8-12m)

				4) run functional tests (~2-4h)

				5) package and upload per-OS test logs (~10s)

				When everything runs fine, the full run should be done in

				about 2 hours.

				A per-OS entry takes about 3 to 4 hours. Once all entries finish, the

				`cleanup` job aggregates the results into a summary.

				The codeql.yml and checkstyle.yml are not part in this circle.

				### `ci_type` selection

				`test-config` runs `.github/workflows/scripts/generate-ci-type.py` against

				the PR's changed files and picks one of:

				| `ci_type` | OS matrix                                  |

				|-----------|--------------------------------------------|

				| `docs`    | empty (documentation-only PRs)             |

				| `quick`   | 6 Linux + 1 FreeBSD                        |

				| `linux`   | all supported Linux distros                |

				| `freebsd` | all supported FreeBSD versions             |

				| default   | cross-platform sample                      |

				Pushes to `openzfs/zfs` skip the matrix entirely; only PRs (and pushes to

				forks) build.

				Authors can force a specific ci_type by adding `ZFS-CI-Type: <type>` to

				the most recent commit message. The `ZTS_OS_OVERRIDE` repository variable

				can also alter the selection. The `workflow_dispatch` trigger accepts

				`fedora_kernel_ver` (Fedora-only run with a chosen kernel) and

				`specific_os` (pin the matrix to one OS).

				### Supported guests

				Auto-selected:

				- Linux: almalinux 8/9/10, centos-stream 9/10, debian 11/12/13,

				  fedora 43/44, ubuntu 22/24/26

				- FreeBSD: 14.4-RELEASE/STABLE, 15.0-RELEASE, 15.1-STABLE, 16.0-CURRENT

				Available via `specific_os` or `ZTS_OS_OVERRIDE`:

				- archlinux, tumbleweed

				### Code checking

				- `checkstyle.yaml`: source-style checks

				- `codeql.yml`: CodeQL analysis

				- `smatch.yml`: smatch analysis

				### Other workflows

				- `zfs-arm.yml`: ARM build on `ubuntu-24.04-arm`

				- `zloop.yml`: host-side zloop

				- `labels.yml`: maintains PR status labels

				- `zfs-qemu-packages.yml`: manually dispatched, builds release RPMs or

				  tests RPM installation from the ZFS yum repo

									
										sys/contrib/openzfs/.github/workflows/checkstyle.yaml
									
Vendored

		+1
		-1
	
												View File
												
				@@ -12,7 +12,7 @@ jobs:

				  checkstyle:

				    runs-on: ubuntu-22.04

				    steps:

				    - uses: actions/checkout@v4

				    - uses: actions/checkout@v6

				      with:

				        ref: ${{ github.event.pull_request.head.sha }}

				    - name: Install dependencies

									
										sys/contrib/openzfs/.github/workflows/codeql.yml
									
Vendored

		+4
		-4
	
												View File
												
				@@ -11,7 +11,7 @@ concurrency:

				jobs:

				  analyze:

				    name: Analyze

				    runs-on: ubuntu-22.04

				    runs-on: ubuntu-24.04

				    permissions:

				      actions: read

				      contents: read

				@@ -31,15 +31,15 @@ jobs:

				      uses: actions/checkout@v6

				    - name: Initialize CodeQL

				      uses: github/codeql-action/init@v3

				      uses: github/codeql-action/init@v4

				      with:

				        config-file: .github/codeql-${{ matrix.language }}.yml

				        languages: ${{ matrix.language }}

				    - name: Autobuild

				      uses: github/codeql-action/autobuild@v3

				      uses: github/codeql-action/autobuild@v4

				    - name: Perform CodeQL Analysis

				      uses: github/codeql-action/analyze@v3

				      uses: github/codeql-action/analyze@v4

				      with:

				        category: "/language:${{matrix.language}}"

									
										sys/contrib/openzfs/.github/workflows/scripts/generate-ci-type.py
									
Vendored

		+23
		
												View File
												
				@@ -6,6 +6,9 @@

				Output format: "<type> <source>" where source is "manual" (from

				ZFS-CI-Type commit tag) or "auto" (from file change heuristics).

				Prints "docs auto" if every changed file is documentation; the qemu

				matrix is skipped in that case.

				Prints "quick manual" if:

				- the *last* commit message contains 'ZFS-CI-Type: quick'

				or "quick auto" if (heuristics):

				@@ -28,10 +31,24 @@

				    r'.*\.gitignore'

				]))

				"""

				Patterns of files that are documentation only.

				"""

				# These patterns are applied with re.match(), which anchors only the start

				# of the path.  Each one therefore ends in '$' so that a longer name which

				# merely begins with a documentation name (e.g. 'AUTHORS.c', 'foo.mdx')

				# is not classified as documentation.

				DOCS_ONLY_REGEX = list(map(re.compile, [

				    r'man/.*$',

				    r'.*\.md$',

				    r'AUTHORS$',

				    r'COPYRIGHT$',

				    r'LICENSE$',

				    r'NOTICE$',

				    r'\.gitignore$',

				]))

				"""

				Patterns of files that are considered to trigger full CI.

				"""

				FULL_RUN_REGEX = list(map(re.compile, [

				    r'\.github/workflows/.*\.ya?ml',

				    r'\.github/workflows/scripts/.*',

				    r'cmd.*',

				    r'configs/.*',

				@@ -116,6 +133,12 @@ def output_type(type, source, reason):

				                        f'changed file "{f}" matches pattern "{r.pattern}"'

				                        )

				    if changed_files and all(

				            any(r.match(f) for r in DOCS_ONLY_REGEX)

				            for f in changed_files):

				        output_type('docs', 'auto',

				                    'all changed files are documentation')

				    # catch-all

				    output_type('quick', 'auto',

				                'no changed file matches full CI patterns')

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
									
Vendored

		+2
		
												View File
												
				@@ -17,6 +17,8 @@ sudo docker builder prune -a

				unneeded="microsoft-edge-stable|azure-cli|google-cloud|google-chrome-stable|"\

				"temurin|llvm|firefox|mysql-server|snapd|android|dotnet|haskell|ghcup|"\

				"powershell|julia|swift|miniconda|chromium"

				# refresh package index before removing packages

				sudo apt-get -y update

				sudo apt-get -y remove $(dpkg-query -f '${binary:Package}\n' -W | grep -E "'$unneeded'")

				sudo apt-get -y autoremove

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
									
Vendored

		+29
		-24
	
												View File
												
				@@ -28,6 +28,7 @@ NIC="virtio"

				# additional options for virt-install

				OPTS[0]=""

				OPTS[1]=""

				ALT_URL=""

				case "$OS" in

				  almalinux8)

				@@ -56,11 +57,22 @@ case "$OS" in

				  centos-stream9)

				    OSNAME="CentOS Stream 9"

				    URL="https://cloud.centos.org/centos/9-stream/x86_64/images/CentOS-Stream-GenericCloud-9-latest.x86_64.qcow2"

				    # Sometimes we get HTTP errors for the first link.  Fall back to the

				    # "Composes" repo as an alternative.  The "Composes" repo includes

				    # autogenerated nightly CentOS Stream images.  We have to look up the URL

				    # dynamically since the qcow2 file name has the date in it.

				    ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-9-*.x86_64.qcow2" --spider -np --recursive  --no-verbose \

				              https://composes.stream.centos.org/stream-9/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \

				              awk '/200 OK/{print $(NF-2)}')

				    ;;

				  centos-stream10)

				    OSNAME="CentOS Stream 10"

				    OSv="centos-stream9"

				    URL="https://cloud.centos.org/centos/10-stream/x86_64/images/CentOS-Stream-GenericCloud-10-latest.x86_64.qcow2"

				    ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-10-*.x86_64.qcow2" --spider -np --recursive  --no-verbose \

				              https://composes.stream.centos.org/stream-10/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \

				              awk '/200 OK/{print $(NF-2)}')

				    ;;

				  debian11)

				    OSNAME="Debian 11"

				@@ -78,11 +90,6 @@ case "$OS" in

				    OPTS[0]="--boot"

				    OPTS[1]="uefi=on"

				    ;;

				  fedora42)

				    OSNAME="Fedora 42"

				    OSv="fedora-unknown"

				    URL="https://download.fedoraproject.org/pub/fedora/linux/releases/42/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-42-1.1.x86_64.qcow2"

				    ;;

				  fedora43)

				    OSNAME="Fedora 43"

				    OSv="fedora-unknown"

				@@ -93,14 +100,6 @@ case "$OS" in

				    OSv="fedora-unknown"

				    URL="https://download.fedoraproject.org/pub/fedora/linux/releases/44/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-44-1.7.x86_64.qcow2"

				    ;;

				  freebsd13-5r)

				    FreeBSD="13.5-RELEASE"

				    OSNAME="FreeBSD $FreeBSD"

				    OSv="freebsd13.0"

				    URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"

				    KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"

				    NIC="rtl8139"

				    ;;

				  freebsd14-4r)

				    FreeBSD="14.4-RELEASE"

				    OSNAME="FreeBSD $FreeBSD"

				@@ -111,18 +110,10 @@ case "$OS" in

				  freebsd15-0r)

				    FreeBSD="15.0-RELEASE"

				    OSNAME="FreeBSD $FreeBSD"

				    OSv="freebsd15.0"

				    OSv="freebsd14.0"

				    URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"

				    KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"

				    ;;

				  freebsd13-5s)

				    FreeBSD="13.5-STABLE"

				    OSNAME="FreeBSD $FreeBSD"

				    OSv="freebsd13.0"

				    URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"

				    KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"

				    NIC="rtl8139"

				    ;;

				  freebsd14-4s)

				    FreeBSD="14.4-STABLE"

				    OSNAME="FreeBSD $FreeBSD"

				@@ -131,7 +122,7 @@ case "$OS" in

				    KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"

				    ;;

				  freebsd15-1s)

				    FreeBSD="15.1-PRERELEASE"

				    FreeBSD="15.1-STABLE"

				    OSNAME="FreeBSD $FreeBSD"

				    OSv="freebsd14.0"

				    URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"

				@@ -160,6 +151,11 @@ case "$OS" in

				    OSv="ubuntu24.04"

				    URL="$UBMIRROR/noble/current/noble-server-cloudimg-amd64.img"

				    ;;

				  ubuntu26)

				    OSNAME="Ubuntu 26.04"

				    OSv="ubuntu24.04"

				    URL="$UBMIRROR/resolute/current/resolute-server-cloudimg-amd64.img"

				    ;;

				  *)

				    echo "Wrong value for OS variable!"

				    exit 111

				@@ -173,7 +169,6 @@ echo "ENV=$ENV" >> $ENV

				# result path

				echo 'RESPATH="/var/tmp/test_results"' >> $ENV

				# FreeBSD 13 has problems with: e1000 and virtio

				echo "NIC=$NIC" >> $ENV

				# freebsd15 -> used in zfs-qemu.yml

				@@ -221,6 +216,16 @@ for cmd in 'axel -q -o' 'curl --fail -LSs -o' ; do

				  if [ -s "$IMG" ] ; then

				    # Successful download

				    break

				  else

				    if [ -n "$ALT_URL" ] ; then

				      # Try the $ALT_URL if specified

				      echo "Loading alternative $ALT_URL with $cmd..."

				      time eval "$cmd $IMG $ALT_URL"

				      if [ -s "$IMG" ]; then

				        # Successful ALT_URL download

				        break

				      fi

				    fi

				  fi

				done

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps-vm.sh
									
Vendored

		+35
		-3
	
												View File
												
				@@ -215,7 +215,7 @@ case "$1" in

				  tumbleweed)

				    tumbleweed

				    ;;

				  ubuntu*)

				  ubuntu22|ubuntu24)

				    debian

				    echo "##[group]Install Ubuntu specific"

				    sudo apt-get install -yq linux-tools-common libtirpc-dev \

				@@ -226,6 +226,27 @@ case "$1" in

				    # https://github.com/actions/runner-images/issues/9946

				    sudo apt-get install -yq build-essential

				    echo "##[endgroup]"

				    echo "##[group]Delete Ubuntu OpenZFS modules"

				    for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done

				    echo "##[endgroup]"

				    ;;

				  ubuntu26)

				    debian

				    echo "##[group]Install Ubuntu specific"

				    # Skip linux-modules-extra which is already installed

				    sudo apt-get install -yq linux-tools-common

				    sudo apt-get install -yq libtirpc-dev

				    sudo apt-get install -yq dh-sequence-dkms

				    # Need 'build-essential' explicitly for ARM builder

				    # https://github.com/actions/runner-images/issues/9946

				    sudo apt-get install -yq build-essential

				    # Replace sudo-rs with sudo for now because the Rust version

				    # does not support -E to preserve the entire environment

				    sudo update-alternatives --set sudo /usr/bin/sudo.ws

				    echo "##[endgroup]"

				    echo "##[group]Delete Ubuntu OpenZFS modules"

				    for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done

				@@ -267,8 +288,19 @@ case "$1" in

				    ;;

				  debian*|ubuntu*)

				    sudo -E systemctl enable nfs-kernel-server

				    sudo -E systemctl enable qemu-guest-agent

				    sudo -E systemctl enable smbd

				    # enable usershares (disabled by default on ubuntu 26.04)

				    sudo -E sed -i '/usershare max shares/s/^#//' /etc/samba/smb.conf

				    # add systemd drop-in to allow the service to be enabled

				    sudo -E mkdir -p /etc/systemd/system/qemu-guest-agent.service.d/

				    sudo -E tee /etc/systemd/system/qemu-guest-agent.service.d/override.conf <<EOF

				[Install]

				WantedBy=multi-user.target

				EOF

				    sudo -E systemctl daemon-reload

				    sudo -E systemctl enable qemu-guest-agent

				    ;;

				  *)

				    # All other linux distros

				@@ -292,7 +324,7 @@ case "$1" in

				    echo 'GRUB_SERIAL_COMMAND="serial --speed=115200"' \

				      | sudo tee -a /etc/default/grub >/dev/null

				    ;;

				  ubuntu24)

				  ubuntu24|ubuntu26)

				    GRUB_CFG="/boot/grub/grub.cfg"

				    GRUB_MKCONFIG="grub-mkconfig"

				    echo 'GRUB_DISABLE_OS_PROBER="false"' \

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-3-deps.sh
									
Vendored

		+13
		-5
	
												View File
												
				@@ -2,9 +2,12 @@

				# 3) Wait for VM to boot from previous step and launch dependencies

				#    script on it.

				#

				# $1: OS name (like 'fedora41')

				# $2: (optional) Experimental kernel version to install on fedora,

				#     like "6.14".

				# qemu-3-deps.sh [--poweroff] OS_NAME [FEDORA_VERSION]

				#

				# --poweroff: Power off the VM after installing dependencies

				# OS_NAME: OS name (like 'fedora41')

				# FEDORA_VERSION: (optional) Experimental Fedora kernel version (like "6.14")

				#     to install instead of the Fedora default.

				######################################################################

				.github/workflows/scripts/qemu-wait-for-vm.sh vm0

				@@ -15,8 +18,13 @@

				# we need to update the kernel version in zfs's META file to allow the

				# build to happen.  We update our local copy of META here, since we know

				# it will be rsync'd up in the next step.

				if [ -n "${2:-}" ] ; then

				  sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: 99.99/g' META

				#

				# Look to see if the last argument looks like a kernel version.

				ver="${@: -1}"

				if [[ $ver =~ ^[0-9]+\.[0-9]+ ]] ; then

				  # We got a kernel version, update META to say we support it so we

				  # can test against it.

				  sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: '$ver'/g' META

				fi

				scp .github/workflows/scripts/qemu-3-deps-vm.sh zfs@vm0:qemu-3-deps-vm.sh

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-4-build-vm.sh
									
Vendored

		+31
		-3
	
												View File
												
				@@ -5,10 +5,12 @@

				#

				# Usage:

				#

				#       qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM]

				#               [--poweroff][--release][--repo][--tarball]

				#       qemu-4-build-vm.sh OS [--custom-branch BRANCH][--enable-debug][--dkms]

				#               [--patch-level NUM][--poweroff][--release][--repo][--tarball]

				#

				# OS:           OS name like 'fedora41'

				# --custom-branch: When building packages, check out this version of ZFS to

				#                  build, but use the current CI scripts to do it.

				# --enable-debug:  Build RPMs with '--enable-debug' (for testing)

				# --dkms:       Build DKMS RPMs as well

				# --patch-level NUM:    Use a custom patch level number for packages.

				@@ -27,8 +29,27 @@ POWEROFF=""

				RELEASE=""

				REPO=""

				TARBALL=""

				CUSTOM_BRANCH=""

				PREV_BRANCH=""

				# Switch the working tree back to the branch recorded in PREV_BRANCH.

				# Does nothing when PREV_BRANCH is empty, i.e. no previous branch was

				# recorded or it has already been restored.

				cleanup() {

				  if [ -n "$PREV_BRANCH" ] ; then

				    # Quote the expansion so the branch name is always passed to git as

				    # a single, literal argument.

				    git checkout "$PREV_BRANCH"

				  fi

				}

				while [[ $# -gt 0 ]]; do

				  case $1 in

				    --custom-branch)

				      CUSTOM_BRANCH="$2"

				      # If the user specifies a custom tag/branch to build, and the build

				      # fails, we want to make sure our workflow scripts are restored to the

				      # current (more modern) versions so the subsequent CI steps use those.

				      shift

				      shift

				      PREV_BRANCH=$(git branch --show-current)

				      trap 'cleanup' ERR

				      ;;

				    --enable-debug)

				      ENABLE_DEBUG=1

				      shift

				@@ -337,7 +358,7 @@ fi

				#

				# rhel8.10

				# almalinux9.5

				# fedora42

				# fedora44

				source /etc/os-release

				 if which hostnamectl &> /dev/null ; then

				  # Fedora 42+ use hostnamectl

				@@ -367,6 +388,11 @@ if [ -n "$ENABLE_DEBUG" ] ; then

				  extra="--enable-debug"

				fi

				if [ -n "$CUSTOM_BRANCH" ] ; then

				  git fetch --unshallow

				  git checkout $CUSTOM_BRANCH

				fi

				# build

				case "$OS" in

				  freebsd*)

				@@ -393,6 +419,8 @@ case "$OS" in

				    ;;

				esac

				git checkout $PREV_BRANCH

				PREV_BRANCH=""

				# building the zfs module was ok

				echo 0 > /var/tmp/build-exitcode.txt

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
									
Vendored

		+7
		-1
	
												View File
												
				@@ -25,8 +25,14 @@ cd lustre-release

				# Include Lustre patches to build against master/zfs-2.4.x.  Once these

				# patches are merged we can remove these lines.

				#

				# LU-19539 osd-zfs: use osd_dmu_write() wrapper for xattrs

				# LU-19761 osd-zfs: Build against ZFS 2.4.0

				# LU-19249 build: Compatibility updates for kernel v6.16

				#

				patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download'

					'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download')

					'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download'

					'https://review.whamcloud.com/changes/fs%2Flustre-release~60619/revisions/13/patch?download')

				for p in "${patches[@]}" ; do

					curl $p | base64 -d > patch

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh
									
Vendored

		+8
		
												View File
												
				@@ -79,6 +79,7 @@ function do_builtin_build() {

				  cd $HOME/linux-$fullver

				  ./scripts/config --enable ZFS

				  ./scripts/config --enable ZFS_DEBUG

				  yes "" | make oldconfig

				  make -j `nproc`

				  ) &> /var/tmp/builtin.txt || rc=$?

				@@ -185,6 +186,13 @@ case "$OS" in

				    sudo mount -o noatime /dev/vdb /var/tmp

				    sudo chmod 1777 /var/tmp

				    sudo mv -f /tmp/*.txt /var/tmp

				    # Allow for longer RCU timeouts due to the heavily virtualized and

				    # potentially oversubscribed nature of the CI environment.

				    rcu_cpu_stall_timeout="/sys/module/rcupdate/parameters/rcu_cpu_stall_timeout"

				    if test -f $rcu_cpu_stall_timeout; then

				        echo 120 | sudo sh -c "cat > '$rcu_cpu_stall_timeout'"

				    fi

				    ;;

				esac

									
										sys/contrib/openzfs/.github/workflows/smatch.yml
									
Vendored

		+9
		
												View File
												
				@@ -3,6 +3,14 @@ name: smatch

				on:

				  push:

				  pull_request:

				    paths-ignore:

				      - 'man/**'

				      - '**.md'

				      - 'AUTHORS'

				      - 'COPYRIGHT'

				      - 'LICENSE'

				      - 'NOTICE'

				      - '.gitignore'

				concurrency:

				  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

				@@ -10,6 +18,7 @@ concurrency:

				jobs:

				  smatch:

				    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'

				    runs-on: ubuntu-24.04

				    steps:

				    - name: Checkout smatch

									
										sys/contrib/openzfs/.github/workflows/zfs-arm.yml
									
Vendored

		+50
		-1
	
												View File
												
				@@ -3,11 +3,30 @@ name: zfs-arm

				on:

				  push:

				  pull_request:

				    paths-ignore:

				      - 'man/**'

				      - '**.md'

				      - 'AUTHORS'

				      - 'COPYRIGHT'

				      - 'LICENSE'

				      - 'NOTICE'

				      - '.gitignore'

				  workflow_dispatch:

				    inputs:

				      gcc_ver:

				        type: string

				        required: false

				        default: ""

				        description: "(optional) install specific GCC version, like '16'"

				concurrency:

				  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

				  cancel-in-progress: true

				jobs:

				  zfs-arm:

				    name: ZFS ARM build

				    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'

				    runs-on: ubuntu-24.04-arm

				    steps:

				    - uses: actions/checkout@v6

				@@ -18,6 +37,31 @@ jobs:

				      timeout-minutes: 20

				      run: |

				        sudo apt-get -y remove firefox || true

				        # Do we want to test with a custom GCC version?

				        if [ "${{ github.event.inputs.gcc_ver }}" != "" ] ; then

				          ver="${{ github.event.inputs.gcc_ver }}"

				          sudo add-apt-repository ppa:ubuntu-toolchain-r/test

				          sudo apt-get update

				          echo "GCCs available:"

				          awk '/Package: gcc-/{print $2}'  /var/lib/apt/lists/*ubuntu-toolchain-r*Packages

				          sudo apt-get -y install gcc g++ gcc-$ver g++-$ver

				          sudo update-alternatives --remove-all gcc || true 2>&1

				          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$ver 100

				          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$ver 100

				          sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100

				          sudo update-alternatives --set cc /usr/bin/gcc

				          sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100

				          sudo update-alternatives --set c++ /usr/bin/g++

				          sudo update-alternatives --set gcc "/usr/bin/gcc-$ver"

				          sudo update-alternatives --set g++ "/usr/bin/g++-$ver"

				        fi

				        .github/workflows/scripts/qemu-3-deps-vm.sh ubuntu24

				        # We're running the VM scripts locally on the runner, so need to fix

				@@ -28,7 +72,12 @@ jobs:

				    - name: Build modules

				      timeout-minutes: 30

				      run: |

				        .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24

				        # Even though we may have installed a newer GCC, the kernel builds don't

				        # seem to honor it, and instead use the older GCC.  I assume this is

				        # to match up with whatever GCC version was used for the kernel.  Always

				        # specify KERNEL_CC to get around this.  This works when using the

				        # default GCC and with a custom GCC.

				        KERNEL_CC=/usr/bin/gcc .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24

				        # Quick sanity test since we're not running the full ZTS

				        sudo modprobe zfs

									
										sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
									
Vendored

		+13
		-1
	
												View File
												
				@@ -42,6 +42,11 @@ on:

				        required: false

				        default: ""

				        description: "(optional) repo URL (blank: use http://download.zfsonlinux.org)"

				      custom_branch:

				        type: string

				        required: false

				        default: ""

				        description: "(optional) custom tag/branch to build using current CI (like 'zfs-2.2.9')"

				      lookup:

				        type: boolean

				        required: false

				@@ -58,7 +63,7 @@ jobs:

				    strategy:

				      fail-fast: false

				      matrix:

				        os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43', 'fedora44']

				        os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora43', 'fedora44']

				    runs-on: ubuntu-24.04

				    steps:

				    - uses: actions/checkout@v6

				@@ -94,9 +99,16 @@ jobs:

				                if [ -n "${{ github.event.inputs.patch_level }}" ] ; then

				                        EXTRA="--patch-level ${{ github.event.inputs.patch_level }}"

				                fi

				                if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then

				                        EXTRA+=" --custom-branch ${{ github.event.inputs.custom_branch }}"

				                fi

				                .github/workflows/scripts/qemu-4-build.sh $EXTRA \

				                        --repo --release --dkms --tarball ${{ matrix.os }}

				                if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then

				                        echo "Built packages for ${{ github.event.inputs.custom_branch }}"

				                fi

				        fi

				    - name: Prepare artifacts

									
										sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
									
Vendored

		+21
		-10
	
												View File
												
				@@ -14,7 +14,7 @@ on:

				        type: string

				        required: false

				        default: ""

				        description: "(optional) Only run on this specific OS (like 'fedora42' or 'alpine3-23')"

				        description: "(optional) Only run on this specific OS (like 'fedora44' or 'alpine3-23')"

				concurrency:

				  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

				@@ -23,6 +23,7 @@ concurrency:

				jobs:

				  test-config:

				    name: Setup

				    if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'

				    runs-on: ubuntu-24.04

				    outputs:

				      test_os: ${{ steps.os.outputs.os }}

				@@ -45,24 +46,27 @@ jobs:

				          fi

				          case "$ci_type" in

				          docs)

				            os_selection='[]'

				            ;;

				          quick)

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd15-1s", "ubuntu24"]'

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora44", "freebsd15-1s", "ubuntu26"]'

				            ;;

				          linux)

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "ubuntu22", "ubuntu24"]'

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora43", "fedora44", "ubuntu22", "ubuntu24", "ubuntu26"]'

				            ;;

				          freebsd)

				            os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-1s", "freebsd16-0c"]'

				            os_selection='["freebsd14-4r", "freebsd14-4s", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c"]'

				            ;;

				          *)

				            # default list

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24"]'

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24", "ubuntu26"]'

				            ;;

				          esac

				          # Repository-level override for OS selection.

				          # Set vars.ZTS_OS_OVERRIDE in repo settings to restrict targets

				          # (e.g. '["debian13"]' or '["debian13", "fedora42"]').

				          # (e.g. '["debian13"]' or '["debian13", "fedora44"]').

				          # Manual ZFS-CI-Type in commit messages bypasses the override.

				          if [ -n "${{ vars.ZTS_OS_OVERRIDE }}" ] && [ "$ci_source" != "manual" ]; then

				            override='${{ vars.ZTS_OS_OVERRIDE }}'

				@@ -91,15 +95,19 @@ jobs:

				  qemu-vm:

				    name: qemu-x86

				    needs: [ test-config ]

				    if: >-

				      (github.event_name == 'pull_request' ||

				      github.repository != 'openzfs/zfs') &&

				      needs.test-config.outputs.ci_type != 'docs'

				    strategy:

				      fail-fast: false

				      matrix:

				        # rhl:     almalinux8, almalinux9, centos-streamX, fedora4x

				        # debian:  debian12, debian13, ubuntu22, ubuntu24

				        # debian:  debian12, debian13, ubuntu22, ubuntu24, ubuntu26

				        # misc:    archlinux, tumbleweed

				        # FreeBSD variants of November 2025:

				        # FreeBSD Release: freebsd13-5r, freebsd14-4r, freebsd15-0r

				        # FreeBSD Stable:  freebsd13-5s, freebsd14-4s, freebsd15-1s

				        # FreeBSD Release: freebsd14-4r, freebsd15-0r

				        # FreeBSD Stable:  freebsd14-4s, freebsd15-1s

				        # FreeBSD Current: freebsd16-0c

				        os: ${{ fromJson(needs.test-config.outputs.test_os) }}

				    runs-on: ubuntu-24.04

				@@ -153,7 +161,10 @@ jobs:

				      run: .github/workflows/scripts/qemu-8-summary.sh '${{ steps.artifact-upload.outputs.artifact-url }}'

				  cleanup:

				    if: always()

				    if: >-

				      (github.event_name == 'pull_request' ||

				      github.repository != 'openzfs/zfs') &&

				      always()

				    name: Cleanup

				    runs-on: ubuntu-latest

				    needs: [ qemu-vm ]

									
										sys/contrib/openzfs/.github/workflows/zloop.yml
									
Vendored

		+8
		
												View File
												
				@@ -3,6 +3,14 @@ name: zloop

				on:

				  push:

				  pull_request:

				    paths-ignore:

				      - 'man/**'

				      - '**.md'

				      - 'AUTHORS'

				      - 'COPYRIGHT'

				      - 'LICENSE'

				      - 'NOTICE'

				      - '.gitignore'

				concurrency:

				  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

									
										sys/contrib/openzfs/Makefile.am
									
		+1
		
												View File
												
				@@ -138,6 +138,7 @@ cstyle:

						! -path './include/sys/lua/*' \

						! -path './module/lua/l*.[ch]' \

						! -path './module/zfs/lz4.c' \

						! -path './tests/unit/munit.[ch]' \

						$(cstyle_line)

				filter_executable = -exec test -x '{}' \; -print

									
										sys/contrib/openzfs/README.md
									
		+2
		-2
	
												View File
												
				@@ -52,7 +52,7 @@ All RHEL (and compatible systems: AlmaLinux OS, Rocky Linux, etc) on the **full*

				All Ubuntu **LTS** releases are supported.

				**Supported Ubuntu releases**: **24.04 “Noble”**, **22.04 “Jammy”**.

				**Supported Ubuntu releases**: **26.04 “Resolute”**, **24.04 “Noble”**, **22.04 “Jammy”**.

				### Debian

				@@ -68,4 +68,4 @@ Generally, if a distribution is following an LTS kernel, it should work well wit

				All FreeBSD releases receiving [security support](https://www.freebsd.org/security/#sup) are supported by OpenZFS.

				**Supported FreeBSD releases**: **15.0**, **14.4**, **13.5**.

				**Supported FreeBSD releases**: **15.0**, **14.4**.

									
										sys/contrib/openzfs/cmd/Makefile.am
									
		-1
	
												View File
												
				@@ -54,7 +54,6 @@ ztest_LDADD = \

					libnvpair.la

				ztest_LDADD += -lm

				ztest_LDFLAGS = -pthread

				include $(srcdir)/%D%/raidz_test/Makefile.am

sys/contrib/openzfs/cmd/zarcstat.in

+3 -3

View File

@@ -565,10 +565,10 @@ def init():
     update_hdr_intr()
     # check if L2ARC exists
     # check if L2ARC exists; fall back to l2_size for older kernels that
     # do not export l2_ndev
     snap_stats()
     l2_size = cur.get("l2_size")
     if l2_size:
     if cur.get("l2_ndev") or cur.get("l2_size"):
         l2exist = True
     if desired_cols:

sys/contrib/openzfs/cmd/zarcsummary

+4 -1

View File

@@ -856,7 +856,10 @@ def section_l2arc(kstats_dict):
     # The L2ARC statistics live in the same section as the normal ARC stuff
     arc_stats = isolate_section('arcstats', kstats_dict)
     if arc_stats['l2_size'] == '0':
     # Skip the section only when no cache device is attached. Fall back to
     # l2_size for older kernels that do not export l2_ndev.
     if arc_stats.get('l2_ndev', '0') == '0' and \
             arc_stats['l2_size'] == '0':
         print('L2ARC not detected, skipping section\n')
         return

									
										sys/contrib/openzfs/cmd/zdb/zdb.c
									
		+153
		-58
	
												View File
												
				@@ -2802,18 +2802,18 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,

					    vd->vdev_children, vdrz->vd_nparity);

					raidz_row_t *rr = rm->rm_row[0];

					if (!dump_opt['H']) {

						int last_disk = vd->vdev_children - 1;

						/*

						 * Account for out of order disks in raidz1.

						 * For now just reverse them back and adjust for it later.

						 */

					if (rr->rr_firstdatacol == 1 && (zio.io_offset & (1ULL << 20))) {

						if (rr->rr_firstdatacol == 1 &&

						    (zio.io_offset & (1ULL << 20))) {

							uint64_t devidx = rr->rr_col[0].rc_devidx;

							rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;

							rr->rr_col[1].rc_devidx = devidx;

						}

					if (!dump_opt['H']) {

						int last_disk = vd->vdev_children - 1;

						int first_disk = rr->rr_col[0].rc_devidx;

						(void) printf("%12llx", (u_longlong_t)file_offset);

				@@ -2843,23 +2843,49 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,

						static uint64_t next_offset = 0;

						if (next_offset != file_offset) {

							(void) printf("skip hole\t-\t%llx\n",

							    (u_longlong_t)((file_offset - next_offset) >>

							    vd->vdev_ashift));

							(void) printf("skip hole\t-\t\t%lld\n",

							    (u_longlong_t)((file_offset - next_offset) / 512));

						}

						next_offset = file_offset + BP_GET_LSIZE(bp);

						uint64_t tmp_offset = file_offset;

						for (int c = 0; c < rr->rr_cols; c++) {

							boolean_t pcol = c < rr->rr_firstdatacol;

							raidz_col_t *rc = &rr->rr_col[c];

							char *path = vd->vdev_child[rc->rc_devidx]->vdev_path;

							// c < rr->rr_firstdatacol

							if (rc->rc_size == 0)

								continue;

							(void) printf("%s\t%llu\t%d\n",

							(void) printf("%s\t\t%llu\t%d",

							    zfs_basename(path),

							    (u_longlong_t)(rc->rc_offset +

							    VDEV_LABEL_START_SIZE)/512,

							    (int)rc->rc_size/512);

							if (dump_opt['v']) {

								char label = pcol ? 'P' : 'D';

								int num;

								if (c < 2) {

									num = 0;

								} else {

									num = pcol ? c :

									    (c - rr->rr_firstdatacol);

								}

								printf("\t%c%d", label, num);

								if (dump_opt['v'] > 1) {

									unsigned long long off;

									if (pcol)

										off = file_offset;

									else

										off = tmp_offset;

									off = off / 512ULL;

									printf("\t%llu", off);

								}

							}

							if (!pcol)

								tmp_offset += rc->rc_size;

							printf("\n");

						}

					}

				}

				@@ -2989,7 +3015,12 @@ dump_indirect_layout(dnode_t *dn)

					 * Start layout with a header

					 */

					if (dump_opt['H']) {

						(void) printf("DISK\t\tLBA\t\tCOUNT\n");

						(void) printf("DISK\t\t\tLBA\tCOUNT");

						if (dump_opt['v'])

							(void) printf("\tTYPE");

						if (dump_opt['v'] > 1)

							(void) printf("\tOFFSET");

						printf("\n");

					} else {

						char diskhdr[16];

				@@ -6325,22 +6356,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

				    dmu_object_type_t type)

				{

					int i;

					boolean_t claimed = B_FALSE;

					boolean_t ddt_block = B_FALSE;

					boolean_t brt_block = B_FALSE;

					ASSERT(type < ZDB_OT_TOTAL);

					if (zilog && zil_bp_tree_add(zilog, bp) != 0)

						return;

					/*

					 * This flag controls if we will issue a claim for the block while

					 * counting it, to ensure that all blocks are referenced in space maps.

					 * We don't issue claims if we're not doing leak tracking, because it's

					 * expensive if the user isn't interested. We also don't claim the

					 * second or later occurrences of cloned or dedup'd blocks, because we

					 * already claimed them the first time.

					 */

					boolean_t do_claim = !dump_opt['L'];

					spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);

					blkptr_t tempbp;

				@@ -6371,21 +6395,30 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

						ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);

						/*

						 * ddt_lookup() can return NULL if this block didn't exist

						 * in the DDT and creating it would take the DDT over its

						 * quota. Since we got the block from disk, it must exist in

						 * the DDT, so this can't happen. However, when unique entries

						 * are pruned, the dedup bit can be set with no corresponding

						 * entry in the DDT.

						 * ddt_lookup() can return NULL when unique entries are pruned

						 * from the DDT.

						 */

						if (dde == NULL) {

							ddt_exit(ddt);

							goto skipped;

							goto ddt_done;

						}

						/* Get the phys for this variant */

						ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);

						/*

						 * DDT_PHYS_NONE means the block has the dedup bit set but

						 * its DVA doesn't match any phys in the entry.  This can

						 * happen when a DVA was evicted from the DDT and re-added

						 * on a hash collision.  The block may still have a BRT entry.

						 */

						if (v == DDT_PHYS_NONE) {

							ddt_exit(ddt);

							goto ddt_done;

						}

						ddt_block = B_TRUE;

						/*

						 * This entry may have multiple sets of DVAs. We must claim

						 * each set the first time we see them in a real block on disk,

				@@ -6400,8 +6433,14 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

							dde->dde_io =

							    (void *)(((uintptr_t)dde->dde_io) | (1 << v));

						/* Consume a reference for this block. */

						if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)

						/*

						 * Consume a reference.  If this variant's refcount is already

						 * zero, the DDT tracking is exhausted — more filesystem

						 * references exist than the DDT accounts for.

						 */

						boolean_t ddt_refcnt_exhausted =

						    (ddt_phys_refcnt(dde->dde_phys, v) == 0);

						if (!ddt_refcnt_exhausted)

							ddt_phys_decref(dde->dde_phys, v);

						/*

				@@ -6430,20 +6469,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

							bp = &tempbp;

						}

						if (seen) {

						if (seen && !ddt_refcnt_exhausted) {

							/*

							 * The second or later time we see this block,

							 * it's a duplicate and we count it.

							 */

							zcb->zcb_dedup_asize += BP_GET_ASIZE(bp);

							zcb->zcb_dedup_blocks++;

							/* Already claimed, don't do it again. */

							do_claim = B_FALSE;

							claimed = B_TRUE;

						}

						ddt_exit(ddt);

					} else if (zcb->zcb_brt_is_active &&

					}

				ddt_done:

					if (!claimed && zcb->zcb_brt_is_active &&

					    brt_maybe_exists(zcb->zcb_spa, bp)) {

						/*

						 * Cloned blocks are special. We need to count them, so we can

				@@ -6451,10 +6491,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

						 * only claim them once.

						 *

						 * To do this, we keep our own in-memory BRT. For each block

						 * we haven't seen before, we look it up in the real BRT and

						 * if its there, we note it and its refcount then proceed as

						 * normal. If we see the block again, we count it as a clone

						 * and then give it no further consideration.

						 * we haven't seen before, we look it up in the real BRT. If

						 * we see the block again, we count it as a clone.

						 */

						zdb_brt_entry_t zbre_search, *zbre;

						avl_index_t where;

				@@ -6462,10 +6500,10 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

						zbre_search.zbre_dva = bp->blk_dva[0];

						zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);

						if (zbre == NULL) {

							/* Not seen before; track it */

							uint64_t refcnt =

							    brt_entry_get_refcount(zcb->zcb_spa, bp);

							if (refcnt > 0) {

								brt_block = B_TRUE;

								zbre = umem_zalloc(sizeof (zdb_brt_entry_t),

								    UMEM_NOFAIL);

								zbre->zbre_dva = bp->blk_dva[0];

				@@ -6473,25 +6511,16 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

								avl_insert(&zcb->zcb_brt, zbre, where);

							}

						} else {

							/*

							 * Second or later occurrence, count it and take a

							 * refcount.

							 */

							brt_block = B_TRUE;

							if (zbre->zbre_refcount > 0) {

								zcb->zcb_clone_asize += BP_GET_ASIZE(bp);

								zcb->zcb_clone_blocks++;

								zbre->zbre_refcount--;

							if (zbre->zbre_refcount == 0) {

								avl_remove(&zcb->zcb_brt, zbre);

								umem_free(zbre, sizeof (zdb_brt_entry_t));

								claimed = B_TRUE;

							}

							/* Already claimed, don't do it again. */

							do_claim = B_FALSE;

						}

					}

				skipped:

					for (i = 0; i < 4; i++) {

						int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;

						int t = (i & 1) ? type : ZDB_OT_TOTAL;

				@@ -6650,12 +6679,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,

				#undef BIN

				hist_skipped:

					if (!do_claim)

					if (claimed || dump_opt['L'])

						return;

					VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa,

					int claim_err = zio_wait(zio_claim(NULL, zcb->zcb_spa,

					    spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL,

					    ZIO_FLAG_CANFAIL)));

					    ZIO_FLAG_CANFAIL));

					if (claim_err != 0) {

						char blkbuf[BP_SPRINTF_LEN];

						snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);

						(void) printf("block claim error %d%s%s: %s\n",

						    claim_err, brt_block ? " (BRT)" : "",

						    ddt_block ? " (DDT)" : "", blkbuf);

						zcb->zcb_haderrors = 1;

						zcb->zcb_errors[claim_err]++;

					}

				}

				static void

				@@ -7431,10 +7469,66 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)

				static boolean_t

				zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)

				{

					if (dump_opt['L'])

						return (B_FALSE);

					boolean_t leaks = B_FALSE;

					/*

					 * Report leaked BRT entries whose refcount was not fully consumed by

					 * the traversal.

					 */

					if (zcb->zcb_brt_is_active) {

						void *cookie = NULL;

						zdb_brt_entry_t *zbre;

						while ((zbre = avl_destroy_nodes(

						    &zcb->zcb_brt, &cookie)) != NULL) {

							if (!dump_opt['L'] && zbre->zbre_refcount != 0) {

								(void) printf("BRT leak: vdev %llu, "

								    "offset 0x%llx, refcount %llu\n",

								    (u_longlong_t)DVA_GET_VDEV(

								    &zbre->zbre_dva),

								    (u_longlong_t)DVA_GET_OFFSET(

								    &zbre->zbre_dva),

								    (u_longlong_t)zbre->zbre_refcount);

								leaks = B_TRUE;

							}

							umem_free(zbre, sizeof (zdb_brt_entry_t));

						}

						avl_destroy(&zcb->zcb_brt);

					}

					if (dump_opt['L'])

						return (leaks);

					/*

					 * Report leaked DDT entries whose refcount was not fully consumed by

					 * the traversal.  Entries in the DDT ZAP that were never looked up

					 * are not detected here.

					 */

					for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {

						ddt_t *ddt = spa->spa_ddt[c];

						if (ddt == NULL)

							continue;

						ddt_enter(ddt);

						for (ddt_entry_t *dde = avl_first(&ddt->ddt_tree); dde != NULL;

						    dde = AVL_NEXT(&ddt->ddt_tree, dde)) {

							for (int p = 0; p < DDT_NPHYS(ddt); p++) {

								ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);

								uint64_t refcnt = ddt_phys_refcnt(dde->dde_phys,

								    v);

								if (refcnt == 0)

									continue;

								blkptr_t blk;

								char blkbuf[BP_SPRINTF_LEN];

								ddt_bp_create(ddt->ddt_checksum, &dde->dde_key,

								    dde->dde_phys, v, &blk);

								snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);

								(void) printf("DDT leak: refcount %llu %s\n",

								    (u_longlong_t)refcnt, blkbuf);

								leaks = B_TRUE;

							}

						}

						ddt_exit(ddt);

					}

					vdev_t *rvd = spa->spa_root_vdev;

					for (unsigned c = 0; c < rvd->vdev_children; c++) {

						vdev_t *vd = rvd->vdev_child[c];

				@@ -10136,7 +10230,7 @@ main(int argc, char **argv)

					 * Automate cachefile

					 */

					if (!spa_config_path_env && !config_path_console && target &&

					    libzfs_core_init() == 0) {

					    !dump_opt['l'] && libzfs_core_init() == 0) {

						char *pname = strdup(target);

						const char *value;

						nvlist_t *pnvl = NULL;

				@@ -10519,6 +10613,7 @@ main(int argc, char **argv)

						}

						if (dump_opt['f'] && os != NULL) {

							dump_opt['v'] = verbose;

							dump_file_data_layout(os);

						} else if (dump_opt['B']) {

							dump_backup(target, objset_id,

									
										sys/contrib/openzfs/cmd/zed/Makefile.am
									
		-1
	
												View File
												
				@@ -41,6 +41,5 @@ zed_LDADD = \

					libnvpair.la

				zed_LDADD += -lrt $(LIBATOMIC_LIBS) $(LIBUDEV_LIBS) $(LIBUUID_LIBS)

				zed_LDFLAGS = -pthread

				dist_noinst_DATA += %D%/agents/README.md

									
										sys/contrib/openzfs/cmd/zed/agents/zfs_retire.c
									
		+92
		-4
	
												View File
												
				@@ -350,6 +350,60 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,

					return (res);

				}

				/*
				 * Comparator used to order hot spares: returns true when spare 'a'
				 * should be attempted before spare 'b' as the replacement for a failed
				 * vdev described by the remaining arguments, and false otherwise
				 * (including when the two spares are equal on every test).
				 *
				 * The result comes from the first test that tells the spares apart:
				 *  1. Relation to the failed vdev's top-level vdev.  A distributed
				 *     spare whose TOP_GUID equals 'top_guid' (or any distributed spare
				 *     when 'top_guid' is 0) ranks highest, because distributed spares
				 *     rebuild faster than traditional ones.  A regular spare (no
				 *     TOP_GUID) ranks next.  A distributed spare belonging to another
				 *     dRAID ranks last, as the kernel will reject it anyway.
				 *  2. Only when 'have_rotational' is set: a spare whose rotational
				 *     value equals 'vdev_rotational' beats one whose value differs.
				 *  3. A spare whose vs_rsize is at least 'vdev_size' beats one that
				 *     is too small.
				 *  4. Otherwise the spare with the smaller vs_rsize wins (best fit).
				 *
				 * Any property that cannot be looked up on a spare is treated as 0.
				 */
				static boolean_t
				spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
				    uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
				{
					/* Index 0 is 'a', index 1 is 'b' throughout. */
					nvlist_t *spare[2] = { a, b };
					int rank[2];
					uint64_t rsize[2] = { 0, 0 };

					/* Test 1: rank each spare by its relation to the failed dRAID. */
					for (int i = 0; i < 2; i++) {
						uint64_t spare_top = 0;

						(void) nvlist_lookup_uint64(spare[i],
						    ZPOOL_CONFIG_TOP_GUID, &spare_top);
						if (spare_top == 0)
							rank[i] = 1;	/* regular spare */
						else if (spare_top == top_guid || top_guid == 0)
							rank[i] = 2;	/* usable distributed spare */
						else
							rank[i] = 0;	/* some other dRAID's spare */
					}
					if (rank[0] != rank[1])
						return (rank[0] > rank[1]);

					/* Test 2: rotational match, if the failed vdev reported one. */
					if (have_rotational) {
						boolean_t same_media[2];

						for (int i = 0; i < 2; i++) {
							uint64_t rotational = 0;

							(void) nvlist_lookup_uint64(spare[i],
							    ZPOOL_CONFIG_VDEV_ROTATIONAL, &rotational);
							same_media[i] = (rotational == vdev_rotational);
						}
						if (same_media[0] != same_media[1])
							return (same_media[0]);
					}

					/* Tests 3 and 4 both work from each spare's vs_rsize. */
					for (int i = 0; i < 2; i++) {
						vdev_stat_t *stats;
						unsigned int nstats;

						if (nvlist_lookup_uint64_array(spare[i],
						    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&stats,
						    &nstats) == 0)
							rsize[i] = stats->vs_rsize;
					}

					boolean_t a_fits = (rsize[0] >= vdev_size);
					boolean_t b_fits = (rsize[1] >= vdev_size);

					if (a_fits != b_fits)
						return (a_fits);

					return (rsize[0] < rsize[1]);
				}

				/*

				 * Given a vdev, attempt to replace it with every known spare until one

				 * succeeds or we run out of devices to try.

				@@ -364,6 +418,10 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)

					char *dev_name;

					zprop_source_t source;

					int ashift;

					uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0;

					boolean_t have_vdev_rotational;

					vdev_stat_t *vs;

					unsigned int c;

					config = zpool_get_config(zhp, NULL);

					if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,

				@@ -377,6 +435,35 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)

					    &spares, &nspares) != 0)

						return (B_FALSE);

					/*

					 * Collect the failed vdev's parameters for optimal replacement.

					 */

					have_vdev_rotational = (nvlist_lookup_uint64(vdev,

					    ZPOOL_CONFIG_VDEV_ROTATIONAL, &vdev_rotational) == 0);

					if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,

					    (uint64_t **)&vs, &c) == 0)

						vdev_size = vs->vs_rsize;

					(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid);

					/*

					 * Build a sorted index array over the spares, so that better

					 * candidates are tried first.

					 */

					uint_t order[nspares];

					for (s = 0; s < nspares; s++)

						order[s] = s;

					for (s = 1; s < nspares; s++) {

						uint_t key = order[s];

						int j = (int)s - 1;

						while (j >= 0 && spare_is_preferred(spares[key],

						    spares[order[j]], have_vdev_rotational, vdev_rotational,

						    vdev_size, top_guid)) {

							order[j + 1] = order[j];

							j--;

						}

						order[j + 1] = key;

					}

					/*

					 * lookup "ashift" pool property, we may need it for the replacement

					 */

				@@ -394,25 +481,26 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)

					 * replace it.

					 */

					for (s = 0; s < nspares; s++) {

						nvlist_t *spare = spares[order[s]];

						boolean_t rebuild = B_FALSE;

						const char *spare_name, *type;

						if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,

						if (nvlist_lookup_string(spare, ZPOOL_CONFIG_PATH,

						    &spare_name) != 0)

							continue;

						/* prefer sequential resilvering for distributed spares */

						if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE,

						if ((nvlist_lookup_string(spare, ZPOOL_CONFIG_TYPE,

						    &type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)

							rebuild = B_TRUE;

						/* if set, add the "ashift" pool property to the spare nvlist */

						if (source != ZPROP_SRC_DEFAULT)

							(void) nvlist_add_uint64(spares[s],

							(void) nvlist_add_uint64(spare,

							    ZPOOL_CONFIG_ASHIFT, ashift);

						(void) nvlist_add_nvlist_array(replacement,

						    ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spares[s], 1);

						    ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spare, 1);

						fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'",

						    dev_name, zfs_basename(spare_name));

									
										sys/contrib/openzfs/cmd/zfs/zfs_main.c
									
		+12
		
												View File
												
				@@ -9399,6 +9399,18 @@ main(int argc, char **argv)

						return (1);

					}

					/*

					 * Special case '<subcommand> --help|-?'

					 */

					if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||

					    strcmp(argv[2], "-?") == 0)) {

						int idx;

						if (find_command_idx(cmdname, &idx) == 0) {

							current_command = &command_table[idx];

							usage(B_FALSE);

						}

					}

					zfs_save_arguments(argc, argv, history_str, sizeof (history_str));

					libzfs_print_on_error(g_zfs, B_TRUE);

									
										sys/contrib/openzfs/cmd/zpool/zpool_main.c
									
		+12
		
												View File
												
				@@ -13878,6 +13878,18 @@ main(int argc, char **argv)

					if (strcmp(cmdname, "help") == 0)

						return (zpool_do_help(argc, argv));

					/*

					 * Special case '<subcommand> --help|-?'

					 */

					if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||

					    strcmp(argv[2], "-?") == 0)) {

						int idx;

						if (find_command_idx(cmdname, &idx) == 0) {

							current_command = &command_table[idx];

							usage(B_FALSE);

						}

					}

					if ((g_zfs = libzfs_init()) == NULL) {

						(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));

						return (1);

									
										sys/contrib/openzfs/cmd/zstream/zstream.c
									
		+36
		
												View File
												
				@@ -29,6 +29,8 @@

				#include <libintl.h>

				#include <stddef.h>

				#include <libzfs.h>

				#include <signal.h>

				#include <sys/backtrace.h>

				#include "zstream.h"

				void

				@@ -53,9 +55,43 @@ zstream_usage(void)

					exit(1);

				}

				static void sig_handler(int signo)

				{

					struct sigaction action;

					libspl_backtrace(STDERR_FILENO);

					/*

					 * Restore default action and re-raise signal so SIGSEGV and

					 * SIGABRT can trigger a core dump.

					 */

					action.sa_handler = SIG_DFL;

					sigemptyset(&action.sa_mask);

					action.sa_flags = 0;

					(void) sigaction(signo, &action, NULL);

					raise(signo);

				}

				int

				main(int argc, char *argv[])

				{

					/*

					 * Set up signal handlers, so if we crash due to bad data in the stream

					 * we can get more info. Unlike ztest, we don't bail out if we can't

					 * set up signal handlers, because zstream is very useful without them.

					 */

					struct sigaction action = { .sa_handler = sig_handler };

					sigemptyset(&action.sa_mask);

					action.sa_flags = 0;

					if (sigaction(SIGSEGV, &action, NULL) < 0) {

						(void) fprintf(stderr, "zstream: cannot catch SIGSEGV: %s\n",

						    strerror(errno));

					}

					if (sigaction(SIGABRT, &action, NULL) < 0) {

						(void) fprintf(stderr, "zstream: cannot catch SIGABRT: %s\n",

						    strerror(errno));

					}

					char *basename = strrchr(argv[0], '/');

					basename = basename ? (basename + 1) : argv[0];

					if (argc >= 1 && strcmp(basename, "zstreamdump") == 0)

									
										sys/contrib/openzfs/cmd/zstream/zstream_dump.c
									
		+14
		
												View File
												
				@@ -385,6 +385,20 @@ zstream_do_dump(int argc, char *argv[])

								(void) ssread(buf, sz, &zc);

								if (ferror(send_stream))

									perror("fread");

								uint8_t *nv_header = (uint8_t *)buf;

								boolean_t xdr = nv_header[0] == NV_ENCODE_XDR;

								boolean_t big_endian = nv_header[1] == 0;

								const char *nc;

								if (xdr) {

									nc = "NV_ENCODE_XDR";

								} else if (big_endian) {

									nc = "NV_ENCODE_NATIVE (big-endian)";

								} else {

									nc = "NV_ENCODE_NATIVE (little-endian)";

								}

								printf("nvlist encoding = %s\n", nc);

								err = nvlist_unpack(buf, sz, &nv, 0);

								if (err) {

									perror(strerror(err));

									
										sys/contrib/openzfs/cmd/zstream/zstream_recompress.c
									
		+2
		
												View File
												
				@@ -99,6 +99,7 @@ zstream_do_recompress(int argc, char *argv[])

						exit(1);

					}

					zfs_refcount_init();

					abd_init();

					fletcher_4_init();

					zio_init();

				@@ -353,6 +354,7 @@ zstream_do_recompress(int argc, char *argv[])

					zio_fini();

					zstd_fini();

					abd_fini();

					zfs_refcount_fini();

					return (0);

				}

sys/contrib/openzfs/config/Rules.am

View File

@@ -23,6 +23,7 @@ AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
 AM_CFLAGS += $(DEBUG_CFLAGS)
 AM_CFLAGS += $(ASAN_CFLAGS)
 AM_CFLAGS += $(UBSAN_CFLAGS)
 AM_CFLAGS += $(PTHREAD_CFLAGS)
 AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
 AM_CFLAGS += $(NO_FORMAT_ZERO_LENGTH)
 AM_CFLAGS += $(NO_FORMAT_TRUNCATION)
@@ -57,6 +58,7 @@ endif
 AM_LDFLAGS  = $(DEBUG_LDFLAGS)
 AM_LDFLAGS += $(ASAN_LDFLAGS)
 AM_LDFLAGS += $(UBSAN_LDFLAGS)
 AM_LDFLAGS += $(PTHREAD_LIBS)
 if BUILD_FREEBSD
 AM_LDFLAGS += -fstack-protector-strong

sys/contrib/openzfs/config/ax_pthread.m4

+523

View File

@@ -0,0 +1,523 @@
 # SPDX-License-Identifier: GPL-3.0-or-later WITH Autoconf-exception-macro
 # ===========================================================================
 #        https://www.gnu.org/software/autoconf-archive/ax_pthread.html
 # ===========================================================================
 #
 # SYNOPSIS
 #
 #   AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
 #
 # DESCRIPTION
 #
 #   This macro figures out how to build C programs using POSIX threads. It
 #   sets the PTHREAD_LIBS output variable to the threads library and linker
 #   flags, and the PTHREAD_CFLAGS output variable to any special C compiler
 #   flags that are needed. (The user can also force certain compiler
 #   flags/libs to be tested by setting these environment variables.)
 #
 #   Also sets PTHREAD_CC and PTHREAD_CXX to any special C compiler that is
 #   needed for multi-threaded programs (defaults to the value of CC
 #   respectively CXX otherwise). (This is necessary on e.g. AIX to use the
 #   special cc_r/CC_r compiler alias.)
 #
 #   NOTE: You are assumed to not only compile your program with these flags,
 #   but also to link with them as well. For example, you might link with
 #   $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
 #   $PTHREAD_CXX $CXXFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
 #
 #   If you are only building threaded programs, you may wish to use these
 #   variables in your default LIBS, CFLAGS, and CC:
 #
 #     LIBS="$PTHREAD_LIBS $LIBS"
 #     CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
 #     CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS"
 #     CC="$PTHREAD_CC"
 #     CXX="$PTHREAD_CXX"
 #
 #   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
 #   has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to
 #   that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
 #
 #   Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
 #   PTHREAD_PRIO_INHERIT symbol is defined when compiling with
 #   PTHREAD_CFLAGS.
 #
 #   ACTION-IF-FOUND is a list of shell commands to run if a threads library
 #   is found, and ACTION-IF-NOT-FOUND is a list of commands to run if it
 #   is not found. If ACTION-IF-FOUND is not specified, the default action
 #   will define HAVE_PTHREAD.
 #
 #   Please let the authors know if this macro fails on any platform, or if
 #   you have any other suggestions or comments. This macro was based on work
 #   by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
 #   from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
 #   Alejandro Forero Cuervo to the autoconf macro repository. We are also
 #   grateful for the helpful feedback of numerous users.
 #
 #   Updated for Autoconf 2.68 by Daniel Richard G.
 #
 # LICENSE
 #
 #   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
 #   Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
 #   Copyright (c) 2019 Marc Stevens <marc.stevens@cwi.nl>
 #
 #   This program is free software: you can redistribute it and/or modify it
 #   under the terms of the GNU General Public License as published by the
 #   Free Software Foundation, either version 3 of the License, or (at your
 #   option) any later version.
 #
 #   This program is distributed in the hope that it will be useful, but
 #   WITHOUT ANY WARRANTY; without even the implied warranty of
 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 #   Public License for more details.
 #
 #   You should have received a copy of the GNU General Public License along
 #   with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 #   As a special exception, the respective Autoconf Macro's copyright owner
 #   gives unlimited permission to copy, distribute and modify the configure
 #   scripts that are the output of Autoconf when processing the Macro. You
 #   need not follow the terms of the GNU General Public License when using
 #   or distributing such scripts, even though portions of the text of the
 #   Macro appear in them. The GNU General Public License (GPL) does govern
 #   all other use of the material that constitutes the Autoconf Macro.
 #
 #   This special exception to the GPL applies to versions of the Autoconf
 #   Macro released by the Autoconf Archive. When you make and distribute a
 #   modified version of the Autoconf Macro, you may extend this special
 #   exception to the GPL to apply to your modified version as well.
 #serial 31
 AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
 # AX_PTHREAD: probe for compiler and linker flags that give working POSIX
 # threads, publishing them as PTHREAD_CFLAGS, PTHREAD_LIBS, PTHREAD_CC and
 # PTHREAD_CXX.  The optional first and second arguments are the shell
 # actions to run when threads are found and when they are not.
 AC_DEFUN([AX_PTHREAD], [
 AC_REQUIRE([AC_CANONICAL_HOST])
 AC_REQUIRE([AC_PROG_CC])
 AC_REQUIRE([AC_PROG_SED])
 AC_LANG_PUSH([C])
 ax_pthread_ok=no
 # We used to check for pthread.h first, but this fails if pthread.h
 # requires special compiler flags (e.g. on Tru64 or Sequent).
 # It gets checked for in the link test anyway.
 # First of all, check if the user has set any of the PTHREAD_LIBS,
 # etcetera environment variables, and if threads linking works using
 # them:
 if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then
         ax_pthread_save_CC="$CC"
         ax_pthread_save_CFLAGS="$CFLAGS"
         ax_pthread_save_LIBS="$LIBS"
         AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"])
         AS_IF([test "x$PTHREAD_CXX" != "x"], [CXX="$PTHREAD_CXX"])
         CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
         LIBS="$PTHREAD_LIBS $LIBS"
         AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS])
         AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes])
         AC_MSG_RESULT([$ax_pthread_ok])
         if test "x$ax_pthread_ok" = "xno"; then
                 PTHREAD_LIBS=""
                 PTHREAD_CFLAGS=""
         fi
         CC="$ax_pthread_save_CC"
         CFLAGS="$ax_pthread_save_CFLAGS"
         LIBS="$ax_pthread_save_LIBS"
 fi
 # We must check for the threads library under a number of different
 # names; the ordering is very important because some systems
 # (e.g. DEC) have both -lpthread and -lpthreads, where one of the
 # libraries is broken (non-POSIX).
 # Create a list of thread flags to try. Items with a "," contain both
 # C compiler flags (before ",") and linker flags (after ","). Other items
 # starting with a "-" are C compiler flags, and remaining items are
 # library names, except for "none" which indicates that we try without
 # any flags at all, and "pthread-config" which is a program returning
 # the flags for the Pth emulation library.
 ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
 # The ordering *is* (sometimes) important.  Some notes on the
 # individual items follow:
 # pthreads: AIX (must check this before -lpthread)
 # none: in case threads are in libc; should be tried before -Kthread and
 #       other compiler flags to prevent continual compiler warnings
 # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
 # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64
 #           (Note: HP C rejects this with "bad form for `-t' option")
 # -pthreads: Solaris/gcc (Note: HP C also rejects)
 # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
 #      doesn't hurt to check since this sometimes defines pthreads and
 #      -D_REENTRANT too), HP C (must be checked before -lpthread, which
 #      is present but should not be used directly; and before -mthreads,
 #      because the compiler interprets this as "-mt" + "-hreads")
 # -mthreads: Mingw32/gcc, Lynx/gcc
 # pthread: Linux, etcetera
 # --thread-safe: KAI C++
 # pthread-config: use pthread-config program (for GNU Pth library)
 case $host_os in
         freebsd*)
         # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
         # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
         ax_pthread_flags="-kthread lthread $ax_pthread_flags"
         ;;
         hpux*)
         # From the cc(1) man page: "[-mt] Sets various -D flags to enable
         # multi-threading and also sets -lpthread."
         ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags"
         ;;
         openedition*)
         # IBM z/OS requires a feature-test macro to be defined in order to
         # enable POSIX threads at all, so give the user a hint if this is
         # not set. (We don't define these ourselves, as they can affect
         # other portions of the system API in unpredictable ways.)
         AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING],
             [
 #            if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS)
              AX_PTHREAD_ZOS_MISSING
 #            endif
             ],
             [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])])
         ;;
         solaris*)
         # On Solaris (at least, for some versions), libc contains stubbed
         # (non-functional) versions of the pthreads routines, so link-based
         # tests will erroneously succeed. (N.B.: The stubs are missing
         # pthread_cleanup_push, or rather a function called by this macro,
         # so we could check for that, but who knows whether they'll stub
         # that too in a future libc.)  So we'll check first for the
         # standard Solaris way of linking pthreads (-mt -lpthread).
         ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags"
         ;;
 esac
 # Are we compiling with Clang?
 AC_CACHE_CHECK([whether $CC is Clang],
     [ax_cv_PTHREAD_CLANG],
     [ax_cv_PTHREAD_CLANG=no
      # Note that Autoconf sets GCC=yes for Clang as well as GCC
      if test "x$GCC" = "xyes"; then
         AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG],
             [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */
 #            if defined(__clang__) && defined(__llvm__)
              AX_PTHREAD_CC_IS_CLANG
 #            endif
             ],
             [ax_cv_PTHREAD_CLANG=yes])
      fi
     ])
 ax_pthread_clang="$ax_cv_PTHREAD_CLANG"
 # GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC)
 # Note that for GCC and Clang -pthread generally implies -lpthread,
 # except when -nostdlib is passed.
 # This is problematic using libtool to build C++ shared libraries with pthread:
 # [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460
 # [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333
 # [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555
 # To solve this, first try -pthread together with -lpthread for GCC
 AS_IF([test "x$GCC" = "xyes"],
       [ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags"])
 # Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first
 # Unlike the GCC case above, this assignment replaces the whole candidate
 # list instead of prepending to it, so the host-specific entries added by
 # the case statement earlier are dropped whenever Clang is detected.
 AS_IF([test "x$ax_pthread_clang" = "xyes"],
       [ax_pthread_flags="-pthread,-lpthread -pthread"])
 # The presence of a feature test macro requesting re-entrant function
 # definitions is, on some systems, a strong hint that pthreads support is
 # correctly enabled
 case $host_os in
         darwin* | hpux* | linux* | osf* | solaris*)
         ax_pthread_check_macro="_REENTRANT"
         ;;
         aix*)
         ax_pthread_check_macro="_THREAD_SAFE"
         ;;
         *)
         ax_pthread_check_macro="--"
         ;;
 esac
 AS_IF([test "x$ax_pthread_check_macro" = "x--"],
       [ax_pthread_check_cond=0],
       [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"])
 # Main probe: skipped when the user-supplied flags checked at the top
 # already linked.  Candidates are tried in list order and the loop stops
 # at the first one whose test program links.
 if test "x$ax_pthread_ok" = "xno"; then
 for ax_pthread_try_flag in $ax_pthread_flags; do
         case $ax_pthread_try_flag in
                 none)
                 AC_MSG_CHECKING([whether pthreads work without any flags])
                 ;;
                 *,*)
                 PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"`
                 PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"`
                 AC_MSG_CHECKING([whether pthreads work with "$PTHREAD_CFLAGS" and "$PTHREAD_LIBS"])
                 ;;
                 -*)
                 AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag])
                 PTHREAD_CFLAGS="$ax_pthread_try_flag"
                 ;;
                 pthread-config)
                 AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
                 AS_IF([test "x$ax_pthread_config" = "xno"], [continue])
                 PTHREAD_CFLAGS="`pthread-config --cflags`"
                 PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
                 ;;
                 *)
                 AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag])
                 PTHREAD_LIBS="-l$ax_pthread_try_flag"
                 ;;
         esac
         ax_pthread_save_CFLAGS="$CFLAGS"
         ax_pthread_save_LIBS="$LIBS"
         CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
         LIBS="$PTHREAD_LIBS $LIBS"
         # Check for various functions.  We must include pthread.h,
         # since some functions may be macros.  (On the Sequent, we
         # need a special flag -Kthread to make this header compile.)
         # We check for pthread_join because it is in -lpthread on IRIX
         # while pthread_create is in libc.  We check for pthread_attr_init
         # due to DEC craziness with -lpthreads.  We check for
         # pthread_cleanup_push because it is one of the few pthread
         # functions on Solaris that doesn't have a non-functional libc stub.
         # We try pthread_create on general principles.
         AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
 #                       if $ax_pthread_check_cond
 #                        error "$ax_pthread_check_macro must be defined"
 #                       endif
                         static void *some_global = NULL;
                         static void routine(void *a)
                           {
                              /* To avoid any unused-parameter or
                                 unused-but-set-parameter warning.  */
                              some_global = a;
                           }
                         static void *start_routine(void *a) { return a; }],
                        [pthread_t th; pthread_attr_t attr;
                         pthread_create(&th, 0, start_routine, 0);
                         pthread_join(th, 0);
                         pthread_attr_init(&attr);
                         pthread_cleanup_push(routine, 0);
                         pthread_cleanup_pop(0) /* ; */])],
             [ax_pthread_ok=yes],
             [])
         CFLAGS="$ax_pthread_save_CFLAGS"
         LIBS="$ax_pthread_save_LIBS"
         AC_MSG_RESULT([$ax_pthread_ok])
         AS_IF([test "x$ax_pthread_ok" = "xyes"], [break])
         PTHREAD_LIBS=""
         PTHREAD_CFLAGS=""
 done
 fi
 # Clang needs special handling, because older versions handle the -pthread
 # option in a rather... idiosyncratic way
 # Note that this section is entered whenever Clang was detected; it does
 # not depend on whether the probe above succeeded.
 if test "x$ax_pthread_clang" = "xyes"; then
         # Clang takes -pthread; it has never supported any other flag
         # (Note 1: This will need to be revisited if a system that Clang
         # supports has POSIX threads in a separate library.  This tends not
         # to be the way of modern systems, but it's conceivable.)
         # (Note 2: On some systems, notably Darwin, -pthread is not needed
         # to get POSIX threads support; the API is always present and
         # active.  We could reasonably leave PTHREAD_CFLAGS empty.  But
         # -pthread does define _REENTRANT, and while the Darwin headers
         # ignore this macro, third-party headers might not.)
         # However, older versions of Clang make a point of warning the user
         # that, in an invocation where only linking and no compilation is
         # taking place, the -pthread option has no effect ("argument unused
         # during compilation").  They expect -pthread to be passed in only
         # when source code is being compiled.
         #
         # Problem is, this is at odds with the way Automake and most other
         # C build frameworks function, which is that the same flags used in
         # compilation (CFLAGS) are also used in linking.  Many systems
         # supported by AX_PTHREAD require exactly this for POSIX threads
         # support, and in fact it is often not straightforward to specify a
         # flag that is used only in the compilation phase and not in
         # linking.  Such a scenario is extremely rare in practice.
         #
         # Even though use of the -pthread flag in linking would only print
         # a warning, this can be a nuisance for well-run software projects
         # that build with -Werror.  So if the active version of Clang has
         # this misfeature, we search for an option to squash it.
         AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread],
             [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG],
             [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown
              # Create an alternate version of $ac_link that compiles and
              # links in two steps (.c -> .o, .o -> exe) instead of one
              # (.c -> exe), because the warning occurs only in the second
              # step
              ax_pthread_save_ac_link="$ac_link"
              ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g'
              ax_pthread_link_step=`AS_ECHO(["$ac_link"]) | sed "$ax_pthread_sed"`
              ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)"
              ax_pthread_save_CFLAGS="$CFLAGS"
              for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do
                 AS_IF([test "x$ax_pthread_try" = "xunknown"], [break])
                 CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS"
                 ac_link="$ax_pthread_save_ac_link"
                 AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
                     [ac_link="$ax_pthread_2step_ac_link"
                      AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
                          [break])
                     ])
              done
              ac_link="$ax_pthread_save_ac_link"
              CFLAGS="$ax_pthread_save_CFLAGS"
              AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no])
              ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try"
             ])
         case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in
                 no | unknown) ;;
                 *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;;
         esac
 fi # $ax_pthread_clang = yes
 # Various other checks:
 if test "x$ax_pthread_ok" = "xyes"; then
         ax_pthread_save_CFLAGS="$CFLAGS"
         ax_pthread_save_LIBS="$LIBS"
         CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
         LIBS="$PTHREAD_LIBS $LIBS"
         # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
         AC_CACHE_CHECK([for joinable pthread attribute],
             [ax_cv_PTHREAD_JOINABLE_ATTR],
             [ax_cv_PTHREAD_JOINABLE_ATTR=unknown
              for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
                  AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
                                                  [int attr = $ax_pthread_attr; return attr /* ; */])],
                                 [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break],
                                 [])
              done
             ])
         AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \
                test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \
                test "x$ax_pthread_joinable_attr_defined" != "xyes"],
               [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE],
                                   [$ax_cv_PTHREAD_JOINABLE_ATTR],
                                   [Define to necessary symbol if this constant
                                    uses a non-standard name on your system.])
                ax_pthread_joinable_attr_defined=yes
               ])
         AC_CACHE_CHECK([whether more special flags are required for pthreads],
             [ax_cv_PTHREAD_SPECIAL_FLAGS],
             [ax_cv_PTHREAD_SPECIAL_FLAGS=no
              case $host_os in
              solaris*)
              ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS"
              ;;
              esac
             ])
         AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \
                test "x$ax_pthread_special_flags_added" != "xyes"],
               [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS"
                ax_pthread_special_flags_added=yes])
         AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
             [ax_cv_PTHREAD_PRIO_INHERIT],
             [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
                                              [[int i = PTHREAD_PRIO_INHERIT;
                                                return i;]])],
                             [ax_cv_PTHREAD_PRIO_INHERIT=yes],
                             [ax_cv_PTHREAD_PRIO_INHERIT=no])
             ])
         AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \
                test "x$ax_pthread_prio_inherit_defined" != "xyes"],
               [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])
                ax_pthread_prio_inherit_defined=yes
               ])
         CFLAGS="$ax_pthread_save_CFLAGS"
         LIBS="$ax_pthread_save_LIBS"
         # More AIX lossage: compile with *_r variant
         if test "x$GCC" != "xyes"; then
             case $host_os in
                 aix*)
                 AS_CASE(["x/$CC"],
                     [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
                     [#handle absolute path differently from PATH based program lookup
                      AS_CASE(["x$CC"],
                          [x/*],
                          [
 			   AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])
 			   AS_IF([test "x${CXX}" != "x"], [AS_IF([AS_EXECUTABLE_P([${CXX}_r])],[PTHREAD_CXX="${CXX}_r"])])
 			 ],
                          [
 			   AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])
 			   AS_IF([test "x${CXX}" != "x"], [AC_CHECK_PROGS([PTHREAD_CXX],[${CXX}_r],[$CXX])])
 			 ]
                      )
                     ])
                 ;;
             esac
         fi
 fi
 # Fall back to the ordinary compilers when no thread-specific one was chosen.
 test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
 test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX"
 AC_SUBST([PTHREAD_LIBS])
 AC_SUBST([PTHREAD_CFLAGS])
 AC_SUBST([PTHREAD_CC])
 AC_SUBST([PTHREAD_CXX])
 # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
 if test "x$ax_pthread_ok" = "xyes"; then
         ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
         :
 else
         # Normalise any non-yes value to no before the failure action runs.
         ax_pthread_ok=no
         $2
 fi
 AC_LANG_POP
 ])dnl AX_PTHREAD

sys/contrib/openzfs/config/kernel-fs-parse.m4

+34

View File

@@ -0,0 +1,34 @@
 dnl # SPDX-License-Identifier: CDDL-1.0
 dnl #
 dnl # 5.6 API change
 dnl # Before 5.6, fs_parse() took a struct fs_parameter_description
 dnl # which wraps the parameter specs with name and enum pointers. From 5.6,
 dnl # the description struct was removed and fs_parse() accepts the
 dnl # fs_parameter_spec directly.
 dnl #
 dnl #
 dnl # Register the "fs_parse" kernel compile test. The test body calls
 dnl # fs_parse() with a bare struct fs_parameter_spec array as its second
 dnl # argument, so it only builds against kernels with that signature.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_FS_PARSE], [
 	ZFS_LINUX_TEST_SRC([fs_parse], [
 		#include <linux/fs_context.h>
 		#include <linux/fs_parser.h>
 	],[
 		static const struct fs_parameter_spec specs[] = {
 			{}
 		};
 		int test __attribute__ ((unused));
 		struct fs_context *fc __attribute__ ((unused)) = NULL;
 		struct fs_parameter param __attribute__ ((unused));
 		struct fs_parse_result result __attribute__ ((unused));
 		test = fs_parse(fc, specs, &param, &result);
 	])
 ])
 dnl #
 dnl # Report the outcome of the "fs_parse" compile test. The first branch
 dnl # prints "yes" and defines HAVE_FS_PARSE_TAKES_SPEC; the second branch
 dnl # prints "no" and defines nothing.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_FS_PARSE], [
 	AC_MSG_CHECKING([whether fs_parse() takes fs_parameter_spec directly])
 	ZFS_LINUX_TEST_RESULT([fs_parse], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_FS_PARSE_TAKES_SPEC, 1,
 		    [fs_parse() takes fs_parameter_spec directly])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])

sys/contrib/openzfs/config/kernel.m4

+113 -3

View File

@@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SRC_SECURITY_INODE
 	ZFS_AC_KERNEL_SRC_FS_CONTEXT
 	ZFS_AC_KERNEL_SRC_FS_PARSE
 	ZFS_AC_KERNEL_SRC_SB_DYING
 	ZFS_AC_KERNEL_SRC_SET_NLINK
 	ZFS_AC_KERNEL_SRC_SGET
@@ -153,9 +154,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 			;;
 	esac
 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
 	AC_MSG_RESULT([done])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi], [for available kernel interfaces])
 ])
 dnl #
@@ -203,6 +202,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SECURITY_INODE
 	ZFS_AC_KERNEL_FS_CONTEXT
 	ZFS_AC_KERNEL_FS_PARSE
 	ZFS_AC_KERNEL_SB_DYING
 	ZFS_AC_KERNEL_SET_NLINK
 	ZFS_AC_KERNEL_SGET
@@ -753,6 +753,108 @@ AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [
 	], [], [yes])
 ])
 dnl #
 dnl # Progress output for ZFS_LINUX_TEST_COMPILE_ALL
 dnl #
 dnl # From clean, we currently have ~250 kernel tests to compile. This can
 dnl # take anywhere from a few seconds to a few minutes while we wait for
 dnl # the module build invocation to complete (see ZFS_LINUX_COMPILE).
 dnl #
 dnl # To show some progress in the main set of tests, we start a background
 dnl # job to monitor the build progress and update the output.
 dnl #
 dnl #
 dnl # _ZFS_LINUX_TEST_COMPILE_PROGRESS_START(dir, message)
 dnl #
 dnl # Prints the usual "checking <message>..." line. Unless configure is
 dnl # silent or its message stream is not a terminal, it also forks a
 dnl # background subshell that rewrites that line with a done/total counter
 dnl # derived from dir/Makefile and dir/build.log. The subshell pid and the
 dnl # message text are kept in _zfs_linux_test_progress_pid and
 dnl # _zfs_linux_test_progress_text.
 dnl #
 AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_START], [
 	dnl # normal "checking for..." output
 	AC_MSG_CHECKING([$2])
 	dnl # don't start the background job if configure was called with
 	dnl # --silent or --quiet, or if configure's output stream is not
 	dnl # attached to a terminal
 	AS_IF([test "x$silent" != "xyes" -a -t AS_MESSAGE_FD], [
 		dnl # save "checking" message for cleanup later
 		_zfs_linux_test_progress_text="$2"
 		dnl # new shell job in background
 		(
 			dnl # ZFS_LINUX_CONFTEST_MAKEFILE adds one line per
 			dnl # test to the top Makefile, so the line count
 			dnl # is our target
 			total=$(wc -l < $1/Makefile)
 			count=0
 			dnl # eject if our parent process has gone away. this
 			dnl # is protection against the parent being killed.
 			dnl # (we can't use trap because autoconf generates
 			dnl # that and doesn't provide an easy way to hook it).
 			while kill -0 $$ 2>/dev/null ; do
 				dnl # ZFS_LINUX_TEST_COMPILE_ALL has a short
 				dnl # second stage for modpost, where build.log
 				dnl # is recreated. we make some effort to both
 				dnl # detect that and handle it, mostly by
 				dnl # making sure the counter never goes
 				dnl # backwards.
 				if test "$count" -lt "$total" ; then
 					dnl # if build.log went away, then
 					dnl # we never got to do a last count,
 					dnl # so we can assume they're all
 					dnl # finished and just bump the count
 					dnl # to the total
 					if ! test -f $1/build.log ; then
 						count=$total
 					else
 						dnl # look for compilation lines
 						dnl # (CC) for .o files that
 						dnl # are in a dir (so not
 						dnl # whole-of-build artifacts)
 						dnl # and only have a single
 						dnl # period (so not .mod.o
 						dnl # link artifacts)
 						count_n=$(awk '/CC/ && /\/[[^\.]]+\.o$/ { c++ } END { print c }' $1/build.log 2>/dev/null)
 						if test "x$count_n" != "x" ; then
 							dnl # empty output
 							dnl # means awk failed,
 							dnl # likely build.log
 							dnl # went away. use
 							dnl # the current count
 							count=$count_n
 						fi
 					fi
 					dnl # re-output the entire message with
 					dnl # the new counts
 					dnl # NOTE(review): this writes to a literal
 					dnl # fd 6 while the rest of the macro uses
 					dnl # AS_MESSAGE_FD - presumably the same
 					dnl # descriptor; confirm
 					printf '\rchecking %s... %d/%d' "$2" "$count" "$total" >&6
 				fi
 				dnl # yield before loop
 				sleep 0.5
 			done
 		) &
 		dnl # save the pid so we can kill it later
 		_zfs_linux_test_progress_pid=$!
 	])
 ])
 dnl #
 dnl # _ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE(result)
 dnl #
 dnl # If a progress job was recorded in _zfs_linux_test_progress_pid, kills
 dnl # and reaps it, redraws a clean "checking ..." line and clears the saved
 dnl # state. In every case it then prints the result text through
 dnl # AC_MSG_RESULT.
 dnl #
 AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE], [
 	dnl # only do cleanup if we actually started the job
 	AS_IF([test "x$_zfs_linux_test_progress_pid" != "x"], [
 		dnl # kill it; no-op if it already died
 		kill $_zfs_linux_test_progress_pid 2>/dev/null
 		dnl # wait for it to really go away and clean it up
 		wait $_zfs_linux_test_progress_pid 2>/dev/null
 		dnl # reprint the original checking line. the control code
 		dnl # is ANSI "erase entire line"
 		dnl # (the octal escape 133 is a left square bracket,
 		dnl # presumably spelled that way to keep a bare bracket
 		dnl # out of the m4 quoting)
 		printf '\r\033\1332Kchecking %s... ' "$_zfs_linux_test_progress_text" >&AS_MESSAGE_FD
 		dnl # cleanup for next run
 		_zfs_linux_test_progress_pid=
 		_zfs_linux_test_progress_text=
 	])
 	dnl # normal final output for screen and config.log
 	AC_MSG_RESULT([$1])
 ])
 dnl #
 dnl # Perform the compilation of the test cases in two phases.
 dnl #
@@ -771,6 +873,10 @@ dnl # The maximum allowed parallelism can be controlled by setting the
 dnl # TEST_JOBS environment variable.  Otherwise, it default to $(nproc).
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
 	AS_IF([test "x$2" != "x"], [
 		_ZFS_LINUX_TEST_COMPILE_PROGRESS_START([build], [$2])
 	])
 	dnl # Phase 1 - Compilation only, final linking is skipped.
 	ZFS_LINUX_TEST_COMPILE([$1], [build])
@@ -818,6 +924,10 @@ AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
 			])
 		done
 	])
 	AS_IF([test "x$2" != "x"], [
 		_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE([done])
 	])
 ])
 dnl #

sys/contrib/openzfs/config/zfs-build.m4

+12

View File

@@ -39,6 +39,18 @@ dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics
 dnl # can ensue.)
 dnl #
 AC_DEFUN([ZFS_AC_DEBUG], [
 	dnl #
 	dnl # In the Linux kernel copy-builtin build, assertion/debug support
 	dnl # is selected by CONFIG_ZFS_DEBUG (Kconfig).
 	dnl #
 	AH_BOTTOM([
 #ifdef CONFIG_ZFS
 #undef ZFS_DEBUG
 #ifdef CONFIG_ZFS_DEBUG
 #define ZFS_DEBUG 1
 #endif
 #endif])
 	AC_MSG_CHECKING([whether assertion support will be enabled])
 	AC_ARG_ENABLE([debug],
 		[AS_HELP_STRING([--enable-debug],

sys/contrib/openzfs/configure.ac

View File

@@ -54,6 +54,7 @@ AC_PROG_LN_S
 PKG_PROG_PKG_CONFIG
 AM_PROG_AS
 AM_PROG_CC_C_O
 AX_PTHREAD
 AX_CODE_COVERAGE
 _AM_PROG_TAR(pax)

sys/contrib/openzfs/contrib/debian/not-installed

-1

View File

@@ -2,7 +2,6 @@ usr/bin/zarcsummary.py
 usr/share/zfs/zfs-helpers.sh
 etc/default/zfs
 etc/init.d
 etc/sudoers.d
 etc/zfs/vdev_id.conf.alias.example
 etc/zfs/vdev_id.conf.multipath.example
 etc/zfs/vdev_id.conf.sas_direct.example

									
										sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
									
		+21
		-8
	
												View File
												
				@@ -840,27 +840,41 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,

						    errno);

						return (-1);

					}

					if (chown(runtime_path, 0, 0) != 0) {

						pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",

						    errno);

					const int runtime_fd = open(runtime_path,

					    O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);

					if (runtime_fd < 0) {

						pam_syslog(pamh, LOG_ERR, "Can't open runtime path: %d", errno);

						return (-1);

					}

					if (chmod(runtime_path, S_IRWXU) != 0) {

					if (fchown(runtime_fd, 0, 0) != 0) {

						pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",

						    errno);

						close(runtime_fd);

						return (-1);

					}

					if (fchmod(runtime_fd, S_IRWXU) != 0) {

						pam_syslog(pamh, LOG_ERR, "Can't chmod runtime path: %d",

						    errno);

						close(runtime_fd);

						return (-1);

					}

					char *counter_path;

					if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)

					if (asprintf(&counter_path, "%u", config->uid) == -1) {

						close(runtime_fd);

						return (-1);

					}

					const int fd = open(counter_path,

					const int fd = openat(runtime_fd, counter_path,

					    O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,

					    S_IRUSR | S_IWUSR);

					int ret = errno;

					free(counter_path);

					close(runtime_fd);

					if (fd < 0) {

						pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", errno);

						pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", ret);

						return (-1);

					}

					if (flock(fd, LOCK_EX) != 0) {

				@@ -871,7 +885,6 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,

					char counter[20];

					char *pos = counter;

					int remaining = sizeof (counter) - 1;

					int ret;

					counter[sizeof (counter) - 1] = 0;

					while (remaining > 0 && (ret = read(fd, pos, remaining)) > 0) {

						remaining -= ret;

sys/contrib/openzfs/copy-builtin

+11

View File

@@ -43,6 +43,17 @@ config ZFS
 	  To compile this file system support as a module, choose M here.
 	  If unsure, say N.
 config ZFS_DEBUG
 	bool "ZFS debugging"
 	depends on ZFS
 	help
 	  Enable ZFS debugging. This turns on all ASSERT() assertions,
 	  enables additional debug-only code paths, and promotes
 	  compiler warnings to errors. This should only be enabled for
 	  development or troubleshooting.
 	  If unsure, say N.
 EOF

									
										sys/contrib/openzfs/etc/Makefile.am
									
		-8
	
												View File
												
				@@ -1,10 +1,4 @@

				# SPDX-License-Identifier: CDDL-1.0

				sudoersddir = $(sysconfdir)/sudoers.d

				sudoersd_DATA = \

					%D%/sudoers.d/zfs

				dist_noinst_DATA += $(sudoersd_DATA)

				sysconf_zfsdir = $(sysconfdir)/zfs

				@@ -88,8 +82,6 @@ systemdgenerator_PROGRAMS = \

				%C%_systemd_system_generators_zfs_mount_generator_LDADD = \

					libzfs.la

				%C%_systemd_system_generators_zfs_mount_generator_LDFLAGS = -pthread

				CPPCHECKTARGETS += $(systemdgenerator_PROGRAMS)

				endif

sys/contrib/openzfs/etc/sudoers.d/zfs

-9

View File

@@ -1,9 +0,0 @@
 ##
 ## Allow any user to run `zpool iostat/status -c smart` in order
 ## to read basic SMART health statistics for a pool.
 ##
 ## CAUTION: Any syntax error introduced here will break sudo.
 ## Editing with 'visudo' is recommended: visudo -f  /etc/sudoers.d/zfs
 ##
 # ALL ALL = (root) NOPASSWD: /usr/sbin/smartctl -a /dev/[hsv]d[a-z0-9]*

									
										sys/contrib/openzfs/include/os/freebsd/zfs/sys/arc_os.h
									
		-1
	
												View File
												
				@@ -29,6 +29,5 @@

				#define	_SYS_ARC_OS_H

				int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);

				int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);

				#endif

									
										sys/contrib/openzfs/include/os/linux/spl/sys/rwlock.h
									
		+100
		-36
	
												View File
												
				@@ -30,7 +30,6 @@

				#include <linux/sched.h>

				typedef enum {

					RW_DRIVER	= 2,

					RW_DEFAULT	= 4,

					RW_NOLOCKDEP	= 5

				} krw_type_t;

				@@ -75,20 +74,35 @@ spl_rw_set_type(krwlock_t *rwp, krw_type_t type)

				{

					rwp->rw_type = type;

				}

				static inline void

				spl_rw_lockdep_off(void)

				{

					lockdep_off();

				}

				static inline void

				spl_rw_lockdep_on(void)

				{

					lockdep_on();

				}

				static inline void

				spl_rw_lockdep_off_maybe(krwlock_t *rwp)		\

				{							\

					if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\

						lockdep_off();				\

						spl_rw_lockdep_off();			\

				}

				static inline void

				spl_rw_lockdep_on_maybe(krwlock_t *rwp)			\

				{							\

					if (rwp && rwp->rw_type == RW_NOLOCKDEP)	\

						lockdep_on();				\

						spl_rw_lockdep_on();			\

				}

				#else  /* CONFIG_LOCKDEP */

				#define	spl_rw_set_type(rwp, type)

				#define	spl_rw_lockdep_off()

				#define	spl_rw_lockdep_on()

				#define	spl_rw_lockdep_off_maybe(rwp)

				#define	spl_rw_lockdep_on_maybe(rwp)

				#endif /* CONFIG_LOCKDEP */

				@@ -117,6 +131,56 @@ RW_READ_HELD(krwlock_t *rwp)

				 * will be correctly located in the users code which is important

				 * for the built in kernel lock analysis tools

				 */

				#define	spl_rw_tryenter_impl(rwp, rw) /* CSTYLED */			\

				({									\

					int _rc_ = 0;							\

													\

					switch (rw) {							\

					case RW_READER:							\

						_rc_ = down_read_trylock(SEM(rwp));			\

						break;							\

					case RW_WRITER:							\

						if ((_rc_ = down_write_trylock(SEM(rwp))))		\

							spl_rw_set_owner(rwp);				\

						break;							\

					default:							\

						VERIFY(0);						\

					}								\

					_rc_;								\

				})

				#define	spl_rw_enter_impl(rwp, rw) /* CSTYLED */			\

				({									\

					switch (rw) {							\

					case RW_READER:							\

						down_read(SEM(rwp));					\

						break;							\

					case RW_WRITER:							\

						down_write(SEM(rwp));					\

						spl_rw_set_owner(rwp);					\

						break;							\

					default:							\

						VERIFY(0);						\

					}								\

				})

				#define	spl_rw_exit_impl(rwp) /* CSTYLED */				\

				({									\

					if (RW_WRITE_HELD(rwp)) {					\

						spl_rw_clear_owner(rwp);				\

						up_write(SEM(rwp));					\

					} else {							\

						ASSERT(RW_READ_HELD(rwp));				\

						up_read(SEM(rwp));					\

					}								\

				})

				#define	spl_rw_downgrade_impl(rwp) /* CSTYLED */			\

				({									\

					spl_rw_clear_owner(rwp);					\

					downgrade_write(SEM(rwp));					\

				})

				#define	rw_init(rwp, name, type, arg) /* CSTYLED */			\

				({									\

					static struct lock_class_key __key;				\

				@@ -140,60 +204,60 @@ RW_READ_HELD(krwlock_t *rwp)

				#define	rw_tryenter(rwp, rw) /* CSTYLED */				\

				({									\

					int _rc_ = 0;							\

													\

					spl_rw_lockdep_off_maybe(rwp);					\

					switch (rw) {							\

					case RW_READER:							\

						_rc_ = down_read_trylock(SEM(rwp));			\

						break;							\

					case RW_WRITER:							\

						if ((_rc_ = down_write_trylock(SEM(rwp))))		\

							spl_rw_set_owner(rwp);				\

						break;							\

					default:							\

						VERIFY(0);						\

					}								\

					int _rc_ = spl_rw_tryenter_impl(rwp, rw);			\

					spl_rw_lockdep_on_maybe(rwp);					\

					_rc_;								\

				})

				#define	rw_tryenter_nolockdep(rwp, rw) /* CSTYLED */			\

				({									\

					spl_rw_lockdep_off();						\

					int _rc_ = spl_rw_tryenter_impl(rwp, rw);			\

					spl_rw_lockdep_on();						\

					_rc_;								\

				})

				#define	rw_enter(rwp, rw) /* CSTYLED */					\

				({									\

					spl_rw_lockdep_off_maybe(rwp);					\

					switch (rw) {							\

					case RW_READER:							\

						down_read(SEM(rwp));					\

						break;							\

					case RW_WRITER:							\

						down_write(SEM(rwp));					\

						spl_rw_set_owner(rwp);					\

						break;							\

					default:							\

						VERIFY(0);						\

					}								\

					spl_rw_enter_impl(rwp, rw);					\

					spl_rw_lockdep_on_maybe(rwp);					\

				})

				#define	rw_enter_nolockdep(rwp, rw) /* CSTYLED */			\

				({									\

					spl_rw_lockdep_off();						\

					spl_rw_enter_impl(rwp, rw);					\

					spl_rw_lockdep_on();						\

				})

				#define	rw_exit(rwp) /* CSTYLED */					\

				({									\

					spl_rw_lockdep_off_maybe(rwp);					\

					if (RW_WRITE_HELD(rwp)) {					\

						spl_rw_clear_owner(rwp);				\

						up_write(SEM(rwp));					\

					} else {							\

						ASSERT(RW_READ_HELD(rwp));				\

						up_read(SEM(rwp));					\

					}								\

					spl_rw_exit_impl(rwp);						\

					spl_rw_lockdep_on_maybe(rwp);					\

				})

				#define	rw_exit_nolockdep(rwp) /* CSTYLED */				\

				({									\

					spl_rw_lockdep_off();						\

					spl_rw_exit_impl(rwp);						\

					spl_rw_lockdep_on();						\

				})

				#define	rw_downgrade(rwp) /* CSTYLED */					\

				({									\

					spl_rw_lockdep_off_maybe(rwp);					\

					spl_rw_clear_owner(rwp);					\

					downgrade_write(SEM(rwp));					\

					spl_rw_downgrade_impl(rwp);					\

					spl_rw_lockdep_on_maybe(rwp);					\

				})

				#define	rw_downgrade_nolockdep(rwp) /* CSTYLED */			\

				({									\

					spl_rw_lockdep_off();						\

					spl_rw_downgrade_impl(rwp);					\

					spl_rw_lockdep_on();						\

				})

				#endif /* _SPL_RWLOCK_H */

									
										sys/contrib/openzfs/include/sys/arc.h
									
		+1
		-2
	
												View File
												
				@@ -95,8 +95,7 @@ typedef void arc_prune_func_t(uint64_t bytes, void *priv);

				extern uint_t zfs_arc_average_blocksize;

				extern int l2arc_exclude_special;

				/* generic arc_done_func_t's which you can use */

				arc_read_done_func_t arc_bcopy_func;

				/* generic arc_done_func_t which can be used */

				arc_read_done_func_t arc_getbuf_func;

				/* generic arc_prune_func_t wrapper for callbacks */

									
										sys/contrib/openzfs/include/sys/arc_impl.h
									
		+4
		-1
	
												View File
												
				@@ -832,6 +832,8 @@ typedef struct arc_stats {

					 * due to ARC_FLAG_UNCACHED being set.

					 */

					kstat_named_t arcstat_uncached_evictable_metadata;

					/* Number of L2ARC devices currently attached across all pools. */

					kstat_named_t arcstat_l2_ndev;

					kstat_named_t arcstat_l2_hits;

					kstat_named_t arcstat_l2_misses;

					/*

				@@ -1103,7 +1105,7 @@ extern arc_sums_t arc_sums;

				extern hrtime_t arc_growtime;

				extern boolean_t arc_warm;

				extern uint_t arc_grow_retry;

				extern uint_t arc_no_grow_shift;

				extern uint_t zfs_arc_no_grow_shift;

				extern uint_t arc_shrink_shift;

				extern kmutex_t arc_prune_mtx;

				extern list_t arc_prune_list;

				@@ -1134,6 +1136,7 @@ extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);

				extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);

				extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);

				extern int param_set_l2arc_dwpd_limit(ZFS_MODULE_PARAM_ARGS);

				extern int param_set_arc_no_grow_shift(ZFS_MODULE_PARAM_ARGS);

				extern void l2arc_dwpd_bump_reset(void);

				/* used in zdb.c */

									
										sys/contrib/openzfs/include/sys/fs/zfs.h
									
		+14
		
												View File
												
				@@ -363,6 +363,7 @@ typedef enum {

				/* Small enough to not hog a whole line of printout in zpool(8). */

				#define	ZPROP_MAX_COMMENT	32

				#define	ZPROP_BOOLEAN_NA	2

				#define	ZPROP_BOOLEAN_INHERIT	2

				#define	ZPROP_VALUE		"value"

				#define	ZPROP_SOURCE		"source"

				@@ -476,6 +477,8 @@ typedef enum {

					VDEV_PROP_SCHEDULER,

					VDEV_PROP_FDOMAIN,

					VDEV_PROP_FGROUP,

					VDEV_PROP_ALLOC_BIAS,

					VDEV_PROP_ROTATIONAL,

					VDEV_NUM_PROPS

				} vdev_prop_t;

				@@ -491,6 +494,16 @@ typedef enum {

					VDEV_SCHEDULER_OFF

				} vdev_scheduler_type_t;

				/*

				 * Allocation bias for top-level vdevs (alloc_bias property).

				 */

				typedef enum vdev_alloc_bias {

					VDEV_BIAS_NONE,

					VDEV_BIAS_LOG,		/* dedicated to ZIL data (SLOG) */

					VDEV_BIAS_SPECIAL,	/* dedicated to ddt, metadata, and small blks */

					VDEV_BIAS_DEDUP		/* dedicated to dedup metadata */

				} vdev_alloc_bias_t;

				/*

				 * Dataset property functions shared between libzfs and kernel.

				 */

				@@ -919,6 +932,7 @@ typedef struct zpool_load_policy {

				#define	ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH	"vdev_enc_sysfs_path"

				#define	ZPOOL_CONFIG_WHOLE_DISK		"whole_disk"

				#define	ZPOOL_CONFIG_VDEV_ROTATIONAL	"rotational"

				#define	ZPOOL_CONFIG_ERRCOUNT		"error_count"

				#define	ZPOOL_CONFIG_NOT_PRESENT	"not_present"

				#define	ZPOOL_CONFIG_SPARES		"spares"

									
										sys/contrib/openzfs/include/sys/metaslab_impl.h
									
		+1
		-1
	
												View File
												
				@@ -330,7 +330,7 @@ struct metaslab_group {

				 *

				 * As the space map grows (as a result of the appends) it will

				 * eventually become space-inefficient.  When the metaslab's in-core

				 * free tree is zfs_condense_pct/100 times the size of the minimal

				 * free tree is zfs_metaslab_condense_pct/100 times the size of the minimal

				 * on-disk representation, we rewrite it in its minimized form.  If a

				 * metaslab needs to condense then we must set the ms_condensing flag to

				 * ensure that allocations are not performed on the metaslab that is

									
										sys/contrib/openzfs/include/sys/vdev_impl.h
									
		+1
		-8
	
												View File
												
				@@ -155,14 +155,6 @@ struct vdev_queue {

					kmutex_t	vq_lock;

				};

				typedef enum vdev_alloc_bias {

					VDEV_BIAS_NONE,

					VDEV_BIAS_LOG,		/* dedicated to ZIL data (SLOG) */

					VDEV_BIAS_SPECIAL,	/* dedicated to ddt, metadata, and small blks */

					VDEV_BIAS_DEDUP		/* dedicated to dedup metadata */

				} vdev_alloc_bias_t;

				/*

				 * On-disk indirect vdev state.

				 *

				@@ -600,6 +592,7 @@ extern boolean_t vdev_log_state_valid(vdev_t *vd);

				extern int vdev_load(vdev_t *vd);

				extern int vdev_dtl_load(vdev_t *vd);

				extern void vdev_sync(vdev_t *vd, uint64_t txg);

				extern void vdev_sync_dispatch(vdev_t *vd, uint64_t txg);

				extern void vdev_sync_done(vdev_t *vd, uint64_t txg);

				extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);

				extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg);

									
										sys/contrib/openzfs/include/sys/zap.h
									
		+151
		-87
	
												View File
												
				@@ -24,6 +24,7 @@

				 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.

				 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.

				 * Copyright 2017 Nexenta Systems, Inc.

				 * Copyright (c) 2026, TrueNAS.

				 */

				#ifndef	_SYS_ZAP_H

				@@ -121,13 +122,13 @@ typedef enum zap_flags {

				/*

				 * Create a new zapobj with no attributes and return its object number.

				 */

				uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,

				uint64_t zap_create(objset_t *os, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);

				uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,

				uint64_t zap_create_dnsize(objset_t *os, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);

				uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,

				uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);

				uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,

				uint64_t zap_create_norm_dnsize(objset_t *os, int normflags,

				    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,

				    int dnodesize, dmu_tx_t *tx);

				uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,

				@@ -137,11 +138,22 @@ uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,

				    zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,

				    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,

				    int dnodesize, dmu_tx_t *tx);

				/*

				 * Create a zap object and return a pointer to the newly allocated dnode via

				 * the allocated_dnode argument.  The returned dnode will be held and the

				 * caller is responsible for releasing the hold by calling dnode_rele().

				 */

				uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,

				    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,

				    dmu_object_type_t bonustype, int bonuslen, int dnodesize,

				    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx);

				/*

				 * Create a new zapobj with no attributes, and add an entry to an existing

				 * zapobj with the given name as key and the object number of the new zapobj as

				 * the value. Returns the object number of the new zapobj.

				 */

				uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,

				    uint64_t parent_obj, const char *name, dmu_tx_t *tx);

				uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,

				@@ -157,20 +169,21 @@ void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags,

				 * Create a new zapobj with no attributes from the given (unallocated)

				 * object number.

				 */

				int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,

				int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);

				int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,

				int zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);

				int zap_create_claim_norm(objset_t *ds, uint64_t obj,

				int zap_create_claim_norm(objset_t *os, uint64_t obj,

				    int normflags, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);

				int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,

				int zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj,

				    int normflags, dmu_object_type_t ot,

				    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);

				/*

				 * The zapobj passed in must be a valid ZAP object for all of the

				 * following routines.

				 * All operations on a zapobj take either the objset/objectid pair

				 * that "names" the object, or an existing dnode_t for the object. The

				 * zapobj passed in must be a valid ZAP object.

				 */

				/*

				@@ -178,7 +191,7 @@ int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,

				 *

				 * Frees the object number using dmu_object_free.

				 */

				int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);

				int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);

				/*

				 * Manipulate attributes.

				@@ -207,21 +220,32 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);

				 * fit will be transferred to 'buf'.  If the entire attribute was not

				 * transferred, the call will return EOVERFLOW.

				 */

				int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,

				int zap_lookup(objset_t *os, uint64_t zapobj, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf);

				int zap_lookup_by_dnode(dnode_t *dn, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf);

				/*

				 * If rn_len is nonzero, realname will be set to the name of the found

				 * entry (which may be different from the requested name if matchtype is

				 * not MT_EXACT).

				 * not zero).

				 *

				 * If normalization_conflictp is not NULL, it will be set if there is

				 * another name with the same case/unicode normalized form.

				 */

				int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,

				int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf,

				    matchtype_t mt, char *realname, int rn_len,

				    boolean_t *normalization_conflictp);

				int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf,

				    matchtype_t mt, char *realname, int rn_len,

				    boolean_t *ncp);

				/*

				 * The _uint64 variants take an array of uint64_t as the key. The ZAP must

				 * be created with ZAP_FLAG_UINT64_KEY.

				 */

				int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);

				int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				@@ -229,20 +253,31 @@ int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,

				    uint64_t *actual_num_integers);

				int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);

				/*

				 * Lookup the attribute with the given name. Returns ENOENT if it does not

				 * exist, 0 if it does. This is like zap_lookup(), but may be more efficient.

				 */

				int zap_contains(objset_t *os, uint64_t zapobj, const char *name);

				int zap_contains_by_dnode(dnode_t *dn, const char *name);

				/*

				 * Prefetch the blocks within the ZAP where the given key is stored. The

				 * prefetch IO will occur in the background.

				 */

				int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);

				int zap_prefetch_object(objset_t *os, uint64_t zapobj);

				/* Prefetch by uint64_t[] key. */

				int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints);

				int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				    int key_numints);

				int zap_lookup_by_dnode(dnode_t *dn, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf);

				int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,

				    uint64_t integer_size, uint64_t num_integers, void *buf,

				    matchtype_t mt, char *realname, int rn_len,

				    boolean_t *ncp);

				/*

				 * Prefetch the entire ZAP object. Unlike zap_prefetch(), will block until

				 * the entire object is loaded into the ARC.

				 */

				int zap_prefetch_object(objset_t *os, uint64_t zapobj);

				/*

				 * Create an attribute with the given name and value.

				@@ -250,13 +285,15 @@ int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,

				 * If an attribute with the given name already exists, the call will

				 * fail and return EEXIST.

				 */

				int zap_add(objset_t *ds, uint64_t zapobj, const char *key,

				int zap_add(objset_t *os, uint64_t zapobj, const char *key,

				    int integer_size, uint64_t num_integers,

				    const void *val, dmu_tx_t *tx);

				int zap_add_by_dnode(dnode_t *dn, const char *key,

				    int integer_size, uint64_t num_integers,

				    const void *val, dmu_tx_t *tx);

				int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,

				/* Add by uint64_t[] key. */

				int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, int integer_size, uint64_t num_integers,

				    const void *val, dmu_tx_t *tx);

				int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				@@ -271,8 +308,12 @@ int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				 * existing attribute's integer size, in which case the attribute's

				 * integer size will be updated to the new value.

				 */

				int zap_update(objset_t *ds, uint64_t zapobj, const char *name,

				int zap_update(objset_t *os, uint64_t zapobj, const char *name,

				    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);

				int zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size,

				    uint64_t num_integers, const void *val, dmu_tx_t *tx);

				/* Update by uint64_t[] key. */

				int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints,

				    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);

				@@ -287,8 +328,12 @@ int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				 * If the requested attribute does not exist, the call will fail and

				 * return ENOENT.

				 */

				int zap_length(objset_t *ds, uint64_t zapobj, const char *name,

				int zap_length(objset_t *os, uint64_t zapobj, const char *name,

				    uint64_t *integer_size, uint64_t *num_integers);

				int zap_length_by_dnode(dnode_t *dn, const char *name,

				    uint64_t *integer_size, uint64_t *num_integers);

				/* Attribute length by uint64_t[] key. */

				int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, uint64_t *integer_size, uint64_t *num_integers);

				int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				@@ -300,10 +345,12 @@ int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				 * If the specified attribute does not exist, the call will fail and

				 * return ENOENT.

				 */

				int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);

				int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,

				    matchtype_t mt, dmu_tx_t *tx);

				int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx);

				int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);

				int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,

				    matchtype_t mt, dmu_tx_t *tx);

				/* Remove by uint64_t[] key. */

				int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, dmu_tx_t *tx);

				int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				@@ -313,9 +360,19 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				 * Returns (in *count) the number of attributes in the specified zap

				 * object.

				 */

				int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);

				int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count);

				int zap_count_by_dnode(dnode_t *dn, uint64_t *count);

				/*

				 * Lookup an existing uint64 value, add the delta value to it, and store

				 * the new value. If the new value is 0, removes the key

				 * entirely.

				 */

				int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,

				    dmu_tx_t *tx);

				int zap_increment_by_dnode(dnode_t *dn, const char *name, int64_t delta,

				    dmu_tx_t *tx);

				/*

				 * Returns (in name) the name of the entry whose (value & mask)

				 * (za_first_integer) is value, or ENOENT if not found.  The string

				@@ -324,21 +381,8 @@ int zap_count_by_dnode(dnode_t *dn, uint64_t *count);

				 */

				int zap_value_search(objset_t *os, uint64_t zapobj,

				    uint64_t value, uint64_t mask, char *name, uint64_t namelen);

				/*

				 * Transfer all the entries from fromobj into intoobj.  Only works on

				 * int_size=8 num_integers=1 values.  Fails if there are any duplicated

				 * entries.

				 */

				int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);

				/* Same as zap_join, but set the values to 'value'. */

				int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,

				    uint64_t value, dmu_tx_t *tx);

				/* Same as zap_join, but add together any duplicated entries. */

				int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,

				    dmu_tx_t *tx);

				int zap_value_search_by_dnode(dnode_t *dn,

				    uint64_t value, uint64_t mask, char *name, uint64_t namelen);

				/*

				 * Manipulate entries where the name + value are the "same" (the name is

				@@ -347,8 +391,10 @@ int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,

				int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);

				int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);

				int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);

				int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,

				    dmu_tx_t *tx);

				int zap_add_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);

				int zap_remove_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);

				int zap_lookup_int_by_dnode(dnode_t *dn, uint64_t value);

				/* Here the key is an int and the value is a different int. */

				int zap_add_int_key(objset_t *os, uint64_t obj,

				@@ -358,22 +404,19 @@ int zap_update_int_key(objset_t *os, uint64_t obj,

				int zap_lookup_int_key(objset_t *os, uint64_t obj,

				    uint64_t key, uint64_t *valuep);

				int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,

				    dmu_tx_t *tx);

				int zap_add_int_key_by_dnode(dnode_t *dn,

				    uint64_t key, uint64_t value, dmu_tx_t *tx);

				int zap_update_int_key_by_dnode(dnode_t *dn,

				    uint64_t key, uint64_t value, dmu_tx_t *tx);

				int zap_lookup_int_key_by_dnode(dnode_t *dn,

				    uint64_t key, uint64_t *valuep);

				struct zap;

				struct zap_leaf;

				typedef struct zap_cursor {

					/* This structure is opaque! */

					objset_t *zc_objset;

					struct zap *zc_zap;

					struct zap_leaf *zc_leaf;

					uint64_t zc_zapobj;

					uint64_t zc_serialized;

					uint64_t zc_hash;

					uint32_t zc_cd;

					boolean_t zc_prefetch;

				} zap_cursor_t;

				/*

				 * The interface for listing all the attributes of a zapobj can be

				 * thought of as cursor moving down a list of the attributes one by

				 * one.  The cookie returned by the zap_cursor_serialize routine is

				 * persistent across system calls (and across reboot, even).

				 */

				typedef struct {

					int za_integer_length;

				@@ -389,9 +432,6 @@ typedef struct {

					char za_name[];

				} zap_attribute_t;

				void zap_init(void);

				void zap_fini(void);

				/*

				 * Alloc and free zap_attribute_t.

				 */

				@@ -399,22 +439,52 @@ zap_attribute_t *zap_attribute_alloc(void);

				zap_attribute_t *zap_attribute_long_alloc(void);

				void zap_attribute_free(zap_attribute_t *attrp);

				/*

				 * The interface for listing all the attributes of a zapobj can be

				 * thought of as cursor moving down a list of the attributes one by

				 * one.  The cookie returned by the zap_cursor_serialize routine is

				 * persistent across system calls (and across reboot, even).

				struct zap;

				struct zap_leaf;

				typedef struct zap_cursor {

					/* This structure is opaque! */

					struct zap *zc_zap;

					struct zap_leaf *zc_leaf;

					uint64_t zc_hash;

					uint32_t zc_cd;

					boolean_t zc_prefetch;

					/*

					 * Legacy fields to maintain source compat with Lustre, which accesses

					 * them directly. Not to be used in new code!

					 */

					objset_t *zc_objset;

					uint64_t zc_zapobj;

				} zap_cursor_t;

				/*

				 * Initialize a zap cursor, pointing to the "first" attribute of the

				 * zapobj.  You must _fini the cursor when you are done with it.

				 * Initialize a zap cursor, pointing to the "first" attribute of the zapobj.

				 * The entire zapobj will be prefetched. You must call zap_cursor_fini on the

				 * cursor when you are done with it.

				 */

				void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);

				void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,

				    uint64_t zapobj);

				int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);

				int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn);

				void zap_cursor_fini(zap_cursor_t *zc);

				/*

				 * Initialize a cursor at the beginning, but request that we not prefetch

				 * the entire ZAP object.

				 */

				int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,

				    uint64_t zapobj);

				/*

				 * Initialize a zap cursor pointing to the position recorded by

				 * zap_cursor_serialize (in the "serialized" argument).  You can also

				 * use a "serialized" argument of 0 to start at the beginning of the

				 * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to

				 * zap_cursor_init(...).)

				 */

				int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,

				    uint64_t zapobj, uint64_t serialized);

				int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,

				    uint64_t serialized);

				/*

				 * Get the attribute currently pointed to by the cursor.  Returns

				 * ENOENT if at the end of the attributes.

				@@ -435,17 +505,6 @@ void zap_cursor_advance(zap_cursor_t *zc);

				 */

				uint64_t zap_cursor_serialize(zap_cursor_t *zc);

				/*

				 * Initialize a zap cursor pointing to the position recorded by

				 * zap_cursor_serialize (in the "serialized" argument).  You can also

				 * use a "serialized" argument of 0 to start at the beginning of the

				 * zapobj (ie.  zap_cursor_init_serialized(..., 0) is equivalent to

				 * zap_cursor_init(...).)

				 */

				void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,

				    uint64_t zapobj, uint64_t serialized);

				#define	ZAP_HISTOGRAM_SIZE 10

				typedef struct zap_stats {

				@@ -535,7 +594,12 @@ typedef struct zap_stats {

				 * statistics.  This interface shouldn't be relied on unless you really

				 * know what you're doing.

				 */

				int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);

				int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs);

				int zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs);

				/* ZAP subsystem setup/teardown */

				void zap_init(void);

				void zap_fini(void);

				#ifdef	__cplusplus

				}

									
										sys/contrib/openzfs/include/sys/zap_impl.h
									
		+92
		-18
	
												View File
												
				@@ -26,6 +26,7 @@

				 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.

				 * Copyright 2017 Nexenta Systems, Inc.

				 * Copyright (c) 2024, Klara, Inc.

				 * Copyright (c) 2026, TrueNAS.

				 */

				#ifndef	_SYS_ZAP_IMPL_H

				@@ -33,7 +34,6 @@

				#include <sys/zap.h>

				#include <sys/zfs_context.h>

				#include <sys/avl.h>

				#ifdef	__cplusplus

				extern "C" {

				@@ -62,8 +62,9 @@ typedef struct mzap_phys {

					uint64_t mz_salt;

					uint64_t mz_normflags;

					uint64_t mz_pad[5];

					mzap_ent_phys_t mz_chunk[1];

					/* actually variable size depending on block size */

					mzap_ent_phys_t mz_chunk[];

				} mzap_phys_t;

				typedef struct mzap_ent {

				@@ -170,6 +171,9 @@ typedef struct zap {

					} zap_u;

				} zap_t;

				#define	zap_f	zap_u.zap_fat

				#define	zap_m	zap_u.zap_micro

				static inline zap_phys_t *

				zap_f_phys(zap_t *zap)

				{

				@@ -182,6 +186,10 @@ zap_m_phys(zap_t *zap)

					return (zap->zap_dbuf->db_data);

				}

				/*

				 * zap_name_t carries the original key and whatever we've derived from it

				 * (normalised form, hash, etc) as we work through completing the operation.

				 */

				typedef struct zap_name {

					zap_t *zn_zap;

					int zn_key_intlen;

				@@ -196,25 +204,94 @@ typedef struct zap_name {

					char zn_normbuf[];

				} zap_name_t;

				#define	zap_f	zap_u.zap_fat

				#define	zap_m	zap_u.zap_micro

				/*

				 * Allocate a zap_name_t. The longname flag ensures there is enough room to

				 * hold a long filename when the 'longname' pool feature is active.

				 */

				zap_name_t *zap_name_alloc(zap_t *zap, boolean_t longname);

				/*

				 * Allocate a zap_name_t for the given key. zap_name_init_str() will be

				 * called to normalise the key and initialise the struct.

				 */

				zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);

				/*

				 * Allocate a zap_name_t for a uint64 array key.

				 */

				zap_name_t *zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints);

				/*

				 * Free a zap_name_t.

				 */

				void zap_name_free(zap_name_t *zn);

				/*

				 * Initialise an existing zap_name_t with the normalised form of the key,

				 * computed according to the given matchtype.

				 */

				int zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt);

				/*

				 * Compare 'matchname' with the name represented by the zap_name_t, applying

				 * the same normalisation method first. Returns true if the normalised forms

				 * match, false otherwise.

				 */

				boolean_t zap_match(zap_name_t *zn, const char *matchname);

				int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,

				/*

				 * Compute and return the 64-bit hash for the name, according to the name

				 * type and hash flags.

				 */

				uint64_t zap_hash(zap_name_t *zn);

				/*

				 * Return a zap_t for the given on-disk object, locked and ready for use.

				 * The zap_t will be allocated and loaded from disk if it's not already loaded.

				 */

				int zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,

				    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,

				    zap_t **zapp);

				void zap_unlockdir(zap_t *zap, const void *tag);

				int zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,

				    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,

				    zap_t **zapp);

				/* Unlock and release a zap_t. */

				void zap_unlock(zap_t *zap, const void *tag);

				/*

				 * Try to upgrade a zap lock from READER to WRITER. If the upgrade is not

				 * possible without blocking, returns 0. If the upgrade happened, returns 1.

				 */

				int zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx);

				/*

				 * Upgrade a zap lock from READER to WRITER. If it can't be upgraded

				 * immediately it will block.

				 */

				void zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx);

				/* zap_t release function for when associated dbuf is evicted. */

				void zap_evict_sync(void *dbu);

				zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);

				void zap_name_free(zap_name_t *zn);

				/* Misc internal state & config. */

				int zap_hashbits(zap_t *zap);

				uint32_t zap_maxcd(zap_t *zap);

				uint64_t zap_getflags(zap_t *zap);

				/* Microzap implementation. */

				zap_t *mzap_open(dmu_buf_t *db);

				int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);

				mzap_ent_t *mze_find(zap_name_t *zn, zfs_btree_index_t *idx);

				boolean_t mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash);

				void mze_destroy(zap_t *zap);

				boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn,

				    mzap_ent_t *mze, zfs_btree_index_t *idx);

				void mzap_addent(zap_name_t *zn, uint64_t value);

				void mzap_byteswap(mzap_phys_t *buf, size_t size);

				uint64_t zap_get_micro_max_size(spa_t *spa);

				#define	ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))

				/* Fatzap implementation. */

				void fzap_byteswap(void *buf, size_t size);

				int fzap_count(zap_t *zap, uint64_t *count);

				int fzap_lookup(zap_name_t *zn,

				@@ -223,20 +300,17 @@ int fzap_lookup(zap_name_t *zn,

				    uint64_t *actual_num_integers);

				void fzap_prefetch(zap_name_t *zn);

				int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,

				    const void *val, const void *tag, dmu_tx_t *tx);

				int fzap_update(zap_name_t *zn,

				    int integer_size, uint64_t num_integers, const void *val,

				    const void *tag, dmu_tx_t *tx);

				    const void *val, dmu_tx_t *tx);

				int fzap_update(zap_name_t *zn, int integer_size, uint64_t num_integers,

				    const void *val, dmu_tx_t *tx);

				int fzap_length(zap_name_t *zn,

				    uint64_t *integer_size, uint64_t *num_integers);

				int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);

				int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);

				void fzap_get_stats(zap_t *zap, zap_stats_t *zs);

				void zap_put_leaf(struct zap_leaf *l);

				int fzap_add_cd(zap_name_t *zn,

				    uint64_t integer_size, uint64_t num_integers,

				    const void *val, uint32_t cd, const void *tag, dmu_tx_t *tx);

				int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,

				    const void *val, uint32_t cd, dmu_tx_t *tx);

				void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);

				#ifdef	__cplusplus

									
										sys/contrib/openzfs/include/sys/zio_impl.h
									
		+6
		-7
	
												View File
												
				@@ -139,12 +139,12 @@ enum zio_stage {

					ZIO_STAGE_NOP_WRITE		= 1 << 8,	/* -W---- */

					ZIO_STAGE_BRT_FREE		= 1 << 9,	/* --F--- */

					ZIO_STAGE_DDT_READ_START	= 1 << 9,	/* R----- */

					ZIO_STAGE_DDT_READ_DONE		= 1 << 10,	/* R----- */

					ZIO_STAGE_DDT_WRITE		= 1 << 11,	/* -W---- */

					ZIO_STAGE_DDT_FREE		= 1 << 12,	/* --F--- */

					ZIO_STAGE_DDT_READ_START	= 1 << 10,	/* R----- */

					ZIO_STAGE_DDT_READ_DONE		= 1 << 11,	/* R----- */

					ZIO_STAGE_DDT_WRITE		= 1 << 12,	/* -W---- */

					ZIO_STAGE_DDT_FREE		= 1 << 13,	/* --F--- */

					ZIO_STAGE_BRT_FREE		= 1 << 13,	/* --F--- */

					ZIO_STAGE_GANG_ASSEMBLE		= 1 << 14,	/* RWFC-- */

					ZIO_STAGE_GANG_ISSUE		= 1 << 15,	/* RWFC-- */

				@@ -259,8 +259,7 @@ enum zio_stage {

					ZIO_STAGE_DVA_FREE)

				#define	ZIO_DDT_FREE_PIPELINE			\

					(ZIO_INTERLOCK_STAGES |			\

					ZIO_STAGE_FREE_BP_INIT |		\

					(ZIO_FREE_PIPELINE |			\

					ZIO_STAGE_ISSUE_ASYNC |			\

					ZIO_STAGE_DDT_FREE)

									
										sys/contrib/openzfs/lib/libspl/Makefile.am
									
		-4
	
												View File
												
				@@ -63,7 +63,3 @@ libspl_la_LIBADD = \

				libspl_la_LIBADD += $(LIBATOMIC_LIBS) $(LIBCLOCK_GETTIME)

				libspl_assert_la_LIBADD = $(BACKTRACE_LIBS) $(LIBUNWIND_LIBS)

				if BUILD_FREEBSD

				libspl_assert_la_LIBADD += -lpthread

				endif

									
										sys/contrib/openzfs/lib/libzfs/Makefile.am
									
		+1
		-3
	
												View File
												
				@@ -76,7 +76,7 @@ libzfs_la_LIBADD = \

				libzfs_la_LIBADD += -lrt -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)

				libzfs_la_LDFLAGS = -pthread

				libzfs_la_LDFLAGS = -version-info 7:0:0

				if !ASAN_ENABLED

				libzfs_la_LDFLAGS += -Wl,-z,defs

				@@ -86,8 +86,6 @@ if BUILD_FREEBSD

				libzfs_la_LIBADD += -lutil -lgeom

				endif

				libzfs_la_LDFLAGS += -version-info 7:0:0

				pkgconfig_DATA += %D%/libzfs.pc

				dist_noinst_DATA += %D%/libzfs.abi %D%/libzfs.suppr

sys/contrib/openzfs/lib/libzfs/libzfs.abi

+7 -2

View File

@@ -2553,7 +2553,7 @@
     <typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
     <typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
     <typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
     <class-decl name='libzfs_handle' size-in-bits='18432' is-struct='yes' visibility='default' id='c8a9d9d8'>
     <class-decl name='libzfs_handle' size-in-bits='18496' is-struct='yes' visibility='default' id='c8a9d9d8'>
       <data-member access='public' layout-offset-in-bits='0'>
         <var-decl name='libzfs_error' type-id='95e97e5e' visibility='default'/>
       </data-member>
@@ -2605,6 +2605,9 @@
       <data-member access='public' layout-offset-in-bits='18112'>
         <var-decl name='zh_mnttab' type-id='f20fbd51' visibility='default'/>
       </data-member>
       <data-member access='public' layout-offset-in-bits='18432'>
         <var-decl name='zh_mnttab_cache_enabled' type-id='c19b74c3' visibility='default'/>
       </data-member>
     </class-decl>
     <class-decl name='zfs_handle' size-in-bits='4928' is-struct='yes' visibility='default' id='f6ee4445'>
       <data-member access='public' layout-offset-in-bits='0'>
@@ -6412,7 +6415,9 @@
       <enumerator name='VDEV_PROP_SCHEDULER' value='55'/>
       <enumerator name='VDEV_PROP_FDOMAIN' value='56'/>
       <enumerator name='VDEV_PROP_FGROUP' value='57'/>
       <enumerator name='VDEV_NUM_PROPS' value='58'/>
       <enumerator name='VDEV_PROP_ALLOC_BIAS' value='58'/>
       <enumerator name='VDEV_PROP_ROTATIONAL' value='59'/>
       <enumerator name='VDEV_NUM_PROPS' value='60'/>
     </enum-decl>
     <typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
     <class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

									
										sys/contrib/openzfs/lib/libzfs/libzfs_changelist.c
									
		+12
		-3
	
												View File
												
				@@ -177,6 +177,7 @@ changelist_postfix(prop_changelist_t *clp)

					char shareopts[ZFS_MAXPROPLEN];

					boolean_t commit_smb_shares = B_FALSE;

					boolean_t commit_nfs_shares = B_FALSE;

					int rc = 0;

					/*

					 * If CL_GATHER_DONT_UNMOUNT is set, it means we don't want to (un)mount

				@@ -266,7 +267,7 @@ changelist_postfix(prop_changelist_t *clp)

						const enum sa_protocol nfs[] =

						    {SA_PROTOCOL_NFS, SA_NO_PROTOCOL};

						if (sharenfs && mounted) {

							zfs_share(cn->cn_handle, nfs);

							rc = zfs_share(cn->cn_handle, nfs);

							commit_nfs_shares = B_TRUE;

						} else if (cn->cn_shared || clp->cl_waslegacy) {

							zfs_unshare(cn->cn_handle, NULL, nfs);

				@@ -275,7 +276,7 @@ changelist_postfix(prop_changelist_t *clp)

						const enum sa_protocol smb[] =

						    {SA_PROTOCOL_SMB, SA_NO_PROTOCOL};

						if (sharesmb && mounted) {

							zfs_share(cn->cn_handle, smb);

							rc = zfs_share(cn->cn_handle, smb);

							commit_smb_shares = B_TRUE;

						} else if (cn->cn_shared || clp->cl_waslegacy) {

							zfs_unshare(cn->cn_handle, NULL, smb);

				@@ -291,7 +292,15 @@ changelist_postfix(prop_changelist_t *clp)

					*p++ = SA_NO_PROTOCOL;

					zfs_commit_shares(proto);

					return (0);

					/*

					 * It's possible rc != 0 since we set a mountpoint or option while

					 * SMB/NFS was not running.  This is fine, and we should not return

					 * an error up the stack.

					 *

					 * At this point we only want to report mountpoint/shareopts parsing

					 * errors.

					 */

					return (rc == SA_SYNTAX_ERR ? rc : 0);

				}

				/*

									
										sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
									
		+60
		-1
	
												View File
												
				@@ -2031,12 +2031,21 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,

					return (0);

				}

				/*

				 * Export the pool from the system.  Setting force overrides the

				 * active-shared-spare check.  The caller must unmount all datasets

				 * in the pool first.

				 */

				int

				zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)

				{

					/* B_FALSE is the hardforce argument of zpool_export_common(). */

					return (zpool_export_common(zhp, force, B_FALSE, log_str));

				}

				/*

				 * Force-export the pool: bypasses the active-shared-spare check, and skips

				 * writing the exported-state labels and updating the cachefile.

				 */

				int

				zpool_export_force(zpool_handle_t *zhp, const char *log_str)

				{

				@@ -2574,6 +2583,10 @@ xlate_init_err(int err)

					return (err);

				}

				/*

				 * Start (or cancel/suspend/uninit) the initialize operation on every

				 * leaf vdev of the pool.

				 */

				int

				zpool_initialize_one(zpool_handle_t *zhp, void *data)

				{

				@@ -2685,6 +2698,10 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,

					return (err == 0 ? 0 : -1);

				}

				/*

				 * Start (or cancel/suspend/uninit) the initialize operation on the listed

				 * vdevs.  Returns once the new state is committed.

				 */

				int

				zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,

				    nvlist_t *vds)

				@@ -2692,6 +2709,9 @@ zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,

					return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE));

				}

				/*

				 * Like zpool_initialize(), but waits for each listed vdev to finish.

				 */

				int

				zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,

				    nvlist_t *vds)

				@@ -2746,6 +2766,10 @@ zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res)

					}

				}

				/*

				 * Start (or cancel/suspend) the trim operation on every leaf vdev of

				 * the pool.

				 */

				int

				zpool_trim_one(zpool_handle_t *zhp, void *data)

				{

				@@ -3393,6 +3417,11 @@ __zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,

					return (ret);

				}

				/*

				 * Look up a vdev in the pool by path, name, or guid.  Returns the

				 * vdev's configuration nvlist, or NULL on no match.  Also, fills

				 * in avail_spare, l2cache, and log if they are non-NULL.

				 */

				nvlist_t *

				zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,

				    boolean_t *l2cache, boolean_t *log)

				@@ -4637,7 +4666,10 @@ zpool_reopen_one(zpool_handle_t *zhp, void *data)

					return (0);

				}

				/* call into libzfs_core to execute the sync IOCTL per pool */

				/*

				 * Block until every buffered write for the pool has reached the

				 * underlying disks.

				 */

				int

				zpool_sync_one(zpool_handle_t *zhp, void *data)

				{

				@@ -4913,6 +4945,10 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)

					return (0);

				}

				/*

				 * Format the program name and its command-line arguments into a single

				 * space-separated string.

				 */

				void

				zfs_save_arguments(int argc, char **argv, char *string, int len)

				{

				@@ -4925,6 +4961,10 @@ zfs_save_arguments(int argc, char **argv, char *string, int len)

					}

				}

				/*

				 * Append a message to the pool's command-history log, retrievable via

				 * "zpool history".

				 */

				int

				zpool_log_history(libzfs_handle_t *hdl, const char *message)

				{

				@@ -5220,6 +5260,11 @@ zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,

					free(mntpnt);

				}

				/*

				 * Translate a (dataset object id, file object id) pair into a readable

				 * path.  If the dataset is mounted the result is an absolute filesystem

				 * path; otherwise it is `dataset:path`.

				 */

				void

				zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,

				    char *pathname, size_t len)

				@@ -5227,6 +5272,10 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,

					zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_FALSE);

				}

				/*

				 * Translate a (dataset object id, file object id) pair into a

				 * `dataset:path` string.

				 */

				void

				zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,

				    char *pathname, size_t len)

				@@ -5281,6 +5330,10 @@ zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,

					return (error);

				}

				/*

				 * Store a boot configuration map in the bootenv area of each leaf

				 * vdev's labels.

				 */

				int

				zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)

				{

				@@ -5294,6 +5347,9 @@ zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)

					return (error);

				}

				/*

				 * Read the boot configuration map from each leaf vdev's bootenv area.

				 */

				int

				zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)

				{

				@@ -5741,6 +5797,9 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,

								return (ENOENT);

							if (prop == VDEV_PROP_SIT_OUT)

								return (ENOENT);

							/* Only valid for top-level vdevs */

							if (prop == VDEV_PROP_ALLOC_BIAS)

								return (ENOENT);

						}

						if (vdev_prop_index_to_string(prop, intval,

						    (const char **)&strval) != 0)

									
										sys/contrib/openzfs/lib/libzfs/libzfs_share.c
									
		+8
		
												View File
												
				@@ -64,6 +64,10 @@ sa_enable_share(const char *zfsname, const char *mountpoint,

				{

					VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);

					int error = sa_validate_shareopts(shareopts, protocol);

					if (error != SA_OK)

						return (error);

					const struct sa_share_impl args =

					    init_share(zfsname, mountpoint, shareopts);

					return (fstypes[protocol]->enable_share(&args));

				@@ -111,6 +115,10 @@ sa_validate_shareopts(const char *options, enum sa_protocol protocol)

				{

					VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);

					/* error out on invalid characters */

					if (strpbrk(options, "\a\b\f\n\r") != NULL)

						return (SA_SYNTAX_ERR);

					return (fstypes[protocol]->validate_shareopts(options));

				}

									
										sys/contrib/openzfs/lib/libzfs_core/Makefile.am
									
		+1
		-3
	
												View File
												
				@@ -33,7 +33,7 @@ libzfs_core_la_LIBADD = \

				libzfs_core_la_LIBADD += $(LTLIBINTL)

				libzfs_core_la_LDFLAGS = -pthread

				libzfs_core_la_LDFLAGS = -version-info 3:0:0

				if !ASAN_ENABLED

				libzfs_core_la_LDFLAGS += -Wl,-z,defs

				@@ -43,8 +43,6 @@ if BUILD_FREEBSD

				libzfs_core_la_LIBADD += -lutil -lgeom

				endif

				libzfs_core_la_LDFLAGS += -version-info 3:0:0

				pkgconfig_DATA += %D%/libzfs_core.pc

				dist_noinst_DATA += %D%/libzfs_core.abi %D%/libzfs_core.suppr

									
										sys/contrib/openzfs/lib/libzpool/Makefile.am
									
		+3
		-3
	
												View File
												
				@@ -166,6 +166,8 @@ nodist_libzpool_la_SOURCES = \

					module/zfs/vdev_root.c \

					module/zfs/vdev_trim.c \

					module/zfs/zap.c \

					module/zfs/zap_fat.c \

					module/zfs/zap_impl.c \

					module/zfs/zap_leaf.c \

					module/zfs/zap_micro.c \

					module/zfs/zcp.c \

				@@ -212,7 +214,7 @@ libzpool_la_LIBADD = \

				libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -lm

				libzpool_la_LDFLAGS = -pthread

				libzpool_la_LDFLAGS = -version-info 7:0:0

				if !ASAN_ENABLED

				libzpool_la_LDFLAGS += -Wl,-z,defs

				@@ -222,8 +224,6 @@ if BUILD_FREEBSD

				libzpool_la_LIBADD += -lgeom

				endif

				libzpool_la_LDFLAGS += -version-info 7:0:0

				if TARGET_CPU_POWERPC

				module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec

				module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.l$(OBJEXT): CFLAGS += -maltivec

sys/contrib/openzfs/man/man4/zfs.4

+62 -6

View File

@@ -4,6 +4,7 @@
 .\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
 .\" Copyright (c) 2019 Datto Inc.
 .\" Copyright (c) 2023, 2024, 2025, Klara, Inc.
 .\" Copyright (c) 2026, Mateusz Piotrowski <0mp@FreeBSD.org>
 .\"
 .\" The contents of this file are subject to the terms of the Common Development
 .\" and Distribution License (the "License").  You may not use this file except
@@ -18,7 +19,7 @@
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
 .Dd September 15, 2025
 .Dd May 8, 2026
 .Dt ZFS 4
 .Os
 .
@@ -389,6 +390,18 @@ this is
 or
 .Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 .
 .
 .It Sy metaslab_df_alloc_threshold Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq u64
 Minimum size which forces the dynamic allocator to change its allocation
 strategy.
 Once the space map cannot satisfy an allocation of this size, it switches to a
 more aggressive strategy (searching by size rather than offset).
 .
 .It Sy metaslab_df_free_pct Ns = Ns Sy 4 Ns % Pq uint
 The minimum free space, in percent, which must be available in a space map to
 continue allocations in a first-fit fashion.
 Once free space drops below this level, allocations switch to a best-fit
 strategy.
 .
 .It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int
 If not searching forward (due to
 .Sy metaslab_df_max_search , metaslab_df_free_pct ,
@@ -445,6 +458,32 @@ This improves performance, especially when there are many metaslabs per vdev
 and the allocation can't actually be satisfied
 (so we would otherwise iterate all metaslabs).
 .
 .It Sy zfs_metaslab_sm_blksz_no_log Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq int
 Block size for the metaslab space maps in pools where the
 .Sy log_spacemap
 feature is disabled.
 Multiple metaslabs are modified per transaction group, so a smaller block size
 lets more, scattered I/O operations be issued.
 Must be a power of 2 greater than
 .Sy 4096 .
 This parameter can only be set at module load time.
 .
 .It Sy zfs_metaslab_sm_blksz_with_log Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int
 Block size for the metaslab space maps in pools where the
 .Sy log_spacemap
 feature is enabled.
 Changes are batched in the per-pool log spacemap and flushed to each metaslab's
 space map only occasionally, so a larger block size is more efficient.
 Must be a power of 2 greater than
 .Sy 4096 .
 This parameter can only be set at module load time.
 .
 .It Sy zfs_metaslab_condense_pct Ns = Ns Sy 200 Ns % Pq uint
 Condense an on-disk space map when its size exceeds this percentage of
 the in-memory representation.
 The minimum is
 .Sy 100 .
 .
 .It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq uint
 When a vdev is added, target this number of metaslabs per top-level vdev.
 .
@@ -768,9 +807,15 @@ See also
 which serves a similar purpose but has a higher priority if nonzero.
 .
 .It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq u64
 Percentage of ARC dnodes to try to scan in response to demand for non-metadata
 when the number of bytes consumed by dnodes exceeds
 .Sy zfs_arc_dnode_limit .
 Percentage used to size dnode prune requests.
 The request size is the larger of two values:
 .Sy zfs_arc_dnode_reduce_percent
 applied to the dnode count above
 .Sy zfs_arc_dnode_limit ,
 or
 .Sy zfs_arc_dnode_reduce_percent
 applied to the total dnode count
 when non-evictable metadata exceeds 3/4 of the metadata target.
 .
 .It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8 KiB Pc Pq uint
 The ARC's buffer hash table is sized based on the assumption of an average
@@ -911,6 +956,19 @@ but that was not proven to be useful.
 Number of missing top-level vdevs which will be allowed during
 pool import (only in read-only mode).
 .
 .It Sy zfs_max_missing_tvds_cachefile Ns = Ns Sy 2 Pq u64
 Number of missing top-level vdevs tolerated when importing a pool
 from a cachefile, before the trusted config is read from the MOS.
 A cachefile can fall out of sync with the on-disk config after a
 device removal that did not rewrite the cachefile, so the default
 of 2 still lets the import reach a copy of the MOS.
 .
 .It Sy zfs_max_missing_tvds_scan Ns = Ns Sy 0 Pq u64
 Number of missing top-level vdevs tolerated when importing a pool
 by scanning device paths, before the trusted config is read from
 the MOS.
 Defaults to 0 because a scan should detect every present device.
 .
 .It Sy zfs_max_nvlist_src_size Ns = Sy 0 Pq u64
 Maximum size in bytes allowed to be passed as
 .Sy zc_nvlist_src_size
@@ -948,8 +1006,6 @@ equivalent to the greater of the number of online CPUs and
 If less than
 .Sy arc_c No >> Sy zfs_arc_no_grow_shift
 free memory is available, the ARC is not allowed to grow.
 This parameter is
 .Fx Ns -specific .
 .
 .It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
 The ARC size is considered to be overflowing if it exceeds the current

sys/contrib/openzfs/man/man7/vdevprops.7

+21 -1

View File

@@ -142,6 +142,8 @@ See
 .Xr zpool-attach 8 .
 .It Sy trim_support
 Indicates if a leaf device supports trim operations.
 .It Sy rotational
 Indicates whether the device backing this vdev uses rotating media.
 .El
 .Pp
 The following native properties can be used to change the behavior of a vdev.
@@ -183,9 +185,12 @@ output.
 A text comment up to 8192 characters long
 .It Sy bootsize
 The amount of space to reserve for the EFI system partition
 .It Sy failfast
 .It Sy failfast Ns = Ns Sy inherit Ns | Ns Sy on Ns | Ns Sy off
 If this device should propagate BIO errors back to ZFS, used to disable
 failfast.
 .Sy inherit
 causes the vdev to adopt the behavior of its parent vdev,
 recursively up the tree.
 .It Sy sit_out
 Only valid for
 .Sy RAIDZ
@@ -218,6 +223,21 @@ If this device should perform new allocations, used to disable a device
 when it is scheduled for later removal.
 See
 .Xr zpool-remove 8 .
 .It Sy alloc_bias Ns = Ns Sy none Ns | Ns Sy log Ns | Ns Sy special Ns | Ns Sy dedup
 Controls the allocation class for a top-level vdev.
 Changes take effect after an export and import of the pool.
 Changing to/from log is not implemented, since it may lead to data loss in
 case of log device failure.
 Setting to
 .Sy special
 and
 .Sy dedup
 requires
 .Sy feature@allocation_classes
 to be enabled.
 At least one top-level vdev must remain in the normal
 .Pq Sy none
 class.
 .It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy on Ns | Ns Sy off
 Controls how I/O requests are added to the vdev queue when reading or
 writing to this vdev.

sys/contrib/openzfs/man/man8/zdb.8

+6 -1

View File

@@ -284,10 +284,15 @@ Decode and display block from an embedded block pointer specified by the
 arguments.
 .It Fl f , -file-layout
 Display the file layout of an object for the disks of a raidz vdev.
 Numeric values in the display are hexadecimal.
 With
 .Fl H ,
 the output is in scripted mode for easy parsing, with all values
 being presented as 512 byte blocks.
 being presented as 512 byte blocks in decimal; with
 .Fl v ,
 the block type (parity or data) is displayed; with
 .Fl vv ,
 the offset into the file for each block is also printed.
 Only a single top-level raidz vdev is supported.
 .It Fl h , -history
 Display pool history similar to

sys/contrib/openzfs/man/man8/zpool-attach.8

+30 -1

View File

@@ -27,7 +27,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
 .Dd November 8, 2023
 .Dd May 9, 2026
 .Dt ZPOOL-ATTACH 8
 .Os
 .
@@ -132,6 +132,35 @@ Waits until
 has finished resilvering or expanding before returning.
 .El
 .
 .Sh EXAMPLES
 .\" Example 1 is example 5 from zpool.8.
 .\" Make sure to update them bidirectionally
 .Ss Example 1 : No Making a non-mirrored ZFS Storage Pool mirrored
 The following command converts an existing single device
 .Ar sda
 into a mirror by attaching a second device to it,
 .Ar sdb .
 .Dl # Nm zpool Cm attach Ar tank Pa sda sdb
 .
 .Ss Example 2 : No Expanding a RAID-Z vdev with an additional disk
 The following command adds
 .Ar sdg
 to the existing
 .Ar raidz2-0
 vdev in
 .Ar tank ,
 turning a 6-wide RAID-Z2 into a 7-wide RAID-Z2:
 .Dl # Nm zpool Cm attach Ar tank raidz2-0 Pa sdg
 Progress is reported by
 .Nm zpool Cm status .
 The operation requires the
 .Sy raidz_expansion
 pool feature, and
 .Ar sdg
 must be at least as large as the smallest existing disk in the vdev.
 Old blocks keep their original data-to-parity ratio; only blocks written
 after the expansion use the new ratio.
 .
 .Sh SEE ALSO
 .Xr zpool-add 8 ,
 .Xr zpool-detach 8 ,

sys/contrib/openzfs/man/man8/zpool-events.8

+5 -5

View File

@@ -458,12 +458,12 @@ ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W----
 ZIO_STAGE_NOP_WRITE:0x00000100:-W----
 ZIO_STAGE_BRT_FREE:0x00000200:--F---
 ZIO_STAGE_DDT_READ_START:0x00000200:R-----
 ZIO_STAGE_DDT_READ_DONE:0x00000400:R-----
 ZIO_STAGE_DDT_WRITE:0x00000800:-W----
 ZIO_STAGE_DDT_FREE:0x00001000:--F---
 ZIO_STAGE_DDT_READ_START:0x00000400:R-----
 ZIO_STAGE_DDT_READ_DONE:0x00000800:R-----
 ZIO_STAGE_DDT_WRITE:0x00001000:-W----
 ZIO_STAGE_DDT_FREE:0x00002000:--F---
 ZIO_STAGE_BRT_FREE:0x00002000:--F---
 ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC--
 ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC--

sys/contrib/openzfs/man/man8/zpool-iostat.8

+1 -4

View File

@@ -109,10 +109,7 @@ environment variable set.
 If a script requires the use of a privileged command, like
 .Xr smartctl 8 ,
 then it's recommended you allow the user access to it in
 .Pa /etc/sudoers
 or add the user to the
 .Pa /etc/sudoers.d/zfs
 file.
 .Pa /etc/sudoers .
 .Pp
 If
 .Fl c

sys/contrib/openzfs/man/man8/zpool.8

View File

@@ -245,6 +245,7 @@ Invalid command line options were specified.
 .
 .Sh EXAMPLES
 .\" Examples 1, 2, 3, 4, 12, 13 are shared with zpool-create.8.
 .\" Example 5 is shared with zpool-attach.8.
 .\" Examples 6, 14 are shared with zpool-add.8.
 .\" Examples 7, 16 are shared with zpool-list.8.
 .\" Examples 8 are shared with zpool-destroy.8.

sys/contrib/openzfs/module/Kbuild.in

+11 -2

View File

@@ -4,9 +4,11 @@
 ZFS_MODULE_CFLAGS += -std=gnu11 -Wno-declaration-after-statement
 ZFS_MODULE_CFLAGS += -Wmissing-prototypes
 ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@
 ZFS_MODULE_CFLAGS += @KERNEL_NO_FORMAT_ZERO_LENGTH@
 ifneq ($(KBUILD_EXTMOD),)
 ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
 ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
 zfs_include = @abs_top_srcdir@/include
 icp_include = @abs_srcdir@/icp/include
 zstd_include = @abs_srcdir@/zstd/include
@@ -16,6 +18,12 @@ ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include
 src = @abs_srcdir@
 obj = @abs_builddir@
 else
 ifeq ($(CONFIG_ZFS_DEBUG),y)
 ZFS_MODULE_CFLAGS += -Werror
 ZFS_MODULE_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
 else
 ZFS_MODULE_CPPFLAGS += -UDEBUG -DNDEBUG
 endif
 zfs_include = $(srctree)/include/zfs
 icp_include = $(src)/icp/include
 zstd_include = $(src)/zstd/include
@@ -28,7 +36,6 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl
 ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
 ZFS_MODULE_CFLAGS += -I$(zfs_include)
 ZFS_MODULE_CPPFLAGS += -D_KERNEL
 ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
 # KASAN enables -Werror=frame-larger-than=1024, which
 # breaks oh so many parts of our build.
@@ -408,6 +415,8 @@ ZFS_OBJS := \
 	vdev_root.o \
 	vdev_trim.o \
 	zap.o \
 	zap_fat.o \
 	zap_impl.o \
 	zap_leaf.o \
 	zap_micro.o \
 	zcp.o \

									
										sys/contrib/openzfs/module/Makefile.bsd
									
		+9
		-1
	
												View File
												
				@@ -65,6 +65,12 @@ CFLAGS+= -DZFS_DEBUG -g

				CFLAGS += -DNDEBUG

				.endif

				.for _SAN in KASAN KMSAN KUBSAN

				.if defined(WITH_${_SAN}) && ${WITH_${_SAN}} == "true"

				KERN_OPTS_EXTRA+= ${_SAN}

				.endif

				.endfor

				.if defined(WITH_GCOV) && ${WITH_GCOV} == "true"

				CFLAGS+=	 -fprofile-arcs -ftest-coverage

				.endif

				@@ -345,6 +351,8 @@ SRCS+=	abd.c \

					vdev_root.c \

					vdev_trim.c \

					zap.c \

					zap_fat.c \

					zap_impl.c \

					zap_leaf.c \

					zap_micro.c \

					zcp.c \

				@@ -475,8 +483,8 @@ CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier

				CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier

				CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual

				CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier

				CFLAGS.zap_impl.c= -Wno-cast-qual

				CFLAGS.zap_leaf.c= -Wno-cast-qual

				CFLAGS.zap_micro.c= -Wno-cast-qual

				CFLAGS.zcp.c= -Wno-cast-qual

				CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith

				CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith

									
										sys/contrib/openzfs/module/nvpair/nvpair.c
									
		+37
		-12
	
												View File
												
				@@ -135,7 +135,8 @@

				#define	NVP_SIZE_CALC(name_len, data_len) \

					(NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len))

				static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);

				static int i_get_value_size(data_type_t type, const void *data, uint_t nelem,

				    size_t max_size);

				static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,

				    uint_t nelem, const void *data);

				@@ -810,8 +811,10 @@ i_validate_nvpair(nvpair_t *nvp)

					 * verify nvp_type, nvp_value_elem, and also possibly

					 * verify string values and get the value size.

					 */

					size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));

					size1 = nvp->nvp_size - NVP_VALOFF(nvp);

					size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp),

					    size1);

					if (size2 < 0 || size1 != NV_ALIGN(size2))

						return (EFAULT);

				@@ -1002,12 +1005,21 @@ nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)

				 * 	DATA_TYPE_STRING    	and

				 *	DATA_TYPE_STRING_ARRAY

				 * If data == NULL then the size of the string(s) is excluded.

				 *

				 * If 'max_size' is non-zero, then don't look beyond 'max_size' number of

				 * bytes when calculating a value size. Note that 'max_size' should include

				 * the NULL terminator byte when calculating string size.  If 'max_size' is 0,

				 * it is ignored.

				 */

				static int

				i_get_value_size(data_type_t type, const void *data, uint_t nelem)

				i_get_value_size(data_type_t type, const void *data, uint_t nelem,

				    size_t max_size)

				{

					uint64_t value_sz;

					if (max_size == 0)

						max_size = INT32_MAX;

					if (i_validate_type_nelem(type, nelem) != 0)

						return (-1);

				@@ -1052,10 +1064,15 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)

						break;

				#endif

					case DATA_TYPE_STRING:

						if (data == NULL)

						if (data == NULL) {

							value_sz = 0;

						else

							value_sz = strlen(data) + 1;

						} else {

							value_sz = strnlen(data, max_size);

							if (value_sz >= max_size) {

								return (-1);	/* string not terminated */

							}

							value_sz += 1;

						}

						break;

					case DATA_TYPE_BOOLEAN_ARRAY:

						value_sz = (uint64_t)nelem * sizeof (boolean_t);

				@@ -1089,16 +1106,23 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)

						break;

					case DATA_TYPE_STRING_ARRAY:

						value_sz = (uint64_t)nelem * sizeof (uint64_t);

						if (data != NULL) {

							char *const *strs = data;

							uint_t i;

							size_t newsize;

							/* no alignment requirement for strings */

							for (i = 0; i < nelem; i++) {

								if (strs[i] == NULL)

									return (-1);

								value_sz += strlen(strs[i]) + 1;

								newsize = strnlen(strs[i], max_size);

								if (newsize == max_size)

									return (-1);	/* not terminated */

								value_sz += newsize + 1; /* +1 for NULL */

								max_size -= newsize + 1;

							}

						}

						break;

				@@ -1163,7 +1187,7 @@ nvlist_add_common(nvlist_t *nvl, const char *name,

					 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY

					 * is the size of the string(s) included.

					 */

					if ((value_sz = i_get_value_size(type, data, nelem)) < 0)

					if ((value_sz = i_get_value_size(type, data, nelem, 0)) < 0)

						return (EINVAL);

					if (i_validate_nvpair_value(type, nelem, data) != 0)

				@@ -1588,7 +1612,7 @@ nvpair_value_common(const nvpair_t *nvp, data_type_t type, uint_t *nelem,

				#endif

						if (data == NULL)

							return (EINVAL);

						if ((value_sz = i_get_value_size(type, NULL, 1)) < 0)

						if ((value_sz = i_get_value_size(type, NULL, 1, 0)) < 0)

							return (EINVAL);

						memcpy(data, NVP_VALUE(nvp), (size_t)value_sz);

						if (nelem != NULL)

				@@ -3019,7 +3043,8 @@ nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)

					 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY

					 * is the size of the string(s) excluded.

					 */

					if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)

					if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp),

					    NVP_SIZE(nvp))) < 0)

						return (EFAULT);

					if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)

				@@ -3333,7 +3358,7 @@ nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)

					 * In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY

					 * is the size of the string(s) excluded.

					 */

					if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)

					/*
					 * Keep the '< 0' test outside the assignment's parentheses so
					 * that value_sz receives the computed size rather than the
					 * 0/1 result of the comparison.
					 */
					if ((value_sz = i_get_value_size(type, NULL, nelem,
					    NVP_SIZE(nvp))) < 0)
						return (EFAULT);

					/* if there is no data to extract then return */

									
										sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
									
		-3
	
												View File
												
				@@ -72,9 +72,6 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,

				ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, free_target,

				    param_set_arc_free_target, 0, CTLFLAG_RW,

					"Desired number of free pages below which ARC triggers reclaim");

				ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,

				    param_set_arc_no_grow_shift, 0, ZMOD_RW,

					"log2(fraction of ARC which must be free to allow growing)");

				int64_t

				arc_available_memory(void)

									
										sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
									
		+6
		-18
	
												View File
												
				@@ -292,7 +292,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)

				{

					int err, val;

					val = arc_no_grow_shift;

					val = zfs_arc_no_grow_shift;

					err = sysctl_handle_int(oidp, &val, 0, req);

					if (err != 0 || req->newptr == NULL)

						return (err);

				@@ -300,7 +300,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)

					if (val < 0 || val >= arc_shrink_shift)

						return (EINVAL);

					arc_no_grow_shift = val;

					zfs_arc_no_grow_shift = val;

					if (arg2 != 0)

						warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift");

				@@ -541,14 +541,14 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log,

				/*

				 * The in-core space map representation is more compact than its on-disk form.

				 * The zfs_condense_pct determines how much more compact the in-core

				 * The zfs_metaslab_condense_pct determines how much more compact the in-core

				 * space map representation must be before we compact it on-disk.

				 * Values should be greater than or equal to 100.

				 */

				extern uint_t zfs_condense_pct;

				extern uint_t zfs_metaslab_condense_pct;

				SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,

					CTLFLAG_RWTUN, &zfs_condense_pct, 0,

				SYSCTL_UINT(_vfs_zfs, OID_AUTO, metaslab_condense_pct,

					CTLFLAG_RWTUN, &zfs_metaslab_condense_pct, 0,

					"Condense on-disk spacemap when it is more than this many percents"

					" of in-memory counterpart");

				@@ -617,18 +617,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval,

					"Configuration cache file write, retry after failure, interval"

					" (seconds)");

				extern uint64_t zfs_max_missing_tvds_cachefile;

				SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile,

					CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0,

					"Allow importing pools with missing top-level vdevs in cache file");

				extern uint64_t zfs_max_missing_tvds_scan;

				SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan,

					CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0,

					"Allow importing pools with missing top-level vdevs during scan");

				/* spa_misc.c */

				extern int zfs_flags;

									
										sys/contrib/openzfs/module/os/linux/spl/spl-kmem.c
									
		+6
		
												View File
												
				@@ -188,6 +188,12 @@ spl_kvmalloc(size_t size, gfp_t lflags)

						return (ptr);

					}

					/*

					 * vmalloc fallback. KM_VMEM may not have been requested originally if

					 * we've come through spl_kmem_alloc_impl(), so we need to remove

					 * __GFP_COMP, which is not a valid flag for vmalloc.

					 */

					lflags &= ~__GFP_COMP;

					return (spl_vmalloc(size, lflags));

				}

									
										sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
									
		+18
		
												View File
												
				@@ -410,6 +410,24 @@ param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)

					return (0);

				}

				/*
				 * Module parameter setter for zfs_arc_no_grow_shift.
				 *
				 * Parses 'buf' with kstrtoul() and stores the result only when it is
				 * strictly below arc_shrink_shift.  A parse failure is handed back
				 * through SET_ERROR(); an out-of-range value yields -EINVAL.
				 */
				int
				param_set_arc_no_grow_shift(const char *buf, zfs_kernel_param_t *kp)
				{
					unsigned long shift;
					int rc = kstrtoul(buf, 0, &shift);

					if (rc != 0)
						return (SET_ERROR(rc));

					if (shift >= arc_shrink_shift)
						return (-SET_ERROR(EINVAL));

					zfs_arc_no_grow_shift = shift;
					return (0);
				}

				int

				param_set_l2arc_dwpd_limit(const char *buf, zfs_kernel_param_t *kp)

				{

									
										sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
									
		+7
		-1
	
												View File
												
				@@ -931,8 +931,14 @@ vdev_disk_io_rw(zio_t *zio)

						return (SET_ERROR(EIO));

					}

					vdev_t *iter = v;

					while (iter != NULL && iter->vdev_failfast == ZPROP_BOOLEAN_INHERIT)

						iter = iter->vdev_parent;

					boolean_t failfast = iter ? iter->vdev_failfast == 1 :

					    vdev_prop_default_numeric(VDEV_PROP_FAILFAST);

					if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&

					    v->vdev_failfast == B_TRUE) {

					    failfast) {

						bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,

						    zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);

					}

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
									
		+39
		-6
	
												View File
												
				@@ -1689,6 +1689,24 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs)

					return (0);

				}

				/*

				 * Return a referenced znode at or after zp.  The z_znodes_lock protects the

				 * list walk; the returned inode reference keeps the znode alive after the

				 * lock is dropped for zfs_rezget().

				 */

				static znode_t *
				zfs_resume_hold_next_znode(zfsvfs_t *zfsvfs, znode_t *zp)
				{
					znode_t *cand = zp;

					ASSERT(MUTEX_HELD(&zfsvfs->z_znodes_lock));

					/*
					 * Walk forward from 'zp' and stop at the first znode whose
					 * inode igrab() hands back; znodes for which igrab() returns
					 * NULL are passed over.
					 */
					while (cand != NULL) {
						if (igrab(ZTOI(cand)) != NULL)
							return (cand);
						cand = list_next(&zfsvfs->z_all_znodes, cand);
					}

					return (NULL);
				}

				/*

				 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset

				 * is an invariant across any of the operations that can be performed while the

				@@ -1732,13 +1750,23 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)

					 * dbufs.  If a zfs_rezget() fails, then we unhash the inode

					 * and mark it stale.  This prevents a collision if a new

					 * inode/object is created which must use the same inode

					 * number.  The stale inode will be be released when the

					 * VFS prunes the dentry holding the remaining references

					 * on the stale inode.

					 * number.  The stale inode will be released when the VFS

					 * prunes the dentry holding the remaining references on

					 * the stale inode.

					 *

					 * zfs_rezget() takes the per-object znode hold lock.  Pin each znode

					 * while holding z_znodes_lock, then drop the list lock before calling

					 * zfs_rezget() to preserve the normal zh_lock -> z_znodes_lock order.

					 */

					mutex_enter(&zfsvfs->z_znodes_lock);

					for (zp = list_head(&zfsvfs->z_all_znodes); zp;

					    zp = list_next(&zfsvfs->z_all_znodes, zp)) {

					zp = zfs_resume_hold_next_znode(zfsvfs,

					    list_head(&zfsvfs->z_all_znodes));

					while (zp != NULL) {

						znode_t *next = zfs_resume_hold_next_znode(zfsvfs,

						    list_next(&zfsvfs->z_all_znodes, zp));

						mutex_exit(&zfsvfs->z_znodes_lock);

						err2 = zfs_rezget(zp);

						if (err2) {

							zpl_d_drop_aliases(ZTOI(zp));

				@@ -1747,9 +1775,14 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)

						/* see comment in zfs_suspend_fs() */

						if (zp->z_suspended) {

							zfs_zrele_async(zp);

							zp->z_suspended = B_FALSE;

							zfs_zrele_async(zp);

						}

						zfs_zrele_async(zp);

						mutex_enter(&zfsvfs->z_znodes_lock);

						zp = next;

					}

					mutex_exit(&zfsvfs->z_znodes_lock);

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
									
		+30
		-8
	
												View File
												
				@@ -2434,9 +2434,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)

					    &zp->z_pflags, sizeof (zp->z_pflags));

					if (attrzp) {

						/*

						 * attrzp is zp's hidden xattr directory, so the second

						 * znode lock acquisition is nested rather than recursive.

						 */

						if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))

							mutex_enter(&attrzp->z_acl_lock);

						mutex_enter(&attrzp->z_lock);

							mutex_enter_nested(&attrzp->z_acl_lock, NESTED_SINGLE);

						mutex_enter_nested(&attrzp->z_lock, NESTED_SINGLE);

						SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,

						    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,

						    sizeof (attrzp->z_pflags));

				@@ -4074,18 +4078,32 @@ zfs_inactive(struct inode *ip)

				{

					znode_t	*zp = ITOZ(ip);

					zfsvfs_t *zfsvfs = ITOZSB(ip);

					krwlock_t *zti_lock = &zfsvfs->z_teardown_inactive_lock;

					uint64_t atime[2];

					int error;

					int need_unlock = 0;

					boolean_t no_lockdep = B_FALSE;

					/* Only read lock if we haven't already write locked, e.g. rollback */

					if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {

					if (!RW_WRITE_HELD(zti_lock)) {

						need_unlock = 1;

						rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);

						/*

						 * kswapd reaches evict_inode() with fs_reclaim held.  Suppress

						 * lockdep only for this reclaim-thread acquire/release pair.

						 */

						no_lockdep = current_is_reclaim_thread();

						if (no_lockdep)

							rw_enter_nolockdep(zti_lock, RW_READER);

						else

							rw_enter(zti_lock, RW_READER);

					}

					if (zp->z_sa_hdl == NULL) {

						if (need_unlock)

							rw_exit(&zfsvfs->z_teardown_inactive_lock);

						if (need_unlock) {

							if (no_lockdep)

								rw_exit_nolockdep(zti_lock);

							else

								rw_exit(zti_lock);

						}

						return;

					}

				@@ -4111,8 +4129,12 @@ zfs_inactive(struct inode *ip)

					}

					zfs_zinactive(zp);

					if (need_unlock)

						rw_exit(&zfsvfs->z_teardown_inactive_lock);

					if (need_unlock) {

						if (no_lockdep)

							rw_exit_nolockdep(zti_lock);

						else

							rw_exit(zti_lock);

					}

				}

				/*

									
										sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c
									
		+49
		-14
	
												View File
												
				@@ -550,10 +550,11 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)

				 *

				 * Finally, all filesystems get automatic handling for the 'source' option,

				 * that is, the "name" of the filesystem (the first column of df(1)'s output).

				 * However, this only happens if the handler does not otherwise handle

				 * the 'source' option. Since we handle _all_ options because of 'sloppy', we

				 * deal with this explicitly by calling into the kernel's helper for this,

				 * vfs_parse_fs_param_source(), which sets up fc->source.

				 * However, this only happens if the handler does not otherwise handle the

				 * 'source' option. Since we handle _all_ options because of 'sloppy', we have

				 * to handle it ourselves. Normally we would call vfs_parse_fs_param_source()

				 * to deal with this, but that didn't appear until 5.14, and it's small enough

				 * that we can just handle it ourselves.

				 *

				 *	source

				 *

				@@ -565,6 +566,7 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)

				 */

				enum {

					Opt_source,

					Opt_exec, Opt_suid, Opt_dev,

					Opt_atime, Opt_relatime, Opt_strictatime,

					Opt_saxattr, Opt_dirxattr, Opt_noxattr,

				@@ -574,6 +576,8 @@ enum {

				};

				static const struct fs_parameter_spec zpl_param_spec[] = {

					fsparam_string("source",	Opt_source),

					fsparam_flag_no("exec",		Opt_exec),

					fsparam_flag_no("suid",		Opt_suid),

					fsparam_flag_no("dev",		Opt_dev),

				@@ -609,18 +613,34 @@ static const struct fs_parameter_spec zpl_param_spec[] = {

					{}

				};

				/*

				 * Before 5.6, fs_parse() took a struct fs_parameter_description

				 * which wraps the parameter specs with name and enum pointers. From 5.6,

				 * the description struct was removed and fs_parse() accepts the

				 * fs_parameter_spec directly.

				 */

				static int

				zpl_fs_parse(struct fs_context *fc, struct fs_parameter *param,

					struct fs_parse_result *result)

				{

				#ifdef HAVE_FS_PARSE_TAKES_SPEC

					/* fs_parse() accepts the parameter spec table directly. */

					return (fs_parse(fc, zpl_param_spec, param, result));

				#else

					/*
					 * fs_parse() expects a description struct here, so wrap
					 * zpl_param_spec in one under the name "zfs".
					 */

					static const struct fs_parameter_description zpl_param_desc = {

						.name = "zfs",

						.specs = zpl_param_spec,

					};

					return (fs_parse(fc, &zpl_param_desc, param, result));

				#endif

				}

				static int

				zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)

				{

					vfs_t *vfs = fc->fs_private;

					/* Handle 'source' explicitly so we don't trip on it as an unknown. */

					int opt = vfs_parse_fs_param_source(fc, param);

					if (opt != -ENOPARAM)

						return (opt);

					struct fs_parse_result result;

					opt = fs_parse(fc, zpl_param_spec, param, &result);

					int opt = zpl_fs_parse(fc, param, &result);

					if (opt == -ENOPARAM) {

						/*

						 * Convert unknowns to warnings, to work around the whole

				@@ -632,6 +652,16 @@ zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)

						return (opt);

					switch (opt) {

					case Opt_source:

						if (fc->source != NULL) {

							cmn_err(CE_NOTE,

							    "ZFS: multiple 'source' options not supported");

							return (-SET_ERROR(EINVAL));

						}

						fc->source = param->string;

						param->string = NULL;

						break;

					case Opt_exec:

						vfs->vfs_exec = !result.negated;

						vfs->vfs_do_exec = B_TRUE;

				@@ -794,7 +824,7 @@ zpl_parse_monolithic(struct fs_context *fc, void *data)

						/* Check if this is one of our options. */

						struct fs_parse_result result;

						int opt = fs_parse(fc, zpl_param_spec, &param, &result);

						int opt = zpl_fs_parse(fc, &param, &result);

						if (opt >= 0) {

							/*

							 * We already know this is one of our options, so a

				@@ -874,9 +904,14 @@ zpl_get_tree(struct fs_context *fc)

					if (sb->s_root == NULL) {

						vfs_t *vfs = fc->fs_private;

						/* Apply readonly flag as mount option */

						if (fc->sb_flags & SB_RDONLY) {

							vfs->vfs_readonly = B_TRUE;

						/*

						 * If SB_RDONLY was set/cleared from mount options, update

						 * them in the options struct so we set up the filesystem

						 * in the proper state.

						 */

						if (fc->sb_flags_mask & SB_RDONLY) {

							vfs->vfs_readonly =

							    (fc->sb_flags & SB_RDONLY) ? B_TRUE : B_FALSE;

							vfs->vfs_do_readonly = B_TRUE;

						}

									
										sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
									
		+56
		-36
	
												View File
												
				@@ -701,6 +701,24 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,

				 * ZFS allows extended user attributes to be disabled administratively

				 * by setting the 'xattr=off' property on the dataset.

				 */

				/*

				 * Concatenate prefix + name into a NUL-terminated stack buffer.

				 * Linux fs/xattr.c (import_xattr_name) caps the full xattr name at

				 * XATTR_NAME_MAX before any handler runs, so XATTR_NAME_MAX + 1

				 * bytes always fit.

				 */

				static inline void
				zpl_xattr_join_name(char *buf, size_t buflen, const char *prefix,
				    size_t prefix_len, const char *name, size_t name_len)
				{
					/* Both pieces plus the terminating NUL must fit in 'buf'. */
					ASSERT3U(prefix_len + name_len + 1, <=, buflen);

					/* 'tail' marks where the name starts, right after the prefix. */
					char *tail = buf + prefix_len;

					memcpy(buf, prefix, prefix_len);
					memcpy(tail, name, name_len);
					tail[name_len] = '\0';
				}

				static int

				__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,

				    const char *name, size_t name_len)

				@@ -726,9 +744,13 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,

					 * try again without the namespace prefix for compatibility with

					 * other platforms.

					 */

					char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);

					char xattr_name[XATTR_NAME_MAX + 1];

					zpl_xattr_join_name(xattr_name, sizeof (xattr_name),

					    XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,

					    name, strlen(name));

					error = zpl_xattr_get(ip, xattr_name, value, size);

					kmem_strfree(xattr_name);

					if (error == -ENODATA)

						error = zpl_xattr_get(ip, name, value, size);

				@@ -758,8 +780,13 @@ __zpl_xattr_user_set(zidmap_t *user_ns,

					 *   XATTR_CREATE: fail if xattr already exists

					 *   XATTR_REPLACE: fail if xattr does not exist

					 */

					char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);

					char prefixed_name[XATTR_NAME_MAX + 1];

					const char *clear_name, *set_name;

					zpl_xattr_join_name(prefixed_name, sizeof (prefixed_name),

					    XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,

					    name, strlen(name));

					if (zfs_xattr_compat) {

						clear_name = prefixed_name;

						set_name = name;

				@@ -776,7 +803,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,

					 * because it already exists.  Stop here.

					 */

					if (error == -EEXIST)

						goto out;

						return (error);

					/*

					 * If XATTR_REPLACE was specified and we succeeded to clear

					 * an xattr, we don't need to replace anything when setting

				@@ -788,10 +815,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,

					/*

					 * Set the new value with the configured name format.

					 */

					error = zpl_xattr_set(ip, set_name, value, size, flags);

				out:

					kmem_strfree(prefixed_name);

					return (error);

					return (zpl_xattr_set(ip, set_name, value, size, flags));

				}

				ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);

				@@ -824,17 +848,16 @@ static int

				__zpl_xattr_trusted_get(struct inode *ip, const char *name,

				    void *value, size_t size)

				{

					char *xattr_name;

					int error;

					char xattr_name[XATTR_NAME_MAX + 1];

					if (!capable(CAP_SYS_ADMIN))

						return (-EACCES);

					/* xattr_resolve_name will do this for us if this is defined */

					xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);

					error = zpl_xattr_get(ip, xattr_name, value, size);

					kmem_strfree(xattr_name);

					return (error);

					zpl_xattr_join_name(xattr_name, sizeof (xattr_name),

					    XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,

					    name, strlen(name));

					return (zpl_xattr_get(ip, xattr_name, value, size));

				}

				ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);

				@@ -844,17 +867,16 @@ __zpl_xattr_trusted_set(zidmap_t *user_ns,

				    const void *value, size_t size, int flags)

				{

					(void) user_ns;

					char *xattr_name;

					int error;

					char xattr_name[XATTR_NAME_MAX + 1];

					if (!capable(CAP_SYS_ADMIN))

						return (-EACCES);

					/* xattr_resolve_name will do this for us if this is defined */

					xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);

					error = zpl_xattr_set(ip, xattr_name, value, size, flags);

					kmem_strfree(xattr_name);

					return (error);

					zpl_xattr_join_name(xattr_name, sizeof (xattr_name),

					    XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,

					    name, strlen(name));

					return (zpl_xattr_set(ip, xattr_name, value, size, flags));

				}

				ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);

				@@ -889,14 +911,13 @@ static int

				__zpl_xattr_security_get(struct inode *ip, const char *name,

				    void *value, size_t size)

				{

					char *xattr_name;

					int error;

					/* xattr_resolve_name will do this for us if this is defined */

					xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);

					error = zpl_xattr_get(ip, xattr_name, value, size);

					kmem_strfree(xattr_name);

					char xattr_name[XATTR_NAME_MAX + 1];

					return (error);

					zpl_xattr_join_name(xattr_name, sizeof (xattr_name),

					    XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,

					    name, strlen(name));

					return (zpl_xattr_get(ip, xattr_name, value, size));

				}

				ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);

				@@ -906,14 +927,13 @@ __zpl_xattr_security_set(zidmap_t *user_ns,

				    const void *value, size_t size, int flags)

				{

					(void) user_ns;

					char *xattr_name;

					int error;

					/* xattr_resolve_name will do this for us if this is defined */

					xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);

					error = zpl_xattr_set(ip, xattr_name, value, size, flags);

					kmem_strfree(xattr_name);

					char xattr_name[XATTR_NAME_MAX + 1];

					return (error);

					zpl_xattr_join_name(xattr_name, sizeof (xattr_name),

					    XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,

					    name, strlen(name));

					return (zpl_xattr_set(ip, xattr_name, value, size, flags));

				}

				ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);

									
										sys/contrib/openzfs/module/zcommon/zfs_valstr.c
									
		+1
		-1
	
												View File
												
				@@ -238,11 +238,11 @@ _VALSTR_BITFIELD_IMPL(zio_stage,

					{ 'E', "EN", "ENCRYPT" },

					{ 'C', "CG", "CHECKSUM_GENERATE" },

					{ 'N', "NW", "NOP_WRITE" },

					{ 'B', "BF", "BRT_FREE" },

					{ 'd', "dS", "DDT_READ_START" },

					{ 'd', "dD", "DDT_READ_DONE" },

					{ 'd', "dW", "DDT_WRITE" },

					{ 'd', "dF", "DDT_FREE" },

					{ 'B', "BF", "BRT_FREE" },

					{ 'G', "GA", "GANG_ASSEMBLE" },

					{ 'G', "GI", "GANG_ISSUE" },

					{ 'D', "DT", "DVA_THROTTLE" },

									
										sys/contrib/openzfs/module/zcommon/zpool_prop.c
									
		+24
		-3
	
												View File
												
				@@ -374,10 +374,16 @@ vdev_prop_init(void)

						{ "on",		1},

						{ NULL }

					};

					static const zprop_index_t boolean_inherit_table[] = {

						{ "off",	0},

						{ "on",		1},

						{ "inherit",	ZPROP_BOOLEAN_INHERIT},

						{ NULL }

					};

					static const zprop_index_t boolean_na_table[] = {

						{ "off",	0},

						{ "on",		1},

						{ "-",		2},	/* ZPROP_BOOLEAN_NA */

						{ "-",		ZPROP_BOOLEAN_NA},

						{ NULL }

					};

				@@ -388,6 +394,14 @@ vdev_prop_init(void)

						{ NULL }

					};

					static const zprop_index_t vdev_alloc_bias_table[] = {

						{ "none",	VDEV_BIAS_NONE },

						{ "log",	VDEV_BIAS_LOG },

						{ "special",	VDEV_BIAS_SPECIAL },

						{ "dedup",	VDEV_BIAS_DEDUP },

						{ NULL }

					};

					struct zfs_mod_supported_features *sfeatures =

					    zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);

				@@ -547,8 +561,8 @@ vdev_prop_init(void)

					/* default index properties */

					zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,

					    PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,

					    sfeatures);

					    PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off | inherit", "FAILFAST",

					    boolean_inherit_table, sfeatures);

					zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",

					    B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",

					    "SLOW_IO_EVENTS", boolean_table, sfeatures);

				@@ -556,6 +570,13 @@ vdev_prop_init(void)

					    VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,

					    "auto | on | off", "IO_SCHEDULER",

					    vdevschedulertype_table, sfeatures);

					zprop_register_index(VDEV_PROP_ALLOC_BIAS, "alloc_bias",

					    VDEV_BIAS_NONE, PROP_DEFAULT, ZFS_TYPE_VDEV,

					    "none | log | special | dedup", "ALLOC_BIAS",

					    vdev_alloc_bias_table, sfeatures);

					zprop_register_index(VDEV_PROP_ROTATIONAL, "rotational", 0,

					    PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "ROTATIONAL",

					    boolean_table, sfeatures);

					/* hidden properties */

					zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

									
										sys/contrib/openzfs/module/zfs/arc.c
									
		+12
		-18
	
												View File
												
				@@ -398,14 +398,14 @@ uint_t zfs_arc_pc_percent = 0;

				/*

				 * log2(fraction of ARC which must be free to allow growing).

				 * I.e. If there is less than arc_c >> arc_no_grow_shift free memory,

				 * I.e. If there is less than arc_c >> zfs_arc_no_grow_shift free memory,

				 * when reading a new block into the ARC, we will evict an equal-sized block

				 * from the ARC.

				 *

				 * This must be less than arc_shrink_shift, so that when we shrink the ARC,

				 * we will still not allow it to grow.

				 */

				uint_t		arc_no_grow_shift = 5;

				uint_t		zfs_arc_no_grow_shift = 5;

				/*

				@@ -586,6 +586,7 @@ arc_stats_t arc_stats = {

					{ "uncached_metadata",		KSTAT_DATA_UINT64 },

					{ "uncached_evictable_data",	KSTAT_DATA_UINT64 },

					{ "uncached_evictable_metadata", KSTAT_DATA_UINT64 },

					{ "l2_ndev",			KSTAT_DATA_UINT64 },

					{ "l2_hits",			KSTAT_DATA_UINT64 },

					{ "l2_misses",			KSTAT_DATA_UINT64 },

					{ "l2_prefetch_asize",		KSTAT_DATA_UINT64 },

				@@ -4975,7 +4976,7 @@ arc_reap_cb_check(void *arg, zthr_t *zthr)

						 */

						arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);

						return (B_TRUE);

					} else if (free_memory < arc_c >> arc_no_grow_shift) {

					} else if (free_memory < arc_c >> zfs_arc_no_grow_shift) {

						arc_no_grow = B_TRUE;

					} else if (gethrtime() >= arc_growtime) {

						arc_no_grow = B_FALSE;

				@@ -5571,20 +5572,6 @@ arc_buf_access(arc_buf_t *buf)

					    !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);

				}

				/* a generic arc_read_done_func_t which you can use */

				void

				arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,

				    arc_buf_t *buf, void *arg)

				{

					(void) zio, (void) zb, (void) bp;

					if (buf == NULL)

						return;

					memcpy(arg, buf->b_data, arc_buf_size(buf));

					arc_buf_destroy(buf, arg);

				}

				/* a generic arc_read_done_func_t */

				void

				arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,

				@@ -7440,6 +7427,7 @@ arc_kstat_update(kstat_t *ksp, int rw)

					    aggsum_value(&arc_sums.arcstat_dnode_size);

					as->arcstat_bonus_size.value.ui64 =

					    wmsum_value(&arc_sums.arcstat_bonus_size);

					as->arcstat_l2_ndev.value.ui64 = l2arc_ndev;

					as->arcstat_l2_hits.value.ui64 =

					    wmsum_value(&arc_sums.arcstat_l2_hits);

					as->arcstat_l2_misses.value.ui64 =

				@@ -7654,7 +7642,8 @@ arc_tuning_update(boolean_t verbose)

					/* Valid range: 1 - N */

					if (zfs_arc_shrink_shift) {

						arc_shrink_shift = zfs_arc_shrink_shift;

						arc_no_grow_shift = MIN(arc_no_grow_shift, arc_shrink_shift -1);

						zfs_arc_no_grow_shift = MIN(zfs_arc_no_grow_shift,

						    arc_shrink_shift - 1);

					}

					/* Valid range: 1 - N ms */

				@@ -11683,6 +11672,7 @@ EXPORT_SYMBOL(arc_write);

				EXPORT_SYMBOL(arc_read);

				EXPORT_SYMBOL(arc_buf_info);

				EXPORT_SYMBOL(arc_getbuf_func);

				EXPORT_SYMBOL(arc_buf_destroy);

				EXPORT_SYMBOL(arc_add_prune_callback);

				EXPORT_SYMBOL(arc_remove_prune_callback);

				@@ -11701,6 +11691,10 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,

				ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,

					param_get_uint, ZMOD_RW, "log2(fraction of ARC to reclaim)");

				ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,

					param_set_arc_no_grow_shift, param_get_uint, ZMOD_RW,

					"log2(fraction of ARC which must be free to allow growing)");

				#ifdef _KERNEL

				ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,

					"Percent of pagecache to reclaim ARC to");

									
										sys/contrib/openzfs/module/zfs/ddt_log.c
									
		+2
		-2
	
												View File
												
				@@ -221,7 +221,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)

					uint64_t length = nblocks * dlu->dlu_dn->dn_datablksz;

					VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length,

					    B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp,

					    B_FALSE, dlu, &dlu->dlu_ndbp, &dlu->dlu_dbp,

					    DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO));

					dlu->dlu_tx = tx;

				@@ -338,7 +338,7 @@ ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu)

					 */

					dmu_buf_fill_done(dlu->dlu_dbp[dlu->dlu_block], dlu->dlu_tx, B_FALSE);

					dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, FTAG);

					dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, dlu);

					ddt->ddt_log_active->ddl_length +=

					    dlu->dlu_ndbp * (uint64_t)dlu->dlu_dn->dn_datablksz;

									
										sys/contrib/openzfs/module/zfs/dmu_objset.c
									
		+1
		-1
	
												View File
												
				@@ -1859,7 +1859,7 @@ do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)

					    &cookie)) != NULL) {

						/*

						 * os_userused_lock protects against concurrent calls to

						 * zap_increment_int().  It's needed because zap_increment_int()

						 * zap_increment().  It's needed because zap_increment()

						 * is not thread-safe (i.e. not atomic).

						 */

						mutex_enter(&os->os_userused_lock);

									
										sys/contrib/openzfs/module/zfs/dmu_recv.c
									
		+31
		-13
	
												View File
												
				@@ -2901,16 +2901,20 @@ receive_read_record(dmu_recv_cookie_t *drc)

					{

						struct drr_object *drro =

						    &drc->drc_rrd->header.drr_u.drr_object;

						uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);

						uint32_t size;

						void *buf = NULL;

						dmu_object_info_t doi;

						size = DRR_OBJECT_PAYLOAD_SIZE(drro);

						if (size > SPA_MAXBLOCKSIZE)

							return (SET_ERROR(ERANGE));

						if (size != 0)

							buf = kmem_zalloc(size, KM_SLEEP);

							buf = vmem_zalloc(size, KM_SLEEP);

						err = receive_read_payload_and_next_header(drc, size, buf);

						if (err != 0) {

							kmem_free(buf, size);

							vmem_free(buf, size);

							return (err);

						}

						err = dmu_object_info(drc->drc_os, drro->drr_object, &doi);

				@@ -2934,7 +2938,11 @@ receive_read_record(dmu_recv_cookie_t *drc)

					case DRR_WRITE:

					{

						struct drr_write *drrw = &drc->drc_rrd->header.drr_u.drr_write;

						int size = DRR_WRITE_PAYLOAD_SIZE(drrw);

						uint64_t size = DRR_WRITE_PAYLOAD_SIZE(drrw);

						if (size > SPA_MAXBLOCKSIZE)

							return (SET_ERROR(ERANGE));

						abd_t *abd = abd_alloc_linear(size, B_FALSE);

						err = receive_read_payload_and_next_header(drc, size,

						    abd_to_buf(abd));

				@@ -2951,12 +2959,18 @@ receive_read_record(dmu_recv_cookie_t *drc)

					{

						struct drr_write_embedded *drrwe =

						    &drc->drc_rrd->header.drr_u.drr_write_embedded;

						uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);

						void *buf = kmem_zalloc(size, KM_SLEEP);

						uint32_t size;

						void *buf;

						size = P2ROUNDUP(drrwe->drr_psize, 8);

						if (size > SPA_MAXBLOCKSIZE)

							return (SET_ERROR(ERANGE));

						buf = vmem_zalloc(size, KM_SLEEP);

						err = receive_read_payload_and_next_header(drc, size, buf);

						if (err != 0) {

							kmem_free(buf, size);

							vmem_free(buf, size);

							return (err);

						}

				@@ -2985,7 +2999,11 @@ receive_read_record(dmu_recv_cookie_t *drc)

					case DRR_SPILL:

					{

						struct drr_spill *drrs = &drc->drc_rrd->header.drr_u.drr_spill;

						int size = DRR_SPILL_PAYLOAD_SIZE(drrs);

						uint64_t size = DRR_SPILL_PAYLOAD_SIZE(drrs);

						if (size > SPA_MAXBLOCKSIZE)

							return (SET_ERROR(ERANGE));

						abd_t *abd = abd_alloc_linear(size, B_FALSE);

						err = receive_read_payload_and_next_header(drc, size,

						    abd_to_buf(abd));

				@@ -3136,7 +3154,7 @@ receive_process_record(struct receive_writer_arg *rwa,

							abd_free(rrd->abd);

							rrd->abd = NULL;

						} else if (rrd->payload != NULL) {

							kmem_free(rrd->payload, rrd->payload_size);

							vmem_free(rrd->payload, rrd->payload_size);

							rrd->payload = NULL;

						}

						return (0);

				@@ -3150,7 +3168,7 @@ receive_process_record(struct receive_writer_arg *rwa,

								rrd->abd = NULL;

								rrd->payload = NULL;

							} else if (rrd->payload != NULL) {

								kmem_free(rrd->payload, rrd->payload_size);

								vmem_free(rrd->payload, rrd->payload_size);

								rrd->payload = NULL;

							}

				@@ -3163,7 +3181,7 @@ receive_process_record(struct receive_writer_arg *rwa,

					{

						struct drr_object *drro = &rrd->header.drr_u.drr_object;

						err = receive_object(rwa, drro, rrd->payload);

						kmem_free(rrd->payload, rrd->payload_size);

						vmem_free(rrd->payload, rrd->payload_size);

						rrd->payload = NULL;

						break;

					}

				@@ -3201,7 +3219,7 @@ receive_process_record(struct receive_writer_arg *rwa,

						struct drr_write_embedded *drrwe =

						    &rrd->header.drr_u.drr_write_embedded;

						err = receive_write_embedded(rwa, drrwe, rrd->payload);

						kmem_free(rrd->payload, rrd->payload_size);

						vmem_free(rrd->payload, rrd->payload_size);

						rrd->payload = NULL;

						break;

					}

				@@ -3270,7 +3288,7 @@ receive_writer_thread(void *arg)

							rrd->abd = NULL;

							rrd->payload = NULL;

						} else if (rrd->payload != NULL) {

							kmem_free(rrd->payload, rrd->payload_size);

							vmem_free(rrd->payload, rrd->payload_size);

							rrd->payload = NULL;

						}

						/*

									
										sys/contrib/openzfs/module/zfs/dmu_send.c
									
		+35
		-2
	
												View File
												
				@@ -2241,6 +2241,37 @@ setup_send_progress(struct dmu_send_params *dspp)

					return (dssp);

				}

				/*

				 * Payloads must be multiples of 8 bytes for historical compatibility, but

				 * XDR-encoded nvlists are sized in multiples of 4 bytes and may need padding.

				 *

				 * Here we do the simplest possible thing and copy the data to a separate

				 * buffer. Not ideal in terms of performance and memory use, but most BEGIN

				 * nvlists are small or absent, the allocation is momentary, and we'll need

				 * to do this at most once per dataset.

				 *

				 * It's OK if there is extra data after a packed nvlist on the receiving

				 * side because packed nvlists have an internal end-of-list marker.

				 *

				 * The new buffer is allocated with kmem_alloc() and can be freed with

				 * fnvlist_pack_free(), like the original.

				 */

				static inline void
				pad_packed_nvlist(char **buffer, size_t *size)
				{
					const size_t packed_len = *size;
					const size_t padded_len = P2ROUNDUP(packed_len, 8);

					/* Already a multiple of 8 bytes: leave the caller's buffer alone. */
					if (padded_len == packed_len)
						return;

					/* Copy into a larger buffer and zero the trailing pad bytes. */
					char *padded = kmem_alloc(padded_len, KM_SLEEP);
					memcpy(padded, *buffer, packed_len);
					memset(padded + packed_len, 0, padded_len - packed_len);

					/* Release the original packed buffer and hand back the padded one. */
					fnvlist_pack_free(*buffer, packed_len);
					*buffer = padded;
					*size = padded_len;
				}

				/*

				 * Actually do the bulk of the work in a zfs send.

				 *

				@@ -2474,7 +2505,7 @@ dmu_send_impl(struct dmu_send_params *dspp)

					dsl_pool_rele(dp, tag);

					void *payload = NULL;

					char *payload = NULL;

					size_t payload_len = 0;

					nvlist_t *nvl = fnvlist_alloc();

				@@ -2548,7 +2579,9 @@ dmu_send_impl(struct dmu_send_params *dspp)

					}

					if (!nvlist_empty(nvl)) {

						payload = fnvlist_pack(nvl, &payload_len);

						VERIFY0(nvlist_pack(nvl, &payload, &payload_len,

						    NV_ENCODE_XDR, KM_SLEEP));

						pad_packed_nvlist(&payload, &payload_len);

						drr->drr_payloadlen = payload_len;

					}

									
										sys/contrib/openzfs/module/zfs/dsl_bookmark.c
									
		+1
		-1
	
												View File
												
				@@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,

						} else {

							dmu_buf_t *db;

							VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,

							    DB_RF_MUST_SUCCEED, FTAG, &db));

							    DB_RF_MUST_SUCCEED, tag, &db));

							dmu_buf_will_fill(db, tx, B_FALSE);

							VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,

							    SPA_MINBLOCKSIZE), tx));

									
										sys/contrib/openzfs/module/zfs/dsl_dir.c
									
		+68
		-25
	
												View File
												
				@@ -1534,9 +1534,28 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)

				}

				/* call from syncing context when we actually write/free space for this dd */

				void

				dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,

				    int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)

				static void dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,

				    int64_t compressed, int64_t uncompressed, int64_t tonew,

				    dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx);

				static void
				dsl_dir_lock_enter(dsl_dir_t *dd, boolean_t nested)
				{
					/* Plain acquisition of dd_lock when no nesting was requested. */
					if (!nested) {
						mutex_enter(&dd->dd_lock);
						return;
					}

					/*
					 * A child dd_lock nesting an ancestor's dd_lock has to be
					 * taken with an explicit subclass so that lockdep accepts it.
					 */
					mutex_enter_nested(&dd->dd_lock, NESTED_SINGLE);
				}

				static void

				dsl_dir_diduse_space_impl(dsl_dir_t *dd, dd_used_t type,

				    int64_t used, int64_t compressed, int64_t uncompressed,

				    boolean_t nested, dmu_tx_t *tx)

				{

					int64_t accounted_delta;

				@@ -1554,7 +1573,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,

					 */

					boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);

					if (needlock)

						mutex_enter(&dd->dd_lock);

						dsl_dir_lock_enter(dd, nested);

					dsl_dir_phys_t *ddp = dsl_dir_phys(dd);

					accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);

					ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);

				@@ -1582,12 +1601,20 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,

						mutex_exit(&dd->dd_lock);

					if (dd->dd_parent != NULL) {

						dsl_dir_diduse_transfer_space(dd->dd_parent,

						dsl_dir_diduse_transfer_space_impl(dd->dd_parent,

						    accounted_delta, compressed, uncompressed,

						    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);

						    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);

					}

				}

				/*
				 * Non-static wrapper: forwards every argument unchanged to
				 * dsl_dir_diduse_space_impl(), passing B_FALSE for its nested flag.
				 */
				void

				dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used,

				    int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)

				{

					dsl_dir_diduse_space_impl(dd, type, used, compressed, uncompressed,

					    B_FALSE, tx);

				}

				void

				dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,

				    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)

				@@ -1612,10 +1639,10 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,

					mutex_exit(&dd->dd_lock);

				}

				void

				dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,

				static void

				dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,

				    int64_t compressed, int64_t uncompressed, int64_t tonew,

				    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)

					dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx)

				{

					int64_t accounted_delta;

				@@ -1625,7 +1652,7 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,

					dmu_buf_will_dirty(dd->dd_dbuf, tx);

					mutex_enter(&dd->dd_lock);

					dsl_dir_lock_enter(dd, nested);

					dsl_dir_phys_t *ddp = dsl_dir_phys(dd);

					accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);

					ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);

				@@ -1656,12 +1683,21 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,

					mutex_exit(&dd->dd_lock);

					if (dd->dd_parent != NULL) {

						dsl_dir_diduse_transfer_space(dd->dd_parent,

						dsl_dir_diduse_transfer_space_impl(dd->dd_parent,

						    accounted_delta, compressed, uncompressed,

						    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);

						    used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);

					}

				}

				/*
				 * Non-static wrapper: forwards every argument unchanged to
				 * dsl_dir_diduse_transfer_space_impl(), passing B_FALSE for its
				 * nested flag.
				 */
				void

				dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,

				    int64_t compressed, int64_t uncompressed, int64_t tonew,

				    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)

				{

					dsl_dir_diduse_transfer_space_impl(dd, used, compressed,

					    uncompressed, tonew, oldtype, newtype, B_FALSE, tx);

				}

				typedef struct dsl_dir_set_qr_arg {

					const char *ddsqra_name;

					zprop_source_t ddsqra_source;

				@@ -1828,8 +1864,8 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)

					if (dd->dd_parent != NULL) {

						/* Roll up this additional usage into our ancestors */

						dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,

						    delta, 0, 0, tx);

						dsl_dir_diduse_space_impl(dd->dd_parent, DD_USED_CHILD_RSRV,

						    delta, 0, 0, B_TRUE, tx);

					}

					mutex_exit(&dd->dd_lock);

				}

				@@ -2268,22 +2304,29 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx)

				{

					dsl_pool_t *dp = dmu_tx_pool(tx);

					inode_timespec_t t;

					ASSERT(dsl_pool_sync_context(dp));

					gethrestime(&t);

					mutex_enter(&dd->dd_lock);

					dd->dd_snap_cmtime = t;

					if (spa_feature_is_enabled(dp->dp_spa,

					    SPA_FEATURE_EXTENSIBLE_DATASET)) {

						objset_t *mos = dd->dd_pool->dp_meta_objset;

						uint64_t ddobj = dd->dd_object;

						dsl_dir_zapify(dd, tx);

						VERIFY0(zap_update(mos, ddobj,

						    DD_FIELD_SNAPSHOTS_CHANGED,

						    sizeof (uint64_t),

						    sizeof (inode_timespec_t) / sizeof (uint64_t),

						    &t, tx));

					}

					mutex_exit(&dd->dd_lock);

					if (!spa_feature_is_enabled(dp->dp_spa,

					    SPA_FEATURE_EXTENSIBLE_DATASET)) {

						return;

					}

					objset_t *mos = dd->dd_pool->dp_meta_objset;

					/*

					 * dsl_dir_zapify() and zap_update() may dirty buffers and recurse

					 * into space accounting, so do not call them with dd_lock held.

					 */

					dsl_dir_zapify(dd, tx);

					VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_SNAPSHOTS_CHANGED,

					    sizeof (uint64_t),

					    sizeof (inode_timespec_t) / sizeof (uint64_t), &t, tx));

				}

				void

									
										sys/contrib/openzfs/module/zfs/dsl_scan.c
									
		+1
		
												View File
												
				@@ -1280,6 +1280,7 @@ dsl_errorscrub_pause_resume_sync(void *arg, dmu_tx_t *tx)

						spa->spa_scan_pass_errorscrub_pause = gethrestime_sec();

						scn->errorscrub_phys.dep_paused_flags = B_TRUE;

						dsl_errorscrub_sync_state(scn, tx);

						zap_cursor_fini(&scn->errorscrub_cursor);

						spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_PAUSED);

					} else {

						ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);

									
										sys/contrib/openzfs/module/zfs/gzip.c
									
		+5
		-1
	
												View File
												
				@@ -96,13 +96,17 @@ zfs_gzip_decompress_buf(void *s_start, void *d_start, size_t s_len,

					/* check if hardware accelerator can be used */

					if (qat_dc_use_accel(d_len)) {

						if (qat_compress(QAT_DECOMPRESS, s_start, s_len,

						    d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)

						    d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS) {

							if ((size_t)dstlen == d_len)

								return (0);

						}

						/* if hardware decompression fails, do it again in software */

					}

					if (uncompress_func(d_start, &dstlen, s_start, s_len) != Z_OK)

						return (-1);

					if ((size_t)dstlen != d_len)

						return (-1);

					return (0);

				}

									
										sys/contrib/openzfs/module/zfs/lz4_zfs.c
									
		+11
		-4
	
												View File
												
				@@ -89,17 +89,24 @@ zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,

					(void) n;

					const char *src = s_start;

					uint32_t bufsiz = BE_IN32(src);

					int decoded;

					/* invalid compressed buffer size encoded at start */

					if (bufsiz + sizeof (bufsiz) > s_len)

						return (1);

					/*

					 * Returns 0 on success (decompression function returned non-negative)

					 * and non-zero on failure (decompression function returned negative).

					 * LZ4_uncompress_unknownOutputSize returns the number of bytes decoded

					 * on success, or a negative value on failure. An OpenZFS block must

					 * expand to exactly d_len bytes

					 */

					return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],

					    d_start, bufsiz, d_len) < 0);

					decoded = LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],

					    d_start, bufsiz, d_len);

					if (decoded < 0)

						return (1);

					if (d_len != (size_t)decoded)

						return (1);

					return (0);

				}

				ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)

									
										sys/contrib/openzfs/module/zfs/metaslab.c
									
		+26
		-5
	
												View File
												
				@@ -82,11 +82,11 @@ int zfs_metaslab_sm_blksz_with_log = (1 << 17);

				/*

				 * The in-core space map representation is more compact than its on-disk form.

				 * The zfs_condense_pct determines how much more compact the in-core

				 * The zfs_metaslab_condense_pct determines how much more compact the in-core

				 * space map representation must be before we compact it on-disk.

				 * Values should be greater than or equal to 100.

				 */

				uint_t zfs_condense_pct = 200;

				uint_t zfs_metaslab_condense_pct = 200;

				/*

				 * Condensing a metaslab is not guaranteed to actually reduce the amount of

				@@ -3826,8 +3826,8 @@ metaslab_group_preload(metaslab_group_t *mg)

				 *    increase as a result of writing out the free space range tree.

				 *

				 * 2. Condense if the on-disk space map representation is at least

				 *    zfs_condense_pct/100 times the size of the optimal representation

				 *    (i.e. zfs_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).

				 *    zfs_metaslab_condense_pct/100 times the size of the optimal representation

				 *    (i.e. zfs_metaslab_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).

				 *

				 * 3. Do not condense if the on-disk size of the space map does not actually

				 *    decrease.

				@@ -3863,7 +3863,8 @@ metaslab_should_condense(metaslab_t *msp)

					uint64_t optimal_size = space_map_estimate_optimal_size(sm,

					    msp->ms_allocatable, SM_NO_VDEVID);

					return (object_size >= (optimal_size * zfs_condense_pct / 100) &&

					return (object_size >=

					    (optimal_size * zfs_metaslab_condense_pct / 100) &&

					    object_size > zfs_metaslab_condense_block_threshold * record_size);

				}

				@@ -6442,6 +6443,14 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,

				ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,

					"When looking in size tree, use largest segment instead of exact fit");

				ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_alloc_threshold, U64, ZMOD_RW,

					"Minimum size which forces the dynamic allocator to change its "

					"allocation strategy");

				ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_free_pct, UINT, ZMOD_RW,

					"The minimum free space, in percent, to continue allocations in a "

					"first-fit fashion");

				ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64,

					ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");

				@@ -6454,6 +6463,18 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,

				ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,

					"Normally only consider this many of the best metaslabs in each vdev");

				ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_no_log, INT, ZMOD_RW,

					"Block size for space map in pools with log space map disabled.  "

					"Power of 2 greater than 4096.");

				ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_with_log, INT, ZMOD_RW,

					"Block size for space map in pools with log space map enabled.  "

					"Power of 2 greater than 4096.");

				ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator,

					param_set_active_allocator, param_get_charp, ZMOD_RW,

					"SPA active allocator");

				ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, condense_pct, UINT, ZMOD_RW,

					"Condense on-disk spacemap when it is more than this many percents "

					"of in-memory counterpart");

									
										sys/contrib/openzfs/module/zfs/sa.c
									
		+2
		-2
	
												View File
												
				@@ -1605,8 +1605,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)

					bulk = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

					attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);

					mutex_enter(&hdl->sa_lock);

					mutex_enter(&zp->z_lock);

					mutex_enter(&hdl->sa_lock);

					err = sa_lookup_locked(hdl, SA_ZPL_PROJID(zfsvfs), &projid,

					    sizeof (uint64_t));

				@@ -1750,8 +1750,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)

					zp->z_is_sa = B_TRUE;

				out:

					mutex_exit(&zp->z_lock);

					mutex_exit(&hdl->sa_lock);

					mutex_exit(&zp->z_lock);

					kmem_free(attrs, sizeof (sa_bulk_attr_t) * ZPL_END);

					kmem_free(bulk, sizeof (sa_bulk_attr_t) * ZPL_END);

					if (dxattr_obj)

									
										sys/contrib/openzfs/module/zfs/spa.c
									
		+22
		-7
	
												View File
												
				@@ -8333,12 +8333,20 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,

						return (spa_vdev_exit(spa, newrootvd, txg, error));

					/*

					 * log, dedup and special vdevs should not be replaced by spares.

					 * Spares can't replace logs

					 */

					if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||

					    oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {

					if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)

						return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

					/*

					 * For special and dedup vdevs a spare must have matching rotational

					 * characteristics.  A rotating spare replacing a non-rotating vdev

					 * would silently degrade pool performance, so we reject the mismatch.

					 */

					if (newvd->vdev_isspare &&

					    oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE &&

					    newvd->vdev_nonrot != oldvd->vdev_nonrot)

						return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

					}

					/*

					 * A dRAID spare can only replace a child of its parent dRAID vdev.

				@@ -11011,6 +11019,10 @@ spa_sync(spa_t *spa, uint64_t txg)

						ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);

					}

					for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;

					    vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))

						vdev_sync_dispatch(vd, txg);

					spa_sync_rewrite_vdev_config(spa, tx);

					dmu_tx_commit(tx);

				@@ -11035,9 +11047,6 @@ spa_sync(spa_t *spa, uint64_t txg)

					dsl_pool_sync_done(dp, txg);

					/*

					 * Update usable space statistics.

					 */

					while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))

					    != NULL)

						vdev_sync_done(vd, txg);

				@@ -11811,6 +11820,12 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,

					"Allow importing pool with up to this number of missing top-level "

					"vdevs (in read-only mode)");

				ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_cachefile, U64, ZMOD_RW,

					"Allow importing pools with missing top-level vdevs in cache file");

				ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_scan, U64, ZMOD_RW,

					"Allow importing pools with missing top-level vdevs during scan");

				ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT,

					ZMOD_RW, "Set the livelist condense zthr to pause");

									
										sys/contrib/openzfs/module/zfs/vdev.c
									
		+165
		-19
	
												View File
												
				@@ -460,6 +460,7 @@ vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)

					} else if (vd->vdev_leaf_zap != 0) {

						*objid = vd->vdev_leaf_zap;

					} else {

						*objid = 0;

						return (EINVAL);

					}

				@@ -474,8 +475,11 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)

					uint64_t objid;

					int err;

					if (vdev_prop_get_objid(vd, &objid) != 0)

						return (EINVAL);

					if (vdev_prop_get_objid(vd, &objid) != 0) {

						/* No ZAP: property was never set, return the default. */

						*value = vdev_prop_default_numeric(prop);

						return (ENOENT);

					}

					err = zap_lookup(mos, objid, vdev_prop_to_name(prop),

					    sizeof (uint64_t), 1, value);

				@@ -963,6 +967,20 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,

					    &vd->vdev_wholedisk) != 0)

						vd->vdev_wholedisk = -1ULL;

					/*

					 * Restore the last-known rotational status for leaf vdevs.  vdev_open()

					 * will overwrite this with the hardware value when the device is

					 * accessible; the persisted value acts as a fallback for failed or

					 * missing devices so that spare selection can still match on device

					 * type even when the original disk is gone.

					 */

					if (vd->vdev_ops->vdev_op_leaf) {

						uint64_t rotational = 0;

						if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,

						    &rotational) == 0)

							vd->vdev_nonrot = !rotational;

					}

					vic = &vd->vdev_indirect_config;

					ASSERT0(vic->vic_mapping_object);

				@@ -1117,6 +1135,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,

					if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))

						vd->vdev_autosit =

						    vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);

					if (ops == &vdev_root_ops)

						vd->vdev_failfast =

						    vdev_prop_default_numeric(VDEV_PROP_FAILFAST);

					else

						vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;

					/*

					 * Add ourselves to the parent's list of children.

				@@ -3912,10 +3935,9 @@ vdev_load(vdev_t *vd)

						    vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),

						    1, &failfast);

						if (error == 0) {

							vd->vdev_failfast = failfast & 1;

							vd->vdev_failfast = failfast;

						} else if (error == ENOENT) {

							vd->vdev_failfast = vdev_prop_default_numeric(

							    VDEV_PROP_FAILFAST);

							vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;

						} else {

							vdev_dbgmsg(vd,

							    "vdev_load: zap_lookup(top_zap=%llu) "

				@@ -4224,17 +4246,39 @@ vdev_remove_empty_log(vdev_t *vd, uint64_t txg)

					dmu_tx_commit(tx);

				}

				/*
				 * Taskq callback: arg is the metaslab to finish syncing.  Calls
				 * metaslab_sync_done() with the pool's currently syncing txg.
				 */
				static void
				metaslab_sync_done_task(void *arg)
				{
					metaslab_t *ms = arg;

					/* The pool is reached through the metaslab's group and vdev. */
					metaslab_sync_done(ms,
					    spa_syncing_txg(ms->ms_group->mg_vd->vdev_spa));
				}

				/*
				 * Dispatch one metaslab_sync_done_task() to spa_sync_tq for each
				 * metaslab found on vd's vdev_ms_list at TXG_CLEAN(txg).
				 */
				void
				vdev_sync_dispatch(vdev_t *vd, uint64_t txg)
				{
					spa_t *spa = vd->vdev_spa;
					metaslab_t *msp;

					ASSERT(vdev_is_concrete(vd));

					/* Iterate with txg_list_head()/txg_list_next(). */
					msp = txg_list_head(&vd->vdev_ms_list, TXG_CLEAN(txg));
					while (msp != NULL) {
						(void) taskq_dispatch(spa->spa_sync_tq,
						    metaslab_sync_done_task, msp, TQ_SLEEP);
						msp = txg_list_next(&vd->vdev_ms_list, msp,
						    TXG_CLEAN(txg));
					}
				}

				void

				vdev_sync_done(vdev_t *vd, uint64_t txg)

				{

					metaslab_t *msp;

					boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));

					ASSERT(vdev_is_concrete(vd));

					while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))

					    != NULL)

						metaslab_sync_done(msp, txg);

					taskq_wait(vd->vdev_spa->spa_sync_tq);

					while (txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)) != NULL)

						;

					if (reassess) {

						metaslab_sync_reassess(vd->vdev_mg);

				@@ -6093,6 +6137,29 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)

								    strval);

							}

							break;

						case VDEV_PROP_ALLOC_BIAS: {

							intval = fnvpair_value_uint64(elem);

							ASSERT3U(intval, !=, VDEV_BIAS_LOG);

							const char *bias_str =

							    (intval == VDEV_BIAS_SPECIAL) ?

							    VDEV_ALLOC_BIAS_SPECIAL :

							    (intval == VDEV_BIAS_DEDUP) ?

							    VDEV_ALLOC_BIAS_DEDUP : NULL;

							if (bias_str == NULL) {

								(void) zap_remove(mos, objid,

								    VDEV_TOP_ZAP_ALLOCATION_BIAS, tx);

							} else {

								VERIFY0(zap_update(mos, objid,

								    VDEV_TOP_ZAP_ALLOCATION_BIAS,

								    1, strlen(bias_str) + 1, bias_str, tx));

								spa_activate_allocation_classes(spa, tx);

							}

							spa_history_log_internal(spa, "vdev set", tx,

							    "vdev_guid=%llu: alloc_bias=%s",

							    (u_longlong_t)vdev_guid,

							    bias_str != NULL ? bias_str : "none");

							break;

						}

						default:

							/* normalize the property name */

							propname = vdev_prop_to_name(prop);

				@@ -6207,11 +6274,14 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

								error = spa_vdev_alloc(spa, vdev_guid);

							break;

						case VDEV_PROP_FAILFAST:

							if (nvpair_value_uint64(elem, &intval) != 0) {

							if (nvpair_value_uint64(elem, &intval) != 0 ||

							    intval > ZPROP_BOOLEAN_INHERIT ||

							    (intval == ZPROP_BOOLEAN_INHERIT &&

							    vd->vdev_ops == &vdev_root_ops)) {

								error = EINVAL;

								break;

							}

							vd->vdev_failfast = intval & 1;

							vd->vdev_failfast = intval;

							break;

						case VDEV_PROP_SIT_OUT:

							/* Only expose this for a draid or raidz leaf */

				@@ -6319,6 +6389,53 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

							}

							vd->vdev_scheduler = intval;

							break;

						case VDEV_PROP_ALLOC_BIAS:

							if (nvpair_value_uint64(elem, &intval) != 0) {

								error = EINVAL;

								break;

							}

							if (vd != vd->vdev_top || vd->vdev_top_zap == 0) {

								error = ENOTSUP;

								break;

							}

							/* Log vdevs are not supported: remove and re-add. */

							if (vd->vdev_islog) {

								error = ENOTSUP;

								break;

							}

							/* special/dedup needs allocation_classes feature */

							if (intval != VDEV_BIAS_NONE &&

							    ((intval != VDEV_BIAS_SPECIAL &&

							    intval != VDEV_BIAS_DEDUP) ||

							    !spa_feature_is_enabled(spa,

							    SPA_FEATURE_ALLOCATION_CLASSES))) {

								error = ENOTSUP;

								break;

							}

							/*

							 * Disallow converting the last normal vdev to

							 * avoid pool suspension on failed allocations.

							 */

							if (intval != VDEV_BIAS_NONE &&

							    vd->vdev_alloc_bias == VDEV_BIAS_NONE) {

								vdev_t *rvd = spa->spa_root_vdev;

								int normal = 0;

								for (uint64_t c = 0;

								    c < rvd->vdev_children; c++) {

									vdev_t *cvd = rvd->vdev_child[c];

									if (vdev_is_concrete(cvd) &&

									    cvd->vdev_alloc_bias ==

									    VDEV_BIAS_NONE &&

									    !cvd->vdev_noalloc)

										normal++;

								}

								if (normal <= 1) {

									error = ENOTSUP;

									break;

								}

							}

							vd->vdev_alloc_bias = (vdev_alloc_bias_t)intval;

							break;

						default:

							/* Most processing is done in vdev_props_set_sync */

							break;

				@@ -6350,7 +6467,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

					spa_t *spa = vd->vdev_spa;

					objset_t *mos = spa->spa_meta_objset;

					int err = 0;

					uint64_t objid;

					uint64_t objid = 0;

					uint64_t vdev_guid;

					nvpair_t *elem = NULL;

					nvlist_t *nvprops = NULL;

				@@ -6369,9 +6486,15 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

					nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);

					if (vdev_prop_get_objid(vd, &objid) != 0)

						return (SET_ERROR(EINVAL));

					ASSERT(objid != 0);

					/*

					 * A missing ZAP is normal for spare and L2ARC vdevs, which are

					 * not part of the main vdev tree and never get ZAPs allocated.

					 * Many properties are sourced directly from vdev_t fields and

					 * work fine without one; ZAP-backed properties will return their

					 * default values.  objid is set to 0 when absent and the few

					 * cases that call zap_lookup directly guard against this below.

					 */

					(void) vdev_prop_get_objid(vd, &objid);

					mutex_enter(&spa->spa_props_lock);

				@@ -6694,18 +6817,28 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

								break;

							case VDEV_PROP_FAILFAST:

								src = ZPROP_SRC_LOCAL;

								strval = NULL;

								err = zap_lookup(mos, objid, nvpair_name(elem),

								if (objid != 0) {

									err = zap_lookup(mos, objid,

									    nvpair_name(elem),

									    sizeof (uint64_t), 1, &intval);

								} else {

									err = ENOENT;

								}

								if (err == ENOENT) {

									intval = vdev_prop_default_numeric(

									if (vd->vdev_ops == &vdev_root_ops)

										intval =

										    vdev_prop_default_numeric(

										    prop);

									else

										intval = ZPROP_BOOLEAN_INHERIT;

									err = 0;

								} else if (err) {

									break;

								}

								if (intval == vdev_prop_default_numeric(prop))

								if (intval == ZPROP_BOOLEAN_INHERIT ||

								    (vd->vdev_ops == &vdev_root_ops &&

								    intval == 1))

									src = ZPROP_SRC_DEFAULT;

								vdev_prop_add_list(outnvl, propname, strval,

				@@ -6746,6 +6879,17 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

								vdev_prop_add_list(outnvl, propname, NULL,

								    boolval, src);

								break;

							case VDEV_PROP_ALLOC_BIAS:

								if (vd == vd->vdev_top) {

									vdev_prop_add_list(outnvl, propname,

									    NULL, vd->vdev_alloc_bias,

									    ZPROP_SRC_NONE);

								}

								continue;

							case VDEV_PROP_ROTATIONAL:

								vdev_prop_add_list(outnvl, propname, NULL,

								    !vd->vdev_nonrot, ZPROP_SRC_NONE);

								continue;

							case VDEV_PROP_CHECKSUM_N:

							case VDEV_PROP_CHECKSUM_T:

							case VDEV_PROP_IO_N:

				@@ -6771,6 +6915,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

								/* FALLTHRU */

							case VDEV_PROP_USERPROP:

								/* User Properties */

								if (objid == 0)

									continue;

								src = ZPROP_SRC_LOCAL;

								err = zap_length(mos, objid, nvpair_name(elem),

									
										sys/contrib/openzfs/module/zfs/vdev_label.c
									
		+14
		
												View File
												
				@@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,

					if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);

					fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);

					if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&

					    vd->vdev_top != NULL) {

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID,

						    vd->vdev_top->vdev_guid);

					}

					if (vd->vdev_path != NULL)

						fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);

				@@ -493,6 +498,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,

						    vd->vdev_wholedisk);

					}

					if (vd->vdev_ops->vdev_op_leaf) {

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,

						    !vd->vdev_nonrot);

					}

					if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);

				@@ -502,6 +512,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,

					if (flags & VDEV_CONFIG_L2CACHE)

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);

					if ((flags & VDEV_CONFIG_SPARE) && vd->vdev_asize != 0)

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, vd->vdev_asize);

					if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&

					    vd == vd->vdev_top) {

						fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,

				@@ -1392,6 +1405,7 @@ vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv)

								    VB_NVLIST);

								break;

							}

							vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';

							fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf);

						}

									
										sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
									
		+9
		-9
	
												View File
												
				@@ -102,14 +102,14 @@

				#define	WVR(X) [w##X] "=w" (w##X)

				#define	UVR0_(REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)

				#define	UVR0_(REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR1_(_1, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR2_(_1, _2, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+w" (w##REG)

				#define	UVR0(r...) UVR0_(r)

				#define	UVR1(r...) UVR1_(r)

				@@ -120,7 +120,7 @@

				#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)

				#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

				#define	UVR(X) [w##X] "+&w" (w##X)

				#define	UVR(X) [w##X] "+w" (w##X)

				#define	R_01(REG1, REG2, ...) REG1, REG2

				#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3

sys/contrib/openzfs/module/zfs/zap.c

+1121 -1531

View File

File diff suppressed because it is too large Load Diff

sys/contrib/openzfs/module/zfs/zap_fat.c

+1458

View File

File diff suppressed because it is too large Load Diff

									
										sys/contrib/openzfs/module/zfs/zap_impl.c
									
		+558
		
												View File
												
				@@ -0,0 +1,558 @@

				// SPDX-License-Identifier: CDDL-1.0

				/*

				 * CDDL HEADER START

				 *

				 * The contents of this file are subject to the terms of the

				 * Common Development and Distribution License (the "License").

				 * You may not use this file except in compliance with the License.

				 *

				 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

				 * or https://opensource.org/licenses/CDDL-1.0.

				 * See the License for the specific language governing permissions

				 * and limitations under the License.

				 *

				 * When distributing Covered Code, include this CDDL HEADER in each

				 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

				 * If applicable, add the following below this CDDL HEADER, with the

				 * fields enclosed by brackets "[]" replaced with your own identifying

				 * information: Portions Copyright [yyyy] [name of copyright owner]

				 *

				 * CDDL HEADER END

				 */

				/*

				 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.

				 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.

				 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.

				 * Copyright 2017 Nexenta Systems, Inc.

				 * Copyright (c) 2024, Klara, Inc.

				 * Copyright (c) 2026, TrueNAS.

				 */

				#include <sys/zfs_context.h>

				#include <sys/dmu.h>

				#include <sys/dnode.h>

				#include <sys/dsl_dataset.h>

				#include <sys/zap.h>

				#include <sys/zap_impl.h>

				static kmem_cache_t *zap_name_cache;

				static kmem_cache_t *zap_attr_cache;

				static kmem_cache_t *zap_name_long_cache;

				static kmem_cache_t *zap_attr_long_cache;

				/* Setup/teardown caches. Part of the public interface in zap.h. */

				/*
				 * Create the four kmem caches used for zap_name_t and zap_attribute_t
				 * allocations.  Each structure gets one cache with room for a
				 * ZAP_MAXNAMELEN buffer and one with room for a ZAP_MAXNAMELEN_NEW
				 * ("long") buffer appended to it.
				 */
				void
				zap_init(void)
				{
					const size_t name_sz = sizeof (zap_name_t) + ZAP_MAXNAMELEN;
					const size_t attr_sz = sizeof (zap_attribute_t) + ZAP_MAXNAMELEN;
					const size_t name_long_sz =
					    sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW;
					const size_t attr_long_sz =
					    sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW;

					/* Regular-length buffers. */
					zap_name_cache = kmem_cache_create("zap_name", name_sz, 0,
					    NULL, NULL, NULL, NULL, NULL, 0);
					zap_attr_cache = kmem_cache_create("zap_attr_cache", attr_sz, 0,
					    NULL, NULL, NULL, NULL, NULL, 0);

					/* Long-name buffers. */
					zap_name_long_cache = kmem_cache_create("zap_name_long",
					    name_long_sz, 0, NULL, NULL, NULL, NULL, NULL, 0);
					zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache",
					    attr_long_sz, 0, NULL, NULL, NULL, NULL, NULL, 0);
				}

				/*
				 * Destroy the four kmem caches created by zap_init(): the regular and
				 * long variants of the zap_name_t and zap_attribute_t caches.
				 */
				void
				zap_fini(void)
				{
					kmem_cache_destroy(zap_name_cache);
					kmem_cache_destroy(zap_attr_cache);
					kmem_cache_destroy(zap_name_long_cache);
					kmem_cache_destroy(zap_attr_long_cache);
				}

				/*
				 * Normalize the NUL-terminated string 'name' into 'namenorm' (a buffer of
				 * 'outlen' bytes) using u8_textprep_str() with the given 'normflags'.
				 * U8_TEXTPREP_IGNORE_NULL and U8_TEXTPREP_IGNORE_INVALID are always added
				 * to the flags.  Must not be used on ZAPs with ZAP_FLAG_UINT64_KEY set.
				 *
				 * Returns the error value reported by u8_textprep_str() through its
				 * error argument (0 if it reported none).
				 */
				static int
				zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags,
				    size_t outlen)
				{
					int error = 0;
					size_t srclen;
					int flags;

					ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));

					/* The input length includes the terminating NUL. */
					srclen = strlen(name) + 1;
					flags = normflags | U8_TEXTPREP_IGNORE_NULL |
					    U8_TEXTPREP_IGNORE_INVALID;

					(void) u8_textprep_str((char *)name, &srclen, namenorm, &outlen,
					    flags, U8_UNICODE_LATEST, &error);

					return (error);
				}

				/*
				 * Allocate a zap_name_t bound to 'zap'.  When 'longname' is set the
				 * structure comes from the long-name cache and zn_normbuf_len is
				 * ZAP_MAXNAMELEN_NEW; otherwise it comes from the regular cache and
				 * zn_normbuf_len is ZAP_MAXNAMELEN.  No other fields are initialized
				 * here.  Release with zap_name_free().
				 */
				zap_name_t *
				zap_name_alloc(zap_t *zap, boolean_t longname)
				{
					zap_name_t *zn;

					if (longname) {
						zn = kmem_cache_alloc(zap_name_long_cache, KM_SLEEP);
						zn->zn_normbuf_len = ZAP_MAXNAMELEN_NEW;
					} else {
						zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
						zn->zn_normbuf_len = ZAP_MAXNAMELEN;
					}
					zn->zn_zap = zap;

					return (zn);
				}

				/*
				 * Allocate a zap_name_t for the string key 'key' and initialize it with
				 * zap_name_init_str().  The long-name variant is used when the key,
				 * including its terminating NUL, does not fit in ZAP_MAXNAMELEN bytes.
				 *
				 * Returns the initialized name, or NULL if zap_name_init_str() fails
				 * (the allocation is freed in that case).
				 */
				zap_name_t *
				zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
				{
					zap_name_t *zn =
					    zap_name_alloc(zap, (strlen(key) + 1 > ZAP_MAXNAMELEN));

					if (zap_name_init_str(zn, key, mt) == 0)
						return (zn);

					/* Initialization failed; don't leak the allocation. */
					zap_name_free(zn);
					return (NULL);
				}

				/*
				 * Allocate a zap_name_t for a key made of 'numints' uint64_t words.
				 * The key is not copied: both the original and normalized key pointers
				 * refer to the caller's 'key' array.  The ZAP must have no normalization
				 * flags.  The name always comes from the regular (non-long) cache.
				 */
				zap_name_t *
				zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
				{
					zap_name_t *zn;

					ASSERT0(zap->zap_normflags);

					zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
					zn->zn_zap = zap;
					zn->zn_normbuf_len = ZAP_MAXNAMELEN;
					zn->zn_matchtype = 0;
					zn->zn_key_intlen = sizeof (*key);

					/* No normalization for binary keys: norm aliases orig. */
					zn->zn_key_norm = key;
					zn->zn_key_orig = zn->zn_key_norm;
					zn->zn_key_norm_numints = numints;
					zn->zn_key_orig_numints = zn->zn_key_norm_numints;

					/* The hash is derived from the key fields set above. */
					zn->zn_hash = zap_hash(zn);

					return (zn);
				}

				/*
				 * Return a zap_name_t to the cache it was allocated from.  The cache is
				 * identified by zn_normbuf_len, which is either ZAP_MAXNAMELEN (regular
				 * cache) or ZAP_MAXNAMELEN_NEW (long-name cache).
				 */
				void
				zap_name_free(zap_name_t *zn)
				{
					if (zn->zn_normbuf_len != ZAP_MAXNAMELEN) {
						ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW);
						kmem_cache_free(zap_name_long_cache, zn);
						return;
					}

					kmem_cache_free(zap_name_cache, zn);
				}

				/*
				 * Initialize an already-allocated zap_name_t for the string key 'key'
				 * with match type 'mt'.  zn->zn_zap and zn->zn_normbuf_len must already
				 * be set (see zap_name_alloc()).  The key is not copied; zn_key_orig
				 * points at the caller's string.
				 *
				 * The hash is always computed from the key normalized with the ZAP's own
				 * zap_normflags.  If the effective match flags (zn_normflags) differ from
				 * those, the key is normalized a second time with zn_normflags so that
				 * zn_key_norm holds the form used for matching.
				 *
				 * Returns 0 on success, or ENOTSUP if normalization fails or if a
				 * non-zero match type is requested on a ZAP without normalization flags.
				 */
				int
				zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
				{
					zap_t *zap = zn->zn_zap;
					/* Key length in bytes, including the terminating NUL. */
					size_t key_len = strlen(key) + 1;

					/* Make sure zn is allocated for longname if key is long */
					IMPLY(key_len > ZAP_MAXNAMELEN,
					    zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW);

					zn->zn_key_intlen = sizeof (*key);
					zn->zn_key_orig = key;
					zn->zn_key_orig_numints = key_len;
					zn->zn_matchtype = mt;
					zn->zn_normflags = zap->zap_normflags;

					/*
					 * If we're dealing with a case sensitive lookup on a mixed or
					 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
					 * will fold case to all caps overriding the lookup request.
					 */
					if (mt & MT_MATCH_CASE)
						zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;

					if (zap->zap_normflags) {
						/*
						 * We *must* use zap_normflags because this normalization is
						 * what the hash is computed from.
						 */
						if (zap_normalize(zap, key, zn->zn_normbuf,
						    zap->zap_normflags, zn->zn_normbuf_len) != 0)
							return (SET_ERROR(ENOTSUP));
						zn->zn_key_norm = zn->zn_normbuf;
						zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
					} else {
						/* Without normalization only the default match type works. */
						if (mt != 0)
							return (SET_ERROR(ENOTSUP));
						zn->zn_key_norm = zn->zn_key_orig;
						zn->zn_key_norm_numints = zn->zn_key_orig_numints;
					}

					/* Hash the key as normalized with the ZAP's own flags. */
					zn->zn_hash = zap_hash(zn);

					if (zap->zap_normflags != zn->zn_normflags) {
						/*
						 * We *must* use zn_normflags because this normalization is
						 * what the matching is based on.  (Not the hash!)
						 */
						if (zap_normalize(zap, key, zn->zn_normbuf,
						    zn->zn_normflags, zn->zn_normbuf_len) != 0)
							return (SET_ERROR(ENOTSUP));
						zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
					}

					return (0);
				}

				/*
				 * Report whether the on-disk entry name 'matchname' matches the key
				 * described by 'zn'.  Without MT_NORMALIZE in the match type this is a
				 * plain string comparison against the original key.  With MT_NORMALIZE,
				 * 'matchname' is normalized with zn_normflags and compared against the
				 * normalized key; a normalization failure counts as "no match".
				 * Must not be used on ZAPs with ZAP_FLAG_UINT64_KEY set.
				 */
				boolean_t
				zap_match(zap_name_t *zn, const char *matchname)
				{
					ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));

					/* Exact matching: compare against the key as given. */
					if (!(zn->zn_matchtype & MT_NORMALIZE))
						return (strcmp(zn->zn_key_orig, matchname) == 0);

					char stackbuf[ZAP_MAXNAMELEN];
					char *buf = stackbuf;
					size_t buflen = zn->zn_normbuf_len;
					boolean_t matched = B_FALSE;

					/*
					 * A long-name buffer is too large to place on the stack (it
					 * would exceed the 1024-byte stack limit), so allocate it.
					 */
					if (buflen > ZAP_MAXNAMELEN)
						buf = kmem_alloc(buflen, KM_SLEEP);

					if (zap_normalize(zn->zn_zap, matchname, buf,
					    zn->zn_normflags, buflen) == 0)
						matched = (strcmp(zn->zn_key_norm, buf) == 0);

					if (buf != stackbuf)
						kmem_free(buf, buflen);

					return (matched);
				}

				/*
				 * Compute the hash value for the key described by 'zn'.
				 *
				 * For ZAPs with ZAP_FLAG_PRE_HASHED_KEY the first uint64_t of the
				 * original key is used directly.  Otherwise the hash starts from the
				 * ZAP's salt and is folded through zfs_crc64_table one byte at a time:
				 * over every byte of every 64-bit word for ZAP_FLAG_UINT64_KEY ZAPs, or
				 * over the characters of the normalized string (excluding the terminating
				 * NUL) for string keys.
				 *
				 * Only the upper zap_hashbits(zap) bits of the result are kept; the
				 * remaining low bits are cleared.
				 */
				uint64_t
				zap_hash(zap_name_t *zn)
				{
					zap_t *zap = zn->zn_zap;
					uint64_t h = 0;

					if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
						ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
						h = *(uint64_t *)zn->zn_key_orig;
					} else {
						h = zap->zap_salt;
						ASSERT(h != 0);
						ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);

						if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
							const uint64_t *wp = zn->zn_key_norm;

							ASSERT(zn->zn_key_intlen == 8);
							for (int i = 0; i < zn->zn_key_norm_numints;
							    wp++, i++) {
								uint64_t word = *wp;

								/* Fold in the word low byte first. */
								for (int j = 0; j < 8; j++) {
									h = (h >> 8) ^
									    zfs_crc64_table[(h ^ word) & 0xFF];
									word >>= NBBY;
								}
							}
						} else {
							const uint8_t *cp = zn->zn_key_norm;

							/*
							 * We previously stored the terminating null on
							 * disk, but didn't hash it, so we need to
							 * continue to not hash it.  (The
							 * zn_key_*_numints includes the terminating
							 * null for non-binary keys.)
							 */
							int len = zn->zn_key_norm_numints - 1;

							ASSERT(zn->zn_key_intlen == 1);
							for (int i = 0; i < len; cp++, i++) {
								h = (h >> 8) ^
								    zfs_crc64_table[(h ^ *cp) & 0xFF];
							}
						}
					}
					/*
					 * Don't use all 64 bits, since we need some in the cookie for
					 * the collision differentiator.  We MUST use the high bits,
					 * since those are the ones that we first pay attention to when
					 * choosing the bucket.
					 */
					h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);

					return (h);
				}

				/*
				 * Look up (or instantiate) the zap_t attached to 'db' and acquire its
				 * zap_rwlock.
				 *
				 *   dn        - dnode of the ZAP object; its type must byteswap as a ZAP.
				 *   db        - held buffer for the object; asserted to be at offset 0.
				 *   tx        - transaction used to dirty the buffer when the lock ends
				 *               up held as writer, and for growing/upgrading a microzap.
				 *   lti       - lock type requested by the caller.
				 *   fatreader - if set and the ZAP is not a microzap, RW_READER is taken
				 *               regardless of 'lti'.
				 *   adding    - if set (together with a non-NULL tx) and the microzap has
				 *               no free chunks, the microzap is grown by one
				 *               SPA_MINBLOCKSIZE or, if that would exceed the pool's
				 *               microzap size limit, handed to mzap_upgrade().
				 *   zapp      - out: the locked zap_t on success, NULL on early failure.
				 *
				 * Returns 0 with zap_rwlock held, EINVAL if the dnode is not a ZAP type,
				 * EIO if mzap_open() rejects the buffer contents, or the error from
				 * mzap_upgrade() (in which case the lock has been dropped).
				 */
				static int
				zap_lock_impl(dnode_t *dn, dmu_buf_t *db, dmu_tx_t *tx,
				    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
				{
					ASSERT0(db->db_offset);
					objset_t *os = dmu_buf_get_objset(db);
					uint64_t obj = db->db_object;

					*zapp = NULL;

					if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP)
						return (SET_ERROR(EINVAL));

					/* Reuse the zap_t cached on the buffer, or build one. */
					zap_t *zap = dmu_buf_get_user(db);
					if (zap == NULL) {
						zap = mzap_open(db);
						if (zap == NULL) {
							/*
							 * mzap_open() didn't like what it saw on-disk.
							 * Check for corruption!
							 */
							return (SET_ERROR(EIO));
						}
					}

					/*
					 * We're checking zap_ismicro without the lock held, in order to
					 * tell what type of lock we want.  Once we have some sort of
					 * lock, see if it really is the right type.  In practice this
					 * can only be different if it was upgraded from micro to fat,
					 * and micro wanted WRITER but fat only needs READER.
					 */
					krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
					rw_enter(&zap->zap_rwlock, lt);
					if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
						/* it was upgraded, now we only need reader */
						ASSERT(lt == RW_WRITER);
						ASSERT(RW_READER ==
						    ((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
						rw_downgrade(&zap->zap_rwlock);
						lt = RW_READER;
					}

					zap->zap_objset = os;
					zap->zap_dnode = dn;

					/* Writers are expected to modify the buffer, so dirty it now. */
					if (lt == RW_WRITER)
						dmu_buf_will_dirty(db, tx);

					ASSERT3P(zap->zap_dbuf, ==, db);

					ASSERT(!zap->zap_ismicro ||
					    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
					/* A full microzap that is about to gain an entry needs more room. */
					if (zap->zap_ismicro && tx && adding &&
					    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
						uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
						if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) {
							dprintf("upgrading obj %llu: num_entries=%u\n",
							    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
							*zapp = zap;
							int err = mzap_upgrade(zapp, tx, 0);
							/* On failure the caller gets no lock. */
							if (err != 0)
								rw_exit(&zap->zap_rwlock);
							return (err);
						}
						VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
						/* Recompute the chunk count from the buffer size. */
						zap->zap_m.zap_num_chunks =
						    db->db_size / MZAP_ENT_LEN - 1;

						if (newsz > SPA_OLD_MAXBLOCKSIZE) {
							dsl_dataset_t *ds = dmu_objset_ds(os);
							if (!dsl_dataset_feature_is_active(ds,
							    SPA_FEATURE_LARGE_MICROZAP)) {
								/*
								 * A microzap just grew beyond the old limit
								 * for the first time, so we have to ensure the
								 * feature flag is activated.
								 * zap_get_micro_max_size() won't let us get
								 * here if the feature is not enabled, so we
								 * don't need any other checks beforehand.
								 *
								 * Since we're in open context, we can't
								 * activate the feature directly, so we instead
								 * flag it on the dataset for next sync.
								 */
								dsl_dataset_dirty(ds, tx);
								mutex_enter(&ds->ds_lock);
								ds->ds_feature_activation
								    [SPA_FEATURE_LARGE_MICROZAP] =
								    (void *)B_TRUE;
								mutex_exit(&ds->ds_lock);
							}
						}
					}

					*zapp = zap;
					return (0);
				}

				/*
				 * Hold block 0 of the ZAP object behind 'dn' and lock its zap_t via
				 * zap_lock_impl().  On success the buffer hold is kept and an extra
				 * reference on 'dn' is taken, both under 'tag'; zap_unlock() releases
				 * them.  On failure nothing remains held.
				 *
				 * Returns 0 on success, or the error from dmu_buf_hold_by_dnode() or
				 * zap_lock_impl().
				 */
				int
				zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
				    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
				    zap_t **zapp)
				{
					dmu_buf_t *db;
					int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db,
					    DMU_READ_NO_PREFETCH);

					if (err != 0)
						return (err);

					err = zap_lock_impl(dn, db, tx, lti, fatreader, adding, zapp);
					if (err != 0) {
						/* Locking failed: give the buffer back. */
						dmu_buf_rele(db, tag);
						return (err);
					}

					/* The locked ZAP keeps its own reference on the dnode. */
					VERIFY(dnode_add_ref(dn, tag));
					return (err);
				}

				/*
				 * Object-number flavor of zap_lock_by_dnode(): hold the dnode for
				 * object 'obj' in 'os', lock the ZAP through zap_lock_by_dnode(), then
				 * drop the temporary dnode hold.
				 *
				 * Returns 0 on success, or the error from dnode_hold() or
				 * zap_lock_by_dnode().
				 */
				int
				zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
				    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
				    zap_t **zapp)
				{
					dnode_t *dn;
					int err = dnode_hold(os, obj, tag, &dn);

					if (err == 0) {
						err = zap_lock_by_dnode(dn, tx, lti, fatreader, adding,
						    tag, zapp);
						/*
						 * zap_lock_by_dnode() adds its own dnode reference on
						 * success, so the hold taken above is always released.
						 */
						dnode_rele(dn, tag);
					}

					return (err);
				}

				/*
				 * Undo a successful zap_lock()/zap_lock_by_dnode(): drop zap_rwlock, then
				 * release the dnode reference and the buffer hold that were taken under
				 * 'tag'.  'tag' must be the same tag that was passed when locking.
				 */
				void
				zap_unlock(zap_t *zap, const void *tag)
				{
					rw_exit(&zap->zap_rwlock);
					dnode_rele(zap->zap_dnode, tag);
					dmu_buf_rele(zap->zap_dbuf, tag);
				}

				/*
				 * Try to obtain the writer lock on 'zap' without ever dropping the lock.
				 *
				 * Returns 1 if the writer lock is held on return (either it already was,
				 * or the in-place upgrade succeeded, in which case the buffer is also
				 * marked dirty in 'tx').  Returns 0 if the upgrade could not be done.
				 */
				int
				zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx)
				{
					/* Writer is already held; there is nothing to upgrade. */
					if (RW_WRITE_HELD(&zap->zap_rwlock))
						return (1);

					/* The in-place upgrade did not succeed. */
					if (!rw_tryupgrade(&zap->zap_rwlock))
						return (0);

					/*
					 * zap_lock_impl() only dirties the buffer when it takes the
					 * lock as writer, so do it now that we have become one.
					 */
					dmu_buf_will_dirty(zap->zap_dbuf, tx);
					return (1);
				}

				/*
				 * Make sure the caller holds the writer lock on 'zap'.  An in-place
				 * upgrade is attempted first; if that fails the lock is dropped and
				 * re-acquired as writer, and the buffer is marked dirty in 'tx'.
				 */
				void
				zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx)
				{
					if (!zap_lock_try_upgrade(zap, tx)) {
						/*
						 * Dropping the lock is safe: the hold on zap_dbuf is
						 * still in place, which keeps the dbuf from being
						 * evicted and the zap_t from being deallocated.
						 */
						rw_exit(&zap->zap_rwlock);
						rw_enter(&zap->zap_rwlock, RW_WRITER);
						dmu_buf_will_dirty(zap->zap_dbuf, tx);
					}
				}

				/*
				 * Tear down and free the zap_t passed as 'dbu': destroy its rwlock,
				 * release the variant-specific state (mze_destroy() for a microzap, the
				 * zap_num_entries_mtx mutex otherwise), then free the structure.
				 */
				void
				zap_evict_sync(void *dbu)
				{
					zap_t *zap = dbu;

					rw_destroy(&zap->zap_rwlock);

					if (!zap->zap_ismicro) {
						mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
					} else {
						mze_destroy(zap);
					}

					kmem_free(zap, sizeof (*zap));
				}

				/*
				 * Return the ZAP's flags: always 0 for a microzap, otherwise the
				 * zap_flags field of the fat ZAP's physical header.
				 */
				uint64_t
				zap_getflags(zap_t *zap)
				{
					return (zap->zap_ismicro ? 0 : zap_f_phys(zap)->zap_flags);
				}

				/*
				 * Number of hash bits used by this ZAP: 48 when ZAP_FLAG_HASH64 is set,
				 * 28 otherwise.
				 */
				int
				zap_hashbits(zap_t *zap)
				{
					return ((zap_getflags(zap) & ZAP_FLAG_HASH64) ? 48 : 28);
				}

				/*
				 * Largest collision differentiator value for this ZAP: 2^16 - 1 when
				 * ZAP_FLAG_HASH64 is set, otherwise the maximum unsigned value (-1U).
				 */
				uint32_t
				zap_maxcd(zap_t *zap)
				{
					if (!(zap_getflags(zap) & ZAP_FLAG_HASH64))
						return (-1U);

					return ((1 << 16) - 1);
				}

				/* DMU byteswap callback for DMU_BSWAP_ZAP, see dmu_ot_byteswap. */

				/*
				 * Byteswap a ZAP block of 'size' bytes in place.  The first 64-bit word
				 * of the block is inspected: if it equals ZBT_MICRO in either byte order
				 * the block is passed to mzap_byteswap(), otherwise to fzap_byteswap().
				 */
				void
				zap_byteswap(void *buf, size_t size)
				{
					uint64_t type = *(uint64_t *)buf;

					if (type != ZBT_MICRO && type != BSWAP_64(ZBT_MICRO)) {
						fzap_byteswap(buf, size);
						return;
					}

					mzap_byteswap(buf, size);
				}

				/*

				 * Cursor attribute allocator/free. Part of the public interface in zap.h,

				 * in this file to get access to the kmem caches.

				 */

				/*
				 * Allocate a zap_attribute_t from the long-name cache (za_name_len set
				 * to ZAP_MAXNAMELEN_NEW) when 'longname' is set, or from the regular
				 * cache (za_name_len set to ZAP_MAXNAMELEN) otherwise.  No other fields
				 * are initialized here.
				 */
				static zap_attribute_t *
				zap_attribute_alloc_impl(boolean_t longname)
				{
					zap_attribute_t *za;

					if (longname) {
						za = kmem_cache_alloc(zap_attr_long_cache, KM_SLEEP);
						za->za_name_len = ZAP_MAXNAMELEN_NEW;
					} else {
						za = kmem_cache_alloc(zap_attr_cache, KM_SLEEP);
						za->za_name_len = ZAP_MAXNAMELEN;
					}

					return (za);
				}

				/*
				 * Allocate a regular-length (ZAP_MAXNAMELEN) zap_attribute_t.
				 * Release with zap_attribute_free().
				 */
				zap_attribute_t *
				zap_attribute_alloc(void)
				{
					zap_attribute_t *za = zap_attribute_alloc_impl(B_FALSE);

					return (za);
				}

				/*
				 * Allocate a long-name (ZAP_MAXNAMELEN_NEW) zap_attribute_t.
				 * Release with zap_attribute_free().
				 */
				zap_attribute_t *
				zap_attribute_long_alloc(void)
				{
					zap_attribute_t *za = zap_attribute_alloc_impl(B_TRUE);

					return (za);
				}

				/*
				 * Return a zap_attribute_t to the cache it was allocated from.  The
				 * cache is identified by za_name_len, which is either ZAP_MAXNAMELEN
				 * (regular cache) or ZAP_MAXNAMELEN_NEW (long-name cache).
				 */
				void
				zap_attribute_free(zap_attribute_t *za)
				{
					if (za->za_name_len != ZAP_MAXNAMELEN) {
						ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW);
						kmem_cache_free(zap_attr_long_cache, za);
						return;
					}

					kmem_cache_free(zap_attr_cache, za);
				}

zfs: merge openzfs/zfs@a170134fe

Some files were not shown because too many files have changed in this diff Show More