zfs: merge openzfs/zfs@962e68865

Notable upstream pull request merges:
 #16307 1d43387dd zdb: Add -O option for -r to specify object-id
 #17965 a62c62120 ARC: Pre-convert zfs_arc_min_prefetch_ms
 #17970 d393166c5 ARC: Increase parallel eviction batching
 #17981 20f09eae4 ZIO: ZIO_STAGE_DDT_WRITE is a blocking stage
 #17983 ff47dd35e Fix ddtprune causing space leak
 #18015 86b064469 FreeBSD: Fix a potential null dereference
                  in zfs_freebsd_fsync() (already merged)
 #18020 ff47dd35e Ensure 64-bit `off_t` is used in user space
                  instead of `loff_t`
 #18028 09492e0f2 Reduce dataset buffers re-dirtying
 #18033 f72fd378c Defer async destroys on pool import
 #18043 3d76ba273 Improve async destroy processing timing
 #18044 46d6f1fe5 DDT: Move logs searches out of the lock
 #18047 ff5414406 DDT: Switch to using ZAP _by_dnode() interfaces
 #18048 3b1ff816b DDT: Add/use zap_lookup_length_uint64_by_dnode()
 #18055 22e89aca8 DDT: Fix compressed entry buffer size
 #18059 0550abd4b RAIDZ: Remove some excessive logging
 #18060 a83bb15fc Reduce minimal scrub/resilver times
 #18061 962e68865 Use reduced precision for scan times
 #18063 051a8c749 Bypass snprintf() in quota checks if no quotas set
 #18064 7ff329ac2 Fix rangelock test for growing block size

Obtained from:	OpenZFS
OpenZFS commit:	962e68865e

This commit is contained in:

Martin Matuska

2025-12-19 21:44:42 +01:00

parent f8cee1f2c2 962e68865e

commit 546d3d08e5

61 changed files with 1344 additions and 721 deletions

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh
									
Vendored

		+14
		
												View File
												
				@@ -13,6 +13,20 @@ set -eu

				# handle on what the timeout value should be.

				(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu  | head -n 2 | tail -n 1)}')]"; done) &

				# The default 'azure.archive.ubuntu.com' mirrors can be really slow.

				# Prioritize the official Ubuntu mirrors.

				#

				# The normal apt-mirrors.txt will look like:

				#

				# http://azure.archive.ubuntu.com/ubuntu/       priority:1

				# https://archive.ubuntu.com/ubuntu/    priority:2

				# https://security.ubuntu.com/ubuntu/   priority:3

				#

				# Just delete the 'azure.archive.ubuntu.com' line.

				sudo sed -i '/azure.archive.ubuntu.com/d' /etc/apt/apt-mirrors.txt

				echo "Using mirrors:"

				cat /etc/apt/apt-mirrors.txt

				# install needed packages

				export DEBIAN_FRONTEND="noninteractive"

				sudo apt-get -y update

									
										sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh
									
Vendored

		-7
	
												View File
												
				@@ -95,13 +95,6 @@ case "$OS" in

				    KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"

				    NIC="rtl8139"

				    ;;

				  freebsd14-2r)

				    FreeBSD="14.2-RELEASE"

				    OSNAME="FreeBSD $FreeBSD"

				    OSv="freebsd14.0"

				    URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"

				    KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"

				    ;;

				  freebsd14-3r)

				    FreeBSD="14.3-RELEASE"

				    OSNAME="FreeBSD $FreeBSD"

									
										sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml
									
Vendored

		-5
	
												View File
												
				@@ -60,20 +60,16 @@ jobs:

				        ref: ${{ github.event.pull_request.head.sha }}

				    - name: Setup QEMU

				      timeout-minutes: 10

				      run: .github/workflows/scripts/qemu-1-setup.sh

				    - name: Start build machine

				      timeout-minutes: 10

				      run: .github/workflows/scripts/qemu-2-start.sh ${{ matrix.os }}

				    - name: Install dependencies

				      timeout-minutes: 20

				      run: |

				        .github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }}

				    - name: Build modules or Test repo

				      timeout-minutes: 60

				      run: |

				        set -e

				        if [ "${{ github.event.inputs.test_type }}" == "Test repo" ] ; then

				@@ -94,7 +90,6 @@ jobs:

				    - name: Prepare artifacts

				      if: always()

				      timeout-minutes: 10

				      run: |

				        rsync -a zfs@vm0:/tmp/repo /tmp || true

				        .github/workflows/scripts/replace-dupes-with-symlinks.sh /tmp/repo

									
										sys/contrib/openzfs/.github/workflows/zfs-qemu.yml
									
Vendored

		+4
		-4
	
												View File
												
				@@ -46,7 +46,7 @@ jobs:

				            os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora41", "fedora42", "fedora43", "ubuntu22", "ubuntu24"]'

				            ;;

				          freebsd)

				            os_selection='["freebsd13-5r", "freebsd14-2r", "freebsd14-3r", "freebsd13-5s", "freebsd14-3s", "freebsd15-0s", "freebsd16-0c"]'

				            os_selection='["freebsd13-5r", "freebsd14-3r", "freebsd13-5s", "freebsd14-3s", "freebsd15-0s", "freebsd16-0c"]'

				            ;;

				          *)

				            # default list

				@@ -76,7 +76,7 @@ jobs:

				        # debian:  debian12, debian13, ubuntu22, ubuntu24

				        # misc:    archlinux, tumbleweed

				        # FreeBSD variants of November 2025:

				        # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r

				        # FreeBSD Release: freebsd13-5r, freebsd14-3r, freebsd15-0r

				        # FreeBSD Stable:  freebsd13-5s, freebsd14-3s, freebsd15-0s

				        # FreeBSD Current: freebsd16-0c

				        os: ${{ fromJson(needs.test-config.outputs.test_os) }}

				@@ -87,7 +87,7 @@ jobs:

				        ref: ${{ github.event.pull_request.head.sha }}

				    - name: Setup QEMU

				      timeout-minutes: 20

				      timeout-minutes: 60

				      run: |

				        # Add a timestamp to each line to debug timeouts

				        while IFS=$'\n' read -r line; do

				@@ -99,7 +99,7 @@ jobs:

				      run: .github/workflows/scripts/qemu-2-start.sh ${{ matrix.os }}

				    - name: Install dependencies

				      timeout-minutes: 20

				      timeout-minutes: 60

				      run: .github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }} ${{ github.event.inputs.fedora_kernel_ver }}

				    - name: Build modules

sys/contrib/openzfs/META

+1 -1

View File

@@ -6,5 +6,5 @@ Release:       1
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
 Linux-Maximum: 6.17
 Linux-Maximum: 6.18
 Linux-Minimum: 4.18

									
										sys/contrib/openzfs/cmd/zdb/zdb.c
									
		+8
		-3
	
												View File
												
				@@ -739,13 +739,14 @@ usage(void)

					    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"

					    "\t%s -O [-K <key>] <dataset> <path>\n"

					    "\t%s -r [-K <key>] <dataset> <path> <destination>\n"

					    "\t%s -r [-K <key>] -O <dataset> <object-id> <destination>\n"

					    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"

					    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"

					    "\t%s -E [-A] word0:word1:...:word15\n"

					    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "

					    "<poolname>\n\n",

					    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,

					    cmdname, cmdname, cmdname, cmdname, cmdname);

					    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);

					(void) fprintf(stderr, "    Dataset name must include at least one "

					    "separator character '/' or '@'\n");

				@@ -9956,7 +9957,7 @@ main(int argc, char **argv)

					 * which imports the pool to the namespace if it's

					 * not in the cachefile.

					 */

					if (dump_opt['O']) {

					if (dump_opt['O'] && !dump_opt['r']) {

						if (argc != 2)

							usage();

						dump_opt['v'] = verbose + 3;

				@@ -9969,7 +9970,11 @@ main(int argc, char **argv)

						if (argc != 3)

							usage();

						dump_opt['v'] = verbose;

						error = dump_path(argv[0], argv[1], &object);

						if (dump_opt['O']) {

							object = strtoull(argv[1], NULL, 0);

						} else {

							error = dump_path(argv[0], argv[1], &object);

						}

						if (error != 0)

							fatal("internal error: %s", strerror(error));

					}

sys/contrib/openzfs/config/kernel-kasan-enabled.m4

+23

View File

@@ -0,0 +1,23 @@
 dnl #
 dnl # 6.18: some architectures and config options cause the kasan_ inline
 dnl #       functions to reference the GPL-only symbol 'kasan_flag_enabled',
 dnl #       breaking the build. Detect this and work
 dnl #       around it.
 dnl #
 dnl # Register the "kasan_enabled" test program: it includes <linux/kasan.h>
 dnl # and calls kasan_enabled(). ZFS_META_LICENSE is passed as the license
 dnl # argument; the result is consumed by ZFS_AC_KERNEL_KASAN_ENABLED under
 dnl # the name "kasan_enabled_license".
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_KASAN_ENABLED], [
 	ZFS_LINUX_TEST_SRC([kasan_enabled], [
 		#include <linux/kasan.h>
 	], [
 		kasan_enabled();
 	], [], [ZFS_META_LICENSE])
 ])
 dnl #
 dnl # Evaluate the "kasan_enabled_license" test. If it passes, report "no"
 dnl # (kasan_enabled() is usable under the module's license). Otherwise
 dnl # report "yes" and define HAVE_KASAN_ENABLED_GPL_ONLY.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_KASAN_ENABLED], [
 	AC_MSG_CHECKING([whether kasan_enabled() is GPL-only])
 	ZFS_LINUX_TEST_RESULT([kasan_enabled_license], [
 		AC_MSG_RESULT(no)
 	], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_KASAN_ENABLED_GPL_ONLY, 1,
 		    [kasan_enabled() is GPL-only])
 	])
 ])

sys/contrib/openzfs/config/kernel.m4

View File

@@ -138,6 +138,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE
 	ZFS_AC_KERNEL_SRC_NAMESPACE
 	ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP
 	ZFS_AC_KERNEL_SRC_KASAN_ENABLED
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -260,6 +261,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_SOPS_FREE_INODE
 	ZFS_AC_KERNEL_NAMESPACE
 	ZFS_AC_KERNEL_INODE_GENERIC_DROP
 	ZFS_AC_KERNEL_KASAN_ENABLED
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE

sys/contrib/openzfs/config/user-largefile.m4

+21

View File

@@ -0,0 +1,21 @@
 dnl #
 dnl # ZFS_AC_CONFIG_USER_LARGEFILE
 dnl #
 dnl # Ensure off_t is 64-bit for large file support in userspace.
 dnl # This is required for OpenZFS to handle files larger than 2GB.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_LARGEFILE], [
 	dnl # Ask autoconf to enable large-file support first, so the size
 	dnl # check below sees the resulting off_t.
 	AC_SYS_LARGEFILE
 	dnl # Stores the measured size in $ac_cv_sizeof_off_t.
 	AC_CHECK_SIZEOF([off_t])
 	AC_MSG_CHECKING([for 64-bit off_t])
 	dnl # Abort configure unless off_t is exactly 8 bytes.
 	AS_IF([test "$ac_cv_sizeof_off_t" -ne 8], [
 		AC_MSG_RESULT([no, $ac_cv_sizeof_off_t bytes])
 		AC_MSG_FAILURE([
 *** OpenZFS userspace requires 64-bit off_t support for large files.
 *** Please ensure your system supports large file operations.
 *** Current off_t size: $ac_cv_sizeof_off_t bytes])
 	], [
 		AC_MSG_RESULT([yes, $ac_cv_sizeof_off_t bytes])
 	])
 ])

sys/contrib/openzfs/config/user.m4

View File

@@ -3,6 +3,7 @@ dnl # Default ZFS user configuration
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER], [
 	ZFS_AC_CONFIG_USER_GETTEXT
 	ZFS_AC_CONFIG_USER_LARGEFILE
 	ZFS_AC_CONFIG_USER_MOUNT_HELPER
 	ZFS_AC_CONFIG_USER_SYSVINIT
 	ZFS_AC_CONFIG_USER_DRACUT

sys/contrib/openzfs/contrib/initramfs/scripts/zfs

+364 -299

View File

File diff suppressed because it is too large Load Diff

sys/contrib/openzfs/etc/default/zfs.in

View File

@@ -60,6 +60,10 @@ ZPOOL_IMPORT_ALL_VISIBLE='no'
 # This is a space separated list.
 #ZFS_POOL_EXCEPTIONS="test2"
 # Additional important (operating system) file systems to mount beside
 # the root file system.
 #ZFS_INITRD_ADDITIONAL_DATASETS="rpool/usr rpool/var rpool/var/spool"
 # Should the datasets be mounted verbosely?
 # A mount counter will be used when mounting if set to 'yes'.
 VERBOSE_MOUNT='no'

sys/contrib/openzfs/etc/zfs/zfs-functions.in

+12 -12

View File

@@ -26,13 +26,13 @@ fi
 # Of course the functions we need are called differently
 # on different distributions - it would be way too easy
 # otherwise!!
 if type log_failure_msg > /dev/null 2>&1 ; then
 if command -v log_failure_msg > /dev/null 2>&1 ; then
 	# LSB functions - fall through
 	zfs_log_begin_msg() { log_begin_msg "$1"; }
 	zfs_log_end_msg() { log_end_msg "$1"; }
 	zfs_log_failure_msg() { log_failure_msg "$1"; }
 	zfs_log_progress_msg() { log_progress_msg "$1"; }
 elif type success > /dev/null 2>&1 ; then
 elif command -v success > /dev/null 2>&1 ; then
 	# Fedora/RedHat functions
 	zfs_set_ifs() {
 		# For some reason, the init function library have a problem
@@ -64,7 +64,7 @@ elif type success > /dev/null 2>&1 ; then
 		zfs_set_ifs "$TMP_IFS"
 	}
 	zfs_log_progress_msg() { printf "%s" "$""$1"; }
 elif type einfo > /dev/null 2>&1 ; then
 elif command -v einfo > /dev/null 2>&1 ; then
 	# Gentoo functions
 	zfs_log_begin_msg() { ebegin "$1"; }
 	zfs_log_end_msg() { eend "$1"; }
@@ -109,7 +109,7 @@ fi
 # ----------------------------------------------------
 export ZFS ZED ZPOOL ZPOOL_CACHE ZFS_LOAD_KEY ZFS_UNLOAD_KEY ZFS_MOUNT ZFS_UNMOUNT \
     ZFS_SHARE ZFS_UNSHARE
     ZFS_SHARE ZFS_UNSHARE ZFS_POOL_EXCEPTIONS ZFS_INITRD_ADDITIONAL_DATASETS
 zfs_action()
 {
@@ -140,7 +140,7 @@ zfs_daemon_start()
 	local PIDFILE="$1";	shift
 	local DAEMON_BIN="$1";	shift
 	if type start-stop-daemon > /dev/null 2>&1 ; then
 	if command -v start-stop-daemon > /dev/null 2>&1 ; then
 		# LSB functions
 		start-stop-daemon --start --quiet --pidfile "$PIDFILE" \
 		    --exec "$DAEMON_BIN" --test > /dev/null || return 1
@@ -157,7 +157,7 @@ zfs_daemon_start()
 		then
 			ln -sf "$PIDFILE" /run/sendsigs.omit.d/zed
 		fi
 	elif type daemon > /dev/null 2>&1 ; then
 	elif command -v daemon > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
 		# shellcheck disable=SC2086
 		daemon --pidfile "$PIDFILE" "$DAEMON_BIN" "$@"
@@ -182,7 +182,7 @@ zfs_daemon_stop()
 	local DAEMON_BIN="$2"
 	local DAEMON_NAME="$3"
 	if type start-stop-daemon > /dev/null 2>&1 ; then
 	if command -v start-stop-daemon > /dev/null 2>&1 ; then
 		# LSB functions
 		start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 \
 		    --pidfile "$PIDFILE" --name "$DAEMON_NAME"
@@ -190,7 +190,7 @@ zfs_daemon_stop()
 		[ "$ret" = 0 ] && rm -f "$PIDFILE"
 		return "$ret"
 	elif type killproc > /dev/null 2>&1 ; then
 	elif command -v killproc > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
 		killproc -p "$PIDFILE" "$DAEMON_NAME"
 		ret="$?"
@@ -212,11 +212,11 @@ zfs_daemon_status()
 	local DAEMON_BIN="$2"
 	local DAEMON_NAME="$3"
 	if type status_of_proc > /dev/null 2>&1 ; then
 	if command -v status_of_proc > /dev/null 2>&1 ; then
 		# LSB functions
 		status_of_proc "$DAEMON_NAME" "$DAEMON_BIN"
 		return $?
 	elif type status > /dev/null 2>&1 ; then
 	elif command -v status > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
 		status -p "$PIDFILE" "$DAEMON_NAME"
 		return $?
@@ -233,12 +233,12 @@ zfs_daemon_reload()
 	local PIDFILE="$1"
 	local DAEMON_NAME="$2"
 	if type start-stop-daemon > /dev/null 2>&1 ; then
 	if command -v start-stop-daemon > /dev/null 2>&1 ; then
 		# LSB functions
 		start-stop-daemon --stop --signal 1 --quiet \
 		    --pidfile "$PIDFILE" --name "$DAEMON_NAME"
 		return $?
 	elif type killproc > /dev/null 2>&1 ; then
 	elif command -v killproc > /dev/null 2>&1 ; then
 		# Fedora/RedHat functions
 		killproc -p "$PIDFILE" "$DAEMON_NAME" -HUP
 		return $?

									
										sys/contrib/openzfs/include/sys/ddt.h
									
		+7
		-4
	
												View File
												
				@@ -213,6 +213,7 @@ typedef enum {

				#define	DDE_FLAG_LOADED		(1 << 0)	/* entry ready for use */

				#define	DDE_FLAG_OVERQUOTA	(1 << 1)	/* entry unusable, no space */

				#define	DDE_FLAG_LOGGED		(1 << 2)	/* loaded from log */

				#define	DDE_FLAG_FROM_FLUSHING	(1 << 3)	/* loaded from flushing log */

				/*

				 * Additional data to support entry update or repair. This is fixed size

				@@ -280,13 +281,14 @@ typedef struct {

				 */

				typedef struct {

					kmutex_t	ddt_lock;	/* protects changes to all fields */

					avl_tree_t	ddt_tree;	/* "live" (changed) entries this txg */

					avl_tree_t	ddt_log_tree;	/* logged entries */

					avl_tree_t	ddt_repair_tree;	/* entries being repaired */

					ddt_log_t	ddt_log[2];		/* active/flushing logs */

					/*

					 * Log trees are stable during I/O, and only modified during sync

					 * with exclusive access.

					 */

					ddt_log_t	ddt_log[2] ____cacheline_aligned; /* logged entries */

					ddt_log_t	*ddt_log_active;	/* pointers into ddt_log */

					ddt_log_t	*ddt_log_flushing;	/* swapped when flush starts */

				@@ -324,6 +326,7 @@ typedef struct {

					/* per-type/per-class entry store objects */

					uint64_t	ddt_object[DDT_TYPES][DDT_CLASSES];

					dnode_t		*ddt_object_dnode[DDT_TYPES][DDT_CLASSES];

					/* object ids for stored, logged and per-type/per-class stats */

					uint64_t	ddt_stat_object;

									
										sys/contrib/openzfs/include/sys/ddt_impl.h
									
		+14
		-17
	
												View File
												
				@@ -69,8 +69,8 @@ extern "C" {

				 * the live tree.

				 */

				typedef struct {

					ddt_key_t	ddle_key;	/* ddt_log_tree key */

					avl_node_t	ddle_node;	/* ddt_log_tree node */

					ddt_key_t	ddle_key;	/* ddl_tree key */

					avl_node_t	ddle_node;	/* ddl_tree node */

					ddt_type_t	ddle_type;	/* storage type */

					ddt_class_t	ddle_class;	/* storage class */

				@@ -163,21 +163,18 @@ typedef struct {

					int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,

					    boolean_t prehash);

					int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);

					int (*ddt_op_lookup)(objset_t *os, uint64_t object,

					    const ddt_key_t *ddk, void *phys, size_t psize);

					int (*ddt_op_contains)(objset_t *os, uint64_t object,

					    const ddt_key_t *ddk);

					void (*ddt_op_prefetch)(objset_t *os, uint64_t object,

					    const ddt_key_t *ddk);

					void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);

					int (*ddt_op_update)(objset_t *os, uint64_t object,

					    const ddt_key_t *ddk, const void *phys, size_t psize,

					int (*ddt_op_lookup)(dnode_t *dn, const ddt_key_t *ddk,

					    void *phys, size_t psize);

					int (*ddt_op_contains)(dnode_t *dn, const ddt_key_t *ddk);

					void (*ddt_op_prefetch)(dnode_t *dn, const ddt_key_t *ddk);

					void (*ddt_op_prefetch_all)(dnode_t *dn);

					int (*ddt_op_update)(dnode_t *dn, const ddt_key_t *ddk,

					    const void *phys, size_t psize, dmu_tx_t *tx);

					int (*ddt_op_remove)(dnode_t *dn, const ddt_key_t *ddk,

					    dmu_tx_t *tx);

					int (*ddt_op_remove)(objset_t *os, uint64_t object,

					    const ddt_key_t *ddk, dmu_tx_t *tx);

					int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,

					    ddt_key_t *ddk, void *phys, size_t psize);

					int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);

					int (*ddt_op_walk)(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,

					    void *phys, size_t psize);

					int (*ddt_op_count)(dnode_t *dn, uint64_t *count);

				} ddt_ops_t;

				extern const ddt_ops_t ddt_zap_ops;

				@@ -193,7 +190,7 @@ extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,

				    ddt_lightweight_entry_t *ddlwe);

				extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,

				    ddt_lightweight_entry_t *ddlwe);

				    ddt_lightweight_entry_t *ddlwe, boolean_t *from_flushing);

				extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,

				    const ddt_key_t *ddk);

									
										sys/contrib/openzfs/include/sys/dsl_scan.h
									
		+1
		-1
	
												View File
												
				@@ -157,7 +157,7 @@ typedef struct dsl_scan {

					/* per txg statistics */

					uint64_t scn_visited_this_txg;	/* total bps visited this txg */

					uint64_t scn_dedup_frees_this_txg;	/* dedup bps freed this txg */

					uint64_t scn_async_frees_this_txg; /* async frees (dedup/clone/gang) */

					uint64_t scn_holes_this_txg;

					uint64_t scn_lt_min_this_txg;

					uint64_t scn_gt_max_this_txg;

									
										sys/contrib/openzfs/include/sys/zap.h
									
		+6
		
												View File
												
				@@ -226,6 +226,9 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);

				int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);

				int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,

				    uint64_t *actual_num_integers);

				int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);

				int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);

				int zap_prefetch_object(objset_t *os, uint64_t zapobj);

				@@ -288,6 +291,8 @@ int zap_length(objset_t *ds, uint64_t zapobj, const char *name,

				    uint64_t *integer_size, uint64_t *num_integers);

				int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

				    int key_numints, uint64_t *integer_size, uint64_t *num_integers);

				int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				    int key_numints, uint64_t *integer_size, uint64_t *num_integers);

				/*

				 * Remove the specified attribute.

				@@ -309,6 +314,7 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

				 * object.

				 */

				int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);

				int zap_count_by_dnode(dnode_t *dn, uint64_t *count);

				/*

				 * Returns (in name) the name of the entry whose (value & mask)

									
										sys/contrib/openzfs/include/sys/zap_impl.h
									
		+2
		-1
	
												View File
												
				@@ -219,7 +219,8 @@ void fzap_byteswap(void *buf, size_t size);

				int fzap_count(zap_t *zap, uint64_t *count);

				int fzap_lookup(zap_name_t *zn,

				    uint64_t integer_size, uint64_t num_integers, void *buf,

				    char *realname, int rn_len, boolean_t *normalization_conflictp);

				    char *realname, int rn_len, boolean_t *normalization_conflictp,

				    uint64_t *actual_num_integers);

				void fzap_prefetch(zap_name_t *zn);

				int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,

				    const void *val, const void *tag, dmu_tx_t *tx);

									
										sys/contrib/openzfs/include/sys/zfs_file.h
									
		+11
		
												View File
												
				@@ -25,6 +25,17 @@

				#include <sys/zfs_context.h>

				/*

				 * loff_t is a Linux kernel/VFS type. glibc and musl expose it to user

				 * space via <fcntl.h>, but FreeBSD libc does not. For FreeBSD user

				 * space we map loff_t to off_t so the shared interfaces that use the

				 * loff_t name still compile. The FreeBSD kernel gets loff_t from its

				 * own linux-compat headers.

				 */

				#if !defined(_KERNEL) && defined(__FreeBSD__)

				typedef off_t loff_t;

				#endif

				#ifndef _KERNEL

				typedef struct zfs_file {

					int f_fd;

									
										sys/contrib/openzfs/include/sys/zio_impl.h
									
		+2
		-1
	
												View File
												
				@@ -278,7 +278,8 @@ enum zio_stage {

					ZIO_VDEV_IO_STAGES)

				#define	ZIO_BLOCKING_STAGES			\

					(ZIO_STAGE_DVA_ALLOCATE |		\

					(ZIO_STAGE_DDT_WRITE |			\

					ZIO_STAGE_DVA_ALLOCATE |		\

					ZIO_STAGE_DVA_CLAIM |			\

					ZIO_STAGE_VDEV_IO_START)

									
										sys/contrib/openzfs/lib/libspl/include/sys/kstat.h
									
		+1
		-1
	
												View File
												
				@@ -86,6 +86,6 @@ extern void kstat_delete(kstat_t *);

				extern void kstat_set_raw_ops(kstat_t *ksp,

				    int (*headers)(char *buf, size_t size),

				    int (*data)(char *buf, size_t size, void *data),

				    void *(*addr)(kstat_t *ksp, loff_t index));

				    void *(*addr)(kstat_t *ksp, off_t index));

				#endif	/* _SYS_KSTAT_H */

									
										sys/contrib/openzfs/lib/libspl/include/sys/types.h
									
		-15
	
												View File
												
				@@ -50,19 +50,4 @@ typedef int		projid_t;

				#include <sys/param.h> /* for NBBY */

				#ifdef __FreeBSD__

				typedef off_t loff_t;

				#endif

				/*

				 * On musl, loff_t is a macro within fcntl.h when _GNU_SOURCE is defined.

				 * If no macro is defined, a typedef fallback is provided.

				 */

				#if defined(__linux__) && !defined(__GLIBC__)

				#include <fcntl.h>

				#ifndef loff_t

				typedef off_t loff_t;

				#endif

				#endif

				#endif

									
										sys/contrib/openzfs/lib/libspl/kstat.c
									
		+1
		-1
	
												View File
												
				@@ -58,7 +58,7 @@ void

				kstat_set_raw_ops(kstat_t *ksp,

				    int (*headers)(char *buf, size_t size),

				    int (*data)(char *buf, size_t size, void *data),

				    void *(*addr)(kstat_t *ksp, loff_t index))

				    void *(*addr)(kstat_t *ksp, off_t index))

				{

					(void) ksp, (void) headers, (void) data, (void) addr;

				}

sys/contrib/openzfs/man/man4/zfs.4

+26 -5

View File

@@ -771,6 +771,12 @@ Number ARC headers to evict per sub-list before proceeding to another sub-list.
 This batch-style operation prevents entire sub-lists from being evicted at once
 but comes at a cost of additional unlocking and locking.
 .
 .It Sy zfs_arc_evict_batches_limit Ns = Ns Sy 5 Pq uint
 Number of
 .Sy zfs_arc_evict_batch_limit
 batches to process per parallel eviction task under heavy load to reduce the
 number of context switches.
 .
 .It Sy zfs_arc_evict_threads Ns = Ns Sy 0 Pq int
 Sets the number of ARC eviction threads to be used.
 .Pp
@@ -1462,8 +1468,13 @@ Enable/disable the processing of the free_bpobj object.
 .It Sy zfs_async_block_max_blocks Ns = Ns Sy UINT64_MAX Po unlimited Pc Pq u64
 Maximum number of blocks freed in a single TXG.
 .
 .It Sy zfs_max_async_dedup_frees Ns = Ns Sy 100000 Po 10^5 Pc Pq u64
 Maximum number of dedup blocks freed in a single TXG.
 .It Sy zfs_max_async_dedup_frees Ns = Ns Sy 250000 Pq u64
 Maximum number of dedup, clone or gang blocks freed in a single TXG.
 These frees may require additional I/O, making them more expensive.
 .
 .It Sy zfs_async_free_zio_wait_interval Ns = Ns Sy 2000 Pq u64
 After freeing this many dedup, clone or gang blocks wait for all pending
 I/Os to complete before continuing.
 .
 .It Sy zfs_vdev_async_read_max_active Ns = Ns Sy 3 Pq uint
 Maximum asynchronous read I/O operations active to each device.
@@ -1733,7 +1744,7 @@ but we chose the more conservative approach of not setting it,
 so that there is no possibility of
 leaking space in the "partial temporary" failure case.
 .
 .It Sy zfs_free_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq uint
 .It Sy zfs_free_min_time_ms Ns = Ns Sy 500 Ns ms Po 0.5s Pc Pq uint
 During a
 .Nm zfs Cm destroy
 operation using the
@@ -1761,6 +1772,16 @@ Blocks that go to the special vdevs are still written indirectly, as with
 .Sy logbias Ns = Ns Sy throughput .
 This parameter is ignored if an SLOG is present.
 .
 .It Sy zfs_import_defer_txgs Ns = Ns Sy 5 Pq uint
 Number of transaction groups to wait after pool import before starting
 background work such as asynchronous block freeing
 .Pq from snapshots, clones, and deduplication
 and scrub or resilver operations.
 This allows the pool import and filesystem mounting to complete more quickly
 without interference from background activities.
 The default value of 5 transaction groups typically provides sufficient time
 for import and mount operations to complete on most systems.
 .
 .It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq u64
 Pattern written to vdev free space by
 .Xr zpool-initialize 8 .
@@ -2095,7 +2116,7 @@ even if the
 .Sy resilver_defer
 feature is enabled.
 .
 .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint
 .It Sy zfs_resilver_min_time_ms Ns = Ns Sy 1500 Ns ms Pq uint
 Resilvers are processed by the sync thread.
 While resilvering, it will spend at least this much time
 working on a resilver between TXG flushes.
@@ -2112,7 +2133,7 @@ in order to verify the checksums of all blocks which have been
 copied during the expansion.
 This is enabled by default and strongly recommended.
 .
 .It Sy zfs_scrub_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1 s Pc Pq uint
 .It Sy zfs_scrub_min_time_ms Ns = Ns Sy 750 Ns ms Pq uint
 Scrubs are processed by the sync thread.
 While scrubbing, it will spend at least this much time
 working on a scrub between TXG flushes.

sys/contrib/openzfs/man/man8/zdb.8

+11

View File

@@ -84,6 +84,11 @@
 .Op Fl K Ar key
 .Ar dataset path destination
 .Nm
 .Fl r
 .Fl O
 .Op Fl K Ar key
 .Ar dataset object-id destination
 .Nm
 .Fl R
 .Op Fl A
 .Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
@@ -376,6 +381,12 @@ Specified
 .Ar path
 must be relative to the root of
 .Ar dataset .
 When used with
 .Fl O ,
 the
 .Ar path
 argument is interpreted as an object identifier,
 not a path.
 This option can be combined with
 .Fl v
 for increasing verbosity.

sys/contrib/openzfs/module/Kbuild.in

View File

@@ -433,6 +433,7 @@ ZFS_OBJS := \
 ZFS_OBJS_OS := \
 	abd_os.o \
 	arc_os.o \
 	kasan_compat.o \
 	mmp_os.o \
 	policy.o \
 	qat.o \

									
										sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode_os.c
									
		+4
		-3
	
												View File
												
				@@ -121,11 +121,12 @@ zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)

					}

					/*

					 * If we need to grow the block size then lock the whole file range.

					 * If we might grow the block size then lock the whole file range.

					 * NB: this test should match the check in zfs_grow_blocksize

					 */

					uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);

					if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||

					    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {

					if (zp->z_size <= zp->z_blksz && end_size > zp->z_blksz &&

					    (!ISP2(zp->z_blksz) || zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {

						new->lr_offset = 0;

						new->lr_length = UINT64_MAX;

					}

									
										sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
									
		+23
		-10
	
												View File
												
				@@ -888,6 +888,14 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount)

					}

				}

				#ifndef nth_page

				/*

				 * Since 6.18 nth_page() no longer exists, and is no longer required to iterate

				 * within a single SG entry, so we replace it with a simple addition.

				 */

				#define	nth_page(p, n)	((p)+(n))

				#endif

				/*

				 * Map the current chunk into aiter. This can be safely called when the aiter

				 * has already been exhausted, in which case this does nothing.

				@@ -915,7 +923,14 @@ abd_iter_map(struct abd_iter *aiter)

						aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,

						    aiter->iter_abd->abd_size - aiter->iter_pos);

						paddr = zfs_kmap_local(sg_page(aiter->iter_sg));

						struct page *page = sg_page(aiter->iter_sg);

						if (PageHighMem(page)) {

							page = nth_page(page, offset / PAGE_SIZE);

							offset &= PAGE_SIZE - 1;

							aiter->iter_mapsize = MIN(aiter->iter_mapsize,

							    PAGE_SIZE - offset);

						}

						paddr = zfs_kmap_local(page);

					}

					aiter->iter_mapaddr = (char *)paddr + offset;

				@@ -933,8 +948,14 @@ abd_iter_unmap(struct abd_iter *aiter)

						return;

					if (!abd_is_linear(aiter->iter_abd)) {

						size_t offset = aiter->iter_offset;

						struct page *page = sg_page(aiter->iter_sg);

						if (PageHighMem(page))

							offset &= PAGE_SIZE - 1;

						/* LINTED E_FUNC_SET_NOT_USED */

						zfs_kunmap_local(aiter->iter_mapaddr - aiter->iter_offset);

						zfs_kunmap_local(aiter->iter_mapaddr - offset);

					}

					ASSERT3P(aiter->iter_mapaddr, !=, NULL);

				@@ -1110,14 +1131,6 @@ abd_return_buf_copy(abd_t *abd, void *buf, size_t n)

				#define	ABD_ITER_PAGE_SIZE(page)	(PAGESIZE)

				#endif

				#ifndef nth_page

				/*

				 * Since 6.18 nth_page() no longer exists, and is no longer required to iterate

				 * within a single SG entry, so we replace it with a simple addition.

				 */

				#define	nth_page(p, n)	((p)+(n))

				#endif

				void

				abd_iter_page(struct abd_iter *aiter)

				{

									
										sys/contrib/openzfs/module/os/linux/zfs/kasan_compat.c
									
		+48
		
												View File
												
				@@ -0,0 +1,48 @@

				// SPDX-License-Identifier: CDDL-1.0

				/*

				 * CDDL HEADER START

				 *

				 * The contents of this file are subject to the terms of the

				 * Common Development and Distribution License (the "License").

				 * You may not use this file except in compliance with the License.

				 *

				 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

				 * or https://opensource.org/licenses/CDDL-1.0.

				 * See the License for the specific language governing permissions

				 * and limitations under the License.

				 *

				 * When distributing Covered Code, include this CDDL HEADER in each

				 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

				 * If applicable, add the following below this CDDL HEADER, with the

				 * fields enclosed by brackets "[]" replaced with your own identifying

				 * information: Portions Copyright [yyyy] [name of copyright owner]

				 *

				 * CDDL HEADER END

				 */

				/*

				 * Copyright (c) 2025, Rob Norris <robn@despairlabs.com>

				 */

				#ifndef _ZFS_LINUX_KASAN_ENABLED_H

				#define	_ZFS_LINUX_KASAN_ENABLED_H

				#ifdef HAVE_KASAN_ENABLED_GPL_ONLY

				/*

				 * The kernel supports a runtime setting to enable/disable KASAN. The control

				 * flag kasan_flag_enabled is a GPL-only symbol, which prevents us from

				 * accessing it. Unfortunately, it is referenced by the header function

				 * kasan_enabled(), which in turn is used to call or skip instrumentation

				 * functions in various header-based kernel facilities. If we inadvertently

				 * call one, the build breaks.

				 *

				 * To work around this, we define our own `kasan_flag_enabled` set to "false",

				 * disabling use of KASAN inside our code. The linker will resolve this symbol

				 * at build time, and so never need to reach out to the off-limits kernel

				 * symbol.

				 */

				#include <linux/static_key.h>

				struct static_key_false kasan_flag_enabled = STATIC_KEY_FALSE_INIT;

				#endif

				#endif

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
									
		+104
		-38
	
												View File
												
				@@ -117,13 +117,17 @@ static int zfs_snapshot_no_setuid = 0;

				typedef struct {

					char		*se_name;	/* full snapshot name */

					char		*se_path;	/* full mount path */

					spa_t		*se_spa;	/* pool spa */

					spa_t		*se_spa;	/* pool spa (NULL if pending) */

					uint64_t	se_objsetid;	/* snapshot objset id */

					struct dentry   *se_root_dentry; /* snapshot root dentry */

					taskqid_t	se_taskqid;	/* scheduled unmount taskqid */

					avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */

					avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */

					zfs_refcount_t	se_refcount;	/* reference count */

					kmutex_t	se_mtx;		/* protects se_mounting and se_cv */

					kcondvar_t	se_cv;		/* signal mount completion */

					boolean_t	se_mounting;	/* mount operation in progress */

					int		se_mount_error;	/* error from failed mount */

				} zfs_snapentry_t;

				static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);

				@@ -146,6 +150,10 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,

					se->se_objsetid = objsetid;

					se->se_root_dentry = root_dentry;

					se->se_taskqid = TASKQID_INVALID;

					mutex_init(&se->se_mtx, NULL, MUTEX_DEFAULT, NULL);

					cv_init(&se->se_cv, NULL, CV_DEFAULT, NULL);

					se->se_mounting = B_FALSE;

					se->se_mount_error = 0;

					zfs_refcount_create(&se->se_refcount);

				@@ -162,6 +170,8 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)

					zfs_refcount_destroy(&se->se_refcount);

					kmem_strfree(se->se_name);

					kmem_strfree(se->se_path);

					mutex_destroy(&se->se_mtx);

					cv_destroy(&se->se_cv);

					kmem_free(se, sizeof (zfs_snapentry_t));

				}

				@@ -187,9 +197,9 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se)

				}

				/*

				 * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and

				 * zfs_snapshots_by_objsetid trees.  While the zfs_snapentry_t is part

				 * of the trees a reference is held.

				 * Add a zfs_snapentry_t to the zfs_snapshots_by_name tree.  If the entry

				 * is not pending (se_spa != NULL), also add to zfs_snapshots_by_objsetid.

				 * While the zfs_snapentry_t is part of the trees a reference is held.

				 */

				static void

				zfsctl_snapshot_add(zfs_snapentry_t *se)

				@@ -197,24 +207,42 @@ zfsctl_snapshot_add(zfs_snapentry_t *se)

					ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));

					zfsctl_snapshot_hold(se);

					avl_add(&zfs_snapshots_by_name, se);

					avl_add(&zfs_snapshots_by_objsetid, se);

					if (se->se_spa != NULL)

						avl_add(&zfs_snapshots_by_objsetid, se);

				}

				/*

				 * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and

				 * zfs_snapshots_by_objsetid trees.  Upon removal a reference is dropped,

				 * this can result in the structure being freed if that was the last

				 * remaining reference.

				 * Remove a zfs_snapentry_t from the zfs_snapshots_by_name tree and

				 * zfs_snapshots_by_objsetid tree (if not pending).  Upon removal a

				 * reference is dropped, this can result in the structure being freed

				 * if that was the last remaining reference.

				 */

				static void

				zfsctl_snapshot_remove(zfs_snapentry_t *se)

				{

					ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));

					avl_remove(&zfs_snapshots_by_name, se);

					avl_remove(&zfs_snapshots_by_objsetid, se);

					if (se->se_spa != NULL)

						avl_remove(&zfs_snapshots_by_objsetid, se);

					zfsctl_snapshot_rele(se);

				}

				/*

				 * Fill a pending zfs_snapentry_t after mount succeeds.  Fills in the

				 * remaining fields and adds the entry to the zfs_snapshots_by_objsetid tree.

				 */

				static void

				zfsctl_snapshot_fill(zfs_snapentry_t *se, spa_t *spa, uint64_t objsetid,

				    struct dentry *root_dentry)

				{

					ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));

					ASSERT3P(se->se_spa, ==, NULL);

					se->se_spa = spa;

					se->se_objsetid = objsetid;

					se->se_root_dentry = root_dentry;

					avl_add(&zfs_snapshots_by_objsetid, se);

				}

				/*

				 * Snapshot name comparison function for the zfs_snapshots_by_name.

				 */

				@@ -312,6 +340,11 @@ zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)

					se = zfsctl_snapshot_find_by_name(old_snapname);

					if (se == NULL)

						return (SET_ERROR(ENOENT));

					if (se->se_spa == NULL) {

						/* Snapshot mount is in progress */

						zfsctl_snapshot_rele(se);

						return (SET_ERROR(EBUSY));

					}

					zfsctl_snapshot_remove(se);

					kmem_strfree(se->se_name);

				@@ -430,26 +463,6 @@ zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)

					return (error);

				}

				/*

				 * Check if snapname is currently mounted.  Returned non-zero when mounted

				 * and zero when unmounted.

				 */

				static boolean_t

				zfsctl_snapshot_ismounted(const char *snapname)

				{

					zfs_snapentry_t *se;

					boolean_t ismounted = B_FALSE;

					rw_enter(&zfs_snapshot_lock, RW_READER);

					if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {

						zfsctl_snapshot_rele(se);

						ismounted = B_TRUE;

					}

					rw_exit(&zfs_snapshot_lock);

					return (ismounted);

				}

				/*

				 * Check if the given inode is a part of the virtual .zfs directory.

				 */

				@@ -1131,6 +1144,14 @@ zfsctl_snapshot_unmount(const char *snapname, int flags)

					}

					rw_exit(&zfs_snapshot_lock);

					/*

					 * Wait for any pending auto-mount to complete before unmounting.

					 */

					mutex_enter(&se->se_mtx);

					while (se->se_mounting)

						cv_wait(&se->se_cv, &se->se_mtx);

					mutex_exit(&se->se_mtx);

					exportfs_flush();

					if (flags & MNT_FORCE)

				@@ -1232,14 +1253,35 @@ zfsctl_snapshot_mount(struct path *path, int flags)

					    zfs_snapshot_no_setuid ? "nosuid" : "suid");

					/*

					 * Multiple concurrent automounts of a snapshot are never allowed.

					 * The snapshot may be manually mounted as many times as desired.

					 * Check if the snapshot is already mounted or being mounted. If found,

					 * wait for any pending mount to complete and return its result.

					 */

					if (zfsctl_snapshot_ismounted(full_name)) {

						error = 0;

					rw_enter(&zfs_snapshot_lock, RW_WRITER);

					if ((se = zfsctl_snapshot_find_by_name(full_name)) != NULL) {

						rw_exit(&zfs_snapshot_lock);

						mutex_enter(&se->se_mtx);

						while (se->se_mounting)

							cv_wait(&se->se_cv, &se->se_mtx);

						/*

						 * Return the same error as the first mount attempt (0 if

						 * succeeded, error code if failed).

						 */

						error = se->se_mount_error;

						mutex_exit(&se->se_mtx);

						zfsctl_snapshot_rele(se);

						goto error;

					}

					/*

					 * Create pending entry and mark mount in progress.

					 */

					se = zfsctl_snapshot_alloc(full_name, full_path, NULL, 0, NULL);

					se->se_mounting = B_TRUE;

					zfsctl_snapshot_add(se);

					zfsctl_snapshot_hold(se);

					rw_exit(&zfs_snapshot_lock);

					/*

					 * Attempt to mount the snapshot from user space.  Normally this

					 * would be done using the vfs_kern_mount() function, however that

				@@ -1258,6 +1300,9 @@ zfsctl_snapshot_mount(struct path *path, int flags)

					argv[9] = full_path;

					error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);

					if (error) {

						/*

						 * Mount failed - cleanup pending entry and signal waiters.

						 */

						if (!(error & MOUNT_BUSY << 8)) {

							zfs_dbgmsg("Unable to automount %s error=%d",

							    full_path, error);

				@@ -1273,6 +1318,16 @@ zfsctl_snapshot_mount(struct path *path, int flags)

							 */

							error = 0;

						}

						rw_enter(&zfs_snapshot_lock, RW_WRITER);

						zfsctl_snapshot_remove(se);

						rw_exit(&zfs_snapshot_lock);

						mutex_enter(&se->se_mtx);

						se->se_mount_error = error;

						se->se_mounting = B_FALSE;

						cv_broadcast(&se->se_cv);

						mutex_exit(&se->se_mtx);

						zfsctl_snapshot_rele(se);

						goto error;

					}

				@@ -1289,14 +1344,25 @@ zfsctl_snapshot_mount(struct path *path, int flags)

						spath.mnt->mnt_flags |= MNT_SHRINKABLE;

						rw_enter(&zfs_snapshot_lock, RW_WRITER);

						se = zfsctl_snapshot_alloc(full_name, full_path,

						    snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),

						    dentry);

						zfsctl_snapshot_add(se);

						zfsctl_snapshot_fill(se, snap_zfsvfs->z_os->os_spa,

						    dmu_objset_id(snap_zfsvfs->z_os), dentry);

						zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);

						rw_exit(&zfs_snapshot_lock);

					} else {

						rw_enter(&zfs_snapshot_lock, RW_WRITER);

						zfsctl_snapshot_remove(se);

						rw_exit(&zfs_snapshot_lock);

					}

					path_put(&spath);

					/*

					 * Signal mount completion and cleanup.

					 */

					mutex_enter(&se->se_mtx);

					se->se_mounting = B_FALSE;

					cv_broadcast(&se->se_cv);

					mutex_exit(&se->se_mtx);

					zfsctl_snapshot_rele(se);

				error:

					kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);

					kmem_free(full_path, MAXPATHLEN);

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
									
		+6
		-4
	
												View File
												
				@@ -100,15 +100,17 @@ zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)

					while (n && uio->uio_resid) {

						void *paddr;

						cnt = MIN(bv->bv_len - skip, n);

						size_t offset = bv->bv_offset + skip;

						cnt = MIN(PAGE_SIZE - (offset & ~PAGE_MASK),

						    MIN(bv->bv_len - skip, n));

						paddr = zfs_kmap_local(bv->bv_page);

						paddr = zfs_kmap_local(bv->bv_page + (offset >> PAGE_SHIFT));

						if (rw == UIO_READ) {

							/* Copy from buffer 'p' to the bvec data */

							memcpy(paddr + bv->bv_offset + skip, p, cnt);

							memcpy(paddr + (offset & ~PAGE_MASK), p, cnt);

						} else {

							/* Copy from bvec data to buffer 'p' */

							memcpy(p, paddr + bv->bv_offset + skip, cnt);

							memcpy(p, paddr + (offset & ~PAGE_MASK), cnt);

						}

						zfs_kunmap_local(paddr);

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
									
		-1
	
												View File
												
				@@ -31,7 +31,6 @@

				/* Portions Copyright 2007 Jeremy Teo */

				/* Portions Copyright 2010 Robert Milkowski */

				#include <sys/types.h>

				#include <sys/param.h>

				#include <sys/time.h>

									
										sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c
									
		+4
		-3
	
												View File
												
				@@ -95,11 +95,12 @@ zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)

					}

					/*

					 * If we need to grow the block size then lock the whole file range.

					 * If we might grow the block size then lock the whole file range.

					 * NB: this test should match the check in zfs_grow_blocksize

					 */

					uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);

					if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||

					    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {

					if (zp->z_size <= zp->z_blksz && end_size > zp->z_blksz &&

					    (!ISP2(zp->z_blksz) || zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {

						new->lr_offset = 0;

						new->lr_length = UINT64_MAX;

					}

									
										sys/contrib/openzfs/module/zfs/abd.c
									
		+8
		-8
	
												View File
												
				@@ -1111,13 +1111,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,

						func_raidz_gen(caddrs, daddr, len, dlen);

						for (i = parity-1; i >= 0; i--) {

							abd_iter_unmap(&caiters[i]);

							c_cabds[i] =

							    abd_advance_abd_iter(cabds[i], c_cabds[i],

							    &caiters[i], len);

						}

						if (dsize > 0) {

							abd_iter_unmap(&daiter);

							c_dabd =

				@@ -1126,6 +1119,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,

							dsize -= dlen;

						}

						for (i = parity - 1; i >= 0; i--) {

							abd_iter_unmap(&caiters[i]);

							c_cabds[i] =

							    abd_advance_abd_iter(cabds[i], c_cabds[i],

							    &caiters[i], len);

						}

						csize -= len;

					}

					abd_exit_critical(flags);

				@@ -1194,7 +1194,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,

						func_raidz_rec(xaddrs, len, caddrs, mul);

						for (i = parity-1; i >= 0; i--) {

						for (i = parity - 1; i >= 0; i--) {

							abd_iter_unmap(&xiters[i]);

							abd_iter_unmap(&citers[i]);

							c_tabds[i] =

									
										sys/contrib/openzfs/module/zfs/arc.c
									
		+53
		-40
	
												View File
												
				@@ -371,6 +371,12 @@ static uint_t zfs_arc_eviction_pct = 200;

				 */

				static uint_t zfs_arc_evict_batch_limit = 10;

				/*

				 * Number of batches to process per parallel eviction task under heavy load to

				 * reduce the number of context switches.

				 */

				static uint_t zfs_arc_evict_batches_limit = 5;

				/* number of seconds before growing cache again */

				uint_t arc_grow_retry = 5;

				@@ -406,8 +412,8 @@ uint_t		arc_no_grow_shift = 5;

				 * minimum lifespan of a prefetch block in clock ticks

				 * (initialized in arc_init())

				 */

				static uint_t		arc_min_prefetch_ms;

				static uint_t		arc_min_prescient_prefetch_ms;

				static uint_t		arc_min_prefetch;

				static uint_t		arc_min_prescient_prefetch;

				/*

				 * If this percent of memory is free, don't throttle.

				@@ -3766,8 +3772,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)

				{

					arc_state_t *evicted_state, *state;

					int64_t bytes_evicted = 0;

					uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?

					    arc_min_prescient_prefetch_ms : arc_min_prefetch_ms;

					ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));

					ASSERT(HDR_HAS_L1HDR(hdr));

				@@ -3824,9 +3828,10 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)

					    ((state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost);

					/* prefetch buffers have a minimum lifespan */

					uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?

					    arc_min_prescient_prefetch : arc_min_prefetch;

					if ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&

					    ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <

					    MSEC_TO_TICK(min_lifetime)) {

					    ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < min_lifetime) {

						ARCSTAT_BUMP(arcstat_evict_skip);

						return (bytes_evicted);

					}

				@@ -3900,7 +3905,7 @@ arc_set_need_free(void)

				static uint64_t

				arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,

				    uint64_t spa, uint64_t bytes)

				    uint64_t spa, uint64_t bytes, boolean_t *more)

				{

					multilist_sublist_t *mls;

					uint64_t bytes_evicted = 0, real_evicted = 0;

				@@ -3984,6 +3989,10 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,

					multilist_sublist_unlock(mls);

					/* Indicate if another iteration may be productive. */

					if (more)

						*more = (hdr != NULL);

					/*

					 * Increment the count of evicted bytes, and wake up any threads that

					 * are waiting for the count to reach this value.  Since the list is

				@@ -4004,21 +4013,12 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,

						while ((aw = list_head(&arc_evict_waiters)) != NULL &&

						    aw->aew_count <= arc_evict_count) {

							list_remove(&arc_evict_waiters, aw);

							cv_broadcast(&aw->aew_cv);

							cv_signal(&aw->aew_cv);

						}

					}

					arc_set_need_free();

					mutex_exit(&arc_evict_lock);

					/*

					 * If the ARC size is reduced from arc_c_max to arc_c_min (especially

					 * if the average cached block is small), eviction can be on-CPU for

					 * many seconds.  To ensure that other threads that may be bound to

					 * this CPU are able to make progress, make a voluntary preemption

					 * call here.

					 */

					kpreempt(KPREEMPT_SYNC);

					return (bytes_evicted);

				}

				@@ -4079,8 +4079,18 @@ static void

				arc_evict_task(void *arg)

				{

					evict_arg_t *eva = arg;

					eva->eva_evicted = arc_evict_state_impl(eva->eva_ml, eva->eva_idx,

					    eva->eva_marker, eva->eva_spa, eva->eva_bytes);

					uint64_t total_evicted = 0;

					boolean_t more;

					uint_t batches = zfs_arc_evict_batches_limit;

					/* Process multiple batches to amortize taskq dispatch overhead. */

					do {

						total_evicted += arc_evict_state_impl(eva->eva_ml,

						    eva->eva_idx, eva->eva_marker, eva->eva_spa,

						    eva->eva_bytes - total_evicted, &more);

					} while (total_evicted < eva->eva_bytes && --batches > 0 && more);

					eva->eva_evicted = total_evicted;

				}

				static void

				@@ -4221,18 +4231,19 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,

							if (bytes == ARC_EVICT_ALL) {

								evict = bytes;

							} else if (left > ntasks * MIN_EVICT_SIZE) {

							} else if (left >= ntasks * MIN_EVICT_SIZE) {

								evict = DIV_ROUND_UP(left, ntasks);

							} else {

								ntasks = DIV_ROUND_UP(left, MIN_EVICT_SIZE);

								if (ntasks == 1)

								ntasks = left / MIN_EVICT_SIZE;

								if (ntasks < 2)

									use_evcttq = B_FALSE;

								else

									evict = DIV_ROUND_UP(left, ntasks);

							}

						}

						for (int i = 0; sublists_left > 0; i++, sublist_idx++,

						    sublists_left--) {

							uint64_t bytes_remaining;

							uint64_t bytes_evicted;

							/* we've reached the end, wrap to the beginning */

				@@ -4254,16 +4265,17 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,

								continue;

							}

							if (total_evicted < bytes)

								bytes_remaining = bytes - total_evicted;

							else

								break;

							bytes_evicted = arc_evict_state_impl(ml, sublist_idx,

							    markers[sublist_idx], spa, bytes_remaining);

							    markers[sublist_idx], spa, bytes - total_evicted,

							    NULL);

							scan_evicted += bytes_evicted;

							total_evicted += bytes_evicted;

							if (total_evicted < bytes)

								kpreempt(KPREEMPT_SYNC);

							else

								break;

						}

						if (use_evcttq) {

				@@ -4838,8 +4850,7 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)

					 */

					return ((zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_DATA]) +

					    zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]) &&

					    ddi_get_lbolt() - arc_last_uncached_flush >

					    MSEC_TO_TICK(arc_min_prefetch_ms / 2)));

					    ddi_get_lbolt() - arc_last_uncached_flush > arc_min_prefetch / 2));

				}

				/*

				@@ -4889,7 +4900,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)

						 */

						arc_evict_waiter_t *aw;

						while ((aw = list_remove_head(&arc_evict_waiters)) != NULL) {

							cv_broadcast(&aw->aew_cv);

							cv_signal(&aw->aew_cv);

						}

						arc_set_need_free();

					}

				@@ -5170,9 +5181,8 @@ arc_wait_for_eviction(uint64_t amount, boolean_t lax, boolean_t use_reserve)

						uint64_t last_count = 0;

						mutex_enter(&arc_evict_lock);

						if (!list_is_empty(&arc_evict_waiters)) {

							arc_evict_waiter_t *last =

							    list_tail(&arc_evict_waiters);

						arc_evict_waiter_t *last;

						if ((last = list_tail(&arc_evict_waiters)) != NULL) {

							last_count = last->aew_count;

						} else if (!arc_evict_needed) {

							arc_evict_needed = B_TRUE;

				@@ -7593,12 +7603,12 @@ arc_tuning_update(boolean_t verbose)

					/* Valid range: 1 - N ms */

					if (zfs_arc_min_prefetch_ms)

						arc_min_prefetch_ms = zfs_arc_min_prefetch_ms;

						arc_min_prefetch = MSEC_TO_TICK(zfs_arc_min_prefetch_ms);

					/* Valid range: 1 - N ms */

					if (zfs_arc_min_prescient_prefetch_ms) {

						arc_min_prescient_prefetch_ms =

						    zfs_arc_min_prescient_prefetch_ms;

						arc_min_prescient_prefetch =

						    MSEC_TO_TICK(zfs_arc_min_prescient_prefetch_ms);

					}

					/* Valid range: 0 - 100 */

				@@ -7982,8 +7992,8 @@ arc_init(void)

					list_create(&arc_evict_waiters, sizeof (arc_evict_waiter_t),

					    offsetof(arc_evict_waiter_t, aew_node));

					arc_min_prefetch_ms = 1000;

					arc_min_prescient_prefetch_ms = 6000;

					arc_min_prefetch = MSEC_TO_TICK(1000);

					arc_min_prescient_prefetch = MSEC_TO_TICK(6000);

				#if defined(_KERNEL)

					arc_lowmem_init();

				@@ -11290,6 +11300,9 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,

				ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, UINT, ZMOD_RW,

					"The number of headers to evict per sublist before moving to the next");

				ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batches_limit, UINT, ZMOD_RW,

					"The number of batches to run per parallel eviction task");

				ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,

					"Number of arc_prune threads");

									
										sys/contrib/openzfs/module/zfs/ddt.c
									
		+103
		-66
	
												View File
												
				@@ -407,6 +407,9 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

					VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));

					ASSERT3U(*objectp, !=, 0);

					VERIFY0(dnode_hold(os, *objectp, ddt,

					    &ddt->ddt_object_dnode[type][class]));

					ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED);

					VERIFY0(zap_add(os, ddt->ddt_dir_object, name, sizeof (uint64_t), 1,

				@@ -437,6 +440,10 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

					VERIFY0(count);

					VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));

					VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));

					if (ddt->ddt_object_dnode[type][class] != NULL) {

						dnode_rele(ddt->ddt_object_dnode[type][class], ddt);

						ddt->ddt_object_dnode[type][class] = NULL;

					}

					VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));

					memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));

				@@ -468,28 +475,38 @@ ddt_object_load(ddt_t *ddt, ddt_type_t type, ddt_class_t class)

					if (error != 0)

						return (error);

					error = dnode_hold(ddt->ddt_os, ddt->ddt_object[type][class], ddt,

					    &ddt->ddt_object_dnode[type][class]);

					if (error != 0)

						return (error);

					error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,

					    sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),

					    &ddt->ddt_histogram[type][class]);

					if (error != 0)

						return (error);

						goto error;

					/*

					 * Seed the cached statistics.

					 */

					error = ddt_object_info(ddt, type, class, &doi);

					if (error)

						return (error);

						goto error;

					error = ddt_object_count(ddt, type, class, &count);

					if (error)

						return (error);

						goto error;

					ddo->ddo_count = count;

					ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;

					ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;

					return (0);

				error:

					dnode_rele(ddt->ddt_object_dnode[type][class], ddt);

					ddt->ddt_object_dnode[type][class] = NULL;

					return (error);

				}

				static void

				@@ -528,11 +545,11 @@ static int

				ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    ddt_entry_t *dde)

				{

					if (!ddt_object_exists(ddt, type, class))

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					if (dn == NULL)

						return (SET_ERROR(ENOENT));

					return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,

					    ddt->ddt_object[type][class], &dde->dde_key,

					return (ddt_ops[type]->ddt_op_lookup(dn, &dde->dde_key,

					    dde->dde_phys, DDT_PHYS_SIZE(ddt)));

				}

				@@ -540,42 +557,42 @@ static int

				ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    const ddt_key_t *ddk)

				{

					if (!ddt_object_exists(ddt, type, class))

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					if (dn == NULL)

						return (SET_ERROR(ENOENT));

					return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,

					    ddt->ddt_object[type][class], ddk));

					return (ddt_ops[type]->ddt_op_contains(dn, ddk));

				}

				static void

				ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    const ddt_key_t *ddk)

				{

					if (!ddt_object_exists(ddt, type, class))

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					if (dn == NULL)

						return;

					ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,

					    ddt->ddt_object[type][class], ddk);

					ddt_ops[type]->ddt_op_prefetch(dn, ddk);

				}

				static void

				ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)

				{

					if (!ddt_object_exists(ddt, type, class))

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					if (dn == NULL)

						return;

					ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os,

					    ddt->ddt_object[type][class]);

					ddt_ops[type]->ddt_op_prefetch_all(dn);

				}

				static int

				ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    const ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)

				{

					ASSERT(ddt_object_exists(ddt, type, class));

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					ASSERT(dn != NULL);

					return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,

					    ddt->ddt_object[type][class], &ddlwe->ddlwe_key,

					return (ddt_ops[type]->ddt_op_update(dn, &ddlwe->ddlwe_key,

					    &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt), tx));

				}

				@@ -583,20 +600,20 @@ static int

				ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    const ddt_key_t *ddk, dmu_tx_t *tx)

				{

					ASSERT(ddt_object_exists(ddt, type, class));

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					ASSERT(dn != NULL);

					return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,

					    ddt->ddt_object[type][class], ddk, tx));

					return (ddt_ops[type]->ddt_op_remove(dn, ddk, tx));

				}

				int

				ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    uint64_t *walk, ddt_lightweight_entry_t *ddlwe)

				{

					ASSERT(ddt_object_exists(ddt, type, class));

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					ASSERT(dn != NULL);

					int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os,

					    ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key,

					int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,

					    &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));

					if (error == 0) {

						ddlwe->ddlwe_type = type;

				@@ -610,10 +627,10 @@ int

				ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,

				    uint64_t *count)

				{

					ASSERT(ddt_object_exists(ddt, type, class));

					dnode_t *dn = ddt->ddt_object_dnode[type][class];

					ASSERT(dn != NULL);

					return (ddt_ops[type]->ddt_op_count(ddt->ddt_os,

					    ddt->ddt_object[type][class], count));

					return (ddt_ops[type]->ddt_op_count(dn, count));

				}

				int

				@@ -1037,13 +1054,6 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)

				{

					ASSERT(MUTEX_HELD(&ddt->ddt_lock));

					/* Entry is still in the log, so charge the entry back to it */

					if (dde->dde_flags & DDE_FLAG_LOGGED) {

						ddt_lightweight_entry_t ddlwe;

						DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);

						ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);

					}

					avl_remove(&ddt->ddt_tree, dde);

					ddt_free(ddt, dde);

				}

				@@ -1234,63 +1244,61 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t verify)

					/* Time to make a new entry. */

					dde = ddt_alloc(ddt, &search);

					/* Record the time this class was created (used by ddt prune) */

					if (ddt->ddt_flags & DDT_FLAG_FLAT)

						dde->dde_phys->ddp_flat.ddp_class_start = ddt_class_start();

					avl_insert(&ddt->ddt_tree, dde, where);

					/* If its in the log tree, we can "load" it from there */

					/*

					 * The entry in ddt_tree has no DDE_FLAG_LOADED, so other possible

					 * threads will wait even while we drop the lock.

					 */

					ddt_exit(ddt);

					/*

					 * If there is a log, we should try to "load" from there first.

					 */

					if (ddt->ddt_flags & DDT_FLAG_LOG) {

						ddt_lightweight_entry_t ddlwe;

						boolean_t from_flushing;

						if (ddt_log_find_key(ddt, &search, &ddlwe)) {

							/*

							 * See if we have the key first, and if so, set up

							 * the entry.

							 */

						/* Read-only search, no locks needed (logs stable during I/O) */

						if (ddt_log_find_key(ddt, &search, &ddlwe, &from_flushing)) {

							dde->dde_type = ddlwe.ddlwe_type;

							dde->dde_class = ddlwe.ddlwe_class;

							memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,

							    DDT_PHYS_SIZE(ddt));

							/* Whatever we found isn't valid for this BP, eject */

							if (verify &&

							    !ddt_entry_lookup_is_valid(ddt, bp, dde)) {

							/*

							 * Check validity. If invalid and no waiters, clean up

							 * immediately. Otherwise continue setup for waiters.

							 */

							boolean_t valid = !verify ||

							    ddt_entry_lookup_is_valid(ddt, bp, dde);

							ddt_enter(ddt);

							if (!valid && dde->dde_waiters == 0) {

								avl_remove(&ddt->ddt_tree, dde);

								ddt_free(ddt, dde);

								return (NULL);

							}

							/* Remove it and count it */

							if (ddt_log_remove_key(ddt,

							    ddt->ddt_log_active, &search)) {

								DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);

							} else {

								VERIFY(ddt_log_remove_key(ddt,

								    ddt->ddt_log_flushing, &search));

							dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;

							if (from_flushing) {

								dde->dde_flags |= DDE_FLAG_FROM_FLUSHING;

								DDT_KSTAT_BUMP(ddt,

								    dds_lookup_log_flushing_hit);

							} else {

								DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);

							}

							dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;

							DDT_KSTAT_BUMP(ddt, dds_lookup_log_hit);

							DDT_KSTAT_BUMP(ddt, dds_lookup_existing);

							return (dde);

							cv_broadcast(&dde->dde_cv);

							return (valid ? dde : NULL);

						}

						DDT_KSTAT_BUMP(ddt, dds_lookup_log_miss);

					}

					/*

					 * ddt_tree is now stable, so unlock and let everyone else keep moving.

					 * Anyone landing on this entry will find it without DDE_FLAG_LOADED,

					 * and go to sleep waiting for it above.

					 */

					ddt_exit(ddt);

					/* Search all store objects for the entry. */

					error = ENOENT;

					for (type = 0; type < DDT_TYPES; type++) {

				@@ -1727,6 +1735,15 @@ ddt_table_free(ddt_t *ddt)

					wmsum_fini(&ddt->ddt_kstat_dds_lookup_stored_miss);

					ddt_log_free(ddt);

					for (ddt_type_t type = 0; type < DDT_TYPES; type++) {

						for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {

							if (ddt->ddt_object_dnode[type][class] != NULL) {

								dnode_rele(ddt->ddt_object_dnode[type][class],

								    ddt);

								ddt->ddt_object_dnode[type][class] = NULL;

							}

						}

					}

					ASSERT0(avl_numnodes(&ddt->ddt_tree));

					ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));

					avl_destroy(&ddt->ddt_tree);

				@@ -2354,6 +2371,19 @@ ddt_sync_table_log(ddt_t *ddt, dmu_tx_t *tx)

						    avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {

							ASSERT(dde->dde_flags & DDE_FLAG_LOADED);

							DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);

							/* If from flushing log, remove it. */

							if (dde->dde_flags & DDE_FLAG_FROM_FLUSHING) {

								VERIFY(ddt_log_remove_key(ddt,

								    ddt->ddt_log_flushing, &ddlwe.ddlwe_key));

							}

							/* Update class_start to track last modification time */

							if (ddt->ddt_flags & DDT_FLAG_FLAT) {

								ddlwe.ddlwe_phys.ddp_flat.ddp_class_start =

								    ddt_class_start();

							}

							ddt_log_entry(ddt, &ddlwe, &dlu);

							ddt_sync_scan_entry(ddt, &ddlwe, tx);

							ddt_free(ddt, dde);

				@@ -2414,6 +2444,13 @@ ddt_sync_table_flush(ddt_t *ddt, dmu_tx_t *tx)

						ddt_lightweight_entry_t ddlwe;

						DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);

						/* Update class_start to track last modification time */

						if (ddt->ddt_flags & DDT_FLAG_FLAT) {

							ddlwe.ddlwe_phys.ddp_flat.ddp_class_start =

							    ddt_class_start();

						}

						ddt_sync_flush_entry(ddt, &ddlwe,

						    dde->dde_type, dde->dde_class, tx);

						ddt_sync_scan_entry(ddt, &ddlwe, tx);

				@@ -2765,7 +2802,7 @@ ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram)

						 * If this entry is on the log, then the stored entry is stale

						 * and we should skip it.

						 */

						if (ddt_log_find_key(ddt, &ddlwe.ddlwe_key, NULL))

						if (ddt_log_find_key(ddt, &ddlwe.ddlwe_key, NULL, NULL))

							continue;

						/* prune older entries */

									
										sys/contrib/openzfs/module/zfs/ddt_log.c
									
		+22
		-10
	
												View File
												
				@@ -252,7 +252,8 @@ ddt_log_free_entry(ddt_t *ddt, ddt_log_entry_t *ddle)

				}

				static void

				ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)

				ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe,

				    boolean_t hist)

				{

					/* Create the log tree entry from a live or stored entry */

					avl_index_t where;

				@@ -262,7 +263,13 @@ ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)

						ddle = ddt_log_alloc_entry(ddt);

						ddle->ddle_key = ddlwe->ddlwe_key;

						avl_insert(&ddl->ddl_tree, ddle, where);

					} else if (hist) {

						ddt_lightweight_entry_t oddlwe;

						DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &oddlwe);

						ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &oddlwe);

					}

					if (hist)

						ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, ddlwe);

					ddle->ddle_type = ddlwe->ddlwe_type;

					ddle->ddle_class = ddlwe->ddlwe_class;

					memcpy(ddle->ddle_phys, &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));

				@@ -273,8 +280,7 @@ ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu)

				{

					ASSERT3U(dlu->dlu_dbp, !=, NULL);

					ddt_log_update_entry(ddt, ddt->ddt_log_active, ddlwe);

					ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, ddlwe);

					ddt_log_update_entry(ddt, ddt->ddt_log_active, ddlwe, B_TRUE);

					/* Get our block */

					ASSERT3U(dlu->dlu_block, <, dlu->dlu_ndbp);

				@@ -381,14 +387,20 @@ ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)

				boolean_t

				ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,

				    ddt_lightweight_entry_t *ddlwe)

				    ddt_lightweight_entry_t *ddlwe, boolean_t *from_flushing)

				{

					ddt_log_entry_t *ddle =

					    avl_find(&ddt->ddt_log_active->ddl_tree, ddk, NULL);

					if (!ddle)

					ddt_log_entry_t *ddle = avl_find(&ddt->ddt_log_active->ddl_tree,

					    ddk, NULL);

					if (ddle) {

						if (from_flushing)

							*from_flushing = B_FALSE;

					} else {

						ddle = avl_find(&ddt->ddt_log_flushing->ddl_tree, ddk, NULL);

					if (!ddle)

						return (B_FALSE);

						if (!ddle)

							return (B_FALSE);

						if (from_flushing)

							*from_flushing = B_TRUE;

					}

					if (ddlwe)

						DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);

					return (B_TRUE);

				@@ -524,7 +536,7 @@ ddt_log_load_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_log_record_t *dlr,

					ddlwe.ddlwe_key = dlre->dlre_key;

					memcpy(&ddlwe.ddlwe_phys, dlre->dlre_phys, DDT_PHYS_SIZE(ddt));

					ddt_log_update_entry(ddt, ddl, &ddlwe);

					ddt_log_update_entry(ddt, ddl, &ddlwe, B_FALSE);

				}

				static void

									
										sys/contrib/openzfs/module/zfs/ddt_zap.c
									
		+33
		-37
	
												View File
												
				@@ -33,6 +33,7 @@

				#include <sys/ddt_impl.h>

				#include <sys/zap.h>

				#include <sys/dmu_tx.h>

				#include <sys/dnode.h>

				#include <sys/zio_compress.h>

				static unsigned int ddt_zap_default_bs = 15;

				@@ -56,7 +57,7 @@ ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)

					/* Call compress function directly to avoid hole detection. */

					abd_t sabd, dabd;

					abd_get_from_buf_struct(&sabd, (void *)src, s_len);

					abd_get_from_buf_struct(&dabd, dst, d_len);

					abd_get_from_buf_struct(&dabd, dst, d_len - 1);

					c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);

					abd_free(&dabd);

					abd_free(&sabd);

				@@ -85,9 +86,10 @@ ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)

					}

					abd_t sabd, dabd;

					abd_get_from_buf_struct(&sabd, src, s_len);

					size_t c_len = s_len - 1;

					abd_get_from_buf_struct(&sabd, src, c_len);

					abd_get_from_buf_struct(&dabd, dst, d_len);

					VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));

					VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, c_len, d_len, NULL));

					abd_free(&dabd);

					abd_free(&sabd);

				@@ -120,54 +122,48 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)

				}

				static int

				ddt_zap_lookup(objset_t *os, uint64_t object,

				    const ddt_key_t *ddk, void *phys, size_t psize)

				ddt_zap_lookup(dnode_t *dn, const ddt_key_t *ddk, void *phys, size_t psize)

				{

					uchar_t *cbuf;

					uint64_t one, csize;

					uint64_t csize;

					int error;

					error = zap_length_uint64(os, object, (uint64_t *)ddk,

					    DDT_KEY_WORDS, &one, &csize);

					if (error)

						return (error);

					cbuf = kmem_alloc(psize + 1, KM_SLEEP);

					ASSERT3U(one, ==, 1);

					ASSERT3U(csize, <=, psize + 1);

					cbuf = kmem_alloc(csize, KM_SLEEP);

					error = zap_lookup_uint64(os, object, (uint64_t *)ddk,

					    DDT_KEY_WORDS, 1, csize, cbuf);

					if (error == 0)

					error = zap_lookup_length_uint64_by_dnode(dn, (uint64_t *)ddk,

					    DDT_KEY_WORDS, 1, psize + 1, cbuf, &csize);

					if (error == 0) {

						ASSERT3U(csize, <=, psize + 1);

						ddt_zap_decompress(cbuf, phys, csize, psize);

					}

					kmem_free(cbuf, csize);

					kmem_free(cbuf, psize + 1);

					return (error);

				}

				static int

				ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)

				ddt_zap_contains(dnode_t *dn, const ddt_key_t *ddk)

				{

					return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,

					    NULL, NULL));

					return (zap_length_uint64_by_dnode(dn, (uint64_t *)ddk,

					    DDT_KEY_WORDS, NULL, NULL));

				}

				static void

				ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)

				ddt_zap_prefetch(dnode_t *dn, const ddt_key_t *ddk)

				{

					(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);

					(void) zap_prefetch_uint64_by_dnode(dn, (uint64_t *)ddk,

					    DDT_KEY_WORDS);

				}

				static void

				ddt_zap_prefetch_all(objset_t *os, uint64_t object)

				ddt_zap_prefetch_all(dnode_t *dn)

				{

					(void) zap_prefetch_object(os, object);

					(void) zap_prefetch_object(dn->dn_objset, dn->dn_object);

				}

				static int

				ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,

				ddt_zap_update(dnode_t *dn, const ddt_key_t *ddk,

				    const void *phys, size_t psize, dmu_tx_t *tx)

				{

					const size_t cbuf_size = psize + 1;

				@@ -176,7 +172,7 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,

					uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);

					int error = zap_update_uint64(os, object, (uint64_t *)ddk,

					int error = zap_update_uint64_by_dnode(dn, (uint64_t *)ddk,

					    DDT_KEY_WORDS, 1, csize, cbuf, tx);

					kmem_free(cbuf, cbuf_size);

				@@ -185,15 +181,14 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,

				}

				static int

				ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,

				    dmu_tx_t *tx)

				ddt_zap_remove(dnode_t *dn, const ddt_key_t *ddk, dmu_tx_t *tx)

				{

					return (zap_remove_uint64(os, object, (uint64_t *)ddk,

					return (zap_remove_uint64_by_dnode(dn, (uint64_t *)ddk,

					    DDT_KEY_WORDS, tx));

				}

				static int

				ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,

				ddt_zap_walk(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,

				    void *phys, size_t psize)

				{

					zap_cursor_t zc;

				@@ -209,9 +204,10 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,

						 * scrub I/Os for each ZAP block that we read in, so

						 * reading the ZAP is unlikely to be the bottleneck.

						 */

						zap_cursor_init_noprefetch(&zc, os, object);

						zap_cursor_init_noprefetch(&zc, dn->dn_objset, dn->dn_object);

					} else {

						zap_cursor_init_serialized(&zc, os, object, *walk);

						zap_cursor_init_serialized(&zc, dn->dn_objset, dn->dn_object,

						    *walk);

					}

					if ((error = zap_cursor_retrieve(&zc, za)) == 0) {

						uint64_t csize = za->za_num_integers;

				@@ -221,7 +217,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,

						uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);

						error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,

						error = zap_lookup_uint64_by_dnode(dn, (uint64_t *)za->za_name,

						    DDT_KEY_WORDS, 1, csize, cbuf);

						ASSERT0(error);

						if (error == 0) {

				@@ -240,9 +236,9 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,

				}

				static int

				ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)

				ddt_zap_count(dnode_t *dn, uint64_t *count)

				{

					return (zap_count(os, object, count));

					return (zap_count_by_dnode(dn, count));

				}

				const ddt_ops_t ddt_zap_ops = {

									
										sys/contrib/openzfs/module/zfs/dsl_dataset.c
									
		+4
		-2
	
												View File
												
				@@ -161,7 +161,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)

					ASSERT3U(BP_GET_BIRTH(bp), >,

					    dsl_dataset_phys(ds)->ds_prev_snap_txg);

					dmu_buf_will_dirty(ds->ds_dbuf, tx);

					/* ds_dbuf is pre-dirtied in dsl_dataset_sync(). */

					ASSERT(dmu_buf_is_dirty(ds->ds_dbuf, tx));

					mutex_enter(&ds->ds_lock);

					delta = parent_delta(ds, used);

					dsl_dataset_phys(ds)->ds_referenced_bytes += used;

				@@ -274,7 +275,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,

					ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

					ASSERT(!ds->ds_is_snapshot);

					dmu_buf_will_dirty(ds->ds_dbuf, tx);

					/* ds_dbuf is pre-dirtied in dsl_dataset_sync(). */

					ASSERT(dmu_buf_is_dirty(ds->ds_dbuf, tx));

					/*

					 * Track block for livelist, but ignore embedded blocks because

									
										sys/contrib/openzfs/module/zfs/dsl_scan.c
									
		+71
		-42
	
												View File
												
				@@ -189,16 +189,16 @@ static uint_t zfs_scan_mem_lim_fact = 20;

				static uint_t zfs_scan_mem_lim_soft_fact = 20;

				/* minimum milliseconds to scrub per txg */

				static uint_t zfs_scrub_min_time_ms = 1000;

				static uint_t zfs_scrub_min_time_ms = 750;

				/* minimum milliseconds to obsolete per txg */

				static uint_t zfs_obsolete_min_time_ms = 500;

				/* minimum milliseconds to free per txg */

				static uint_t zfs_free_min_time_ms = 1000;

				static uint_t zfs_free_min_time_ms = 500;

				/* minimum milliseconds to resilver per txg */

				static uint_t zfs_resilver_min_time_ms = 3000;

				static uint_t zfs_resilver_min_time_ms = 1500;

				static uint_t zfs_scan_checkpoint_intval = 7200; /* in seconds */

				int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */

				@@ -208,7 +208,13 @@ static const ddt_class_t zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;

				/* max number of blocks to free in a single TXG */

				static uint64_t zfs_async_block_max_blocks = UINT64_MAX;

				/* max number of dedup blocks to free in a single TXG */

				static uint64_t zfs_max_async_dedup_frees = 100000;

				static uint64_t zfs_max_async_dedup_frees = 250000;

				/*

				 * After freeing this many async ZIOs (dedup, clone, gang blocks), wait for

				 * them to complete before continuing.  This prevents unbounded I/O queueing.

				 */

				static uint64_t zfs_async_free_zio_wait_interval = 2000;

				/* set to disable resilver deferring */

				static int zfs_resilver_disable_defer = B_FALSE;

				@@ -217,16 +223,14 @@ static int zfs_resilver_disable_defer = B_FALSE;

				static uint_t zfs_resilver_defer_percent = 10;

				/*

				 * We wait a few txgs after importing a pool to begin scanning so that

				 * the import / mounting code isn't held up by scrub / resilver IO.

				 * Unfortunately, it is a bit difficult to determine exactly how long

				 * this will take since userspace will trigger fs mounts asynchronously

				 * and the kernel will create zvol minors asynchronously. As a result,

				 * the value provided here is a bit arbitrary, but represents a

				 * reasonable estimate of how many txgs it will take to finish fully

				 * importing a pool

				 * Number of TXGs to wait after importing before starting background

				 * work (async destroys, scan/scrub/resilver operations). This allows

				 * the import command and filesystem mounts to complete quickly without

				 * being delayed by background activities. The value is somewhat arbitrary

				 * since userspace triggers filesystem mounts asynchronously, but 5 TXGs

				 * provides a reasonable window for import completion in most cases.

				 */

				#define	SCAN_IMPORT_WAIT_TXGS 		5

				static uint_t zfs_import_defer_txgs = 5;

				#define	DSL_SCAN_IS_SCRUB_RESILVER(scn) \

					((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \

				@@ -1665,7 +1669,7 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)

					 *  or

					 *  - the scan queue has reached its memory use limit

					 */

					uint64_t curr_time_ns = gethrtime();

					uint64_t curr_time_ns = getlrtime();

					uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;

					uint64_t sync_time_ns = curr_time_ns -

					    scn->scn_dp->dp_spa->spa_sync_starttime;

				@@ -1727,7 +1731,7 @@ dsl_error_scrub_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)

					 *  - the spa is shutting down because this pool is being exported

					 *    or the machine is rebooting.

					 */

					uint64_t curr_time_ns = gethrtime();

					uint64_t curr_time_ns = getlrtime();

					uint64_t error_scrub_time_ns = curr_time_ns - scn->scn_sync_start_time;

					uint64_t sync_time_ns = curr_time_ns -

					    scn->scn_dp->dp_spa->spa_sync_starttime;

				@@ -3239,7 +3243,7 @@ static boolean_t

				scan_io_queue_check_suspend(dsl_scan_t *scn)

				{

					/* See comment in dsl_scan_check_suspend() */

					uint64_t curr_time_ns = gethrtime();

					uint64_t curr_time_ns = getlrtime();

					uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;

					uint64_t sync_time_ns = curr_time_ns -

					    scn->scn_dp->dp_spa->spa_sync_starttime;

				@@ -3592,12 +3596,12 @@ dsl_scan_async_block_should_pause(dsl_scan_t *scn)

					}

					if (zfs_max_async_dedup_frees != 0 &&

					    scn->scn_dedup_frees_this_txg >= zfs_max_async_dedup_frees) {

					    scn->scn_async_frees_this_txg >= zfs_max_async_dedup_frees) {

						return (B_TRUE);

					}

					elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;

					return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||

					elapsed_nanosecs = getlrtime() - scn->scn_sync_start_time;

					return (elapsed_nanosecs / (NANOSEC / 2) > zfs_txg_timeout ||

					    (NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms &&

					    txg_sync_waiting(scn->scn_dp)) ||

					    spa_shutting_down(scn->scn_dp->dp_spa));

				@@ -3614,14 +3618,32 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)

							return (SET_ERROR(ERESTART));

					}

					zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,

					    dmu_tx_get_txg(tx), bp, 0));

					zio_t *zio = zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,

					    dmu_tx_get_txg(tx), bp, 0);

					dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD,

					    -bp_get_dsize_sync(scn->scn_dp->dp_spa, bp),

					    -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx);

					scn->scn_visited_this_txg++;

					if (BP_GET_DEDUP(bp))

						scn->scn_dedup_frees_this_txg++;

					if (zio != NULL) {

						/*

						 * zio_free_sync() returned a ZIO, meaning this is an

						 * async I/O (dedup, clone or gang block).

						 */

						scn->scn_async_frees_this_txg++;

						zio_nowait(zio);

						/*

						 * After issuing N async ZIOs, wait for them to complete.

						 * This makes time limits work with actual I/O completion

						 * times, not just queuing times.

						 */

						uint64_t i = zfs_async_free_zio_wait_interval;

						if (i != 0 && (scn->scn_async_frees_this_txg % i) == 0) {

							VERIFY0(zio_wait(scn->scn_zio_root));

							scn->scn_zio_root = zio_root(scn->scn_dp->dp_spa, NULL,

							    NULL, ZIO_FLAG_MUSTSUCCEED);

						}

					}

					return (0);

				}

				@@ -3865,10 +3887,10 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx)

						    "free_bpobj/bptree on %s in txg %llu; err=%u",

						    (longlong_t)scn->scn_visited_this_txg,

						    (longlong_t)

						    NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),

						    NSEC2MSEC(getlrtime() - scn->scn_sync_start_time),

						    spa->spa_name, (longlong_t)tx->tx_txg, err);

						scn->scn_visited_this_txg = 0;

						scn->scn_dedup_frees_this_txg = 0;

						scn->scn_async_frees_this_txg = 0;

						/*

						 * Write out changes to the DDT and the BRT that may be required

				@@ -4196,14 +4218,14 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					}

					spa->spa_scrub_active = B_TRUE;

					scn->scn_sync_start_time = gethrtime();

					scn->scn_sync_start_time = getlrtime();

					/*

					 * zfs_scan_suspend_progress can be set to disable scrub progress.

					 * See more detailed comment in dsl_scan_sync().

					 */

					if (zfs_scan_suspend_progress) {

						uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;

						uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;

						int mintime = zfs_scrub_min_time_ms;

						while (zfs_scan_suspend_progress &&

				@@ -4211,7 +4233,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)

						    !spa_shutting_down(scn->scn_dp->dp_spa) &&

						    NSEC2MSEC(scan_time_ns) < mintime) {

							delay(hz);

							scan_time_ns = gethrtime() - scn->scn_sync_start_time;

							scan_time_ns = getlrtime() - scn->scn_sync_start_time;

						}

						return;

					}

				@@ -4394,6 +4416,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					if (spa_shutting_down(spa))

						return;

					/*

					 * Wait a few txgs after importing before doing background work

					 * (async destroys and scanning).  This should help the import

					 * command to complete quickly.

					 */

					if (spa->spa_syncing_txg < spa->spa_first_txg + zfs_import_defer_txgs)

						return;

					/*

					 * If the scan is inactive due to a stalled async destroy, try again.

					 */

				@@ -4402,7 +4432,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					/* reset scan statistics */

					scn->scn_visited_this_txg = 0;

					scn->scn_dedup_frees_this_txg = 0;

					scn->scn_async_frees_this_txg = 0;

					scn->scn_holes_this_txg = 0;

					scn->scn_lt_min_this_txg = 0;

					scn->scn_gt_max_this_txg = 0;

				@@ -4413,7 +4443,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					scn->scn_avg_zio_size_this_txg = 0;

					scn->scn_zios_this_txg = 0;

					scn->scn_suspending = B_FALSE;

					scn->scn_sync_start_time = gethrtime();

					scn->scn_sync_start_time = getlrtime();

					spa->spa_scrub_active = B_TRUE;

					/*

				@@ -4430,13 +4460,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn))

						return;

					/*

					 * Wait a few txgs after importing to begin scanning so that

					 * we can get the pool imported quickly.

					 */

					if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS)

						return;

					/*

					 * zfs_scan_suspend_progress can be set to disable scan progress.

					 * We don't want to spin the txg_sync thread, so we add a delay

				@@ -4444,7 +4467,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

					 * useful for testing and debugging.

					 */

					if (zfs_scan_suspend_progress) {

						uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;

						uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;

						uint_t mintime = (scn->scn_phys.scn_func ==

						    POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms :

						    zfs_scrub_min_time_ms;

				@@ -4454,7 +4477,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

						    !spa_shutting_down(scn->scn_dp->dp_spa) &&

						    NSEC2MSEC(scan_time_ns) < mintime) {

							delay(hz);

							scan_time_ns = gethrtime() - scn->scn_sync_start_time;

							scan_time_ns = getlrtime() - scn->scn_sync_start_time;

						}

						return;

					}

				@@ -4584,7 +4607,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

						    "%llu in ddt, %llu > maxtxg)",

						    (longlong_t)scn->scn_visited_this_txg,

						    spa->spa_name,

						    (longlong_t)NSEC2MSEC(gethrtime() -

						    (longlong_t)NSEC2MSEC(getlrtime() -

						    scn->scn_sync_start_time),

						    (longlong_t)scn->scn_objsets_visited_this_txg,

						    (longlong_t)scn->scn_holes_this_txg,

				@@ -4625,7 +4648,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)

						    (longlong_t)scn->scn_zios_this_txg,

						    spa->spa_name,

						    (longlong_t)scn->scn_segs_this_txg,

						    (longlong_t)NSEC2MSEC(gethrtime() -

						    (longlong_t)NSEC2MSEC(getlrtime() -

						    scn->scn_sync_start_time),

						    (longlong_t)scn->scn_avg_zio_size_this_txg,

						    (longlong_t)scn->scn_avg_seg_size_this_txg);

				@@ -5319,7 +5342,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, U64, ZMOD_RW,

					"Max number of blocks freed in one txg");

				ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, U64, ZMOD_RW,

					"Max number of dedup blocks freed in one txg");

					"Max number of dedup, clone or gang blocks freed in one txg");

				ZFS_MODULE_PARAM(zfs, zfs_, async_free_zio_wait_interval, U64, ZMOD_RW,

					"Wait for pending free I/Os after issuing this many asynchronously");

				ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,

					"Enable processing of the free_bpobj");

				@@ -5336,6 +5362,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_issue_strategy, UINT, ZMOD_RW,

				ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW,

					"Scrub using legacy non-sequential method");

				ZFS_MODULE_PARAM(zfs, zfs_, import_defer_txgs, UINT, ZMOD_RW,

					"Number of TXGs to defer background work after pool import");

				ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW,

					"Scan progress on-disk checkpointing interval");

									
										sys/contrib/openzfs/module/zfs/spa.c
									
		+1
		-1
	
												View File
												
				@@ -10449,7 +10449,7 @@ spa_sync(spa_t *spa, uint64_t txg)

					dsl_pool_t *dp = spa->spa_dsl_pool;

					dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);

					spa->spa_sync_starttime = gethrtime();

					spa->spa_sync_starttime = getlrtime();

					taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid, B_TRUE);

					spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,

									
										sys/contrib/openzfs/module/zfs/spa_misc.c
									
		+1
		-1
	
												View File
												
				@@ -720,7 +720,7 @@ spa_deadman(void *arg)

						return;

					zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",

					    (gethrtime() - spa->spa_sync_starttime) / NANOSEC,

					    (getlrtime() - spa->spa_sync_starttime) / NANOSEC,

					    (u_longlong_t)++spa->spa_deadman_calls);

					if (zfs_deadman_enabled)

						vdev_deadman(spa->spa_root_vdev, FTAG);

									
										sys/contrib/openzfs/module/zfs/vdev_raidz.c
									
		-16
	
												View File
												
				@@ -2703,16 +2703,6 @@ vdev_raidz_io_start(zio_t *zio)

								next_offset = synced_offset;

							}

						}

						if (use_scratch) {

							zfs_dbgmsg("zio=%px %s io_offset=%llu offset_synced="

							    "%lld next_offset=%lld use_scratch=%u",

							    zio,

							    zio->io_type == ZIO_TYPE_WRITE ? "WRITE" : "READ",

							    (long long)zio->io_offset,

							    (long long)synced_offset,

							    (long long)next_offset,

							    use_scratch);

						}

						rm = vdev_raidz_map_alloc_expanded(zio,

						    tvd->vdev_ashift, vdrz->vd_physical_width,

				@@ -2851,8 +2841,6 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)

							continue;

						if (abd_cmp(orig[c], rc->rc_abd) != 0) {

							zfs_dbgmsg("found error on col=%u devidx=%u off %llx",

							    c, (int)rc->rc_devidx, (u_longlong_t)rc->rc_offset);

							vdev_raidz_checksum_error(zio, rc, orig[c]);

							rc->rc_error = SET_ERROR(ECKSUM);

							ret++;

				@@ -3175,10 +3163,6 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)

							 */

							ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);

							zfs_dbgmsg("zio=%px repairing c=%u devidx=%u "

							    "offset=%llx",

							    zio, c, rc->rc_devidx, (long long)rc->rc_offset);

							zio_nowait(zio_vdev_child_io(zio, NULL, cvd,

							    rc->rc_offset, rc->rc_abd, rc->rc_size,

							    ZIO_TYPE_WRITE,

									
										sys/contrib/openzfs/module/zfs/zap.c
									
		+4
		-1
	
												View File
												
				@@ -878,7 +878,8 @@ fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)

				int

				fzap_lookup(zap_name_t *zn,

				    uint64_t integer_size, uint64_t num_integers, void *buf,

				    char *realname, int rn_len, boolean_t *ncp)

				    char *realname, int rn_len, boolean_t *ncp,

				    uint64_t *actual_num_integers)

				{

					zap_leaf_t *l;

					zap_entry_handle_t zeh;

				@@ -898,6 +899,8 @@ fzap_lookup(zap_name_t *zn,

						}

						err = zap_entry_read(&zeh, integer_size, num_integers, buf);

						if (err == 0 && actual_num_integers != NULL)

							*actual_num_integers = zeh.zeh_num_integers;

						(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);

						if (ncp) {

							*ncp = zap_entry_normalization_conflict(&zeh,

									
										sys/contrib/openzfs/module/zfs/zap_micro.c
									
		+70
		-10
	
												View File
												
				@@ -1049,6 +1049,24 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)

					return (err);

				}

				/*
				 * Store the number of entries of the ZAP object backed by dnode "dn"
				 * into *count.
				 *
				 * The ZAP is locked as RW_READER for the duration of the call.  For a
				 * micro ZAP the count is read from zap_m.zap_num_entries; otherwise
				 * it is obtained from fzap_count().
				 *
				 * Returns the error from zap_lockdir_by_dnode() (in which case *count
				 * is not written) or from fzap_count(); 0 on the micro ZAP path.
				 */
				int
				zap_count_by_dnode(dnode_t *dn, uint64_t *count)
				{
					zap_t *zap;
					int error;

					error = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
					    FTAG, &zap);
					if (error != 0)
						return (error);

					if (zap->zap_ismicro)
						*count = zap->zap_m.zap_num_entries;
					else
						error = fzap_count(zap, count);

					zap_unlockdir(zap, FTAG);
					return (error);
				}

				/*

				 * zn may be NULL; if not specified, it will be computed if needed.

				 * See also the comment above zap_entry_normalization_conflict().

				@@ -1127,7 +1145,7 @@ zap_lookup_impl(zap_t *zap, const char *name,

					if (!zap->zap_ismicro) {

						err = fzap_lookup(zn, integer_size, num_integers, buf,

						    realname, rn_len, ncp);

						    realname, rn_len, ncp, NULL);

					} else {

						zfs_btree_index_t idx;

						mzap_ent_t *mze = mze_find(zn, &idx);

				@@ -1282,8 +1300,9 @@ zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)

				}

				static int

				zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)

				zap_lookup_length_uint64_impl(zap_t *zap, const uint64_t *key,

				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,

				    uint64_t *actual_num_integers)

				{

					zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);

					if (zn == NULL) {

				@@ -1292,7 +1311,7 @@ zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,

					}

					int err = fzap_lookup(zn, integer_size, num_integers, buf,

					    NULL, 0, NULL);

					    NULL, 0, NULL, actual_num_integers);

					zap_name_free(zn);

					zap_unlockdir(zap, FTAG);

					return (err);

				@@ -1308,9 +1327,9 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

					    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);

					if (err != 0)

						return (err);

					err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,

					    num_integers, buf);

					/* zap_lookup_uint64_impl() calls zap_unlockdir() */

					err = zap_lookup_length_uint64_impl(zap, key, key_numints,

					    integer_size, num_integers, buf, NULL);

					/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */

					return (err);

				}

				@@ -1324,9 +1343,26 @@ zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,

					    zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);

					if (err != 0)

						return (err);

					err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,

					    num_integers, buf);

					/* zap_lookup_uint64_impl() calls zap_unlockdir() */

					err = zap_lookup_length_uint64_impl(zap, key, key_numints,

					    integer_size, num_integers, buf, NULL);

					/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */

					return (err);

				}

				/*
				 * Variant of zap_lookup_uint64_by_dnode() that additionally forwards
				 * "actual_num_integers" to zap_lookup_length_uint64_impl() (the plain
				 * lookup passes NULL there).
				 *
				 * The ZAP of "dn" is locked as RW_READER here; the impl function is
				 * responsible for calling zap_unlockdir(), so this wrapper must not
				 * unlock again after the call.
				 *
				 * Returns the zap_lockdir_by_dnode() error if locking fails, otherwise
				 * whatever the impl function returns.
				 */
				int
				zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
				    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
				    uint64_t *actual_num_integers)
				{
					zap_t *zap;
					int error;

					error = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
					    FTAG, &zap);
					if (error != 0)
						return (error);

					/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
					return (zap_lookup_length_uint64_impl(zap, key, key_numints,
					    integer_size, num_integers, buf, actual_num_integers));
				}

				@@ -1395,6 +1431,27 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,

					return (err);

				}

				/*
				 * Query the length of the entry stored under the uint64-array key
				 * "key" (key_numints integers long) in the ZAP backed by dnode "dn".
				 * The results are written by fzap_length() through "integer_size" and
				 * "num_integers".
				 *
				 * The ZAP is locked as RW_READER and unlocked again on every path
				 * after a successful lock.
				 *
				 * Returns the zap_lockdir_by_dnode() error if locking fails, ENOTSUP
				 * if zap_name_alloc_uint64() yields no name, otherwise the result of
				 * fzap_length().
				 */
				int
				zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
				    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
				{
					zap_t *zap;
					zap_name_t *name;
					int error;

					error = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
					    FTAG, &zap);
					if (error != 0)
						return (error);

					name = zap_name_alloc_uint64(zap, key, key_numints);
					if (name == NULL) {
						/* No name could be built: drop the lock before bailing. */
						zap_unlockdir(zap, FTAG);
						return (SET_ERROR(ENOTSUP));
					}

					error = fzap_length(name, integer_size, num_integers);

					zap_name_free(name);
					zap_unlockdir(zap, FTAG);
					return (error);
				}

				static void

				mzap_addent(zap_name_t *zn, uint64_t value)

				{

				@@ -2003,6 +2060,7 @@ EXPORT_SYMBOL(zap_lookup);

				EXPORT_SYMBOL(zap_lookup_by_dnode);

				EXPORT_SYMBOL(zap_lookup_norm);

				EXPORT_SYMBOL(zap_lookup_uint64);

				EXPORT_SYMBOL(zap_lookup_length_uint64_by_dnode);

				EXPORT_SYMBOL(zap_contains);

				EXPORT_SYMBOL(zap_prefetch);

				EXPORT_SYMBOL(zap_prefetch_uint64);

				@@ -2016,12 +2074,14 @@ EXPORT_SYMBOL(zap_update_uint64);

				EXPORT_SYMBOL(zap_update_uint64_by_dnode);

				EXPORT_SYMBOL(zap_length);

				EXPORT_SYMBOL(zap_length_uint64);

				EXPORT_SYMBOL(zap_length_uint64_by_dnode);

				EXPORT_SYMBOL(zap_remove);

				EXPORT_SYMBOL(zap_remove_by_dnode);

				EXPORT_SYMBOL(zap_remove_norm);

				EXPORT_SYMBOL(zap_remove_uint64);

				EXPORT_SYMBOL(zap_remove_uint64_by_dnode);

				EXPORT_SYMBOL(zap_count);

				EXPORT_SYMBOL(zap_count_by_dnode);

				EXPORT_SYMBOL(zap_value_search);

				EXPORT_SYMBOL(zap_join);

				EXPORT_SYMBOL(zap_join_increment);

									
										sys/contrib/openzfs/module/zfs/zfs_quota.c
									
		+4
		-4
	
												View File
												
				@@ -433,13 +433,13 @@ zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)

					} else {

						return (B_FALSE);

					}

					if (quotaobj == 0 && default_quota == 0)

						return (B_FALSE);

					if (zfsvfs->z_replay)

						return (B_FALSE);

					(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);

					if (quotaobj == 0) {

						if (default_quota == 0)

							return (B_FALSE);

						quota = default_quota;

					} else {

						err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);

				@@ -484,13 +484,13 @@ zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)

					} else {

						return (B_FALSE);

					}

					if (quotaobj == 0 && default_quota == 0)

						return (B_FALSE);

					if (zfsvfs->z_replay)

						return (B_FALSE);

					(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);

					if (quotaobj == 0) {

						if (default_quota == 0)

							return (B_FALSE);

						quota = default_quota;

					} else {

						err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);

									
										sys/contrib/openzfs/module/zfs/zio.c
									
		+18
		-5
	
												View File
												
				@@ -4067,19 +4067,21 @@ zio_ddt_write(zio_t *zio)

					/*

					 * We need to write. We will create a new write with the copies

					 * property adjusted to match the number of DVAs we need to need to

					 * grow the DDT entry by to satisfy the request.

					 * property adjusted to match the number of DVAs we need to grow

					 * the DDT entry by to satisfy the request.

					 */

					zio_prop_t czp = *zp;

					zio_prop_t czp;

					if (have_dvas > 0 || parent_dvas > 0) {

						czp = *zp;

						czp.zp_copies = need_dvas;

						czp.zp_gang_copies = 0;

						zp = &czp;

					} else {

						ASSERT3U(czp.zp_copies, ==, need_dvas);

						ASSERT3U(zp->zp_copies, ==, need_dvas);

					}

					zio_t *cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,

					    zio->io_orig_size, zio->io_orig_size, &czp,

					    zio->io_orig_size, zio->io_orig_size, zp,

					    zio_ddt_child_write_ready, NULL,

					    zio_ddt_child_write_done, dde, zio->io_priority,

					    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);

				@@ -4157,6 +4159,17 @@ zio_ddt_free(zio_t *zio)

						ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);

						if (v != DDT_PHYS_NONE)

							ddt_phys_decref(dde->dde_phys, v);

						else

							/*

							 * If the entry was found but the phys was not, then

							 * this block must have been pruned from the dedup

							 * table, and the entry refers to a later version of

							 * this data. Therefore, the caller is trying to delete

							 * the only stored instance of this block, and so we

							 * need to do a normal (not dedup) free. Clear dde so

							 * we fall into the block below.

							 */

							dde = NULL;

					}

					ddt_exit(ddt);

									
										sys/contrib/openzfs/scripts/commitcheck.sh
									
		+3
		-1
	
												View File
												
				@@ -7,7 +7,9 @@ REF="HEAD"

				test_commit_bodylength()

				{

				    length="72"

				    body=$(git log --no-show-signature -n 1 --pretty=%b "$REF" | grep -Ev "http(s)*://" | grep -E -m 1 ".{$((length + 1))}")

				    body=$(git log --no-show-signature -n 1 --pretty=%b "$REF" |

				        grep -Evi -e "http(s)*://" -e "signed-off-by:" -e "reviewed-by:" |

				        grep -E -m 1 ".{$((length + 1))}")

				    if [ -n "$body" ]; then

				        echo "error: commit message body contains line over ${length} characters"

				        return 1

									
										sys/contrib/openzfs/scripts/paxcheck.sh
									
		+2
		-2
	
												View File
												
				@@ -10,7 +10,7 @@ RET=0

				# check for exec stacks

				OUT=$(scanelf -qyRAF '%e %p' "$1")

				if [ x"${OUT}" != x ]; then

				if [ "${OUT}" != "" ]; then

				    RET=2

				    echo "The following files contain writable and executable sections"

				    echo " Files with such sections will not work properly (or at all!) on some"

				@@ -26,7 +26,7 @@ fi

				# check for TEXTRELS

				OUT=$(scanelf -qyRAF '%T %p' "$1")

				if [ x"${OUT}" != x ]; then

				if [ "${OUT}" != "" ]; then

				    RET=2

				    echo "The following files contain runtime text relocations"

				    echo " Text relocations force the dynamic linker to perform extra"

sys/contrib/openzfs/tests/runfiles/common.run

+3 -2

View File

@@ -706,7 +706,8 @@ tags = ['functional', 'deadman']
 [tests/functional/dedup]
 tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_fdt_pacing',
     'dedup_legacy_create', 'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
     'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_zap_shrink']
     'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_prune_leak',
     'dedup_zap_shrink']
 pre =
 post =
 tags = ['functional', 'dedup']
@@ -1019,7 +1020,7 @@ tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
     'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
     'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
     'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
     'snapshot_017_pos', 'snapshot_018_pos']
     'snapshot_017_pos', 'snapshot_018_pos', 'snapshot_019_pos']
 tags = ['functional', 'snapshot']
 [tests/functional/snapused]

sys/contrib/openzfs/tests/runfiles/sanity.run

+1 -1

View File

@@ -580,7 +580,7 @@ tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
     'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos',
     'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos',
     'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_017_pos',
     'snapshot_018_pos']
     'snapshot_018_pos', 'snapshot_019_pos']
 tags = ['functional', 'snapshot']
 [tests/functional/snapused]

									
										sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_cached.c
									
		+4
		-3
	
												View File
												
				@@ -36,12 +36,13 @@

				#include <string.h>

				#include <unistd.h>

				#ifdef __FreeBSD__

				#define	loff_t	off_t

				#if defined(_GNU_SOURCE) && defined(__linux__)

				_Static_assert(sizeof (loff_t) == sizeof (off_t),

					"loff_t and off_t must be the same size");

				#endif

				ssize_t

				copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)

				copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)

				    __attribute__((weak));

				static void *

									
										sys/contrib/openzfs/tests/zfs-tests/cmd/clone_mmap_write.c
									
		+4
		-3
	
												View File
												
				@@ -42,12 +42,13 @@

				#include <sys/stat.h>

				#include <sys/mman.h>

				#ifdef __FreeBSD__

				#define	loff_t	off_t

				#if defined(_GNU_SOURCE) && defined(__linux__)

				_Static_assert(sizeof (loff_t) == sizeof (off_t),

					"loff_t and off_t must be the same size");

				#endif

				ssize_t

				copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)

				copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)

				    __attribute__((weak));

				static int

									
										sys/contrib/openzfs/tests/zfs-tests/cmd/clonefile.c
									
		+12
		-11
	
												View File
												
				@@ -59,16 +59,17 @@

				#endif

				#endif /* __NR_copy_file_range */

				#ifdef __FreeBSD__

				#define	loff_t	off_t

				#if defined(_GNU_SOURCE) && defined(__linux__)

				_Static_assert(sizeof (loff_t) == sizeof (off_t),

					"loff_t and off_t must be the same size");

				#endif

				ssize_t

				copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)

				copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)

				    __attribute__((weak));

				static inline ssize_t

				cf_copy_file_range(int sfd, loff_t *soff, int dfd, loff_t *doff,

				cf_copy_file_range(int sfd, off_t *soff, int dfd, off_t *doff,

				    size_t len, unsigned int flags)

				{

					if (copy_file_range)

				@@ -151,9 +152,9 @@ usage(void)

				}

				int do_clone(int sfd, int dfd);

				int do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);

				int do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);

				int do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);

				int do_clonerange(int sfd, int dfd, off_t soff, off_t doff, size_t len);

				int do_copyfilerange(int sfd, int dfd, off_t soff, off_t doff, size_t len);

				int do_deduperange(int sfd, int dfd, off_t soff, off_t doff, size_t len);

				int quiet = 0;

				@@ -203,7 +204,7 @@ main(int argc, char **argv)

							abort();

					}

					loff_t soff = 0, doff = 0;

					off_t soff = 0, doff = 0;

					size_t len = SSIZE_MAX;

					unsigned long long len2;

					if ((argc-optind) == 5) {

				@@ -295,7 +296,7 @@ do_clone(int sfd, int dfd)

				}

				int

				do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)

				do_clonerange(int sfd, int dfd, off_t soff, off_t doff, size_t len)

				{

					if (!quiet)

						fprintf(stderr, "using FICLONERANGE\n");

				@@ -314,7 +315,7 @@ do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)

				}

				int

				do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)

				do_copyfilerange(int sfd, int dfd, off_t soff, off_t doff, size_t len)

				{

					if (!quiet)

						fprintf(stderr, "using copy_file_range\n");

				@@ -341,7 +342,7 @@ do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)

				}

				int

				do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)

				do_deduperange(int sfd, int dfd, off_t soff, off_t doff, size_t len)

				{

					if (!quiet)

						fprintf(stderr, "using FIDEDUPERANGE\n");

sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib

-2

View File

@@ -3861,8 +3861,6 @@ function directory_diff # dir_a dir_b
 	# do not match there is a "c" entry in one of the columns).
 	if rsync --version | grep -q "[, ] crtimes"; then
 		args+=("--crtimes")
 	else
 		log_note "This rsync package does not support --crtimes (-N)."
 	fi
 	# If we are testing a ZIL replay, we need to ignore timestamp changes.

									
										sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
									
		+2
		
												View File
												
				@@ -1482,6 +1482,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \

					functional/dedup/dedup_legacy_fdt_upgrade.ksh \

					functional/dedup/dedup_legacy_fdt_mixed.ksh \

					functional/dedup/dedup_prune.ksh \

					functional/dedup/dedup_prune_leak.ksh \

					functional/dedup/dedup_quota.ksh \

					functional/dedup/dedup_zap_shrink.ksh \

					functional/delegate/cleanup.ksh \

				@@ -2121,6 +2122,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \

					functional/snapshot/snapshot_016_pos.ksh \

					functional/snapshot/snapshot_017_pos.ksh \

					functional/snapshot/snapshot_018_pos.ksh \

					functional/snapshot/snapshot_019_pos.ksh \

					functional/snapused/cleanup.ksh \

					functional/snapused/setup.ksh \

					functional/snapused/snapused_001_pos.ksh \

									
										sys/contrib/openzfs/tests/zfs-tests/tests/functional/dedup/dedup_prune_leak.ksh
									
Executable

		+86
		
												View File
												
				@@ -0,0 +1,86 @@

				#!/bin/ksh -p

				# SPDX-License-Identifier: CDDL-1.0

				# CDDL HEADER START

				#

				# The contents of this file are subject to the terms of the

				# Common Development and Distribution License (the "License").

				# You may not use this file except in compliance with the License.

				#

				# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

				# or https://opensource.org/licenses/CDDL-1.0.

				# See the License for the specific language governing permissions

				# and limitations under the License.

				#

				# When distributing Covered Code, include this CDDL HEADER in each

				# file and include the License file at usr/src/OPENSOLARIS.LICENSE.

				# If applicable, add the following below this CDDL HEADER, with the

				# fields enclosed by brackets "[]" replaced with your own identifying

				# information: Portions Copyright [yyyy] [name of copyright owner]

				#

				# CDDL HEADER END

				#

				#

				# Copyright (c) 2025, Klara Inc.

				# Copyright (c) 2025, Nutanix Inc.

				#

				# DESCRIPTION:

				#	Verify that removing a file whose blocks were pruned from the DDT

				#	by zpool ddtprune does not leak space.

				#

				# STRATEGY:

				#	1. Create a pool with dedup=on

				#	2. Add non-duplicate entries to the DDT

				#	3. ddtprune all entries

				#	4. Remove the file

				#	5. Verify there's no space leak

				#

				. $STF_SUITE/include/libtest.shlib

				. $STF_SUITE/tests/functional/events/events_common.kshlib

				verify_runnable "both"

				log_assert "Verify DDT pruning does not cause space leak"

				# We set the dedup log txg interval to 1, to get a log flush every txg,

				# effectively disabling the log. Without this it's hard to predict when

				# entries appear in the DDT ZAP

				log_must save_tunable DEDUP_LOG_TXG_MAX

				log_must set_tunable32 DEDUP_LOG_TXG_MAX 1

				# NOTE(review): presumably a large minimum flush count makes each flush

				# drain the whole log -- confirm against the tunable's documentation.

				log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN

				log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000

				# Exit handler: destroy the test pool if it still exists and restore

				# both tunables saved above.

				function cleanup

				{

					if poolexists $TESTPOOL ; then

						destroy_pool $TESTPOOL

					fi

					log_must restore_tunable DEDUP_LOG_TXG_MAX

					log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN

				}

				log_onexit cleanup

				# Step 1: pool plus a filesystem with dedup enabled.

				log_must zpool create -f $TESTPOOL $DISKS

				log_must zfs create -o dedup=on $TESTPOOL/$TESTFS

				typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)

				# Step 2: 16 x 1M of random data, i.e. blocks that are not duplicates

				# of each other.

				log_must dd if=/dev/urandom of=$mountpoint/f1 bs=1M count=16

				# We seem to need some number of txg syncs here to make the problem more

				# consistently reproducible

				for i in $(seq 50); do

					zpool sync $TESTPOOL

				done

				# Steps 3 and 4: prune the DDT, then remove the file whose entries

				# were just pruned.

				log_must zpool ddtprune -p 100 $TESTPOOL

				log_must rm $mountpoint/f1

				sync_pool $TESTPOOL

				# Step 5: capture the zdb output, echo it into the test log, and fail

				# if zdb reports any leaked space.

				zdb_out=$(zdb -bcc $TESTPOOL)

				echo "$zdb_out"

				if echo "$zdb_out" | grep -q "leaked space"; then

					log_fail "DDT pruning causes space leak"

				fi

				log_pass "DDT pruning does not cause space leak"

									
										sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_019_pos.ksh
									
Executable

		+82
		
												View File
												
				@@ -0,0 +1,82 @@

				#!/bin/ksh -p

				# SPDX-License-Identifier: CDDL-1.0

				#

				# CDDL HEADER START

				#

				# The contents of this file are subject to the terms of the

				# Common Development and Distribution License (the "License").

				# You may not use this file except in compliance with the License.

				#

				# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

				# or https://opensource.org/licenses/CDDL-1.0.

				# See the License for the specific language governing permissions

				# and limitations under the License.

				#

				# When distributing Covered Code, include this CDDL HEADER in each

				# file and include the License file at usr/src/OPENSOLARIS.LICENSE.

				# If applicable, add the following below this CDDL HEADER, with the

				# fields enclosed by brackets "[]" replaced with your own identifying

				# information: Portions Copyright [yyyy] [name of copyright owner]

				#

				# CDDL HEADER END

				#

				#

				# Copyright 2025 iXsystems, Inc.

				#

				. $STF_SUITE/include/libtest.shlib

				. $STF_SUITE/tests/functional/snapshot/snapshot.cfg

				#

				# DESCRIPTION:

				# Verify that parallel snapshot automount operations don't cause AVL tree

				# panic due to duplicate mount attempts.

				#

				# STRATEGY:

				# 1. Create a filesystem with snapdir=visible

				# 2. Create a snapshot

				# 3. Trigger parallel ls operations on the snapshot directory

				# 4. Verify no kernel panic occurred and snapshot is accessible

				#

				# Exit handler: destroy the test pool created below.

				function cleanup

				{

					destroy_pool $TESTPOOL

				}

				verify_runnable "both"

				log_assert "Verify parallel snapshot automount doesn't cause AVL tree panic"

				log_onexit cleanup

				# Create pool and filesystem

				create_pool $TESTPOOL $DISKS

				log_must zfs create -o snapdir=visible -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS

				# Create a snapshot

				log_must zfs snapshot $SNAPFS

				# Trigger parallel automount operations to reproduce the race condition.

				# Multiple concurrent ls operations will attempt to automount the same

				# unmounted snapshot, which previously could cause duplicate mount helpers

				# and AVL tree panic.

				snapdir_path="$TESTDIR/.zfs/snapshot/$TESTSNAP"

				# Each ls runs in the background with its output discarded and is not

				# wrapped in log_must, so individual failures do not fail the test;

				# only the foreground ls after the wait is checked.

				for i in {1..100}

				do

					ls $snapdir_path >/dev/null 2>&1 &

				done

				# Wait for all background processes to complete

				wait

				# Verify the snapshot is accessible and properly mounted after parallel access

				log_must ls $snapdir_path

				# Verify we can unmount the filesystem cleanly. This confirms no processes

				# are stuck in a syscall and all automounted snapshots were unmounted properly.

				# If the AVL panic occurred, unmount would fail.

				log_must zfs unmount $TESTPOOL/$TESTFS

				log_pass "Parallel snapshot automount completed without AVL tree panic"

									
										sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
									
		+5
		-13
	
												View File
												
				@@ -73,7 +73,6 @@ function do_test {

					block_device_wait $zvolpath

					# Write using sync (creates FLUSH calls after writes, but not FUA)

					old_vdev_writes=$(get_sync $DISK1)

					old_log_writes=$(get_sync $datafile3)

					log_must fio --name=write_iops --size=5M \

				@@ -81,20 +80,13 @@ function do_test {

						--iodepth=1 --rw=randwrite --group_reporting=1 \

						--filename=$zvolpath --sync=1

					vdev_writes=$(( $(get_sync $DISK1) - $old_vdev_writes))

					log_writes=$(( $(get_sync $datafile3) - $old_log_writes))

					# When we're doing sync writes, we should see many more writes go to

					# the log vs the first vdev.  Experiments show anywhere from a 160-320x

					# ratio of writes to the log vs the first vdev (due to some straggler

					# writes to the first vdev).

					#

					# Check that we have a large ratio (100x) of sync writes going to the

					# log device

					ratio=$(($log_writes / $vdev_writes))

					log_note "Got $log_writes log writes, $vdev_writes vdev writes."

					if [ $ratio -lt 100 ] ; then

						log_fail "Expected > 100x more log writes than vdev writes. "

					# When doing sync writes, we should see at least one SLOG write per

					# block (5MB / 4KB) == 1280.

					log_note "Got $log_writes log writes."

					if [ $log_writes -lt 1280 ] ; then

						log_fail "Expected >= 1280 log writes. "

					fi

					# Create a data file

									
										sys/modules/zfs/zfs_config.h
									
		+21
		-3
	
												View File
												
				@@ -433,6 +433,9 @@

				/* iter_is_ubuf() is available */

				/* #undef HAVE_ITER_IS_UBUF */

				/* kasan_enabled() is GPL-only */

				/* #undef HAVE_KASAN_ENABLED_GPL_ONLY */

				/* kernel has kernel_fpu_* functions */

				/* #undef HAVE_KERNEL_FPU */

				@@ -826,6 +829,9 @@

				/* make_request_fn() return type */

				/* #undef MAKE_REQUEST_FN_RET */

				/* The size of 'off_t', as computed by sizeof. */

				/* #undef SIZEOF_OFF_T */

				/* using complete_and_exit() instead */

				/* #undef SPL_KTHREAD_COMPLETE_AND_EXIT */

				@@ -856,7 +862,7 @@

				/* #undef ZFS_DEVICE_MINOR */

				/* Define the project alias string. */

				#define ZFS_META_ALIAS "zfs-2.4.99-248-FreeBSD_g89f729dcc"

				#define ZFS_META_ALIAS "zfs-2.4.99-292-FreeBSD_g962e68865"

				/* Define the project author. */

				#define ZFS_META_AUTHOR "OpenZFS"

				@@ -865,7 +871,7 @@

				/* #undef ZFS_META_DATA */

				/* Define the maximum compatible kernel version. */

				#define ZFS_META_KVER_MAX "6.17"

				#define ZFS_META_KVER_MAX "6.18"

				/* Define the minimum compatible kernel version. */

				#define ZFS_META_KVER_MIN "4.18"

				@@ -886,10 +892,22 @@

				#define ZFS_META_NAME "zfs"

				/* Define the project release. */

				#define ZFS_META_RELEASE "248-FreeBSD_g89f729dcc"

				#define ZFS_META_RELEASE "292-FreeBSD_g962e68865"

				/* Define the project version. */

				#define ZFS_META_VERSION "2.4.99"

				/* count is located in percpu_ref.data */

				/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */

				/* Number of bits in a file offset, on hosts where this is settable. */

				/* #undef _FILE_OFFSET_BITS */

				/* Define to 1 on platforms where this makes off_t a 64-bit type. */

				/* #undef _LARGE_FILES */

				/* Number of bits in time_t, on hosts where this is settable. */

				/* #undef _TIME_BITS */

				/* Define to 1 on platforms where this makes time_t a 64-bit type. */

				/* #undef __MINGW_USE_VC2005_COMPAT */

									
										sys/modules/zfs/zfs_gitrev.h
									
		+1
		-1
	
												View File
												
				@@ -1 +1 @@

				#define	ZFS_META_GITREV "zfs-2.4.99-220-ge63d026b9"

				#define	ZFS_META_GITREV "zfs-2.4.99-292-g962e68865"