zfs: merge openzfs/zfs@962e68865
Notable upstream pull request merges:
 #16307 1d43387dd zdb: Add -O option for -r to specify object-id
 #17965 a62c62120 ARC: Pre-convert zfs_arc_min_prefetch_ms
 #17970 d393166c5 ARC: Increase parallel eviction batching
 #17981 20f09eae4 ZIO: ZIO_STAGE_DDT_WRITE is a blocking stage
 #17983 ff47dd35e Fix ddtprune causing space leak
 #18015 86b064469 FreeBSD: Fix a potential null dereference in zfs_freebsd_fsync() (already merged)
 #18020 ff47dd35e Ensure 64-bit `off_t` is used in user space instead of `loff_t`
 #18028 09492e0f2 Reduce dataset buffers re-dirtying
 #18033 f72fd378c Defer async destroys on pool import
 #18043 3d76ba273 Improve async destroy processing timing
 #18044 46d6f1fe5 DDT: Move logs searches out of the lock
 #18047 ff5414406 DDT: Switch to using ZAP _by_dnode() interfaces
 #18048 3b1ff816b DDT: Add/use zap_lookup_length_uint64_by_dnode()
 #18055 22e89aca8 DDT: Fix compressed entry buffer size
 #18059 0550abd4b RAIDZ: Remove some excessive logging
 #18060 a83bb15fc Reduce minimal scrub/resilver times
 #18061 962e68865 Use reduced precision for scan times
 #18063 051a8c749 Bypass snprintf() in quota checks if no quotas set
 #18064 7ff329ac2 Fix rangelock test for growing block size

Obtained from: OpenZFS
OpenZFS commit: 962e68865e
This commit is contained in:
@@ -13,6 +13,20 @@ set -eu
|
||||
# handle on what the timeout value should be.
|
||||
(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)}')]"; done) &
|
||||
|
||||
# The default 'azure.archive.ubuntu.com' mirrors can be really slow.
|
||||
# Prioritize the official Ubuntu mirrors.
|
||||
#
|
||||
# The normal apt-mirrors.txt will look like:
|
||||
#
|
||||
# http://azure.archive.ubuntu.com/ubuntu/ priority:1
|
||||
# https://archive.ubuntu.com/ubuntu/ priority:2
|
||||
# https://security.ubuntu.com/ubuntu/ priority:3
|
||||
#
|
||||
# Just delete the 'azure.archive.ubuntu.com' line.
|
||||
sudo sed -i '/azure.archive.ubuntu.com/d' /etc/apt/apt-mirrors.txt
|
||||
echo "Using mirrors:"
|
||||
cat /etc/apt/apt-mirrors.txt
|
||||
|
||||
# install needed packages
|
||||
export DEBIAN_FRONTEND="noninteractive"
|
||||
sudo apt-get -y update
|
||||
|
||||
@@ -95,13 +95,6 @@ case "$OS" in
|
||||
KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
|
||||
NIC="rtl8139"
|
||||
;;
|
||||
freebsd14-2r)
|
||||
FreeBSD="14.2-RELEASE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
OSv="freebsd14.0"
|
||||
URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
|
||||
KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
|
||||
;;
|
||||
freebsd14-3r)
|
||||
FreeBSD="14.3-RELEASE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
|
||||
@@ -60,20 +60,16 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Setup QEMU
|
||||
timeout-minutes: 10
|
||||
run: .github/workflows/scripts/qemu-1-setup.sh
|
||||
|
||||
- name: Start build machine
|
||||
timeout-minutes: 10
|
||||
run: .github/workflows/scripts/qemu-2-start.sh ${{ matrix.os }}
|
||||
|
||||
- name: Install dependencies
|
||||
timeout-minutes: 20
|
||||
run: |
|
||||
.github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }}
|
||||
|
||||
- name: Build modules or Test repo
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
set -e
|
||||
if [ "${{ github.event.inputs.test_type }}" == "Test repo" ] ; then
|
||||
@@ -94,7 +90,6 @@ jobs:
|
||||
|
||||
- name: Prepare artifacts
|
||||
if: always()
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
rsync -a zfs@vm0:/tmp/repo /tmp || true
|
||||
.github/workflows/scripts/replace-dupes-with-symlinks.sh /tmp/repo
|
||||
|
||||
+4
-4
@@ -46,7 +46,7 @@ jobs:
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora41", "fedora42", "fedora43", "ubuntu22", "ubuntu24"]'
|
||||
;;
|
||||
freebsd)
|
||||
os_selection='["freebsd13-5r", "freebsd14-2r", "freebsd14-3r", "freebsd13-5s", "freebsd14-3s", "freebsd15-0s", "freebsd16-0c"]'
|
||||
os_selection='["freebsd13-5r", "freebsd14-3r", "freebsd13-5s", "freebsd14-3s", "freebsd15-0s", "freebsd16-0c"]'
|
||||
;;
|
||||
*)
|
||||
# default list
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
# debian: debian12, debian13, ubuntu22, ubuntu24
|
||||
# misc: archlinux, tumbleweed
|
||||
# FreeBSD variants of November 2025:
|
||||
# FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r
|
||||
# FreeBSD Release: freebsd13-5r, freebsd14-3r, freebsd15-0r
|
||||
# FreeBSD Stable: freebsd13-5s, freebsd14-3s, freebsd15-0s
|
||||
# FreeBSD Current: freebsd16-0c
|
||||
os: ${{ fromJson(needs.test-config.outputs.test_os) }}
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Setup QEMU
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
# Add a timestamp to each line to debug timeouts
|
||||
while IFS=$'\n' read -r line; do
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
run: .github/workflows/scripts/qemu-2-start.sh ${{ matrix.os }}
|
||||
|
||||
- name: Install dependencies
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 60
|
||||
run: .github/workflows/scripts/qemu-3-deps.sh ${{ matrix.os }} ${{ github.event.inputs.fedora_kernel_ver }}
|
||||
|
||||
- name: Build modules
|
||||
|
||||
@@ -6,5 +6,5 @@ Release: 1
|
||||
Release-Tags: relext
|
||||
License: CDDL
|
||||
Author: OpenZFS
|
||||
Linux-Maximum: 6.17
|
||||
Linux-Maximum: 6.18
|
||||
Linux-Minimum: 4.18
|
||||
|
||||
@@ -739,13 +739,14 @@ usage(void)
|
||||
"[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
|
||||
"\t%s -O [-K <key>] <dataset> <path>\n"
|
||||
"\t%s -r [-K <key>] <dataset> <path> <destination>\n"
|
||||
"\t%s -r [-K <key>] -O <dataset> <object-id> <destination>\n"
|
||||
"\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
|
||||
"\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
|
||||
"\t%s -E [-A] word0:word1:...:word15\n"
|
||||
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
|
||||
"<poolname>\n\n",
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname);
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
|
||||
|
||||
(void) fprintf(stderr, " Dataset name must include at least one "
|
||||
"separator character '/' or '@'\n");
|
||||
@@ -9956,7 +9957,7 @@ main(int argc, char **argv)
|
||||
* which imports the pool to the namespace if it's
|
||||
* not in the cachefile.
|
||||
*/
|
||||
if (dump_opt['O']) {
|
||||
if (dump_opt['O'] && !dump_opt['r']) {
|
||||
if (argc != 2)
|
||||
usage();
|
||||
dump_opt['v'] = verbose + 3;
|
||||
@@ -9969,7 +9970,11 @@ main(int argc, char **argv)
|
||||
if (argc != 3)
|
||||
usage();
|
||||
dump_opt['v'] = verbose;
|
||||
error = dump_path(argv[0], argv[1], &object);
|
||||
if (dump_opt['O']) {
|
||||
object = strtoull(argv[1], NULL, 0);
|
||||
} else {
|
||||
error = dump_path(argv[0], argv[1], &object);
|
||||
}
|
||||
if (error != 0)
|
||||
fatal("internal error: %s", strerror(error));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
dnl #
|
||||
dnl # 6.18: some architectures and config options cause the kasan_ inline
|
||||
dnl # functions to reference the GPL-only symbol 'kasan_flag_enabled',
|
||||
dnl # breaking the build. Detect this and work
|
||||
dnl # around it.
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_KASAN_ENABLED], [
|
||||
ZFS_LINUX_TEST_SRC([kasan_enabled], [
|
||||
#include <linux/kasan.h>
|
||||
], [
|
||||
kasan_enabled();
|
||||
], [], [ZFS_META_LICENSE])
|
||||
])
|
||||
AC_DEFUN([ZFS_AC_KERNEL_KASAN_ENABLED], [
|
||||
AC_MSG_CHECKING([whether kasan_enabled() is GPL-only])
|
||||
ZFS_LINUX_TEST_RESULT([kasan_enabled_license], [
|
||||
AC_MSG_RESULT(no)
|
||||
], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_KASAN_ENABLED_GPL_ONLY, 1,
|
||||
[kasan_enabled() is GPL-only])
|
||||
])
|
||||
])
|
||||
|
||||
@@ -138,6 +138,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE
|
||||
ZFS_AC_KERNEL_SRC_NAMESPACE
|
||||
ZFS_AC_KERNEL_SRC_INODE_GENERIC_DROP
|
||||
ZFS_AC_KERNEL_SRC_KASAN_ENABLED
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
@@ -260,6 +261,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_SOPS_FREE_INODE
|
||||
ZFS_AC_KERNEL_NAMESPACE
|
||||
ZFS_AC_KERNEL_INODE_GENERIC_DROP
|
||||
ZFS_AC_KERNEL_KASAN_ENABLED
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
dnl #
|
||||
dnl # ZFS_AC_CONFIG_USER_LARGEFILE
|
||||
dnl #
|
||||
dnl # Ensure off_t is 64-bit for large file support in userspace.
|
||||
dnl # This is required for OpenZFS to handle files larger than 2GB.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_CONFIG_USER_LARGEFILE], [
|
||||
AC_SYS_LARGEFILE
|
||||
AC_CHECK_SIZEOF([off_t])
|
||||
|
||||
AC_MSG_CHECKING([for 64-bit off_t])
|
||||
AS_IF([test "$ac_cv_sizeof_off_t" -ne 8], [
|
||||
AC_MSG_RESULT([no, $ac_cv_sizeof_off_t bytes])
|
||||
AC_MSG_FAILURE([
|
||||
*** OpenZFS userspace requires 64-bit off_t support for large files.
|
||||
*** Please ensure your system supports large file operations.
|
||||
*** Current off_t size: $ac_cv_sizeof_off_t bytes])
|
||||
], [
|
||||
AC_MSG_RESULT([yes, $ac_cv_sizeof_off_t bytes])
|
||||
])
|
||||
])
|
||||
@@ -3,6 +3,7 @@ dnl # Default ZFS user configuration
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_CONFIG_USER], [
|
||||
ZFS_AC_CONFIG_USER_GETTEXT
|
||||
ZFS_AC_CONFIG_USER_LARGEFILE
|
||||
ZFS_AC_CONFIG_USER_MOUNT_HELPER
|
||||
ZFS_AC_CONFIG_USER_SYSVINIT
|
||||
ZFS_AC_CONFIG_USER_DRACUT
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -60,6 +60,10 @@ ZPOOL_IMPORT_ALL_VISIBLE='no'
|
||||
# This is a space separated list.
|
||||
#ZFS_POOL_EXCEPTIONS="test2"
|
||||
|
||||
# Additional important (operating system) file systems to mount beside
|
||||
# the root file system.
|
||||
#ZFS_INITRD_ADDITIONAL_DATASETS="rpool/usr rpool/var rpool/var/spool"
|
||||
|
||||
# Should the datasets be mounted verbosely?
|
||||
# A mount counter will be used when mounting if set to 'yes'.
|
||||
VERBOSE_MOUNT='no'
|
||||
|
||||
@@ -26,13 +26,13 @@ fi
|
||||
# Of course the functions we need are called differently
|
||||
# on different distributions - it would be way too easy
|
||||
# otherwise!!
|
||||
if type log_failure_msg > /dev/null 2>&1 ; then
|
||||
if command -v log_failure_msg > /dev/null 2>&1 ; then
|
||||
# LSB functions - fall through
|
||||
zfs_log_begin_msg() { log_begin_msg "$1"; }
|
||||
zfs_log_end_msg() { log_end_msg "$1"; }
|
||||
zfs_log_failure_msg() { log_failure_msg "$1"; }
|
||||
zfs_log_progress_msg() { log_progress_msg "$1"; }
|
||||
elif type success > /dev/null 2>&1 ; then
|
||||
elif command -v success > /dev/null 2>&1 ; then
|
||||
# Fedora/RedHat functions
|
||||
zfs_set_ifs() {
|
||||
# For some reason, the init function library has a problem
|
||||
@@ -64,7 +64,7 @@ elif type success > /dev/null 2>&1 ; then
|
||||
zfs_set_ifs "$TMP_IFS"
|
||||
}
|
||||
zfs_log_progress_msg() { printf "%s" "$""$1"; }
|
||||
elif type einfo > /dev/null 2>&1 ; then
|
||||
elif command -v einfo > /dev/null 2>&1 ; then
|
||||
# Gentoo functions
|
||||
zfs_log_begin_msg() { ebegin "$1"; }
|
||||
zfs_log_end_msg() { eend "$1"; }
|
||||
@@ -109,7 +109,7 @@ fi
|
||||
# ----------------------------------------------------
|
||||
|
||||
export ZFS ZED ZPOOL ZPOOL_CACHE ZFS_LOAD_KEY ZFS_UNLOAD_KEY ZFS_MOUNT ZFS_UNMOUNT \
|
||||
ZFS_SHARE ZFS_UNSHARE
|
||||
ZFS_SHARE ZFS_UNSHARE ZFS_POOL_EXCEPTIONS ZFS_INITRD_ADDITIONAL_DATASETS
|
||||
|
||||
zfs_action()
|
||||
{
|
||||
@@ -140,7 +140,7 @@ zfs_daemon_start()
|
||||
local PIDFILE="$1"; shift
|
||||
local DAEMON_BIN="$1"; shift
|
||||
|
||||
if type start-stop-daemon > /dev/null 2>&1 ; then
|
||||
if command -v start-stop-daemon > /dev/null 2>&1 ; then
|
||||
# LSB functions
|
||||
start-stop-daemon --start --quiet --pidfile "$PIDFILE" \
|
||||
--exec "$DAEMON_BIN" --test > /dev/null || return 1
|
||||
@@ -157,7 +157,7 @@ zfs_daemon_start()
|
||||
then
|
||||
ln -sf "$PIDFILE" /run/sendsigs.omit.d/zed
|
||||
fi
|
||||
elif type daemon > /dev/null 2>&1 ; then
|
||||
elif command -v daemon > /dev/null 2>&1 ; then
|
||||
# Fedora/RedHat functions
|
||||
# shellcheck disable=SC2086
|
||||
daemon --pidfile "$PIDFILE" "$DAEMON_BIN" "$@"
|
||||
@@ -182,7 +182,7 @@ zfs_daemon_stop()
|
||||
local DAEMON_BIN="$2"
|
||||
local DAEMON_NAME="$3"
|
||||
|
||||
if type start-stop-daemon > /dev/null 2>&1 ; then
|
||||
if command -v start-stop-daemon > /dev/null 2>&1 ; then
|
||||
# LSB functions
|
||||
start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 \
|
||||
--pidfile "$PIDFILE" --name "$DAEMON_NAME"
|
||||
@@ -190,7 +190,7 @@ zfs_daemon_stop()
|
||||
[ "$ret" = 0 ] && rm -f "$PIDFILE"
|
||||
|
||||
return "$ret"
|
||||
elif type killproc > /dev/null 2>&1 ; then
|
||||
elif command -v killproc > /dev/null 2>&1 ; then
|
||||
# Fedora/RedHat functions
|
||||
killproc -p "$PIDFILE" "$DAEMON_NAME"
|
||||
ret="$?"
|
||||
@@ -212,11 +212,11 @@ zfs_daemon_status()
|
||||
local DAEMON_BIN="$2"
|
||||
local DAEMON_NAME="$3"
|
||||
|
||||
if type status_of_proc > /dev/null 2>&1 ; then
|
||||
if command -v status_of_proc > /dev/null 2>&1 ; then
|
||||
# LSB functions
|
||||
status_of_proc "$DAEMON_NAME" "$DAEMON_BIN"
|
||||
return $?
|
||||
elif type status > /dev/null 2>&1 ; then
|
||||
elif command -v status > /dev/null 2>&1 ; then
|
||||
# Fedora/RedHat functions
|
||||
status -p "$PIDFILE" "$DAEMON_NAME"
|
||||
return $?
|
||||
@@ -233,12 +233,12 @@ zfs_daemon_reload()
|
||||
local PIDFILE="$1"
|
||||
local DAEMON_NAME="$2"
|
||||
|
||||
if type start-stop-daemon > /dev/null 2>&1 ; then
|
||||
if command -v start-stop-daemon > /dev/null 2>&1 ; then
|
||||
# LSB functions
|
||||
start-stop-daemon --stop --signal 1 --quiet \
|
||||
--pidfile "$PIDFILE" --name "$DAEMON_NAME"
|
||||
return $?
|
||||
elif type killproc > /dev/null 2>&1 ; then
|
||||
elif command -v killproc > /dev/null 2>&1 ; then
|
||||
# Fedora/RedHat functions
|
||||
killproc -p "$PIDFILE" "$DAEMON_NAME" -HUP
|
||||
return $?
|
||||
|
||||
@@ -213,6 +213,7 @@ typedef enum {
|
||||
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
|
||||
#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */
|
||||
#define DDE_FLAG_LOGGED (1 << 2) /* loaded from log */
|
||||
#define DDE_FLAG_FROM_FLUSHING (1 << 3) /* loaded from flushing log */
|
||||
|
||||
/*
|
||||
* Additional data to support entry update or repair. This is fixed size
|
||||
@@ -280,13 +281,14 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
kmutex_t ddt_lock; /* protects changes to all fields */
|
||||
|
||||
avl_tree_t ddt_tree; /* "live" (changed) entries this txg */
|
||||
avl_tree_t ddt_log_tree; /* logged entries */
|
||||
|
||||
avl_tree_t ddt_repair_tree; /* entries being repaired */
|
||||
|
||||
ddt_log_t ddt_log[2]; /* active/flushing logs */
|
||||
/*
|
||||
* Log trees are stable during I/O, and only modified during sync
|
||||
* with exclusive access.
|
||||
*/
|
||||
ddt_log_t ddt_log[2] ____cacheline_aligned; /* logged entries */
|
||||
ddt_log_t *ddt_log_active; /* pointers into ddt_log */
|
||||
ddt_log_t *ddt_log_flushing; /* swapped when flush starts */
|
||||
|
||||
@@ -324,6 +326,7 @@ typedef struct {
|
||||
|
||||
/* per-type/per-class entry store objects */
|
||||
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
|
||||
dnode_t *ddt_object_dnode[DDT_TYPES][DDT_CLASSES];
|
||||
|
||||
/* object ids for stored, logged and per-type/per-class stats */
|
||||
uint64_t ddt_stat_object;
|
||||
|
||||
@@ -69,8 +69,8 @@ extern "C" {
|
||||
* the live tree.
|
||||
*/
|
||||
typedef struct {
|
||||
ddt_key_t ddle_key; /* ddt_log_tree key */
|
||||
avl_node_t ddle_node; /* ddt_log_tree node */
|
||||
ddt_key_t ddle_key; /* ddl_tree key */
|
||||
avl_node_t ddle_node; /* ddl_tree node */
|
||||
|
||||
ddt_type_t ddle_type; /* storage type */
|
||||
ddt_class_t ddle_class; /* storage class */
|
||||
@@ -163,21 +163,18 @@ typedef struct {
|
||||
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
|
||||
boolean_t prehash);
|
||||
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
|
||||
int (*ddt_op_lookup)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, void *phys, size_t psize);
|
||||
int (*ddt_op_contains)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
|
||||
int (*ddt_op_update)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, const void *phys, size_t psize,
|
||||
int (*ddt_op_lookup)(dnode_t *dn, const ddt_key_t *ddk,
|
||||
void *phys, size_t psize);
|
||||
int (*ddt_op_contains)(dnode_t *dn, const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch)(dnode_t *dn, const ddt_key_t *ddk);
|
||||
void (*ddt_op_prefetch_all)(dnode_t *dn);
|
||||
int (*ddt_op_update)(dnode_t *dn, const ddt_key_t *ddk,
|
||||
const void *phys, size_t psize, dmu_tx_t *tx);
|
||||
int (*ddt_op_remove)(dnode_t *dn, const ddt_key_t *ddk,
|
||||
dmu_tx_t *tx);
|
||||
int (*ddt_op_remove)(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, dmu_tx_t *tx);
|
||||
int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
|
||||
ddt_key_t *ddk, void *phys, size_t psize);
|
||||
int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
|
||||
int (*ddt_op_walk)(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,
|
||||
void *phys, size_t psize);
|
||||
int (*ddt_op_count)(dnode_t *dn, uint64_t *count);
|
||||
} ddt_ops_t;
|
||||
|
||||
extern const ddt_ops_t ddt_zap_ops;
|
||||
@@ -193,7 +190,7 @@ extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
|
||||
ddt_lightweight_entry_t *ddlwe);
|
||||
|
||||
extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
|
||||
ddt_lightweight_entry_t *ddlwe);
|
||||
ddt_lightweight_entry_t *ddlwe, boolean_t *from_flushing);
|
||||
extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
|
||||
const ddt_key_t *ddk);
|
||||
|
||||
|
||||
@@ -157,7 +157,7 @@ typedef struct dsl_scan {
|
||||
|
||||
/* per txg statistics */
|
||||
uint64_t scn_visited_this_txg; /* total bps visited this txg */
|
||||
uint64_t scn_dedup_frees_this_txg; /* dedup bps freed this txg */
|
||||
uint64_t scn_async_frees_this_txg; /* async frees (dedup/clone/gang) */
|
||||
uint64_t scn_holes_this_txg;
|
||||
uint64_t scn_lt_min_this_txg;
|
||||
uint64_t scn_gt_max_this_txg;
|
||||
|
||||
@@ -226,6 +226,9 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
uint64_t *actual_num_integers);
|
||||
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
|
||||
@@ -288,6 +291,8 @@ int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
|
||||
int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
|
||||
|
||||
/*
|
||||
* Remove the specified attribute.
|
||||
@@ -309,6 +314,7 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
* object.
|
||||
*/
|
||||
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
|
||||
int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
|
||||
|
||||
/*
|
||||
* Returns (in name) the name of the entry whose (value & mask)
|
||||
|
||||
@@ -219,7 +219,8 @@ void fzap_byteswap(void *buf, size_t size);
|
||||
int fzap_count(zap_t *zap, uint64_t *count);
|
||||
int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp);
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp,
|
||||
uint64_t *actual_num_integers);
|
||||
void fzap_prefetch(zap_name_t *zn);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, const void *tag, dmu_tx_t *tx);
|
||||
|
||||
@@ -25,6 +25,17 @@
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
/*
|
||||
* loff_t is a Linux kernel/VFS type. glibc and musl expose it to user
|
||||
* space via <fcntl.h>, but FreeBSD libc does not. For FreeBSD user
|
||||
* space we map loff_t to off_t so the shared interfaces that use the
|
||||
* loff_t name still compile. The FreeBSD kernel gets loff_t from its
|
||||
* own linux-compat headers.
|
||||
*/
|
||||
#if !defined(_KERNEL) && defined(__FreeBSD__)
|
||||
typedef off_t loff_t;
|
||||
#endif
|
||||
|
||||
#ifndef _KERNEL
|
||||
typedef struct zfs_file {
|
||||
int f_fd;
|
||||
|
||||
@@ -278,7 +278,8 @@ enum zio_stage {
|
||||
ZIO_VDEV_IO_STAGES)
|
||||
|
||||
#define ZIO_BLOCKING_STAGES \
|
||||
(ZIO_STAGE_DVA_ALLOCATE | \
|
||||
(ZIO_STAGE_DDT_WRITE | \
|
||||
ZIO_STAGE_DVA_ALLOCATE | \
|
||||
ZIO_STAGE_DVA_CLAIM | \
|
||||
ZIO_STAGE_VDEV_IO_START)
|
||||
|
||||
|
||||
@@ -86,6 +86,6 @@ extern void kstat_delete(kstat_t *);
|
||||
extern void kstat_set_raw_ops(kstat_t *ksp,
|
||||
int (*headers)(char *buf, size_t size),
|
||||
int (*data)(char *buf, size_t size, void *data),
|
||||
void *(*addr)(kstat_t *ksp, loff_t index));
|
||||
void *(*addr)(kstat_t *ksp, off_t index));
|
||||
|
||||
#endif /* _SYS_KSTAT_H */
|
||||
|
||||
@@ -50,19 +50,4 @@ typedef int projid_t;
|
||||
|
||||
#include <sys/param.h> /* for NBBY */
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
typedef off_t loff_t;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On musl, loff_t is a macro within fcntl.h when _GNU_SOURCE is defined.
|
||||
* If no macro is defined, a typedef fallback is provided.
|
||||
*/
|
||||
#if defined(__linux__) && !defined(__GLIBC__)
|
||||
#include <fcntl.h>
|
||||
#ifndef loff_t
|
||||
typedef off_t loff_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -58,7 +58,7 @@ void
|
||||
kstat_set_raw_ops(kstat_t *ksp,
|
||||
int (*headers)(char *buf, size_t size),
|
||||
int (*data)(char *buf, size_t size, void *data),
|
||||
void *(*addr)(kstat_t *ksp, loff_t index))
|
||||
void *(*addr)(kstat_t *ksp, off_t index))
|
||||
{
|
||||
(void) ksp, (void) headers, (void) data, (void) addr;
|
||||
}
|
||||
|
||||
@@ -771,6 +771,12 @@ Number ARC headers to evict per sub-list before proceeding to another sub-list.
|
||||
This batch-style operation prevents entire sub-lists from being evicted at once
|
||||
but comes at a cost of additional unlocking and locking.
|
||||
.
|
||||
.It Sy zfs_arc_evict_batches_limit Ns = Ns Sy 5 Pq uint
|
||||
Number of
|
||||
.Sy zfs_arc_evict_batch_limit
|
||||
batches to process per parallel eviction task under heavy load to reduce the number
|
||||
of context switches.
|
||||
.
|
||||
.It Sy zfs_arc_evict_threads Ns = Ns Sy 0 Pq int
|
||||
Sets the number of ARC eviction threads to be used.
|
||||
.Pp
|
||||
@@ -1462,8 +1468,13 @@ Enable/disable the processing of the free_bpobj object.
|
||||
.It Sy zfs_async_block_max_blocks Ns = Ns Sy UINT64_MAX Po unlimited Pc Pq u64
|
||||
Maximum number of blocks freed in a single TXG.
|
||||
.
|
||||
.It Sy zfs_max_async_dedup_frees Ns = Ns Sy 100000 Po 10^5 Pc Pq u64
|
||||
Maximum number of dedup blocks freed in a single TXG.
|
||||
.It Sy zfs_max_async_dedup_frees Ns = Ns Sy 250000 Pq u64
|
||||
Maximum number of dedup, clone or gang blocks freed in a single TXG.
|
||||
These frees may require additional I/O, making them more expensive.
|
||||
.
|
||||
.It Sy zfs_async_free_zio_wait_interval Ns = Ns Sy 2000 Pq u64
|
||||
After freeing this many dedup, clone or gang blocks wait for all pending
|
||||
I/Os to complete before continuing.
|
||||
.
|
||||
.It Sy zfs_vdev_async_read_max_active Ns = Ns Sy 3 Pq uint
|
||||
Maximum asynchronous read I/O operations active to each device.
|
||||
@@ -1733,7 +1744,7 @@ but we chose the more conservative approach of not setting it,
|
||||
so that there is no possibility of
|
||||
leaking space in the "partial temporary" failure case.
|
||||
.
|
||||
.It Sy zfs_free_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1s Pc Pq uint
|
||||
.It Sy zfs_free_min_time_ms Ns = Ns Sy 500 Ns ms Pq uint
|
||||
During a
|
||||
.Nm zfs Cm destroy
|
||||
operation using the
|
||||
@@ -1761,6 +1772,16 @@ Blocks that go to the special vdevs are still written indirectly, as with
|
||||
.Sy logbias Ns = Ns Sy throughput .
|
||||
This parameter is ignored if an SLOG is present.
|
||||
.
|
||||
.It Sy zfs_import_defer_txgs Ns = Ns Sy 5 Pq uint
|
||||
Number of transaction groups to wait after pool import before starting
|
||||
background work such as asynchronous block freeing
|
||||
.Pq from snapshots, clones, and deduplication
|
||||
and scrub or resilver operations.
|
||||
This allows the pool import and filesystem mounting to complete more quickly
|
||||
without interference from background activities.
|
||||
The default value of 5 transaction groups typically provides sufficient time
|
||||
for import and mount operations to complete on most systems.
|
||||
.
|
||||
.It Sy zfs_initialize_value Ns = Ns Sy 16045690984833335022 Po 0xDEADBEEFDEADBEEE Pc Pq u64
|
||||
Pattern written to vdev free space by
|
||||
.Xr zpool-initialize 8 .
|
||||
@@ -2095,7 +2116,7 @@ even if the
|
||||
.Sy resilver_defer
|
||||
feature is enabled.
|
||||
.
|
||||
.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 3000 Ns ms Po 3 s Pc Pq uint
|
||||
.It Sy zfs_resilver_min_time_ms Ns = Ns Sy 1500 Ns ms Pq uint
|
||||
Resilvers are processed by the sync thread.
|
||||
While resilvering, it will spend at least this much time
|
||||
working on a resilver between TXG flushes.
|
||||
@@ -2112,7 +2133,7 @@ in order to verify the checksums of all blocks which have been
|
||||
copied during the expansion.
|
||||
This is enabled by default and strongly recommended.
|
||||
.
|
||||
.It Sy zfs_scrub_min_time_ms Ns = Ns Sy 1000 Ns ms Po 1 s Pc Pq uint
|
||||
.It Sy zfs_scrub_min_time_ms Ns = Ns Sy 750 Ns ms Pq uint
|
||||
Scrubs are processed by the sync thread.
|
||||
While scrubbing, it will spend at least this much time
|
||||
working on a scrub between TXG flushes.
|
||||
|
||||
@@ -84,6 +84,11 @@
|
||||
.Op Fl K Ar key
|
||||
.Ar dataset path destination
|
||||
.Nm
|
||||
.Fl r
|
||||
.Fl O
|
||||
.Op Fl K Ar key
|
||||
.Ar dataset object-id destination
|
||||
.Nm
|
||||
.Fl R
|
||||
.Op Fl A
|
||||
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
|
||||
@@ -376,6 +381,12 @@ Specified
|
||||
.Ar path
|
||||
must be relative to the root of
|
||||
.Ar dataset .
|
||||
When used with
|
||||
.Fl O ,
|
||||
the
|
||||
.Ar path
|
||||
argument is interpreted as an object identifier,
|
||||
not a path.
|
||||
This option can be combined with
|
||||
.Fl v
|
||||
for increasing verbosity.
|
||||
|
||||
@@ -433,6 +433,7 @@ ZFS_OBJS := \
|
||||
ZFS_OBJS_OS := \
|
||||
abd_os.o \
|
||||
arc_os.o \
|
||||
kasan_compat.o \
|
||||
mmp_os.o \
|
||||
policy.o \
|
||||
qat.o \
|
||||
|
||||
@@ -121,11 +121,12 @@ zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to grow the block size then lock the whole file range.
|
||||
* If we might grow the block size then lock the whole file range.
|
||||
* NB: this test should match the check in zfs_grow_blocksize
|
||||
*/
|
||||
uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
|
||||
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
|
||||
zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
|
||||
if (zp->z_size <= zp->z_blksz && end_size > zp->z_blksz &&
|
||||
(!ISP2(zp->z_blksz) || zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
|
||||
new->lr_offset = 0;
|
||||
new->lr_length = UINT64_MAX;
|
||||
}
|
||||
|
||||
@@ -888,6 +888,14 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef nth_page
|
||||
/*
|
||||
* Since 6.18 nth_page() no longer exists, and is no longer required to iterate
|
||||
* within a single SG entry, so we replace it with a simple addition.
|
||||
*/
|
||||
#define nth_page(p, n) ((p)+(n))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Map the current chunk into aiter. This can be safely called when the aiter
|
||||
* has already been exhausted, in which case this does nothing.
|
||||
@@ -915,7 +923,14 @@ abd_iter_map(struct abd_iter *aiter)
|
||||
aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
|
||||
paddr = zfs_kmap_local(sg_page(aiter->iter_sg));
|
||||
struct page *page = sg_page(aiter->iter_sg);
|
||||
if (PageHighMem(page)) {
|
||||
page = nth_page(page, offset / PAGE_SIZE);
|
||||
offset &= PAGE_SIZE - 1;
|
||||
aiter->iter_mapsize = MIN(aiter->iter_mapsize,
|
||||
PAGE_SIZE - offset);
|
||||
}
|
||||
paddr = zfs_kmap_local(page);
|
||||
}
|
||||
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
@@ -933,8 +948,14 @@ abd_iter_unmap(struct abd_iter *aiter)
|
||||
return;
|
||||
|
||||
if (!abd_is_linear(aiter->iter_abd)) {
|
||||
size_t offset = aiter->iter_offset;
|
||||
|
||||
struct page *page = sg_page(aiter->iter_sg);
|
||||
if (PageHighMem(page))
|
||||
offset &= PAGE_SIZE - 1;
|
||||
|
||||
/* LINTED E_FUNC_SET_NOT_USED */
|
||||
zfs_kunmap_local(aiter->iter_mapaddr - aiter->iter_offset);
|
||||
zfs_kunmap_local(aiter->iter_mapaddr - offset);
|
||||
}
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
@@ -1110,14 +1131,6 @@ abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
|
||||
#define ABD_ITER_PAGE_SIZE(page) (PAGESIZE)
|
||||
#endif
|
||||
|
||||
#ifndef nth_page
|
||||
/*
|
||||
* Since 6.18 nth_page() no longer exists, and is no longer required to iterate
|
||||
* within a single SG entry, so we replace it with a simple addition.
|
||||
*/
|
||||
#define nth_page(p, n) ((p)+(n))
|
||||
#endif
|
||||
|
||||
void
|
||||
abd_iter_page(struct abd_iter *aiter)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
// SPDX-License-Identifier: CDDL-1.0
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_LINUX_KASAN_ENABLED_H
|
||||
#define _ZFS_LINUX_KASAN_ENABLED_H
|
||||
|
||||
#ifdef HAVE_KASAN_ENABLED_GPL_ONLY
|
||||
/*
|
||||
* The kernel supports a runtime setting to enable/disable KASAN. The control
|
||||
* flag kasan_flag_enabled is a GPL-only symbol, which prevents us from
|
||||
* accessing it. Unfortunately, this is used by the header function
|
||||
* kasan_enabled(), which in turn is used to call or skip instrumentation
|
||||
* functions in various header-based kernel facilities. If we inadvertently
|
||||
* call one, the build breaks.
|
||||
*
|
||||
* To work around this, we define our own `kasan_flag_enabled` set to "false",
|
||||
* disabling use of KASAN inside our code. The linker will resolve this symbol
|
||||
* at build time, and so will never need to reach out to the off-limits kernel
|
||||
* symbol.
|
||||
*/
|
||||
#include <linux/static_key.h>
|
||||
struct static_key_false kasan_flag_enabled = STATIC_KEY_FALSE_INIT;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -117,13 +117,17 @@ static int zfs_snapshot_no_setuid = 0;
|
||||
typedef struct {
|
||||
char *se_name; /* full snapshot name */
|
||||
char *se_path; /* full mount path */
|
||||
spa_t *se_spa; /* pool spa */
|
||||
spa_t *se_spa; /* pool spa (NULL if pending) */
|
||||
uint64_t se_objsetid; /* snapshot objset id */
|
||||
struct dentry *se_root_dentry; /* snapshot root dentry */
|
||||
taskqid_t se_taskqid; /* scheduled unmount taskqid */
|
||||
avl_node_t se_node_name; /* zfs_snapshots_by_name link */
|
||||
avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */
|
||||
zfs_refcount_t se_refcount; /* reference count */
|
||||
kmutex_t se_mtx; /* protects se_mounting and se_cv */
|
||||
kcondvar_t se_cv; /* signal mount completion */
|
||||
boolean_t se_mounting; /* mount operation in progress */
|
||||
int se_mount_error; /* error from failed mount */
|
||||
} zfs_snapentry_t;
|
||||
|
||||
static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
|
||||
@@ -146,6 +150,10 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
|
||||
se->se_objsetid = objsetid;
|
||||
se->se_root_dentry = root_dentry;
|
||||
se->se_taskqid = TASKQID_INVALID;
|
||||
mutex_init(&se->se_mtx, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&se->se_cv, NULL, CV_DEFAULT, NULL);
|
||||
se->se_mounting = B_FALSE;
|
||||
se->se_mount_error = 0;
|
||||
|
||||
zfs_refcount_create(&se->se_refcount);
|
||||
|
||||
@@ -162,6 +170,8 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)
|
||||
zfs_refcount_destroy(&se->se_refcount);
|
||||
kmem_strfree(se->se_name);
|
||||
kmem_strfree(se->se_path);
|
||||
mutex_destroy(&se->se_mtx);
|
||||
cv_destroy(&se->se_cv);
|
||||
|
||||
kmem_free(se, sizeof (zfs_snapentry_t));
|
||||
}
|
||||
@@ -187,9 +197,9 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se)
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
|
||||
* zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part
|
||||
* of the trees a reference is held.
|
||||
* Add a zfs_snapentry_t to the zfs_snapshots_by_name tree. If the entry
|
||||
* is not pending (se_spa != NULL), also add to zfs_snapshots_by_objsetid.
|
||||
* While the zfs_snapentry_t is part of the trees a reference is held.
|
||||
*/
|
||||
static void
|
||||
zfsctl_snapshot_add(zfs_snapentry_t *se)
|
||||
@@ -197,24 +207,42 @@ zfsctl_snapshot_add(zfs_snapentry_t *se)
|
||||
ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
|
||||
zfsctl_snapshot_hold(se);
|
||||
avl_add(&zfs_snapshots_by_name, se);
|
||||
avl_add(&zfs_snapshots_by_objsetid, se);
|
||||
if (se->se_spa != NULL)
|
||||
avl_add(&zfs_snapshots_by_objsetid, se);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
|
||||
* zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped,
|
||||
* this can result in the structure being freed if that was the last
|
||||
* remaining reference.
|
||||
* Remove a zfs_snapentry_t from the zfs_snapshots_by_name tree and
|
||||
* zfs_snapshots_by_objsetid tree (if not pending). Upon removal a
|
||||
* reference is dropped, this can result in the structure being freed
|
||||
* if that was the last remaining reference.
|
||||
*/
|
||||
static void
|
||||
zfsctl_snapshot_remove(zfs_snapentry_t *se)
|
||||
{
|
||||
ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
|
||||
avl_remove(&zfs_snapshots_by_name, se);
|
||||
avl_remove(&zfs_snapshots_by_objsetid, se);
|
||||
if (se->se_spa != NULL)
|
||||
avl_remove(&zfs_snapshots_by_objsetid, se);
|
||||
zfsctl_snapshot_rele(se);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill a pending zfs_snapentry_t after mount succeeds. Fills in the
|
||||
* remaining fields and adds the entry to the zfs_snapshots_by_objsetid tree.
|
||||
*/
|
||||
static void
|
||||
zfsctl_snapshot_fill(zfs_snapentry_t *se, spa_t *spa, uint64_t objsetid,
|
||||
struct dentry *root_dentry)
|
||||
{
|
||||
ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
|
||||
ASSERT3P(se->se_spa, ==, NULL);
|
||||
se->se_spa = spa;
|
||||
se->se_objsetid = objsetid;
|
||||
se->se_root_dentry = root_dentry;
|
||||
avl_add(&zfs_snapshots_by_objsetid, se);
|
||||
}
|
||||
|
||||
/*
|
||||
* Snapshot name comparison function for the zfs_snapshots_by_name.
|
||||
*/
|
||||
@@ -312,6 +340,11 @@ zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
|
||||
se = zfsctl_snapshot_find_by_name(old_snapname);
|
||||
if (se == NULL)
|
||||
return (SET_ERROR(ENOENT));
|
||||
if (se->se_spa == NULL) {
|
||||
/* Snapshot mount is in progress */
|
||||
zfsctl_snapshot_rele(se);
|
||||
return (SET_ERROR(EBUSY));
|
||||
}
|
||||
|
||||
zfsctl_snapshot_remove(se);
|
||||
kmem_strfree(se->se_name);
|
||||
@@ -430,26 +463,6 @@ zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if snapname is currently mounted. Returned non-zero when mounted
|
||||
* and zero when unmounted.
|
||||
*/
|
||||
static boolean_t
|
||||
zfsctl_snapshot_ismounted(const char *snapname)
|
||||
{
|
||||
zfs_snapentry_t *se;
|
||||
boolean_t ismounted = B_FALSE;
|
||||
|
||||
rw_enter(&zfs_snapshot_lock, RW_READER);
|
||||
if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
|
||||
zfsctl_snapshot_rele(se);
|
||||
ismounted = B_TRUE;
|
||||
}
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
|
||||
return (ismounted);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the given inode is a part of the virtual .zfs directory.
|
||||
*/
|
||||
@@ -1131,6 +1144,14 @@ zfsctl_snapshot_unmount(const char *snapname, int flags)
|
||||
}
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
|
||||
/*
|
||||
* Wait for any pending auto-mount to complete before unmounting.
|
||||
*/
|
||||
mutex_enter(&se->se_mtx);
|
||||
while (se->se_mounting)
|
||||
cv_wait(&se->se_cv, &se->se_mtx);
|
||||
mutex_exit(&se->se_mtx);
|
||||
|
||||
exportfs_flush();
|
||||
|
||||
if (flags & MNT_FORCE)
|
||||
@@ -1232,14 +1253,35 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
zfs_snapshot_no_setuid ? "nosuid" : "suid");
|
||||
|
||||
/*
|
||||
* Multiple concurrent automounts of a snapshot are never allowed.
|
||||
* The snapshot may be manually mounted as many times as desired.
|
||||
* Check if snapshot is already being mounted. If found, wait for
|
||||
* pending mount to complete, then return that mount's result.
|
||||
*/
|
||||
if (zfsctl_snapshot_ismounted(full_name)) {
|
||||
error = 0;
|
||||
rw_enter(&zfs_snapshot_lock, RW_WRITER);
|
||||
if ((se = zfsctl_snapshot_find_by_name(full_name)) != NULL) {
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
mutex_enter(&se->se_mtx);
|
||||
while (se->se_mounting)
|
||||
cv_wait(&se->se_cv, &se->se_mtx);
|
||||
|
||||
/*
|
||||
* Return the same error as the first mount attempt (0 if
|
||||
* succeeded, error code if failed).
|
||||
*/
|
||||
error = se->se_mount_error;
|
||||
mutex_exit(&se->se_mtx);
|
||||
zfsctl_snapshot_rele(se);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create pending entry and mark mount in progress.
|
||||
*/
|
||||
se = zfsctl_snapshot_alloc(full_name, full_path, NULL, 0, NULL);
|
||||
se->se_mounting = B_TRUE;
|
||||
zfsctl_snapshot_add(se);
|
||||
zfsctl_snapshot_hold(se);
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
|
||||
/*
|
||||
* Attempt to mount the snapshot from user space. Normally this
|
||||
* would be done using the vfs_kern_mount() function, however that
|
||||
@@ -1258,6 +1300,9 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
argv[9] = full_path;
|
||||
error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
if (error) {
|
||||
/*
|
||||
* Mount failed - cleanup pending entry and signal waiters.
|
||||
*/
|
||||
if (!(error & MOUNT_BUSY << 8)) {
|
||||
zfs_dbgmsg("Unable to automount %s error=%d",
|
||||
full_path, error);
|
||||
@@ -1273,6 +1318,16 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
*/
|
||||
error = 0;
|
||||
}
|
||||
|
||||
rw_enter(&zfs_snapshot_lock, RW_WRITER);
|
||||
zfsctl_snapshot_remove(se);
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
mutex_enter(&se->se_mtx);
|
||||
se->se_mount_error = error;
|
||||
se->se_mounting = B_FALSE;
|
||||
cv_broadcast(&se->se_cv);
|
||||
mutex_exit(&se->se_mtx);
|
||||
zfsctl_snapshot_rele(se);
|
||||
goto error;
|
||||
}
|
||||
|
||||
@@ -1289,14 +1344,25 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
spath.mnt->mnt_flags |= MNT_SHRINKABLE;
|
||||
|
||||
rw_enter(&zfs_snapshot_lock, RW_WRITER);
|
||||
se = zfsctl_snapshot_alloc(full_name, full_path,
|
||||
snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
|
||||
dentry);
|
||||
zfsctl_snapshot_add(se);
|
||||
zfsctl_snapshot_fill(se, snap_zfsvfs->z_os->os_spa,
|
||||
dmu_objset_id(snap_zfsvfs->z_os), dentry);
|
||||
zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
} else {
|
||||
rw_enter(&zfs_snapshot_lock, RW_WRITER);
|
||||
zfsctl_snapshot_remove(se);
|
||||
rw_exit(&zfs_snapshot_lock);
|
||||
}
|
||||
path_put(&spath);
|
||||
|
||||
/*
|
||||
* Signal mount completion and cleanup.
|
||||
*/
|
||||
mutex_enter(&se->se_mtx);
|
||||
se->se_mounting = B_FALSE;
|
||||
cv_broadcast(&se->se_cv);
|
||||
mutex_exit(&se->se_mtx);
|
||||
zfsctl_snapshot_rele(se);
|
||||
error:
|
||||
kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
|
||||
kmem_free(full_path, MAXPATHLEN);
|
||||
|
||||
@@ -100,15 +100,17 @@ zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
|
||||
while (n && uio->uio_resid) {
|
||||
void *paddr;
|
||||
cnt = MIN(bv->bv_len - skip, n);
|
||||
size_t offset = bv->bv_offset + skip;
|
||||
cnt = MIN(PAGE_SIZE - (offset & ~PAGE_MASK),
|
||||
MIN(bv->bv_len - skip, n));
|
||||
|
||||
paddr = zfs_kmap_local(bv->bv_page);
|
||||
paddr = zfs_kmap_local(bv->bv_page + (offset >> PAGE_SHIFT));
|
||||
if (rw == UIO_READ) {
|
||||
/* Copy from buffer 'p' to the bvec data */
|
||||
memcpy(paddr + bv->bv_offset + skip, p, cnt);
|
||||
memcpy(paddr + (offset & ~PAGE_MASK), p, cnt);
|
||||
} else {
|
||||
/* Copy from bvec data to buffer 'p' */
|
||||
memcpy(p, paddr + bv->bv_offset + skip, cnt);
|
||||
memcpy(p, paddr + (offset & ~PAGE_MASK), cnt);
|
||||
}
|
||||
zfs_kunmap_local(paddr);
|
||||
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
/* Portions Copyright 2007 Jeremy Teo */
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
@@ -95,11 +95,12 @@ zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to grow the block size then lock the whole file range.
|
||||
* If we might grow the block size then lock the whole file range.
|
||||
* NB: this test should match the check in zfs_grow_blocksize
|
||||
*/
|
||||
uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
|
||||
if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
|
||||
zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
|
||||
if (zp->z_size <= zp->z_blksz && end_size > zp->z_blksz &&
|
||||
(!ISP2(zp->z_blksz) || zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
|
||||
new->lr_offset = 0;
|
||||
new->lr_length = UINT64_MAX;
|
||||
}
|
||||
|
||||
@@ -1111,13 +1111,6 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
|
||||
|
||||
func_raidz_gen(caddrs, daddr, len, dlen);
|
||||
|
||||
for (i = parity-1; i >= 0; i--) {
|
||||
abd_iter_unmap(&caiters[i]);
|
||||
c_cabds[i] =
|
||||
abd_advance_abd_iter(cabds[i], c_cabds[i],
|
||||
&caiters[i], len);
|
||||
}
|
||||
|
||||
if (dsize > 0) {
|
||||
abd_iter_unmap(&daiter);
|
||||
c_dabd =
|
||||
@@ -1126,6 +1119,13 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
|
||||
dsize -= dlen;
|
||||
}
|
||||
|
||||
for (i = parity - 1; i >= 0; i--) {
|
||||
abd_iter_unmap(&caiters[i]);
|
||||
c_cabds[i] =
|
||||
abd_advance_abd_iter(cabds[i], c_cabds[i],
|
||||
&caiters[i], len);
|
||||
}
|
||||
|
||||
csize -= len;
|
||||
}
|
||||
abd_exit_critical(flags);
|
||||
@@ -1194,7 +1194,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
||||
|
||||
func_raidz_rec(xaddrs, len, caddrs, mul);
|
||||
|
||||
for (i = parity-1; i >= 0; i--) {
|
||||
for (i = parity - 1; i >= 0; i--) {
|
||||
abd_iter_unmap(&xiters[i]);
|
||||
abd_iter_unmap(&citers[i]);
|
||||
c_tabds[i] =
|
||||
|
||||
@@ -371,6 +371,12 @@ static uint_t zfs_arc_eviction_pct = 200;
|
||||
*/
|
||||
static uint_t zfs_arc_evict_batch_limit = 10;
|
||||
|
||||
/*
|
||||
* Number of batches to process per parallel eviction task under heavy load to
|
||||
* reduce number of context switches.
|
||||
*/
|
||||
static uint_t zfs_arc_evict_batches_limit = 5;
|
||||
|
||||
/* number of seconds before growing cache again */
|
||||
uint_t arc_grow_retry = 5;
|
||||
|
||||
@@ -406,8 +412,8 @@ uint_t arc_no_grow_shift = 5;
|
||||
* minimum lifespan of a prefetch block in clock ticks
|
||||
* (initialized in arc_init())
|
||||
*/
|
||||
static uint_t arc_min_prefetch_ms;
|
||||
static uint_t arc_min_prescient_prefetch_ms;
|
||||
static uint_t arc_min_prefetch;
|
||||
static uint_t arc_min_prescient_prefetch;
|
||||
|
||||
/*
|
||||
* If this percent of memory is free, don't throttle.
|
||||
@@ -3766,8 +3772,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
|
||||
{
|
||||
arc_state_t *evicted_state, *state;
|
||||
int64_t bytes_evicted = 0;
|
||||
uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
|
||||
arc_min_prescient_prefetch_ms : arc_min_prefetch_ms;
|
||||
|
||||
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
|
||||
ASSERT(HDR_HAS_L1HDR(hdr));
|
||||
@@ -3824,9 +3828,10 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, uint64_t *real_evicted)
|
||||
((state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost);
|
||||
|
||||
/* prefetch buffers have a minimum lifespan */
|
||||
uint_t min_lifetime = HDR_PRESCIENT_PREFETCH(hdr) ?
|
||||
arc_min_prescient_prefetch : arc_min_prefetch;
|
||||
if ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
|
||||
ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <
|
||||
MSEC_TO_TICK(min_lifetime)) {
|
||||
ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < min_lifetime) {
|
||||
ARCSTAT_BUMP(arcstat_evict_skip);
|
||||
return (bytes_evicted);
|
||||
}
|
||||
@@ -3900,7 +3905,7 @@ arc_set_need_free(void)
|
||||
|
||||
static uint64_t
|
||||
arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
|
||||
uint64_t spa, uint64_t bytes)
|
||||
uint64_t spa, uint64_t bytes, boolean_t *more)
|
||||
{
|
||||
multilist_sublist_t *mls;
|
||||
uint64_t bytes_evicted = 0, real_evicted = 0;
|
||||
@@ -3984,6 +3989,10 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
|
||||
|
||||
multilist_sublist_unlock(mls);
|
||||
|
||||
/* Indicate if another iteration may be productive. */
|
||||
if (more)
|
||||
*more = (hdr != NULL);
|
||||
|
||||
/*
|
||||
* Increment the count of evicted bytes, and wake up any threads that
|
||||
* are waiting for the count to reach this value. Since the list is
|
||||
@@ -4004,21 +4013,12 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
|
||||
while ((aw = list_head(&arc_evict_waiters)) != NULL &&
|
||||
aw->aew_count <= arc_evict_count) {
|
||||
list_remove(&arc_evict_waiters, aw);
|
||||
cv_broadcast(&aw->aew_cv);
|
||||
cv_signal(&aw->aew_cv);
|
||||
}
|
||||
}
|
||||
arc_set_need_free();
|
||||
mutex_exit(&arc_evict_lock);
|
||||
|
||||
/*
|
||||
* If the ARC size is reduced from arc_c_max to arc_c_min (especially
|
||||
* if the average cached block is small), eviction can be on-CPU for
|
||||
* many seconds. To ensure that other threads that may be bound to
|
||||
* this CPU are able to make progress, make a voluntary preemption
|
||||
* call here.
|
||||
*/
|
||||
kpreempt(KPREEMPT_SYNC);
|
||||
|
||||
return (bytes_evicted);
|
||||
}
|
||||
|
||||
@@ -4079,8 +4079,18 @@ static void
|
||||
arc_evict_task(void *arg)
|
||||
{
|
||||
evict_arg_t *eva = arg;
|
||||
eva->eva_evicted = arc_evict_state_impl(eva->eva_ml, eva->eva_idx,
|
||||
eva->eva_marker, eva->eva_spa, eva->eva_bytes);
|
||||
uint64_t total_evicted = 0;
|
||||
boolean_t more;
|
||||
uint_t batches = zfs_arc_evict_batches_limit;
|
||||
|
||||
/* Process multiple batches to amortize taskq dispatch overhead. */
|
||||
do {
|
||||
total_evicted += arc_evict_state_impl(eva->eva_ml,
|
||||
eva->eva_idx, eva->eva_marker, eva->eva_spa,
|
||||
eva->eva_bytes - total_evicted, &more);
|
||||
} while (total_evicted < eva->eva_bytes && --batches > 0 && more);
|
||||
|
||||
eva->eva_evicted = total_evicted;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -4221,18 +4231,19 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
|
||||
|
||||
if (bytes == ARC_EVICT_ALL) {
|
||||
evict = bytes;
|
||||
} else if (left > ntasks * MIN_EVICT_SIZE) {
|
||||
} else if (left >= ntasks * MIN_EVICT_SIZE) {
|
||||
evict = DIV_ROUND_UP(left, ntasks);
|
||||
} else {
|
||||
ntasks = DIV_ROUND_UP(left, MIN_EVICT_SIZE);
|
||||
if (ntasks == 1)
|
||||
ntasks = left / MIN_EVICT_SIZE;
|
||||
if (ntasks < 2)
|
||||
use_evcttq = B_FALSE;
|
||||
else
|
||||
evict = DIV_ROUND_UP(left, ntasks);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; sublists_left > 0; i++, sublist_idx++,
|
||||
sublists_left--) {
|
||||
uint64_t bytes_remaining;
|
||||
uint64_t bytes_evicted;
|
||||
|
||||
/* we've reached the end, wrap to the beginning */
|
||||
@@ -4254,16 +4265,17 @@ arc_evict_state(arc_state_t *state, arc_buf_contents_t type, uint64_t spa,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (total_evicted < bytes)
|
||||
bytes_remaining = bytes - total_evicted;
|
||||
else
|
||||
break;
|
||||
|
||||
bytes_evicted = arc_evict_state_impl(ml, sublist_idx,
|
||||
markers[sublist_idx], spa, bytes_remaining);
|
||||
markers[sublist_idx], spa, bytes - total_evicted,
|
||||
NULL);
|
||||
|
||||
scan_evicted += bytes_evicted;
|
||||
total_evicted += bytes_evicted;
|
||||
|
||||
if (total_evicted < bytes)
|
||||
kpreempt(KPREEMPT_SYNC);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if (use_evcttq) {
|
||||
@@ -4838,8 +4850,7 @@ arc_evict_cb_check(void *arg, zthr_t *zthr)
|
||||
*/
|
||||
return ((zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_DATA]) +
|
||||
zfs_refcount_count(&arc_uncached->arcs_esize[ARC_BUFC_METADATA]) &&
|
||||
ddi_get_lbolt() - arc_last_uncached_flush >
|
||||
MSEC_TO_TICK(arc_min_prefetch_ms / 2)));
|
||||
ddi_get_lbolt() - arc_last_uncached_flush > arc_min_prefetch / 2));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4889,7 +4900,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)
|
||||
*/
|
||||
arc_evict_waiter_t *aw;
|
||||
while ((aw = list_remove_head(&arc_evict_waiters)) != NULL) {
|
||||
cv_broadcast(&aw->aew_cv);
|
||||
cv_signal(&aw->aew_cv);
|
||||
}
|
||||
arc_set_need_free();
|
||||
}
|
||||
@@ -5170,9 +5181,8 @@ arc_wait_for_eviction(uint64_t amount, boolean_t lax, boolean_t use_reserve)
|
||||
|
||||
uint64_t last_count = 0;
|
||||
mutex_enter(&arc_evict_lock);
|
||||
if (!list_is_empty(&arc_evict_waiters)) {
|
||||
arc_evict_waiter_t *last =
|
||||
list_tail(&arc_evict_waiters);
|
||||
arc_evict_waiter_t *last;
|
||||
if ((last = list_tail(&arc_evict_waiters)) != NULL) {
|
||||
last_count = last->aew_count;
|
||||
} else if (!arc_evict_needed) {
|
||||
arc_evict_needed = B_TRUE;
|
||||
@@ -7593,12 +7603,12 @@ arc_tuning_update(boolean_t verbose)
|
||||
|
||||
/* Valid range: 1 - N ms */
|
||||
if (zfs_arc_min_prefetch_ms)
|
||||
arc_min_prefetch_ms = zfs_arc_min_prefetch_ms;
|
||||
arc_min_prefetch = MSEC_TO_TICK(zfs_arc_min_prefetch_ms);
|
||||
|
||||
/* Valid range: 1 - N ms */
|
||||
if (zfs_arc_min_prescient_prefetch_ms) {
|
||||
arc_min_prescient_prefetch_ms =
|
||||
zfs_arc_min_prescient_prefetch_ms;
|
||||
arc_min_prescient_prefetch =
|
||||
MSEC_TO_TICK(zfs_arc_min_prescient_prefetch_ms);
|
||||
}
|
||||
|
||||
/* Valid range: 0 - 100 */
|
||||
@@ -7982,8 +7992,8 @@ arc_init(void)
|
||||
list_create(&arc_evict_waiters, sizeof (arc_evict_waiter_t),
|
||||
offsetof(arc_evict_waiter_t, aew_node));
|
||||
|
||||
arc_min_prefetch_ms = 1000;
|
||||
arc_min_prescient_prefetch_ms = 6000;
|
||||
arc_min_prefetch = MSEC_TO_TICK(1000);
|
||||
arc_min_prescient_prefetch = MSEC_TO_TICK(6000);
|
||||
|
||||
#if defined(_KERNEL)
|
||||
arc_lowmem_init();
|
||||
@@ -11290,6 +11300,9 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, eviction_pct, UINT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batch_limit, UINT, ZMOD_RW,
|
||||
"The number of headers to evict per sublist before moving to the next");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, evict_batches_limit, UINT, ZMOD_RW,
|
||||
"The number of batches to run per parallel eviction task");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, prune_task_threads, INT, ZMOD_RW,
|
||||
"Number of arc_prune threads");
|
||||
|
||||
|
||||
@@ -407,6 +407,9 @@ ddt_object_create(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
VERIFY0(ddt_ops[type]->ddt_op_create(os, objectp, tx, prehash));
|
||||
ASSERT3U(*objectp, !=, 0);
|
||||
|
||||
VERIFY0(dnode_hold(os, *objectp, ddt,
|
||||
&ddt->ddt_object_dnode[type][class]));
|
||||
|
||||
ASSERT3U(ddt->ddt_version, !=, DDT_VERSION_UNCONFIGURED);
|
||||
|
||||
VERIFY0(zap_add(os, ddt->ddt_dir_object, name, sizeof (uint64_t), 1,
|
||||
@@ -437,6 +440,10 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
VERIFY0(count);
|
||||
VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx));
|
||||
VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx));
|
||||
if (ddt->ddt_object_dnode[type][class] != NULL) {
|
||||
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
|
||||
ddt->ddt_object_dnode[type][class] = NULL;
|
||||
}
|
||||
VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx));
|
||||
memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t));
|
||||
|
||||
@@ -468,28 +475,38 @@ ddt_object_load(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
error = dnode_hold(ddt->ddt_os, ddt->ddt_object[type][class], ddt,
|
||||
&ddt->ddt_object_dnode[type][class]);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
|
||||
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
|
||||
&ddt->ddt_histogram[type][class]);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* Seed the cached statistics.
|
||||
*/
|
||||
error = ddt_object_info(ddt, type, class, &doi);
|
||||
if (error)
|
||||
return (error);
|
||||
goto error;
|
||||
|
||||
error = ddt_object_count(ddt, type, class, &count);
|
||||
if (error)
|
||||
return (error);
|
||||
goto error;
|
||||
|
||||
ddo->ddo_count = count;
|
||||
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
|
||||
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
|
||||
|
||||
return (0);
|
||||
|
||||
error:
|
||||
dnode_rele(ddt->ddt_object_dnode[type][class], ddt);
|
||||
ddt->ddt_object_dnode[type][class] = NULL;
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -528,11 +545,11 @@ static int
|
||||
ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
ddt_entry_t *dde)
|
||||
{
|
||||
if (!ddt_object_exists(ddt, type, class))
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
if (dn == NULL)
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
return (ddt_ops[type]->ddt_op_lookup(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], &dde->dde_key,
|
||||
return (ddt_ops[type]->ddt_op_lookup(dn, &dde->dde_key,
|
||||
dde->dde_phys, DDT_PHYS_SIZE(ddt)));
|
||||
}
|
||||
|
||||
@@ -540,42 +557,42 @@ static int
|
||||
ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
const ddt_key_t *ddk)
|
||||
{
|
||||
if (!ddt_object_exists(ddt, type, class))
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
if (dn == NULL)
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
return (ddt_ops[type]->ddt_op_contains(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], ddk));
|
||||
return (ddt_ops[type]->ddt_op_contains(dn, ddk));
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
const ddt_key_t *ddk)
|
||||
{
|
||||
if (!ddt_object_exists(ddt, type, class))
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
if (dn == NULL)
|
||||
return;
|
||||
|
||||
ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], ddk);
|
||||
ddt_ops[type]->ddt_op_prefetch(dn, ddk);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
{
|
||||
if (!ddt_object_exists(ddt, type, class))
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
if (dn == NULL)
|
||||
return;
|
||||
|
||||
ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class]);
|
||||
ddt_ops[type]->ddt_op_prefetch_all(dn);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
const ddt_lightweight_entry_t *ddlwe, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT(ddt_object_exists(ddt, type, class));
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
ASSERT(dn != NULL);
|
||||
|
||||
return (ddt_ops[type]->ddt_op_update(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], &ddlwe->ddlwe_key,
|
||||
return (ddt_ops[type]->ddt_op_update(dn, &ddlwe->ddlwe_key,
|
||||
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt), tx));
|
||||
}
|
||||
|
||||
@@ -583,20 +600,20 @@ static int
|
||||
ddt_object_remove(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
const ddt_key_t *ddk, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT(ddt_object_exists(ddt, type, class));
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
ASSERT(dn != NULL);
|
||||
|
||||
return (ddt_ops[type]->ddt_op_remove(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], ddk, tx));
|
||||
return (ddt_ops[type]->ddt_op_remove(dn, ddk, tx));
|
||||
}
|
||||
|
||||
int
|
||||
ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
uint64_t *walk, ddt_lightweight_entry_t *ddlwe)
|
||||
{
|
||||
ASSERT(ddt_object_exists(ddt, type, class));
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
ASSERT(dn != NULL);
|
||||
|
||||
int error = ddt_ops[type]->ddt_op_walk(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], walk, &ddlwe->ddlwe_key,
|
||||
int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key,
|
||||
&ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
|
||||
if (error == 0) {
|
||||
ddlwe->ddlwe_type = type;
|
||||
@@ -610,10 +627,10 @@ int
|
||||
ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
uint64_t *count)
|
||||
{
|
||||
ASSERT(ddt_object_exists(ddt, type, class));
|
||||
dnode_t *dn = ddt->ddt_object_dnode[type][class];
|
||||
ASSERT(dn != NULL);
|
||||
|
||||
return (ddt_ops[type]->ddt_op_count(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], count));
|
||||
return (ddt_ops[type]->ddt_op_count(dn, count));
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1037,13 +1054,6 @@ ddt_remove(ddt_t *ddt, ddt_entry_t *dde)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&ddt->ddt_lock));
|
||||
|
||||
/* Entry is still in the log, so charge the entry back to it */
|
||||
if (dde->dde_flags & DDE_FLAG_LOGGED) {
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
|
||||
ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, &ddlwe);
|
||||
}
|
||||
|
||||
avl_remove(&ddt->ddt_tree, dde);
|
||||
ddt_free(ddt, dde);
|
||||
}
|
||||
@@ -1234,63 +1244,61 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t verify)
|
||||
|
||||
/* Time to make a new entry. */
|
||||
dde = ddt_alloc(ddt, &search);
|
||||
|
||||
/* Record the time this class was created (used by ddt prune) */
|
||||
if (ddt->ddt_flags & DDT_FLAG_FLAT)
|
||||
dde->dde_phys->ddp_flat.ddp_class_start = ddt_class_start();
|
||||
|
||||
avl_insert(&ddt->ddt_tree, dde, where);
|
||||
|
||||
/* If its in the log tree, we can "load" it from there */
|
||||
/*
|
||||
* The entry in ddt_tree has no DDE_FLAG_LOADED, so other possible
|
||||
* threads will wait even while we drop the lock.
|
||||
*/
|
||||
ddt_exit(ddt);
|
||||
|
||||
/*
|
||||
* If there is a log, we should try to "load" from there first.
|
||||
*/
|
||||
if (ddt->ddt_flags & DDT_FLAG_LOG) {
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
boolean_t from_flushing;
|
||||
|
||||
if (ddt_log_find_key(ddt, &search, &ddlwe)) {
|
||||
/*
|
||||
* See if we have the key first, and if so, set up
|
||||
* the entry.
|
||||
*/
|
||||
/* Read-only search, no locks needed (logs stable during I/O) */
|
||||
if (ddt_log_find_key(ddt, &search, &ddlwe, &from_flushing)) {
|
||||
dde->dde_type = ddlwe.ddlwe_type;
|
||||
dde->dde_class = ddlwe.ddlwe_class;
|
||||
memcpy(dde->dde_phys, &ddlwe.ddlwe_phys,
|
||||
DDT_PHYS_SIZE(ddt));
|
||||
/* Whatever we found isn't valid for this BP, eject */
|
||||
if (verify &&
|
||||
!ddt_entry_lookup_is_valid(ddt, bp, dde)) {
|
||||
|
||||
/*
|
||||
* Check validity. If invalid and no waiters, clean up
|
||||
* immediately. Otherwise continue setup for waiters.
|
||||
*/
|
||||
boolean_t valid = !verify ||
|
||||
ddt_entry_lookup_is_valid(ddt, bp, dde);
|
||||
ddt_enter(ddt);
|
||||
if (!valid && dde->dde_waiters == 0) {
|
||||
avl_remove(&ddt->ddt_tree, dde);
|
||||
ddt_free(ddt, dde);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/* Remove it and count it */
|
||||
if (ddt_log_remove_key(ddt,
|
||||
ddt->ddt_log_active, &search)) {
|
||||
DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);
|
||||
} else {
|
||||
VERIFY(ddt_log_remove_key(ddt,
|
||||
ddt->ddt_log_flushing, &search));
|
||||
dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;
|
||||
if (from_flushing) {
|
||||
dde->dde_flags |= DDE_FLAG_FROM_FLUSHING;
|
||||
DDT_KSTAT_BUMP(ddt,
|
||||
dds_lookup_log_flushing_hit);
|
||||
} else {
|
||||
DDT_KSTAT_BUMP(ddt, dds_lookup_log_active_hit);
|
||||
}
|
||||
|
||||
dde->dde_flags = DDE_FLAG_LOADED | DDE_FLAG_LOGGED;
|
||||
|
||||
DDT_KSTAT_BUMP(ddt, dds_lookup_log_hit);
|
||||
DDT_KSTAT_BUMP(ddt, dds_lookup_existing);
|
||||
|
||||
return (dde);
|
||||
cv_broadcast(&dde->dde_cv);
|
||||
|
||||
return (valid ? dde : NULL);
|
||||
}
|
||||
|
||||
DDT_KSTAT_BUMP(ddt, dds_lookup_log_miss);
|
||||
}
|
||||
|
||||
/*
|
||||
* ddt_tree is now stable, so unlock and let everyone else keep moving.
|
||||
* Anyone landing on this entry will find it without DDE_FLAG_LOADED,
|
||||
* and go to sleep waiting for it above.
|
||||
*/
|
||||
ddt_exit(ddt);
|
||||
|
||||
/* Search all store objects for the entry. */
|
||||
error = ENOENT;
|
||||
for (type = 0; type < DDT_TYPES; type++) {
|
||||
@@ -1727,6 +1735,15 @@ ddt_table_free(ddt_t *ddt)
|
||||
wmsum_fini(&ddt->ddt_kstat_dds_lookup_stored_miss);
|
||||
|
||||
ddt_log_free(ddt);
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES; class++) {
|
||||
if (ddt->ddt_object_dnode[type][class] != NULL) {
|
||||
dnode_rele(ddt->ddt_object_dnode[type][class],
|
||||
ddt);
|
||||
ddt->ddt_object_dnode[type][class] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT0(avl_numnodes(&ddt->ddt_tree));
|
||||
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));
|
||||
avl_destroy(&ddt->ddt_tree);
|
||||
@@ -2354,6 +2371,19 @@ ddt_sync_table_log(ddt_t *ddt, dmu_tx_t *tx)
|
||||
avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
|
||||
ASSERT(dde->dde_flags & DDE_FLAG_LOADED);
|
||||
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
|
||||
|
||||
/* If from flushing log, remove it. */
|
||||
if (dde->dde_flags & DDE_FLAG_FROM_FLUSHING) {
|
||||
VERIFY(ddt_log_remove_key(ddt,
|
||||
ddt->ddt_log_flushing, &ddlwe.ddlwe_key));
|
||||
}
|
||||
|
||||
/* Update class_start to track last modification time */
|
||||
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
|
||||
ddlwe.ddlwe_phys.ddp_flat.ddp_class_start =
|
||||
ddt_class_start();
|
||||
}
|
||||
|
||||
ddt_log_entry(ddt, &ddlwe, &dlu);
|
||||
ddt_sync_scan_entry(ddt, &ddlwe, tx);
|
||||
ddt_free(ddt, dde);
|
||||
@@ -2414,6 +2444,13 @@ ddt_sync_table_flush(ddt_t *ddt, dmu_tx_t *tx)
|
||||
|
||||
ddt_lightweight_entry_t ddlwe;
|
||||
DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, &ddlwe);
|
||||
|
||||
/* Update class_start to track last modification time */
|
||||
if (ddt->ddt_flags & DDT_FLAG_FLAT) {
|
||||
ddlwe.ddlwe_phys.ddp_flat.ddp_class_start =
|
||||
ddt_class_start();
|
||||
}
|
||||
|
||||
ddt_sync_flush_entry(ddt, &ddlwe,
|
||||
dde->dde_type, dde->dde_class, tx);
|
||||
ddt_sync_scan_entry(ddt, &ddlwe, tx);
|
||||
@@ -2765,7 +2802,7 @@ ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram)
|
||||
* If this entry is on the log, then the stored entry is stale
|
||||
* and we should skip it.
|
||||
*/
|
||||
if (ddt_log_find_key(ddt, &ddlwe.ddlwe_key, NULL))
|
||||
if (ddt_log_find_key(ddt, &ddlwe.ddlwe_key, NULL, NULL))
|
||||
continue;
|
||||
|
||||
/* prune older entries */
|
||||
|
||||
@@ -252,7 +252,8 @@ ddt_log_free_entry(ddt_t *ddt, ddt_log_entry_t *ddle)
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
|
||||
ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe,
|
||||
boolean_t hist)
|
||||
{
|
||||
/* Create the log tree entry from a live or stored entry */
|
||||
avl_index_t where;
|
||||
@@ -262,7 +263,13 @@ ddt_log_update_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_lightweight_entry_t *ddlwe)
|
||||
ddle = ddt_log_alloc_entry(ddt);
|
||||
ddle->ddle_key = ddlwe->ddlwe_key;
|
||||
avl_insert(&ddl->ddl_tree, ddle, where);
|
||||
} else if (hist) {
|
||||
ddt_lightweight_entry_t oddlwe;
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, &oddlwe);
|
||||
ddt_histogram_sub_entry(ddt, &ddt->ddt_log_histogram, &oddlwe);
|
||||
}
|
||||
if (hist)
|
||||
ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
|
||||
ddle->ddle_type = ddlwe->ddlwe_type;
|
||||
ddle->ddle_class = ddlwe->ddlwe_class;
|
||||
memcpy(ddle->ddle_phys, &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt));
|
||||
@@ -273,8 +280,7 @@ ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe, ddt_log_update_t *dlu)
|
||||
{
|
||||
ASSERT3U(dlu->dlu_dbp, !=, NULL);
|
||||
|
||||
ddt_log_update_entry(ddt, ddt->ddt_log_active, ddlwe);
|
||||
ddt_histogram_add_entry(ddt, &ddt->ddt_log_histogram, ddlwe);
|
||||
ddt_log_update_entry(ddt, ddt->ddt_log_active, ddlwe, B_TRUE);
|
||||
|
||||
/* Get our block */
|
||||
ASSERT3U(dlu->dlu_block, <, dlu->dlu_ndbp);
|
||||
@@ -381,14 +387,20 @@ ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl, const ddt_key_t *ddk)
|
||||
|
||||
boolean_t
|
||||
ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
|
||||
ddt_lightweight_entry_t *ddlwe)
|
||||
ddt_lightweight_entry_t *ddlwe, boolean_t *from_flushing)
|
||||
{
|
||||
ddt_log_entry_t *ddle =
|
||||
avl_find(&ddt->ddt_log_active->ddl_tree, ddk, NULL);
|
||||
if (!ddle)
|
||||
ddt_log_entry_t *ddle = avl_find(&ddt->ddt_log_active->ddl_tree,
|
||||
ddk, NULL);
|
||||
if (ddle) {
|
||||
if (from_flushing)
|
||||
*from_flushing = B_FALSE;
|
||||
} else {
|
||||
ddle = avl_find(&ddt->ddt_log_flushing->ddl_tree, ddk, NULL);
|
||||
if (!ddle)
|
||||
return (B_FALSE);
|
||||
if (!ddle)
|
||||
return (B_FALSE);
|
||||
if (from_flushing)
|
||||
*from_flushing = B_TRUE;
|
||||
}
|
||||
if (ddlwe)
|
||||
DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe);
|
||||
return (B_TRUE);
|
||||
@@ -524,7 +536,7 @@ ddt_log_load_entry(ddt_t *ddt, ddt_log_t *ddl, ddt_log_record_t *dlr,
|
||||
ddlwe.ddlwe_key = dlre->dlre_key;
|
||||
memcpy(&ddlwe.ddlwe_phys, dlre->dlre_phys, DDT_PHYS_SIZE(ddt));
|
||||
|
||||
ddt_log_update_entry(ddt, ddl, &ddlwe);
|
||||
ddt_log_update_entry(ddt, ddl, &ddlwe, B_FALSE);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <sys/ddt_impl.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zio_compress.h>
|
||||
|
||||
static unsigned int ddt_zap_default_bs = 15;
|
||||
@@ -56,7 +57,7 @@ ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
|
||||
/* Call compress function directly to avoid hole detection. */
|
||||
abd_t sabd, dabd;
|
||||
abd_get_from_buf_struct(&sabd, (void *)src, s_len);
|
||||
abd_get_from_buf_struct(&dabd, dst, d_len);
|
||||
abd_get_from_buf_struct(&dabd, dst, d_len - 1);
|
||||
c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);
|
||||
abd_free(&dabd);
|
||||
abd_free(&sabd);
|
||||
@@ -85,9 +86,10 @@ ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
|
||||
}
|
||||
|
||||
abd_t sabd, dabd;
|
||||
abd_get_from_buf_struct(&sabd, src, s_len);
|
||||
size_t c_len = s_len - 1;
|
||||
abd_get_from_buf_struct(&sabd, src, c_len);
|
||||
abd_get_from_buf_struct(&dabd, dst, d_len);
|
||||
VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));
|
||||
VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, c_len, d_len, NULL));
|
||||
abd_free(&dabd);
|
||||
abd_free(&sabd);
|
||||
|
||||
@@ -120,54 +122,48 @@ ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_lookup(objset_t *os, uint64_t object,
|
||||
const ddt_key_t *ddk, void *phys, size_t psize)
|
||||
ddt_zap_lookup(dnode_t *dn, const ddt_key_t *ddk, void *phys, size_t psize)
|
||||
{
|
||||
uchar_t *cbuf;
|
||||
uint64_t one, csize;
|
||||
uint64_t csize;
|
||||
int error;
|
||||
|
||||
error = zap_length_uint64(os, object, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, &one, &csize);
|
||||
if (error)
|
||||
return (error);
|
||||
cbuf = kmem_alloc(psize + 1, KM_SLEEP);
|
||||
|
||||
ASSERT3U(one, ==, 1);
|
||||
ASSERT3U(csize, <=, psize + 1);
|
||||
|
||||
cbuf = kmem_alloc(csize, KM_SLEEP);
|
||||
|
||||
error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf);
|
||||
if (error == 0)
|
||||
error = zap_lookup_length_uint64_by_dnode(dn, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, 1, psize + 1, cbuf, &csize);
|
||||
if (error == 0) {
|
||||
ASSERT3U(csize, <=, psize + 1);
|
||||
ddt_zap_decompress(cbuf, phys, csize, psize);
|
||||
}
|
||||
|
||||
kmem_free(cbuf, csize);
|
||||
kmem_free(cbuf, psize + 1);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
|
||||
ddt_zap_contains(dnode_t *dn, const ddt_key_t *ddk)
|
||||
{
|
||||
return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
|
||||
NULL, NULL));
|
||||
return (zap_length_uint64_by_dnode(dn, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, NULL, NULL));
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
|
||||
ddt_zap_prefetch(dnode_t *dn, const ddt_key_t *ddk)
|
||||
{
|
||||
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
|
||||
(void) zap_prefetch_uint64_by_dnode(dn, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_zap_prefetch_all(objset_t *os, uint64_t object)
|
||||
ddt_zap_prefetch_all(dnode_t *dn)
|
||||
{
|
||||
(void) zap_prefetch_object(os, object);
|
||||
(void) zap_prefetch_object(dn->dn_objset, dn->dn_object);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
ddt_zap_update(dnode_t *dn, const ddt_key_t *ddk,
|
||||
const void *phys, size_t psize, dmu_tx_t *tx)
|
||||
{
|
||||
const size_t cbuf_size = psize + 1;
|
||||
@@ -176,7 +172,7 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
|
||||
uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
|
||||
|
||||
int error = zap_update_uint64(os, object, (uint64_t *)ddk,
|
||||
int error = zap_update_uint64_by_dnode(dn, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf, tx);
|
||||
|
||||
kmem_free(cbuf, cbuf_size);
|
||||
@@ -185,15 +181,14 @@ ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
dmu_tx_t *tx)
|
||||
ddt_zap_remove(dnode_t *dn, const ddt_key_t *ddk, dmu_tx_t *tx)
|
||||
{
|
||||
return (zap_remove_uint64(os, object, (uint64_t *)ddk,
|
||||
return (zap_remove_uint64_by_dnode(dn, (uint64_t *)ddk,
|
||||
DDT_KEY_WORDS, tx));
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
|
||||
ddt_zap_walk(dnode_t *dn, uint64_t *walk, ddt_key_t *ddk,
|
||||
void *phys, size_t psize)
|
||||
{
|
||||
zap_cursor_t zc;
|
||||
@@ -209,9 +204,10 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
|
||||
* scrub I/Os for each ZAP block that we read in, so
|
||||
* reading the ZAP is unlikely to be the bottleneck.
|
||||
*/
|
||||
zap_cursor_init_noprefetch(&zc, os, object);
|
||||
zap_cursor_init_noprefetch(&zc, dn->dn_objset, dn->dn_object);
|
||||
} else {
|
||||
zap_cursor_init_serialized(&zc, os, object, *walk);
|
||||
zap_cursor_init_serialized(&zc, dn->dn_objset, dn->dn_object,
|
||||
*walk);
|
||||
}
|
||||
if ((error = zap_cursor_retrieve(&zc, za)) == 0) {
|
||||
uint64_t csize = za->za_num_integers;
|
||||
@@ -221,7 +217,7 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
|
||||
|
||||
uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
|
||||
|
||||
error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,
|
||||
error = zap_lookup_uint64_by_dnode(dn, (uint64_t *)za->za_name,
|
||||
DDT_KEY_WORDS, 1, csize, cbuf);
|
||||
ASSERT0(error);
|
||||
if (error == 0) {
|
||||
@@ -240,9 +236,9 @@ ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)
|
||||
ddt_zap_count(dnode_t *dn, uint64_t *count)
|
||||
{
|
||||
return (zap_count(os, object, count));
|
||||
return (zap_count_by_dnode(dn, count));
|
||||
}
|
||||
|
||||
const ddt_ops_t ddt_zap_ops = {
|
||||
|
||||
@@ -161,7 +161,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
|
||||
ASSERT3U(BP_GET_BIRTH(bp), >,
|
||||
dsl_dataset_phys(ds)->ds_prev_snap_txg);
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
/* ds_dbuf is pre-dirtied in dsl_dataset_sync(). */
|
||||
ASSERT(dmu_buf_is_dirty(ds->ds_dbuf, tx));
|
||||
mutex_enter(&ds->ds_lock);
|
||||
delta = parent_delta(ds, used);
|
||||
dsl_dataset_phys(ds)->ds_referenced_bytes += used;
|
||||
@@ -274,7 +275,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
||||
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
|
||||
|
||||
ASSERT(!ds->ds_is_snapshot);
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
/* ds_dbuf is pre-dirtied in dsl_dataset_sync(). */
|
||||
ASSERT(dmu_buf_is_dirty(ds->ds_dbuf, tx));
|
||||
|
||||
/*
|
||||
* Track block for livelist, but ignore embedded blocks because
|
||||
|
||||
@@ -189,16 +189,16 @@ static uint_t zfs_scan_mem_lim_fact = 20;
|
||||
static uint_t zfs_scan_mem_lim_soft_fact = 20;
|
||||
|
||||
/* minimum milliseconds to scrub per txg */
|
||||
static uint_t zfs_scrub_min_time_ms = 1000;
|
||||
static uint_t zfs_scrub_min_time_ms = 750;
|
||||
|
||||
/* minimum milliseconds to obsolete per txg */
|
||||
static uint_t zfs_obsolete_min_time_ms = 500;
|
||||
|
||||
/* minimum milliseconds to free per txg */
|
||||
static uint_t zfs_free_min_time_ms = 1000;
|
||||
static uint_t zfs_free_min_time_ms = 500;
|
||||
|
||||
/* minimum milliseconds to resilver per txg */
|
||||
static uint_t zfs_resilver_min_time_ms = 3000;
|
||||
static uint_t zfs_resilver_min_time_ms = 1500;
|
||||
|
||||
static uint_t zfs_scan_checkpoint_intval = 7200; /* in seconds */
|
||||
int zfs_scan_suspend_progress = 0; /* set to prevent scans from progressing */
|
||||
@@ -208,7 +208,13 @@ static const ddt_class_t zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
|
||||
/* max number of blocks to free in a single TXG */
|
||||
static uint64_t zfs_async_block_max_blocks = UINT64_MAX;
|
||||
/* max number of dedup blocks to free in a single TXG */
|
||||
static uint64_t zfs_max_async_dedup_frees = 100000;
|
||||
static uint64_t zfs_max_async_dedup_frees = 250000;
|
||||
|
||||
/*
|
||||
* After freeing this many async ZIOs (dedup, clone, gang blocks), wait for
|
||||
* them to complete before continuing. This prevents unbounded I/O queueing.
|
||||
*/
|
||||
static uint64_t zfs_async_free_zio_wait_interval = 2000;
|
||||
|
||||
/* set to disable resilver deferring */
|
||||
static int zfs_resilver_disable_defer = B_FALSE;
|
||||
@@ -217,16 +223,14 @@ static int zfs_resilver_disable_defer = B_FALSE;
|
||||
static uint_t zfs_resilver_defer_percent = 10;
|
||||
|
||||
/*
|
||||
* We wait a few txgs after importing a pool to begin scanning so that
|
||||
* the import / mounting code isn't held up by scrub / resilver IO.
|
||||
* Unfortunately, it is a bit difficult to determine exactly how long
|
||||
* this will take since userspace will trigger fs mounts asynchronously
|
||||
* and the kernel will create zvol minors asynchronously. As a result,
|
||||
* the value provided here is a bit arbitrary, but represents a
|
||||
* reasonable estimate of how many txgs it will take to finish fully
|
||||
* importing a pool
|
||||
* Number of TXGs to wait after importing before starting background
|
||||
* work (async destroys, scan/scrub/resilver operations). This allows
|
||||
* the import command and filesystem mounts to complete quickly without
|
||||
* being delayed by background activities. The value is somewhat arbitrary
|
||||
* since userspace triggers filesystem mounts asynchronously, but 5 TXGs
|
||||
* provides a reasonable window for import completion in most cases.
|
||||
*/
|
||||
#define SCAN_IMPORT_WAIT_TXGS 5
|
||||
static uint_t zfs_import_defer_txgs = 5;
|
||||
|
||||
#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \
|
||||
((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \
|
||||
@@ -1665,7 +1669,7 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
|
||||
* or
|
||||
* - the scan queue has reached its memory use limit
|
||||
*/
|
||||
uint64_t curr_time_ns = gethrtime();
|
||||
uint64_t curr_time_ns = getlrtime();
|
||||
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
|
||||
uint64_t sync_time_ns = curr_time_ns -
|
||||
scn->scn_dp->dp_spa->spa_sync_starttime;
|
||||
@@ -1727,7 +1731,7 @@ dsl_error_scrub_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
|
||||
* - the spa is shutting down because this pool is being exported
|
||||
* or the machine is rebooting.
|
||||
*/
|
||||
uint64_t curr_time_ns = gethrtime();
|
||||
uint64_t curr_time_ns = getlrtime();
|
||||
uint64_t error_scrub_time_ns = curr_time_ns - scn->scn_sync_start_time;
|
||||
uint64_t sync_time_ns = curr_time_ns -
|
||||
scn->scn_dp->dp_spa->spa_sync_starttime;
|
||||
@@ -3239,7 +3243,7 @@ static boolean_t
|
||||
scan_io_queue_check_suspend(dsl_scan_t *scn)
|
||||
{
|
||||
/* See comment in dsl_scan_check_suspend() */
|
||||
uint64_t curr_time_ns = gethrtime();
|
||||
uint64_t curr_time_ns = getlrtime();
|
||||
uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time;
|
||||
uint64_t sync_time_ns = curr_time_ns -
|
||||
scn->scn_dp->dp_spa->spa_sync_starttime;
|
||||
@@ -3592,12 +3596,12 @@ dsl_scan_async_block_should_pause(dsl_scan_t *scn)
|
||||
}
|
||||
|
||||
if (zfs_max_async_dedup_frees != 0 &&
|
||||
scn->scn_dedup_frees_this_txg >= zfs_max_async_dedup_frees) {
|
||||
scn->scn_async_frees_this_txg >= zfs_max_async_dedup_frees) {
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
|
||||
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
|
||||
elapsed_nanosecs = getlrtime() - scn->scn_sync_start_time;
|
||||
return (elapsed_nanosecs / (NANOSEC / 2) > zfs_txg_timeout ||
|
||||
(NSEC2MSEC(elapsed_nanosecs) > scn->scn_async_block_min_time_ms &&
|
||||
txg_sync_waiting(scn->scn_dp)) ||
|
||||
spa_shutting_down(scn->scn_dp->dp_spa));
|
||||
@@ -3614,14 +3618,32 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
return (SET_ERROR(ERESTART));
|
||||
}
|
||||
|
||||
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
|
||||
dmu_tx_get_txg(tx), bp, 0));
|
||||
zio_t *zio = zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
|
||||
dmu_tx_get_txg(tx), bp, 0);
|
||||
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD,
|
||||
-bp_get_dsize_sync(scn->scn_dp->dp_spa, bp),
|
||||
-BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx);
|
||||
scn->scn_visited_this_txg++;
|
||||
if (BP_GET_DEDUP(bp))
|
||||
scn->scn_dedup_frees_this_txg++;
|
||||
if (zio != NULL) {
|
||||
/*
|
||||
* zio_free_sync() returned a ZIO, meaning this is an
|
||||
* async I/O (dedup, clone or gang block).
|
||||
*/
|
||||
scn->scn_async_frees_this_txg++;
|
||||
zio_nowait(zio);
|
||||
|
||||
/*
|
||||
* After issuing N async ZIOs, wait for them to complete.
|
||||
* This makes time limits work with actual I/O completion
|
||||
* times, not just queuing times.
|
||||
*/
|
||||
uint64_t i = zfs_async_free_zio_wait_interval;
|
||||
if (i != 0 && (scn->scn_async_frees_this_txg % i) == 0) {
|
||||
VERIFY0(zio_wait(scn->scn_zio_root));
|
||||
scn->scn_zio_root = zio_root(scn->scn_dp->dp_spa, NULL,
|
||||
NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -3865,10 +3887,10 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
"free_bpobj/bptree on %s in txg %llu; err=%u",
|
||||
(longlong_t)scn->scn_visited_this_txg,
|
||||
(longlong_t)
|
||||
NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
|
||||
NSEC2MSEC(getlrtime() - scn->scn_sync_start_time),
|
||||
spa->spa_name, (longlong_t)tx->tx_txg, err);
|
||||
scn->scn_visited_this_txg = 0;
|
||||
scn->scn_dedup_frees_this_txg = 0;
|
||||
scn->scn_async_frees_this_txg = 0;
|
||||
|
||||
/*
|
||||
* Write out changes to the DDT and the BRT that may be required
|
||||
@@ -4196,14 +4218,14 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
spa->spa_scrub_active = B_TRUE;
|
||||
scn->scn_sync_start_time = gethrtime();
|
||||
scn->scn_sync_start_time = getlrtime();
|
||||
|
||||
/*
|
||||
* zfs_scan_suspend_progress can be set to disable scrub progress.
|
||||
* See more detailed comment in dsl_scan_sync().
|
||||
*/
|
||||
if (zfs_scan_suspend_progress) {
|
||||
uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;
|
||||
int mintime = zfs_scrub_min_time_ms;
|
||||
|
||||
while (zfs_scan_suspend_progress &&
|
||||
@@ -4211,7 +4233,7 @@ dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
!spa_shutting_down(scn->scn_dp->dp_spa) &&
|
||||
NSEC2MSEC(scan_time_ns) < mintime) {
|
||||
delay(hz);
|
||||
scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
scan_time_ns = getlrtime() - scn->scn_sync_start_time;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -4394,6 +4416,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
if (spa_shutting_down(spa))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Wait a few txgs after importing before doing background work
|
||||
* (async destroys and scanning). This should help the import
|
||||
* command to complete quickly.
|
||||
*/
|
||||
if (spa->spa_syncing_txg < spa->spa_first_txg + zfs_import_defer_txgs)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the scan is inactive due to a stalled async destroy, try again.
|
||||
*/
|
||||
@@ -4402,7 +4432,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
|
||||
/* reset scan statistics */
|
||||
scn->scn_visited_this_txg = 0;
|
||||
scn->scn_dedup_frees_this_txg = 0;
|
||||
scn->scn_async_frees_this_txg = 0;
|
||||
scn->scn_holes_this_txg = 0;
|
||||
scn->scn_lt_min_this_txg = 0;
|
||||
scn->scn_gt_max_this_txg = 0;
|
||||
@@ -4413,7 +4443,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
scn->scn_avg_zio_size_this_txg = 0;
|
||||
scn->scn_zios_this_txg = 0;
|
||||
scn->scn_suspending = B_FALSE;
|
||||
scn->scn_sync_start_time = gethrtime();
|
||||
scn->scn_sync_start_time = getlrtime();
|
||||
spa->spa_scrub_active = B_TRUE;
|
||||
|
||||
/*
|
||||
@@ -4430,13 +4460,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
if (!dsl_scan_is_running(scn) || dsl_scan_is_paused_scrub(scn))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Wait a few txgs after importing to begin scanning so that
|
||||
* we can get the pool imported quickly.
|
||||
*/
|
||||
if (spa->spa_syncing_txg < spa->spa_first_txg + SCAN_IMPORT_WAIT_TXGS)
|
||||
return;
|
||||
|
||||
/*
|
||||
* zfs_scan_suspend_progress can be set to disable scan progress.
|
||||
* We don't want to spin the txg_sync thread, so we add a delay
|
||||
@@ -4444,7 +4467,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
* useful for testing and debugging.
|
||||
*/
|
||||
if (zfs_scan_suspend_progress) {
|
||||
uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
uint64_t scan_time_ns = getlrtime() - scn->scn_sync_start_time;
|
||||
uint_t mintime = (scn->scn_phys.scn_func ==
|
||||
POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms :
|
||||
zfs_scrub_min_time_ms;
|
||||
@@ -4454,7 +4477,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
!spa_shutting_down(scn->scn_dp->dp_spa) &&
|
||||
NSEC2MSEC(scan_time_ns) < mintime) {
|
||||
delay(hz);
|
||||
scan_time_ns = gethrtime() - scn->scn_sync_start_time;
|
||||
scan_time_ns = getlrtime() - scn->scn_sync_start_time;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -4584,7 +4607,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
"%llu in ddt, %llu > maxtxg)",
|
||||
(longlong_t)scn->scn_visited_this_txg,
|
||||
spa->spa_name,
|
||||
(longlong_t)NSEC2MSEC(gethrtime() -
|
||||
(longlong_t)NSEC2MSEC(getlrtime() -
|
||||
scn->scn_sync_start_time),
|
||||
(longlong_t)scn->scn_objsets_visited_this_txg,
|
||||
(longlong_t)scn->scn_holes_this_txg,
|
||||
@@ -4625,7 +4648,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
(longlong_t)scn->scn_zios_this_txg,
|
||||
spa->spa_name,
|
||||
(longlong_t)scn->scn_segs_this_txg,
|
||||
(longlong_t)NSEC2MSEC(gethrtime() -
|
||||
(longlong_t)NSEC2MSEC(getlrtime() -
|
||||
scn->scn_sync_start_time),
|
||||
(longlong_t)scn->scn_avg_zio_size_this_txg,
|
||||
(longlong_t)scn->scn_avg_seg_size_this_txg);
|
||||
@@ -5319,7 +5342,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, async_block_max_blocks, U64, ZMOD_RW,
|
||||
"Max number of blocks freed in one txg");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, max_async_dedup_frees, U64, ZMOD_RW,
|
||||
"Max number of dedup blocks freed in one txg");
|
||||
"Max number of dedup, clone or gang blocks freed in one txg");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, async_free_zio_wait_interval, U64, ZMOD_RW,
|
||||
"Wait for pending free I/Os after issuing this many asynchronously");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, free_bpobj_enabled, INT, ZMOD_RW,
|
||||
"Enable processing of the free_bpobj");
|
||||
@@ -5336,6 +5362,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_issue_strategy, UINT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, scan_legacy, INT, ZMOD_RW,
|
||||
"Scrub using legacy non-sequential method");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, import_defer_txgs, UINT, ZMOD_RW,
|
||||
"Number of TXGs to defer background work after pool import");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, scan_checkpoint_intval, UINT, ZMOD_RW,
|
||||
"Scan progress on-disk checkpointing interval");
|
||||
|
||||
|
||||
@@ -10449,7 +10449,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
dsl_pool_t *dp = spa->spa_dsl_pool;
|
||||
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
|
||||
|
||||
spa->spa_sync_starttime = gethrtime();
|
||||
spa->spa_sync_starttime = getlrtime();
|
||||
|
||||
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid, B_TRUE);
|
||||
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
|
||||
|
||||
@@ -720,7 +720,7 @@ spa_deadman(void *arg)
|
||||
return;
|
||||
|
||||
zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
|
||||
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
|
||||
(getlrtime() - spa->spa_sync_starttime) / NANOSEC,
|
||||
(u_longlong_t)++spa->spa_deadman_calls);
|
||||
if (zfs_deadman_enabled)
|
||||
vdev_deadman(spa->spa_root_vdev, FTAG);
|
||||
|
||||
@@ -2703,16 +2703,6 @@ vdev_raidz_io_start(zio_t *zio)
|
||||
next_offset = synced_offset;
|
||||
}
|
||||
}
|
||||
if (use_scratch) {
|
||||
zfs_dbgmsg("zio=%px %s io_offset=%llu offset_synced="
|
||||
"%lld next_offset=%lld use_scratch=%u",
|
||||
zio,
|
||||
zio->io_type == ZIO_TYPE_WRITE ? "WRITE" : "READ",
|
||||
(long long)zio->io_offset,
|
||||
(long long)synced_offset,
|
||||
(long long)next_offset,
|
||||
use_scratch);
|
||||
}
|
||||
|
||||
rm = vdev_raidz_map_alloc_expanded(zio,
|
||||
tvd->vdev_ashift, vdrz->vd_physical_width,
|
||||
@@ -2851,8 +2841,6 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||
continue;
|
||||
|
||||
if (abd_cmp(orig[c], rc->rc_abd) != 0) {
|
||||
zfs_dbgmsg("found error on col=%u devidx=%u off %llx",
|
||||
c, (int)rc->rc_devidx, (u_longlong_t)rc->rc_offset);
|
||||
vdev_raidz_checksum_error(zio, rc, orig[c]);
|
||||
rc->rc_error = SET_ERROR(ECKSUM);
|
||||
ret++;
|
||||
@@ -3175,10 +3163,6 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||
*/
|
||||
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);
|
||||
|
||||
zfs_dbgmsg("zio=%px repairing c=%u devidx=%u "
|
||||
"offset=%llx",
|
||||
zio, c, rc->rc_devidx, (long long)rc->rc_offset);
|
||||
|
||||
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
|
||||
rc->rc_offset, rc->rc_abd, rc->rc_size,
|
||||
ZIO_TYPE_WRITE,
|
||||
|
||||
@@ -878,7 +878,8 @@ fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)
|
||||
int
|
||||
fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *ncp)
|
||||
char *realname, int rn_len, boolean_t *ncp,
|
||||
uint64_t *actual_num_integers)
|
||||
{
|
||||
zap_leaf_t *l;
|
||||
zap_entry_handle_t zeh;
|
||||
@@ -898,6 +899,8 @@ fzap_lookup(zap_name_t *zn,
|
||||
}
|
||||
|
||||
err = zap_entry_read(&zeh, integer_size, num_integers, buf);
|
||||
if (err == 0 && actual_num_integers != NULL)
|
||||
*actual_num_integers = zeh.zeh_num_integers;
|
||||
(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
|
||||
if (ncp) {
|
||||
*ncp = zap_entry_normalization_conflict(&zeh,
|
||||
|
||||
@@ -1049,6 +1049,24 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_count_by_dnode(dnode_t *dn, uint64_t *count)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
if (!zap->zap_ismicro) {
|
||||
err = fzap_count(zap, count);
|
||||
} else {
|
||||
*count = zap->zap_m.zap_num_entries;
|
||||
}
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* zn may be NULL; if not specified, it will be computed if needed.
|
||||
* See also the comment above zap_entry_normalization_conflict().
|
||||
@@ -1127,7 +1145,7 @@ zap_lookup_impl(zap_t *zap, const char *name,
|
||||
|
||||
if (!zap->zap_ismicro) {
|
||||
err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
realname, rn_len, ncp);
|
||||
realname, rn_len, ncp, NULL);
|
||||
} else {
|
||||
zfs_btree_index_t idx;
|
||||
mzap_ent_t *mze = mze_find(zn, &idx);
|
||||
@@ -1282,8 +1300,9 @@ zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints)
|
||||
}
|
||||
|
||||
static int
|
||||
zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
zap_lookup_length_uint64_impl(zap_t *zap, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
uint64_t *actual_num_integers)
|
||||
{
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
@@ -1292,7 +1311,7 @@ zap_lookup_uint64_impl(zap_t *zap, const uint64_t *key,
|
||||
}
|
||||
|
||||
int err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
NULL, 0, NULL);
|
||||
NULL, 0, NULL, actual_num_integers);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
@@ -1308,9 +1327,9 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
|
||||
num_integers, buf);
|
||||
/* zap_lookup_uint64_impl() calls zap_unlockdir() */
|
||||
err = zap_lookup_length_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, buf, NULL);
|
||||
/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
@@ -1324,9 +1343,26 @@ zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_uint64_impl(zap, key, key_numints, integer_size,
|
||||
num_integers, buf);
|
||||
/* zap_lookup_uint64_impl() calls zap_unlockdir() */
|
||||
err = zap_lookup_length_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, buf, NULL);
|
||||
/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
uint64_t *actual_num_integers)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_length_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, buf, actual_num_integers);
|
||||
/* zap_lookup_length_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
@@ -1395,6 +1431,27 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t *integer_size, uint64_t *num_integers)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_length(zn, integer_size, num_integers);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
mzap_addent(zap_name_t *zn, uint64_t value)
|
||||
{
|
||||
@@ -2003,6 +2060,7 @@ EXPORT_SYMBOL(zap_lookup);
|
||||
EXPORT_SYMBOL(zap_lookup_by_dnode);
|
||||
EXPORT_SYMBOL(zap_lookup_norm);
|
||||
EXPORT_SYMBOL(zap_lookup_uint64);
|
||||
EXPORT_SYMBOL(zap_lookup_length_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_contains);
|
||||
EXPORT_SYMBOL(zap_prefetch);
|
||||
EXPORT_SYMBOL(zap_prefetch_uint64);
|
||||
@@ -2016,12 +2074,14 @@ EXPORT_SYMBOL(zap_update_uint64);
|
||||
EXPORT_SYMBOL(zap_update_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_length);
|
||||
EXPORT_SYMBOL(zap_length_uint64);
|
||||
EXPORT_SYMBOL(zap_length_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_remove);
|
||||
EXPORT_SYMBOL(zap_remove_by_dnode);
|
||||
EXPORT_SYMBOL(zap_remove_norm);
|
||||
EXPORT_SYMBOL(zap_remove_uint64);
|
||||
EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_count);
|
||||
EXPORT_SYMBOL(zap_count_by_dnode);
|
||||
EXPORT_SYMBOL(zap_value_search);
|
||||
EXPORT_SYMBOL(zap_join);
|
||||
EXPORT_SYMBOL(zap_join_increment);
|
||||
|
||||
@@ -433,13 +433,13 @@ zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
|
||||
} else {
|
||||
return (B_FALSE);
|
||||
}
|
||||
if (quotaobj == 0 && default_quota == 0)
|
||||
return (B_FALSE);
|
||||
if (zfsvfs->z_replay)
|
||||
return (B_FALSE);
|
||||
|
||||
(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);
|
||||
if (quotaobj == 0) {
|
||||
if (default_quota == 0)
|
||||
return (B_FALSE);
|
||||
quota = default_quota;
|
||||
} else {
|
||||
err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
|
||||
@@ -484,13 +484,13 @@ zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id)
|
||||
} else {
|
||||
return (B_FALSE);
|
||||
}
|
||||
if (quotaobj == 0 && default_quota == 0)
|
||||
return (B_FALSE);
|
||||
if (zfsvfs->z_replay)
|
||||
return (B_FALSE);
|
||||
|
||||
(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)id);
|
||||
if (quotaobj == 0) {
|
||||
if (default_quota == 0)
|
||||
return (B_FALSE);
|
||||
quota = default_quota;
|
||||
} else {
|
||||
err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
|
||||
|
||||
@@ -4067,19 +4067,21 @@ zio_ddt_write(zio_t *zio)
|
||||
|
||||
/*
|
||||
* We need to write. We will create a new write with the copies
|
||||
* property adjusted to match the number of DVAs we need to need to
|
||||
* grow the DDT entry by to satisfy the request.
|
||||
* property adjusted to match the number of DVAs we need to grow
|
||||
* the DDT entry by to satisfy the request.
|
||||
*/
|
||||
zio_prop_t czp = *zp;
|
||||
zio_prop_t czp;
|
||||
if (have_dvas > 0 || parent_dvas > 0) {
|
||||
czp = *zp;
|
||||
czp.zp_copies = need_dvas;
|
||||
czp.zp_gang_copies = 0;
|
||||
zp = &czp;
|
||||
} else {
|
||||
ASSERT3U(czp.zp_copies, ==, need_dvas);
|
||||
ASSERT3U(zp->zp_copies, ==, need_dvas);
|
||||
}
|
||||
|
||||
zio_t *cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
|
||||
zio->io_orig_size, zio->io_orig_size, &czp,
|
||||
zio->io_orig_size, zio->io_orig_size, zp,
|
||||
zio_ddt_child_write_ready, NULL,
|
||||
zio_ddt_child_write_done, dde, zio->io_priority,
|
||||
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
|
||||
@@ -4157,6 +4159,17 @@ zio_ddt_free(zio_t *zio)
|
||||
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
|
||||
if (v != DDT_PHYS_NONE)
|
||||
ddt_phys_decref(dde->dde_phys, v);
|
||||
else
|
||||
/*
|
||||
* If the entry was found but the phys was not, then
|
||||
* this block must have been pruned from the dedup
|
||||
* table, and the entry refers to a later version of
|
||||
* this data. Therefore, the caller is trying to delete
|
||||
* the only stored instance of this block, and so we
|
||||
* need to do a normal (not dedup) free. Clear dde so
|
||||
* we fall into the block below.
|
||||
*/
|
||||
dde = NULL;
|
||||
}
|
||||
ddt_exit(ddt);
|
||||
|
||||
|
||||
@@ -7,7 +7,9 @@ REF="HEAD"
|
||||
test_commit_bodylength()
|
||||
{
|
||||
length="72"
|
||||
body=$(git log --no-show-signature -n 1 --pretty=%b "$REF" | grep -Ev "http(s)*://" | grep -E -m 1 ".{$((length + 1))}")
|
||||
body=$(git log --no-show-signature -n 1 --pretty=%b "$REF" |
|
||||
grep -Evi -e "http(s)*://" -e "signed-off-by:" -e "reviewed-by:" |
|
||||
grep -E -m 1 ".{$((length + 1))}")
|
||||
if [ -n "$body" ]; then
|
||||
echo "error: commit message body contains line over ${length} characters"
|
||||
return 1
|
||||
|
||||
@@ -10,7 +10,7 @@ RET=0
|
||||
# check for exec stacks
|
||||
OUT=$(scanelf -qyRAF '%e %p' "$1")
|
||||
|
||||
if [ x"${OUT}" != x ]; then
|
||||
if [ "${OUT}" != "" ]; then
|
||||
RET=2
|
||||
echo "The following files contain writable and executable sections"
|
||||
echo " Files with such sections will not work properly (or at all!) on some"
|
||||
@@ -26,7 +26,7 @@ fi
|
||||
# check for TEXTRELS
|
||||
OUT=$(scanelf -qyRAF '%T %p' "$1")
|
||||
|
||||
if [ x"${OUT}" != x ]; then
|
||||
if [ "${OUT}" != "" ]; then
|
||||
RET=2
|
||||
echo "The following files contain runtime text relocations"
|
||||
echo " Text relocations force the dynamic linker to perform extra"
|
||||
|
||||
@@ -706,7 +706,8 @@ tags = ['functional', 'deadman']
|
||||
[tests/functional/dedup]
|
||||
tests = ['dedup_fdt_create', 'dedup_fdt_import', 'dedup_fdt_pacing',
|
||||
'dedup_legacy_create', 'dedup_legacy_import', 'dedup_legacy_fdt_upgrade',
|
||||
'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_zap_shrink']
|
||||
'dedup_legacy_fdt_mixed', 'dedup_quota', 'dedup_prune', 'dedup_prune_leak',
|
||||
'dedup_zap_shrink']
|
||||
pre =
|
||||
post =
|
||||
tags = ['functional', 'dedup']
|
||||
@@ -1019,7 +1020,7 @@ tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
|
||||
'snapshot_006_pos', 'snapshot_007_pos', 'snapshot_008_pos',
|
||||
'snapshot_009_pos', 'snapshot_010_pos', 'snapshot_011_pos',
|
||||
'snapshot_012_pos', 'snapshot_013_pos', 'snapshot_014_pos',
|
||||
'snapshot_017_pos', 'snapshot_018_pos']
|
||||
'snapshot_017_pos', 'snapshot_018_pos', 'snapshot_019_pos']
|
||||
tags = ['functional', 'snapshot']
|
||||
|
||||
[tests/functional/snapused]
|
||||
|
||||
@@ -580,7 +580,7 @@ tests = ['clone_001_pos', 'rollback_001_pos', 'rollback_002_pos',
|
||||
'snapshot_007_pos', 'snapshot_008_pos', 'snapshot_009_pos',
|
||||
'snapshot_010_pos', 'snapshot_011_pos', 'snapshot_012_pos',
|
||||
'snapshot_013_pos', 'snapshot_014_pos', 'snapshot_017_pos',
|
||||
'snapshot_018_pos']
|
||||
'snapshot_018_pos', 'snapshot_019_pos']
|
||||
tags = ['functional', 'snapshot']
|
||||
|
||||
[tests/functional/snapused]
|
||||
|
||||
@@ -36,12 +36,13 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#define loff_t off_t
|
||||
#if defined(_GNU_SOURCE) && defined(__linux__)
|
||||
_Static_assert(sizeof (loff_t) == sizeof (off_t),
|
||||
"loff_t and off_t must be the same size");
|
||||
#endif
|
||||
|
||||
ssize_t
|
||||
copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)
|
||||
copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)
|
||||
__attribute__((weak));
|
||||
|
||||
static void *
|
||||
|
||||
@@ -42,12 +42,13 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#define loff_t off_t
|
||||
#if defined(_GNU_SOURCE) && defined(__linux__)
|
||||
_Static_assert(sizeof (loff_t) == sizeof (off_t),
|
||||
"loff_t and off_t must be the same size");
|
||||
#endif
|
||||
|
||||
ssize_t
|
||||
copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)
|
||||
copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)
|
||||
__attribute__((weak));
|
||||
|
||||
static int
|
||||
|
||||
@@ -59,16 +59,17 @@
|
||||
#endif
|
||||
#endif /* __NR_copy_file_range */
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#define loff_t off_t
|
||||
#if defined(_GNU_SOURCE) && defined(__linux__)
|
||||
_Static_assert(sizeof (loff_t) == sizeof (off_t),
|
||||
"loff_t and off_t must be the same size");
|
||||
#endif
|
||||
|
||||
ssize_t
|
||||
copy_file_range(int, loff_t *, int, loff_t *, size_t, unsigned int)
|
||||
copy_file_range(int, off_t *, int, off_t *, size_t, unsigned int)
|
||||
__attribute__((weak));
|
||||
|
||||
static inline ssize_t
|
||||
cf_copy_file_range(int sfd, loff_t *soff, int dfd, loff_t *doff,
|
||||
cf_copy_file_range(int sfd, off_t *soff, int dfd, off_t *doff,
|
||||
size_t len, unsigned int flags)
|
||||
{
|
||||
if (copy_file_range)
|
||||
@@ -151,9 +152,9 @@ usage(void)
|
||||
}
|
||||
|
||||
int do_clone(int sfd, int dfd);
|
||||
int do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
int do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
int do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len);
|
||||
int do_clonerange(int sfd, int dfd, off_t soff, off_t doff, size_t len);
|
||||
int do_copyfilerange(int sfd, int dfd, off_t soff, off_t doff, size_t len);
|
||||
int do_deduperange(int sfd, int dfd, off_t soff, off_t doff, size_t len);
|
||||
|
||||
int quiet = 0;
|
||||
|
||||
@@ -203,7 +204,7 @@ main(int argc, char **argv)
|
||||
abort();
|
||||
}
|
||||
|
||||
loff_t soff = 0, doff = 0;
|
||||
off_t soff = 0, doff = 0;
|
||||
size_t len = SSIZE_MAX;
|
||||
unsigned long long len2;
|
||||
if ((argc-optind) == 5) {
|
||||
@@ -295,7 +296,7 @@ do_clone(int sfd, int dfd)
|
||||
}
|
||||
|
||||
int
|
||||
do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
do_clonerange(int sfd, int dfd, off_t soff, off_t doff, size_t len)
|
||||
{
|
||||
if (!quiet)
|
||||
fprintf(stderr, "using FICLONERANGE\n");
|
||||
@@ -314,7 +315,7 @@ do_clonerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
}
|
||||
|
||||
int
|
||||
do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
do_copyfilerange(int sfd, int dfd, off_t soff, off_t doff, size_t len)
|
||||
{
|
||||
if (!quiet)
|
||||
fprintf(stderr, "using copy_file_range\n");
|
||||
@@ -341,7 +342,7 @@ do_copyfilerange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
}
|
||||
|
||||
int
|
||||
do_deduperange(int sfd, int dfd, loff_t soff, loff_t doff, size_t len)
|
||||
do_deduperange(int sfd, int dfd, off_t soff, off_t doff, size_t len)
|
||||
{
|
||||
if (!quiet)
|
||||
fprintf(stderr, "using FIDEDUPERANGE\n");
|
||||
|
||||
@@ -3861,8 +3861,6 @@ function directory_diff # dir_a dir_b
|
||||
# do not match there is a "c" entry in one of the columns).
|
||||
if rsync --version | grep -q "[, ] crtimes"; then
|
||||
args+=("--crtimes")
|
||||
else
|
||||
log_note "This rsync package does not support --crtimes (-N)."
|
||||
fi
|
||||
|
||||
# If we are testing a ZIL replay, we need to ignore timestamp changes.
|
||||
|
||||
@@ -1482,6 +1482,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/dedup/dedup_legacy_fdt_upgrade.ksh \
|
||||
functional/dedup/dedup_legacy_fdt_mixed.ksh \
|
||||
functional/dedup/dedup_prune.ksh \
|
||||
functional/dedup/dedup_prune_leak.ksh \
|
||||
functional/dedup/dedup_quota.ksh \
|
||||
functional/dedup/dedup_zap_shrink.ksh \
|
||||
functional/delegate/cleanup.ksh \
|
||||
@@ -2121,6 +2122,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/snapshot/snapshot_016_pos.ksh \
|
||||
functional/snapshot/snapshot_017_pos.ksh \
|
||||
functional/snapshot/snapshot_018_pos.ksh \
|
||||
functional/snapshot/snapshot_019_pos.ksh \
|
||||
functional/snapused/cleanup.ksh \
|
||||
functional/snapused/setup.ksh \
|
||||
functional/snapused/snapused_001_pos.ksh \
|
||||
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2025, Klara Inc.
|
||||
# Copyright (c) 2025, Nutanix Inc.
|
||||
#
|
||||
|
||||
# DESCRIPTION:
|
||||
# Verify that removing a file after its entries were pruned from the DDT
|
||||
# by zpool ddtprune does not leak space.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a pool with dedup=on
|
||||
# 2. Add non-duplicate entries to the DDT
|
||||
# 3. ddtprune all entries
|
||||
# 4. Remove the file
|
||||
# 5. Verify there's no space leak
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/events/events_common.kshlib
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
log_assert "Verify DDT pruning does not cause space leak"
|
||||
|
||||
# We set the dedup log txg interval to 1, to get a log flush every txg,
|
||||
# effectively disabling the log. Without this it's hard to predict when
|
||||
# entries appear in the DDT ZAP
|
||||
log_must save_tunable DEDUP_LOG_TXG_MAX
|
||||
log_must set_tunable32 DEDUP_LOG_TXG_MAX 1
|
||||
log_must save_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
|
||||
log_must set_tunable32 DEDUP_LOG_FLUSH_ENTRIES_MIN 100000
|
||||
# Exit handler: destroy the test pool if it still exists, then restore the
# two dedup log tunables that were saved and overridden above.
function cleanup
|
||||
{
|
||||
if poolexists $TESTPOOL ; then
|
||||
destroy_pool $TESTPOOL
|
||||
fi
|
||||
log_must restore_tunable DEDUP_LOG_TXG_MAX
|
||||
log_must restore_tunable DEDUP_LOG_FLUSH_ENTRIES_MIN
|
||||
}
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
log_must zpool create -f $TESTPOOL $DISKS
|
||||
|
||||
# Create a dedup-enabled filesystem and fill one file with 16 x 1M of
# random data.
log_must zfs create -o dedup=on $TESTPOOL/$TESTFS
|
||||
typeset mountpoint=$(get_prop mountpoint $TESTPOOL/$TESTFS)
|
||||
log_must dd if=/dev/urandom of=$mountpoint/f1 bs=1M count=16
|
||||
# We seem to need some amount of txg sync here to make it more consistently
|
||||
# reproducible
|
||||
for i in $(seq 50); do
|
||||
zpool sync $TESTPOOL
|
||||
done
|
||||
|
||||
# Prune the DDT entries (STRATEGY step 3), then remove the file and sync
# the pool so the frees are processed before the pool is inspected.
log_must zpool ddtprune -p 100 $TESTPOOL
|
||||
log_must rm $mountpoint/f1
|
||||
sync_pool $TESTPOOL
|
||||
|
||||
# Capture the zdb output, echo it so it is visible in the test log, and
# fail if it contains a "leaked space" report.
zdb_out=$(zdb -bcc $TESTPOOL)
|
||||
echo "$zdb_out"
|
||||
if echo "$zdb_out" | grep -q "leaked space"; then
|
||||
log_fail "DDT pruning causes space leak"
|
||||
fi
|
||||
|
||||
log_pass "DDT pruning does not cause space leak"
|
||||
+82
@@ -0,0 +1,82 @@
|
||||
#!/bin/ksh -p
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2025 iXsystems, Inc.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/snapshot/snapshot.cfg
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Verify that parallel snapshot automount operations don't cause AVL tree
|
||||
# panic due to duplicate mount attempts.
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Create a filesystem with snapdir=visible
|
||||
# 2. Create a snapshot
|
||||
# 3. Trigger parallel ls operations on the snapshot directory
|
||||
# 4. Verify no kernel panic occurred and snapshot is accessible
|
||||
#
|
||||
|
||||
# Exit handler: destroy the pool created by this test.
function cleanup
|
||||
{
|
||||
destroy_pool $TESTPOOL
|
||||
}
|
||||
|
||||
verify_runnable "both"
|
||||
|
||||
log_assert "Verify parallel snapshot automount doesn't cause AVL tree panic"
|
||||
|
||||
log_onexit cleanup
|
||||
|
||||
# Create pool and filesystem
|
||||
create_pool $TESTPOOL $DISKS
|
||||
log_must zfs create -o snapdir=visible -o mountpoint=$TESTDIR $TESTPOOL/$TESTFS
|
||||
|
||||
# Create a snapshot
|
||||
log_must zfs snapshot $SNAPFS
|
||||
|
||||
# Trigger parallel automount operations to reproduce the race condition.
|
||||
# Multiple concurrent ls operations will attempt to automount the same
|
||||
# unmounted snapshot, which previously could cause duplicate mount helpers
|
||||
# and AVL tree panic.
|
||||
snapdir_path="$TESTDIR/.zfs/snapshot/$TESTSNAP"
|
||||
# Each ls runs in the background with its output and errors discarded; the
# listings themselves are not checked here, only the concurrent access matters.
for i in {1..100}
|
||||
do
|
||||
ls $snapdir_path >/dev/null 2>&1 &
|
||||
done
|
||||
|
||||
# Wait for all background processes to complete
|
||||
wait
|
||||
|
||||
# Verify the snapshot is accessible and properly mounted after parallel access
|
||||
log_must ls $snapdir_path
|
||||
|
||||
# Verify we can unmount the filesystem cleanly. This confirms no processes
|
||||
# are stuck in a syscall and all automounted snapshots were unmounted properly.
|
||||
# If the AVL panic occurred, unmount would fail.
|
||||
log_must zfs unmount $TESTPOOL/$TESTFS
|
||||
|
||||
log_pass "Parallel snapshot automount completed without AVL tree panic"
|
||||
+5
-13
@@ -73,7 +73,6 @@ function do_test {
|
||||
block_device_wait $zvolpath
|
||||
|
||||
# Write using sync (creates FLUSH calls after writes, but not FUA)
|
||||
old_vdev_writes=$(get_sync $DISK1)
|
||||
old_log_writes=$(get_sync $datafile3)
|
||||
|
||||
log_must fio --name=write_iops --size=5M \
|
||||
@@ -81,20 +80,13 @@ function do_test {
|
||||
--iodepth=1 --rw=randwrite --group_reporting=1 \
|
||||
--filename=$zvolpath --sync=1
|
||||
|
||||
vdev_writes=$(( $(get_sync $DISK1) - $old_vdev_writes))
|
||||
log_writes=$(( $(get_sync $datafile3) - $old_log_writes))
|
||||
|
||||
# When we're doing sync writes, we should see many more writes go to
|
||||
# the log vs the first vdev. Experiments show anywhere from a 160-320x
|
||||
# ratio of writes to the log vs the first vdev (due to some straggler
|
||||
# writes to the first vdev).
|
||||
#
|
||||
# Check that we have a large ratio (100x) of sync writes going to the
|
||||
# log device
|
||||
ratio=$(($log_writes / $vdev_writes))
|
||||
log_note "Got $log_writes log writes, $vdev_writes vdev writes."
|
||||
if [ $ratio -lt 100 ] ; then
|
||||
log_fail "Expected > 100x more log writes than vdev writes. "
|
||||
# When doing sync writes, we should see at least one SLOG write per
|
||||
# block (5MB / 4KB) == 1280.
|
||||
log_note "Got $log_writes log writes."
|
||||
if [ $log_writes -lt 1280 ] ; then
|
||||
log_fail "Expected >= 1280 log writes. "
|
||||
fi
|
||||
|
||||
# Create a data file
|
||||
|
||||
@@ -433,6 +433,9 @@
|
||||
/* iter_is_ubuf() is available */
|
||||
/* #undef HAVE_ITER_IS_UBUF */
|
||||
|
||||
/* kasan_enabled() is GPL-only */
|
||||
/* #undef HAVE_KASAN_ENABLED_GPL_ONLY */
|
||||
|
||||
/* kernel has kernel_fpu_* functions */
|
||||
/* #undef HAVE_KERNEL_FPU */
|
||||
|
||||
@@ -826,6 +829,9 @@
|
||||
/* make_request_fn() return type */
|
||||
/* #undef MAKE_REQUEST_FN_RET */
|
||||
|
||||
/* The size of 'off_t', as computed by sizeof. */
|
||||
/* #undef SIZEOF_OFF_T */
|
||||
|
||||
/* using complete_and_exit() instead */
|
||||
/* #undef SPL_KTHREAD_COMPLETE_AND_EXIT */
|
||||
|
||||
@@ -856,7 +862,7 @@
|
||||
/* #undef ZFS_DEVICE_MINOR */
|
||||
|
||||
/* Define the project alias string. */
|
||||
#define ZFS_META_ALIAS "zfs-2.4.99-248-FreeBSD_g89f729dcc"
|
||||
#define ZFS_META_ALIAS "zfs-2.4.99-292-FreeBSD_g962e68865"
|
||||
|
||||
/* Define the project author. */
|
||||
#define ZFS_META_AUTHOR "OpenZFS"
|
||||
@@ -865,7 +871,7 @@
|
||||
/* #undef ZFS_META_DATA */
|
||||
|
||||
/* Define the maximum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MAX "6.17"
|
||||
#define ZFS_META_KVER_MAX "6.18"
|
||||
|
||||
/* Define the minimum compatible kernel version. */
|
||||
#define ZFS_META_KVER_MIN "4.18"
|
||||
@@ -886,10 +892,22 @@
|
||||
#define ZFS_META_NAME "zfs"
|
||||
|
||||
/* Define the project release. */
|
||||
#define ZFS_META_RELEASE "248-FreeBSD_g89f729dcc"
|
||||
#define ZFS_META_RELEASE "292-FreeBSD_g962e68865"
|
||||
|
||||
/* Define the project version. */
|
||||
#define ZFS_META_VERSION "2.4.99"
|
||||
|
||||
/* count is located in percpu_ref.data */
|
||||
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
/* #undef _FILE_OFFSET_BITS */
|
||||
|
||||
/* Define to 1 on platforms where this makes off_t a 64-bit type. */
|
||||
/* #undef _LARGE_FILES */
|
||||
|
||||
/* Number of bits in time_t, on hosts where this is settable. */
|
||||
/* #undef _TIME_BITS */
|
||||
|
||||
/* Define to 1 on platforms where this makes time_t a 64-bit type. */
|
||||
/* #undef __MINGW_USE_VC2005_COMPAT */
|
||||
|
||||
@@ -1 +1 @@
|
||||
#define ZFS_META_GITREV "zfs-2.4.99-220-ge63d026b9"
|
||||
#define ZFS_META_GITREV "zfs-2.4.99-292-g962e68865"
|
||||
|
||||
Reference in New Issue
Block a user