Notable upstream pull request merges:
 #16598 b4e4cbeb2 Always validate checksums for Direct I/O reads
 #16603 cefef28e9 vdev_id: multi-lun disks & slot num zero pad
 #16613 ab777f436 Return boolean_t in inline functions of
                  lib/libspl/include/sys/uio.h
 #16616 efeb60b86 FreeBSD: ignore some includes when not building kernel
 #16617 ca0141f32 zpool/zfs: restore -V & --version options
 #16632 7bf525530 zpool/zfs: allow --json wherever -j is allowed
 #16635 7e4be9275 zdb: fix printf format in dump_zap()
 #16640 48dfe3974 Fallback to strerror() when strerror_l() isn't available
 #16650 38a04f0a7 freebsd: Use compiler.h from FreeBSD's base's linuxkpi
 #16664 fba6a9069 zfs_debug: Restore log size limit for userspace
 #16665 fba6a9069 Fix compile-time warnings caused by duplicate struct
                  typedefs
 #16667 b2f6de7b5 zdb: show bp in uberblock dump

Obtained from:	OpenZFS
OpenZFS commit:	b2f6de7b58
This commit is contained in:
Martin Matuska
2024-10-20 20:08:55 +02:00
74 changed files with 1112 additions and 344 deletions
+1 -1
View File
@@ -11,7 +11,7 @@ concurrency:
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
permissions:
actions: read
contents: read
@@ -18,19 +18,21 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N ""
# we expect RAM shortage
cat << EOF | sudo tee /etc/ksmtuned.conf > /dev/null
# /etc/ksmtuned.conf - Configuration file for ksmtuned
# https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/7/html/virtualization_tuning_and_optimization_guide/chap-ksm
KSM_MONITOR_INTERVAL=60
# Millisecond sleep between ksm scans for 16Gb server.
# Smaller servers sleep more, bigger sleep less.
KSM_SLEEP_MSEC=10
KSM_NPAGES_BOOST=300
KSM_NPAGES_DECAY=-50
KSM_NPAGES_MIN=64
KSM_NPAGES_MAX=2048
KSM_SLEEP_MSEC=30
KSM_THRES_COEF=25
KSM_THRES_CONST=2048
KSM_NPAGES_BOOST=0
KSM_NPAGES_DECAY=0
KSM_NPAGES_MIN=1000
KSM_NPAGES_MAX=25000
KSM_THRES_COEF=80
KSM_THRES_CONST=8192
LOGFILE=/var/log/ksmtuned.log
DEBUG=1
+22 -10
View File
@@ -14,7 +14,7 @@ OSv=$OS
# compressed with .zst extension
REPO="https://github.com/mcmilk/openzfs-freebsd-images"
FREEBSD="$REPO/releases/download/v2024-09-16"
FREEBSD="$REPO/releases/download/v2024-10-05"
URLzs=""
# Ubuntu mirrors
@@ -62,33 +62,45 @@ case "$OS" in
OSv="fedora39"
URL="https://download.fedoraproject.org/pub/fedora/linux/releases/40/Cloud/x86_64/images/Fedora-Cloud-Base-Generic.x86_64-40-1.14.qcow2"
;;
freebsd13r)
freebsd13-3r)
OSNAME="FreeBSD 13.3-RELEASE"
OSv="freebsd13.0"
URLzs="$FREEBSD/amd64-freebsd-13.3-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
NIC="rtl8139"
;;
freebsd13-4r)
OSNAME="FreeBSD 13.4-RELEASE"
OSv="freebsd13.0"
URLzs="$FREEBSD/amd64-freebsd-13.4-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
NIC="rtl8139"
;;
freebsd13)
OSNAME="FreeBSD 13.4-STABLE"
OSv="freebsd13.0"
URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst"
freebsd14-0r)
OSNAME="FreeBSD 14.0-RELEASE"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-14.0-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
NIC="rtl8139"
;;
freebsd14r)
freebsd14-1r)
OSNAME="FreeBSD 14.1-RELEASE"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-14.1-RELEASE.qcow2.zst"
BASH="/usr/local/bin/bash"
;;
freebsd14)
freebsd13-4s)
OSNAME="FreeBSD 13.4-STABLE"
OSv="freebsd13.0"
URLzs="$FREEBSD/amd64-freebsd-13.4-STABLE.qcow2.zst"
BASH="/usr/local/bin/bash"
;;
freebsd14-1s)
OSNAME="FreeBSD 14.1-STABLE"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-14.1-STABLE.qcow2.zst"
BASH="/usr/local/bin/bash"
;;
freebsd15)
freebsd15-0c)
OSNAME="FreeBSD 15.0-CURRENT"
OSv="freebsd14.0"
URLzs="$FREEBSD/amd64-freebsd-15.0-CURRENT.qcow2.zst"
@@ -14,17 +14,21 @@ PID=$(pidof /usr/bin/qemu-system-x86_64)
tail --pid=$PID -f /dev/null
sudo virsh undefine openzfs
# definitions of per operating system
# default values per test vm:
VMs=2
CPU=2
# cpu pinning
CPUSET=("0,1" "2,3")
case "$OS" in
freebsd*)
VMs=2
CPU=3
# FreeBSD can't be optimized via ksmtuned
RAM=6
;;
*)
VMs=2
CPU=3
RAM=7
# Linux can be optimized via ksmtuned
RAM=8
;;
esac
@@ -73,6 +77,7 @@ EOF
--cpu host-passthrough \
--virt-type=kvm --hvm \
--vcpus=$CPU,sockets=1 \
--cpuset=${CPUSET[$((i-1))]} \
--memory $((1024*RAM)) \
--memballoon model=virtio \
--graphics none \
@@ -11,12 +11,10 @@ function output() {
}
function outfile() {
test -s "$1" || return
cat "$1" >> "out-$logfile.md"
}
function outfile_plain() {
test -s "$1" || return
output "<pre>"
cat "$1" >> "out-$logfile.md"
output "</pre>"
@@ -45,6 +43,8 @@ if [ ! -f out-1.md ]; then
tar xf "$tarfile"
test -s env.txt || continue
source env.txt
# when uname.txt is there, the other files are also ok
test -s uname.txt || continue
output "\n## Functional Tests: $OSNAME\n"
outfile_plain uname.txt
outfile_plain summary.txt
+8 -6
View File
@@ -22,8 +22,8 @@ jobs:
- name: Generate OS config and CI type
id: os
run: |
FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13", "freebsd13r", "freebsd14", "freebsd14r", "ubuntu20", "ubuntu22", "ubuntu24"]'
QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13", "freebsd14", "ubuntu24"]'
FULL_OS='["almalinux8", "almalinux9", "centos-stream9", "debian11", "debian12", "fedora39", "fedora40", "freebsd13-4r", "freebsd14-0r", "freebsd14-1s", "ubuntu20", "ubuntu22", "ubuntu24"]'
QUICK_OS='["almalinux8", "almalinux9", "debian12", "fedora40", "freebsd13-3r", "freebsd14-1r", "ubuntu24"]'
# determine CI type when running on PR
ci_type="full"
if ${{ github.event_name == 'pull_request' }}; then
@@ -46,10 +46,12 @@ jobs:
strategy:
fail-fast: false
matrix:
# all:
# os: [almalinux8, almalinux9, archlinux, centos-stream9, fedora39, fedora40, debian11, debian12, freebsd13, freebsd13r, freebsd14, freebsd14r, freebsd15, ubuntu20, ubuntu22, ubuntu24]
# openzfs:
# os: [almalinux8, almalinux9, centos-stream9, debian11, debian12, fedora39, fedora40, freebsd13, freebsd13r, freebsd14, freebsd14r, ubuntu20, ubuntu22, ubuntu24]
# rhl: almalinux8, almalinux9, centos-stream9, fedora39, fedora40
# debian: debian11, debian12, ubuntu20, ubuntu22, ubuntu24
# misc: archlinux, tumbleweed
# FreeBSD Release: freebsd13-3r, freebsd13-4r, freebsd14-0r, freebsd14-1r
# FreeBSD Stable: freebsd13-4s, freebsd14-1s
# FreeBSD Current: freebsd15-0c
os: ${{ fromJson(needs.test-config.outputs.test_os) }}
runs-on: ubuntu-24.04
steps:
+2 -2
View File
@@ -1,8 +1,8 @@
Meta: 1
Name: zfs
Branch: 1.0
Version: 2.3.0
Release: rc1
Version: 2.3.99
Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
+4
View File
@@ -4266,6 +4266,10 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
(void) printf("\ttimestamp = %llu UTC = %s",
(u_longlong_t)ub->ub_timestamp, ctime(&timestamp));
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
(void) printf("\tbp = %s\n", blkbuf);
(void) printf("\tmmp_magic = %016llx\n",
(u_longlong_t)ub->ub_mmp_magic);
if (MMP_VALID(ub)) {
+25 -5
View File
@@ -2162,6 +2162,7 @@ zfs_do_get(int argc, char **argv)
cb.cb_type = ZFS_TYPE_DATASET;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT},
{0, 0, 0, 0}
};
@@ -3852,6 +3853,7 @@ zfs_do_list(int argc, char **argv)
nvlist_t *data = NULL;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZFS_OPTION_JSON_NUMS_AS_INT},
{0, 0, 0, 0}
};
@@ -7436,9 +7438,15 @@ share_mount(int op, int argc, char **argv)
uint_t nthr;
jsobj = data = item = NULL;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":ajRlvo:Of" : "al"))
!= -1) {
while ((c = getopt_long(argc, argv,
op == OP_MOUNT ? ":ajRlvo:Of" : "al",
op == OP_MOUNT ? long_options : NULL, NULL)) != -1) {
switch (c) {
case 'a':
do_all = 1;
@@ -8374,8 +8382,14 @@ zfs_do_channel_program(int argc, char **argv)
boolean_t sync_flag = B_TRUE, json_output = B_FALSE;
zpool_handle_t *zhp;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, "nt:m:j")) != -1) {
while ((c = getopt_long(argc, argv, "nt:m:j", long_options,
NULL)) != -1) {
switch (c) {
case 't':
case 'm': {
@@ -9083,7 +9097,13 @@ zfs_do_version(int argc, char **argv)
int c;
nvlist_t *jsobj = NULL, *zfs_ver = NULL;
boolean_t json = B_FALSE;
while ((c = getopt(argc, argv, "j")) != -1) {
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{0, 0, 0, 0}
};
while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) {
switch (c) {
case 'j':
json = B_TRUE;
@@ -9187,7 +9207,7 @@ main(int argc, char **argv)
* Special case '-V|--version'
*/
if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
return (zfs_do_version(argc, argv));
return (zfs_version_print() != 0);
/*
* Special case 'help'
+16 -2
View File
@@ -7340,6 +7340,7 @@ zpool_do_list(int argc, char **argv)
current_prop_type = ZFS_TYPE_POOL;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-pool-key-guid", no_argument, NULL,
ZPOOL_OPTION_POOL_KEY_GUID},
@@ -9224,6 +9225,12 @@ vdev_stats_nvlist(zpool_handle_t *zhp, status_cbdata_t *cb, nvlist_t *nv,
}
}
if (cb->cb_print_dio_verify) {
nice_num_str_nvlist(vds, "dio_verify_errors",
vs->vs_dio_verify_errors, cb->cb_literal,
cb->cb_json_as_int, ZFS_NICENUM_1024);
}
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
&notpresent) == 0) {
nice_num_str_nvlist(vds, ZPOOL_CONFIG_NOT_PRESENT,
@@ -10975,6 +10982,7 @@ zpool_do_status(int argc, char **argv)
struct option long_options[] = {
{"power", no_argument, NULL, ZPOOL_OPTION_POWER},
{"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-flat-vdevs", no_argument, NULL,
ZPOOL_OPTION_JSON_FLAT_VDEVS},
@@ -12583,6 +12591,7 @@ zpool_do_get(int argc, char **argv)
current_prop_type = cb.cb_type;
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
{"json-int", no_argument, NULL, ZPOOL_OPTION_JSON_NUMS_AS_INT},
{"json-pool-key-guid", no_argument, NULL,
ZPOOL_OPTION_POOL_KEY_GUID},
@@ -13497,7 +13506,12 @@ zpool_do_version(int argc, char **argv)
int c;
nvlist_t *jsobj = NULL, *zfs_ver = NULL;
boolean_t json = B_FALSE;
while ((c = getopt(argc, argv, "j")) != -1) {
struct option long_options[] = {
{"json", no_argument, NULL, 'j'},
};
while ((c = getopt_long(argc, argv, "j", long_options, NULL)) != -1) {
switch (c) {
case 'j':
json = B_TRUE;
@@ -13613,7 +13627,7 @@ main(int argc, char **argv)
* Special case '-V|--version'
*/
if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0))
return (zpool_do_version(argc, argv));
return (zfs_version_print() != 0);
/*
* Special case 'help'
+18 -1
View File
@@ -6717,6 +6717,17 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
*
* Only after a full scrub has been completed is it safe to start injecting
* data corruption. See the comment in zfs_fault_inject().
*
* EBUSY may be returned for the following six cases. It's the callers
* responsibility to handle them accordingly.
*
* Current state Requested
* 1. Normal Scrub Running Normal Scrub or Error Scrub
* 2. Normal Scrub Paused Error Scrub
* 3. Normal Scrub Paused Pause Normal Scrub
* 4. Error Scrub Running Normal Scrub or Error Scrub
* 5. Error Scrub Paused Pause Error Scrub
* 6. Resilvering Anything else
*/
static int
ztest_scrub_impl(spa_t *spa)
@@ -8082,8 +8093,14 @@ ztest_raidz_expand_check(spa_t *spa)
(void) printf("verifying an interrupted raidz "
"expansion using a pool scrub ...\n");
}
/* Will fail here if there is non-recoverable corruption detected */
VERIFY0(ztest_scrub_impl(spa));
int error = ztest_scrub_impl(spa);
if (error == EBUSY)
error = 0;
VERIFY0(error);
if (ztest_opts.zo_verbose >= 1) {
(void) printf("raidz expansion scrub check complete\n");
}
+12 -8
View File
@@ -58,9 +58,9 @@ deb-utils: deb-local rpm-utils-initramfs
pkg1=$${name}-$${version}.$${arch}.rpm; \
pkg2=libnvpair3-$${version}.$${arch}.rpm; \
pkg3=libuutil3-$${version}.$${arch}.rpm; \
pkg4=libzfs5-$${version}.$${arch}.rpm; \
pkg5=libzpool5-$${version}.$${arch}.rpm; \
pkg6=libzfs5-devel-$${version}.$${arch}.rpm; \
pkg4=libzfs6-$${version}.$${arch}.rpm; \
pkg5=libzpool6-$${version}.$${arch}.rpm; \
pkg6=libzfs6-devel-$${version}.$${arch}.rpm; \
pkg7=$${name}-test-$${version}.$${arch}.rpm; \
pkg8=$${name}-dracut-$${version}.noarch.rpm; \
pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
@@ -72,7 +72,7 @@ deb-utils: deb-local rpm-utils-initramfs
path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \
echo "#!$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
echo "`which dh_shlibdeps` -- \
-xlibuutil3linux -xlibnvpair3linux -xlibzfs5linux -xlibzpool5linux" \
-xlibuutil3linux -xlibnvpair3linux -xlibzfs6linux -xlibzpool6linux" \
>> $${path_prepend}/dh_shlibdeps; \
## These -x arguments are passed to dpkg-shlibdeps, which exclude the
## Debianized packages from the auto-generated dependencies of the new debs,
@@ -93,13 +93,17 @@ debian:
cp -r contrib/debian debian; chmod +x debian/rules;
native-deb-utils: native-deb-local debian
while [ -f debian/deb-build.lock ]; do sleep 1; done; \
echo "native-deb-utils" > debian/deb-build.lock; \
cp contrib/debian/control debian/control; \
$(DPKGBUILD) -b -rfakeroot -us -uc;
$(DPKGBUILD) -b -rfakeroot -us -uc; \
$(RM) -f debian/deb-build.lock
native-deb-kmod: native-deb-local debian
while [ -f debian/deb-build.lock ]; do sleep 1; done; \
echo "native-deb-kmod" > debian/deb-build.lock; \
sh scripts/make_gitrev.sh; \
fakeroot debian/rules override_dh_binary-modules;
fakeroot debian/rules override_dh_binary-modules; \
$(RM) -f debian/deb-build.lock
native-deb: native-deb-utils native-deb-kmod
.NOTPARALLEL: native-deb native-deb-utils native-deb-kmod
+1 -1
View File
@@ -33,7 +33,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [
ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV
ZFS_AC_CONFIG_USER_ZFSEXEC
AC_CHECK_FUNCS([execvpe issetugid mlockall strlcat strlcpy gettid])
AC_CHECK_FUNCS([execvpe issetugid mlockall strerror_l strlcat strlcpy gettid])
AC_SUBST(RM)
])
@@ -12,14 +12,14 @@ dist_noinst_DATA += %D%/openzfs-libpam-zfs.postinst
dist_noinst_DATA += %D%/openzfs-libpam-zfs.prerm
dist_noinst_DATA += %D%/openzfs-libuutil3.docs
dist_noinst_DATA += %D%/openzfs-libuutil3.install.in
dist_noinst_DATA += %D%/openzfs-libzfs4.docs
dist_noinst_DATA += %D%/openzfs-libzfs4.install.in
dist_noinst_DATA += %D%/openzfs-libzfs6.docs
dist_noinst_DATA += %D%/openzfs-libzfs6.install.in
dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.docs
dist_noinst_DATA += %D%/openzfs-libzfsbootenv1.install.in
dist_noinst_DATA += %D%/openzfs-libzfs-dev.docs
dist_noinst_DATA += %D%/openzfs-libzfs-dev.install.in
dist_noinst_DATA += %D%/openzfs-libzpool5.docs
dist_noinst_DATA += %D%/openzfs-libzpool5.install.in
dist_noinst_DATA += %D%/openzfs-libzpool6.docs
dist_noinst_DATA += %D%/openzfs-libzpool6.install.in
dist_noinst_DATA += %D%/openzfs-python3-pyzfs.install
dist_noinst_DATA += %D%/openzfs-zfs-dkms.config
dist_noinst_DATA += %D%/openzfs-zfs-dkms.dkms
+2 -2
View File
@@ -6,6 +6,6 @@ contrib/pyzfs/libzfs_core/bindings/__pycache__/
contrib/pyzfs/pyzfs.egg-info/
debian/openzfs-libnvpair3.install
debian/openzfs-libuutil3.install
debian/openzfs-libzfs4.install
debian/openzfs-libzfs6.install
debian/openzfs-libzfs-dev.install
debian/openzfs-libzpool5.install
debian/openzfs-libzpool6.install
+13 -13
View File
@@ -78,9 +78,9 @@ Architecture: linux-any
Depends: libssl-dev | libssl1.0-dev,
openzfs-libnvpair3 (= ${binary:Version}),
openzfs-libuutil3 (= ${binary:Version}),
openzfs-libzfs4 (= ${binary:Version}),
openzfs-libzfs6 (= ${binary:Version}),
openzfs-libzfsbootenv1 (= ${binary:Version}),
openzfs-libzpool5 (= ${binary:Version}),
openzfs-libzpool6 (= ${binary:Version}),
${misc:Depends}
Replaces: libzfslinux-dev
Conflicts: libzfslinux-dev
@@ -90,18 +90,18 @@ Description: OpenZFS filesystem development files for Linux
libraries of OpenZFS filesystem.
.
This package includes the development files of libnvpair3, libuutil3,
libzpool5 and libzfs4.
libzpool6 and libzfs6.
Package: openzfs-libzfs4
Package: openzfs-libzfs6
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
# The libcurl4 is loaded through dlopen("libcurl.so.4").
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521
Recommends: libcurl4
Breaks: libzfs2, libzfs4
Replaces: libzfs2, libzfs4, libzfs4linux
Conflicts: libzfs4linux
Breaks: libzfs2, libzfs4, libzfs4linux, libzfs6linux
Replaces: libzfs2, libzfs4, libzfs4linux, libzfs6linux
Conflicts: libzfs6linux
Description: OpenZFS filesystem library for Linux - general support
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
@@ -123,13 +123,13 @@ Description: OpenZFS filesystem library for Linux - label info support
.
The zfsbootenv library provides support for modifying ZFS label information.
Package: openzfs-libzpool5
Package: openzfs-libzpool6
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
Breaks: libzpool2, libzpool5
Replaces: libzpool2, libzpool5, libzpool5linux
Conflicts: libzpool5linux
Breaks: libzpool2, libzpool5, libzpool5linux, libzpool6linux
Replaces: libzpool2, libzpool5, libzpool5linux, libzpool6linux
Conflicts: libzpool6linux
Description: OpenZFS pool library for Linux
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
@@ -246,8 +246,8 @@ Architecture: linux-any
Pre-Depends: ${misc:Pre-Depends}
Depends: openzfs-libnvpair3 (= ${binary:Version}),
openzfs-libuutil3 (= ${binary:Version}),
openzfs-libzfs4 (= ${binary:Version}),
openzfs-libzpool5 (= ${binary:Version}),
openzfs-libzfs6 (= ${binary:Version}),
openzfs-libzpool6 (= ${binary:Version}),
python3,
${misc:Depends},
${shlibs:Depends}
@@ -98,6 +98,7 @@ usr/share/man/man8/zpool-attach.8
usr/share/man/man8/zpool-checkpoint.8
usr/share/man/man8/zpool-clear.8
usr/share/man/man8/zpool-create.8
usr/share/man/man8/zpool-ddtprune.8
usr/share/man/man8/zpool-destroy.8
usr/share/man/man8/zpool-detach.8
usr/share/man/man8/zpool-ddtprune.8
@@ -113,6 +114,7 @@ usr/share/man/man8/zpool-list.8
usr/share/man/man8/zpool-offline.8
usr/share/man/man8/zpool-online.8
usr/share/man/man8/zpool-prefetch.8
usr/share/man/man8/zpool-prefetch.8
usr/share/man/man8/zpool-reguid.8
usr/share/man/man8/zpool-remove.8
usr/share/man/man8/zpool-reopen.8
@@ -344,7 +344,7 @@ mount_fs()
# Need the _original_ datasets mountpoint!
mountpoint=$(get_fs_value "$fs" mountpoint)
ZFS_CMD="mount -o zfsutil -t zfs"
ZFS_CMD="mount.zfs -o zfsutil"
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
# Can't use the mountpoint property. Might be one of our
# clones. Check the 'org.zol:mountpoint' property set in
@@ -359,9 +359,8 @@ mount_fs()
# isn't the root fs.
return 0
fi
# Don't use mount.zfs -o zfsutils for legacy mountpoint
if [ "$mountpoint" = "legacy" ]; then
ZFS_CMD="mount -t zfs"
ZFS_CMD="mount.zfs"
fi
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""
+4
View File
@@ -276,7 +276,11 @@ _LIBZUTIL_H void update_vdev_config_dev_sysfs_path(nvlist_t *nv,
* Thread-safe strerror() for use in ZFS libraries
*/
static inline char *zfs_strerror(int errnum) {
#ifdef HAVE_STRERROR_L
return (strerror_l(errnum, uselocale(0)));
#else
return (strerror(errnum));
#endif
}
#ifdef __cplusplus
@@ -38,8 +38,7 @@
#include <sys/rwlock.h>
#include <sys/wait.h>
#include <sys/wmsum.h>
typedef struct kstat_s kstat_t;
#include <sys/kstat.h>
#define TASKQ_NAMELEN 31
@@ -30,6 +30,8 @@
extern "C" {
#endif
struct abd;
struct abd_scatter {
uint_t abd_offset;
uint_t abd_nents;
@@ -41,10 +43,8 @@ struct abd_linear {
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
};
typedef struct abd abd_t;
typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
int abd_iterate_page_func(struct abd *, size_t, size_t, abd_iter_page_func_t *,
void *);
/*
@@ -52,11 +52,11 @@ int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
* Note: these are only needed to support vdev_classic. See comment in
* vdev_disk.c.
*/
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
unsigned int abd_bio_map_off(struct bio *, struct abd *, unsigned int, size_t);
unsigned long abd_nr_pages_off(struct abd *, unsigned int, size_t);
__attribute__((malloc))
abd_t *abd_alloc_from_pages(struct page **, unsigned long, uint64_t);
struct abd *abd_alloc_from_pages(struct page **, unsigned long, uint64_t);
#ifdef __cplusplus
}
@@ -69,6 +69,7 @@ typedef struct vfs {
boolean_t vfs_do_relatime;
boolean_t vfs_nbmand;
boolean_t vfs_do_nbmand;
kmutex_t vfs_mntpt_lock;
} vfs_t;
typedef struct zfs_mnt {
+2 -1
View File
@@ -42,7 +42,8 @@ extern "C" {
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
#define FM_EREPORT_ZFS_DEADMAN "deadman"
#define FM_EREPORT_ZFS_DIO_VERIFY "dio_verify"
#define FM_EREPORT_ZFS_DIO_VERIFY_WR "dio_verify_wr"
#define FM_EREPORT_ZFS_DIO_VERIFY_RD "dio_verify_rd"
#define FM_EREPORT_ZFS_POOL "zpool"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
+1 -1
View File
@@ -57,7 +57,7 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
void vdev_raidz_child_done(zio_t *);
void vdev_raidz_io_done(zio_t *);
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
+15 -14
View File
@@ -208,25 +208,25 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_PROBE (1ULL << 16)
#define ZIO_FLAG_TRYHARD (1ULL << 17)
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
#define ZIO_FLAG_DIO_READ (1ULL << 19)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
#define ZIO_FLAG_DONT_QUEUE (1ULL << 19) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 20)
#define ZIO_FLAG_IO_BYPASS (1ULL << 21)
#define ZIO_FLAG_IO_REWRITE (1ULL << 22)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 23)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 24)
#define ZIO_FLAG_GANG_CHILD (1ULL << 25)
#define ZIO_FLAG_DDT_CHILD (1ULL << 26)
#define ZIO_FLAG_GODFATHER (1ULL << 27)
#define ZIO_FLAG_NOPWRITE (1ULL << 28)
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
#define ZIO_FLAG_DELEGATED (1ULL << 30)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 31)
#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
#define ZIO_FLAG_GODFATHER (1ULL << 28)
#define ZIO_FLAG_NOPWRITE (1ULL << 29)
#define ZIO_FLAG_REEXECUTED (1ULL << 30)
#define ZIO_FLAG_DELEGATED (1ULL << 31)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
@@ -647,6 +647,7 @@ extern void zio_vdev_io_redone(zio_t *zio);
extern void zio_change_priority(zio_t *pio, zio_priority_t priority);
extern void zio_checksum_verified(zio_t *zio);
extern void zio_dio_chksum_verify_error_report(zio_t *zio);
extern int zio_worst_error(int e1, int e2);
extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
+186 -35
View File
@@ -25,19 +25,32 @@
#include <sys/backtrace.h>
#include <sys/types.h>
#include <sys/debug.h>
#include <unistd.h>
/*
* libspl_backtrace() must be safe to call from inside a signal hander. This
* mostly means it must not allocate, and so we can't use things like printf.
* Output helpers. libspl_backtrace() must not block, must be thread-safe and
* must be safe to call from a signal handler. At least, that means not having
* printf, so we end up having to call write() directly on the fd. That's
* awkward, as we always have to pass through a length, and some systems will
* complain if we don't consume the return. So we have some macros to make
* things a little more palatable.
*/
#define spl_bt_write_n(fd, s, n) \
do { ssize_t r __maybe_unused = write(fd, s, n); } while (0)
#define spl_bt_write(fd, s) spl_bt_write_n(fd, s, sizeof (s))
#if defined(HAVE_LIBUNWIND)
#define UNW_LOCAL_ONLY
#include <libunwind.h>
/*
* Convert `v` to ASCII hex characters. The bottom `n` nybbles (4-bits ie one
* hex digit) will be written, up to `buflen`. The buffer will not be
* null-terminated. Returns the number of digits written.
*/
static size_t
libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
spl_bt_u64_to_hex_str(uint64_t v, size_t n, char *buf, size_t buflen)
{
static const char hexdigits[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
@@ -45,10 +58,10 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
};
size_t pos = 0;
boolean_t want = (digits == 0);
boolean_t want = (n == 0);
for (int i = 15; i >= 0; i--) {
const uint64_t d = v >> (i * 4) & 0xf;
if (!want && (d != 0 || digits > i))
if (!want && (d != 0 || n > i))
want = B_TRUE;
if (want) {
buf[pos++] = hexdigits[d];
@@ -62,40 +75,181 @@ libspl_u64_to_hex_str(uint64_t v, size_t digits, char *buf, size_t buflen)
void
libspl_backtrace(int fd)
{
ssize_t ret __attribute__((unused));
unw_context_t uc;
unw_cursor_t cp;
unw_word_t loc;
unw_word_t v;
char buf[128];
size_t n;
int err;
ret = write(fd, "Call trace:\n", 12);
/* Snapshot the current frame and state. */
unw_getcontext(&uc);
/*
* TODO: walk back to the frame that tripped the assertion / the place
* where the signal was recieved.
*/
/*
* Register dump. We're going to loop over all the registers in the
* top frame, and show them, with names, in a nice three-column
* layout, which keeps us within 80 columns.
*/
spl_bt_write(fd, "Registers:\n");
/* Initialise a frame cursor, starting at the current frame */
unw_init_local(&cp, &uc);
while (unw_step(&cp) > 0) {
unw_get_reg(&cp, UNW_REG_IP, &loc);
ret = write(fd, " [0x", 5);
n = libspl_u64_to_hex_str(loc, 10, buf, sizeof (buf));
ret = write(fd, buf, n);
ret = write(fd, "] ", 2);
unw_get_proc_name(&cp, buf, sizeof (buf), &loc);
for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
ret = write(fd, buf, n);
ret = write(fd, "+0x", 3);
n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf));
ret = write(fd, buf, n);
#ifdef HAVE_LIBUNWIND_ELF
ret = write(fd, " (in ", 5);
unw_get_elf_filename(&cp, buf, sizeof (buf), &loc);
for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
ret = write(fd, buf, n);
ret = write(fd, " +0x", 4);
n = libspl_u64_to_hex_str(loc, 2, buf, sizeof (buf));
ret = write(fd, buf, n);
ret = write(fd, ")", 1);
#endif
ret = write(fd, "\n", 1);
/*
* libunwind's list of possible registers for this architecture is an
* enum, unw_regnum_t. UNW_TDEP_LAST_REG is the highest-numbered
* register in that list, however, not all register numbers in this
* range are defined by the architecture, and not all defined registers
* will be present on every implementation of that architecture.
* Moreover, libunwind provides nice names for most, but not all
* registers, but these are hardcoded; a name being available does not
* mean that register is available.
*
* So, we have to pull this all together here. We try to get the value
* of every possible register. If we get a value for it, then the
* register must exist, and so we get its name. If libunwind has no
* name for it, we synthesize something. These cases should be rare,
* and they're usually for uninteresting or niche registers, so it
* shouldn't really matter. We can see the value, and that's the main
* thing.
*/
uint_t cols = 0;
for (uint_t regnum = 0; regnum <= UNW_TDEP_LAST_REG; regnum++) {
/*
* Get the value. Any error probably means the register
* doesn't exist, and we skip it.
*/
if (unw_get_reg(&cp, regnum, &v) < 0)
continue;
/*
* Register name. If libunwind doesn't have a name for it,
* it will return "???". As a shortcut, we just treat '?'
* is an alternate end-of-string character.
*/
const char *name = unw_regname(regnum);
for (n = 0; name[n] != '\0' && name[n] != '?'; n++) {}
if (n == 0) {
/*
* No valid name, so make one of the form "?xx", where
* "xx" is the two-char hex of libunwind's register
* number.
*/
buf[0] = '?';
n = spl_bt_u64_to_hex_str(regnum, 2,
&buf[1], sizeof (buf)-1) + 1;
name = buf;
}
/*
* Two spaces of padding before each column, plus extra
* spaces to align register names shorter than three chars.
*/
spl_bt_write_n(fd, " ", 5-MIN(n, 3));
/* Register name and column punctuation */
spl_bt_write_n(fd, name, n);
spl_bt_write(fd, ": 0x");
/*
* Convert register value (from unw_get_reg()) to hex. We're
* assuming that all registers are 64-bits wide, which is
* probably fine for any general-purpose registers on any
* machine currently in use. A more generic way would be to
* look at the width of unw_word_t, but that would also
* complicate the column code a bit. This is fine.
*/
n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf));
spl_bt_write_n(fd, buf, n);
/* Every third column, emit a newline */
if (!(++cols % 3))
spl_bt_write(fd, "\n");
}
/* If we finished before the third column, emit a newline. */
if (cols % 3)
spl_bt_write(fd, "\n");
/* Now the main event, the backtrace. */
spl_bt_write(fd, "Call trace:\n");
/* Reset the cursor to the top again. */
unw_init_local(&cp, &uc);
do {
/*
* Getting the IP should never fail; libunwind handles it
* specially, because its used a lot internally. Still, no
* point being silly about it, as the last thing we want is
* our crash handler to crash. So if it ever does fail, we'll
* show an error line, but keep going to the next frame.
*/
if (unw_get_reg(&cp, UNW_REG_IP, &v) < 0) {
spl_bt_write(fd, " [couldn't get IP register; "
"corrupt frame?]");
continue;
}
/* IP & punctuation */
n = spl_bt_u64_to_hex_str(v, 16, buf, sizeof (buf));
spl_bt_write(fd, " [0x");
spl_bt_write_n(fd, buf, n);
spl_bt_write(fd, "] ");
/*
* Function ("procedure") name for the current frame. `v`
* receives the offset from the named function to the IP, which
* we show as a "+offset" suffix.
*
* If libunwind can't determine the name, we just show "???"
* instead. We've already displayed the IP above; that will
* have to do.
*
* unw_get_proc_name() will return ENOMEM if the buffer is too
* small, instead truncating the name. So we treat that as a
* success and use whatever is in the buffer.
*/
err = unw_get_proc_name(&cp, buf, sizeof (buf), &v);
if (err == 0 || err == -UNW_ENOMEM) {
for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
spl_bt_write_n(fd, buf, n);
/* Offset from proc name */
spl_bt_write(fd, "+0x");
n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf));
spl_bt_write_n(fd, buf, n);
} else
spl_bt_write(fd, "???");
#ifdef HAVE_LIBUNWIND_ELF
/*
* Newer libunwind has unw_get_elf_filename(), which gets
* the name of the ELF object that the frame was executing in.
* Like `unw_get_proc_name()`, `v` recieves the offset within
* the file, and UNW_ENOMEM indicates that a truncate filename
* was left in the buffer.
*/
err = unw_get_elf_filename(&cp, buf, sizeof (buf), &v);
if (err == 0 || err == -UNW_ENOMEM) {
for (n = 0; n < sizeof (buf) && buf[n] != '\0'; n++) {}
spl_bt_write(fd, " (in ");
spl_bt_write_n(fd, buf, n);
/* Offset within file */
spl_bt_write(fd, " +0x");
n = spl_bt_u64_to_hex_str(v, 2, buf, sizeof (buf));
spl_bt_write_n(fd, buf, n);
spl_bt_write(fd, ")");
}
#endif
spl_bt_write(fd, "\n");
} while (unw_step(&cp) > 0);
}
#elif defined(HAVE_BACKTRACE)
#include <execinfo.h>
@@ -103,15 +257,12 @@ libspl_backtrace(int fd)
void
libspl_backtrace(int fd)
{
ssize_t ret __attribute__((unused));
void *btptrs[64];
size_t nptrs = backtrace(btptrs, 64);
ret = write(fd, "Call trace:\n", 12);
spl_bt_write(fd, "Call trace:\n");
backtrace_symbols_fd(btptrs, nptrs, fd);
}
#else
#include <sys/debug.h>
void
libspl_backtrace(int fd __maybe_unused)
{
+1 -1
View File
@@ -70,7 +70,7 @@ if BUILD_FREEBSD
libzfs_la_LIBADD += -lutil -lgeom
endif
libzfs_la_LDFLAGS += -version-info 5:0:1
libzfs_la_LDFLAGS += -version-info 6:0:0
pkgconfig_DATA += %D%/libzfs.pc
+1 -1
View File
@@ -1,4 +1,4 @@
<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.4'>
<abi-corpus version='2.0' architecture='elf-amd-x86_64' soname='libzfs.so.6'>
<elf-needed>
<dependency name='libzfs_core.so.3'/>
<dependency name='libnvpair.so.3'/>
+1 -1
View File
@@ -2796,7 +2796,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
}
/*
* With EBUSY, five cases are possible:
* With EBUSY, six cases are possible:
*
* Current state Requested
* 1. Normal Scrub Running Normal Scrub or Error Scrub
+1 -1
View File
@@ -212,7 +212,7 @@ if BUILD_FREEBSD
libzpool_la_LIBADD += -lgeom
endif
libzpool_la_LDFLAGS += -version-info 5:0:0
libzpool_la_LDFLAGS += -version-info 6:0:0
if TARGET_CPU_POWERPC
module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec
@@ -35,9 +35,25 @@ typedef struct zfs_dbgmsg {
static list_t zfs_dbgmsgs;
static kmutex_t zfs_dbgmsgs_lock;
static uint_t zfs_dbgmsg_size = 0;
static uint_t zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
int zfs_dbgmsg_enable = B_TRUE;
static void
zfs_dbgmsg_purge(uint_t max_size)
{
while (zfs_dbgmsg_size > max_size) {
zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs);
if (zdm == NULL)
return;
uint_t size = zdm->zdm_size;
kmem_free(zdm, size);
zfs_dbgmsg_size -= size;
}
}
void
zfs_dbgmsg_init(void)
{
@@ -74,6 +90,8 @@ __zfs_dbgmsg(char *buf)
mutex_enter(&zfs_dbgmsgs_lock);
list_insert_tail(&zfs_dbgmsgs, zdm);
zfs_dbgmsg_size += size;
zfs_dbgmsg_purge(zfs_dbgmsg_maxsize);
mutex_exit(&zfs_dbgmsgs_lock);
}
+1 -1
View File
@@ -436,7 +436,7 @@ write.
It can also help to identify if reported checksum errors are tied to Direct I/O
writes.
Each verify error causes a
.Sy dio_verify
.Sy dio_verify_wr
zevent.
Direct Write I/O checkum verify errors can be seen with
.Nm zpool Cm status Fl d .
+9 -1
View File
@@ -92,6 +92,11 @@ before a generic mapping for the same slot.
In this way a custom mapping may be applied to a particular channel
and a default mapping applied to the others.
.
.It Sy zpad_slot Ar digits
Pad slot numbers with zeros to make them
.Ar digits
long, which can help to make disk names a consistent length and easier to sort.
.
.It Sy multipath Sy yes Ns | Ns Sy no
Specifies whether
.Xr vdev_id 8
@@ -122,7 +127,7 @@ device is connected to.
The default is
.Sy 4 .
.
.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy ses
.It Sy slot Sy bay Ns | Ns Sy phy Ns | Ns Sy port Ns | Ns Sy id Ns | Ns Sy lun Ns | Ns Sy bay_lun Ns | Ns Sy ses
Specifies from which element of a SAS identifier the slot number is
taken.
The default is
@@ -138,6 +143,9 @@ use the SAS port as the slot number.
use the scsi id as the slot number.
.It Sy lun
use the scsi lun as the slot number.
.It Sy bay_lun
read the slot number from the bay identifier and append the lun number.
Useful for multi-lun multi-actuator hard drives.
.It Sy ses
use the SCSI Enclosure Services (SES) enclosure device slot number,
as reported by
+1 -1
View File
@@ -71,7 +71,7 @@ The following fields are displayed:
Used for scripting mode.
Do not print headers and separate fields by a single tab instead of arbitrary
white space.
.It Fl j Op Ar --json-int
.It Fl j , -json Op Ar --json-int
Print the output in JSON format.
Specify
.Sy --json-int
+1 -1
View File
@@ -59,7 +59,7 @@
.Xc
Displays all ZFS file systems currently mounted.
.Bl -tag -width "-j"
.It Fl j
.It Fl j , -json
Displays all mounted file systems in JSON format.
.El
.It Xo
+1 -1
View File
@@ -50,7 +50,7 @@ and any attempts to access or modify other pools will cause an error.
.
.Sh OPTIONS
.Bl -tag -width "-t"
.It Fl j
.It Fl j , -json
Display channel program output in JSON format.
When this flag is specified and standard output is empty -
channel program encountered an error.
+1 -1
View File
@@ -130,7 +130,7 @@ The value
can be used to display all properties that apply to the given dataset's type
.Pq Sy filesystem , volume , snapshot , No or Sy bookmark .
.Bl -tag -width "-s source"
.It Fl j Op Ar --json-int
.It Fl j , -json Op Ar --json-int
Display the output in JSON format.
Specify
.Sy --json-int
+4 -1
View File
@@ -98,7 +98,10 @@ This can be an indicator of problems with the underlying storage device.
The number of delay events is ratelimited by the
.Sy zfs_slow_io_events_per_second
module parameter.
.It Sy dio_verify
.It Sy dio_verify_rd
Issued when there was a checksum verify error after a Direct I/O read has been
issued.
.It Sy dio_verify_wr
Issued when there was a checksum verify error after a Direct I/O write has been
issued.
This event can only take place if the module parameter
+2 -2
View File
@@ -98,7 +98,7 @@ See the
.Xr zpoolprops 7
manual page for more information on the available pool properties.
.Bl -tag -compact -offset Ds -width "-o field"
.It Fl j Op Ar --json-int, --json-pool-key-guid
.It Fl j , -json Op Ar --json-int, --json-pool-key-guid
Display the list of properties in JSON format.
Specify
.Sy --json-int
@@ -157,7 +157,7 @@ See the
.Xr vdevprops 7
manual page for more information on the available pool properties.
.Bl -tag -compact -offset Ds -width "-o field"
.It Fl j Op Ar --json-int
.It Fl j , -json Op Ar --json-int
Display the list of properties in JSON format.
Specify
.Sy --json-int
+1 -1
View File
@@ -59,7 +59,7 @@ is specified, the command exits after
.Ar count
reports are printed.
.Bl -tag -width Ds
.It Fl j Op Ar --json-int, --json-pool-key-guid
.It Fl j , -json Op Ar --json-int, --json-pool-key-guid
Display the list of pools in JSON format.
Specify
.Sy --json-int
+7 -3
View File
@@ -70,7 +70,7 @@ See the
option of
.Nm zpool Cm iostat
for complete details.
.It Fl j Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
.It Fl j , -json Op Ar --json-int, --json-flat-vdevs, --json-pool-key-guid
Display the status for ZFS pools in JSON format.
Specify
.Sy --json-int
@@ -82,14 +82,18 @@ Specify
.Sy --json-pool-key-guid
to set pool GUID as key for pool objects instead of pool names.
.It Fl d
Display the number of Direct I/O write checksum verify errors that have occured
on a top-level VDEV.
Display the number of Direct I/O read/write checksum verify errors that have
occured on a top-level VDEV.
See
.Sx zfs_vdev_direct_write_verify
in
.Xr zfs 4
for details about the conditions that can cause Direct I/O write checksum
verify failures to occur.
Direct I/O reads checksum verify errors can also occur if the contents of the
buffer are being manipulated after the I/O has been issued and is in flight.
In the case of Direct I/O read checksum verify errors, the I/O will be reissued
through the ARC.
.It Fl D
Display a histogram of deduplication statistics, showing the allocated
.Pq physically present on disk
@@ -620,9 +620,16 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
/*
* Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
* no change the contents of the ABD and will ASSERT that you didn't modify
* the buffer since it was borrowed. If you want any changes you made to buf to
* be copied back to abd, use abd_return_buf_copy() instead.
* not change the contents of the ABD. If you want any changes you made to
* buf to be copied back to abd, use abd_return_buf_copy() instead. If the
* ABD is not constructed from user pages from Direct I/O then an ASSERT
* checks to make sure the contents of the buffer have not changed since it was
* borrowed. We can not ASSERT the contents of the buffer have not changed if
* it is composed of user pages. While Direct I/O write pages are placed under
* write protection and can not be changed, this is not the case for Direct I/O
* reads. The pages of a Direct I/O read could be manipulated at any time.
* Checksum verifications in the ZIO pipeline check for this issue and handle
* it by returning an error on checksum verification failure.
*/
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
@@ -632,8 +639,34 @@ abd_return_buf(abd_t *abd, void *buf, size_t n)
#ifdef ZFS_DEBUG
(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
#endif
if (abd_is_linear(abd)) {
if (abd_is_from_pages(abd)) {
if (!abd_is_linear_page(abd))
zio_buf_free(buf, n);
} else if (abd_is_linear(abd)) {
ASSERT3P(buf, ==, abd_to_buf(abd));
} else if (abd_is_gang(abd)) {
#ifdef ZFS_DEBUG
/*
* We have to be careful with gang ABD's that we do not ASSERT
* for any ABD's that contain user pages from Direct I/O. See
* the comment above about Direct I/O read buffers possibly
* being manipulated. In order to handle this, we jsut iterate
* through the gang ABD and only verify ABD's that are not from
* user pages.
*/
void *cmp_buf = buf;
for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
cabd != NULL;
cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
if (!abd_is_from_pages(cabd)) {
ASSERT0(abd_cmp_buf(cabd, cmp_buf,
cabd->abd_size));
}
cmp_buf = (char *)cmp_buf + cabd->abd_size;
}
#endif
zio_buf_free(buf, n);
} else {
ASSERT0(abd_cmp_buf(abd, buf, n));
zio_buf_free(buf, n);
@@ -1008,7 +1008,9 @@ abd_borrow_buf_copy(abd_t *abd, size_t n)
* borrowed. We can not ASSERT that the contents of the buffer have not changed
* if it is composed of user pages because the pages can not be placed under
* write protection and the user could have possibly changed the contents in
* the pages at any time.
* the pages at any time. This is also an issue for Direct I/O reads. Checksum
* verifications in the ZIO pipeline check for this issue and handle it by
* returning an error on checksum verification failure.
*/
void
abd_return_buf(abd_t *abd, void *buf, size_t n)
@@ -767,9 +767,6 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
uint64_t id, pos = 0;
int error = 0;
if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
return (SET_ERROR(ENOENT));
cookie = spl_fstrans_mark();
snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
@@ -786,8 +783,14 @@ zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
break;
}
snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint, snapname);
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint, snapname);
} else
error = SET_ERROR(ENOENT);
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
out:
kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
spl_fstrans_unmark(cookie);
@@ -1049,6 +1052,66 @@ exportfs_flush(void)
(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
}
/*
* Returns the path in char format for given struct path. Uses
* d_path exported by kernel to convert struct path to char
* format. Returns the correct path for mountpoints and chroot
* environments.
*
* If chroot environment has directories that are mounted with
* --bind or --rbind flag, d_path returns the complete path inside
* chroot environment but does not return the absolute path, i.e.
* the path to chroot environment is missing.
*/
static int
get_root_path(struct path *path, char *buff, int len)
{
char *path_buffer, *path_ptr;
int error = 0;
path_get(path);
path_buffer = kmem_zalloc(len, KM_SLEEP);
path_ptr = d_path(path, path_buffer, len);
if (IS_ERR(path_ptr))
error = SET_ERROR(-PTR_ERR(path_ptr));
else
strcpy(buff, path_ptr);
kmem_free(path_buffer, len);
path_put(path);
return (error);
}
/*
* Returns if the current process root is chrooted or not. Linux
* kernel exposes the task_struct for current process and init.
* Since init process root points to actual root filesystem when
* Linux runtime is reached, we can compare the current process
* root with init process root to determine if root of the current
* process is different from init, which can reliably determine if
* current process is in chroot context or not.
*/
static int
is_current_chrooted(void)
{
struct task_struct *curr = current, *global = &init_task;
struct path cr_root, gl_root;
task_lock(curr);
get_fs_root(curr->fs, &cr_root);
task_unlock(curr);
task_lock(global);
get_fs_root(global->fs, &gl_root);
task_unlock(global);
int chrooted = !path_equal(&cr_root, &gl_root);
path_put(&gl_root);
path_put(&cr_root);
return (chrooted);
}
/*
* Attempt to unmount a snapshot by making a call to user space.
* There is no assurance that this can or will succeed, is just a
@@ -1123,14 +1186,50 @@ zfsctl_snapshot_mount(struct path *path, int flags)
if (error)
goto error;
if (is_current_chrooted() == 0) {
/*
* Current process is not in chroot context
*/
char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
struct path mnt_path;
mnt_path.mnt = path->mnt;
mnt_path.dentry = path->mnt->mnt_root;
/*
* Get path to current mountpoint
*/
error = get_root_path(&mnt_path, m, MAXPATHLEN);
if (error != 0) {
kmem_free(m, MAXPATHLEN);
goto error;
}
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
/*
* If current mnountpoint and vfs_mntpoint are not same,
* store current mountpoint in vfs_mntpoint.
*/
if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
}
} else
zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
kmem_free(m, MAXPATHLEN);
}
/*
* Construct a mount point path from sb of the ctldir inode and dirent
* name, instead of from d_path(), so that chroot'd process doesn't fail
* on mount.zfs(8).
*/
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
dname(dentry));
mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
snprintf(options, 7, "%s",
zfs_snapshot_no_setuid ? "nosuid" : "suid");
@@ -115,7 +115,7 @@ zfsvfs_vfs_free(vfs_t *vfsp)
if (vfsp != NULL) {
if (vfsp->vfs_mntpoint != NULL)
kmem_strfree(vfsp->vfs_mntpoint);
mutex_destroy(&vfsp->vfs_mntpt_lock);
kmem_free(vfsp, sizeof (vfs_t));
}
}
@@ -197,10 +197,11 @@ zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
vfsp->vfs_do_nbmand = B_TRUE;
break;
case TOKEN_MNTPOINT:
if (vfsp->vfs_mntpoint != NULL)
kmem_strfree(vfsp->vfs_mntpoint);
vfsp->vfs_mntpoint = match_strdup(&args[0]);
if (vfsp->vfs_mntpoint == NULL)
return (SET_ERROR(ENOMEM));
break;
default:
break;
@@ -219,6 +220,7 @@ zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
int error;
tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
mutex_init(&tmp_vfsp->vfs_mntpt_lock, NULL, MUTEX_DEFAULT, NULL);
if (mntopts != NULL) {
substring_t args[MAX_OPT_ARGS];
@@ -206,6 +206,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "PR", "PROBE" },
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
{ '.', "RD", "DIO_READ" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
+1 -1
View File
@@ -330,7 +330,7 @@ dmu_read_abd(dnode_t *dn, uint64_t offset, uint64_t size,
*/
zio_t *cio = zio_read(rio, spa, bp, mbuf, db->db.db_size,
dmu_read_abd_done, NULL, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL, &zb);
ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_READ, &zb);
mutex_exit(&db->db_mtx);
zfs_racct_read(spa, db->db.db_size, 1, flags);
+9 -2
View File
@@ -2987,6 +2987,7 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
dsl_dataset_t *ds;
uint64_t val;
dmu_tx_t *tx = ddrsa->ddrsa_tx;
char *oldname, *newname;
int error;
error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
@@ -3011,8 +3012,14 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
ds->ds_snapname, 8, 1, &ds->ds_object, tx));
zvol_rename_minors(dp->dp_spa, ddrsa->ddrsa_oldsnapname,
ddrsa->ddrsa_newsnapname, B_TRUE);
oldname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
ddrsa->ddrsa_oldsnapname);
newname = kmem_asprintf("%s@%s", ddrsa->ddrsa_fsname,
ddrsa->ddrsa_newsnapname);
zvol_rename_minors(dp->dp_spa, oldname, newname, B_TRUE);
kmem_strfree(oldname);
kmem_strfree(newname);
dsl_dataset_rele(ds, FTAG);
return (0);
+1 -1
View File
@@ -1026,7 +1026,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
ASSERT3U(vdc->vdc_nparity, >, 0);
raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth);
raidz_row_t *rr = vdev_raidz_row_alloc(groupwidth, zio);
rr->rr_bigcols = bc;
rr->rr_firstdatacol = vdc->vdc_nparity;
#ifdef ZFS_DEBUG
@@ -34,6 +34,7 @@
#include <sys/zap.h>
#include <sys/abd.h>
#include <sys/zthr.h>
#include <sys/fm/fs/zfs.h>
/*
* An indirect vdev corresponds to a vdev that has been removed. Since
@@ -1832,6 +1833,19 @@ vdev_indirect_io_done(zio_t *zio)
zio_bad_cksum_t zbc;
int ret = zio_checksum_error(zio, &zbc);
/*
* Any Direct I/O read that has a checksum error must be treated as
* suspicious as the contents of the buffer could be getting
* manipulated while the I/O is taking place. The checksum verify error
* will be reported to the top-level VDEV.
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
ret = 0;
}
if (ret == 0) {
zio_checksum_verified(zio);
return;
@@ -764,6 +764,27 @@ vdev_mirror_io_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ);
/*
* Any Direct I/O read that has a checksum error must be treated as
* suspicious as the contents of the buffer could be getting
* manipulated while the I/O is taking place. The checksum verify error
* will be reported to the top-level Mirror VDEV.
*
* There will be no attampt at reading any additional data copies. If
* the buffer is still being manipulated while attempting to read from
* another child, there exists a possibly that the checksum could be
* verified as valid. However, the buffer contents could again get
* manipulated after verifying the checksum. This would lead to bad data
* being written out during self healing.
*/
if ((zio->io_flags & ZIO_FLAG_DIO_READ) &&
(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)) {
zio_dio_chksum_verify_error_report(zio);
zio->io_error = vdev_mirror_worst_error(mm);
ASSERT3U(zio->io_error, ==, ECKSUM);
return;
}
/*
* If we don't have a good copy yet, keep trying other children.
*/
+39 -5
View File
@@ -433,7 +433,7 @@ const zio_vsd_ops_t vdev_raidz_vsd_ops = {
};
raidz_row_t *
vdev_raidz_row_alloc(int cols)
vdev_raidz_row_alloc(int cols, zio_t *zio)
{
raidz_row_t *rr =
kmem_zalloc(offsetof(raidz_row_t, rr_col[cols]), KM_SLEEP);
@@ -445,7 +445,17 @@ vdev_raidz_row_alloc(int cols)
raidz_col_t *rc = &rr->rr_col[c];
rc->rc_shadow_devidx = INT_MAX;
rc->rc_shadow_offset = UINT64_MAX;
rc->rc_allow_repair = 1;
/*
* We can not allow self healing to take place for Direct I/O
* reads. There is nothing that stops the buffer contents from
* being manipulated while the I/O is in flight. It is possible
* that the checksum could be verified on the buffer and then
* the contents of that buffer are manipulated afterwards. This
* could lead to bad data being written out during self
* healing.
*/
if (!(zio->io_flags & ZIO_FLAG_DIO_READ))
rc->rc_allow_repair = 1;
}
return (rr);
}
@@ -619,7 +629,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
}
ASSERT3U(acols, <=, scols);
rr = vdev_raidz_row_alloc(scols);
rr = vdev_raidz_row_alloc(scols, zio);
rm->rm_row[0] = rr;
rr->rr_cols = acols;
rr->rr_bigcols = bc;
@@ -765,7 +775,7 @@ vdev_raidz_map_alloc_expanded(zio_t *zio,
for (uint64_t row = 0; row < rows; row++) {
boolean_t row_use_scratch = B_FALSE;
raidz_row_t *rr = vdev_raidz_row_alloc(cols);
raidz_row_t *rr = vdev_raidz_row_alloc(cols, zio);
rm->rm_row[row] = rr;
/* The starting RAIDZ (parent) vdev sector of the row. */
@@ -2633,6 +2643,20 @@ raidz_checksum_verify(zio_t *zio)
raidz_map_t *rm = zio->io_vsd;
int ret = zio_checksum_error(zio, &zbc);
/*
* Any Direct I/O read that has a checksum error must be treated as
* suspicious as the contents of the buffer could be getting
* manipulated while the I/O is taking place. The checksum verify error
* will be reported to the top-level RAIDZ VDEV.
*/
if (zio->io_flags & ZIO_FLAG_DIO_READ && ret == ECKSUM) {
zio->io_error = ret;
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
zio_dio_chksum_verify_error_report(zio);
zio_checksum_verified(zio);
return (0);
}
if (ret != 0 && zbc.zbc_injected != 0)
rm->rm_ecksuminjected = 1;
@@ -2776,6 +2800,11 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
(rc->rc_error == 0 || rc->rc_size == 0)) {
continue;
}
/*
* We do not allow self healing for Direct I/O reads.
* See comment in vdev_raid_row_alloc().
*/
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_READ);
zfs_dbgmsg("zio=%px repairing c=%u devidx=%u "
"offset=%llx",
@@ -2979,6 +3008,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
/* Check for success */
if (raidz_checksum_verify(zio) == 0) {
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
return (0);
/* Reconstruction succeeded - report errors */
for (int i = 0; i < rm->rm_nrows; i++) {
@@ -3379,7 +3410,6 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)
zio_bad_cksum_t zbc;
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
mutex_enter(&cvd->vdev_stat_lock);
cvd->vdev_stat.vs_checksum_errors++;
mutex_exit(&cvd->vdev_stat_lock);
@@ -3444,6 +3474,9 @@ vdev_raidz_io_done(zio_t *zio)
}
if (raidz_checksum_verify(zio) == 0) {
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
goto done;
for (int i = 0; i < rm->rm_nrows; i++) {
raidz_row_t *rr = rm->rm_row[i];
vdev_raidz_io_done_verified(zio, rr);
@@ -3538,6 +3571,7 @@ vdev_raidz_io_done(zio_t *zio)
}
}
}
done:
if (rm->rm_lr != NULL) {
zfs_rangelock_exit(rm->rm_lr);
rm->rm_lr = NULL;
+31 -2
View File
@@ -77,8 +77,15 @@ static int zfs_bclone_wait_dirty = 0;
* Enable Direct I/O. If this setting is 0, then all I/O requests will be
* directed through the ARC acting as though the dataset property direct was
* set to disabled.
*
* Disabled by default on FreeBSD until a potential range locking issue in
* zfs_getpages() can be resolved.
*/
#ifdef __FreeBSD__
static int zfs_dio_enabled = 0;
#else
static int zfs_dio_enabled = 1;
#endif
/*
@@ -303,6 +310,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
(void) cr;
int error = 0;
boolean_t frsync = B_FALSE;
boolean_t dio_checksum_failure = B_FALSE;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
@@ -424,8 +432,26 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
if (error) {
/* convert checksum errors into IO errors */
if (error == ECKSUM)
error = SET_ERROR(EIO);
if (error == ECKSUM) {
/*
* If a Direct I/O read returned a checksum
* verify error, then it must be treated as
* suspicious. The contents of the buffer could
* have beeen manipulated while the I/O was in
* flight. In this case, the remainder of I/O
* request will just be reissued through the
* ARC.
*/
if (uio->uio_extflg & UIO_DIRECT) {
dio_checksum_failure = B_TRUE;
uio->uio_extflg &= ~UIO_DIRECT;
n += dio_remaining_resid;
dio_remaining_resid = 0;
continue;
} else {
error = SET_ERROR(EIO);
}
}
#if defined(__linux__)
/*
@@ -472,6 +498,9 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
out:
zfs_rangelock_exit(lr);
if (dio_checksum_failure == B_TRUE)
uio->uio_extflg |= UIO_DIRECT;
/*
* Cleanup for Direct I/O if requested.
*/
+85 -35
View File
@@ -804,11 +804,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
pio->io_reexecute |= zio->io_reexecute;
ASSERT3U(*countp, >, 0);
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
ASSERT3U(*errorp, ==, EIO);
ASSERT3U(pio->io_child_type, ==, ZIO_CHILD_LOGICAL);
/*
* Propogate the Direct I/O checksum verify failure to the parent.
*/
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR)
pio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
}
(*countp)--;
@@ -1573,6 +1573,14 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
*/
pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
/*
* We never allow the mirror VDEV to attempt reading from any
* additional data copies after the first Direct I/O checksum
* verify failure. This is to avoid bad data being written out
* through the mirror during self healing. See comment in
* vdev_mirror_io_done() for more details.
*/
ASSERT0(pio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
} else if (type == ZIO_TYPE_WRITE &&
pio->io_prop.zp_direct_write == B_TRUE) {
/*
@@ -4555,18 +4563,18 @@ zio_vdev_io_assess(zio_t *zio)
}
/*
* If a Direct I/O write checksum verify error has occurred then this
* I/O should not attempt to be issued again. Instead the EIO will
* be returned.
* If a Direct I/O operation has a checksum verify error then this I/O
* should not attempt to be issued again.
*/
if (zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR) {
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
ASSERT3U(zio->io_error, ==, EIO);
if (zio->io_type == ZIO_TYPE_WRITE) {
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_LOGICAL);
ASSERT3U(zio->io_error, ==, EIO);
}
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
return (zio);
}
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_fault_injection(zio, EIO);
@@ -4864,16 +4872,40 @@ zio_checksum_verify(zio_t *zio)
ASSERT3U(zio->io_prop.zp_checksum, ==, ZIO_CHECKSUM_LABEL);
}
ASSERT0(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
IMPLY(zio->io_flags & ZIO_FLAG_DIO_READ,
!(zio->io_flags & ZIO_FLAG_SPECULATIVE));
if ((error = zio_checksum_error(zio, &info)) != 0) {
zio->io_error = error;
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
(void) zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, &info);
if (zio->io_flags & ZIO_FLAG_DIO_READ) {
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
zio_t *pio = zio_unique_parent(zio);
/*
* Any Direct I/O read that has a checksum
* error must be treated as suspicous as the
* contents of the buffer could be getting
* manipulated while the I/O is taking place.
*
* The checksum verify error will only be
* reported here for disk and file VDEV's and
* will be reported on those that the failure
* occurred on. Other types of VDEV's report the
* verify failure in their own code paths.
*/
if (pio->io_child_type == ZIO_CHILD_LOGICAL) {
zio_dio_chksum_verify_error_report(zio);
}
} else {
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
(void) zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, &info);
}
}
}
@@ -4899,22 +4931,8 @@ zio_dio_checksum_verify(zio_t *zio)
if ((error = zio_checksum_error(zio, NULL)) != 0) {
zio->io_error = error;
if (error == ECKSUM) {
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_dio_verify_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
zio->io_error = SET_ERROR(EIO);
zio->io_flags |= ZIO_FLAG_DIO_CHKSUM_ERR;
/*
* The EIO error must be propagated up to the logical
* parent ZIO in zio_notify_parent() so it can be
* returned to dmu_write_abd().
*/
zio->io_flags &= ~ZIO_FLAG_DONT_PROPAGATE;
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
zio, 0);
zio_dio_chksum_verify_error_report(zio);
}
}
@@ -4932,6 +4950,39 @@ zio_checksum_verified(zio_t *zio)
zio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
}
/*
* Report Direct I/O checksum verify error and create ZED event.
*/
void
zio_dio_chksum_verify_error_report(zio_t *zio)
{
ASSERT(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR);
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
return;
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_dio_verify_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);
if (zio->io_type == ZIO_TYPE_WRITE) {
/*
* Convert checksum error for writes into EIO.
*/
zio->io_error = SET_ERROR(EIO);
/*
* Report dio_verify_wr ZED event.
*/
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_WR,
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
} else {
/*
* Report dio_verify_rd ZED event.
*/
(void) zfs_ereport_post(FM_EREPORT_ZFS_DIO_VERIFY_RD,
zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, 0);
}
}
/*
* ==========================================================================
* Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other.
@@ -5343,10 +5394,9 @@ zio_done(zio_t *zio)
if (zio->io_reexecute) {
/*
* A Direct I/O write that has a checksum verify error should
* not attempt to reexecute. Instead, EAGAIN should just be
* propagated back up so the write can be attempt to be issued
* through the ARC.
* A Direct I/O operation that has a checksum verify error
* should not attempt to reexecute. Instead, the error should
* just be propagated back.
*/
ASSERT(!(zio->io_flags & ZIO_FLAG_DIO_CHKSUM_ERR));
+24 -21
View File
@@ -99,10 +99,10 @@ License: @ZFS_META_LICENSE@
URL: https://github.com/openzfs/zfs
Source0: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Requires: libzpool5%{?_isa} = %{version}-%{release}
Requires: libzpool6%{?_isa} = %{version}-%{release}
Requires: libnvpair3%{?_isa} = %{version}-%{release}
Requires: libuutil3%{?_isa} = %{version}-%{release}
Requires: libzfs5%{?_isa} = %{version}-%{release}
Requires: libzfs6%{?_isa} = %{version}-%{release}
Requires: %{name}-kmod = %{version}
Provides: %{name}-kmod-common = %{version}-%{release}
Obsoletes: spl <= %{version}
@@ -150,21 +150,22 @@ Requires: sysstat
%description
This package contains the core ZFS command line utilities.
%package -n libzpool5
%package -n libzpool6
Summary: Native ZFS pool library for Linux
Group: System Environment/Kernel
Obsoletes: libzpool2 <= %{version}
Obsoletes: libzpool4 <= %{version}
Obsoletes: libzpool5 <= %{version}
%description -n libzpool5
%description -n libzpool6
This package contains the zpool library, which provides support
for managing zpools
%if %{defined ldconfig_scriptlets}
%ldconfig_scriptlets -n libzpool5
%ldconfig_scriptlets -n libzpool6
%else
%post -n libzpool5 -p /sbin/ldconfig
%postun -n libzpool5 -p /sbin/ldconfig
%post -n libzpool6 -p /sbin/ldconfig
%postun -n libzpool6 -p /sbin/ldconfig
%endif
%package -n libnvpair3
@@ -211,37 +212,39 @@ This library provides a variety of compatibility functions for OpenZFS:
# The library version is encoded in the package name. When updating the
# version information it is important to add an obsoletes line below for
# the previous version of the package.
%package -n libzfs5
%package -n libzfs6
Summary: Native ZFS filesystem library for Linux
Group: System Environment/Kernel
Obsoletes: libzfs2 <= %{version}
Obsoletes: libzfs4 <= %{version}
Obsoletes: libzfs5 <= %{version}
%description -n libzfs5
%description -n libzfs6
This package provides support for managing ZFS filesystems
%if %{defined ldconfig_scriptlets}
%ldconfig_scriptlets -n libzfs5
%ldconfig_scriptlets -n libzfs6
%else
%post -n libzfs5 -p /sbin/ldconfig
%postun -n libzfs5 -p /sbin/ldconfig
%post -n libzfs6 -p /sbin/ldconfig
%postun -n libzfs6 -p /sbin/ldconfig
%endif
%package -n libzfs5-devel
%package -n libzfs6-devel
Summary: Development headers
Group: System Environment/Kernel
Requires: libzfs5%{?_isa} = %{version}-%{release}
Requires: libzpool5%{?_isa} = %{version}-%{release}
Requires: libzfs6%{?_isa} = %{version}-%{release}
Requires: libzpool6%{?_isa} = %{version}-%{release}
Requires: libnvpair3%{?_isa} = %{version}-%{release}
Requires: libuutil3%{?_isa} = %{version}-%{release}
Provides: libzpool5-devel = %{version}-%{release}
Provides: libzpool6-devel = %{version}-%{release}
Provides: libnvpair3-devel = %{version}-%{release}
Provides: libuutil3-devel = %{version}-%{release}
Obsoletes: zfs-devel <= %{version}
Obsoletes: libzfs2-devel <= %{version}
Obsoletes: libzfs4-devel <= %{version}
Obsoletes: libzfs5-devel <= %{version}
%description -n libzfs5-devel
%description -n libzfs6-devel
This package contains the header files needed for building additional
applications against the ZFS libraries.
@@ -290,7 +293,7 @@ Summary: Python %{python_version} wrapper for libzfs_core
Group: Development/Languages/Python
License: Apache-2.0
BuildArch: noarch
Requires: libzfs5 = %{version}-%{release}
Requires: libzfs6 = %{version}-%{release}
Requires: libnvpair3 = %{version}-%{release}
Requires: libffi
Requires: python%{__python_pkg_version}
@@ -534,7 +537,7 @@ systemctl --system daemon-reload >/dev/null || true
%config(noreplace) %{_bashcompletiondir}/zfs
%config(noreplace) %{_bashcompletiondir}/zpool
%files -n libzpool5
%files -n libzpool6
%{_libdir}/libzpool.so.*
%files -n libnvpair3
@@ -543,10 +546,10 @@ systemctl --system daemon-reload >/dev/null || true
%files -n libuutil3
%{_libdir}/libuutil.so.*
%files -n libzfs5
%files -n libzfs6
%{_libdir}/libzfs*.so.*
%files -n libzfs5-devel
%files -n libzfs6-devel
%{_pkgconfigdir}/libzfs.pc
%{_pkgconfigdir}/libzfsbootenv.pc
%{_pkgconfigdir}/libzfs_core.pc
@@ -697,8 +697,8 @@ tags = ['functional', 'delegate']
tests = ['dio_aligned_block', 'dio_async_always', 'dio_async_fio_ioengines',
'dio_compression', 'dio_dedup', 'dio_encryption', 'dio_grow_block',
'dio_max_recordsize', 'dio_mixed', 'dio_mmap', 'dio_overwrites',
'dio_property', 'dio_random', 'dio_recordsize', 'dio_unaligned_block',
'dio_unaligned_filesize']
'dio_property', 'dio_random', 'dio_read_verify', 'dio_recordsize',
'dio_unaligned_block', 'dio_unaligned_filesize']
tags = ['functional', 'direct']
[tests/functional/exec]
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2022 by Triad National Security, LLC.
* Copyright (c) 2024 by Triad National Security, LLC.
*/
#include <sys/types.h>
@@ -39,51 +39,59 @@
#define MIN(a, b) ((a) < (b)) ? (a) : (b)
#endif
static char *outputfile = NULL;
static char *filename = NULL;
static int blocksize = 131072; /* 128K */
static int wr_err_expected = 0;
static int err_expected = 0;
static int read_op = 0;
static int write_op = 0;
static int numblocks = 100;
static char *execname = NULL;
static int print_usage = 0;
static int randompattern = 0;
static int ofd;
static int fd;
char *buf = NULL;
typedef struct {
int entire_file_written;
int entire_file_completed;
} pthread_args_t;
static void
usage(void)
{
(void) fprintf(stderr,
"usage %s -o outputfile [-b blocksize] [-e wr_error_expected]\n"
" [-n numblocks] [-p randpattern] [-h help]\n"
"usage %s -f filename [-b blocksize] [-e wr_error_expected]\n"
" [-n numblocks] [-p randompattern] -r read_op \n"
" -w write_op [-h help]\n"
"\n"
"Testing whether checksum verify works correctly for O_DIRECT.\n"
"when manipulating the contents of a userspace buffer.\n"
"\n"
" outputfile: File to write to.\n"
" blocksize: Size of each block to write (must be at \n"
" least >= 512).\n"
" wr_err_expected: Whether pwrite() is expected to return EIO\n"
" while manipulating the contents of the\n"
" buffer.\n"
" numblocks: Total number of blocksized blocks to\n"
" write.\n"
" randpattern: Fill data buffer with random data. Default\n"
" behavior is to fill the buffer with the \n"
" known data pattern (0xdeadbeef).\n"
" filename: File to read or write to.\n"
" blocksize: Size of each block to write (must be at \n"
" least >= 512).\n"
" err_expected: Whether write() is expected to return EIO\n"
" while manipulating the contents of the\n"
" buffer.\n"
" numblocks: Total number of blocksized blocks to\n"
" write.\n"
" read_op: Perform reads to the filename file while\n"
" while manipulating the buffer contents\n"
" write_op: Perform writes to the filename file while\n"
" manipulating the buffer contents\n"
" randompattern: Fill data buffer with random data for \n"
" writes. Default behavior is to fill the \n"
" buffer with known data pattern (0xdeadbeef)\n"
" help: Print usage information and exit.\n"
"\n"
" Required parameters:\n"
" outputfile\n"
" filename\n"
" read_op or write_op\n"
"\n"
" Default Values:\n"
" blocksize -> 131072\n"
" wr_err_expexted -> false\n"
" numblocks -> 100\n"
" randpattern -> false\n",
" randompattern -> false\n",
execname);
(void) exit(1);
}
@@ -97,16 +105,21 @@ parse_options(int argc, char *argv[])
extern int optind, optopt;
execname = argv[0];
while ((c = getopt(argc, argv, "b:ehn:o:p")) != -1) {
while ((c = getopt(argc, argv, "b:ef:hn:rw")) != -1) {
switch (c) {
case 'b':
blocksize = atoi(optarg);
break;
case 'e':
wr_err_expected = 1;
err_expected = 1;
break;
case 'f':
filename = optarg;
break;
case 'h':
print_usage = 1;
break;
@@ -115,12 +128,12 @@ parse_options(int argc, char *argv[])
numblocks = atoi(optarg);
break;
case 'o':
outputfile = optarg;
case 'r':
read_op = 1;
break;
case 'p':
randompattern = 1;
case 'w':
write_op = 1;
break;
case ':':
@@ -141,7 +154,8 @@ parse_options(int argc, char *argv[])
if (errflag || print_usage == 1)
(void) usage();
if (blocksize < 512 || outputfile == NULL || numblocks <= 0) {
if (blocksize < 512 || filename == NULL || numblocks <= 0 ||
(read_op == 0 && write_op == 0)) {
(void) fprintf(stderr,
"Required paramater(s) missing or invalid.\n");
(void) usage();
@@ -160,10 +174,10 @@ write_thread(void *arg)
ssize_t wrote = 0;
pthread_args_t *args = (pthread_args_t *)arg;
while (!args->entire_file_written) {
wrote = pwrite(ofd, buf, blocksize, offset);
while (!args->entire_file_completed) {
wrote = pwrite(fd, buf, blocksize, offset);
if (wrote != blocksize) {
if (wr_err_expected)
if (err_expected)
assert(errno == EIO);
else
exit(2);
@@ -173,7 +187,35 @@ write_thread(void *arg)
left -= blocksize;
if (left == 0)
args->entire_file_written = 1;
args->entire_file_completed = 1;
}
pthread_exit(NULL);
}
/*
* Read blocksize * numblocks to the file using O_DIRECT.
*/
static void *
read_thread(void *arg)
{
size_t offset = 0;
int total_data = blocksize * numblocks;
int left = total_data;
ssize_t read = 0;
pthread_args_t *args = (pthread_args_t *)arg;
while (!args->entire_file_completed) {
read = pread(fd, buf, blocksize, offset);
if (read != blocksize) {
exit(2);
}
offset = ((offset + blocksize) % total_data);
left -= blocksize;
if (left == 0)
args->entire_file_completed = 1;
}
pthread_exit(NULL);
@@ -189,7 +231,7 @@ manipulate_buf_thread(void *arg)
char rand_char;
pthread_args_t *args = (pthread_args_t *)arg;
while (!args->entire_file_written) {
while (!args->entire_file_completed) {
rand_offset = (rand() % blocksize);
rand_char = (rand() % (126 - 33) + 33);
buf[rand_offset] = rand_char;
@@ -202,9 +244,9 @@ int
main(int argc, char *argv[])
{
const char *datapattern = "0xdeadbeef";
int ofd_flags = O_WRONLY | O_CREAT | O_DIRECT;
int fd_flags = O_DIRECT;
mode_t mode = S_IRUSR | S_IWUSR;
pthread_t write_thr;
pthread_t io_thr;
pthread_t manipul_thr;
int left = blocksize;
int offset = 0;
@@ -213,9 +255,15 @@ main(int argc, char *argv[])
parse_options(argc, argv);
ofd = open(outputfile, ofd_flags, mode);
if (ofd == -1) {
(void) fprintf(stderr, "%s, %s\n", execname, outputfile);
if (write_op) {
fd_flags |= (O_WRONLY | O_CREAT);
} else {
fd_flags |= O_RDONLY;
}
fd = open(filename, fd_flags, mode);
if (fd == -1) {
(void) fprintf(stderr, "%s, %s\n", execname, filename);
perror("open");
exit(2);
}
@@ -228,24 +276,22 @@ main(int argc, char *argv[])
exit(2);
}
if (!randompattern) {
/* Putting known data pattern in buffer */
while (left) {
size_t amt = MIN(strlen(datapattern), left);
memcpy(&buf[offset], datapattern, amt);
offset += amt;
left -= amt;
if (write_op) {
if (!randompattern) {
/* Putting known data pattern in buffer */
while (left) {
size_t amt = MIN(strlen(datapattern), left);
memcpy(&buf[offset], datapattern, amt);
offset += amt;
left -= amt;
}
} else {
/* Putting random data in buffer */
for (int i = 0; i < blocksize; i++)
buf[i] = rand();
}
} else {
/* Putting random data in buffer */
for (int i = 0; i < blocksize; i++)
buf[i] = rand();
}
/*
* Writing using O_DIRECT while manipulating the buffer contents until
* the entire file is written.
*/
if ((rc = pthread_create(&manipul_thr, NULL, manipulate_buf_thread,
&args))) {
fprintf(stderr, "error: pthreads_create, manipul_thr, "
@@ -253,18 +299,34 @@ main(int argc, char *argv[])
exit(2);
}
if ((rc = pthread_create(&write_thr, NULL, write_thread, &args))) {
fprintf(stderr, "error: pthreads_create, write_thr, "
"rc: %d\n", rc);
exit(2);
if (write_op) {
/*
* Writing using O_DIRECT while manipulating the buffer contents
* until the entire file is written.
*/
if ((rc = pthread_create(&io_thr, NULL, write_thread, &args))) {
fprintf(stderr, "error: pthreads_create, io_thr, "
"rc: %d\n", rc);
exit(2);
}
} else {
/*
* Reading using O_DIRECT while manipulating the buffer contents
* until the entire file is read.
*/
if ((rc = pthread_create(&io_thr, NULL, read_thread, &args))) {
fprintf(stderr, "error: pthreads_create, io_thr, "
"rc: %d\n", rc);
exit(2);
}
}
pthread_join(write_thr, NULL);
pthread_join(io_thr, NULL);
pthread_join(manipul_thr, NULL);
assert(args.entire_file_written == 1);
assert(args.entire_file_completed == 1);
(void) close(ofd);
(void) close(fd);
free(buf);
@@ -1477,6 +1477,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/direct/dio_overwrites.ksh \
functional/direct/dio_property.ksh \
functional/direct/dio_random.ksh \
functional/direct/dio_read_verify.ksh \
functional/direct/dio_recordsize.ksh \
functional/direct/dio_unaligned_block.ksh \
functional/direct/dio_unaligned_filesize.ksh \
@@ -30,28 +30,39 @@
# STRATEGY:
# 1. Run different zfs/zpool -j commands and check for valid JSON
#
# -j and --json mean the same thing. Each command will be run twice, replacing
# JSONFLAG with the flag under test.
list=(
"zpool status -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
"zpool status -p -j -g --json-int --json-flat-vdevs --json-pool-key-guid"
"zpool status -j -c upath"
"zpool status -j"
"zpool status -j testpool1"
"zpool list -j"
"zpool list -j -g"
"zpool list -j -o fragmentation"
"zpool get -j size"
"zpool get -j all"
"zpool version -j"
"zfs list -j"
"zfs list -j testpool1"
"zfs get -j all"
"zfs get -j available"
"zfs mount -j"
"zfs version -j"
"zpool status JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid"
"zpool status -p JSONFLAG -g --json-int --json-flat-vdevs --json-pool-key-guid"
"zpool status JSONFLAG -c upath"
"zpool status JSONFLAG"
"zpool status JSONFLAG testpool1"
"zpool list JSONFLAG"
"zpool list JSONFLAG -g"
"zpool list JSONFLAG -o fragmentation"
"zpool get JSONFLAG size"
"zpool get JSONFLAG all"
"zpool version JSONFLAG"
"zfs list JSONFLAG"
"zfs list JSONFLAG testpool1"
"zfs get JSONFLAG all"
"zfs get JSONFLAG available"
"zfs mount JSONFLAG"
"zfs version JSONFLAG"
)
for cmd in "${list[@]}" ; do
log_must eval "$cmd | jq > /dev/null"
done
function run_json_tests
{
typeset flag=$1
for cmd in "${list[@]}" ; do
cmd=${cmd//JSONFLAG/$flag}
log_must eval "$cmd | jq > /dev/null"
done
}
log_must run_json_tests -j
log_must run_json_tests --json
log_pass "zpool and zfs commands outputted valid JSON"
@@ -113,7 +113,7 @@ wait
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
log_must test $parallel_time -lt $(($sequential_time / 3))
log_must test $parallel_time -lt $(($sequential_time / 2))
#
# export pools with import delay injectors
@@ -132,6 +132,6 @@ log_must zpool import -a -d $DEVICE_DIR -f
parallel_time=$SECONDS
log_note "asyncronously imported 4 pools in $parallel_time seconds"
log_must test $parallel_time -lt $(($sequential_time / 3))
log_must test $parallel_time -lt $(($sequential_time / 2))
log_pass "Pool imports occur in parallel"
@@ -221,7 +221,7 @@ function ddt_dedup_vdev_limit
# For here, we just set the entry count a little higher than what we
# expect to allow for some instability.
#
log_must test $(ddt_entries) -le 600000
log_must test $(ddt_entries) -le 650000
do_clean
}
@@ -84,8 +84,9 @@ function get_zpool_status_chksum_verify_failures # pool_name vdev_type
function get_zed_dio_verify_events # pool
{
typeset pool=$1
typeset op=$2
val=$(zpool events $pool | grep -c dio_verify)
val=$(zpool events $pool | grep -c "dio_verify_${op}")
echo "$val"
}
@@ -96,11 +97,12 @@ function get_zed_dio_verify_events # pool
# zpool events
# After getting that counts will clear the out the ZPool errors and events
#
function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors
function check_dio_chksum_verify_failures # pool vdev_type op expect_errors
{
typeset pool=$1
typeset vdev_type=$2
typeset expect_errors=$3
typeset op=$4
typeset note_str="expecting none"
if [[ $expect_errors -ne 0 ]]; then
@@ -108,10 +110,10 @@ function check_dio_write_chksum_verify_failures # pool vdev_type expect_errors
fi
log_note "Checking for Direct I/O write checksum verify errors \
$note_str on ZPool: $pool"
$note_str on ZPool: $pool with $vdev_type"
status_failures=$(get_zpool_status_chksum_verify_failures $pool $vdev_type)
zed_dio_verify_events=$(get_zed_dio_verify_events $pool)
zed_dio_verify_events=$(get_zed_dio_verify_events $pool $op)
if [[ $expect_errors -ne 0 ]]; then
if [[ $status_failures -eq 0 ||
@@ -0,0 +1,107 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Triad National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/direct/dio.cfg
. $STF_SUITE/tests/functional/direct/dio.kshlib
#
# DESCRIPTION:
# Verify checksum verify works for Direct I/O reads.
#
# STRATEGY:
# 1. Create a zpool from each vdev type.
# 2. Start a Direct I/O read workload while manipulating the user buffer
# contents.
# 3. Verify there are Direct I/O read verify failures using
# zpool status -d and checking for zevents. We also make sure there
# are reported no data errors.
#
verify_runnable "global"
log_assert "Verify checksum verify works for Direct I/O reads."
log_onexit dio_cleanup
NUMBLOCKS=300
BS=$((128 * 1024)) # 128k
log_must truncate -s $MINVDEVSIZE $DIO_VDEVS
# We will verify that there are no checksum errors for every Direct I/O read
# while manipulating the buffer contents while the I/O is still in flight and
# also that Direct I/O checksum verify failures and dio_verify_rd zevents are
# reported.
for type in "" "mirror" "raidz" "draid"; do
typeset vdev_type=$type
if [[ "${vdev_type}" == "" ]]; then
vdev_type="stripe"
fi
log_note "Verifying every Direct I/O read verify with VDEV type \
${vdev_type}"
create_pool $TESTPOOL1 $type $DIO_VDEVS
log_must eval "zfs create -o recordsize=128k -o compression=off \
$TESTPOOL1/$TESTFS1"
mntpnt=$(get_prop mountpoint $TESTPOOL1/$TESTFS1)
prev_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count)
prev_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count)
# Create the file before trying to manipulate the contents
log_must stride_dd -o "$mntpnt/direct-write.iso" -i /dev/urandom \
-b $BS -c $NUMBLOCKS -D
# Manipulate the buffer contents will reading the file with Direct I/O
log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS -r
# Getting new Direct I/O and ARC Write counts.
curr_dio_rd=$(get_iostats_stat $TESTPOOL1 direct_read_count)
curr_arc_rd=$(get_iostats_stat $TESTPOOL1 arc_read_count)
total_dio_rd=$((curr_dio_rd - prev_dio_rd))
total_arc_rd=$((curr_arc_rd - prev_arc_rd))
log_note "Making sure there are no checksum errors with the ZPool"
log_must check_pool_status $TESTPOOL "errors" "No known data errors"
log_note "Making sure we have Direct I/O and ARC reads logged"
if [[ $total_dio_rd -lt 1 ]]; then
log_fail "No Direct I/O reads $total_dio_rd"
fi
if [[ $total_arc_rd -lt 1 ]]; then
log_fail "No ARC reads $total_arc_rd"
fi
log_note "Making sure we have Direct I/O write checksum verifies with ZPool"
check_dio_chksum_verify_failures "$TESTPOOL1" "$vdev_type" 1 "rd"
destroy_pool $TESTPOOL1
done
log_pass "Verified checksum verify works for Direct I/O reads."
@@ -46,7 +46,7 @@ verify_runnable "global"
function cleanup
{
log_must rm -f "$mntpnt/direct-write.iso"
check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0
check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr"
}
log_assert "Verify stable pages work for Direct I/O writes."
@@ -76,8 +76,8 @@ do
# Manipulate the user's buffer while running O_DIRECT write
# workload with the buffer.
log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS
log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS -w
# Reading back the contents of the file
log_must stride_dd -i $mntpnt/direct-write.iso -o /dev/null \
@@ -91,8 +91,8 @@ log_must set_tunable32 VDEV_DIRECT_WR_VERIFY 0
log_note "Verifying no panics for Direct I/O writes with compression"
log_must zfs set compression=on $TESTPOOL/$TESTFS
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" -n $NUMBLOCKS \
-b $BS
log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" -n $NUMBLOCKS \
-b $BS -w
curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
total_dio_wr=$((curr_dio_wr - prev_dio_wr))
@@ -116,8 +116,8 @@ for i in $(seq 1 $ITERATIONS); do
$i of $ITERATIONS with zfs_vdev_direct_write_verify=0"
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS
log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS -w
# Reading file back to verify checksum errors
filesize=$(get_file_size "$mntpnt/direct-write.iso")
@@ -144,7 +144,7 @@ for i in $(seq 1 $ITERATIONS); do
fi
log_note "Making sure we have no Direct I/O write checksum verifies \
with ZPool"
check_dio_write_chksum_verify_failures $TESTPOOL "raidz" 0
check_dio_chksum_verify_failures $TESTPOOL "raidz" 0 "wr"
log_must rm -f "$mntpnt/direct-write.iso"
done
@@ -166,8 +166,8 @@ for i in $(seq 1 $ITERATIONS); do
$ITERATIONS with zfs_vdev_direct_write_verify=1"
prev_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
log_must manipulate_user_buffer -o "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS -e
log_must manipulate_user_buffer -f "$mntpnt/direct-write.iso" \
-n $NUMBLOCKS -b $BS -e -w
# Reading file back to verify there no are checksum errors
filesize=$(get_file_size "$mntpnt/direct-write.iso")
@@ -175,7 +175,7 @@ for i in $(seq 1 $ITERATIONS); do
log_must stride_dd -i "$mntpnt/direct-write.iso" -o /dev/null -b $BS \
-c $num_blocks
# Getting new Direct I/O and ARC Write counts.
# Getting new Direct I/O write counts.
curr_dio_wr=$(get_iostats_stat $TESTPOOL direct_write_count)
total_dio_wr=$((curr_dio_wr - prev_dio_wr))
@@ -188,7 +188,7 @@ for i in $(seq 1 $ITERATIONS); do
fi
log_note "Making sure we have Direct I/O write checksum verifies with ZPool"
check_dio_write_chksum_verify_failures "$TESTPOOL" "raidz" 1
check_dio_chksum_verify_failures "$TESTPOOL" "raidz" 1 "wr"
done
log_must rm -f "$mntpnt/direct-write.iso"
@@ -48,6 +48,8 @@ function cleanup
log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
$ORIG_SCAN_SUSPEND_PROGRESS
log_must set_tunable32 RESILVER_DEFER_PERCENT \
$ORIG_RESILVER_DEFER_PERCENT
log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
log_must zinject -c all
destroy_pool $TESTPOOL1
@@ -90,6 +92,7 @@ log_assert "Check for unnecessary resilver restarts"
ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
ORIG_RESILVER_DEFER_PERCENT=$(get_tunable RESILVER_DEFER_PERCENT)
ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
set -A RESTARTS -- '1' '2' '2' '2'
@@ -117,5 +117,18 @@ log_must zfs set snapdev=visible $TESTPOOL
verify_inherited 'snapdev' 'hidden' $SUBZVOL $VOLFS
blockdev_missing $SUBSNAPDEV
blockdev_exists $SNAPDEV
log_must zfs destroy $SNAP
# 4. Verify "rename" is correctly reflected when "snapdev=visible"
# 4.1 First create a snapshot and verify the device is present
log_must zfs snapshot $SNAP
log_must zfs set snapdev=visible $ZVOL
blockdev_exists $SNAPDEV
# 4.2 rename the snapshot and verify the devices are updated
log_must zfs rename $SNAP $SNAP-new
blockdev_missing $SNAPDEV
blockdev_exists $SNAPDEV-new
# 4.3 cleanup
log_must zfs destroy $SNAP-new
log_pass "ZFS volume property 'snapdev' works as expected"
+15 -3
View File
@@ -124,6 +124,7 @@ TOPOLOGY=
BAY=
ENCL_ID=""
UNIQ_ENCL_ID=""
ZPAD=1
usage() {
cat << EOF
@@ -154,7 +155,7 @@ map_slot() {
if [ -z "$MAPPED_SLOT" ] ; then
MAPPED_SLOT=$LINUX_SLOT
fi
printf "%d" "${MAPPED_SLOT}"
printf "%0${ZPAD}d" "${MAPPED_SLOT}"
}
map_channel() {
@@ -430,6 +431,15 @@ sas_handler() {
d=$(eval echo '$'{$i})
SLOT=$(echo "$d" | sed -e 's/^.*://')
;;
"bay_lun")
# Like 'bay' but with the LUN number appened. Added for SAS
# multi-actuator HDDs, where one physical drive has multiple
# LUNs, thus multiple logical drives share the same bay number
i=$((i + 2))
d=$(eval echo '$'{$i})
LUN="-lun$(echo "$d" | sed -e 's/^.*://')"
SLOT=$(cat "$end_device_dir/bay_identifier" 2>/dev/null)
;;
"ses")
# look for this SAS path in all SCSI Enclosure Services
# (SES) enclosures
@@ -460,7 +470,7 @@ sas_handler() {
if [ -z "$CHAN" ] ; then
return
fi
echo "${CHAN}"-"${JBOD}"-"${SLOT}${PART}"
echo "${CHAN}"-"${JBOD}"-"${SLOT}${LUN}${PART}"
else
CHAN=$(map_channel "$PCI_ID" "$PORT")
SLOT=$(map_slot "$SLOT" "$CHAN")
@@ -468,7 +478,7 @@ sas_handler() {
if [ -z "$CHAN" ] ; then
return
fi
echo "${CHAN}${SLOT}${PART}"
echo "${CHAN}${SLOT}${LUN}${PART}"
fi
}
@@ -748,6 +758,8 @@ if [ -z "$BAY" ] ; then
BAY=$(awk '($1 == "slot") {print $2; exit}' "$CONFIG")
fi
ZPAD=$(awk '($1 == "zpad_slot") {print $2; exit}' "$CONFIG")
TOPOLOGY=${TOPOLOGY:-sas_direct}
# Should we create /dev/by-enclosure symlinks?
+3 -12
View File
@@ -687,15 +687,6 @@
/* All required iov_iter interfaces are available */
/* #undef HAVE_VFS_IOV_ITER */
/* fops->iterate() is available */
/* #undef HAVE_VFS_ITERATE */
/* fops->iterate_shared() is available */
/* #undef HAVE_VFS_ITERATE_SHARED */
/* fops->readdir() is available */
/* #undef HAVE_VFS_READDIR */
/* address_space_operations->readpages exists */
/* #undef HAVE_VFS_READPAGES */
@@ -801,7 +792,7 @@
/* #undef ZFS_DEVICE_MINOR */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.3.0-rc1-FreeBSD_g3a9fca901"
#define ZFS_META_ALIAS "zfs-2.3.99-31-FreeBSD_gb2f6de7b5"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -831,10 +822,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "rc1-FreeBSD_g3a9fca901"
#define ZFS_META_RELEASE "31-FreeBSD_gb2f6de7b5"
/* Define the project version. */
#define ZFS_META_VERSION "2.3.0"
#define ZFS_META_VERSION "2.3.99"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
+1 -1
View File
@@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.3.0-rc1-0-g3a9fca901"
#define ZFS_META_GITREV "zfs-2.3.99-31-gb2f6de7b5"