zfs: merge openzfs/zfs@a170134fe
Notable upstream pull request merges:
 #18372 eaaea55b6 Consistently encode DRR_BEGIN packed nvlist payloads with NV_ENCODE_XDR
 #18410 891e379d0 Fix failfast default and usage
 #18470 a2d053329 zdb: Add some more file layout output, triggered by -v
 #18472 d50f5b6d0 dsl_dir: avoid dd_lock during snapshots_changed updates
 #18493 d65015938 Vdev allocation bias/class change
 #18497 8fdc86675 zfs: annotate nested dd_lock in reservation sync accounting
 #18494 956deba27 zdb: detect BRT and DDT leaks during block traversal
 #18499 c7cfe0805 zarcstat: detect attached L2ARC device with no data
 #18503 439b802e7 sa: fix sa_add_projid lock ordering
 #18508 968f4db03 zpool-attach.8: add EXAMPLES section
 #18513 45dddc452 zfs.4: Fix documentation of zfs_arc_dnode_reduce_percent
 #18516 8ff64005a zap: split implementation out into more files
 #18520 181e1b522 Fix double free for blocks cloned after DDT prune
 #18535 -multiple zstream: fix crashes when refcount tracking enabled
 #18536 -multiple refcount tag fixups
 #18541 a65ed7afd zpool/zfs: accept --help and -? after a subcommand
 #18544 6fb72fda0 zio_ddt_write: compute have_dvas after taking dde_io_lock
 #18546 -multiple zap: internal locking uplift
 #18550 40a87651d zap_impl: use flex array field for mzap_phys_t.mz_chunks
 #18551 -multiple zap: make the _by_dnode() op variants be the primary implementation
 #18570 112b0131b zpl_xattr: stop heap-allocating prefixed xattr names
 #18578 4bc8c39b6 zed: Prefer dRAID distributed spares to regular ones
 #18596 e30ab5fa4 FreeBSD: Make it possible to build openzfs.ko with sanitizers
 #18597 472ddca11 zed: Prefer spares with matching rotational and size
 #18599 c90dc2808 enforce exact decompressed length for lz4, gzip, and zstd
 #18603 -multiple zap: add zap_cursor_init_by_dnode; cursor unit tests; mock dnode refcounts
 #18604 59dc88602 nvpair: Check for un-terminated strings in packed nvlist
 #18606 ef6f26145 When reading a vdev label skip libzfs_core_init()
 #18613 0aa4088dc sharenfs: Check for invalid characters
 #18615 80fb85b80 Fix the integer type in zfs_ioc_userspace_many()
 #18616 e199f6d98 Fix uninitialized variable warning in vdev_prop_get()
 #18617 7de42602c Extend dataset zfs_ioc_set_prop() secpolicy
 #18622 5fea0c838 Parallelize metaslab_sync_done() calls
 #18623 cab50d5ad Add additional verification of size fields and strings
 #18630 -multiple zap: misc function removal / uplift / tests
 #18633 a8ef128da Fix uninitialized variable warning in zil_parse()

Obtained from: OpenZFS
OpenZFS commit: a170134feb
This commit is contained in:
@@ -163,6 +163,8 @@ KERNEL_C = \
|
||||
vdev_root.c \
|
||||
vdev_trim.c \
|
||||
zap.c \
|
||||
zap_fat.c \
|
||||
zap_impl.c \
|
||||
zap_leaf.c \
|
||||
zap_micro.c \
|
||||
zcp.c \
|
||||
|
||||
@@ -346,6 +346,8 @@ contrib/openzfs/module/zfs/vdev_removal.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_root.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/vdev_trim.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zap.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zap_fat.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zap_impl.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zap_leaf.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zap_micro.c optional zfs compile-with "${ZFS_C}"
|
||||
contrib/openzfs/module/zfs/zcp.c optional zfs compile-with "${ZFS_C}"
|
||||
|
||||
+82
-47
@@ -1,61 +1,96 @@
|
||||
|
||||
## The testings are done this way
|
||||
## CI overview
|
||||
|
||||
The main test pipeline is `zfs-qemu.yml`. Code checking and other
|
||||
workflows run independently alongside it.
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph CleanUp and Summary
|
||||
CleanUp+Summary
|
||||
subgraph Functional testing
|
||||
Setup[test-config: pick ci_type + OS matrix]
|
||||
Setup --> almalinux
|
||||
Setup --> centos[centos-stream]
|
||||
Setup --> debian
|
||||
Setup --> fedora
|
||||
Setup --> ubuntu
|
||||
Setup --> freebsd
|
||||
almalinux --> Cleanup[cleanup + summary]
|
||||
centos --> Cleanup
|
||||
debian --> Cleanup
|
||||
fedora --> Cleanup
|
||||
ubuntu --> Cleanup
|
||||
freebsd --> Cleanup
|
||||
end
|
||||
|
||||
subgraph Functional Testings
|
||||
sanity-checks-20.04
|
||||
zloop-checks-20.04
|
||||
functional-testing-20.04-->Part1-20.04
|
||||
functional-testing-20.04-->Part2-20.04
|
||||
functional-testing-20.04-->Part3-20.04
|
||||
functional-testing-20.04-->Part4-20.04
|
||||
functional-testing-22.04-->Part1-22.04
|
||||
functional-testing-22.04-->Part2-22.04
|
||||
functional-testing-22.04-->Part3-22.04
|
||||
functional-testing-22.04-->Part4-22.04
|
||||
sanity-checks-22.04
|
||||
zloop-checks-22.04
|
||||
end
|
||||
|
||||
subgraph Code Checking + Building
|
||||
Build-Ubuntu-20.04
|
||||
subgraph Code checking
|
||||
checkstyle.yaml
|
||||
codeql.yml
|
||||
checkstyle.yml
|
||||
Build-Ubuntu-22.04
|
||||
smatch.yml
|
||||
end
|
||||
|
||||
Build-Ubuntu-20.04-->sanity-checks-20.04
|
||||
Build-Ubuntu-20.04-->zloop-checks-20.04
|
||||
Build-Ubuntu-20.04-->functional-testing-20.04
|
||||
Build-Ubuntu-22.04-->sanity-checks-22.04
|
||||
Build-Ubuntu-22.04-->zloop-checks-22.04
|
||||
Build-Ubuntu-22.04-->functional-testing-22.04
|
||||
|
||||
sanity-checks-20.04-->CleanUp+Summary
|
||||
Part1-20.04-->CleanUp+Summary
|
||||
Part2-20.04-->CleanUp+Summary
|
||||
Part3-20.04-->CleanUp+Summary
|
||||
Part4-20.04-->CleanUp+Summary
|
||||
Part1-22.04-->CleanUp+Summary
|
||||
Part2-22.04-->CleanUp+Summary
|
||||
Part3-22.04-->CleanUp+Summary
|
||||
Part4-22.04-->CleanUp+Summary
|
||||
sanity-checks-22.04-->CleanUp+Summary
|
||||
subgraph Other workflows
|
||||
zfs-arm.yml
|
||||
zloop.yml
|
||||
labels.yml
|
||||
end
|
||||
```
|
||||
|
||||
Every `qemu-vm` matrix entry runs on a fixed `ubuntu-24.04` host.
|
||||
The steps inside one entry are:
|
||||
|
||||
1) build zfs modules for Ubuntu 20.04 and 22.04 (~15m)
|
||||
2) 2x zloop test (~10m) + 2x sanity test (~25m)
|
||||
3) 4x functional testings in parts 1..4 (each ~1h)
|
||||
4) cleanup and create summary
|
||||
- content of summary depends on the results of the steps
|
||||
1) set up QEMU and boot the guest (~2-4m)
|
||||
2) install build dependencies in the guest (~2-4m)
|
||||
3) build zfs modules in the guest (~8-12m)
|
||||
4) run functional tests (~2-4h)
|
||||
5) package and upload per-OS test logs (~10s)
|
||||
|
||||
When everything runs fine, the full run should be done in
|
||||
about 2 hours.
|
||||
A per-OS entry takes about 3 to 4 hours. Once all entries finish, the
|
||||
`cleanup` job aggregates the results into a summary.
|
||||
|
||||
The codeql.yml and checkstyle.yml are not part in this circle.
|
||||
### `ci_type` selection
|
||||
|
||||
`test-config` runs `.github/workflows/scripts/generate-ci-type.py` against
|
||||
the PR's changed files and picks one of:
|
||||
|
||||
| `ci_type` | OS matrix |
|
||||
|-----------|--------------------------------------------|
|
||||
| `docs` | empty (documentation-only PRs) |
|
||||
| `quick` | 6 Linux + 1 FreeBSD |
|
||||
| `linux` | all supported Linux distros |
|
||||
| `freebsd` | all supported FreeBSD versions |
|
||||
| default | cross-platform sample |
|
||||
|
||||
Pushes to `openzfs/zfs` skip the matrix entirely; only PRs (and pushes to
|
||||
forks) build.
|
||||
|
||||
Authors can force a specific ci_type by adding `ZFS-CI-Type: <type>` to
|
||||
the most recent commit message. The `ZTS_OS_OVERRIDE` repository variable
|
||||
can also alter the selection. The `workflow_dispatch` trigger accepts
|
||||
`fedora_kernel_ver` (Fedora-only run with a chosen kernel) and
|
||||
`specific_os` (pin the matrix to one OS).
|
||||
|
||||
### Supported guests
|
||||
|
||||
Auto-selected:
|
||||
|
||||
- Linux: almalinux 8/9/10, centos-stream 9/10, debian 11/12/13,
|
||||
fedora 43/44, ubuntu 22/24/26
|
||||
- FreeBSD: 14.4-RELEASE/STABLE, 15.0-RELEASE, 15.1-STABLE, 16.0-CURRENT
|
||||
|
||||
Available via `specific_os` or `ZTS_OS_OVERRIDE`:
|
||||
|
||||
- archlinux, tumbleweed
|
||||
|
||||
### Code checking
|
||||
|
||||
- `checkstyle.yaml`: source-style checks
|
||||
- `codeql.yml`: CodeQL analysis
|
||||
- `smatch.yml`: smatch analysis
|
||||
|
||||
### Other workflows
|
||||
|
||||
- `zfs-arm.yml`: ARM build on `ubuntu-24.04-arm`
|
||||
- `zloop.yml`: host-side zloop
|
||||
- `labels.yml`: maintains PR status labels
|
||||
- `zfs-qemu-packages.yml`: manually dispatched, builds release RPMs or
|
||||
tests RPM installation from the ZFS yum repo
|
||||
|
||||
+1
-1
@@ -12,7 +12,7 @@ jobs:
|
||||
checkstyle:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Install dependencies
|
||||
|
||||
+4
-4
@@ -11,7 +11,7 @@ concurrency:
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
@@ -31,15 +31,15 @@ jobs:
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
config-file: .github/codeql-${{ matrix.language }}.yml
|
||||
languages: ${{ matrix.language }}
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
uses: github/codeql-action/autobuild@v4
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
uses: github/codeql-action/analyze@v4
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
Output format: "<type> <source>" where source is "manual" (from
|
||||
ZFS-CI-Type commit tag) or "auto" (from file change heuristics).
|
||||
|
||||
Prints "docs auto" if every changed file is documentation; the qemu
|
||||
matrix is skipped in that case.
|
||||
|
||||
Prints "quick manual" if:
|
||||
- the *last* commit message contains 'ZFS-CI-Type: quick'
|
||||
or "quick auto" if (heuristics):
|
||||
@@ -28,10 +31,24 @@
|
||||
r'.*\.gitignore'
|
||||
]))
|
||||
|
||||
"""
|
||||
Patterns of files that are documentation only.
|
||||
"""
|
||||
DOCS_ONLY_REGEX = list(map(re.compile, [
|
||||
r'man/.*',
|
||||
r'.*\.md',
|
||||
r'AUTHORS',
|
||||
r'COPYRIGHT',
|
||||
r'LICENSE',
|
||||
r'NOTICE',
|
||||
r'\.gitignore',
|
||||
]))
|
||||
|
||||
"""
|
||||
Patterns of files that are considered to trigger full CI.
|
||||
"""
|
||||
FULL_RUN_REGEX = list(map(re.compile, [
|
||||
r'\.github/workflows/.*\.ya?ml',
|
||||
r'\.github/workflows/scripts/.*',
|
||||
r'cmd.*',
|
||||
r'configs/.*',
|
||||
@@ -116,6 +133,12 @@ def output_type(type, source, reason):
|
||||
f'changed file "{f}" matches pattern "{r.pattern}"'
|
||||
)
|
||||
|
||||
if changed_files and all(
|
||||
any(r.match(f) for r in DOCS_ONLY_REGEX)
|
||||
for f in changed_files):
|
||||
output_type('docs', 'auto',
|
||||
'all changed files are documentation')
|
||||
|
||||
# catch-all
|
||||
output_type('quick', 'auto',
|
||||
'no changed file matches full CI patterns')
|
||||
|
||||
@@ -17,6 +17,8 @@ sudo docker builder prune -a
|
||||
unneeded="microsoft-edge-stable|azure-cli|google-cloud|google-chrome-stable|"\
|
||||
"temurin|llvm|firefox|mysql-server|snapd|android|dotnet|haskell|ghcup|"\
|
||||
"powershell|julia|swift|miniconda|chromium"
|
||||
# refresh package index before removing packages
|
||||
sudo apt-get -y update
|
||||
# Remove every installed package whose name matches one of the alternatives
# in $unneeded. The pattern is passed to grep without embedded single quotes:
# inside double quotes they are literal characters, so they would become part
# of the regex and keep the first and last alternatives from ever matching.
sudo apt-get -y remove $(dpkg-query -f '${binary:Package}\n' -W | grep -E "$unneeded")
|
||||
sudo apt-get -y autoremove
|
||||
|
||||
|
||||
+29
-24
@@ -28,6 +28,7 @@ NIC="virtio"
|
||||
# additional options for virt-install
|
||||
OPTS[0]=""
|
||||
OPTS[1]=""
|
||||
ALT_URL=""
|
||||
|
||||
case "$OS" in
|
||||
almalinux8)
|
||||
@@ -56,11 +57,22 @@ case "$OS" in
|
||||
centos-stream9)
|
||||
OSNAME="CentOS Stream 9"
|
||||
URL="https://cloud.centos.org/centos/9-stream/x86_64/images/CentOS-Stream-GenericCloud-9-latest.x86_64.qcow2"
|
||||
|
||||
# Sometimes we get HTTP errors for the first link. Fall back to the
|
||||
# "Composes" repo as an alternative. The "Composes" repo includes
|
||||
# autogenerated nightly CentOS Stream images. We have to lookup the URL
|
||||
# dynamically since the qcow2 file name has the date in it.
|
||||
ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-9-*.x86_64.qcow2" --spider -np --recursive --no-verbose \
|
||||
https://composes.stream.centos.org/stream-9/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \
|
||||
awk '/200 OK/{print $(NF-2)}')
|
||||
;;
|
||||
centos-stream10)
|
||||
OSNAME="CentOS Stream 10"
|
||||
OSv="centos-stream9"
|
||||
URL="https://cloud.centos.org/centos/10-stream/x86_64/images/CentOS-Stream-GenericCloud-10-latest.x86_64.qcow2"
|
||||
ALT_URL=$(wget --accept "CentOS-Stream-GenericCloud-10-*.x86_64.qcow2" --spider -np --recursive --no-verbose \
|
||||
https://composes.stream.centos.org/stream-10/production/latest-CentOS-Stream/compose/BaseOS/x86_64/images/ 2>&1 | \
|
||||
awk '/200 OK/{print $(NF-2)}')
|
||||
;;
|
||||
debian11)
|
||||
OSNAME="Debian 11"
|
||||
@@ -78,11 +90,6 @@ case "$OS" in
|
||||
OPTS[0]="--boot"
|
||||
OPTS[1]="uefi=on"
|
||||
;;
|
||||
fedora42)
|
||||
OSNAME="Fedora 42"
|
||||
OSv="fedora-unknown"
|
||||
URL="https://download.fedoraproject.org/pub/fedora/linux/releases/42/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-42-1.1.x86_64.qcow2"
|
||||
;;
|
||||
fedora43)
|
||||
OSNAME="Fedora 43"
|
||||
OSv="fedora-unknown"
|
||||
@@ -93,14 +100,6 @@ case "$OS" in
|
||||
OSv="fedora-unknown"
|
||||
URL="https://download.fedoraproject.org/pub/fedora/linux/releases/44/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-44-1.7.x86_64.qcow2"
|
||||
;;
|
||||
freebsd13-5r)
|
||||
FreeBSD="13.5-RELEASE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
OSv="freebsd13.0"
|
||||
URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
|
||||
KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
|
||||
NIC="rtl8139"
|
||||
;;
|
||||
freebsd14-4r)
|
||||
FreeBSD="14.4-RELEASE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
@@ -111,18 +110,10 @@ case "$OS" in
|
||||
freebsd15-0r)
|
||||
FreeBSD="15.0-RELEASE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
OSv="freebsd15.0"
|
||||
OSv="freebsd14.0"
|
||||
URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"
|
||||
KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz"
|
||||
;;
|
||||
freebsd13-5s)
|
||||
FreeBSD="13.5-STABLE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
OSv="freebsd13.0"
|
||||
URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz"
|
||||
KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"
|
||||
NIC="rtl8139"
|
||||
;;
|
||||
freebsd14-4s)
|
||||
FreeBSD="14.4-STABLE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
@@ -131,7 +122,7 @@ case "$OS" in
|
||||
KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz"
|
||||
;;
|
||||
freebsd15-1s)
|
||||
FreeBSD="15.1-PRERELEASE"
|
||||
FreeBSD="15.1-STABLE"
|
||||
OSNAME="FreeBSD $FreeBSD"
|
||||
OSv="freebsd14.0"
|
||||
URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz"
|
||||
@@ -160,6 +151,11 @@ case "$OS" in
|
||||
OSv="ubuntu24.04"
|
||||
URL="$UBMIRROR/noble/current/noble-server-cloudimg-amd64.img"
|
||||
;;
|
||||
ubuntu26)
|
||||
OSNAME="Ubuntu 26.04"
|
||||
OSv="ubuntu24.04"
|
||||
URL="$UBMIRROR/resolute/current/resolute-server-cloudimg-amd64.img"
|
||||
;;
|
||||
*)
|
||||
echo "Wrong value for OS variable!"
|
||||
exit 111
|
||||
@@ -173,7 +169,6 @@ echo "ENV=$ENV" >> $ENV
|
||||
# result path
|
||||
echo 'RESPATH="/var/tmp/test_results"' >> $ENV
|
||||
|
||||
# FreeBSD 13 has problems with: e1000 and virtio
|
||||
echo "NIC=$NIC" >> $ENV
|
||||
|
||||
# freebsd15 -> used in zfs-qemu.yml
|
||||
@@ -221,6 +216,16 @@ for cmd in 'axel -q -o' 'curl --fail -LSs -o' ; do
|
||||
if [ -s "$IMG" ] ; then
|
||||
# Successful download
|
||||
break
|
||||
else
|
||||
if [ -n "$ALT_URL" ] ; then
|
||||
# Try the $ALT_URL if specified
|
||||
echo "Loading alternative $ALT_URL with $cmd..."
|
||||
time eval "$cmd $IMG $ALT_URL"
|
||||
if [ -s "$IMG" ]; then
|
||||
# Successful ALT_URL download
|
||||
break
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
@@ -215,7 +215,7 @@ case "$1" in
|
||||
tumbleweed)
|
||||
tumbleweed
|
||||
;;
|
||||
ubuntu*)
|
||||
ubuntu22|ubuntu24)
|
||||
debian
|
||||
echo "##[group]Install Ubuntu specific"
|
||||
sudo apt-get install -yq linux-tools-common libtirpc-dev \
|
||||
@@ -226,6 +226,27 @@ case "$1" in
|
||||
# https://github.com/actions/runner-images/issues/9946
|
||||
sudo apt-get install -yq build-essential
|
||||
|
||||
echo "##[endgroup]"
|
||||
echo "##[group]Delete Ubuntu OpenZFS modules"
|
||||
for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done
|
||||
echo "##[endgroup]"
|
||||
;;
|
||||
ubuntu26)
|
||||
debian
|
||||
echo "##[group]Install Ubuntu specific"
|
||||
# Skip linux-modules-extra which is already installed
|
||||
sudo apt-get install -yq linux-tools-common
|
||||
sudo apt-get install -yq libtirpc-dev
|
||||
sudo apt-get install -yq dh-sequence-dkms
|
||||
|
||||
# Need 'build-essential' explicitly for ARM builder
|
||||
# https://github.com/actions/runner-images/issues/9946
|
||||
sudo apt-get install -yq build-essential
|
||||
|
||||
# Replace sudo-rs with sudo for now because the Rust version
|
||||
# does not support -E to preserve the entire environment
|
||||
sudo update-alternatives --set sudo /usr/bin/sudo.ws
|
||||
|
||||
echo "##[endgroup]"
|
||||
echo "##[group]Delete Ubuntu OpenZFS modules"
|
||||
for i in $(find /lib/modules -name zfs -type d); do sudo rm -rvf $i; done
|
||||
@@ -267,8 +288,19 @@ case "$1" in
|
||||
;;
|
||||
debian*|ubuntu*)
|
||||
sudo -E systemctl enable nfs-kernel-server
|
||||
sudo -E systemctl enable qemu-guest-agent
|
||||
sudo -E systemctl enable smbd
|
||||
|
||||
# enable usershares (disabled by default on ubuntu 26.04)
|
||||
sudo -E sed -i '/usershare max shares/s/^#//' /etc/samba/smb.conf
|
||||
|
||||
# add systemd drop-in to allow the service to be enabled
|
||||
sudo -E mkdir -p /etc/systemd/system/qemu-guest-agent.service.d/
|
||||
sudo -E tee /etc/systemd/system/qemu-guest-agent.service.d/override.conf <<EOF
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
sudo -E systemctl daemon-reload
|
||||
sudo -E systemctl enable qemu-guest-agent
|
||||
;;
|
||||
*)
|
||||
# All other linux distros
|
||||
@@ -292,7 +324,7 @@ case "$1" in
|
||||
echo 'GRUB_SERIAL_COMMAND="serial --speed=115200"' \
|
||||
| sudo tee -a /etc/default/grub >/dev/null
|
||||
;;
|
||||
ubuntu24)
|
||||
ubuntu24|ubuntu26)
|
||||
GRUB_CFG="/boot/grub/grub.cfg"
|
||||
GRUB_MKCONFIG="grub-mkconfig"
|
||||
echo 'GRUB_DISABLE_OS_PROBER="false"' \
|
||||
|
||||
@@ -2,9 +2,12 @@
|
||||
# 3) Wait for VM to boot from previous step and launch dependencies
|
||||
# script on it.
|
||||
#
|
||||
# $1: OS name (like 'fedora41')
|
||||
# $2: (optional) Experimental kernel version to install on fedora,
|
||||
# like "6.14".
|
||||
# qemu-3-deps.sh [--poweroff] OS_NAME [FEDORA_VERSION]
|
||||
#
|
||||
# --poweroff: Power off the VM after installing dependencies
|
||||
# OS_NAME: OS name (like 'fedora41')
|
||||
# FEDORA_VERSION: (optional) Experimental Fedora kernel version, like "6.14" to
|
||||
# install instead of Fedora defaults.
|
||||
######################################################################
|
||||
|
||||
.github/workflows/scripts/qemu-wait-for-vm.sh vm0
|
||||
@@ -15,8 +18,13 @@
|
||||
# we need to update the kernel version in zfs's META file to allow the
|
||||
# build to happen. We update our local copy of META here, since we know
|
||||
# it will be rsync'd up in the next step.
|
||||
if [ -n "${2:-}" ] ; then
|
||||
sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: 99.99/g' META
|
||||
#
|
||||
# Look to see if the last argument looks like a kernel version.
|
||||
ver="${@: -1}"
|
||||
if [[ $ver =~ ^[0-9]+\.[0-9]+ ]] ; then
|
||||
# We got a kernel version, update META to say we support it so we
|
||||
# can test against it.
|
||||
sed -i -E 's/Linux-Maximum: .+/Linux-Maximum: '$ver'/g' META
|
||||
fi
|
||||
|
||||
scp .github/workflows/scripts/qemu-3-deps-vm.sh zfs@vm0:qemu-3-deps-vm.sh
|
||||
|
||||
@@ -5,10 +5,12 @@
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# qemu-4-build-vm.sh OS [--enable-debug][--dkms][--patch-level NUM]
|
||||
# [--poweroff][--release][--repo][--tarball]
|
||||
# qemu-4-build-vm.sh OS [--custom-branch BRANCH][--enable-debug][--dkms]
|
||||
# [--patch-level NUM][--poweroff][--release][--repo][--tarball]
|
||||
#
|
||||
# OS: OS name like 'fedora41'
|
||||
# --custom-branch: When building packages, checkout this version of ZFS to
|
||||
# build, but use the current CI scripts to do it.
|
||||
# --enable-debug: Build RPMs with '--enable-debug' (for testing)
|
||||
# --dkms: Build DKMS RPMs as well
|
||||
# --patch-level NUM: Use a custom patch level number for packages.
|
||||
@@ -27,8 +29,27 @@ POWEROFF=""
|
||||
RELEASE=""
|
||||
REPO=""
|
||||
TARBALL=""
|
||||
CUSTOM_BRANCH=""
|
||||
PREV_BRANCH=""
|
||||
|
||||
cleanup() {
|
||||
if [ -n "$PREV_BRANCH" ] ; then
|
||||
git checkout $PREV_BRANCH
|
||||
fi
|
||||
}
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--custom-branch)
|
||||
CUSTOM_BRANCH="$2"
|
||||
# If the user specifies a custom tag/branch to build, and the build
|
||||
# fails, we want to make sure our workflow scripts are restored to the
|
||||
# current (more modern) versions so the subsequent CI steps use those.
|
||||
shift
|
||||
shift
|
||||
PREV_BRANCH=$(git branch --show-current)
|
||||
trap 'cleanup' ERR
|
||||
;;
|
||||
--enable-debug)
|
||||
ENABLE_DEBUG=1
|
||||
shift
|
||||
@@ -337,7 +358,7 @@ fi
|
||||
#
|
||||
# rhel8.10
|
||||
# almalinux9.5
|
||||
# fedora42
|
||||
# fedora44
|
||||
source /etc/os-release
|
||||
if which hostnamectl &> /dev/null ; then
|
||||
# Fedora 42+ use hostnamectl
|
||||
@@ -367,6 +388,11 @@ if [ -n "$ENABLE_DEBUG" ] ; then
|
||||
extra="--enable-debug"
|
||||
fi
|
||||
|
||||
if [ -n "$CUSTOM_BRANCH" ] ; then
|
||||
git fetch --unshallow
|
||||
git checkout $CUSTOM_BRANCH
|
||||
fi
|
||||
|
||||
# build
|
||||
case "$OS" in
|
||||
freebsd*)
|
||||
@@ -393,6 +419,8 @@ case "$OS" in
|
||||
;;
|
||||
esac
|
||||
|
||||
git checkout $PREV_BRANCH
|
||||
PREV_BRANCH=""
|
||||
|
||||
# building the zfs module was ok
|
||||
echo 0 > /var/tmp/build-exitcode.txt
|
||||
|
||||
@@ -25,8 +25,14 @@ cd lustre-release
|
||||
|
||||
# Include Lustre patches to build against master/zfs-2.4.x. Once these
|
||||
# patches are merged we can remove these lines.
|
||||
#
|
||||
# LU-19539 osd-zfs: use osd_dmu_write() wrapper for xattrs
|
||||
# LU-19761 osd-zfs: Build against ZFS 2.4.0
|
||||
# LU-19249 build: Compatibility updates for kernel v6.16
|
||||
#
|
||||
patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download'
|
||||
'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download')
|
||||
'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download'
|
||||
'https://review.whamcloud.com/changes/fs%2Flustre-release~60619/revisions/13/patch?download')
|
||||
|
||||
for p in "${patches[@]}" ; do
|
||||
curl $p | base64 -d > patch
|
||||
|
||||
@@ -79,6 +79,7 @@ function do_builtin_build() {
|
||||
|
||||
cd $HOME/linux-$fullver
|
||||
./scripts/config --enable ZFS
|
||||
./scripts/config --enable ZFS_DEBUG
|
||||
yes "" | make oldconfig
|
||||
make -j `nproc`
|
||||
) &> /var/tmp/builtin.txt || rc=$?
|
||||
@@ -185,6 +186,13 @@ case "$OS" in
|
||||
sudo mount -o noatime /dev/vdb /var/tmp
|
||||
sudo chmod 1777 /var/tmp
|
||||
sudo mv -f /tmp/*.txt /var/tmp
|
||||
|
||||
# Allow for longer RCU timeouts due to the heavily virtualized and
|
||||
# potentially oversubscribed nature of the CI environment.
|
||||
rcu_cpu_stall_timeout="/sys/module/rcupdate/parameters/rcu_cpu_stall_timeout"
|
||||
if test -f $rcu_cpu_stall_timeout; then
|
||||
echo 120 | sudo sh -c "cat > '$rcu_cpu_stall_timeout'"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
@@ -3,6 +3,14 @@ name: smatch
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- 'man/**'
|
||||
- '**.md'
|
||||
- 'AUTHORS'
|
||||
- 'COPYRIGHT'
|
||||
- 'LICENSE'
|
||||
- 'NOTICE'
|
||||
- '.gitignore'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
@@ -10,6 +18,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
smatch:
|
||||
if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout smatch
|
||||
|
||||
+50
-1
@@ -3,11 +3,30 @@ name: zfs-arm
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- 'man/**'
|
||||
- '**.md'
|
||||
- 'AUTHORS'
|
||||
- 'COPYRIGHT'
|
||||
- 'LICENSE'
|
||||
- 'NOTICE'
|
||||
- '.gitignore'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
gcc_ver:
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
description: "(optional) install specific GCC version, like '16'"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
zfs-arm:
|
||||
name: ZFS ARM build
|
||||
if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
|
||||
runs-on: ubuntu-24.04-arm
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
@@ -18,6 +37,31 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
run: |
|
||||
sudo apt-get -y remove firefox || true
|
||||
|
||||
# Do we want to test with a custom GCC version?
|
||||
if [ "${{ github.event.inputs.gcc_ver }}" != "" ] ; then
|
||||
ver="${{ github.event.inputs.gcc_ver }}"
|
||||
|
||||
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get update
|
||||
|
||||
echo "GCCs available:"
|
||||
awk '/Package: gcc-/{print $2}' /var/lib/apt/lists/*ubuntu-toolchain-r*Packages
|
||||
|
||||
sudo apt-get -y install gcc g++ gcc-$ver g++-$ver
|
||||
|
||||
# Drop any existing gcc alternatives group before registering the requested
# version. The redirection is attached to update-alternatives itself (it was
# previously bound to `true`), and a failure, e.g. no group registered yet,
# is deliberately ignored.
sudo update-alternatives --remove-all gcc 2>&1 || true
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$ver 100
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$ver 100
|
||||
sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100
|
||||
sudo update-alternatives --set cc /usr/bin/gcc
|
||||
sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100
|
||||
sudo update-alternatives --set c++ /usr/bin/g++
|
||||
|
||||
sudo update-alternatives --set gcc "/usr/bin/gcc-$ver"
|
||||
sudo update-alternatives --set g++ "/usr/bin/g++-$ver"
|
||||
fi
|
||||
|
||||
.github/workflows/scripts/qemu-3-deps-vm.sh ubuntu24
|
||||
|
||||
# We're running the VM scripts locally on the runner, so need to fix
|
||||
@@ -28,7 +72,12 @@ jobs:
|
||||
- name: Build modules
|
||||
timeout-minutes: 30
|
||||
run: |
|
||||
.github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24
|
||||
# Even though we may have installed a newer GCC, the kernel builds don't
|
||||
# seem to honor it, and instead use the older GCC. I assume this is
|
||||
# to match up with whatever GCC version was used for the kernel. Always
|
||||
# specify KERNEL_CC to get around this. This works when using the
|
||||
# default GCC and with a custom GCC.
|
||||
KERNEL_CC=/usr/bin/gcc .github/workflows/scripts/qemu-4-build-vm.sh --enable-debug ubuntu24
|
||||
|
||||
# Quick sanity test since we're not running the full ZTS
|
||||
sudo modprobe zfs
|
||||
|
||||
+13
-1
@@ -42,6 +42,11 @@ on:
|
||||
required: false
|
||||
default: ""
|
||||
description: "(optional) repo URL (blank: use http://download.zfsonlinux.org)"
|
||||
custom_branch:
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
description: "(optional) custom tag/branch to build using current CI (like 'zfs-2.2.9')"
|
||||
lookup:
|
||||
type: boolean
|
||||
required: false
|
||||
@@ -58,7 +63,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43', 'fedora44']
|
||||
os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora43', 'fedora44']
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
@@ -94,9 +99,16 @@ jobs:
|
||||
if [ -n "${{ github.event.inputs.patch_level }}" ] ; then
|
||||
EXTRA="--patch-level ${{ github.event.inputs.patch_level }}"
|
||||
fi
|
||||
if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then
|
||||
EXTRA+=" --custom-branch ${{ github.event.inputs.custom_branch }}"
|
||||
fi
|
||||
|
||||
.github/workflows/scripts/qemu-4-build.sh $EXTRA \
|
||||
--repo --release --dkms --tarball ${{ matrix.os }}
|
||||
|
||||
if [ -n "${{ github.event.inputs.custom_branch }}" ] ; then
|
||||
echo "Built packages for ${{ github.event.inputs.custom_branch }}"
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Prepare artifacts
|
||||
|
||||
+21
-10
@@ -14,7 +14,7 @@ on:
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
description: "(optional) Only run on this specific OS (like 'fedora42' or 'alpine3-23')"
|
||||
description: "(optional) Only run on this specific OS (like 'fedora44' or 'alpine3-23')"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
@@ -23,6 +23,7 @@ concurrency:
|
||||
jobs:
|
||||
test-config:
|
||||
name: Setup
|
||||
if: github.event_name == 'pull_request' || github.repository != 'openzfs/zfs'
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
test_os: ${{ steps.os.outputs.os }}
|
||||
@@ -45,24 +46,27 @@ jobs:
|
||||
fi
|
||||
|
||||
case "$ci_type" in
|
||||
docs)
|
||||
os_selection='[]'
|
||||
;;
|
||||
quick)
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd15-1s", "ubuntu24"]'
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora44", "freebsd15-1s", "ubuntu26"]'
|
||||
;;
|
||||
linux)
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "ubuntu22", "ubuntu24"]'
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora43", "fedora44", "ubuntu22", "ubuntu24", "ubuntu26"]'
|
||||
;;
|
||||
freebsd)
|
||||
os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-1s", "freebsd16-0c"]'
|
||||
os_selection='["freebsd14-4r", "freebsd14-4s", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c"]'
|
||||
;;
|
||||
*)
|
||||
# default list
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24"]'
|
||||
os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-0r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24", "ubuntu26"]'
|
||||
;;
|
||||
esac
|
||||
|
||||
# Repository-level override for OS selection.
|
||||
# Set vars.ZTS_OS_OVERRIDE in repo settings to restrict targets
|
||||
# (e.g. '["debian13"]' or '["debian13", "fedora42"]').
|
||||
# (e.g. '["debian13"]' or '["debian13", "fedora44"]').
|
||||
# Manual ZFS-CI-Type in commit messages bypasses the override.
|
||||
if [ -n "${{ vars.ZTS_OS_OVERRIDE }}" ] && [ "$ci_source" != "manual" ]; then
|
||||
override='${{ vars.ZTS_OS_OVERRIDE }}'
|
||||
@@ -91,15 +95,19 @@ jobs:
|
||||
qemu-vm:
|
||||
name: qemu-x86
|
||||
needs: [ test-config ]
|
||||
if: >-
|
||||
(github.event_name == 'pull_request' ||
|
||||
github.repository != 'openzfs/zfs') &&
|
||||
needs.test-config.outputs.ci_type != 'docs'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# rhl: almalinux8, almalinux9, centos-streamX, fedora4x
|
||||
# debian: debian12, debian13, ubuntu22, ubuntu24
|
||||
# debian: debian12, debian13, ubuntu22, ubuntu24, ubuntu26
|
||||
# misc: archlinux, tumbleweed
|
||||
# FreeBSD variants of November 2025:
|
||||
# FreeBSD Release: freebsd13-5r, freebsd14-4r, freebsd15-0r
|
||||
# FreeBSD Stable: freebsd13-5s, freebsd14-4s, freebsd15-1s
|
||||
# FreeBSD Release: freebsd14-4r, freebsd15-0r
|
||||
# FreeBSD Stable: freebsd14-4s, freebsd15-1s
|
||||
# FreeBSD Current: freebsd16-0c
|
||||
os: ${{ fromJson(needs.test-config.outputs.test_os) }}
|
||||
runs-on: ubuntu-24.04
|
||||
@@ -153,7 +161,10 @@ jobs:
|
||||
run: .github/workflows/scripts/qemu-8-summary.sh '${{ steps.artifact-upload.outputs.artifact-url }}'
|
||||
|
||||
cleanup:
|
||||
if: always()
|
||||
if: >-
|
||||
(github.event_name == 'pull_request' ||
|
||||
github.repository != 'openzfs/zfs') &&
|
||||
always()
|
||||
name: Cleanup
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ qemu-vm ]
|
||||
|
||||
@@ -3,6 +3,14 @@ name: zloop
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- 'man/**'
|
||||
- '**.md'
|
||||
- 'AUTHORS'
|
||||
- 'COPYRIGHT'
|
||||
- 'LICENSE'
|
||||
- 'NOTICE'
|
||||
- '.gitignore'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
|
||||
@@ -138,6 +138,7 @@ cstyle:
|
||||
! -path './include/sys/lua/*' \
|
||||
! -path './module/lua/l*.[ch]' \
|
||||
! -path './module/zfs/lz4.c' \
|
||||
! -path './tests/unit/munit.[ch]' \
|
||||
$(cstyle_line)
|
||||
|
||||
filter_executable = -exec test -x '{}' \; -print
|
||||
|
||||
@@ -52,7 +52,7 @@ All RHEL (and compatible systems: AlmaLinux OS, Rocky Linux, etc) on the **full*
|
||||
|
||||
All Ubuntu **LTS** releases are supported.
|
||||
|
||||
**Supported Ubuntu releases**: **24.04 “Noble”**, **22.04 “Jammy”**.
|
||||
**Supported Ubuntu releases**: **26.04 “Resolute”**, **24.04 “Noble”**, **22.04 “Jammy”**.
|
||||
|
||||
### Debian
|
||||
|
||||
@@ -68,4 +68,4 @@ Generally, if a distribution is following an LTS kernel, it should work well wit
|
||||
|
||||
All FreeBSD releases receiving [security support](https://www.freebsd.org/security/#sup) are supported by OpenZFS.
|
||||
|
||||
**Supported FreeBSD releases**: **15.0**, **14.4**, **13.5**.
|
||||
**Supported FreeBSD releases**: **15.0**, **14.4**.
|
||||
|
||||
@@ -54,7 +54,6 @@ ztest_LDADD = \
|
||||
libnvpair.la
|
||||
|
||||
ztest_LDADD += -lm
|
||||
ztest_LDFLAGS = -pthread
|
||||
|
||||
|
||||
include $(srcdir)/%D%/raidz_test/Makefile.am
|
||||
|
||||
@@ -565,10 +565,10 @@ def init():
|
||||
|
||||
update_hdr_intr()
|
||||
|
||||
# check if L2ARC exists
|
||||
# check if L2ARC exists; fall back to l2_size for older kernels that
|
||||
# do not export l2_ndev
|
||||
snap_stats()
|
||||
l2_size = cur.get("l2_size")
|
||||
if l2_size:
|
||||
if cur.get("l2_ndev") or cur.get("l2_size"):
|
||||
l2exist = True
|
||||
|
||||
if desired_cols:
|
||||
|
||||
@@ -856,7 +856,10 @@ def section_l2arc(kstats_dict):
|
||||
# The L2ARC statistics live in the same section as the normal ARC stuff
|
||||
arc_stats = isolate_section('arcstats', kstats_dict)
|
||||
|
||||
if arc_stats['l2_size'] == '0':
|
||||
# Skip the section only when no cache device is attached. Fall back to
|
||||
# l2_size for older kernels that do not export l2_ndev.
|
||||
if arc_stats.get('l2_ndev', '0') == '0' and \
|
||||
arc_stats['l2_size'] == '0':
|
||||
print('L2ARC not detected, skipping section\n')
|
||||
return
|
||||
|
||||
|
||||
@@ -2802,18 +2802,18 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,
|
||||
vd->vdev_children, vdrz->vd_nparity);
|
||||
raidz_row_t *rr = rm->rm_row[0];
|
||||
|
||||
/*
|
||||
* Account for out of order disks in raidz1.
|
||||
* For now just reverse them back and adjust for it later.
|
||||
*/
|
||||
if (rr->rr_firstdatacol == 1 && (zio.io_offset & (1ULL << 20))) {
|
||||
uint64_t devidx = rr->rr_col[0].rc_devidx;
|
||||
rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
|
||||
rr->rr_col[1].rc_devidx = devidx;
|
||||
}
|
||||
|
||||
if (!dump_opt['H']) {
|
||||
int last_disk = vd->vdev_children - 1;
|
||||
/*
|
||||
* Account for out of order disks in raidz1.
|
||||
* For now just reverse them back and adjust for it later.
|
||||
*/
|
||||
if (rr->rr_firstdatacol == 1 &&
|
||||
(zio.io_offset & (1ULL << 20))) {
|
||||
uint64_t devidx = rr->rr_col[0].rc_devidx;
|
||||
rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
|
||||
rr->rr_col[1].rc_devidx = devidx;
|
||||
}
|
||||
int first_disk = rr->rr_col[0].rc_devidx;
|
||||
|
||||
(void) printf("%12llx", (u_longlong_t)file_offset);
|
||||
@@ -2843,23 +2843,49 @@ print_file_layout_raidz(vdev_t *vd, blkptr_t *bp, uint64_t file_offset,
|
||||
static uint64_t next_offset = 0;
|
||||
|
||||
if (next_offset != file_offset) {
|
||||
(void) printf("skip hole\t-\t%llx\n",
|
||||
(u_longlong_t)((file_offset - next_offset) >>
|
||||
vd->vdev_ashift));
|
||||
(void) printf("skip hole\t-\t\t%lld\n",
|
||||
(u_longlong_t)((file_offset - next_offset) / 512));
|
||||
}
|
||||
next_offset = file_offset + BP_GET_LSIZE(bp);
|
||||
uint64_t tmp_offset = file_offset;
|
||||
|
||||
|
||||
for (int c = 0; c < rr->rr_cols; c++) {
|
||||
boolean_t pcol = c < rr->rr_firstdatacol;
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
char *path = vd->vdev_child[rc->rc_devidx]->vdev_path;
|
||||
// c < rr->rr_firstdatacol
|
||||
|
||||
if (rc->rc_size == 0)
|
||||
continue;
|
||||
(void) printf("%s\t%llu\t%d\n",
|
||||
(void) printf("%s\t\t%llu\t%d",
|
||||
zfs_basename(path),
|
||||
(u_longlong_t)(rc->rc_offset +
|
||||
VDEV_LABEL_START_SIZE)/512,
|
||||
(int)rc->rc_size/512);
|
||||
if (dump_opt['v']) {
|
||||
char label = pcol ? 'P' : 'D';
|
||||
int num;
|
||||
|
||||
if (c < 2) {
|
||||
num = 0;
|
||||
} else {
|
||||
num = pcol ? c :
|
||||
(c - rr->rr_firstdatacol);
|
||||
}
|
||||
printf("\t%c%d", label, num);
|
||||
if (dump_opt['v'] > 1) {
|
||||
unsigned long long off;
|
||||
if (pcol)
|
||||
off = file_offset;
|
||||
else
|
||||
off = tmp_offset;
|
||||
off = off / 512ULL;
|
||||
printf("\t%llu", off);
|
||||
}
|
||||
}
|
||||
if (!pcol)
|
||||
tmp_offset += rc->rc_size;
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2989,7 +3015,12 @@ dump_indirect_layout(dnode_t *dn)
|
||||
* Start layout with a header
|
||||
*/
|
||||
if (dump_opt['H']) {
|
||||
(void) printf("DISK\t\tLBA\t\tCOUNT\n");
|
||||
(void) printf("DISK\t\t\tLBA\tCOUNT");
|
||||
if (dump_opt['v'])
|
||||
(void) printf("\tTYPE");
|
||||
if (dump_opt['v'] > 1)
|
||||
(void) printf("\tOFFSET");
|
||||
printf("\n");
|
||||
} else {
|
||||
char diskhdr[16];
|
||||
|
||||
@@ -6325,22 +6356,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
dmu_object_type_t type)
|
||||
{
|
||||
int i;
|
||||
boolean_t claimed = B_FALSE;
|
||||
boolean_t ddt_block = B_FALSE;
|
||||
boolean_t brt_block = B_FALSE;
|
||||
|
||||
ASSERT(type < ZDB_OT_TOTAL);
|
||||
|
||||
if (zilog && zil_bp_tree_add(zilog, bp) != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* This flag controls if we will issue a claim for the block while
|
||||
* counting it, to ensure that all blocks are referenced in space maps.
|
||||
* We don't issue claims if we're not doing leak tracking, because it's
|
||||
* expensive if the user isn't interested. We also don't claim the
|
||||
* second or later occurrences of cloned or dedup'd blocks, because we
|
||||
* already claimed them the first time.
|
||||
*/
|
||||
boolean_t do_claim = !dump_opt['L'];
|
||||
|
||||
spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER);
|
||||
|
||||
blkptr_t tempbp;
|
||||
@@ -6371,21 +6395,30 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
ddt_entry_t *dde = ddt_lookup(ddt, bp, B_TRUE);
|
||||
|
||||
/*
|
||||
* ddt_lookup() can return NULL if this block didn't exist
|
||||
* in the DDT and creating it would take the DDT over its
|
||||
* quota. Since we got the block from disk, it must exist in
|
||||
* the DDT, so this can't happen. However, when unique entries
|
||||
* are pruned, the dedup bit can be set with no corresponding
|
||||
* entry in the DDT.
|
||||
* ddt_lookup() can return NULL when unique entries are pruned
|
||||
* from the DDT.
|
||||
*/
|
||||
if (dde == NULL) {
|
||||
ddt_exit(ddt);
|
||||
goto skipped;
|
||||
goto ddt_done;
|
||||
}
|
||||
|
||||
/* Get the phys for this variant */
|
||||
ddt_phys_variant_t v = ddt_phys_select(ddt, dde, bp);
|
||||
|
||||
/*
|
||||
* DDT_PHYS_NONE means the block has the dedup bit set but
|
||||
* its DVA doesn't match any phys in the entry. This can
|
||||
* happen when a DVA was evicted from the DDT and re-added
|
||||
* on a hash collision. The block may still have a BRT entry.
|
||||
*/
|
||||
if (v == DDT_PHYS_NONE) {
|
||||
ddt_exit(ddt);
|
||||
goto ddt_done;
|
||||
}
|
||||
|
||||
ddt_block = B_TRUE;
|
||||
|
||||
/*
|
||||
* This entry may have multiple sets of DVAs. We must claim
|
||||
* each set the first time we see them in a real block on disk,
|
||||
@@ -6400,8 +6433,14 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
dde->dde_io =
|
||||
(void *)(((uintptr_t)dde->dde_io) | (1 << v));
|
||||
|
||||
/* Consume a reference for this block. */
|
||||
if (ddt_phys_total_refcnt(ddt, dde->dde_phys) > 0)
|
||||
/*
|
||||
* Consume a reference. If this variant's refcount is already
|
||||
* zero, the DDT tracking is exhausted — more filesystem
|
||||
* references exist than the DDT accounts for.
|
||||
*/
|
||||
boolean_t ddt_refcnt_exhausted =
|
||||
(ddt_phys_refcnt(dde->dde_phys, v) == 0);
|
||||
if (!ddt_refcnt_exhausted)
|
||||
ddt_phys_decref(dde->dde_phys, v);
|
||||
|
||||
/*
|
||||
@@ -6430,20 +6469,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
bp = &tempbp;
|
||||
}
|
||||
|
||||
if (seen) {
|
||||
if (seen && !ddt_refcnt_exhausted) {
|
||||
/*
|
||||
* The second or later time we see this block,
|
||||
* it's a duplicate and we count it.
|
||||
*/
|
||||
zcb->zcb_dedup_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_dedup_blocks++;
|
||||
|
||||
/* Already claimed, don't do it again. */
|
||||
do_claim = B_FALSE;
|
||||
claimed = B_TRUE;
|
||||
}
|
||||
|
||||
ddt_exit(ddt);
|
||||
} else if (zcb->zcb_brt_is_active &&
|
||||
}
|
||||
|
||||
ddt_done:
|
||||
if (!claimed && zcb->zcb_brt_is_active &&
|
||||
brt_maybe_exists(zcb->zcb_spa, bp)) {
|
||||
/*
|
||||
* Cloned blocks are special. We need to count them, so we can
|
||||
@@ -6451,10 +6491,8 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
* only claim them once.
|
||||
*
|
||||
* To do this, we keep our own in-memory BRT. For each block
|
||||
* we haven't seen before, we look it up in the real BRT and
|
||||
* if it's there, we note it and its refcount then proceed as
|
||||
* normal. If we see the block again, we count it as a clone
|
||||
* and then give it no further consideration.
|
||||
* we haven't seen before, we look it up in the real BRT. If
|
||||
* we see the block again, we count it as a clone.
|
||||
*/
|
||||
zdb_brt_entry_t zbre_search, *zbre;
|
||||
avl_index_t where;
|
||||
@@ -6462,36 +6500,27 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
zbre_search.zbre_dva = bp->blk_dva[0];
|
||||
zbre = avl_find(&zcb->zcb_brt, &zbre_search, &where);
|
||||
if (zbre == NULL) {
|
||||
/* Not seen before; track it */
|
||||
uint64_t refcnt =
|
||||
brt_entry_get_refcount(zcb->zcb_spa, bp);
|
||||
if (refcnt > 0) {
|
||||
brt_block = B_TRUE;
|
||||
zbre = umem_zalloc(sizeof (zdb_brt_entry_t),
|
||||
UMEM_NOFAIL);
|
||||
zbre->zbre_dva = bp->blk_dva[0];
|
||||
zbre->zbre_refcount = refcnt;
|
||||
avl_insert(&zcb->zcb_brt, zbre, where);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Second or later occurrence, count it and take a
|
||||
* refcount.
|
||||
*/
|
||||
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_clone_blocks++;
|
||||
|
||||
zbre->zbre_refcount--;
|
||||
if (zbre->zbre_refcount == 0) {
|
||||
avl_remove(&zcb->zcb_brt, zbre);
|
||||
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||
} else {
|
||||
brt_block = B_TRUE;
|
||||
if (zbre->zbre_refcount > 0) {
|
||||
zcb->zcb_clone_asize += BP_GET_ASIZE(bp);
|
||||
zcb->zcb_clone_blocks++;
|
||||
zbre->zbre_refcount--;
|
||||
claimed = B_TRUE;
|
||||
}
|
||||
|
||||
/* Already claimed, don't do it again. */
|
||||
do_claim = B_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
skipped:
|
||||
for (i = 0; i < 4; i++) {
|
||||
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
|
||||
int t = (i & 1) ? type : ZDB_OT_TOTAL;
|
||||
@@ -6650,12 +6679,21 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
#undef BIN
|
||||
|
||||
hist_skipped:
|
||||
if (!do_claim)
|
||||
if (claimed || dump_opt['L'])
|
||||
return;
|
||||
|
||||
VERIFY0(zio_wait(zio_claim(NULL, zcb->zcb_spa,
|
||||
int claim_err = zio_wait(zio_claim(NULL, zcb->zcb_spa,
|
||||
spa_min_claim_txg(zcb->zcb_spa), bp, NULL, NULL,
|
||||
ZIO_FLAG_CANFAIL)));
|
||||
ZIO_FLAG_CANFAIL));
|
||||
if (claim_err != 0) {
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("block claim error %d%s%s: %s\n",
|
||||
claim_err, brt_block ? " (BRT)" : "",
|
||||
ddt_block ? " (DDT)" : "", blkbuf);
|
||||
zcb->zcb_haderrors = 1;
|
||||
zcb->zcb_errors[claim_err]++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -7431,10 +7469,66 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
|
||||
static boolean_t
|
||||
zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
|
||||
{
|
||||
if (dump_opt['L'])
|
||||
return (B_FALSE);
|
||||
|
||||
boolean_t leaks = B_FALSE;
|
||||
|
||||
/*
|
||||
* Report leaked BRT entries whose refcount was not fully consumed by
|
||||
* the traversal.
|
||||
*/
|
||||
if (zcb->zcb_brt_is_active) {
|
||||
void *cookie = NULL;
|
||||
zdb_brt_entry_t *zbre;
|
||||
while ((zbre = avl_destroy_nodes(
|
||||
&zcb->zcb_brt, &cookie)) != NULL) {
|
||||
if (!dump_opt['L'] && zbre->zbre_refcount != 0) {
|
||||
(void) printf("BRT leak: vdev %llu, "
|
||||
"offset 0x%llx, refcount %llu\n",
|
||||
(u_longlong_t)DVA_GET_VDEV(
|
||||
&zbre->zbre_dva),
|
||||
(u_longlong_t)DVA_GET_OFFSET(
|
||||
&zbre->zbre_dva),
|
||||
(u_longlong_t)zbre->zbre_refcount);
|
||||
leaks = B_TRUE;
|
||||
}
|
||||
umem_free(zbre, sizeof (zdb_brt_entry_t));
|
||||
}
|
||||
avl_destroy(&zcb->zcb_brt);
|
||||
}
|
||||
|
||||
if (dump_opt['L'])
|
||||
return (leaks);
|
||||
|
||||
/*
|
||||
* Report leaked DDT entries whose refcount was not fully consumed by
|
||||
* the traversal. Entries in the DDT ZAP that were never looked up
|
||||
* are not detected here.
|
||||
*/
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (ddt == NULL)
|
||||
continue;
|
||||
ddt_enter(ddt);
|
||||
for (ddt_entry_t *dde = avl_first(&ddt->ddt_tree); dde != NULL;
|
||||
dde = AVL_NEXT(&ddt->ddt_tree, dde)) {
|
||||
for (int p = 0; p < DDT_NPHYS(ddt); p++) {
|
||||
ddt_phys_variant_t v = DDT_PHYS_VARIANT(ddt, p);
|
||||
uint64_t refcnt = ddt_phys_refcnt(dde->dde_phys,
|
||||
v);
|
||||
if (refcnt == 0)
|
||||
continue;
|
||||
blkptr_t blk;
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
ddt_bp_create(ddt->ddt_checksum, &dde->dde_key,
|
||||
dde->dde_phys, v, &blk);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
|
||||
(void) printf("DDT leak: refcount %llu %s\n",
|
||||
(u_longlong_t)refcnt, blkbuf);
|
||||
leaks = B_TRUE;
|
||||
}
|
||||
}
|
||||
ddt_exit(ddt);
|
||||
}
|
||||
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
for (unsigned c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *vd = rvd->vdev_child[c];
|
||||
@@ -10136,7 +10230,7 @@ main(int argc, char **argv)
|
||||
* Automate cachefile
|
||||
*/
|
||||
if (!spa_config_path_env && !config_path_console && target &&
|
||||
libzfs_core_init() == 0) {
|
||||
!dump_opt['l'] && libzfs_core_init() == 0) {
|
||||
char *pname = strdup(target);
|
||||
const char *value;
|
||||
nvlist_t *pnvl = NULL;
|
||||
@@ -10519,6 +10613,7 @@ main(int argc, char **argv)
|
||||
}
|
||||
|
||||
if (dump_opt['f'] && os != NULL) {
|
||||
dump_opt['v'] = verbose;
|
||||
dump_file_data_layout(os);
|
||||
} else if (dump_opt['B']) {
|
||||
dump_backup(target, objset_id,
|
||||
|
||||
@@ -41,6 +41,5 @@ zed_LDADD = \
|
||||
libnvpair.la
|
||||
|
||||
zed_LDADD += -lrt $(LIBATOMIC_LIBS) $(LIBUDEV_LIBS) $(LIBUUID_LIBS)
|
||||
zed_LDFLAGS = -pthread
|
||||
|
||||
dist_noinst_DATA += %D%/agents/README.md
|
||||
|
||||
@@ -350,6 +350,60 @@ is_draid_fdomain_failure(fmd_hdl_t *hdl, libzfs_handle_t *zhdl,
|
||||
return (res);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns B_TRUE if spare 'a' should be tried before spare 'b' when
|
||||
* replacing a failed vdev with the given characteristics.
|
||||
*
|
||||
* Ordering criteria (most to least significant):
|
||||
* 1. Distributed spare matching the failed vdev's dRAID is preferred
|
||||
* most (distributed spares rebuild faster than traditional spares).
|
||||
* Regular spares (no TOP_GUID) come next. Non-matching distributed
|
||||
* spares are tried last, as the kernel will reject them anyway.
|
||||
* 2. Matching rotational is preferred over mismatching.
|
||||
* 3. Large enough is preferred over too small.
|
||||
* 4. Smaller size is preferred over bigger (best fit).
|
||||
*/
|
||||
static boolean_t
|
||||
spare_is_preferred(nvlist_t *a, nvlist_t *b, boolean_t have_rotational,
|
||||
uint64_t vdev_rotational, uint64_t vdev_size, uint64_t top_guid)
|
||||
{
|
||||
uint64_t a_top = 0, b_top = 0;
|
||||
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_TOP_GUID, &a_top);
|
||||
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_TOP_GUID, &b_top);
|
||||
int a_pri = (a_top == 0) ? 1 :
|
||||
(a_top == top_guid || top_guid == 0) ? 2 : 0;
|
||||
int b_pri = (b_top == 0) ? 1 :
|
||||
(b_top == top_guid || top_guid == 0) ? 2 : 0;
|
||||
if (a_pri != b_pri)
|
||||
return (a_pri > b_pri);
|
||||
|
||||
if (have_rotational) {
|
||||
uint64_t a_rotational = 0, b_rotational = 0;
|
||||
(void) nvlist_lookup_uint64(a, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
&a_rotational);
|
||||
(void) nvlist_lookup_uint64(b, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
&b_rotational);
|
||||
if ((a_rotational == vdev_rotational) !=
|
||||
(b_rotational == vdev_rotational))
|
||||
return (a_rotational == vdev_rotational);
|
||||
}
|
||||
|
||||
vdev_stat_t *vs;
|
||||
unsigned int c;
|
||||
uint64_t a_size = 0, b_size = 0;
|
||||
if (nvlist_lookup_uint64_array(a, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0)
|
||||
a_size = vs->vs_rsize;
|
||||
if (nvlist_lookup_uint64_array(b, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0)
|
||||
b_size = vs->vs_rsize;
|
||||
boolean_t a_ok = (a_size >= vdev_size);
|
||||
boolean_t b_ok = (b_size >= vdev_size);
|
||||
if (a_ok != b_ok)
|
||||
return (a_ok);
|
||||
return (a_size < b_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a vdev, attempt to replace it with every known spare until one
|
||||
* succeeds or we run out of devices to try.
|
||||
@@ -364,6 +418,10 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
char *dev_name;
|
||||
zprop_source_t source;
|
||||
int ashift;
|
||||
uint64_t vdev_rotational = 0, vdev_size = 0, top_guid = 0;
|
||||
boolean_t have_vdev_rotational;
|
||||
vdev_stat_t *vs;
|
||||
unsigned int c;
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
@@ -377,6 +435,35 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
&spares, &nspares) != 0)
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* Collect the failed vdev's parameters for optimal replacement.
|
||||
*/
|
||||
have_vdev_rotational = (nvlist_lookup_uint64(vdev,
|
||||
ZPOOL_CONFIG_VDEV_ROTATIONAL, &vdev_rotational) == 0);
|
||||
if (nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
|
||||
(uint64_t **)&vs, &c) == 0)
|
||||
vdev_size = vs->vs_rsize;
|
||||
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_TOP_GUID, &top_guid);
|
||||
|
||||
/*
|
||||
* Build a sorted index array over the spares, so that better
|
||||
* candidates are tried first.
|
||||
*/
|
||||
uint_t order[nspares];
|
||||
for (s = 0; s < nspares; s++)
|
||||
order[s] = s;
|
||||
for (s = 1; s < nspares; s++) {
|
||||
uint_t key = order[s];
|
||||
int j = (int)s - 1;
|
||||
while (j >= 0 && spare_is_preferred(spares[key],
|
||||
spares[order[j]], have_vdev_rotational, vdev_rotational,
|
||||
vdev_size, top_guid)) {
|
||||
order[j + 1] = order[j];
|
||||
j--;
|
||||
}
|
||||
order[j + 1] = key;
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup "ashift" pool property, we may need it for the replacement
|
||||
*/
|
||||
@@ -394,25 +481,26 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
|
||||
* replace it.
|
||||
*/
|
||||
for (s = 0; s < nspares; s++) {
|
||||
nvlist_t *spare = spares[order[s]];
|
||||
boolean_t rebuild = B_FALSE;
|
||||
const char *spare_name, *type;
|
||||
|
||||
if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
|
||||
if (nvlist_lookup_string(spare, ZPOOL_CONFIG_PATH,
|
||||
&spare_name) != 0)
|
||||
continue;
|
||||
|
||||
/* prefer sequential resilvering for distributed spares */
|
||||
if ((nvlist_lookup_string(spares[s], ZPOOL_CONFIG_TYPE,
|
||||
if ((nvlist_lookup_string(spare, ZPOOL_CONFIG_TYPE,
|
||||
&type) == 0) && strcmp(type, VDEV_TYPE_DRAID_SPARE) == 0)
|
||||
rebuild = B_TRUE;
|
||||
|
||||
/* if set, add the "ashift" pool property to the spare nvlist */
|
||||
if (source != ZPROP_SRC_DEFAULT)
|
||||
(void) nvlist_add_uint64(spares[s],
|
||||
(void) nvlist_add_uint64(spare,
|
||||
ZPOOL_CONFIG_ASHIFT, ashift);
|
||||
|
||||
(void) nvlist_add_nvlist_array(replacement,
|
||||
ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spares[s], 1);
|
||||
ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)&spare, 1);
|
||||
|
||||
fmd_hdl_debug(hdl, "zpool_vdev_replace '%s' with spare '%s'",
|
||||
dev_name, zfs_basename(spare_name));
|
||||
|
||||
@@ -9399,6 +9399,18 @@ main(int argc, char **argv)
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Special case '<subcommand> --help|-?'
|
||||
*/
|
||||
if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||
|
||||
strcmp(argv[2], "-?") == 0)) {
|
||||
int idx;
|
||||
if (find_command_idx(cmdname, &idx) == 0) {
|
||||
current_command = &command_table[idx];
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
|
||||
|
||||
libzfs_print_on_error(g_zfs, B_TRUE);
|
||||
|
||||
@@ -13878,6 +13878,18 @@ main(int argc, char **argv)
|
||||
if (strcmp(cmdname, "help") == 0)
|
||||
return (zpool_do_help(argc, argv));
|
||||
|
||||
/*
|
||||
* Special case '<subcommand> --help|-?'
|
||||
*/
|
||||
if (argc >= 3 && (strcmp(argv[2], "--help") == 0 ||
|
||||
strcmp(argv[2], "-?") == 0)) {
|
||||
int idx;
|
||||
if (find_command_idx(cmdname, &idx) == 0) {
|
||||
current_command = &command_table[idx];
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
if ((g_zfs = libzfs_init()) == NULL) {
|
||||
(void) fprintf(stderr, "%s\n", libzfs_error_init(errno));
|
||||
return (1);
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
#include <libintl.h>
|
||||
#include <stddef.h>
|
||||
#include <libzfs.h>
|
||||
#include <signal.h>
|
||||
#include <sys/backtrace.h>
|
||||
#include "zstream.h"
|
||||
|
||||
void
|
||||
@@ -53,9 +55,43 @@ zstream_usage(void)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void sig_handler(int signo)
|
||||
{
|
||||
struct sigaction action;
|
||||
libspl_backtrace(STDERR_FILENO);
|
||||
|
||||
/*
|
||||
* Restore default action and re-raise signal so SIGSEGV and
|
||||
* SIGABRT can trigger a core dump.
|
||||
*/
|
||||
action.sa_handler = SIG_DFL;
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = 0;
|
||||
(void) sigaction(signo, &action, NULL);
|
||||
raise(signo);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
/*
|
||||
* Set up signal handlers, so if we crash due to bad data in the stream
|
||||
* we can get more info. Unlike ztest, we don't bail out if we can't
|
||||
* set up signal handlers, because zstream is very useful without them.
|
||||
*/
|
||||
struct sigaction action = { .sa_handler = sig_handler };
|
||||
sigemptyset(&action.sa_mask);
|
||||
action.sa_flags = 0;
|
||||
if (sigaction(SIGSEGV, &action, NULL) < 0) {
|
||||
(void) fprintf(stderr, "zstream: cannot catch SIGSEGV: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
if (sigaction(SIGABRT, &action, NULL) < 0) {
|
||||
(void) fprintf(stderr, "zstream: cannot catch SIGABRT: %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
|
||||
char *basename = strrchr(argv[0], '/');
|
||||
basename = basename ? (basename + 1) : argv[0];
|
||||
if (argc >= 1 && strcmp(basename, "zstreamdump") == 0)
|
||||
|
||||
@@ -385,6 +385,20 @@ zstream_do_dump(int argc, char *argv[])
|
||||
(void) ssread(buf, sz, &zc);
|
||||
if (ferror(send_stream))
|
||||
perror("fread");
|
||||
|
||||
uint8_t *nv_header = (uint8_t *)buf;
|
||||
boolean_t xdr = nv_header[0] == NV_ENCODE_XDR;
|
||||
boolean_t big_endian = nv_header[1] == 0;
|
||||
const char *nc;
|
||||
if (xdr) {
|
||||
nc = "NV_ENCODE_XDR";
|
||||
} else if (big_endian) {
|
||||
nc = "NV_ENCODE_NATIVE (big-endian)";
|
||||
} else {
|
||||
nc = "NV_ENCODE_NATIVE (little-endian)";
|
||||
}
|
||||
printf("nvlist encoding = %s\n", nc);
|
||||
|
||||
err = nvlist_unpack(buf, sz, &nv, 0);
|
||||
if (err) {
|
||||
perror(strerror(err));
|
||||
|
||||
@@ -99,6 +99,7 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
zfs_refcount_init();
|
||||
abd_init();
|
||||
fletcher_4_init();
|
||||
zio_init();
|
||||
@@ -353,6 +354,7 @@ zstream_do_recompress(int argc, char *argv[])
|
||||
zio_fini();
|
||||
zstd_fini();
|
||||
abd_fini();
|
||||
zfs_refcount_fini();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
|
||||
AM_CFLAGS += $(DEBUG_CFLAGS)
|
||||
AM_CFLAGS += $(ASAN_CFLAGS)
|
||||
AM_CFLAGS += $(UBSAN_CFLAGS)
|
||||
AM_CFLAGS += $(PTHREAD_CFLAGS)
|
||||
AM_CFLAGS += $(CODE_COVERAGE_CFLAGS)
|
||||
AM_CFLAGS += $(NO_FORMAT_ZERO_LENGTH)
|
||||
AM_CFLAGS += $(NO_FORMAT_TRUNCATION)
|
||||
@@ -57,6 +58,7 @@ endif
|
||||
AM_LDFLAGS = $(DEBUG_LDFLAGS)
|
||||
AM_LDFLAGS += $(ASAN_LDFLAGS)
|
||||
AM_LDFLAGS += $(UBSAN_LDFLAGS)
|
||||
AM_LDFLAGS += $(PTHREAD_LIBS)
|
||||
|
||||
if BUILD_FREEBSD
|
||||
AM_LDFLAGS += -fstack-protector-strong
|
||||
|
||||
@@ -0,0 +1,523 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later WITH Autoconf-exception-macro
|
||||
# ===========================================================================
|
||||
# https://www.gnu.org/software/autoconf-archive/ax_pthread.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# This macro figures out how to build C programs using POSIX threads. It
|
||||
# sets the PTHREAD_LIBS output variable to the threads library and linker
|
||||
# flags, and the PTHREAD_CFLAGS output variable to any special C compiler
|
||||
# flags that are needed. (The user can also force certain compiler
|
||||
# flags/libs to be tested by setting these environment variables.)
|
||||
#
|
||||
# Also sets PTHREAD_CC and PTHREAD_CXX to any special C compiler that is
|
||||
# needed for multi-threaded programs (defaults to the value of CC
|
||||
# respectively CXX otherwise). (This is necessary on e.g. AIX to use the
|
||||
# special cc_r/CC_r compiler alias.)
|
||||
#
|
||||
# NOTE: You are assumed to not only compile your program with these flags,
|
||||
# but also to link with them as well. For example, you might link with
|
||||
# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
|
||||
# $PTHREAD_CXX $CXXFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
|
||||
#
|
||||
# If you are only building threaded programs, you may wish to use these
|
||||
# variables in your default LIBS, CFLAGS, and CC:
|
||||
#
|
||||
# LIBS="$PTHREAD_LIBS $LIBS"
|
||||
# CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
# CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS"
|
||||
# CC="$PTHREAD_CC"
|
||||
# CXX="$PTHREAD_CXX"
|
||||
#
|
||||
# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
|
||||
# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to
|
||||
# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
|
||||
#
|
||||
# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
|
||||
# PTHREAD_PRIO_INHERIT symbol is defined when compiling with
|
||||
# PTHREAD_CFLAGS.
|
||||
#
|
||||
# ACTION-IF-FOUND is a list of shell commands to run if a threads library
|
||||
# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
|
||||
# is not found. If ACTION-IF-FOUND is not specified, the default action
|
||||
# will define HAVE_PTHREAD.
|
||||
#
|
||||
# Please let the authors know if this macro fails on any platform, or if
|
||||
# you have any other suggestions or comments. This macro was based on work
|
||||
# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
|
||||
# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
|
||||
# Alejandro Forero Cuervo to the autoconf macro repository. We are also
|
||||
# grateful for the helpful feedback of numerous users.
|
||||
#
|
||||
# Updated for Autoconf 2.68 by Daniel Richard G.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
|
||||
# Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
|
||||
# Copyright (c) 2019 Marc Stevens <marc.stevens@cwi.nl>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation, either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 31
|
||||
|
||||
AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
|
||||
AC_DEFUN([AX_PTHREAD], [
|
||||
AC_REQUIRE([AC_CANONICAL_HOST])
|
||||
AC_REQUIRE([AC_PROG_CC])
|
||||
AC_REQUIRE([AC_PROG_SED])
|
||||
AC_LANG_PUSH([C])
|
||||
ax_pthread_ok=no
|
||||
|
||||
# We used to check for pthread.h first, but this fails if pthread.h
|
||||
# requires special compiler flags (e.g. on Tru64 or Sequent).
|
||||
# It gets checked for in the link test anyway.
|
||||
|
||||
# First of all, check if the user has set any of the PTHREAD_LIBS,
|
||||
# etcetera environment variables, and if threads linking works using
|
||||
# them:
|
||||
if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then
|
||||
ax_pthread_save_CC="$CC"
|
||||
ax_pthread_save_CFLAGS="$CFLAGS"
|
||||
ax_pthread_save_LIBS="$LIBS"
|
||||
AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"])
|
||||
AS_IF([test "x$PTHREAD_CXX" != "x"], [CXX="$PTHREAD_CXX"])
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS])
|
||||
AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes])
|
||||
AC_MSG_RESULT([$ax_pthread_ok])
|
||||
if test "x$ax_pthread_ok" = "xno"; then
|
||||
PTHREAD_LIBS=""
|
||||
PTHREAD_CFLAGS=""
|
||||
fi
|
||||
CC="$ax_pthread_save_CC"
|
||||
CFLAGS="$ax_pthread_save_CFLAGS"
|
||||
LIBS="$ax_pthread_save_LIBS"
|
||||
fi
|
||||
|
||||
# We must check for the threads library under a number of different
|
||||
# names; the ordering is very important because some systems
|
||||
# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
|
||||
# libraries is broken (non-POSIX).
|
||||
|
||||
# Create a list of thread flags to try. Items with a "," contain both
|
||||
# C compiler flags (before ",") and linker flags (after ","). Other items
|
||||
# starting with a "-" are C compiler flags, and remaining items are
|
||||
# library names, except for "none" which indicates that we try without
|
||||
# any flags at all, and "pthread-config" which is a program returning
|
||||
# the flags for the Pth emulation library.
|
||||
|
||||
ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
|
||||
|
||||
# The ordering *is* (sometimes) important. Some notes on the
|
||||
# individual items follow:
|
||||
|
||||
# pthreads: AIX (must check this before -lpthread)
|
||||
# none: in case threads are in libc; should be tried before -Kthread and
|
||||
# other compiler flags to prevent continual compiler warnings
|
||||
# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
|
||||
# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64
|
||||
# (Note: HP C rejects this with "bad form for `-t' option")
|
||||
# -pthreads: Solaris/gcc (Note: HP C also rejects)
|
||||
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
|
||||
# doesn't hurt to check since this sometimes defines pthreads and
|
||||
# -D_REENTRANT too), HP C (must be checked before -lpthread, which
|
||||
# is present but should not be used directly; and before -mthreads,
|
||||
# because the compiler interprets this as "-mt" + "-hreads")
|
||||
# -mthreads: Mingw32/gcc, Lynx/gcc
|
||||
# pthread: Linux, etcetera
|
||||
# --thread-safe: KAI C++
|
||||
# pthread-config: use pthread-config program (for GNU Pth library)
|
||||
|
||||
case $host_os in
|
||||
|
||||
freebsd*)
|
||||
|
||||
# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
|
||||
# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
|
||||
|
||||
ax_pthread_flags="-kthread lthread $ax_pthread_flags"
|
||||
;;
|
||||
|
||||
hpux*)
|
||||
|
||||
# From the cc(1) man page: "[-mt] Sets various -D flags to enable
|
||||
# multi-threading and also sets -lpthread."
|
||||
|
||||
ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags"
|
||||
;;
|
||||
|
||||
openedition*)
|
||||
|
||||
# IBM z/OS requires a feature-test macro to be defined in order to
|
||||
# enable POSIX threads at all, so give the user a hint if this is
|
||||
# not set. (We don't define these ourselves, as they can affect
|
||||
# other portions of the system API in unpredictable ways.)
|
||||
|
||||
AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING],
|
||||
[
|
||||
# if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS)
|
||||
AX_PTHREAD_ZOS_MISSING
|
||||
# endif
|
||||
],
|
||||
[AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])])
|
||||
;;
|
||||
|
||||
solaris*)
|
||||
|
||||
# On Solaris (at least, for some versions), libc contains stubbed
|
||||
# (non-functional) versions of the pthreads routines, so link-based
|
||||
# tests will erroneously succeed. (N.B.: The stubs are missing
|
||||
# pthread_cleanup_push, or rather a function called by this macro,
|
||||
# so we could check for that, but who knows whether they'll stub
|
||||
# that too in a future libc.) So we'll check first for the
|
||||
# standard Solaris way of linking pthreads (-mt -lpthread).
|
||||
|
||||
ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Are we compiling with Clang?
|
||||
|
||||
AC_CACHE_CHECK([whether $CC is Clang],
|
||||
[ax_cv_PTHREAD_CLANG],
|
||||
[ax_cv_PTHREAD_CLANG=no
|
||||
# Note that Autoconf sets GCC=yes for Clang as well as GCC
|
||||
if test "x$GCC" = "xyes"; then
|
||||
AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG],
|
||||
[/* Note: Clang 2.7 lacks __clang_[a-z]+__ */
|
||||
# if defined(__clang__) && defined(__llvm__)
|
||||
AX_PTHREAD_CC_IS_CLANG
|
||||
# endif
|
||||
],
|
||||
[ax_cv_PTHREAD_CLANG=yes])
|
||||
fi
|
||||
])
|
||||
ax_pthread_clang="$ax_cv_PTHREAD_CLANG"
|
||||
|
||||
|
||||
# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC)
|
||||
|
||||
# Note that for GCC and Clang -pthread generally implies -lpthread,
|
||||
# except when -nostdlib is passed.
|
||||
# This is problematic using libtool to build C++ shared libraries with pthread:
|
||||
# [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460
|
||||
# [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333
|
||||
# [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555
|
||||
# To solve this, first try -pthread together with -lpthread for GCC
|
||||
|
||||
AS_IF([test "x$GCC" = "xyes"],
|
||||
[ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags"])
|
||||
|
||||
# Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first
|
||||
|
||||
AS_IF([test "x$ax_pthread_clang" = "xyes"],
|
||||
[ax_pthread_flags="-pthread,-lpthread -pthread"])
|
||||
|
||||
|
||||
# The presence of a feature test macro requesting re-entrant function
|
||||
# definitions is, on some systems, a strong hint that pthreads support is
|
||||
# correctly enabled
|
||||
|
||||
case $host_os in
|
||||
darwin* | hpux* | linux* | osf* | solaris*)
|
||||
ax_pthread_check_macro="_REENTRANT"
|
||||
;;
|
||||
|
||||
aix*)
|
||||
ax_pthread_check_macro="_THREAD_SAFE"
|
||||
;;
|
||||
|
||||
*)
|
||||
ax_pthread_check_macro="--"
|
||||
;;
|
||||
esac
|
||||
AS_IF([test "x$ax_pthread_check_macro" = "x--"],
|
||||
[ax_pthread_check_cond=0],
|
||||
[ax_pthread_check_cond="!defined($ax_pthread_check_macro)"])
|
||||
|
||||
|
||||
if test "x$ax_pthread_ok" = "xno"; then
|
||||
for ax_pthread_try_flag in $ax_pthread_flags; do
|
||||
|
||||
case $ax_pthread_try_flag in
|
||||
none)
|
||||
AC_MSG_CHECKING([whether pthreads work without any flags])
|
||||
;;
|
||||
|
||||
*,*)
|
||||
PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"`
|
||||
PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"`
|
||||
AC_MSG_CHECKING([whether pthreads work with "$PTHREAD_CFLAGS" and "$PTHREAD_LIBS"])
|
||||
;;
|
||||
|
||||
-*)
|
||||
AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag])
|
||||
PTHREAD_CFLAGS="$ax_pthread_try_flag"
|
||||
;;
|
||||
|
||||
pthread-config)
|
||||
AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
|
||||
AS_IF([test "x$ax_pthread_config" = "xno"], [continue])
|
||||
PTHREAD_CFLAGS="`pthread-config --cflags`"
|
||||
PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
|
||||
;;
|
||||
|
||||
*)
|
||||
AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag])
|
||||
PTHREAD_LIBS="-l$ax_pthread_try_flag"
|
||||
;;
|
||||
esac
|
||||
|
||||
ax_pthread_save_CFLAGS="$CFLAGS"
|
||||
ax_pthread_save_LIBS="$LIBS"
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
|
||||
# Check for various functions. We must include pthread.h,
|
||||
# since some functions may be macros. (On the Sequent, we
|
||||
# need a special flag -Kthread to make this header compile.)
|
||||
# We check for pthread_join because it is in -lpthread on IRIX
|
||||
# while pthread_create is in libc. We check for pthread_attr_init
|
||||
# due to DEC craziness with -lpthreads. We check for
|
||||
# pthread_cleanup_push because it is one of the few pthread
|
||||
# functions on Solaris that doesn't have a non-functional libc stub.
|
||||
# We try pthread_create on general principles.
|
||||
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
|
||||
# if $ax_pthread_check_cond
|
||||
# error "$ax_pthread_check_macro must be defined"
|
||||
# endif
|
||||
static void *some_global = NULL;
|
||||
static void routine(void *a)
|
||||
{
|
||||
/* To avoid any unused-parameter or
|
||||
unused-but-set-parameter warning. */
|
||||
some_global = a;
|
||||
}
|
||||
static void *start_routine(void *a) { return a; }],
|
||||
[pthread_t th; pthread_attr_t attr;
|
||||
pthread_create(&th, 0, start_routine, 0);
|
||||
pthread_join(th, 0);
|
||||
pthread_attr_init(&attr);
|
||||
pthread_cleanup_push(routine, 0);
|
||||
pthread_cleanup_pop(0) /* ; */])],
|
||||
[ax_pthread_ok=yes],
|
||||
[])
|
||||
|
||||
CFLAGS="$ax_pthread_save_CFLAGS"
|
||||
LIBS="$ax_pthread_save_LIBS"
|
||||
|
||||
AC_MSG_RESULT([$ax_pthread_ok])
|
||||
AS_IF([test "x$ax_pthread_ok" = "xyes"], [break])
|
||||
|
||||
PTHREAD_LIBS=""
|
||||
PTHREAD_CFLAGS=""
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Clang needs special handling, because older versions handle the -pthread
|
||||
# option in a rather... idiosyncratic way
|
||||
|
||||
if test "x$ax_pthread_clang" = "xyes"; then
|
||||
|
||||
# Clang takes -pthread; it has never supported any other flag
|
||||
|
||||
# (Note 1: This will need to be revisited if a system that Clang
|
||||
# supports has POSIX threads in a separate library. This tends not
|
||||
# to be the way of modern systems, but it's conceivable.)
|
||||
|
||||
# (Note 2: On some systems, notably Darwin, -pthread is not needed
|
||||
# to get POSIX threads support; the API is always present and
|
||||
# active. We could reasonably leave PTHREAD_CFLAGS empty. But
|
||||
# -pthread does define _REENTRANT, and while the Darwin headers
|
||||
# ignore this macro, third-party headers might not.)
|
||||
|
||||
# However, older versions of Clang make a point of warning the user
|
||||
# that, in an invocation where only linking and no compilation is
|
||||
# taking place, the -pthread option has no effect ("argument unused
|
||||
# during compilation"). They expect -pthread to be passed in only
|
||||
# when source code is being compiled.
|
||||
#
|
||||
# Problem is, this is at odds with the way Automake and most other
|
||||
# C build frameworks function, which is that the same flags used in
|
||||
# compilation (CFLAGS) are also used in linking. Many systems
|
||||
# supported by AX_PTHREAD require exactly this for POSIX threads
|
||||
# support, and in fact it is often not straightforward to specify a
|
||||
# flag that is used only in the compilation phase and not in
|
||||
# linking. Such a scenario is extremely rare in practice.
|
||||
#
|
||||
# Even though use of the -pthread flag in linking would only print
|
||||
# a warning, this can be a nuisance for well-run software projects
|
||||
# that build with -Werror. So if the active version of Clang has
|
||||
# this misfeature, we search for an option to squash it.
|
||||
|
||||
AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread],
|
||||
[ax_cv_PTHREAD_CLANG_NO_WARN_FLAG],
|
||||
[ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown
|
||||
# Create an alternate version of $ac_link that compiles and
|
||||
# links in two steps (.c -> .o, .o -> exe) instead of one
|
||||
# (.c -> exe), because the warning occurs only in the second
|
||||
# step
|
||||
ax_pthread_save_ac_link="$ac_link"
|
||||
ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g'
|
||||
ax_pthread_link_step=`AS_ECHO(["$ac_link"]) | sed "$ax_pthread_sed"`
|
||||
ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)"
|
||||
ax_pthread_save_CFLAGS="$CFLAGS"
|
||||
for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do
|
||||
AS_IF([test "x$ax_pthread_try" = "xunknown"], [break])
|
||||
CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS"
|
||||
ac_link="$ax_pthread_save_ac_link"
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
|
||||
[ac_link="$ax_pthread_2step_ac_link"
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])],
|
||||
[break])
|
||||
])
|
||||
done
|
||||
ac_link="$ax_pthread_save_ac_link"
|
||||
CFLAGS="$ax_pthread_save_CFLAGS"
|
||||
AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no])
|
||||
ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try"
|
||||
])
|
||||
|
||||
case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in
|
||||
no | unknown) ;;
|
||||
*) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;;
|
||||
esac
|
||||
|
||||
fi # $ax_pthread_clang = yes
|
||||
|
||||
|
||||
|
||||
# Various other checks:
|
||||
if test "x$ax_pthread_ok" = "xyes"; then
|
||||
ax_pthread_save_CFLAGS="$CFLAGS"
|
||||
ax_pthread_save_LIBS="$LIBS"
|
||||
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
|
||||
# Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
|
||||
AC_CACHE_CHECK([for joinable pthread attribute],
|
||||
[ax_cv_PTHREAD_JOINABLE_ATTR],
|
||||
[ax_cv_PTHREAD_JOINABLE_ATTR=unknown
|
||||
for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
|
||||
[int attr = $ax_pthread_attr; return attr /* ; */])],
|
||||
[ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break],
|
||||
[])
|
||||
done
|
||||
])
|
||||
AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \
|
||||
test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \
|
||||
test "x$ax_pthread_joinable_attr_defined" != "xyes"],
|
||||
[AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE],
|
||||
[$ax_cv_PTHREAD_JOINABLE_ATTR],
|
||||
[Define to necessary symbol if this constant
|
||||
uses a non-standard name on your system.])
|
||||
ax_pthread_joinable_attr_defined=yes
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether more special flags are required for pthreads],
|
||||
[ax_cv_PTHREAD_SPECIAL_FLAGS],
|
||||
[ax_cv_PTHREAD_SPECIAL_FLAGS=no
|
||||
case $host_os in
|
||||
solaris*)
|
||||
ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \
|
||||
test "x$ax_pthread_special_flags_added" != "xyes"],
|
||||
[PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS"
|
||||
ax_pthread_special_flags_added=yes])
|
||||
|
||||
AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
|
||||
[ax_cv_PTHREAD_PRIO_INHERIT],
|
||||
[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
|
||||
[[int i = PTHREAD_PRIO_INHERIT;
|
||||
return i;]])],
|
||||
[ax_cv_PTHREAD_PRIO_INHERIT=yes],
|
||||
[ax_cv_PTHREAD_PRIO_INHERIT=no])
|
||||
])
|
||||
AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \
|
||||
test "x$ax_pthread_prio_inherit_defined" != "xyes"],
|
||||
[AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])
|
||||
ax_pthread_prio_inherit_defined=yes
|
||||
])
|
||||
|
||||
CFLAGS="$ax_pthread_save_CFLAGS"
|
||||
LIBS="$ax_pthread_save_LIBS"
|
||||
|
||||
# More AIX lossage: compile with *_r variant
|
||||
if test "x$GCC" != "xyes"; then
|
||||
case $host_os in
|
||||
aix*)
|
||||
AS_CASE(["x/$CC"],
|
||||
[x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
|
||||
[#handle absolute path differently from PATH based program lookup
|
||||
AS_CASE(["x$CC"],
|
||||
[x/*],
|
||||
[
|
||||
AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])
|
||||
AS_IF([test "x${CXX}" != "x"], [AS_IF([AS_EXECUTABLE_P([${CXX}_r])],[PTHREAD_CXX="${CXX}_r"])])
|
||||
],
|
||||
[
|
||||
AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])
|
||||
AS_IF([test "x${CXX}" != "x"], [AC_CHECK_PROGS([PTHREAD_CXX],[${CXX}_r],[$CXX])])
|
||||
]
|
||||
)
|
||||
])
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
|
||||
test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
|
||||
test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX"
|
||||
|
||||
AC_SUBST([PTHREAD_LIBS])
|
||||
AC_SUBST([PTHREAD_CFLAGS])
|
||||
AC_SUBST([PTHREAD_CC])
|
||||
AC_SUBST([PTHREAD_CXX])
|
||||
|
||||
# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
|
||||
if test "x$ax_pthread_ok" = "xyes"; then
|
||||
ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
|
||||
:
|
||||
else
|
||||
ax_pthread_ok=no
|
||||
$2
|
||||
fi
|
||||
AC_LANG_POP
|
||||
])dnl AX_PTHREAD
|
||||
@@ -0,0 +1,34 @@
|
||||
dnl # SPDX-License-Identifier: CDDL-1.0
|
||||
dnl #
|
||||
dnl # 5.6 API change
|
||||
dnl # Before 5.6, fs_parse() took a struct fs_parameter_description
|
||||
dnl # which wraps the parameter specs with name and enum pointers. From 5.6,
|
||||
dnl # the description struct was removed and fs_parse() accepts the
|
||||
dnl # fs_parameter_spec directly.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_FS_PARSE], [
|
||||
ZFS_LINUX_TEST_SRC([fs_parse], [
|
||||
#include <linux/fs_context.h>
|
||||
#include <linux/fs_parser.h>
|
||||
],[
|
||||
static const struct fs_parameter_spec specs[] = {
|
||||
{}
|
||||
};
|
||||
int test __attribute__ ((unused));
|
||||
struct fs_context *fc __attribute__ ((unused)) = NULL;
|
||||
struct fs_parameter param __attribute__ ((unused));
|
||||
struct fs_parse_result result __attribute__ ((unused));
|
||||
test = fs_parse(fc, specs, &param, &result);
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_FS_PARSE], [
|
||||
AC_MSG_CHECKING([whether fs_parse() takes fs_parameter_spec directly])
|
||||
ZFS_LINUX_TEST_RESULT([fs_parse], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_FS_PARSE_TAKES_SPEC, 1,
|
||||
[fs_parse() takes fs_parameter_spec directly])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
@@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
|
||||
ZFS_AC_KERNEL_SRC_SECURITY_INODE
|
||||
ZFS_AC_KERNEL_SRC_FS_CONTEXT
|
||||
ZFS_AC_KERNEL_SRC_FS_PARSE
|
||||
ZFS_AC_KERNEL_SRC_SB_DYING
|
||||
ZFS_AC_KERNEL_SRC_SET_NLINK
|
||||
ZFS_AC_KERNEL_SRC_SGET
|
||||
@@ -153,9 +154,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_MSG_CHECKING([for available kernel interfaces])
|
||||
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
|
||||
AC_MSG_RESULT([done])
|
||||
ZFS_LINUX_TEST_COMPILE_ALL([kabi], [for available kernel interfaces])
|
||||
])
|
||||
|
||||
dnl #
|
||||
@@ -203,6 +202,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
|
||||
ZFS_AC_KERNEL_SECURITY_INODE
|
||||
ZFS_AC_KERNEL_FS_CONTEXT
|
||||
ZFS_AC_KERNEL_FS_PARSE
|
||||
ZFS_AC_KERNEL_SB_DYING
|
||||
ZFS_AC_KERNEL_SET_NLINK
|
||||
ZFS_AC_KERNEL_SGET
|
||||
@@ -753,6 +753,108 @@ AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [
|
||||
], [], [yes])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Progress output for ZFS_LINUX_TEST_COMPILE_ALL
|
||||
dnl #
|
||||
dnl # From clean, we currently have ~250 kernel tests to compile. This can
|
||||
dnl # take anywhere from a few seconds to a few minutes while we wait for
|
||||
dnl # the module build invocation to complete (see ZFS_LINUX_COMPILE).
|
||||
dnl #
|
||||
dnl # To show some progress in the main set of tests, we start a background
|
||||
dnl # job to monitor the build progress and update the output.
|
||||
dnl #
|
||||
AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_START], [
|
||||
dnl # normal "checking for..." output
|
||||
AC_MSG_CHECKING([$2])
|
||||
|
||||
dnl # don't start the background job if configure was called with
|
||||
dnl # --silent or --quiet, or if configure's output stream is not
|
||||
dnl # attached to a terminal
|
||||
AS_IF([test "x$silent" != "xyes" -a -t AS_MESSAGE_FD], [
|
||||
dnl # save "checking" message for cleanup later
|
||||
_zfs_linux_test_progress_text="$2"
|
||||
|
||||
dnl # new shell job in background
|
||||
(
|
||||
dnl # ZFS_LINUX_CONFTEST_MAKEFILE adds one line per
|
||||
dnl # test to the top Makefile, so the line count
|
||||
dnl # is our target
|
||||
total=$(wc -l < $1/Makefile)
|
||||
count=0
|
||||
|
||||
dnl # eject if our parent process has gone away. this
|
||||
dnl # is protection against the parent being killed.
|
||||
dnl # (we can't use trap because autoconf generates
|
||||
dnl # that and doesn't provide an easy way to hook it).
|
||||
while kill -0 $$ 2>/dev/null ; do
|
||||
|
||||
dnl # ZFS_LINUX_TEST_COMPILE_ALL has a short
|
||||
dnl # second stage for modpost, where build.log is
|
||||
dnl # recreated. we make some effort to both
|
||||
dnl # detect that and handle it, mostly by
|
||||
dnl # making sure the counter never goes
|
||||
dnl # backwards.
|
||||
if test "$count" -lt "$total" ; then
|
||||
dnl # if build.log went away, then
|
||||
dnl # we never got to do a last count,
|
||||
dnl # so we can assume they're all
|
||||
dnl # finished and just bump the count
|
||||
dnl # to the total
|
||||
if ! test -f $1/build.log ; then
|
||||
count=$total
|
||||
else
|
||||
dnl # look for compilation lines
|
||||
dnl # (CC) for .o files that
|
||||
dnl # are in a dir (so not
|
||||
dnl # whole-of-build artifacts)
|
||||
dnl # and only have a single
|
||||
dnl # period (so not .mod.o
|
||||
dnl # link artifacts)
|
||||
count_n=$(awk '/CC/ && /\/[[^\.]]+\.o$/ { c++ } END { print c }' $1/build.log 2>/dev/null)
|
||||
if test "x$count_n" != "x" ; then
|
||||
dnl # empty output
|
||||
dnl # means awk failed,
|
||||
dnl # likely build.log
|
||||
dnl # went away. use
|
||||
dnl # the current count
|
||||
count=$count_n
|
||||
fi
|
||||
fi
|
||||
|
||||
dnl # re-output the entire message with
|
||||
dnl # the new counts
|
||||
printf '\rchecking %s... %d/%d' "$2" "$count" "$total" >&6
|
||||
fi
|
||||
|
||||
dnl # yield before loop
|
||||
sleep 0.5
|
||||
done
|
||||
) &
|
||||
|
||||
dnl # save the pid so we can kill it later
|
||||
_zfs_linux_test_progress_pid=$!
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE], [
|
||||
dnl # only do cleanup if we actually started the job
|
||||
AS_IF([test "x$_zfs_linux_test_progress_pid" != "x"], [
|
||||
dnl # kill it; no-op if it already died
|
||||
kill $_zfs_linux_test_progress_pid 2>/dev/null
|
||||
dnl # wait for it to really go away and clean it up
|
||||
wait $_zfs_linux_test_progress_pid 2>/dev/null
|
||||
dnl # reprint the original checking line. the control code
|
||||
dnl # is ANSI "erase entire line"
|
||||
printf '\r\033\1332Kchecking %s... ' "$_zfs_linux_test_progress_text" >&AS_MESSAGE_FD
|
||||
dnl # cleanup for next run
|
||||
_zfs_linux_test_progress_pid=
|
||||
_zfs_linux_test_progress_text=
|
||||
])
|
||||
|
||||
dnl # normal final output for screen and config.log
|
||||
AC_MSG_RESULT([$1])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Perform the compilation of the test cases in two phases.
|
||||
dnl #
|
||||
@@ -771,6 +873,10 @@ dnl # The maximum allowed parallelism can be controlled by setting the
|
||||
dnl # TEST_JOBS environment variable. Otherwise, it defaults to $(nproc).
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
|
||||
AS_IF([test "x$2" != "x"], [
|
||||
_ZFS_LINUX_TEST_COMPILE_PROGRESS_START([build], [$2])
|
||||
])
|
||||
|
||||
dnl # Phase 1 - Compilation only, final linking is skipped.
|
||||
ZFS_LINUX_TEST_COMPILE([$1], [build])
|
||||
|
||||
@@ -818,6 +924,10 @@ AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
|
||||
])
|
||||
done
|
||||
])
|
||||
|
||||
AS_IF([test "x$2" != "x"], [
|
||||
_ZFS_LINUX_TEST_COMPILE_PROGRESS_DONE([done])
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
|
||||
@@ -39,6 +39,18 @@ dnl # (If INVARIANTS is detected, we need to force DEBUG, or strange panics
|
||||
dnl # can ensue.)
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_DEBUG], [
|
||||
dnl #
|
||||
dnl # In the Linux kernel copy-builtin build, assertion/debug support
|
||||
dnl # is selected by CONFIG_ZFS_DEBUG (Kconfig).
|
||||
dnl #
|
||||
AH_BOTTOM([
|
||||
#ifdef CONFIG_ZFS
|
||||
#undef ZFS_DEBUG
|
||||
#ifdef CONFIG_ZFS_DEBUG
|
||||
#define ZFS_DEBUG 1
|
||||
#endif
|
||||
#endif])
|
||||
|
||||
AC_MSG_CHECKING([whether assertion support will be enabled])
|
||||
AC_ARG_ENABLE([debug],
|
||||
[AS_HELP_STRING([--enable-debug],
|
||||
|
||||
@@ -54,6 +54,7 @@ AC_PROG_LN_S
|
||||
PKG_PROG_PKG_CONFIG
|
||||
AM_PROG_AS
|
||||
AM_PROG_CC_C_O
|
||||
AX_PTHREAD
|
||||
AX_CODE_COVERAGE
|
||||
_AM_PROG_TAR(pax)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ usr/bin/zarcsummary.py
|
||||
usr/share/zfs/zfs-helpers.sh
|
||||
etc/default/zfs
|
||||
etc/init.d
|
||||
etc/sudoers.d
|
||||
etc/zfs/vdev_id.conf.alias.example
|
||||
etc/zfs/vdev_id.conf.multipath.example
|
||||
etc/zfs/vdev_id.conf.sas_direct.example
|
||||
|
||||
@@ -840,27 +840,41 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
|
||||
errno);
|
||||
return (-1);
|
||||
}
|
||||
if (chown(runtime_path, 0, 0) != 0) {
|
||||
pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",
|
||||
errno);
|
||||
const int runtime_fd = open(runtime_path,
|
||||
O_RDONLY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
|
||||
if (runtime_fd < 0) {
|
||||
pam_syslog(pamh, LOG_ERR, "Can't open runtime path: %d", errno);
|
||||
return (-1);
|
||||
}
|
||||
if (chmod(runtime_path, S_IRWXU) != 0) {
|
||||
if (fchown(runtime_fd, 0, 0) != 0) {
|
||||
pam_syslog(pamh, LOG_ERR, "Can't chown runtime path: %d",
|
||||
errno);
|
||||
close(runtime_fd);
|
||||
return (-1);
|
||||
}
|
||||
if (fchmod(runtime_fd, S_IRWXU) != 0) {
|
||||
pam_syslog(pamh, LOG_ERR, "Can't chmod runtime path: %d",
|
||||
errno);
|
||||
close(runtime_fd);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
char *counter_path;
|
||||
if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
|
||||
if (asprintf(&counter_path, "%u", config->uid) == -1) {
|
||||
close(runtime_fd);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
const int fd = open(counter_path,
|
||||
const int fd = openat(runtime_fd, counter_path,
|
||||
O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
|
||||
S_IRUSR | S_IWUSR);
|
||||
int ret = errno;
|
||||
|
||||
free(counter_path);
|
||||
close(runtime_fd);
|
||||
|
||||
if (fd < 0) {
|
||||
pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", errno);
|
||||
pam_syslog(pamh, LOG_ERR, "Can't open counter file: %d", ret);
|
||||
return (-1);
|
||||
}
|
||||
if (flock(fd, LOCK_EX) != 0) {
|
||||
@@ -871,7 +885,6 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
|
||||
char counter[20];
|
||||
char *pos = counter;
|
||||
int remaining = sizeof (counter) - 1;
|
||||
int ret;
|
||||
counter[sizeof (counter) - 1] = 0;
|
||||
while (remaining > 0 && (ret = read(fd, pos, remaining)) > 0) {
|
||||
remaining -= ret;
|
||||
|
||||
@@ -43,6 +43,17 @@ config ZFS
|
||||
|
||||
To compile this file system support as a module, choose M here.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config ZFS_DEBUG
|
||||
bool "ZFS debugging"
|
||||
depends on ZFS
|
||||
help
|
||||
Enable ZFS debugging. This turns on all ASSERT() assertions,
|
||||
enables additional debug-only code paths, and promotes
|
||||
compiler warnings to errors. This should only be enabled for
|
||||
development or troubleshooting.
|
||||
|
||||
If unsure, say N.
|
||||
EOF
|
||||
|
||||
|
||||
@@ -1,10 +1,4 @@
|
||||
# SPDX-License-Identifier: CDDL-1.0
|
||||
sudoersddir = $(sysconfdir)/sudoers.d
|
||||
sudoersd_DATA = \
|
||||
%D%/sudoers.d/zfs
|
||||
|
||||
dist_noinst_DATA += $(sudoersd_DATA)
|
||||
|
||||
|
||||
sysconf_zfsdir = $(sysconfdir)/zfs
|
||||
|
||||
@@ -88,8 +82,6 @@ systemdgenerator_PROGRAMS = \
|
||||
%C%_systemd_system_generators_zfs_mount_generator_LDADD = \
|
||||
libzfs.la
|
||||
|
||||
%C%_systemd_system_generators_zfs_mount_generator_LDFLAGS = -pthread
|
||||
|
||||
CPPCHECKTARGETS += $(systemdgenerator_PROGRAMS)
|
||||
endif
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
##
|
||||
## Allow any user to run `zpool iostat/status -c smart` in order
|
||||
## to read basic SMART health statistics for a pool.
|
||||
##
|
||||
## CAUTION: Any syntax error introduced here will break sudo.
|
||||
## Editing with 'visudo' is recommended: visudo -f /etc/sudoers.d/zfs
|
||||
##
|
||||
|
||||
# ALL ALL = (root) NOPASSWD: /usr/sbin/smartctl -a /dev/[hsv]d[a-z0-9]*
|
||||
@@ -29,6 +29,5 @@
|
||||
#define _SYS_ARC_OS_H
|
||||
|
||||
int param_set_arc_free_target(SYSCTL_HANDLER_ARGS);
|
||||
int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
#include <linux/sched.h>
|
||||
|
||||
typedef enum {
|
||||
RW_DRIVER = 2,
|
||||
RW_DEFAULT = 4,
|
||||
RW_NOLOCKDEP = 5
|
||||
} krw_type_t;
|
||||
@@ -75,20 +74,35 @@ spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
|
||||
{
|
||||
rwp->rw_type = type;
|
||||
}
|
||||
|
||||
static inline void
|
||||
spl_rw_lockdep_off(void)
|
||||
{
|
||||
lockdep_off();
|
||||
}
|
||||
|
||||
static inline void
|
||||
spl_rw_lockdep_on(void)
|
||||
{
|
||||
lockdep_on();
|
||||
}
|
||||
|
||||
static inline void
|
||||
spl_rw_lockdep_off_maybe(krwlock_t *rwp) \
|
||||
{ \
|
||||
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
|
||||
lockdep_off(); \
|
||||
spl_rw_lockdep_off(); \
|
||||
}
|
||||
static inline void
|
||||
spl_rw_lockdep_on_maybe(krwlock_t *rwp) \
|
||||
{ \
|
||||
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
|
||||
lockdep_on(); \
|
||||
spl_rw_lockdep_on(); \
|
||||
}
|
||||
#else /* CONFIG_LOCKDEP */
|
||||
#define spl_rw_set_type(rwp, type)
|
||||
#define spl_rw_lockdep_off()
|
||||
#define spl_rw_lockdep_on()
|
||||
#define spl_rw_lockdep_off_maybe(rwp)
|
||||
#define spl_rw_lockdep_on_maybe(rwp)
|
||||
#endif /* CONFIG_LOCKDEP */
|
||||
@@ -117,6 +131,56 @@ RW_READ_HELD(krwlock_t *rwp)
|
||||
* will be correctly located in the users code which is important
|
||||
* for the built in kernel lock analysis tools
|
||||
*/
|
||||
#define spl_rw_tryenter_impl(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
int _rc_ = 0; \
|
||||
\
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
_rc_ = down_read_trylock(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
#define spl_rw_enter_impl(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
down_read(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
down_write(SEM(rwp)); \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
})
|
||||
|
||||
#define spl_rw_exit_impl(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
if (RW_WRITE_HELD(rwp)) { \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
up_write(SEM(rwp)); \
|
||||
} else { \
|
||||
ASSERT(RW_READ_HELD(rwp)); \
|
||||
up_read(SEM(rwp)); \
|
||||
} \
|
||||
})
|
||||
|
||||
#define spl_rw_downgrade_impl(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
downgrade_write(SEM(rwp)); \
|
||||
})
|
||||
|
||||
#define rw_init(rwp, name, type, arg) /* CSTYLED */ \
|
||||
({ \
|
||||
static struct lock_class_key __key; \
|
||||
@@ -140,60 +204,60 @@ RW_READ_HELD(krwlock_t *rwp)
|
||||
|
||||
#define rw_tryenter(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
int _rc_ = 0; \
|
||||
\
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
_rc_ = down_read_trylock(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
int _rc_ = spl_rw_tryenter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
#define rw_tryenter_nolockdep(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
int _rc_ = spl_rw_tryenter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on(); \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
#define rw_enter(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
down_read(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
down_write(SEM(rwp)); \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
spl_rw_enter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
#define rw_enter_nolockdep(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_enter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#define rw_exit(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
if (RW_WRITE_HELD(rwp)) { \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
up_write(SEM(rwp)); \
|
||||
} else { \
|
||||
ASSERT(RW_READ_HELD(rwp)); \
|
||||
up_read(SEM(rwp)); \
|
||||
} \
|
||||
spl_rw_exit_impl(rwp); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
#define rw_exit_nolockdep(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_exit_impl(rwp); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#define rw_downgrade(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
downgrade_write(SEM(rwp)); \
|
||||
spl_rw_downgrade_impl(rwp); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
#define rw_downgrade_nolockdep(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_downgrade_impl(rwp); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#endif /* _SPL_RWLOCK_H */
|
||||
|
||||
@@ -95,8 +95,7 @@ typedef void arc_prune_func_t(uint64_t bytes, void *priv);
|
||||
extern uint_t zfs_arc_average_blocksize;
|
||||
extern int l2arc_exclude_special;
|
||||
|
||||
/* generic arc_done_func_t's which you can use */
|
||||
arc_read_done_func_t arc_bcopy_func;
|
||||
/* generic arc_done_func_t which can be used */
|
||||
arc_read_done_func_t arc_getbuf_func;
|
||||
|
||||
/* generic arc_prune_func_t wrapper for callbacks */
|
||||
|
||||
@@ -832,6 +832,8 @@ typedef struct arc_stats {
|
||||
* due to ARC_FLAG_UNCACHED being set.
|
||||
*/
|
||||
kstat_named_t arcstat_uncached_evictable_metadata;
|
||||
/* Number of L2ARC devices currently attached across all pools. */
|
||||
kstat_named_t arcstat_l2_ndev;
|
||||
kstat_named_t arcstat_l2_hits;
|
||||
kstat_named_t arcstat_l2_misses;
|
||||
/*
|
||||
@@ -1103,7 +1105,7 @@ extern arc_sums_t arc_sums;
|
||||
extern hrtime_t arc_growtime;
|
||||
extern boolean_t arc_warm;
|
||||
extern uint_t arc_grow_retry;
|
||||
extern uint_t arc_no_grow_shift;
|
||||
extern uint_t zfs_arc_no_grow_shift;
|
||||
extern uint_t arc_shrink_shift;
|
||||
extern kmutex_t arc_prune_mtx;
|
||||
extern list_t arc_prune_list;
|
||||
@@ -1134,6 +1136,7 @@ extern int param_set_arc_int(ZFS_MODULE_PARAM_ARGS);
|
||||
extern int param_set_arc_min(ZFS_MODULE_PARAM_ARGS);
|
||||
extern int param_set_arc_max(ZFS_MODULE_PARAM_ARGS);
|
||||
extern int param_set_l2arc_dwpd_limit(ZFS_MODULE_PARAM_ARGS);
|
||||
extern int param_set_arc_no_grow_shift(ZFS_MODULE_PARAM_ARGS);
|
||||
extern void l2arc_dwpd_bump_reset(void);
|
||||
|
||||
/* used in zdb.c */
|
||||
|
||||
@@ -363,6 +363,7 @@ typedef enum {
|
||||
/* Small enough to not hog a whole line of printout in zpool(8). */
|
||||
#define ZPROP_MAX_COMMENT 32
|
||||
#define ZPROP_BOOLEAN_NA 2
|
||||
#define ZPROP_BOOLEAN_INHERIT 2
|
||||
|
||||
#define ZPROP_VALUE "value"
|
||||
#define ZPROP_SOURCE "source"
|
||||
@@ -476,6 +477,8 @@ typedef enum {
|
||||
VDEV_PROP_SCHEDULER,
|
||||
VDEV_PROP_FDOMAIN,
|
||||
VDEV_PROP_FGROUP,
|
||||
VDEV_PROP_ALLOC_BIAS,
|
||||
VDEV_PROP_ROTATIONAL,
|
||||
VDEV_NUM_PROPS
|
||||
} vdev_prop_t;
|
||||
|
||||
@@ -491,6 +494,16 @@ typedef enum {
|
||||
VDEV_SCHEDULER_OFF
|
||||
} vdev_scheduler_type_t;
|
||||
|
||||
/*
|
||||
* Allocation bias for top-level vdevs (alloc_bias property).
|
||||
*/
|
||||
typedef enum vdev_alloc_bias {
|
||||
VDEV_BIAS_NONE,
|
||||
VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */
|
||||
VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */
|
||||
VDEV_BIAS_DEDUP /* dedicated to dedup metadata */
|
||||
} vdev_alloc_bias_t;
|
||||
|
||||
/*
|
||||
* Dataset property functions shared between libzfs and kernel.
|
||||
*/
|
||||
@@ -919,6 +932,7 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
|
||||
|
||||
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
|
||||
#define ZPOOL_CONFIG_VDEV_ROTATIONAL "rotational"
|
||||
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
|
||||
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
|
||||
#define ZPOOL_CONFIG_SPARES "spares"
|
||||
|
||||
@@ -330,7 +330,7 @@ struct metaslab_group {
|
||||
*
|
||||
* As the space map grows (as a result of the appends) it will
|
||||
* eventually become space-inefficient. When the metaslab's in-core
|
||||
* free tree is zfs_condense_pct/100 times the size of the minimal
|
||||
* free tree is zfs_metaslab_condense_pct/100 times the size of the minimal
|
||||
* on-disk representation, we rewrite it in its minimized form. If a
|
||||
* metaslab needs to condense then we must set the ms_condensing flag to
|
||||
* ensure that allocations are not performed on the metaslab that is
|
||||
|
||||
@@ -155,14 +155,6 @@ struct vdev_queue {
|
||||
kmutex_t vq_lock;
|
||||
};
|
||||
|
||||
typedef enum vdev_alloc_bias {
|
||||
VDEV_BIAS_NONE,
|
||||
VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */
|
||||
VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */
|
||||
VDEV_BIAS_DEDUP /* dedicated to dedup metadata */
|
||||
} vdev_alloc_bias_t;
|
||||
|
||||
|
||||
/*
|
||||
* On-disk indirect vdev state.
|
||||
*
|
||||
@@ -600,6 +592,7 @@ extern boolean_t vdev_log_state_valid(vdev_t *vd);
|
||||
extern int vdev_load(vdev_t *vd);
|
||||
extern int vdev_dtl_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_dispatch(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
|
||||
extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg);
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
* Copyright (c) 2026, TrueNAS.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
@@ -121,13 +122,13 @@ typedef enum zap_flags {
|
||||
/*
|
||||
* Create a new zapobj with no attributes and return its object number.
|
||||
*/
|
||||
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
|
||||
uint64_t zap_create(objset_t *os, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,
|
||||
uint64_t zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
|
||||
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
|
||||
uint64_t zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,
|
||||
uint64_t zap_create_norm_dnsize(objset_t *os, int normflags,
|
||||
dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
|
||||
int dnodesize, dmu_tx_t *tx);
|
||||
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
|
||||
@@ -137,11 +138,22 @@ uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
|
||||
zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
|
||||
int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
|
||||
int dnodesize, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a zap object and return a pointer to the newly allocated dnode via
|
||||
* the allocated_dnode argument. The returned dnode will be held and the
|
||||
* caller is responsible for releasing the hold by calling dnode_rele().
|
||||
*/
|
||||
uint64_t zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
|
||||
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dnodesize,
|
||||
dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes, and add an entry to an existing
|
||||
* zapobj with the given name as key and the object number of the new zapobj as
|
||||
* the value. Returns the object number of the new zapobj.
|
||||
*/
|
||||
uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
|
||||
uint64_t parent_obj, const char *name, dmu_tx_t *tx);
|
||||
uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
|
||||
@@ -157,20 +169,21 @@ void mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags,
|
||||
* Create a new zapobj with no attributes from the given (unallocated)
|
||||
* object number.
|
||||
*/
|
||||
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
|
||||
int zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
|
||||
int zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
|
||||
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
|
||||
int zap_create_claim_norm(objset_t *os, uint64_t obj,
|
||||
int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
|
||||
int zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj,
|
||||
int normflags, dmu_object_type_t ot,
|
||||
dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* The zapobj passed in must be a valid ZAP object for all of the
|
||||
* following routines.
|
||||
* All operations on a zapobj take either the objset/objectid pair
|
||||
* that "names" the object, or an existing dnode_t for the object. The
|
||||
* zapobj passed in must be a valid ZAP object.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -178,7 +191,7 @@ int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
|
||||
*
|
||||
* Frees the object number using dmu_object_free.
|
||||
*/
|
||||
int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
|
||||
int zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Manipulate attributes.
|
||||
@@ -207,21 +220,32 @@ int zap_destroy(objset_t *ds, uint64_t zapobj, dmu_tx_t *tx);
|
||||
* fit will be transferred to 'buf'. If the entire attribute was not
|
||||
* transferred, the call will return EOVERFLOW.
|
||||
*/
|
||||
int zap_lookup(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
|
||||
/*
|
||||
* If rn_len is nonzero, realname will be set to the name of the found
|
||||
* entry (which may be different from the requested name if matchtype is
|
||||
* not MT_EXACT).
|
||||
* not zero).
|
||||
*
|
||||
* If normalization_conflictp is not NULL, it will be set if there is
|
||||
* another name with the same case/unicode normalized form.
|
||||
*/
|
||||
int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *normalization_conflictp);
|
||||
int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp);
|
||||
|
||||
/*
|
||||
* The _uint64 variants take an array of uint64_t as the key. The ZAP must
|
||||
* be created with ZAP_FLAG_UINT64_KEY.
|
||||
*/
|
||||
int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
@@ -229,20 +253,31 @@ int zap_lookup_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int zap_lookup_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
uint64_t *actual_num_integers);
|
||||
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
|
||||
|
||||
/*
|
||||
* Lookup the attribute with the given name. Returns ENOENT if it does not
|
||||
* exist, 0 if it does. This is like zap_lookup(), but may be more efficient.
|
||||
*/
|
||||
int zap_contains(objset_t *os, uint64_t zapobj, const char *name);
|
||||
int zap_contains_by_dnode(dnode_t *dn, const char *name);
|
||||
|
||||
/*
|
||||
* Prefetch the blocks within the ZAP where the given key is stored. The
|
||||
* prefetch IO will occur in the background.
|
||||
*/
|
||||
int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
|
||||
|
||||
/* Prefetch by uint64_t[] key. */
|
||||
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints);
|
||||
int zap_prefetch_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints);
|
||||
|
||||
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp);
|
||||
/*
|
||||
* Prefetch the entire ZAP object. Unlike zap_prefetch(), will block until
|
||||
* the entire object is loaded into the ARC.
|
||||
*/
|
||||
int zap_prefetch_object(objset_t *os, uint64_t zapobj);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
@@ -250,13 +285,15 @@ int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
* If an attribute with the given name already exists, the call will
|
||||
* fail and return EEXIST.
|
||||
*/
|
||||
int zap_add(objset_t *ds, uint64_t zapobj, const char *key,
|
||||
int zap_add(objset_t *os, uint64_t zapobj, const char *key,
|
||||
int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int zap_add_by_dnode(dnode_t *dn, const char *key,
|
||||
int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
|
||||
|
||||
/* Add by uint64_t[] key. */
|
||||
int zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
@@ -271,8 +308,12 @@ int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
* existing attribute's integer size, in which case the attribute's
|
||||
* integer size will be updated to the new value.
|
||||
*/
|
||||
int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int zap_update(objset_t *os, uint64_t zapobj, const char *name,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int zap_update_by_dnode(dnode_t *dn, const char *name, int integer_size,
|
||||
uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
|
||||
/* Update by uint64_t[] key. */
|
||||
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
@@ -287,8 +328,12 @@ int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
* If the requested attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_length(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
int zap_length(objset_t *os, uint64_t zapobj, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int zap_length_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
|
||||
/* Attribute length by uint64_t[] key. */
|
||||
int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, uint64_t *integer_size, uint64_t *num_integers);
|
||||
int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
@@ -300,10 +345,12 @@ int zap_length_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
* If the specified attribute does not exist, the call will fail and
|
||||
* return ENOENT.
|
||||
*/
|
||||
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
||||
matchtype_t mt, dmu_tx_t *tx);
|
||||
int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
matchtype_t mt, dmu_tx_t *tx);
|
||||
|
||||
/* Remove by uint64_t[] key. */
|
||||
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, dmu_tx_t *tx);
|
||||
int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
@@ -313,9 +360,19 @@ int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
* Returns (in *count) the number of attributes in the specified zap
|
||||
* object.
|
||||
*/
|
||||
int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
|
||||
int zap_count(objset_t *os, uint64_t zapobj, uint64_t *count);
|
||||
int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
|
||||
|
||||
/*
|
||||
* Look up an existing uint64 value, add the delta value to it, and
|
||||
* update it with the new value. If the new value is 0, removes the key
|
||||
* entirely.
|
||||
*/
|
||||
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
int zap_increment_by_dnode(dnode_t *dn, const char *name, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Returns (in name) the name of the entry whose (value & mask)
|
||||
* (za_first_integer) is value, or ENOENT if not found. The string
|
||||
@@ -324,21 +381,8 @@ int zap_count_by_dnode(dnode_t *dn, uint64_t *count);
|
||||
*/
|
||||
int zap_value_search(objset_t *os, uint64_t zapobj,
|
||||
uint64_t value, uint64_t mask, char *name, uint64_t namelen);
|
||||
|
||||
/*
|
||||
* Transfer all the entries from fromobj into intoobj. Only works on
|
||||
* int_size=8 num_integers=1 values. Fails if there are any duplicated
|
||||
* entries.
|
||||
*/
|
||||
int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
|
||||
|
||||
/* Same as zap_join, but set the values to 'value'. */
|
||||
int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
|
||||
uint64_t value, dmu_tx_t *tx);
|
||||
|
||||
/* Same as zap_join, but add together any duplicated entries. */
|
||||
int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
|
||||
dmu_tx_t *tx);
|
||||
int zap_value_search_by_dnode(dnode_t *dn,
|
||||
uint64_t value, uint64_t mask, char *name, uint64_t namelen);
|
||||
|
||||
/*
|
||||
* Manipulate entries where the name + value are the "same" (the name is
|
||||
@@ -347,8 +391,10 @@ int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
|
||||
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
|
||||
int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
int zap_add_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_remove_int_by_dnode(dnode_t *dn, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_lookup_int_by_dnode(dnode_t *dn, uint64_t value);
|
||||
|
||||
/* Here the key is an int and the value is a different int. */
|
||||
int zap_add_int_key(objset_t *os, uint64_t obj,
|
||||
@@ -358,22 +404,19 @@ int zap_update_int_key(objset_t *os, uint64_t obj,
|
||||
int zap_lookup_int_key(objset_t *os, uint64_t obj,
|
||||
uint64_t key, uint64_t *valuep);
|
||||
|
||||
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
int zap_add_int_key_by_dnode(dnode_t *dn,
|
||||
uint64_t key, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_update_int_key_by_dnode(dnode_t *dn,
|
||||
uint64_t key, uint64_t value, dmu_tx_t *tx);
|
||||
int zap_lookup_int_key_by_dnode(dnode_t *dn,
|
||||
uint64_t key, uint64_t *valuep);
|
||||
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
objset_t *zc_objset;
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_zapobj;
|
||||
uint64_t zc_serialized;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
boolean_t zc_prefetch;
|
||||
} zap_cursor_t;
|
||||
/*
|
||||
* The interface for listing all the attributes of a zapobj can be
|
||||
* thought of as cursor moving down a list of the attributes one by
|
||||
* one. The cookie returned by the zap_cursor_serialize routine is
|
||||
* persistent across system calls (and across reboot, even).
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int za_integer_length;
|
||||
@@ -389,9 +432,6 @@ typedef struct {
|
||||
char za_name[];
|
||||
} zap_attribute_t;
|
||||
|
||||
void zap_init(void);
|
||||
void zap_fini(void);
|
||||
|
||||
/*
|
||||
* Alloc and free zap_attribute_t.
|
||||
*/
|
||||
@@ -399,22 +439,52 @@ zap_attribute_t *zap_attribute_alloc(void);
|
||||
zap_attribute_t *zap_attribute_long_alloc(void);
|
||||
void zap_attribute_free(zap_attribute_t *attrp);
|
||||
|
||||
/*
|
||||
* The interface for listing all the attributes of a zapobj can be
|
||||
* thought of as cursor moving down a list of the attributes one by
|
||||
* one. The cookie returned by the zap_cursor_serialize routine is
|
||||
* persistent across system calls (and across reboot, even).
|
||||
*/
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
boolean_t zc_prefetch;
|
||||
/*
|
||||
* Legacy fields to maintain source compat with Lustre, which accesses
|
||||
* them directly. Not to be used in new code!
|
||||
*/
|
||||
objset_t *zc_objset;
|
||||
uint64_t zc_zapobj;
|
||||
} zap_cursor_t;
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor, pointing to the "first" attribute of the
|
||||
* zapobj. You must _fini the cursor when you are done with it.
|
||||
* Initialize a zap cursor, pointing to the "first" attribute of the zapobj.
|
||||
* The entire zapobj will be prefetched. You must call zap_cursor_fini on the
|
||||
* cursor when you are done with it.
|
||||
*/
|
||||
void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
|
||||
void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
|
||||
uint64_t zapobj);
|
||||
int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
|
||||
int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn);
|
||||
void zap_cursor_fini(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a cursor at the beginning, but request that we not prefetch
|
||||
* the entire ZAP object.
|
||||
*/
|
||||
int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
|
||||
uint64_t zapobj);
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor pointing to the position recorded by
|
||||
* zap_cursor_serialize (in the "serialized" argument). You can also
|
||||
* use a "serialized" argument of 0 to start at the beginning of the
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
|
||||
uint64_t serialized);
|
||||
|
||||
/*
|
||||
* Get the attribute currently pointed to by the cursor. Returns
|
||||
* ENOENT if at the end of the attributes.
|
||||
@@ -435,17 +505,6 @@ void zap_cursor_advance(zap_cursor_t *zc);
|
||||
*/
|
||||
uint64_t zap_cursor_serialize(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a zap cursor pointing to the position recorded by
|
||||
* zap_cursor_serialize (in the "serialized" argument). You can also
|
||||
* use a "serialized" argument of 0 to start at the beginning of the
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *ds,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
|
||||
|
||||
#define ZAP_HISTOGRAM_SIZE 10
|
||||
|
||||
typedef struct zap_stats {
|
||||
@@ -535,7 +594,12 @@ typedef struct zap_stats {
|
||||
* statistics. This interface shouldn't be relied on unless you really
|
||||
* know what you're doing.
|
||||
*/
|
||||
int zap_get_stats(objset_t *ds, uint64_t zapobj, zap_stats_t *zs);
|
||||
int zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs);
|
||||
int zap_get_stats_by_dnode(dnode_t *dn, zap_stats_t *zs);
|
||||
|
||||
/* ZAP subsystem setup/teardown */
|
||||
void zap_init(void);
|
||||
void zap_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
* Copyright (c) 2024, Klara, Inc.
|
||||
* Copyright (c) 2026, TrueNAS.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_IMPL_H
|
||||
@@ -33,7 +34,6 @@
|
||||
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -62,8 +62,9 @@ typedef struct mzap_phys {
|
||||
uint64_t mz_salt;
|
||||
uint64_t mz_normflags;
|
||||
uint64_t mz_pad[5];
|
||||
mzap_ent_phys_t mz_chunk[1];
|
||||
|
||||
/* actually variable size depending on block size */
|
||||
mzap_ent_phys_t mz_chunk[];
|
||||
} mzap_phys_t;
|
||||
|
||||
typedef struct mzap_ent {
|
||||
@@ -170,6 +171,9 @@ typedef struct zap {
|
||||
} zap_u;
|
||||
} zap_t;
|
||||
|
||||
#define zap_f zap_u.zap_fat
|
||||
#define zap_m zap_u.zap_micro
|
||||
|
||||
static inline zap_phys_t *
|
||||
zap_f_phys(zap_t *zap)
|
||||
{
|
||||
@@ -182,6 +186,10 @@ zap_m_phys(zap_t *zap)
|
||||
return (zap->zap_dbuf->db_data);
|
||||
}
|
||||
|
||||
/*
|
||||
* zap_name_t carries the original key and whatever we've derived from it
|
||||
* (normalised form, hash, etc) as we work through completing the operation.
|
||||
*/
|
||||
typedef struct zap_name {
|
||||
zap_t *zn_zap;
|
||||
int zn_key_intlen;
|
||||
@@ -196,25 +204,94 @@ typedef struct zap_name {
|
||||
char zn_normbuf[];
|
||||
} zap_name_t;
|
||||
|
||||
#define zap_f zap_u.zap_fat
|
||||
#define zap_m zap_u.zap_micro
|
||||
/*
|
||||
* Allocate a zap_name_t. The longname flag ensures there is enough room to
|
||||
* hold a long filename when the 'longname' pool feature is active.
|
||||
*/
|
||||
zap_name_t *zap_name_alloc(zap_t *zap, boolean_t longname);
|
||||
|
||||
/*
|
||||
* Allocate a zap_name_t for the given key. zap_name_init_str() will be
|
||||
* called to normalise the key and initialise the struct.
|
||||
*/
|
||||
zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
|
||||
|
||||
/*
|
||||
* Allocate a zap_name_t for a uint64 array key.
|
||||
*/
|
||||
zap_name_t *zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints);
|
||||
|
||||
/*
|
||||
* Free a zap_name_t.
|
||||
*/
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
|
||||
/*
|
||||
* Initialise an existing zap_name_t with the normalised form of the key,
|
||||
* computed according to the given matchtype.
|
||||
*/
|
||||
int zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt);
|
||||
|
||||
/*
|
||||
* Compare 'matchname' with the name represented by the zap_name_t, applying
|
||||
* the same normalisation method first. Returns true if the normalised forms
|
||||
* match, false otherwise.
|
||||
*/
|
||||
boolean_t zap_match(zap_name_t *zn, const char *matchname);
|
||||
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
|
||||
/*
|
||||
* Compute and return the 64-bit hash for the name, according to the name
|
||||
* type and hash flags.
|
||||
*/
|
||||
uint64_t zap_hash(zap_name_t *zn);
|
||||
|
||||
/*
|
||||
* Return a zap_t for the given on-disk object, locked and ready for use.
|
||||
* The zap_t will be allocated and loaded from disk if it's not already loaded.
|
||||
*/
|
||||
int zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
|
||||
zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap, const void *tag);
|
||||
int zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
|
||||
zap_t **zapp);
|
||||
|
||||
/* Unlock and release a zap_t. */
|
||||
void zap_unlock(zap_t *zap, const void *tag);
|
||||
|
||||
/*
|
||||
* Try to upgrade a zap lock from READER to WRITER. If the upgrade is not
|
||||
* possible without blocking, returns 0. If the upgrade happened, returns 1.
|
||||
*/
|
||||
int zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Upgrade a zap lock from READER to WRITER. If it can't be upgraded
|
||||
* immediately it will block.
|
||||
*/
|
||||
void zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx);
|
||||
|
||||
/* zap_t release function for when associated dbuf is evicted. */
|
||||
void zap_evict_sync(void *dbu);
|
||||
zap_name_t *zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt);
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
|
||||
/* Misc internal state & config. */
|
||||
int zap_hashbits(zap_t *zap);
|
||||
uint32_t zap_maxcd(zap_t *zap);
|
||||
uint64_t zap_getflags(zap_t *zap);
|
||||
|
||||
/* Microzap implementation. */
|
||||
zap_t *mzap_open(dmu_buf_t *db);
|
||||
int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
|
||||
mzap_ent_t *mze_find(zap_name_t *zn, zfs_btree_index_t *idx);
|
||||
boolean_t mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash);
|
||||
void mze_destroy(zap_t *zap);
|
||||
boolean_t mzap_normalization_conflict(zap_t *zap, zap_name_t *zn,
|
||||
mzap_ent_t *mze, zfs_btree_index_t *idx);
|
||||
void mzap_addent(zap_name_t *zn, uint64_t value);
|
||||
void mzap_byteswap(mzap_phys_t *buf, size_t size);
|
||||
uint64_t zap_get_micro_max_size(spa_t *spa);
|
||||
|
||||
#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
|
||||
|
||||
/* Fatzap implementation. */
|
||||
void fzap_byteswap(void *buf, size_t size);
|
||||
int fzap_count(zap_t *zap, uint64_t *count);
|
||||
int fzap_lookup(zap_name_t *zn,
|
||||
@@ -223,20 +300,17 @@ int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t *actual_num_integers);
|
||||
void fzap_prefetch(zap_name_t *zn);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, const void *tag, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val,
|
||||
const void *tag, dmu_tx_t *tx);
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_length(zap_name_t *zn,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
|
||||
int fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za);
|
||||
void fzap_get_stats(zap_t *zap, zap_stats_t *zs);
|
||||
void zap_put_leaf(struct zap_leaf *l);
|
||||
|
||||
int fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, const void *tag, dmu_tx_t *tx);
|
||||
int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx);
|
||||
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -139,12 +139,12 @@ enum zio_stage {
|
||||
|
||||
ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W---- */
|
||||
|
||||
ZIO_STAGE_BRT_FREE = 1 << 9, /* --F--- */
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 9, /* R----- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R----- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W---- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 12, /* --F--- */
|
||||
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 10, /* R----- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 11, /* R----- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 12, /* -W---- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 13, /* --F--- */
|
||||
ZIO_STAGE_BRT_FREE = 1 << 13, /* --F--- */
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE = 1 << 14, /* RWFC-- */
|
||||
ZIO_STAGE_GANG_ISSUE = 1 << 15, /* RWFC-- */
|
||||
@@ -259,8 +259,7 @@ enum zio_stage {
|
||||
ZIO_STAGE_DVA_FREE)
|
||||
|
||||
#define ZIO_DDT_FREE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_STAGE_FREE_BP_INIT | \
|
||||
(ZIO_FREE_PIPELINE | \
|
||||
ZIO_STAGE_ISSUE_ASYNC | \
|
||||
ZIO_STAGE_DDT_FREE)
|
||||
|
||||
|
||||
@@ -63,7 +63,3 @@ libspl_la_LIBADD = \
|
||||
libspl_la_LIBADD += $(LIBATOMIC_LIBS) $(LIBCLOCK_GETTIME)
|
||||
|
||||
libspl_assert_la_LIBADD = $(BACKTRACE_LIBS) $(LIBUNWIND_LIBS)
|
||||
|
||||
if BUILD_FREEBSD
|
||||
libspl_assert_la_LIBADD += -lpthread
|
||||
endif
|
||||
|
||||
@@ -76,7 +76,7 @@ libzfs_la_LIBADD = \
|
||||
|
||||
libzfs_la_LIBADD += -lrt -lm $(LIBCRYPTO_LIBS) $(ZLIB_LIBS) $(LIBFETCH_LIBS) $(LTLIBINTL)
|
||||
|
||||
libzfs_la_LDFLAGS = -pthread
|
||||
libzfs_la_LDFLAGS = -version-info 7:0:0
|
||||
|
||||
if !ASAN_ENABLED
|
||||
libzfs_la_LDFLAGS += -Wl,-z,defs
|
||||
@@ -86,8 +86,6 @@ if BUILD_FREEBSD
|
||||
libzfs_la_LIBADD += -lutil -lgeom
|
||||
endif
|
||||
|
||||
libzfs_la_LDFLAGS += -version-info 7:0:0
|
||||
|
||||
pkgconfig_DATA += %D%/libzfs.pc
|
||||
|
||||
dist_noinst_DATA += %D%/libzfs.abi %D%/libzfs.suppr
|
||||
|
||||
@@ -2553,7 +2553,7 @@
|
||||
<typedef-decl name='__uint32_t' type-id='f0981eeb' id='62f1140c'/>
|
||||
<typedef-decl name='__uint64_t' type-id='7359adad' id='8910171f'/>
|
||||
<typedef-decl name='size_t' type-id='7359adad' id='b59d7dce'/>
|
||||
<class-decl name='libzfs_handle' size-in-bits='18432' is-struct='yes' visibility='default' id='c8a9d9d8'>
|
||||
<class-decl name='libzfs_handle' size-in-bits='18496' is-struct='yes' visibility='default' id='c8a9d9d8'>
|
||||
<data-member access='public' layout-offset-in-bits='0'>
|
||||
<var-decl name='libzfs_error' type-id='95e97e5e' visibility='default'/>
|
||||
</data-member>
|
||||
@@ -2605,6 +2605,9 @@
|
||||
<data-member access='public' layout-offset-in-bits='18112'>
|
||||
<var-decl name='zh_mnttab' type-id='f20fbd51' visibility='default'/>
|
||||
</data-member>
|
||||
<data-member access='public' layout-offset-in-bits='18432'>
|
||||
<var-decl name='zh_mnttab_cache_enabled' type-id='c19b74c3' visibility='default'/>
|
||||
</data-member>
|
||||
</class-decl>
|
||||
<class-decl name='zfs_handle' size-in-bits='4928' is-struct='yes' visibility='default' id='f6ee4445'>
|
||||
<data-member access='public' layout-offset-in-bits='0'>
|
||||
@@ -6412,7 +6415,9 @@
|
||||
<enumerator name='VDEV_PROP_SCHEDULER' value='55'/>
|
||||
<enumerator name='VDEV_PROP_FDOMAIN' value='56'/>
|
||||
<enumerator name='VDEV_PROP_FGROUP' value='57'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='58'/>
|
||||
<enumerator name='VDEV_PROP_ALLOC_BIAS' value='58'/>
|
||||
<enumerator name='VDEV_PROP_ROTATIONAL' value='59'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='60'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
|
||||
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
|
||||
|
||||
@@ -177,6 +177,7 @@ changelist_postfix(prop_changelist_t *clp)
|
||||
char shareopts[ZFS_MAXPROPLEN];
|
||||
boolean_t commit_smb_shares = B_FALSE;
|
||||
boolean_t commit_nfs_shares = B_FALSE;
|
||||
int rc = 0;
|
||||
|
||||
/*
|
||||
* If CL_GATHER_DONT_UNMOUNT is set, it means we don't want to (un)mount
|
||||
@@ -266,7 +267,7 @@ changelist_postfix(prop_changelist_t *clp)
|
||||
const enum sa_protocol nfs[] =
|
||||
{SA_PROTOCOL_NFS, SA_NO_PROTOCOL};
|
||||
if (sharenfs && mounted) {
|
||||
zfs_share(cn->cn_handle, nfs);
|
||||
rc = zfs_share(cn->cn_handle, nfs);
|
||||
commit_nfs_shares = B_TRUE;
|
||||
} else if (cn->cn_shared || clp->cl_waslegacy) {
|
||||
zfs_unshare(cn->cn_handle, NULL, nfs);
|
||||
@@ -275,7 +276,7 @@ changelist_postfix(prop_changelist_t *clp)
|
||||
const enum sa_protocol smb[] =
|
||||
{SA_PROTOCOL_SMB, SA_NO_PROTOCOL};
|
||||
if (sharesmb && mounted) {
|
||||
zfs_share(cn->cn_handle, smb);
|
||||
rc = zfs_share(cn->cn_handle, smb);
|
||||
commit_smb_shares = B_TRUE;
|
||||
} else if (cn->cn_shared || clp->cl_waslegacy) {
|
||||
zfs_unshare(cn->cn_handle, NULL, smb);
|
||||
@@ -291,7 +292,15 @@ changelist_postfix(prop_changelist_t *clp)
|
||||
*p++ = SA_NO_PROTOCOL;
|
||||
zfs_commit_shares(proto);
|
||||
|
||||
return (0);
|
||||
/*
|
||||
* It's possible rc != 0 since we set a mountpoint or option while
|
||||
* SMB/NFS was not running. This is fine, and we should not return
|
||||
* an error up the stack.
|
||||
*
|
||||
* At this point we only want to report mountpoint/shareopts parsing
|
||||
* errors.
|
||||
*/
|
||||
return (rc == SA_SYNTAX_ERR ? rc : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -2031,12 +2031,21 @@ zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Export the pool from the system. Setting force overrides the
|
||||
* active-shared-spare check. The caller must unmount all datasets
|
||||
* in the pool first.
|
||||
*/
|
||||
int
|
||||
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
|
||||
{
|
||||
return (zpool_export_common(zhp, force, B_FALSE, log_str));
|
||||
}
|
||||
|
||||
/*
|
||||
* Force-export the pool: bypasses the active-shared-spare check, and skips
|
||||
* writing the exported-state labels and updating the cachefile.
|
||||
*/
|
||||
int
|
||||
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
|
||||
{
|
||||
@@ -2574,6 +2583,10 @@ xlate_init_err(int err)
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start (or cancel/suspend/uninit) the initialize operation on every
|
||||
* leaf vdev of the pool.
|
||||
*/
|
||||
int
|
||||
zpool_initialize_one(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
@@ -2685,6 +2698,10 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
|
||||
return (err == 0 ? 0 : -1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start (or cancel/suspend/uninit) the initialize operation on the listed
|
||||
* vdevs. Returns once the new state is committed.
|
||||
*/
|
||||
int
|
||||
zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
|
||||
nvlist_t *vds)
|
||||
@@ -2692,6 +2709,9 @@ zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
|
||||
return (zpool_initialize_impl(zhp, cmd_type, vds, B_FALSE));
|
||||
}
|
||||
|
||||
/*
|
||||
* Like zpool_initialize(), but waits for each listed vdev to finish.
|
||||
*/
|
||||
int
|
||||
zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
|
||||
nvlist_t *vds)
|
||||
@@ -2746,6 +2766,10 @@ zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start (or cancel/suspend) the trim operation on every leaf vdev of
|
||||
* the pool.
|
||||
*/
|
||||
int
|
||||
zpool_trim_one(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
@@ -3393,6 +3417,11 @@ __zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up a vdev in the pool by path, name, or guid. Returns the
|
||||
* vdev's configuration nvlist, or NULL on no match. Also, fills
|
||||
* in avail_spare, l2cache, and log if they are non-NULL.
|
||||
*/
|
||||
nvlist_t *
|
||||
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
|
||||
boolean_t *l2cache, boolean_t *log)
|
||||
@@ -4637,7 +4666,10 @@ zpool_reopen_one(zpool_handle_t *zhp, void *data)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* call into libzfs_core to execute the sync IOCTL per pool */
|
||||
/*
|
||||
* Block until every buffered write for the pool has reached the
|
||||
* underlying disks.
|
||||
*/
|
||||
int
|
||||
zpool_sync_one(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
@@ -4913,6 +4945,10 @@ zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Format the program name and its command-line arguments into a single
|
||||
* space-separated string.
|
||||
*/
|
||||
void
|
||||
zfs_save_arguments(int argc, char **argv, char *string, int len)
|
||||
{
|
||||
@@ -4925,6 +4961,10 @@ zfs_save_arguments(int argc, char **argv, char *string, int len)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Append a message to the pool's command-history log, retrievable via
|
||||
* "zpool history".
|
||||
*/
|
||||
int
|
||||
zpool_log_history(libzfs_handle_t *hdl, const char *message)
|
||||
{
|
||||
@@ -5220,6 +5260,11 @@ zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
|
||||
free(mntpnt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate a (dataset object id, file object id) pair into a readable
|
||||
* path. If the dataset is mounted the result is an absolute filesystem
|
||||
* path; otherwise it is `dataset:path`.
|
||||
*/
|
||||
void
|
||||
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
|
||||
char *pathname, size_t len)
|
||||
@@ -5227,6 +5272,10 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
|
||||
zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len, B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Translate a (dataset object id, file object id) pair into a
|
||||
* `dataset:path` string.
|
||||
*/
|
||||
void
|
||||
zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
|
||||
char *pathname, size_t len)
|
||||
@@ -5281,6 +5330,10 @@ zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Store a boot configuration map in the bootenv area of each leaf
|
||||
* vdev's labels.
|
||||
*/
|
||||
int
|
||||
zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
|
||||
{
|
||||
@@ -5294,6 +5347,9 @@ zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the boot configuration map from each leaf vdev's bootenv area.
|
||||
*/
|
||||
int
|
||||
zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)
|
||||
{
|
||||
@@ -5741,6 +5797,9 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
|
||||
return (ENOENT);
|
||||
if (prop == VDEV_PROP_SIT_OUT)
|
||||
return (ENOENT);
|
||||
/* Only valid for top-level vdevs */
|
||||
if (prop == VDEV_PROP_ALLOC_BIAS)
|
||||
return (ENOENT);
|
||||
}
|
||||
if (vdev_prop_index_to_string(prop, intval,
|
||||
(const char **)&strval) != 0)
|
||||
|
||||
@@ -64,6 +64,10 @@ sa_enable_share(const char *zfsname, const char *mountpoint,
|
||||
{
|
||||
VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);
|
||||
|
||||
int error = sa_validate_shareopts(shareopts, protocol);
|
||||
if (error != SA_OK)
|
||||
return (error);
|
||||
|
||||
const struct sa_share_impl args =
|
||||
init_share(zfsname, mountpoint, shareopts);
|
||||
return (fstypes[protocol]->enable_share(&args));
|
||||
@@ -111,6 +115,10 @@ sa_validate_shareopts(const char *options, enum sa_protocol protocol)
|
||||
{
|
||||
VALIDATE_PROTOCOL(protocol, SA_INVALID_PROTOCOL);
|
||||
|
||||
/* error out on invalid characters */
|
||||
if (strpbrk(options, "\a\b\f\n\r") != NULL)
|
||||
return (SA_SYNTAX_ERR);
|
||||
|
||||
return (fstypes[protocol]->validate_shareopts(options));
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ libzfs_core_la_LIBADD = \
|
||||
|
||||
libzfs_core_la_LIBADD += $(LTLIBINTL)
|
||||
|
||||
libzfs_core_la_LDFLAGS = -pthread
|
||||
libzfs_core_la_LDFLAGS = -version-info 3:0:0
|
||||
|
||||
if !ASAN_ENABLED
|
||||
libzfs_core_la_LDFLAGS += -Wl,-z,defs
|
||||
@@ -43,8 +43,6 @@ if BUILD_FREEBSD
|
||||
libzfs_core_la_LIBADD += -lutil -lgeom
|
||||
endif
|
||||
|
||||
libzfs_core_la_LDFLAGS += -version-info 3:0:0
|
||||
|
||||
pkgconfig_DATA += %D%/libzfs_core.pc
|
||||
|
||||
dist_noinst_DATA += %D%/libzfs_core.abi %D%/libzfs_core.suppr
|
||||
|
||||
@@ -166,6 +166,8 @@ nodist_libzpool_la_SOURCES = \
|
||||
module/zfs/vdev_root.c \
|
||||
module/zfs/vdev_trim.c \
|
||||
module/zfs/zap.c \
|
||||
module/zfs/zap_fat.c \
|
||||
module/zfs/zap_impl.c \
|
||||
module/zfs/zap_leaf.c \
|
||||
module/zfs/zap_micro.c \
|
||||
module/zfs/zcp.c \
|
||||
@@ -212,7 +214,7 @@ libzpool_la_LIBADD = \
|
||||
|
||||
libzpool_la_LIBADD += $(LIBCLOCK_GETTIME) $(ZLIB_LIBS) -lm
|
||||
|
||||
libzpool_la_LDFLAGS = -pthread
|
||||
libzpool_la_LDFLAGS = -version-info 7:0:0
|
||||
|
||||
if !ASAN_ENABLED
|
||||
libzpool_la_LDFLAGS += -Wl,-z,defs
|
||||
@@ -222,8 +224,6 @@ if BUILD_FREEBSD
|
||||
libzpool_la_LIBADD += -lgeom
|
||||
endif
|
||||
|
||||
libzpool_la_LDFLAGS += -version-info 7:0:0
|
||||
|
||||
if TARGET_CPU_POWERPC
|
||||
module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.$(OBJEXT) : CFLAGS += -maltivec
|
||||
module/zfs/libzpool_la-vdev_raidz_math_powerpc_altivec.l$(OBJEXT): CFLAGS += -maltivec
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2019 Datto Inc.
|
||||
.\" Copyright (c) 2023, 2024, 2025, Klara, Inc.
|
||||
.\" Copyright (c) 2026, Mateusz Piotrowski <0mp@FreeBSD.org>
|
||||
.\"
|
||||
.\" The contents of this file are subject to the terms of the Common Development
|
||||
.\" and Distribution License (the "License"). You may not use this file except
|
||||
@@ -18,7 +19,7 @@
|
||||
.\" own identifying information:
|
||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.Dd September 15, 2025
|
||||
.Dd May 8, 2026
|
||||
.Dt ZFS 4
|
||||
.Os
|
||||
.
|
||||
@@ -389,6 +390,18 @@ this is
|
||||
or
|
||||
.Em 2*1024 Pq with Sy ashift Ns = Ns Sy 12 .
|
||||
.
|
||||
.It Sy metaslab_df_alloc_threshold Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq u64
|
||||
Minimum size which forces the dynamic allocator to change its allocation
|
||||
strategy.
|
||||
Once the space map cannot satisfy an allocation of this size, it switches to a
|
||||
more aggressive strategy (searching by size rather than offset).
|
||||
.
|
||||
.It Sy metaslab_df_free_pct Ns = Ns Sy 4 Ns % Pq uint
|
||||
The minimum free space, in percent, which must be available in a space map to
|
||||
continue allocations in a first-fit fashion.
|
||||
Once free space drops below this level, allocations switch to a best-fit
|
||||
strategy.
|
||||
.
|
||||
.It Sy metaslab_df_use_largest_segment Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||
If not searching forward (due to
|
||||
.Sy metaslab_df_max_search , metaslab_df_free_pct ,
|
||||
@@ -445,6 +458,32 @@ This improves performance, especially when there are many metaslabs per vdev
|
||||
and the allocation can't actually be satisfied
|
||||
(so we would otherwise iterate all metaslabs).
|
||||
.
|
||||
.It Sy zfs_metaslab_sm_blksz_no_log Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq int
|
||||
Block size for the metaslab space maps in pools where the
|
||||
.Sy log_spacemap
|
||||
feature is disabled.
|
||||
Multiple metaslabs are modified per transaction group, so a smaller block size
|
||||
lets more, scattered I/O operations be issued.
|
||||
Must be a power of 2 greater than
|
||||
.Sy 4096 .
|
||||
This parameter can only be set at module load time.
|
||||
.
|
||||
.It Sy zfs_metaslab_sm_blksz_with_log Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq int
|
||||
Block size for the metaslab space maps in pools where the
|
||||
.Sy log_spacemap
|
||||
feature is enabled.
|
||||
Changes are batched in the per-pool log spacemap and flushed to each metaslab's
|
||||
space map only occasionally, so a larger block size is more efficient.
|
||||
Must be a power of 2 greater than
|
||||
.Sy 4096 .
|
||||
This parameter can only be set at module load time.
|
||||
.
|
||||
.It Sy zfs_metaslab_condense_pct Ns = Ns Sy 200 Ns % Pq uint
|
||||
Condense an on-disk space map when its size exceeds this percentage of
|
||||
the in-memory representation.
|
||||
The minimum is
|
||||
.Sy 100 .
|
||||
.
|
||||
.It Sy zfs_vdev_default_ms_count Ns = Ns Sy 200 Pq uint
|
||||
When a vdev is added, target this number of metaslabs per top-level vdev.
|
||||
.
|
||||
@@ -768,9 +807,15 @@ See also
|
||||
which serves a similar purpose but has a higher priority if nonzero.
|
||||
.
|
||||
.It Sy zfs_arc_dnode_reduce_percent Ns = Ns Sy 10 Ns % Pq u64
|
||||
Percentage of ARC dnodes to try to scan in response to demand for non-metadata
|
||||
when the number of bytes consumed by dnodes exceeds
|
||||
.Sy zfs_arc_dnode_limit .
|
||||
Percentage used to size dnode prune requests.
|
||||
The request size is the larger of two values:
|
||||
.Sy zfs_arc_dnode_reduce_percent
|
||||
applied to the dnode count above
|
||||
.Sy zfs_arc_dnode_limit ,
|
||||
or
|
||||
.Sy zfs_arc_dnode_reduce_percent
|
||||
applied to the total dnode count
|
||||
when non-evictable metadata exceeds 3/4 of the metadata target.
|
||||
.
|
||||
.It Sy zfs_arc_average_blocksize Ns = Ns Sy 8192 Ns B Po 8 KiB Pc Pq uint
|
||||
The ARC's buffer hash table is sized based on the assumption of an average
|
||||
@@ -911,6 +956,19 @@ but that was not proven to be useful.
|
||||
Number of missing top-level vdevs which will be allowed during
|
||||
pool import (only in read-only mode).
|
||||
.
|
||||
.It Sy zfs_max_missing_tvds_cachefile Ns = Ns Sy 2 Pq u64
|
||||
Number of missing top-level vdevs tolerated when importing a pool
|
||||
from a cachefile, before the trusted config is read from the MOS.
|
||||
A cachefile can fall out of sync with the on-disk config after a
|
||||
device removal that did not rewrite the cachefile, so the default
|
||||
of 2 still lets the import reach a copy of the MOS.
|
||||
.
|
||||
.It Sy zfs_max_missing_tvds_scan Ns = Ns Sy 0 Pq u64
|
||||
Number of missing top-level vdevs tolerated when importing a pool
|
||||
by scanning device paths, before the trusted config is read from
|
||||
the MOS.
|
||||
Defaults to 0 because a scan should detect every present device.
|
||||
.
|
||||
.It Sy zfs_max_nvlist_src_size Ns = Sy 0 Pq u64
|
||||
Maximum size in bytes allowed to be passed as
|
||||
.Sy zc_nvlist_src_size
|
||||
@@ -948,8 +1006,6 @@ equivalent to the greater of the number of online CPUs and
|
||||
If less than
|
||||
.Sy arc_c No >> Sy zfs_arc_no_grow_shift
|
||||
free memory is available, the ARC is not allowed to grow.
|
||||
This parameter is
|
||||
.Fx Ns -specific .
|
||||
.
|
||||
.It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int
|
||||
The ARC size is considered to be overflowing if it exceeds the current
|
||||
|
||||
@@ -142,6 +142,8 @@ See
|
||||
.Xr zpool-attach 8 .
|
||||
.It Sy trim_support
|
||||
Indicates if a leaf device supports trim operations.
|
||||
.It Sy rotational
|
||||
Indicates whether the device backing this vdev uses rotating media.
|
||||
.El
|
||||
.Pp
|
||||
The following native properties can be used to change the behavior of a vdev.
|
||||
@@ -183,9 +185,12 @@ output.
|
||||
A text comment up to 8192 characters long
|
||||
.It Sy bootsize
|
||||
The amount of space to reserve for the EFI system partition
|
||||
.It Sy failfast
|
||||
.It Sy failfast Ns = Ns Sy inherit Ns | Ns Sy on Ns | Ns Sy off
|
||||
If this device should propagate BIO errors back to ZFS, used to disable
|
||||
failfast.
|
||||
.Sy inherit
|
||||
causes the vdev to adopt the behavior of its parent vdev,
|
||||
recursively up the tree.
|
||||
.It Sy sit_out
|
||||
Only valid for
|
||||
.Sy RAIDZ
|
||||
@@ -218,6 +223,21 @@ If this device should perform new allocations, used to disable a device
|
||||
when it is scheduled for later removal.
|
||||
See
|
||||
.Xr zpool-remove 8 .
|
||||
.It Sy alloc_bias Ns = Ns Sy none Ns | Ns Sy log Ns | Ns Sy special Ns | Ns Sy dedup
|
||||
Controls the allocation class for a top-level vdev.
|
||||
Changes take effect after an export and import of the pool.
|
||||
Changing to/from log is not implemented, since it may lead to data loss in
|
||||
case of a log device failure.
|
||||
Setting to
|
||||
.Sy special
|
||||
and
|
||||
.Sy dedup
|
||||
requires
|
||||
.Sy feature@allocation_classes
|
||||
to be enabled.
|
||||
At least one top-level vdev must remain in the normal
|
||||
.Pq Sy none
|
||||
class.
|
||||
.It Sy scheduler Ns = Ns Sy auto Ns | Ns Sy on Ns | Ns Sy off
|
||||
Controls how I/O requests are added to the vdev queue when reading or
|
||||
writing to this vdev.
|
||||
|
||||
@@ -284,10 +284,15 @@ Decode and display block from an embedded block pointer specified by the
|
||||
arguments.
|
||||
.It Fl f , -file-layout
|
||||
Display the file layout of an object for the disks of a raidz vdev.
|
||||
Numeric values in the display are hexadecimal.
|
||||
With
|
||||
.Fl H ,
|
||||
the output is in scripted mode for easy parsing, with all values
|
||||
being presented as 512 byte blocks.
|
||||
being presented as 512 byte blocks in decimal; with
|
||||
.Fl v ,
|
||||
the block type (parity or data) is displayed; with
|
||||
.Fl vv ,
|
||||
the offset into the file for each block is also printed.
|
||||
Only a single top-level raidz vdev is supported.
|
||||
.It Fl h , -history
|
||||
Display pool history similar to
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd November 8, 2023
|
||||
.Dd May 9, 2026
|
||||
.Dt ZPOOL-ATTACH 8
|
||||
.Os
|
||||
.
|
||||
@@ -132,6 +132,35 @@ Waits until
|
||||
has finished resilvering or expanding before returning.
|
||||
.El
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
.\" Example 1 is example 5 from zpool.8.
|
||||
.\" Make sure to update them bidirectionally
|
||||
.Ss Example 1 : No Making a non-mirrored ZFS Storage Pool mirrored
|
||||
The following command converts an existing single device
|
||||
.Ar sda
|
||||
into a mirror by attaching a second device to it,
|
||||
.Ar sdb .
|
||||
.Dl # Nm zpool Cm attach Ar tank Pa sda sdb
|
||||
.
|
||||
.Ss Example 2 : No Expanding a RAID-Z vdev with an additional disk
|
||||
The following command adds
|
||||
.Ar sdg
|
||||
to the existing
|
||||
.Ar raidz2-0
|
||||
vdev in
|
||||
.Ar tank ,
|
||||
turning a 6-wide RAID-Z2 into a 7-wide RAID-Z2:
|
||||
.Dl # Nm zpool Cm attach Ar tank raidz2-0 Pa sdg
|
||||
Progress is reported by
|
||||
.Nm zpool Cm status .
|
||||
The operation requires the
|
||||
.Sy raidz_expansion
|
||||
pool feature, and
|
||||
.Ar sdg
|
||||
must be at least as large as the smallest existing disk in the vdev.
|
||||
Old blocks keep their original data-to-parity ratio; only blocks written
|
||||
after the expansion use the new ratio.
|
||||
.
|
||||
.Sh SEE ALSO
|
||||
.Xr zpool-add 8 ,
|
||||
.Xr zpool-detach 8 ,
|
||||
|
||||
@@ -458,12 +458,12 @@ ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W----
|
||||
|
||||
ZIO_STAGE_NOP_WRITE:0x00000100:-W----
|
||||
|
||||
ZIO_STAGE_BRT_FREE:0x00000200:--F---
|
||||
ZIO_STAGE_DDT_READ_START:0x00000200:R-----
|
||||
ZIO_STAGE_DDT_READ_DONE:0x00000400:R-----
|
||||
ZIO_STAGE_DDT_WRITE:0x00000800:-W----
|
||||
ZIO_STAGE_DDT_FREE:0x00001000:--F---
|
||||
|
||||
ZIO_STAGE_DDT_READ_START:0x00000400:R-----
|
||||
ZIO_STAGE_DDT_READ_DONE:0x00000800:R-----
|
||||
ZIO_STAGE_DDT_WRITE:0x00001000:-W----
|
||||
ZIO_STAGE_DDT_FREE:0x00002000:--F---
|
||||
ZIO_STAGE_BRT_FREE:0x00002000:--F---
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC--
|
||||
ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC--
|
||||
|
||||
@@ -109,10 +109,7 @@ environment variable set.
|
||||
If a script requires the use of a privileged command, like
|
||||
.Xr smartctl 8 ,
|
||||
then it's recommended you allow the user access to it in
|
||||
.Pa /etc/sudoers
|
||||
or add the user to the
|
||||
.Pa /etc/sudoers.d/zfs
|
||||
file.
|
||||
.Pa /etc/sudoers .
|
||||
.Pp
|
||||
If
|
||||
.Fl c
|
||||
|
||||
@@ -245,6 +245,7 @@ Invalid command line options were specified.
|
||||
.
|
||||
.Sh EXAMPLES
|
||||
.\" Examples 1, 2, 3, 4, 12, 13 are shared with zpool-create.8.
|
||||
.\" Example 5 is shared with zpool-attach.8.
|
||||
.\" Examples 6, 14 are shared with zpool-add.8.
|
||||
.\" Examples 7, 16 are shared with zpool-list.8.
|
||||
.\" Example 8 is shared with zpool-destroy.8.
|
||||
|
||||
@@ -4,9 +4,11 @@
|
||||
|
||||
ZFS_MODULE_CFLAGS += -std=gnu11 -Wno-declaration-after-statement
|
||||
ZFS_MODULE_CFLAGS += -Wmissing-prototypes
|
||||
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@ @KERNEL_NO_FORMAT_ZERO_LENGTH@
|
||||
ZFS_MODULE_CFLAGS += @KERNEL_NO_FORMAT_ZERO_LENGTH@
|
||||
|
||||
ifneq ($(KBUILD_EXTMOD),)
|
||||
ZFS_MODULE_CFLAGS += @KERNEL_DEBUG_CFLAGS@
|
||||
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
|
||||
zfs_include = @abs_top_srcdir@/include
|
||||
icp_include = @abs_srcdir@/icp/include
|
||||
zstd_include = @abs_srcdir@/zstd/include
|
||||
@@ -16,6 +18,12 @@ ZFS_MODULE_CFLAGS += -I@abs_top_builddir@/include
|
||||
src = @abs_srcdir@
|
||||
obj = @abs_builddir@
|
||||
else
|
||||
ifeq ($(CONFIG_ZFS_DEBUG),y)
|
||||
ZFS_MODULE_CFLAGS += -Werror
|
||||
ZFS_MODULE_CPPFLAGS += -DDEBUG -UNDEBUG -DZFS_DEBUG
|
||||
else
|
||||
ZFS_MODULE_CPPFLAGS += -UDEBUG -DNDEBUG
|
||||
endif
|
||||
zfs_include = $(srctree)/include/zfs
|
||||
icp_include = $(src)/icp/include
|
||||
zstd_include = $(src)/zstd/include
|
||||
@@ -28,7 +36,6 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/spl
|
||||
ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
|
||||
ZFS_MODULE_CFLAGS += -I$(zfs_include)
|
||||
ZFS_MODULE_CPPFLAGS += -D_KERNEL
|
||||
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
|
||||
|
||||
# KASAN enables -Werror=frame-larger-than=1024, which
|
||||
# breaks oh so many parts of our build.
|
||||
@@ -408,6 +415,8 @@ ZFS_OBJS := \
|
||||
vdev_root.o \
|
||||
vdev_trim.o \
|
||||
zap.o \
|
||||
zap_fat.o \
|
||||
zap_impl.o \
|
||||
zap_leaf.o \
|
||||
zap_micro.o \
|
||||
zcp.o \
|
||||
|
||||
@@ -65,6 +65,12 @@ CFLAGS+= -DZFS_DEBUG -g
|
||||
CFLAGS += -DNDEBUG
|
||||
.endif
|
||||
|
||||
.for _SAN in KASAN KMSAN KUBSAN
|
||||
.if defined(WITH_${_SAN}) && ${WITH_${_SAN}} == "true"
|
||||
KERN_OPTS_EXTRA+= ${_SAN}
|
||||
.endif
|
||||
.endfor
|
||||
|
||||
.if defined(WITH_GCOV) && ${WITH_GCOV} == "true"
|
||||
CFLAGS+= -fprofile-arcs -ftest-coverage
|
||||
.endif
|
||||
@@ -345,6 +351,8 @@ SRCS+= abd.c \
|
||||
vdev_root.c \
|
||||
vdev_trim.c \
|
||||
zap.c \
|
||||
zap_fat.c \
|
||||
zap_impl.c \
|
||||
zap_leaf.c \
|
||||
zap_micro.c \
|
||||
zcp.c \
|
||||
@@ -475,8 +483,8 @@ CFLAGS.vdev_raidz_math_avx2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.vdev_raidz_math_avx512f.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.vdev_raidz_math_scalar.c= -Wno-cast-qual
|
||||
CFLAGS.vdev_raidz_math_sse2.c= -Wno-cast-qual -Wno-duplicate-decl-specifier
|
||||
CFLAGS.zap_impl.c= -Wno-cast-qual
|
||||
CFLAGS.zap_leaf.c= -Wno-cast-qual
|
||||
CFLAGS.zap_micro.c= -Wno-cast-qual
|
||||
CFLAGS.zcp.c= -Wno-cast-qual
|
||||
CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
CFLAGS.zfs_fletcher_avx512.c= -Wno-cast-qual -Wno-pointer-arith
|
||||
|
||||
@@ -135,7 +135,8 @@
|
||||
#define NVP_SIZE_CALC(name_len, data_len) \
|
||||
(NV_ALIGN((sizeof (nvpair_t)) + name_len) + NV_ALIGN(data_len))
|
||||
|
||||
static int i_get_value_size(data_type_t type, const void *data, uint_t nelem);
|
||||
static int i_get_value_size(data_type_t type, const void *data, uint_t nelem,
|
||||
size_t max_size);
|
||||
static int nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type,
|
||||
uint_t nelem, const void *data);
|
||||
|
||||
@@ -810,8 +811,10 @@ i_validate_nvpair(nvpair_t *nvp)
|
||||
* verify nvp_type, nvp_value_elem, and also possibly
|
||||
* verify string values and get the value size.
|
||||
*/
|
||||
size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp));
|
||||
size1 = nvp->nvp_size - NVP_VALOFF(nvp);
|
||||
size2 = i_get_value_size(type, NVP_VALUE(nvp), NVP_NELEM(nvp),
|
||||
size1);
|
||||
|
||||
if (size2 < 0 || size1 != NV_ALIGN(size2))
|
||||
return (EFAULT);
|
||||
|
||||
@@ -1002,12 +1005,21 @@ nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
|
||||
* DATA_TYPE_STRING and
|
||||
* DATA_TYPE_STRING_ARRAY
|
||||
* If data == NULL then the size of the string(s) is excluded.
|
||||
*
|
||||
* If 'max_size' is non-zero, then don't look beyond 'max_size' number of
|
||||
* bytes when calculating a value size. Note that 'max_size' should include
|
||||
* the NULL terminator byte when calculating string size. If 'max_size' is 0,
|
||||
* it is ignored.
|
||||
*/
|
||||
static int
|
||||
i_get_value_size(data_type_t type, const void *data, uint_t nelem)
|
||||
i_get_value_size(data_type_t type, const void *data, uint_t nelem,
|
||||
size_t max_size)
|
||||
{
|
||||
uint64_t value_sz;
|
||||
|
||||
if (max_size == 0)
|
||||
max_size = INT32_MAX;
|
||||
|
||||
if (i_validate_type_nelem(type, nelem) != 0)
|
||||
return (-1);
|
||||
|
||||
@@ -1052,10 +1064,15 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)
|
||||
break;
|
||||
#endif
|
||||
case DATA_TYPE_STRING:
|
||||
if (data == NULL)
|
||||
if (data == NULL) {
|
||||
value_sz = 0;
|
||||
else
|
||||
value_sz = strlen(data) + 1;
|
||||
} else {
|
||||
value_sz = strnlen(data, max_size);
|
||||
if (value_sz >= max_size) {
|
||||
return (-1); /* string not terminated */
|
||||
}
|
||||
value_sz += 1;
|
||||
}
|
||||
break;
|
||||
case DATA_TYPE_BOOLEAN_ARRAY:
|
||||
value_sz = (uint64_t)nelem * sizeof (boolean_t);
|
||||
@@ -1089,16 +1106,23 @@ i_get_value_size(data_type_t type, const void *data, uint_t nelem)
|
||||
break;
|
||||
case DATA_TYPE_STRING_ARRAY:
|
||||
value_sz = (uint64_t)nelem * sizeof (uint64_t);
|
||||
|
||||
if (data != NULL) {
|
||||
char *const *strs = data;
|
||||
uint_t i;
|
||||
size_t newsize;
|
||||
|
||||
/* no alignment requirement for strings */
|
||||
for (i = 0; i < nelem; i++) {
|
||||
if (strs[i] == NULL)
|
||||
return (-1);
|
||||
value_sz += strlen(strs[i]) + 1;
|
||||
|
||||
newsize = strnlen(strs[i], max_size);
|
||||
|
||||
if (newsize == max_size)
|
||||
return (-1); /* not terminated */
|
||||
|
||||
value_sz += newsize + 1; /* +1 for NULL */
|
||||
max_size -= newsize + 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -1163,7 +1187,7 @@ nvlist_add_common(nvlist_t *nvl, const char *name,
|
||||
* In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
|
||||
* is the size of the string(s) included.
|
||||
*/
|
||||
if ((value_sz = i_get_value_size(type, data, nelem)) < 0)
|
||||
if ((value_sz = i_get_value_size(type, data, nelem, 0)) < 0)
|
||||
return (EINVAL);
|
||||
|
||||
if (i_validate_nvpair_value(type, nelem, data) != 0)
|
||||
@@ -1588,7 +1612,7 @@ nvpair_value_common(const nvpair_t *nvp, data_type_t type, uint_t *nelem,
|
||||
#endif
|
||||
if (data == NULL)
|
||||
return (EINVAL);
|
||||
if ((value_sz = i_get_value_size(type, NULL, 1)) < 0)
|
||||
if ((value_sz = i_get_value_size(type, NULL, 1, 0)) < 0)
|
||||
return (EINVAL);
|
||||
memcpy(data, NVP_VALUE(nvp), (size_t)value_sz);
|
||||
if (nelem != NULL)
|
||||
@@ -3019,7 +3043,8 @@ nvs_native_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
|
||||
* In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
|
||||
* is the size of the string(s) excluded.
|
||||
*/
|
||||
if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp))) < 0)
|
||||
if ((value_sz = i_get_value_size(type, NULL, NVP_NELEM(nvp),
|
||||
NVP_SIZE(nvp))) < 0)
|
||||
return (EFAULT);
|
||||
|
||||
if (NVP_SIZE_CALC(nvp->nvp_name_sz, value_sz) > nvp->nvp_size)
|
||||
@@ -3333,7 +3358,7 @@ nvs_xdr_nvp_op(nvstream_t *nvs, nvpair_t *nvp)
|
||||
* In case of data types DATA_TYPE_STRING and DATA_TYPE_STRING_ARRAY
|
||||
* is the size of the string(s) excluded.
|
||||
*/
|
||||
if ((value_sz = i_get_value_size(type, NULL, nelem)) < 0)
|
||||
/* Store the computed size in value_sz, then test it for an error (< 0). */
if ((value_sz = i_get_value_size(type, NULL, nelem, NVP_SIZE(nvp))) < 0)
|
||||
return (EFAULT);
|
||||
|
||||
/* if there is no data to extract then return */
|
||||
|
||||
@@ -72,9 +72,6 @@ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
|
||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, free_target,
|
||||
param_set_arc_free_target, 0, CTLFLAG_RW,
|
||||
"Desired number of free pages below which ARC triggers reclaim");
|
||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
|
||||
param_set_arc_no_grow_shift, 0, ZMOD_RW,
|
||||
"log2(fraction of ARC which must be free to allow growing)");
|
||||
|
||||
int64_t
|
||||
arc_available_memory(void)
|
||||
|
||||
@@ -292,7 +292,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int err, val;
|
||||
|
||||
val = arc_no_grow_shift;
|
||||
val = zfs_arc_no_grow_shift;
|
||||
err = sysctl_handle_int(oidp, &val, 0, req);
|
||||
if (err != 0 || req->newptr == NULL)
|
||||
return (err);
|
||||
@@ -300,7 +300,7 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
|
||||
if (val < 0 || val >= arc_shrink_shift)
|
||||
return (EINVAL);
|
||||
|
||||
arc_no_grow_shift = val;
|
||||
zfs_arc_no_grow_shift = val;
|
||||
|
||||
if (arg2 != 0)
|
||||
warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift");
|
||||
@@ -541,14 +541,14 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log,
|
||||
|
||||
/*
|
||||
* The in-core space map representation is more compact than its on-disk form.
|
||||
* The zfs_condense_pct determines how much more compact the in-core
|
||||
* The zfs_metaslab_condense_pct determines how much more compact the in-core
|
||||
* space map representation must be before we compact it on-disk.
|
||||
* Values should be greater than or equal to 100.
|
||||
*/
|
||||
extern uint_t zfs_condense_pct;
|
||||
extern uint_t zfs_metaslab_condense_pct;
|
||||
|
||||
SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct,
|
||||
CTLFLAG_RWTUN, &zfs_condense_pct, 0,
|
||||
SYSCTL_UINT(_vfs_zfs, OID_AUTO, metaslab_condense_pct,
|
||||
CTLFLAG_RWTUN, &zfs_metaslab_condense_pct, 0,
|
||||
"Condense on-disk spacemap when it is more than this many percents"
|
||||
" of in-memory counterpart");
|
||||
|
||||
@@ -617,18 +617,6 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval,
|
||||
"Configuration cache file write, retry after failure, interval"
|
||||
" (seconds)");
|
||||
|
||||
extern uint64_t zfs_max_missing_tvds_cachefile;
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile,
|
||||
CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0,
|
||||
"Allow importing pools with missing top-level vdevs in cache file");
|
||||
|
||||
extern uint64_t zfs_max_missing_tvds_scan;
|
||||
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan,
|
||||
CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0,
|
||||
"Allow importing pools with missing top-level vdevs during scan");
|
||||
|
||||
/* spa_misc.c */
|
||||
|
||||
extern int zfs_flags;
|
||||
|
||||
@@ -188,6 +188,12 @@ spl_kvmalloc(size_t size, gfp_t lflags)
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
* vmalloc fallback. KM_VMEM may not have been requested originally if
|
||||
* we've come through spl_kmem_alloc_impl(), so we need to remove
|
||||
* __GFP_COMP, which is not a valid flag for vmalloc.
|
||||
*/
|
||||
lflags &= ~__GFP_COMP;
|
||||
return (spl_vmalloc(size, lflags));
|
||||
}
|
||||
|
||||
|
||||
@@ -410,6 +410,24 @@ param_set_arc_int(const char *buf, zfs_kernel_param_t *kp)
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_arc_no_grow_shift(const char *buf, zfs_kernel_param_t *kp)
|
||||
{
|
||||
unsigned long val;
|
||||
int error;
|
||||
|
||||
error = kstrtoul(buf, 0, &val);
|
||||
if (error)
|
||||
return (SET_ERROR(error));
|
||||
|
||||
if (val >= arc_shrink_shift)
|
||||
return (-SET_ERROR(EINVAL));
|
||||
|
||||
zfs_arc_no_grow_shift = val;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
param_set_l2arc_dwpd_limit(const char *buf, zfs_kernel_param_t *kp)
|
||||
{
|
||||
|
||||
@@ -931,8 +931,14 @@ vdev_disk_io_rw(zio_t *zio)
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
|
||||
vdev_t *iter = v;
|
||||
while (iter != NULL && iter->vdev_failfast == ZPROP_BOOLEAN_INHERIT)
|
||||
iter = iter->vdev_parent;
|
||||
|
||||
boolean_t failfast = iter ? iter->vdev_failfast == 1 :
|
||||
vdev_prop_default_numeric(VDEV_PROP_FAILFAST);
|
||||
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
|
||||
v->vdev_failfast == B_TRUE) {
|
||||
failfast) {
|
||||
bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
|
||||
zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
|
||||
}
|
||||
|
||||
@@ -1689,6 +1689,24 @@ zfs_suspend_fs(zfsvfs_t *zfsvfs)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a referenced znode at or after zp. The z_znodes_lock protects the
|
||||
* list walk; the returned inode reference keeps the znode alive after the
|
||||
* lock is dropped for zfs_rezget().
|
||||
*/
|
||||
static znode_t *
|
||||
zfs_resume_hold_next_znode(zfsvfs_t *zfsvfs, znode_t *zp)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&zfsvfs->z_znodes_lock));
|
||||
|
||||
for (; zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
||||
if (igrab(ZTOI(zp)) != NULL)
|
||||
return (zp);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebuild SA and release VOPs. Note that ownership of the underlying dataset
|
||||
* is an invariant across any of the operations that can be performed while the
|
||||
@@ -1732,13 +1750,23 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
||||
* dbufs. If a zfs_rezget() fails, then we unhash the inode
|
||||
* and mark it stale. This prevents a collision if a new
|
||||
* inode/object is created which must use the same inode
|
||||
* number. The stale inode will be be released when the
|
||||
* VFS prunes the dentry holding the remaining references
|
||||
* on the stale inode.
|
||||
* number. The stale inode will be released when the VFS
|
||||
* prunes the dentry holding the remaining references on
|
||||
* the stale inode.
|
||||
*
|
||||
* zfs_rezget() takes the per-object znode hold lock. Pin each znode
|
||||
* while holding z_znodes_lock, then drop the list lock before calling
|
||||
* zfs_rezget() to preserve the normal zh_lock -> z_znodes_lock order.
|
||||
*/
|
||||
mutex_enter(&zfsvfs->z_znodes_lock);
|
||||
for (zp = list_head(&zfsvfs->z_all_znodes); zp;
|
||||
zp = list_next(&zfsvfs->z_all_znodes, zp)) {
|
||||
zp = zfs_resume_hold_next_znode(zfsvfs,
|
||||
list_head(&zfsvfs->z_all_znodes));
|
||||
while (zp != NULL) {
|
||||
znode_t *next = zfs_resume_hold_next_znode(zfsvfs,
|
||||
list_next(&zfsvfs->z_all_znodes, zp));
|
||||
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
|
||||
err2 = zfs_rezget(zp);
|
||||
if (err2) {
|
||||
zpl_d_drop_aliases(ZTOI(zp));
|
||||
@@ -1747,9 +1775,14 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
||||
|
||||
/* see comment in zfs_suspend_fs() */
|
||||
if (zp->z_suspended) {
|
||||
zfs_zrele_async(zp);
|
||||
zp->z_suspended = B_FALSE;
|
||||
zfs_zrele_async(zp);
|
||||
}
|
||||
|
||||
zfs_zrele_async(zp);
|
||||
|
||||
mutex_enter(&zfsvfs->z_znodes_lock);
|
||||
zp = next;
|
||||
}
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
|
||||
|
||||
@@ -2434,9 +2434,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
|
||||
&zp->z_pflags, sizeof (zp->z_pflags));
|
||||
|
||||
if (attrzp) {
|
||||
/*
|
||||
* attrzp is zp's hidden xattr directory, so the second
|
||||
* znode lock acquisition is nested rather than recursive.
|
||||
*/
|
||||
if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
|
||||
mutex_enter(&attrzp->z_acl_lock);
|
||||
mutex_enter(&attrzp->z_lock);
|
||||
mutex_enter_nested(&attrzp->z_acl_lock, NESTED_SINGLE);
|
||||
mutex_enter_nested(&attrzp->z_lock, NESTED_SINGLE);
|
||||
SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
|
||||
SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
|
||||
sizeof (attrzp->z_pflags));
|
||||
@@ -4074,18 +4078,32 @@ zfs_inactive(struct inode *ip)
|
||||
{
|
||||
znode_t *zp = ITOZ(ip);
|
||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||
krwlock_t *zti_lock = &zfsvfs->z_teardown_inactive_lock;
|
||||
uint64_t atime[2];
|
||||
int error;
|
||||
int need_unlock = 0;
|
||||
boolean_t no_lockdep = B_FALSE;
|
||||
|
||||
/* Only read lock if we haven't already write locked, e.g. rollback */
|
||||
if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
|
||||
if (!RW_WRITE_HELD(zti_lock)) {
|
||||
need_unlock = 1;
|
||||
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
|
||||
/*
|
||||
* kswapd reaches evict_inode() with fs_reclaim held. Suppress
|
||||
* lockdep only for this reclaim-thread acquire/release pair.
|
||||
*/
|
||||
no_lockdep = current_is_reclaim_thread();
|
||||
if (no_lockdep)
|
||||
rw_enter_nolockdep(zti_lock, RW_READER);
|
||||
else
|
||||
rw_enter(zti_lock, RW_READER);
|
||||
}
|
||||
if (zp->z_sa_hdl == NULL) {
|
||||
if (need_unlock)
|
||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||
if (need_unlock) {
|
||||
if (no_lockdep)
|
||||
rw_exit_nolockdep(zti_lock);
|
||||
else
|
||||
rw_exit(zti_lock);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4111,8 +4129,12 @@ zfs_inactive(struct inode *ip)
|
||||
}
|
||||
|
||||
zfs_zinactive(zp);
|
||||
if (need_unlock)
|
||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||
if (need_unlock) {
|
||||
if (no_lockdep)
|
||||
rw_exit_nolockdep(zti_lock);
|
||||
else
|
||||
rw_exit(zti_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -550,10 +550,11 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
|
||||
*
|
||||
* Finally, all filesystems get automatic handling for the 'source' option,
|
||||
* that is, the "name" of the filesystem (the first column of df(1)'s output).
|
||||
* However, this only happens if the handler does not otherwise handle
|
||||
* the 'source' option. Since we handle _all_ options because of 'sloppy', we
|
||||
* deal with this explicitly by calling into the kernel's helper for this,
|
||||
* vfs_parse_fs_param_source(), which sets up fc->source.
|
||||
* However, this only happens if the handler does not otherwise handle the
|
||||
* 'source' option. Since we handle _all_ options because of 'sloppy', we have
|
||||
* to handle it ourselves. Normally we would call vfs_parse_fs_param_source()
|
||||
* to deal with this, but that didn't appear until 5.14, and it's small enough
|
||||
* that we can just handle it ourselves.
|
||||
*
|
||||
* source
|
||||
*
|
||||
@@ -565,6 +566,7 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
|
||||
*/
|
||||
|
||||
enum {
|
||||
Opt_source,
|
||||
Opt_exec, Opt_suid, Opt_dev,
|
||||
Opt_atime, Opt_relatime, Opt_strictatime,
|
||||
Opt_saxattr, Opt_dirxattr, Opt_noxattr,
|
||||
@@ -574,6 +576,8 @@ enum {
|
||||
};
|
||||
|
||||
static const struct fs_parameter_spec zpl_param_spec[] = {
|
||||
fsparam_string("source", Opt_source),
|
||||
|
||||
fsparam_flag_no("exec", Opt_exec),
|
||||
fsparam_flag_no("suid", Opt_suid),
|
||||
fsparam_flag_no("dev", Opt_dev),
|
||||
@@ -609,18 +613,34 @@ static const struct fs_parameter_spec zpl_param_spec[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
/*
|
||||
* Before 5.6, fs_parse() took a struct fs_parameter_description
|
||||
* which wraps the parameter specs with name and enum pointers. From 5.6,
|
||||
* the description struct was removed and fs_parse() accepts the
|
||||
* fs_parameter_spec directly.
|
||||
*/
|
||||
static int
|
||||
zpl_fs_parse(struct fs_context *fc, struct fs_parameter *param,
|
||||
struct fs_parse_result *result)
|
||||
{
|
||||
#ifdef HAVE_FS_PARSE_TAKES_SPEC
|
||||
return (fs_parse(fc, zpl_param_spec, param, result));
|
||||
#else
|
||||
static const struct fs_parameter_description zpl_param_desc = {
|
||||
.name = "zfs",
|
||||
.specs = zpl_param_spec,
|
||||
};
|
||||
return (fs_parse(fc, &zpl_param_desc, param, result));
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
{
|
||||
vfs_t *vfs = fc->fs_private;
|
||||
|
||||
/* Handle 'source' explicitly so we don't trip on it as an unknown. */
|
||||
int opt = vfs_parse_fs_param_source(fc, param);
|
||||
if (opt != -ENOPARAM)
|
||||
return (opt);
|
||||
|
||||
struct fs_parse_result result;
|
||||
opt = fs_parse(fc, zpl_param_spec, param, &result);
|
||||
int opt = zpl_fs_parse(fc, param, &result);
|
||||
if (opt == -ENOPARAM) {
|
||||
/*
|
||||
* Convert unknowns to warnings, to work around the whole
|
||||
@@ -632,6 +652,16 @@ zpl_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
return (opt);
|
||||
|
||||
switch (opt) {
|
||||
case Opt_source:
|
||||
if (fc->source != NULL) {
|
||||
cmn_err(CE_NOTE,
|
||||
"ZFS: multiple 'source' options not supported");
|
||||
return (-SET_ERROR(EINVAL));
|
||||
}
|
||||
fc->source = param->string;
|
||||
param->string = NULL;
|
||||
break;
|
||||
|
||||
case Opt_exec:
|
||||
vfs->vfs_exec = !result.negated;
|
||||
vfs->vfs_do_exec = B_TRUE;
|
||||
@@ -794,7 +824,7 @@ zpl_parse_monolithic(struct fs_context *fc, void *data)
|
||||
|
||||
/* Check if this is one of our options. */
|
||||
struct fs_parse_result result;
|
||||
int opt = fs_parse(fc, zpl_param_spec, ¶m, &result);
|
||||
int opt = zpl_fs_parse(fc, ¶m, &result);
|
||||
if (opt >= 0) {
|
||||
/*
|
||||
* We already know this is one of our options, so a
|
||||
@@ -874,9 +904,14 @@ zpl_get_tree(struct fs_context *fc)
|
||||
if (sb->s_root == NULL) {
|
||||
vfs_t *vfs = fc->fs_private;
|
||||
|
||||
/* Apply readonly flag as mount option */
|
||||
if (fc->sb_flags & SB_RDONLY) {
|
||||
vfs->vfs_readonly = B_TRUE;
|
||||
/*
|
||||
* If SB_RDONLY was set/cleared from mount options, update
|
||||
* them in the options struct so we set up the filesystem
|
||||
* in the proper state.
|
||||
*/
|
||||
if (fc->sb_flags_mask & SB_RDONLY) {
|
||||
vfs->vfs_readonly =
|
||||
(fc->sb_flags & SB_RDONLY) ? B_TRUE : B_FALSE;
|
||||
vfs->vfs_do_readonly = B_TRUE;
|
||||
}
|
||||
|
||||
|
||||
@@ -701,6 +701,24 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
|
||||
* ZFS allows extended user attributes to be disabled administratively
|
||||
* by setting the 'xattr=off' property on the dataset.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Concatenate prefix + name into a NUL-terminated stack buffer.
|
||||
* Linux fs/xattr.c (import_xattr_name) caps the full xattr name at
|
||||
* XATTR_NAME_MAX before any handler runs, so XATTR_NAME_MAX + 1
|
||||
* bytes always fit.
|
||||
*/
|
||||
static inline void
|
||||
zpl_xattr_join_name(char *buf, size_t buflen, const char *prefix,
|
||||
size_t prefix_len, const char *name, size_t name_len)
|
||||
{
|
||||
ASSERT3U(prefix_len + name_len + 1, <=, buflen);
|
||||
|
||||
memcpy(buf, prefix, prefix_len);
|
||||
memcpy(buf + prefix_len, name, name_len);
|
||||
buf[prefix_len + name_len] = '\0';
|
||||
}
|
||||
|
||||
static int
|
||||
__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size,
|
||||
const char *name, size_t name_len)
|
||||
@@ -726,9 +744,13 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
|
||||
* try again without the namespace prefix for compatibility with
|
||||
* other platforms.
|
||||
*/
|
||||
char *xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
|
||||
char xattr_name[XATTR_NAME_MAX + 1];
|
||||
|
||||
zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
|
||||
XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
error = zpl_xattr_get(ip, xattr_name, value, size);
|
||||
kmem_strfree(xattr_name);
|
||||
if (error == -ENODATA)
|
||||
error = zpl_xattr_get(ip, name, value, size);
|
||||
|
||||
@@ -758,8 +780,13 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
|
||||
* XATTR_CREATE: fail if xattr already exists
|
||||
* XATTR_REPLACE: fail if xattr does not exist
|
||||
*/
|
||||
char *prefixed_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name);
|
||||
char prefixed_name[XATTR_NAME_MAX + 1];
|
||||
const char *clear_name, *set_name;
|
||||
|
||||
zpl_xattr_join_name(prefixed_name, sizeof (prefixed_name),
|
||||
XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
if (zfs_xattr_compat) {
|
||||
clear_name = prefixed_name;
|
||||
set_name = name;
|
||||
@@ -776,7 +803,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
|
||||
* because it already exists. Stop here.
|
||||
*/
|
||||
if (error == -EEXIST)
|
||||
goto out;
|
||||
return (error);
|
||||
/*
|
||||
* If XATTR_REPLACE was specified and we succeeded to clear
|
||||
* an xattr, we don't need to replace anything when setting
|
||||
@@ -788,10 +815,7 @@ __zpl_xattr_user_set(zidmap_t *user_ns,
|
||||
/*
|
||||
* Set the new value with the configured name format.
|
||||
*/
|
||||
error = zpl_xattr_set(ip, set_name, value, size, flags);
|
||||
out:
|
||||
kmem_strfree(prefixed_name);
|
||||
return (error);
|
||||
return (zpl_xattr_set(ip, set_name, value, size, flags));
|
||||
}
|
||||
ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set);
|
||||
|
||||
@@ -824,17 +848,16 @@ static int
|
||||
__zpl_xattr_trusted_get(struct inode *ip, const char *name,
|
||||
void *value, size_t size)
|
||||
{
|
||||
char *xattr_name;
|
||||
int error;
|
||||
char xattr_name[XATTR_NAME_MAX + 1];
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return (-EACCES);
|
||||
/* xattr_resolve_name will do this for us if this is defined */
|
||||
xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
|
||||
error = zpl_xattr_get(ip, xattr_name, value, size);
|
||||
kmem_strfree(xattr_name);
|
||||
|
||||
return (error);
|
||||
zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
|
||||
XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
return (zpl_xattr_get(ip, xattr_name, value, size));
|
||||
}
|
||||
ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
|
||||
|
||||
@@ -844,17 +867,16 @@ __zpl_xattr_trusted_set(zidmap_t *user_ns,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
(void) user_ns;
|
||||
char *xattr_name;
|
||||
int error;
|
||||
char xattr_name[XATTR_NAME_MAX + 1];
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return (-EACCES);
|
||||
/* xattr_resolve_name will do this for us if this is defined */
|
||||
xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name);
|
||||
error = zpl_xattr_set(ip, xattr_name, value, size, flags);
|
||||
kmem_strfree(xattr_name);
|
||||
|
||||
return (error);
|
||||
zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
|
||||
XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
return (zpl_xattr_set(ip, xattr_name, value, size, flags));
|
||||
}
|
||||
ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set);
|
||||
|
||||
@@ -889,14 +911,13 @@ static int
|
||||
__zpl_xattr_security_get(struct inode *ip, const char *name,
|
||||
void *value, size_t size)
|
||||
{
|
||||
char *xattr_name;
|
||||
int error;
|
||||
/* xattr_resolve_name will do this for us if this is defined */
|
||||
xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
|
||||
error = zpl_xattr_get(ip, xattr_name, value, size);
|
||||
kmem_strfree(xattr_name);
|
||||
char xattr_name[XATTR_NAME_MAX + 1];
|
||||
|
||||
return (error);
|
||||
zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
|
||||
XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
return (zpl_xattr_get(ip, xattr_name, value, size));
|
||||
}
|
||||
ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
|
||||
|
||||
@@ -906,14 +927,13 @@ __zpl_xattr_security_set(zidmap_t *user_ns,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
(void) user_ns;
|
||||
char *xattr_name;
|
||||
int error;
|
||||
/* xattr_resolve_name will do this for us if this is defined */
|
||||
xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
|
||||
error = zpl_xattr_set(ip, xattr_name, value, size, flags);
|
||||
kmem_strfree(xattr_name);
|
||||
char xattr_name[XATTR_NAME_MAX + 1];
|
||||
|
||||
return (error);
|
||||
zpl_xattr_join_name(xattr_name, sizeof (xattr_name),
|
||||
XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN,
|
||||
name, strlen(name));
|
||||
|
||||
return (zpl_xattr_set(ip, xattr_name, value, size, flags));
|
||||
}
|
||||
ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
|
||||
|
||||
|
||||
@@ -238,11 +238,11 @@ _VALSTR_BITFIELD_IMPL(zio_stage,
|
||||
{ 'E', "EN", "ENCRYPT" },
|
||||
{ 'C', "CG", "CHECKSUM_GENERATE" },
|
||||
{ 'N', "NW", "NOP_WRITE" },
|
||||
{ 'B', "BF", "BRT_FREE" },
|
||||
{ 'd', "dS", "DDT_READ_START" },
|
||||
{ 'd', "dD", "DDT_READ_DONE" },
|
||||
{ 'd', "dW", "DDT_WRITE" },
|
||||
{ 'd', "dF", "DDT_FREE" },
|
||||
{ 'B', "BF", "BRT_FREE" },
|
||||
{ 'G', "GA", "GANG_ASSEMBLE" },
|
||||
{ 'G', "GI", "GANG_ISSUE" },
|
||||
{ 'D', "DT", "DVA_THROTTLE" },
|
||||
|
||||
@@ -374,10 +374,16 @@ vdev_prop_init(void)
|
||||
{ "on", 1},
|
||||
{ NULL }
|
||||
};
|
||||
static const zprop_index_t boolean_inherit_table[] = {
|
||||
{ "off", 0},
|
||||
{ "on", 1},
|
||||
{ "inherit", ZPROP_BOOLEAN_INHERIT},
|
||||
{ NULL }
|
||||
};
|
||||
static const zprop_index_t boolean_na_table[] = {
|
||||
{ "off", 0},
|
||||
{ "on", 1},
|
||||
{ "-", 2}, /* ZPROP_BOOLEAN_NA */
|
||||
{ "-", ZPROP_BOOLEAN_NA},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@@ -388,6 +394,14 @@ vdev_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const zprop_index_t vdev_alloc_bias_table[] = {
|
||||
{ "none", VDEV_BIAS_NONE },
|
||||
{ "log", VDEV_BIAS_LOG },
|
||||
{ "special", VDEV_BIAS_SPECIAL },
|
||||
{ "dedup", VDEV_BIAS_DEDUP },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
struct zfs_mod_supported_features *sfeatures =
|
||||
zfs_mod_list_supported(ZFS_SYSFS_VDEV_PROPERTIES);
|
||||
|
||||
@@ -547,8 +561,8 @@ vdev_prop_init(void)
|
||||
|
||||
/* default index properties */
|
||||
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
|
||||
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
|
||||
sfeatures);
|
||||
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off | inherit", "FAILFAST",
|
||||
boolean_inherit_table, sfeatures);
|
||||
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
|
||||
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
|
||||
"SLOW_IO_EVENTS", boolean_table, sfeatures);
|
||||
@@ -556,6 +570,13 @@ vdev_prop_init(void)
|
||||
VDEV_SCHEDULER_AUTO, PROP_DEFAULT, ZFS_TYPE_VDEV,
|
||||
"auto | on | off", "IO_SCHEDULER",
|
||||
vdevschedulertype_table, sfeatures);
|
||||
zprop_register_index(VDEV_PROP_ALLOC_BIAS, "alloc_bias",
|
||||
VDEV_BIAS_NONE, PROP_DEFAULT, ZFS_TYPE_VDEV,
|
||||
"none | log | special | dedup", "ALLOC_BIAS",
|
||||
vdev_alloc_bias_table, sfeatures);
|
||||
zprop_register_index(VDEV_PROP_ROTATIONAL, "rotational", 0,
|
||||
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "ROTATIONAL",
|
||||
boolean_table, sfeatures);
|
||||
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
|
||||
@@ -398,14 +398,14 @@ uint_t zfs_arc_pc_percent = 0;
|
||||
|
||||
/*
|
||||
* log2(fraction of ARC which must be free to allow growing).
|
||||
* I.e. If there is less than arc_c >> arc_no_grow_shift free memory,
|
||||
* I.e. If there is less than arc_c >> zfs_arc_no_grow_shift free memory,
|
||||
* when reading a new block into the ARC, we will evict an equal-sized block
|
||||
* from the ARC.
|
||||
*
|
||||
* This must be less than arc_shrink_shift, so that when we shrink the ARC,
|
||||
* we will still not allow it to grow.
|
||||
*/
|
||||
uint_t arc_no_grow_shift = 5;
|
||||
uint_t zfs_arc_no_grow_shift = 5;
|
||||
|
||||
|
||||
/*
|
||||
@@ -586,6 +586,7 @@ arc_stats_t arc_stats = {
|
||||
{ "uncached_metadata", KSTAT_DATA_UINT64 },
|
||||
{ "uncached_evictable_data", KSTAT_DATA_UINT64 },
|
||||
{ "uncached_evictable_metadata", KSTAT_DATA_UINT64 },
|
||||
{ "l2_ndev", KSTAT_DATA_UINT64 },
|
||||
{ "l2_hits", KSTAT_DATA_UINT64 },
|
||||
{ "l2_misses", KSTAT_DATA_UINT64 },
|
||||
{ "l2_prefetch_asize", KSTAT_DATA_UINT64 },
|
||||
@@ -4975,7 +4976,7 @@ arc_reap_cb_check(void *arg, zthr_t *zthr)
|
||||
*/
|
||||
arc_growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
|
||||
return (B_TRUE);
|
||||
} else if (free_memory < arc_c >> arc_no_grow_shift) {
|
||||
} else if (free_memory < arc_c >> zfs_arc_no_grow_shift) {
|
||||
arc_no_grow = B_TRUE;
|
||||
} else if (gethrtime() >= arc_growtime) {
|
||||
arc_no_grow = B_FALSE;
|
||||
@@ -5571,20 +5572,6 @@ arc_buf_access(arc_buf_t *buf)
|
||||
!HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
|
||||
}
|
||||
|
||||
/* a generic arc_read_done_func_t which you can use */
|
||||
void
|
||||
arc_bcopy_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
|
||||
arc_buf_t *buf, void *arg)
|
||||
{
|
||||
(void) zio, (void) zb, (void) bp;
|
||||
|
||||
if (buf == NULL)
|
||||
return;
|
||||
|
||||
memcpy(arg, buf->b_data, arc_buf_size(buf));
|
||||
arc_buf_destroy(buf, arg);
|
||||
}
|
||||
|
||||
/* a generic arc_read_done_func_t */
|
||||
void
|
||||
arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
|
||||
@@ -7440,6 +7427,7 @@ arc_kstat_update(kstat_t *ksp, int rw)
|
||||
aggsum_value(&arc_sums.arcstat_dnode_size);
|
||||
as->arcstat_bonus_size.value.ui64 =
|
||||
wmsum_value(&arc_sums.arcstat_bonus_size);
|
||||
as->arcstat_l2_ndev.value.ui64 = l2arc_ndev;
|
||||
as->arcstat_l2_hits.value.ui64 =
|
||||
wmsum_value(&arc_sums.arcstat_l2_hits);
|
||||
as->arcstat_l2_misses.value.ui64 =
|
||||
@@ -7654,7 +7642,8 @@ arc_tuning_update(boolean_t verbose)
|
||||
/* Valid range: 1 - N */
|
||||
if (zfs_arc_shrink_shift) {
|
||||
arc_shrink_shift = zfs_arc_shrink_shift;
|
||||
arc_no_grow_shift = MIN(arc_no_grow_shift, arc_shrink_shift -1);
|
||||
zfs_arc_no_grow_shift = MIN(zfs_arc_no_grow_shift,
|
||||
arc_shrink_shift - 1);
|
||||
}
|
||||
|
||||
/* Valid range: 1 - N ms */
|
||||
@@ -11683,6 +11672,7 @@ EXPORT_SYMBOL(arc_write);
|
||||
EXPORT_SYMBOL(arc_read);
|
||||
EXPORT_SYMBOL(arc_buf_info);
|
||||
EXPORT_SYMBOL(arc_getbuf_func);
|
||||
EXPORT_SYMBOL(arc_buf_destroy);
|
||||
EXPORT_SYMBOL(arc_add_prune_callback);
|
||||
EXPORT_SYMBOL(arc_remove_prune_callback);
|
||||
|
||||
@@ -11701,6 +11691,10 @@ ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int,
|
||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int,
|
||||
param_get_uint, ZMOD_RW, "log2(fraction of ARC to reclaim)");
|
||||
|
||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, no_grow_shift,
|
||||
param_set_arc_no_grow_shift, param_get_uint, ZMOD_RW,
|
||||
"log2(fraction of ARC which must be free to allow growing)");
|
||||
|
||||
#ifdef _KERNEL
|
||||
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW,
|
||||
"Percent of pagecache to reclaim ARC to");
|
||||
|
||||
@@ -221,7 +221,7 @@ ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx, ddt_log_update_t *dlu)
|
||||
uint64_t length = nblocks * dlu->dlu_dn->dn_datablksz;
|
||||
|
||||
VERIFY0(dmu_buf_hold_array_by_dnode(dlu->dlu_dn, offset, length,
|
||||
B_FALSE, FTAG, &dlu->dlu_ndbp, &dlu->dlu_dbp,
|
||||
B_FALSE, dlu, &dlu->dlu_ndbp, &dlu->dlu_dbp,
|
||||
DMU_READ_NO_PREFETCH | DMU_UNCACHEDIO));
|
||||
|
||||
dlu->dlu_tx = tx;
|
||||
@@ -338,7 +338,7 @@ ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu)
|
||||
*/
|
||||
dmu_buf_fill_done(dlu->dlu_dbp[dlu->dlu_block], dlu->dlu_tx, B_FALSE);
|
||||
|
||||
dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, FTAG);
|
||||
dmu_buf_rele_array(dlu->dlu_dbp, dlu->dlu_ndbp, dlu);
|
||||
|
||||
ddt->ddt_log_active->ddl_length +=
|
||||
dlu->dlu_ndbp * (uint64_t)dlu->dlu_dn->dn_datablksz;
|
||||
|
||||
@@ -1859,7 +1859,7 @@ do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)
|
||||
&cookie)) != NULL) {
|
||||
/*
|
||||
* os_userused_lock protects against concurrent calls to
|
||||
* zap_increment_int(). It's needed because zap_increment_int()
|
||||
* zap_increment(). It's needed because zap_increment()
|
||||
* is not thread-safe (i.e. not atomic).
|
||||
*/
|
||||
mutex_enter(&os->os_userused_lock);
|
||||
|
||||
@@ -2901,16 +2901,20 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
{
|
||||
struct drr_object *drro =
|
||||
&drc->drc_rrd->header.drr_u.drr_object;
|
||||
uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
||||
uint32_t size;
|
||||
void *buf = NULL;
|
||||
dmu_object_info_t doi;
|
||||
|
||||
size = DRR_OBJECT_PAYLOAD_SIZE(drro);
|
||||
if (size > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(ERANGE));
|
||||
|
||||
if (size != 0)
|
||||
buf = kmem_zalloc(size, KM_SLEEP);
|
||||
buf = vmem_zalloc(size, KM_SLEEP);
|
||||
|
||||
err = receive_read_payload_and_next_header(drc, size, buf);
|
||||
if (err != 0) {
|
||||
kmem_free(buf, size);
|
||||
vmem_free(buf, size);
|
||||
return (err);
|
||||
}
|
||||
err = dmu_object_info(drc->drc_os, drro->drr_object, &doi);
|
||||
@@ -2934,7 +2938,11 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
case DRR_WRITE:
|
||||
{
|
||||
struct drr_write *drrw = &drc->drc_rrd->header.drr_u.drr_write;
|
||||
int size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
uint64_t size = DRR_WRITE_PAYLOAD_SIZE(drrw);
|
||||
|
||||
if (size > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(ERANGE));
|
||||
|
||||
abd_t *abd = abd_alloc_linear(size, B_FALSE);
|
||||
err = receive_read_payload_and_next_header(drc, size,
|
||||
abd_to_buf(abd));
|
||||
@@ -2951,12 +2959,18 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
{
|
||||
struct drr_write_embedded *drrwe =
|
||||
&drc->drc_rrd->header.drr_u.drr_write_embedded;
|
||||
uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);
|
||||
void *buf = kmem_zalloc(size, KM_SLEEP);
|
||||
uint32_t size;
|
||||
void *buf;
|
||||
|
||||
size = P2ROUNDUP(drrwe->drr_psize, 8);
|
||||
if (size > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(ERANGE));
|
||||
|
||||
buf = vmem_zalloc(size, KM_SLEEP);
|
||||
|
||||
err = receive_read_payload_and_next_header(drc, size, buf);
|
||||
if (err != 0) {
|
||||
kmem_free(buf, size);
|
||||
vmem_free(buf, size);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@@ -2985,7 +2999,11 @@ receive_read_record(dmu_recv_cookie_t *drc)
|
||||
case DRR_SPILL:
|
||||
{
|
||||
struct drr_spill *drrs = &drc->drc_rrd->header.drr_u.drr_spill;
|
||||
int size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
uint64_t size = DRR_SPILL_PAYLOAD_SIZE(drrs);
|
||||
|
||||
if (size > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(ERANGE));
|
||||
|
||||
abd_t *abd = abd_alloc_linear(size, B_FALSE);
|
||||
err = receive_read_payload_and_next_header(drc, size,
|
||||
abd_to_buf(abd));
|
||||
@@ -3136,7 +3154,7 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
abd_free(rrd->abd);
|
||||
rrd->abd = NULL;
|
||||
} else if (rrd->payload != NULL) {
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
vmem_free(rrd->payload, rrd->payload_size);
|
||||
rrd->payload = NULL;
|
||||
}
|
||||
return (0);
|
||||
@@ -3150,7 +3168,7 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
rrd->abd = NULL;
|
||||
rrd->payload = NULL;
|
||||
} else if (rrd->payload != NULL) {
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
vmem_free(rrd->payload, rrd->payload_size);
|
||||
rrd->payload = NULL;
|
||||
}
|
||||
|
||||
@@ -3163,7 +3181,7 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
{
|
||||
struct drr_object *drro = &rrd->header.drr_u.drr_object;
|
||||
err = receive_object(rwa, drro, rrd->payload);
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
vmem_free(rrd->payload, rrd->payload_size);
|
||||
rrd->payload = NULL;
|
||||
break;
|
||||
}
|
||||
@@ -3201,7 +3219,7 @@ receive_process_record(struct receive_writer_arg *rwa,
|
||||
struct drr_write_embedded *drrwe =
|
||||
&rrd->header.drr_u.drr_write_embedded;
|
||||
err = receive_write_embedded(rwa, drrwe, rrd->payload);
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
vmem_free(rrd->payload, rrd->payload_size);
|
||||
rrd->payload = NULL;
|
||||
break;
|
||||
}
|
||||
@@ -3270,7 +3288,7 @@ receive_writer_thread(void *arg)
|
||||
rrd->abd = NULL;
|
||||
rrd->payload = NULL;
|
||||
} else if (rrd->payload != NULL) {
|
||||
kmem_free(rrd->payload, rrd->payload_size);
|
||||
vmem_free(rrd->payload, rrd->payload_size);
|
||||
rrd->payload = NULL;
|
||||
}
|
||||
/*
|
||||
|
||||
@@ -2241,6 +2241,37 @@ setup_send_progress(struct dmu_send_params *dspp)
|
||||
return (dssp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Payloads must be multiples of 8 bytes for historical compatibility, but
|
||||
* XDR-encoded nvlists are sized in multiples of 4 bytes and may need padding.
|
||||
*
|
||||
* Here we do the simplest possible thing and copy the data to a separate
|
||||
* buffer. Not ideal in terms of performance and memory use, but most BEGIN
|
||||
* nvlists are small or absent, the allocation is momentary, and we'll need
|
||||
* to do this at most once per dataset.
|
||||
*
|
||||
* It's OK if there is extra data after a packed nvlist on the receiving
|
||||
* side because packed nvlists have an internal end-of-list marker.
|
||||
*
|
||||
* The new buffer is allocated with kmem_alloc() and can be freed with
|
||||
* fnvlist_pack_free(), like the original.
|
||||
*/
|
||||
static inline void
|
||||
pad_packed_nvlist(char **buffer, size_t *size)
|
||||
{
|
||||
size_t size_in = *size;
|
||||
size_t extra_bytes = P2ROUNDUP(size_in, 8) - size_in;
|
||||
if (extra_bytes != 0) {
|
||||
size_t expanded_size = size_in + extra_bytes;
|
||||
char *longbuf = kmem_alloc(expanded_size, KM_SLEEP);
|
||||
memcpy(longbuf, *buffer, size_in);
|
||||
memset(longbuf + size_in, 0, extra_bytes);
|
||||
fnvlist_pack_free(*buffer, size_in);
|
||||
*buffer = longbuf;
|
||||
*size = expanded_size;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Actually do the bulk of the work in a zfs send.
|
||||
*
|
||||
@@ -2474,7 +2505,7 @@ dmu_send_impl(struct dmu_send_params *dspp)
|
||||
|
||||
dsl_pool_rele(dp, tag);
|
||||
|
||||
void *payload = NULL;
|
||||
char *payload = NULL;
|
||||
size_t payload_len = 0;
|
||||
nvlist_t *nvl = fnvlist_alloc();
|
||||
|
||||
@@ -2548,7 +2579,9 @@ dmu_send_impl(struct dmu_send_params *dspp)
|
||||
}
|
||||
|
||||
if (!nvlist_empty(nvl)) {
|
||||
payload = fnvlist_pack(nvl, &payload_len);
|
||||
VERIFY0(nvlist_pack(nvl, &payload, &payload_len,
|
||||
NV_ENCODE_XDR, KM_SLEEP));
|
||||
pad_packed_nvlist(&payload, &payload_len);
|
||||
drr->drr_payloadlen = payload_len;
|
||||
}
|
||||
|
||||
|
||||
@@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
|
||||
} else {
|
||||
dmu_buf_t *db;
|
||||
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
|
||||
DB_RF_MUST_SUCCEED, FTAG, &db));
|
||||
DB_RF_MUST_SUCCEED, tag, &db));
|
||||
dmu_buf_will_fill(db, tx, B_FALSE);
|
||||
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
|
||||
SPA_MINBLOCKSIZE), tx));
|
||||
|
||||
@@ -1534,9 +1534,28 @@ dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
/* call from syncing context when we actually write/free space for this dd */
|
||||
void
|
||||
dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
|
||||
static void dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,
|
||||
int64_t compressed, int64_t uncompressed, int64_t tonew,
|
||||
dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx);
|
||||
|
||||
static void
|
||||
dsl_dir_lock_enter(dsl_dir_t *dd, boolean_t nested)
|
||||
{
|
||||
/*
|
||||
* lockdep needs an explicit subclass when a child dd_lock
|
||||
* nests an ancestor.
|
||||
*/
|
||||
if (nested) {
|
||||
mutex_enter_nested(&dd->dd_lock, NESTED_SINGLE);
|
||||
} else {
|
||||
mutex_enter(&dd->dd_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dsl_dir_diduse_space_impl(dsl_dir_t *dd, dd_used_t type,
|
||||
int64_t used, int64_t compressed, int64_t uncompressed,
|
||||
boolean_t nested, dmu_tx_t *tx)
|
||||
{
|
||||
int64_t accounted_delta;
|
||||
|
||||
@@ -1554,7 +1573,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
*/
|
||||
boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
|
||||
if (needlock)
|
||||
mutex_enter(&dd->dd_lock);
|
||||
dsl_dir_lock_enter(dd, nested);
|
||||
dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
|
||||
accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
|
||||
ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
|
||||
@@ -1582,12 +1601,20 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
if (dd->dd_parent != NULL) {
|
||||
dsl_dir_diduse_transfer_space(dd->dd_parent,
|
||||
dsl_dir_diduse_transfer_space_impl(dd->dd_parent,
|
||||
accounted_delta, compressed, uncompressed,
|
||||
used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
|
||||
used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used,
|
||||
int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_diduse_space_impl(dd, type, used, compressed, uncompressed,
|
||||
B_FALSE, tx);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
|
||||
@@ -1612,10 +1639,10 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
|
||||
mutex_exit(&dd->dd_lock);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
|
||||
static void
|
||||
dsl_dir_diduse_transfer_space_impl(dsl_dir_t *dd, int64_t used,
|
||||
int64_t compressed, int64_t uncompressed, int64_t tonew,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
|
||||
dd_used_t oldtype, dd_used_t newtype, boolean_t nested, dmu_tx_t *tx)
|
||||
{
|
||||
int64_t accounted_delta;
|
||||
|
||||
@@ -1625,7 +1652,7 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
|
||||
|
||||
dmu_buf_will_dirty(dd->dd_dbuf, tx);
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
dsl_dir_lock_enter(dd, nested);
|
||||
dsl_dir_phys_t *ddp = dsl_dir_phys(dd);
|
||||
accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used);
|
||||
ASSERT(used >= 0 || ddp->dd_used_bytes >= -used);
|
||||
@@ -1656,12 +1683,21 @@ dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
if (dd->dd_parent != NULL) {
|
||||
dsl_dir_diduse_transfer_space(dd->dd_parent,
|
||||
dsl_dir_diduse_transfer_space_impl(dd->dd_parent,
|
||||
accounted_delta, compressed, uncompressed,
|
||||
used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
|
||||
used, DD_USED_CHILD_RSRV, DD_USED_CHILD, nested, tx);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used,
|
||||
int64_t compressed, int64_t uncompressed, int64_t tonew,
|
||||
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dir_diduse_transfer_space_impl(dd, used, compressed,
|
||||
uncompressed, tonew, oldtype, newtype, B_FALSE, tx);
|
||||
}
|
||||
|
||||
typedef struct dsl_dir_set_qr_arg {
|
||||
const char *ddsqra_name;
|
||||
zprop_source_t ddsqra_source;
|
||||
@@ -1828,8 +1864,8 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
|
||||
|
||||
if (dd->dd_parent != NULL) {
|
||||
/* Roll up this additional usage into our ancestors */
|
||||
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
|
||||
delta, 0, 0, tx);
|
||||
dsl_dir_diduse_space_impl(dd->dd_parent, DD_USED_CHILD_RSRV,
|
||||
delta, 0, 0, B_TRUE, tx);
|
||||
}
|
||||
mutex_exit(&dd->dd_lock);
|
||||
}
|
||||
@@ -2268,22 +2304,29 @@ dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
inode_timespec_t t;
|
||||
|
||||
ASSERT(dsl_pool_sync_context(dp));
|
||||
gethrestime(&t);
|
||||
|
||||
mutex_enter(&dd->dd_lock);
|
||||
dd->dd_snap_cmtime = t;
|
||||
if (spa_feature_is_enabled(dp->dp_spa,
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET)) {
|
||||
objset_t *mos = dd->dd_pool->dp_meta_objset;
|
||||
uint64_t ddobj = dd->dd_object;
|
||||
dsl_dir_zapify(dd, tx);
|
||||
VERIFY0(zap_update(mos, ddobj,
|
||||
DD_FIELD_SNAPSHOTS_CHANGED,
|
||||
sizeof (uint64_t),
|
||||
sizeof (inode_timespec_t) / sizeof (uint64_t),
|
||||
&t, tx));
|
||||
}
|
||||
mutex_exit(&dd->dd_lock);
|
||||
|
||||
if (!spa_feature_is_enabled(dp->dp_spa,
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET)) {
|
||||
return;
|
||||
}
|
||||
|
||||
objset_t *mos = dd->dd_pool->dp_meta_objset;
|
||||
|
||||
/*
|
||||
* dsl_dir_zapify() and zap_update() may dirty buffers and recurse
|
||||
* into space accounting, so do not call them with dd_lock held.
|
||||
*/
|
||||
dsl_dir_zapify(dd, tx);
|
||||
VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_SNAPSHOTS_CHANGED,
|
||||
sizeof (uint64_t),
|
||||
sizeof (inode_timespec_t) / sizeof (uint64_t), &t, tx));
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -1280,6 +1280,7 @@ dsl_errorscrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
|
||||
spa->spa_scan_pass_errorscrub_pause = gethrestime_sec();
|
||||
scn->errorscrub_phys.dep_paused_flags = B_TRUE;
|
||||
dsl_errorscrub_sync_state(scn, tx);
|
||||
zap_cursor_fini(&scn->errorscrub_cursor);
|
||||
spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_PAUSED);
|
||||
} else {
|
||||
ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
|
||||
|
||||
@@ -96,13 +96,17 @@ zfs_gzip_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
/* check if hardware accelerator can be used */
|
||||
if (qat_dc_use_accel(d_len)) {
|
||||
if (qat_compress(QAT_DECOMPRESS, s_start, s_len,
|
||||
d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
|
||||
return (0);
|
||||
d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS) {
|
||||
if ((size_t)dstlen == d_len)
|
||||
return (0);
|
||||
}
|
||||
/* if hardware de-compress fail, do it again with software */
|
||||
}
|
||||
|
||||
if (uncompress_func(d_start, &dstlen, s_start, s_len) != Z_OK)
|
||||
return (-1);
|
||||
if ((size_t)dstlen != d_len)
|
||||
return (-1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -89,17 +89,24 @@ zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
(void) n;
|
||||
const char *src = s_start;
|
||||
uint32_t bufsiz = BE_IN32(src);
|
||||
int decoded;
|
||||
|
||||
/* invalid compressed buffer size encoded at start */
|
||||
if (bufsiz + sizeof (bufsiz) > s_len)
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* Returns 0 on success (decompression function returned non-negative)
|
||||
* and non-zero on failure (decompression function returned negative).
|
||||
* LZ4_uncompress_unknownOutputSize returns the number of bytes decoded
|
||||
* on success, or a negative value on failure. An OpenZFS block must
|
||||
* expand to exactly d_len bytes
|
||||
*/
|
||||
return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
|
||||
d_start, bufsiz, d_len) < 0);
|
||||
decoded = LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
|
||||
d_start, bufsiz, d_len);
|
||||
if (decoded < 0)
|
||||
return (1);
|
||||
if (d_len != (size_t)decoded)
|
||||
return (1);
|
||||
return (0);
|
||||
}
|
||||
|
||||
ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
|
||||
|
||||
@@ -82,11 +82,11 @@ int zfs_metaslab_sm_blksz_with_log = (1 << 17);
|
||||
|
||||
/*
|
||||
* The in-core space map representation is more compact than its on-disk form.
|
||||
* The zfs_condense_pct determines how much more compact the in-core
|
||||
* The zfs_metaslab_condense_pct determines how much more compact the in-core
|
||||
* space map representation must be before we compact it on-disk.
|
||||
* Values should be greater than or equal to 100.
|
||||
*/
|
||||
uint_t zfs_condense_pct = 200;
|
||||
uint_t zfs_metaslab_condense_pct = 200;
|
||||
|
||||
/*
|
||||
* Condensing a metaslab is not guaranteed to actually reduce the amount of
|
||||
@@ -3826,8 +3826,8 @@ metaslab_group_preload(metaslab_group_t *mg)
|
||||
* increase as a result of writing out the free space range tree.
|
||||
*
|
||||
* 2. Condense if the on on-disk space map representation is at least
|
||||
* zfs_condense_pct/100 times the size of the optimal representation
|
||||
* (i.e. zfs_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).
|
||||
* zfs_metaslab_condense_pct/100 times the size of the optimal representation
|
||||
* (i.e. zfs_metaslab_condense_pct = 110 and in-core = 1MB, optimal = 1.1MB).
|
||||
*
|
||||
* 3. Do not condense if the on-disk size of the space map does not actually
|
||||
* decrease.
|
||||
@@ -3863,7 +3863,8 @@ metaslab_should_condense(metaslab_t *msp)
|
||||
uint64_t optimal_size = space_map_estimate_optimal_size(sm,
|
||||
msp->ms_allocatable, SM_NO_VDEVID);
|
||||
|
||||
return (object_size >= (optimal_size * zfs_condense_pct / 100) &&
|
||||
return (object_size >=
|
||||
(optimal_size * zfs_metaslab_condense_pct / 100) &&
|
||||
object_size > zfs_metaslab_condense_block_threshold * record_size);
|
||||
}
|
||||
|
||||
@@ -6442,6 +6443,14 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_max_search, UINT, ZMOD_RW,
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_use_largest_segment, INT, ZMOD_RW,
|
||||
"When looking in size tree, use largest segment instead of exact fit");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_alloc_threshold, U64, ZMOD_RW,
|
||||
"Minimum size which forces the dynamic allocator to change its "
|
||||
"allocation strategy");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, df_free_pct, UINT, ZMOD_RW,
|
||||
"The minimum free space, in percent, to continue allocations in a "
|
||||
"first-fit fashion");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, max_size_cache_sec, U64,
|
||||
ZMOD_RW, "How long to trust the cached max chunk size of a metaslab");
|
||||
|
||||
@@ -6454,6 +6463,18 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT,
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW,
|
||||
"Normally only consider this many of the best metaslabs in each vdev");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_no_log, INT, ZMOD_RW,
|
||||
"Block size for space map in pools with log space map disabled. "
|
||||
"Power of 2 greater than 4096.");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, sm_blksz_with_log, INT, ZMOD_RW,
|
||||
"Block size for space map in pools with log space map enabled. "
|
||||
"Power of 2 greater than 4096.");
|
||||
|
||||
ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator,
|
||||
param_set_active_allocator, param_get_charp, ZMOD_RW,
|
||||
"SPA active allocator");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, condense_pct, UINT, ZMOD_RW,
|
||||
"Condense on-disk spacemap when it is more than this many percents "
|
||||
"of in-memory counterpart");
|
||||
|
||||
@@ -1605,8 +1605,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
|
||||
|
||||
bulk = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
|
||||
attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
|
||||
mutex_enter(&hdl->sa_lock);
|
||||
mutex_enter(&zp->z_lock);
|
||||
mutex_enter(&hdl->sa_lock);
|
||||
|
||||
err = sa_lookup_locked(hdl, SA_ZPL_PROJID(zfsvfs), &projid,
|
||||
sizeof (uint64_t));
|
||||
@@ -1750,8 +1750,8 @@ sa_add_projid(sa_handle_t *hdl, dmu_tx_t *tx, uint64_t projid)
|
||||
zp->z_is_sa = B_TRUE;
|
||||
|
||||
out:
|
||||
mutex_exit(&zp->z_lock);
|
||||
mutex_exit(&hdl->sa_lock);
|
||||
mutex_exit(&zp->z_lock);
|
||||
kmem_free(attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
|
||||
kmem_free(bulk, sizeof (sa_bulk_attr_t) * ZPL_END);
|
||||
if (dxattr_obj)
|
||||
|
||||
@@ -8333,12 +8333,20 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, error));
|
||||
|
||||
/*
|
||||
* log, dedup and special vdevs should not be replaced by spares.
|
||||
* Spares can't replace logs
|
||||
*/
|
||||
if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
|
||||
oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
|
||||
if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
|
||||
/*
|
||||
* For special and dedup vdevs a spare must have matching rotational
|
||||
* characteristics. A rotating spare replacing a non-rotating vdev
|
||||
* would silently degrade pool performance, so we reject the mismatch.
|
||||
*/
|
||||
if (newvd->vdev_isspare &&
|
||||
oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE &&
|
||||
newvd->vdev_nonrot != oldvd->vdev_nonrot)
|
||||
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
|
||||
}
|
||||
|
||||
/*
|
||||
* A dRAID spare can only replace a child of its parent dRAID vdev.
|
||||
@@ -11011,6 +11019,10 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
|
||||
}
|
||||
|
||||
for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
|
||||
vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
|
||||
vdev_sync_dispatch(vd, txg);
|
||||
|
||||
spa_sync_rewrite_vdev_config(spa, tx);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
@@ -11035,9 +11047,6 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
|
||||
dsl_pool_sync_done(dp, txg);
|
||||
|
||||
/*
|
||||
* Update usable space statistics.
|
||||
*/
|
||||
while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
|
||||
!= NULL)
|
||||
vdev_sync_done(vd, txg);
|
||||
@@ -11811,6 +11820,12 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW,
|
||||
"Allow importing pool with up to this number of missing top-level "
|
||||
"vdevs (in read-only mode)");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_cachefile, U64, ZMOD_RW,
|
||||
"Allow importing pools with missing top-level vdevs in cache file");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds_scan, U64, ZMOD_RW,
|
||||
"Allow importing pools with missing top-level vdevs during scan");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT,
|
||||
ZMOD_RW, "Set the livelist condense zthr to pause");
|
||||
|
||||
|
||||
@@ -460,6 +460,7 @@ vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
|
||||
} else if (vd->vdev_leaf_zap != 0) {
|
||||
*objid = vd->vdev_leaf_zap;
|
||||
} else {
|
||||
*objid = 0;
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
@@ -474,8 +475,11 @@ vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
|
||||
uint64_t objid;
|
||||
int err;
|
||||
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0)
|
||||
return (EINVAL);
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0) {
|
||||
/* No ZAP: property was never set, return the default. */
|
||||
*value = vdev_prop_default_numeric(prop);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
|
||||
sizeof (uint64_t), 1, value);
|
||||
@@ -963,6 +967,20 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
&vd->vdev_wholedisk) != 0)
|
||||
vd->vdev_wholedisk = -1ULL;
|
||||
|
||||
/*
|
||||
* Restore the last-known rotational status for leaf vdevs. vdev_open()
|
||||
* will overwrite this with the hardware value when the device is
|
||||
* accessible; the persisted value acts as a fallback for failed or
|
||||
* missing devices so that spare selection can still match on device
|
||||
* type even when the original disk is gone.
|
||||
*/
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
uint64_t rotational = 0;
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
&rotational) == 0)
|
||||
vd->vdev_nonrot = !rotational;
|
||||
}
|
||||
|
||||
vic = &vd->vdev_indirect_config;
|
||||
|
||||
ASSERT0(vic->vic_mapping_object);
|
||||
@@ -1117,6 +1135,11 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
if (top_level && (ops == &vdev_raidz_ops || ops == &vdev_draid_ops))
|
||||
vd->vdev_autosit =
|
||||
vdev_prop_default_numeric(VDEV_PROP_AUTOSIT);
|
||||
if (ops == &vdev_root_ops)
|
||||
vd->vdev_failfast =
|
||||
vdev_prop_default_numeric(VDEV_PROP_FAILFAST);
|
||||
else
|
||||
vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;
|
||||
|
||||
/*
|
||||
* Add ourselves to the parent's list of children.
|
||||
@@ -3912,10 +3935,9 @@ vdev_load(vdev_t *vd)
|
||||
vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
|
||||
1, &failfast);
|
||||
if (error == 0) {
|
||||
vd->vdev_failfast = failfast & 1;
|
||||
vd->vdev_failfast = failfast;
|
||||
} else if (error == ENOENT) {
|
||||
vd->vdev_failfast = vdev_prop_default_numeric(
|
||||
VDEV_PROP_FAILFAST);
|
||||
vd->vdev_failfast = ZPROP_BOOLEAN_INHERIT;
|
||||
} else {
|
||||
vdev_dbgmsg(vd,
|
||||
"vdev_load: zap_lookup(top_zap=%llu) "
|
||||
@@ -4224,17 +4246,39 @@ vdev_remove_empty_log(vdev_t *vd, uint64_t txg)
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_sync_done_task(void *arg)
|
||||
{
|
||||
metaslab_t *msp = arg;
|
||||
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
|
||||
metaslab_sync_done(msp, spa_syncing_txg(spa));
|
||||
}
|
||||
|
||||
void
|
||||
vdev_sync_dispatch(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
ASSERT(vdev_is_concrete(vd));
|
||||
|
||||
for (metaslab_t *msp = txg_list_head(&vd->vdev_ms_list, TXG_CLEAN(txg));
|
||||
msp; msp = txg_list_next(&vd->vdev_ms_list, msp, TXG_CLEAN(txg))) {
|
||||
(void) taskq_dispatch(spa->spa_sync_tq,
|
||||
metaslab_sync_done_task, msp, TQ_SLEEP);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_sync_done(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
metaslab_t *msp;
|
||||
boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));
|
||||
|
||||
ASSERT(vdev_is_concrete(vd));
|
||||
|
||||
while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
|
||||
!= NULL)
|
||||
metaslab_sync_done(msp, txg);
|
||||
taskq_wait(vd->vdev_spa->spa_sync_tq);
|
||||
|
||||
while (txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)) != NULL)
|
||||
;
|
||||
|
||||
if (reassess) {
|
||||
metaslab_sync_reassess(vd->vdev_mg);
|
||||
@@ -6093,6 +6137,29 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
strval);
|
||||
}
|
||||
break;
|
||||
case VDEV_PROP_ALLOC_BIAS: {
|
||||
intval = fnvpair_value_uint64(elem);
|
||||
ASSERT3U(intval, !=, VDEV_BIAS_LOG);
|
||||
const char *bias_str =
|
||||
(intval == VDEV_BIAS_SPECIAL) ?
|
||||
VDEV_ALLOC_BIAS_SPECIAL :
|
||||
(intval == VDEV_BIAS_DEDUP) ?
|
||||
VDEV_ALLOC_BIAS_DEDUP : NULL;
|
||||
if (bias_str == NULL) {
|
||||
(void) zap_remove(mos, objid,
|
||||
VDEV_TOP_ZAP_ALLOCATION_BIAS, tx);
|
||||
} else {
|
||||
VERIFY0(zap_update(mos, objid,
|
||||
VDEV_TOP_ZAP_ALLOCATION_BIAS,
|
||||
1, strlen(bias_str) + 1, bias_str, tx));
|
||||
spa_activate_allocation_classes(spa, tx);
|
||||
}
|
||||
spa_history_log_internal(spa, "vdev set", tx,
|
||||
"vdev_guid=%llu: alloc_bias=%s",
|
||||
(u_longlong_t)vdev_guid,
|
||||
bias_str != NULL ? bias_str : "none");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
/* normalize the property name */
|
||||
propname = vdev_prop_to_name(prop);
|
||||
@@ -6207,11 +6274,14 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
error = spa_vdev_alloc(spa, vdev_guid);
|
||||
break;
|
||||
case VDEV_PROP_FAILFAST:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
if (nvpair_value_uint64(elem, &intval) != 0 ||
|
||||
intval > ZPROP_BOOLEAN_INHERIT ||
|
||||
(intval == ZPROP_BOOLEAN_INHERIT &&
|
||||
vd->vdev_ops == &vdev_root_ops)) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
vd->vdev_failfast = intval & 1;
|
||||
vd->vdev_failfast = intval;
|
||||
break;
|
||||
case VDEV_PROP_SIT_OUT:
|
||||
/* Only expose this for a draid or raidz leaf */
|
||||
@@ -6319,6 +6389,53 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
}
|
||||
vd->vdev_scheduler = intval;
|
||||
break;
|
||||
case VDEV_PROP_ALLOC_BIAS:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
if (vd != vd->vdev_top || vd->vdev_top_zap == 0) {
|
||||
error = ENOTSUP;
|
||||
break;
|
||||
}
|
||||
/* Log vdevs are not supported: remove and re-add. */
|
||||
if (vd->vdev_islog) {
|
||||
error = ENOTSUP;
|
||||
break;
|
||||
}
|
||||
/* special/dedup needs allocation_classes feature */
|
||||
if (intval != VDEV_BIAS_NONE &&
|
||||
((intval != VDEV_BIAS_SPECIAL &&
|
||||
intval != VDEV_BIAS_DEDUP) ||
|
||||
!spa_feature_is_enabled(spa,
|
||||
SPA_FEATURE_ALLOCATION_CLASSES))) {
|
||||
error = ENOTSUP;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Disallow converting the last normal vdev to
|
||||
* avoid pool suspension on failed allocations.
|
||||
*/
|
||||
if (intval != VDEV_BIAS_NONE &&
|
||||
vd->vdev_alloc_bias == VDEV_BIAS_NONE) {
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int normal = 0;
|
||||
for (uint64_t c = 0;
|
||||
c < rvd->vdev_children; c++) {
|
||||
vdev_t *cvd = rvd->vdev_child[c];
|
||||
if (vdev_is_concrete(cvd) &&
|
||||
cvd->vdev_alloc_bias ==
|
||||
VDEV_BIAS_NONE &&
|
||||
!cvd->vdev_noalloc)
|
||||
normal++;
|
||||
}
|
||||
if (normal <= 1) {
|
||||
error = ENOTSUP;
|
||||
break;
|
||||
}
|
||||
}
|
||||
vd->vdev_alloc_bias = (vdev_alloc_bias_t)intval;
|
||||
break;
|
||||
default:
|
||||
/* Most processing is done in vdev_props_set_sync */
|
||||
break;
|
||||
@@ -6350,7 +6467,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
int err = 0;
|
||||
uint64_t objid;
|
||||
uint64_t objid = 0;
|
||||
uint64_t vdev_guid;
|
||||
nvpair_t *elem = NULL;
|
||||
nvlist_t *nvprops = NULL;
|
||||
@@ -6369,9 +6486,15 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
|
||||
|
||||
if (vdev_prop_get_objid(vd, &objid) != 0)
|
||||
return (SET_ERROR(EINVAL));
|
||||
ASSERT(objid != 0);
|
||||
/*
|
||||
* A missing ZAP is normal for spare and L2ARC vdevs, which are
|
||||
* not part of the main vdev tree and never get ZAPs allocated.
|
||||
* Many properties are sourced directly from vdev_t fields and
|
||||
* work fine without one; ZAP-backed properties will return their
|
||||
* default values. objid is set to 0 when absent and the few
|
||||
* cases that call zap_lookup directly guard against this below.
|
||||
*/
|
||||
(void) vdev_prop_get_objid(vd, &objid);
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
@@ -6694,18 +6817,28 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
break;
|
||||
case VDEV_PROP_FAILFAST:
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
strval = NULL;
|
||||
|
||||
err = zap_lookup(mos, objid, nvpair_name(elem),
|
||||
sizeof (uint64_t), 1, &intval);
|
||||
if (objid != 0) {
|
||||
err = zap_lookup(mos, objid,
|
||||
nvpair_name(elem),
|
||||
sizeof (uint64_t), 1, &intval);
|
||||
} else {
|
||||
err = ENOENT;
|
||||
}
|
||||
if (err == ENOENT) {
|
||||
intval = vdev_prop_default_numeric(
|
||||
prop);
|
||||
if (vd->vdev_ops == &vdev_root_ops)
|
||||
intval =
|
||||
vdev_prop_default_numeric(
|
||||
prop);
|
||||
else
|
||||
intval = ZPROP_BOOLEAN_INHERIT;
|
||||
err = 0;
|
||||
} else if (err) {
|
||||
break;
|
||||
}
|
||||
if (intval == vdev_prop_default_numeric(prop))
|
||||
if (intval == ZPROP_BOOLEAN_INHERIT ||
|
||||
(vd->vdev_ops == &vdev_root_ops &&
|
||||
intval == 1))
|
||||
src = ZPROP_SRC_DEFAULT;
|
||||
|
||||
vdev_prop_add_list(outnvl, propname, strval,
|
||||
@@ -6746,6 +6879,17 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
boolval, src);
|
||||
break;
|
||||
case VDEV_PROP_ALLOC_BIAS:
|
||||
if (vd == vd->vdev_top) {
|
||||
vdev_prop_add_list(outnvl, propname,
|
||||
NULL, vd->vdev_alloc_bias,
|
||||
ZPROP_SRC_NONE);
|
||||
}
|
||||
continue;
|
||||
case VDEV_PROP_ROTATIONAL:
|
||||
vdev_prop_add_list(outnvl, propname, NULL,
|
||||
!vd->vdev_nonrot, ZPROP_SRC_NONE);
|
||||
continue;
|
||||
case VDEV_PROP_CHECKSUM_N:
|
||||
case VDEV_PROP_CHECKSUM_T:
|
||||
case VDEV_PROP_IO_N:
|
||||
@@ -6771,6 +6915,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
/* FALLTHRU */
|
||||
case VDEV_PROP_USERPROP:
|
||||
/* User Properites */
|
||||
if (objid == 0)
|
||||
continue;
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
|
||||
err = zap_length(mos, objid, nvpair_name(elem),
|
||||
|
||||
@@ -467,6 +467,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
|
||||
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
|
||||
vd->vdev_top != NULL) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_TOP_GUID,
|
||||
vd->vdev_top->vdev_guid);
|
||||
}
|
||||
|
||||
if (vd->vdev_path != NULL)
|
||||
fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
|
||||
@@ -493,6 +498,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
vd->vdev_wholedisk);
|
||||
}
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_ROTATIONAL,
|
||||
!vd->vdev_nonrot);
|
||||
}
|
||||
|
||||
if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
|
||||
|
||||
@@ -502,6 +512,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
||||
if (flags & VDEV_CONFIG_L2CACHE)
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
|
||||
|
||||
if ((flags & VDEV_CONFIG_SPARE) && vd->vdev_asize != 0)
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, vd->vdev_asize);
|
||||
|
||||
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
|
||||
vd == vd->vdev_top) {
|
||||
fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
|
||||
@@ -1392,6 +1405,7 @@ vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv)
|
||||
VB_NVLIST);
|
||||
break;
|
||||
}
|
||||
vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0';
|
||||
fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf);
|
||||
}
|
||||
|
||||
|
||||
@@ -102,14 +102,14 @@
|
||||
|
||||
#define WVR(X) [w##X] "=w" (w##X)
|
||||
|
||||
#define UVR0_(REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)
|
||||
#define UVR0_(REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR1_(_1, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR2_(_1, _2, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+w" (w##REG)
|
||||
#define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+w" (w##REG)
|
||||
|
||||
#define UVR0(r...) UVR0_(r)
|
||||
#define UVR1(r...) UVR1_(r)
|
||||
@@ -120,7 +120,7 @@
|
||||
#define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
|
||||
#define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)
|
||||
|
||||
#define UVR(X) [w##X] "+&w" (w##X)
|
||||
#define UVR(X) [w##X] "+w" (w##X)
|
||||
|
||||
#define R_01(REG1, REG2, ...) REG1, REG2
|
||||
#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
|
||||
|
||||
+1119
-1529
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,558 @@
|
||||
// SPDX-License-Identifier: CDDL-1.0
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright 2017 Nexenta Systems, Inc.
|
||||
* Copyright (c) 2024, Klara, Inc.
|
||||
* Copyright (c) 2026, TrueNAS.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zap_impl.h>
|
||||
|
||||
static kmem_cache_t *zap_name_cache;
|
||||
static kmem_cache_t *zap_attr_cache;
|
||||
static kmem_cache_t *zap_name_long_cache;
|
||||
static kmem_cache_t *zap_attr_long_cache;
|
||||
|
||||
/* Setup/teardown caches. Part of the public interface in zap.h. */
|
||||
void
|
||||
zap_init(void)
|
||||
{
|
||||
zap_name_cache = kmem_cache_create("zap_name",
|
||||
sizeof (zap_name_t) + ZAP_MAXNAMELEN, 0, NULL, NULL,
|
||||
NULL, NULL, NULL, 0);
|
||||
|
||||
zap_attr_cache = kmem_cache_create("zap_attr_cache",
|
||||
sizeof (zap_attribute_t) + ZAP_MAXNAMELEN, 0, NULL,
|
||||
NULL, NULL, NULL, NULL, 0);
|
||||
|
||||
zap_name_long_cache = kmem_cache_create("zap_name_long",
|
||||
sizeof (zap_name_t) + ZAP_MAXNAMELEN_NEW, 0, NULL, NULL,
|
||||
NULL, NULL, NULL, 0);
|
||||
|
||||
zap_attr_long_cache = kmem_cache_create("zap_attr_long_cache",
|
||||
sizeof (zap_attribute_t) + ZAP_MAXNAMELEN_NEW, 0, NULL,
|
||||
NULL, NULL, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
void
|
||||
zap_fini(void)
|
||||
{
|
||||
kmem_cache_destroy(zap_name_cache);
|
||||
kmem_cache_destroy(zap_attr_cache);
|
||||
kmem_cache_destroy(zap_name_long_cache);
|
||||
kmem_cache_destroy(zap_attr_long_cache);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags,
|
||||
size_t outlen)
|
||||
{
|
||||
ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
|
||||
|
||||
size_t inlen = strlen(name) + 1;
|
||||
|
||||
int err = 0;
|
||||
(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
|
||||
normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
|
||||
U8_UNICODE_LATEST, &err);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
zap_name_t *
|
||||
zap_name_alloc(zap_t *zap, boolean_t longname)
|
||||
{
|
||||
kmem_cache_t *cache = longname ? zap_name_long_cache : zap_name_cache;
|
||||
zap_name_t *zn = kmem_cache_alloc(cache, KM_SLEEP);
|
||||
|
||||
zn->zn_zap = zap;
|
||||
zn->zn_normbuf_len = longname ? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
|
||||
return (zn);
|
||||
}
|
||||
|
||||
zap_name_t *
|
||||
zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
|
||||
{
|
||||
size_t key_len = strlen(key) + 1;
|
||||
zap_name_t *zn = zap_name_alloc(zap, (key_len > ZAP_MAXNAMELEN));
|
||||
if (zap_name_init_str(zn, key, mt) != 0) {
|
||||
zap_name_free(zn);
|
||||
return (NULL);
|
||||
}
|
||||
return (zn);
|
||||
}
|
||||
|
||||
zap_name_t *
|
||||
zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
|
||||
{
|
||||
zap_name_t *zn = kmem_cache_alloc(zap_name_cache, KM_SLEEP);
|
||||
|
||||
ASSERT0(zap->zap_normflags);
|
||||
zn->zn_zap = zap;
|
||||
zn->zn_key_intlen = sizeof (*key);
|
||||
zn->zn_key_orig = zn->zn_key_norm = key;
|
||||
zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
|
||||
zn->zn_matchtype = 0;
|
||||
zn->zn_normbuf_len = ZAP_MAXNAMELEN;
|
||||
|
||||
zn->zn_hash = zap_hash(zn);
|
||||
return (zn);
|
||||
}
|
||||
|
||||
void
|
||||
zap_name_free(zap_name_t *zn)
|
||||
{
|
||||
if (zn->zn_normbuf_len == ZAP_MAXNAMELEN) {
|
||||
kmem_cache_free(zap_name_cache, zn);
|
||||
} else {
|
||||
ASSERT3U(zn->zn_normbuf_len, ==, ZAP_MAXNAMELEN_NEW);
|
||||
kmem_cache_free(zap_name_long_cache, zn);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
size_t key_len = strlen(key) + 1;
|
||||
|
||||
/* Make sure zn is allocated for longname if key is long */
|
||||
IMPLY(key_len > ZAP_MAXNAMELEN,
|
||||
zn->zn_normbuf_len == ZAP_MAXNAMELEN_NEW);
|
||||
|
||||
zn->zn_key_intlen = sizeof (*key);
|
||||
zn->zn_key_orig = key;
|
||||
zn->zn_key_orig_numints = key_len;
|
||||
zn->zn_matchtype = mt;
|
||||
zn->zn_normflags = zap->zap_normflags;
|
||||
|
||||
/*
|
||||
* If we're dealing with a case sensitive lookup on a mixed or
|
||||
* insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
|
||||
* will fold case to all caps overriding the lookup request.
|
||||
*/
|
||||
if (mt & MT_MATCH_CASE)
|
||||
zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;
|
||||
|
||||
if (zap->zap_normflags) {
|
||||
/*
|
||||
* We *must* use zap_normflags because this normalization is
|
||||
* what the hash is computed from.
|
||||
*/
|
||||
if (zap_normalize(zap, key, zn->zn_normbuf,
|
||||
zap->zap_normflags, zn->zn_normbuf_len) != 0)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
zn->zn_key_norm = zn->zn_normbuf;
|
||||
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
|
||||
} else {
|
||||
if (mt != 0)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
zn->zn_key_norm = zn->zn_key_orig;
|
||||
zn->zn_key_norm_numints = zn->zn_key_orig_numints;
|
||||
}
|
||||
|
||||
zn->zn_hash = zap_hash(zn);
|
||||
|
||||
if (zap->zap_normflags != zn->zn_normflags) {
|
||||
/*
|
||||
* We *must* use zn_normflags because this normalization is
|
||||
* what the matching is based on. (Not the hash!)
|
||||
*/
|
||||
if (zap_normalize(zap, key, zn->zn_normbuf,
|
||||
zn->zn_normflags, zn->zn_normbuf_len) != 0)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zap_match(zap_name_t *zn, const char *matchname)
|
||||
{
|
||||
boolean_t res = B_FALSE;
|
||||
ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
|
||||
|
||||
if (zn->zn_matchtype & MT_NORMALIZE) {
|
||||
size_t namelen = zn->zn_normbuf_len;
|
||||
char normbuf[ZAP_MAXNAMELEN];
|
||||
char *norm = normbuf;
|
||||
|
||||
/*
|
||||
* Cannot allocate this on-stack as it exceed the stack-limit of
|
||||
* 1024.
|
||||
*/
|
||||
if (namelen > ZAP_MAXNAMELEN)
|
||||
norm = kmem_alloc(namelen, KM_SLEEP);
|
||||
|
||||
if (zap_normalize(zn->zn_zap, matchname, norm,
|
||||
zn->zn_normflags, namelen) != 0) {
|
||||
res = B_FALSE;
|
||||
} else {
|
||||
res = (strcmp(zn->zn_key_norm, norm) == 0);
|
||||
}
|
||||
if (norm != normbuf)
|
||||
kmem_free(norm, namelen);
|
||||
} else {
|
||||
res = (strcmp(zn->zn_key_orig, matchname) == 0);
|
||||
}
|
||||
return (res);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zap_hash(zap_name_t *zn)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
uint64_t h = 0;
|
||||
|
||||
if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
|
||||
ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
|
||||
h = *(uint64_t *)zn->zn_key_orig;
|
||||
} else {
|
||||
h = zap->zap_salt;
|
||||
ASSERT(h != 0);
|
||||
ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
|
||||
|
||||
if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
|
||||
const uint64_t *wp = zn->zn_key_norm;
|
||||
|
||||
ASSERT(zn->zn_key_intlen == 8);
|
||||
for (int i = 0; i < zn->zn_key_norm_numints;
|
||||
wp++, i++) {
|
||||
uint64_t word = *wp;
|
||||
|
||||
for (int j = 0; j < 8; j++) {
|
||||
h = (h >> 8) ^
|
||||
zfs_crc64_table[(h ^ word) & 0xFF];
|
||||
word >>= NBBY;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const uint8_t *cp = zn->zn_key_norm;
|
||||
|
||||
/*
|
||||
* We previously stored the terminating null on
|
||||
* disk, but didn't hash it, so we need to
|
||||
* continue to not hash it. (The
|
||||
* zn_key_*_numints includes the terminating
|
||||
* null for non-binary keys.)
|
||||
*/
|
||||
int len = zn->zn_key_norm_numints - 1;
|
||||
|
||||
ASSERT(zn->zn_key_intlen == 1);
|
||||
for (int i = 0; i < len; cp++, i++) {
|
||||
h = (h >> 8) ^
|
||||
zfs_crc64_table[(h ^ *cp) & 0xFF];
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Don't use all 64 bits, since we need some in the cookie for
|
||||
* the collision differentiator. We MUST use the high bits,
|
||||
* since those are the ones that we first pay attention to when
|
||||
* choosing the bucket.
|
||||
*/
|
||||
h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
|
||||
|
||||
return (h);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_lock_impl(dnode_t *dn, dmu_buf_t *db, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
|
||||
{
|
||||
ASSERT0(db->db_offset);
|
||||
objset_t *os = dmu_buf_get_objset(db);
|
||||
uint64_t obj = db->db_object;
|
||||
|
||||
*zapp = NULL;
|
||||
|
||||
if (DMU_OT_BYTESWAP(dn->dn_type) != DMU_BSWAP_ZAP)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
zap_t *zap = dmu_buf_get_user(db);
|
||||
if (zap == NULL) {
|
||||
zap = mzap_open(db);
|
||||
if (zap == NULL) {
|
||||
/*
|
||||
* mzap_open() didn't like what it saw on-disk.
|
||||
* Check for corruption!
|
||||
*/
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We're checking zap_ismicro without the lock held, in order to
|
||||
* tell what type of lock we want. Once we have some sort of
|
||||
* lock, see if it really is the right type. In practice this
|
||||
* can only be different if it was upgraded from micro to fat,
|
||||
* and micro wanted WRITER but fat only needs READER.
|
||||
*/
|
||||
krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
|
||||
rw_enter(&zap->zap_rwlock, lt);
|
||||
if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
|
||||
/* it was upgraded, now we only need reader */
|
||||
ASSERT(lt == RW_WRITER);
|
||||
ASSERT(RW_READER ==
|
||||
((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
|
||||
rw_downgrade(&zap->zap_rwlock);
|
||||
lt = RW_READER;
|
||||
}
|
||||
|
||||
zap->zap_objset = os;
|
||||
zap->zap_dnode = dn;
|
||||
|
||||
if (lt == RW_WRITER)
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
ASSERT3P(zap->zap_dbuf, ==, db);
|
||||
|
||||
ASSERT(!zap->zap_ismicro ||
|
||||
zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
|
||||
if (zap->zap_ismicro && tx && adding &&
|
||||
zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
|
||||
uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
|
||||
if (newsz > zap_get_micro_max_size(dmu_objset_spa(os))) {
|
||||
dprintf("upgrading obj %llu: num_entries=%u\n",
|
||||
(u_longlong_t)obj, zap->zap_m.zap_num_entries);
|
||||
*zapp = zap;
|
||||
int err = mzap_upgrade(zapp, tx, 0);
|
||||
if (err != 0)
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
return (err);
|
||||
}
|
||||
VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
|
||||
zap->zap_m.zap_num_chunks =
|
||||
db->db_size / MZAP_ENT_LEN - 1;
|
||||
|
||||
if (newsz > SPA_OLD_MAXBLOCKSIZE) {
|
||||
dsl_dataset_t *ds = dmu_objset_ds(os);
|
||||
if (!dsl_dataset_feature_is_active(ds,
|
||||
SPA_FEATURE_LARGE_MICROZAP)) {
|
||||
/*
|
||||
* A microzap just grew beyond the old limit
|
||||
* for the first time, so we have to ensure the
|
||||
* feature flag is activated.
|
||||
* zap_get_micro_max_size() won't let us get
|
||||
* here if the feature is not enabled, so we
|
||||
* don't need any other checks beforehand.
|
||||
*
|
||||
* Since we're in open context, we can't
|
||||
* activate the feature directly, so we instead
|
||||
* flag it on the dataset for next sync.
|
||||
*/
|
||||
dsl_dataset_dirty(ds, tx);
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ds->ds_feature_activation
|
||||
[SPA_FEATURE_LARGE_MICROZAP] =
|
||||
(void *)B_TRUE;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*zapp = zap;
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lock_by_dnode(dnode_t *dn, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
|
||||
zap_t **zapp)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lock_impl(dn, db, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0)
|
||||
dmu_buf_rele(db, tag);
|
||||
else
|
||||
VERIFY(dnode_add_ref(dn, tag));
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lock(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
|
||||
zap_t **zapp)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int err;
|
||||
|
||||
err = dnode_hold(os, obj, tag, &dn);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lock_by_dnode(dn, tx, lti, fatreader, adding, tag, zapp);
|
||||
dnode_rele(dn, tag);
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
zap_unlock(zap_t *zap, const void *tag)
|
||||
{
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
dnode_rele(zap->zap_dnode, tag);
|
||||
dmu_buf_rele(zap->zap_dbuf, tag);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lock_try_upgrade(zap_t *zap, dmu_tx_t *tx)
|
||||
{
|
||||
if (RW_WRITE_HELD(&zap->zap_rwlock))
|
||||
/* Already have writer, nothing to do. */
|
||||
return (1);
|
||||
|
||||
/* Try to upgrade the lock in-place. */
|
||||
if (rw_tryupgrade(&zap->zap_rwlock)) {
|
||||
/*
|
||||
* Got it, mark buffer dirty, since we only do that in
|
||||
* zap_lock_impl() for writer.
|
||||
*/
|
||||
dmu_buf_will_dirty(zap->zap_dbuf, tx);
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
zap_lock_upgrade(zap_t *zap, dmu_tx_t *tx)
|
||||
{
|
||||
if (zap_lock_try_upgrade(zap, tx))
|
||||
return;
|
||||
|
||||
/*
|
||||
* It's safe to drop the lock here because we still have a hold on
|
||||
* zap_dbuf, which prevents the dbuf being evicted and the zap_t being
|
||||
* deallocated.
|
||||
*/
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
|
||||
rw_enter(&zap->zap_rwlock, RW_WRITER);
|
||||
dmu_buf_will_dirty(zap->zap_dbuf, tx);
|
||||
}
|
||||
|
||||
void
|
||||
zap_evict_sync(void *dbu)
|
||||
{
|
||||
zap_t *zap = dbu;
|
||||
|
||||
rw_destroy(&zap->zap_rwlock);
|
||||
|
||||
if (zap->zap_ismicro)
|
||||
mze_destroy(zap);
|
||||
else
|
||||
mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
|
||||
|
||||
kmem_free(zap, sizeof (zap_t));
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zap_getflags(zap_t *zap)
|
||||
{
|
||||
if (zap->zap_ismicro)
|
||||
return (0);
|
||||
return (zap_f_phys(zap)->zap_flags);
|
||||
}
|
||||
|
||||
int
|
||||
zap_hashbits(zap_t *zap)
|
||||
{
|
||||
if (zap_getflags(zap) & ZAP_FLAG_HASH64)
|
||||
return (48);
|
||||
else
|
||||
return (28);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
zap_maxcd(zap_t *zap)
|
||||
{
|
||||
if (zap_getflags(zap) & ZAP_FLAG_HASH64)
|
||||
return ((1<<16)-1);
|
||||
else
|
||||
return (-1U);
|
||||
}
|
||||
|
||||
/* DMU byteswap callback for DMU_BSWAP_ZAP, see dmu_ot_byteswap. */
|
||||
void
|
||||
zap_byteswap(void *buf, size_t size)
|
||||
{
|
||||
uint64_t block_type = *(uint64_t *)buf;
|
||||
|
||||
if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
|
||||
/* ASSERT(magic == ZAP_LEAF_MAGIC); */
|
||||
mzap_byteswap(buf, size);
|
||||
} else {
|
||||
fzap_byteswap(buf, size);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Cursor attribute allocator/free. Part of the public interface in zap.h,
|
||||
* in this file to get access to the kmem caches.
|
||||
*/
|
||||
static zap_attribute_t *
|
||||
zap_attribute_alloc_impl(boolean_t longname)
|
||||
{
|
||||
zap_attribute_t *za;
|
||||
|
||||
za = kmem_cache_alloc((longname)? zap_attr_long_cache : zap_attr_cache,
|
||||
KM_SLEEP);
|
||||
za->za_name_len = (longname)? ZAP_MAXNAMELEN_NEW : ZAP_MAXNAMELEN;
|
||||
return (za);
|
||||
}
|
||||
|
||||
zap_attribute_t *
|
||||
zap_attribute_alloc(void)
|
||||
{
|
||||
return (zap_attribute_alloc_impl(B_FALSE));
|
||||
}
|
||||
|
||||
zap_attribute_t *
|
||||
zap_attribute_long_alloc(void)
|
||||
{
|
||||
return (zap_attribute_alloc_impl(B_TRUE));
|
||||
}
|
||||
|
||||
void
|
||||
zap_attribute_free(zap_attribute_t *za)
|
||||
{
|
||||
if (za->za_name_len == ZAP_MAXNAMELEN) {
|
||||
kmem_cache_free(zap_attr_cache, za);
|
||||
} else {
|
||||
ASSERT3U(za->za_name_len, ==, ZAP_MAXNAMELEN_NEW);
|
||||
kmem_cache_free(zap_attr_long_cache, za);
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user