fusefs: redo vnode attribute locking

Previously most fields in fuse_vnode_data were protected by the vnode
lock.  But because DEBUG_VFS_LOCKS was never enabled by default until
stable/15 the assertions were never checked, and many were wrong.
Others were missing.  This led to panics in stable/15 and 16.0-CURRENT,
when a vnode was expected to be exclusively locked but wasn't, for fuse
file systems that mount with "-o async".

In some places it isn't possible to exclusively lock the vnode when
accessing these fields.  So protect them with a new mutex instead.  This
fixes panics and unprotected field accesses in VOP_READ,
VOP_COPY_FILE_RANGE, VOP_GETATTR, VOP_BMAP, and FUSE_NOTIFY_INVAL_ENTRY.
Add assertions everywhere the protected fields are accessed.

Lock the vnode exclusively when handling FUSE_NOTIFY_INVAL_INODE.

During fuse_vnode_setsize, if the vnode isn't already exclusively
locked, use the vn_delayed_setsize mechanism.  This fixes panics during
VOP_READ or VOP_GETATTR.

Also, ensure that fuse_vnop_rename locks the "from" vnode.

Finally, reorder elements in struct fuse_vnode_data to reduce the
structure size.

Fixes:		283391
Reported by:	kargl, markj, vishwin, Abdelkader Boudih, groenveld@acm.org
MFC after:	2 weeks
Sponsored by:	ConnectWise
Reviewed by:	kib
Differential Revision: https://reviews.freebsd.org/D55230
This commit is contained in:
Alan Somers
2026-01-23 14:23:51 -07:00
parent ce9aff829e
commit 7e68af7ce2
11 changed files with 609 additions and 79 deletions
+24 -10
View File
@@ -44,10 +44,13 @@ using namespace testing;
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
class Bmap: public FuseTest {
class Bmap: public FuseTest,
public WithParamInterface<tuple<int, int>>
{
public:
virtual void SetUp() {
m_maxreadahead = UINT32_MAX;
m_init_flags |= get<0>(GetParam());
FuseTest::SetUp();
}
void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t size)
}
};
class BmapEof: public Bmap, public WithParamInterface<int> {};
class BmapEof: public Bmap {};
/*
* Test FUSE_BMAP
*/
TEST_F(Bmap, bmap)
TEST_P(Bmap, bmap)
{
struct fiobmap2_arg arg;
/*
@@ -124,7 +127,7 @@ TEST_F(Bmap, bmap)
* If the daemon does not implement VOP_BMAP, fusefs should return sensible
* defaults.
*/
TEST_F(Bmap, default_)
TEST_P(Bmap, default_)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@ TEST_F(Bmap, default_)
* The server returns an error for some reason for FUSE_BMAP. fusefs should
* faithfully report that error up to the caller.
*/
TEST_F(Bmap, einval)
TEST_P(Bmap, einval)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@ TEST_F(Bmap, einval)
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 . The bug did not
* lie in fusefs, but this is a convenient place for a regression test.
*/
TEST_F(Bmap, spurious_einval)
TEST_P(Bmap, spurious_einval)
{
const off_t filesize = 4ull << 30;
const ino_t ino = 42;
@@ -288,7 +291,7 @@ TEST_P(BmapEof, eof)
int fd;
int ngetattrs;
ngetattrs = GetParam();
ngetattrs = get<1>(GetParam());
FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
expect_open(ino, 0, 1);
// Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@ TEST_P(BmapEof, eof)
leak(fd);
}
INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
Values(1, 2, 3)
);
/*
* Try with and without async reads, because it affects the type of vnode lock
* on entry to fuse_vnop_bmap.
*/
INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
tuple(0, 0),
tuple(FUSE_ASYNC_READ, 0)
));
INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
tuple(0, 1),
tuple(0, 2),
tuple(0, 3)
));
+26 -12
View File
@@ -47,8 +47,15 @@ using namespace testing;
* invalidation. This file tests our client's handling of those messages.
*/
class Notify: public FuseTest {
class Notify: public FuseTest,
public WithParamInterface<int>
{
public:
virtual void SetUp() {
m_init_flags |= GetParam();
FuseTest::SetUp();
}
/* Ignore an optional FUSE_FSYNC */
void maybe_expect_fsync(uint64_t ino)
{
@@ -154,7 +161,7 @@ static void* store(void* arg) {
}
/* Invalidate a nonexistent entry */
TEST_F(Notify, inval_entry_nonexistent)
TEST_P(Notify, inval_entry_nonexistent)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -173,7 +180,7 @@ TEST_F(Notify, inval_entry_nonexistent)
}
/* Invalidate a cached entry */
TEST_F(Notify, inval_entry)
TEST_P(Notify, inval_entry)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -211,7 +218,7 @@ TEST_F(Notify, inval_entry)
* Invalidate a cached entry beneath the root, which uses a slightly different
* code path.
*/
TEST_F(Notify, inval_entry_below_root)
TEST_P(Notify, inval_entry_below_root)
{
const static char FULLPATH[] = "mountpoint/some_dir/foo";
const static char DNAME[] = "some_dir";
@@ -258,7 +265,7 @@ TEST_F(Notify, inval_entry_below_root)
}
/* Invalidating an entry invalidates the parent directory's attributes */
TEST_F(Notify, inval_entry_invalidates_parent_attrs)
TEST_P(Notify, inval_entry_invalidates_parent_attrs)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -302,7 +309,7 @@ TEST_F(Notify, inval_entry_invalidates_parent_attrs)
}
TEST_F(Notify, inval_inode_nonexistent)
TEST_P(Notify, inval_inode_nonexistent)
{
struct inval_inode_args iia;
ino_t ino = 42;
@@ -320,7 +327,7 @@ TEST_F(Notify, inval_inode_nonexistent)
EXPECT_EQ(0, (intptr_t)thr0_value);
}
TEST_F(Notify, inval_inode_with_clean_cache)
TEST_P(Notify, inval_inode_with_clean_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -390,7 +397,7 @@ TEST_F(Notify, inval_inode_with_clean_cache)
* nothing bad should happen.
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=290519
*/
TEST_F(Notify, notify_after_unmount)
TEST_P(Notify, notify_after_unmount)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -408,7 +415,7 @@ TEST_F(Notify, notify_after_unmount)
/* FUSE_NOTIFY_STORE with a file that's not in the entry cache */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
TEST_F(Notify, DISABLED_store_nonexistent)
TEST_P(Notify, DISABLED_store_nonexistent)
{
struct store_args sa;
ino_t ino = 42;
@@ -427,7 +434,7 @@ TEST_F(Notify, DISABLED_store_nonexistent)
/* Store data into for a file that does not yet have anything cached */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
TEST_F(Notify, DISABLED_store_with_blank_cache)
TEST_P(Notify, DISABLED_store_with_blank_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -465,7 +472,7 @@ TEST_F(Notify, DISABLED_store_with_blank_cache)
leak(fd);
}
TEST_F(NotifyWriteback, inval_inode_with_dirty_cache)
TEST_P(NotifyWriteback, inval_inode_with_dirty_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -506,7 +513,7 @@ TEST_F(NotifyWriteback, inval_inode_with_dirty_cache)
leak(fd);
}
TEST_F(NotifyWriteback, inval_inode_attrs_only)
TEST_P(NotifyWriteback, inval_inode_attrs_only)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -600,3 +607,10 @@ TEST(PreMount, inval_entry_before_mount)
EXPECT_EQ(ENODEV, errno);
delete out;
}
/*
* Try with and without async reads, because it affects the type of vnode lock
* acquired in fuse_internal_invalidate_entry.
*/
INSTANTIATE_TEST_SUITE_P(N, Notify, Values(0, FUSE_ASYNC_READ));
INSTANTIATE_TEST_SUITE_P(N, NotifyWriteback, Values(0, FUSE_ASYNC_READ));
+192
View File
@@ -95,6 +95,19 @@ class AsyncRead: public AioRead {
}
};
class AsyncReadNoAttrCache: public Read {
virtual void SetUp() {
m_init_flags = FUSE_ASYNC_READ;
Read::SetUp();
}
public:
void expect_lookup(const char *relpath, uint64_t ino)
{
// Don't return size, and set attr_valid=0
FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, 0, 1, 0);
}
};
class ReadAhead: public Read,
public WithParamInterface<tuple<bool, int>>
{
@@ -351,6 +364,185 @@ TEST_F(AsyncRead, async_read)
leak(fd);
}
/*
* Regression test for a VFS locking bug: as of
* 22bb70a6b3bb7799276ab480e40665b7d6e4ce25 (17-December-2024), fusefs did not
* obtain an exclusive vnode lock before attempting to clear the attr cache
* after an unexpected eof. The vnode lock would already be exclusive except
* when FUSE_ASYNC_READ is set.
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=283391
*/
TEST_F(AsyncRead, eof)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefghijklmnop";
uint64_t ino = 42;
int fd;
uint64_t offset = 100;
ssize_t bufsize = strlen(CONTENTS);
ssize_t partbufsize = 3 * bufsize / 4;
ssize_t r;
uint8_t buf[bufsize];
struct stat sb;
expect_lookup(RELPATH, ino, offset + bufsize);
expect_open(ino, 0, 1);
expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
expect_getattr(ino, offset + partbufsize);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
r = pread(fd, buf, bufsize, offset);
ASSERT_LE(0, r) << strerror(errno);
EXPECT_EQ(partbufsize, r) << strerror(errno);
ASSERT_EQ(0, fstat(fd, &sb));
EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
leak(fd);
}
/*
* If the daemon disables the attribute cache (or if it has expired), then the
* kernel must fetch attributes during VOP_READ. If async reads are enabled,
* then fuse_internal_cache_attrs will be called without the vnode exclusively
* locked. Regression test for
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=291064
*/
TEST_F(AsyncReadNoAttrCache, read)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefgh";
uint64_t ino = 42;
mode_t mode = S_IFREG | 0644;
int fd;
ssize_t bufsize = strlen(CONTENTS);
uint8_t buf[bufsize];
expect_lookup(RELPATH, ino);
expect_open(ino, 0, 1);
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_GETATTR &&
in.header.nodeid == ino);
}, Eq(true)),
_)
).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out)
{
SET_OUT_HEADER_LEN(out, attr);
out.body.attr.attr.ino = ino;
out.body.attr.attr.mode = mode;
out.body.attr.attr.size = bufsize;
out.body.attr.attr_valid = 0;
})));
expect_read(ino, 0, bufsize, bufsize, CONTENTS);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
leak(fd);
}
/*
* If the vnode's attr cache has expired before VOP_READ begins, the kernel
* will have to fetch the file's size from the server. If it has changed since
* our last query, we'll need to update the vnode pager. But we'll only have a
* shared vnode lock.
*/
TEST_F(AsyncReadNoAttrCache, read_sizechange)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefgh";
uint64_t ino = 42;
mode_t mode = S_IFREG | 0644;
int fd;
ssize_t bufsize = strlen(CONTENTS);
uint8_t buf[bufsize];
ssize_t size1 = bufsize - 1;
ssize_t size2 = bufsize;
Sequence seq;
expect_lookup(RELPATH, ino);
expect_open(ino, 0, 1);
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_GETATTR &&
in.header.nodeid == ino);
}, Eq(true)),
_)
).Times(2)
.InSequence(seq)
.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out)
{
SET_OUT_HEADER_LEN(out, attr);
out.body.attr.attr.ino = ino;
out.body.attr.attr.mode = mode;
out.body.attr.attr.size = size1;
out.body.attr.attr_valid = 0;
})));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_READ &&
in.header.nodeid == ino &&
in.body.read.offset == 0 &&
in.body.read.size == size1);
}, Eq(true)),
_)
).Times(1)
.InSequence(seq)
.WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
out.header.len = sizeof(struct fuse_out_header) + size1;
memmove(out.body.bytes, CONTENTS, size1);
})));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_GETATTR &&
in.header.nodeid == ino);
}, Eq(true)),
_)
).InSequence(seq)
.WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out)
{
SET_OUT_HEADER_LEN(out, attr);
out.body.attr.attr.ino = ino;
out.body.attr.attr.mode = mode;
out.body.attr.attr.size = size2;
out.body.attr.attr_valid = 0;
})));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_READ &&
in.header.nodeid == ino &&
in.body.read.offset == 0 &&
in.body.read.size == size2);
}, Eq(true)),
_)
).Times(1)
.InSequence(seq)
.WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
out.header.len = sizeof(struct fuse_out_header) + size2;
memmove(out.body.bytes, CONTENTS, size2);
})));
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
ASSERT_EQ(size1, read(fd, buf, bufsize)) << strerror(errno);
ASSERT_EQ(0, memcmp(buf, CONTENTS, size1));
/* Read again, but this time the server has changed the file's size */
bzero(buf, size2);
ASSERT_EQ(size2, pread(fd, buf, bufsize, 0)) << strerror(errno);
ASSERT_EQ(0, memcmp(buf, CONTENTS, size2));
leak(fd);
}
/* The kernel should update the cached atime attribute during a read */
TEST_F(Read, atime)
{
+90
View File
@@ -163,6 +163,96 @@ TEST_F(Rename, entry_cache_negative_purge)
ASSERT_EQ(0, access(FULLDST, F_OK)) << strerror(errno);
}
static volatile int stopit = 0;
static void* setattr_th(void* arg) {
char *path = (char*)arg;
while (stopit == 0)
chmod(path, 0777);
return 0;
}
/*
* Rename restarts the syscall to avoid a LOR
*
* This test triggers a race: the chmod() calls VOP_SETATTR, which locks its
* vnode, but fuse_vnop_rename also tries to lock the same vnode. The result
* is that, in order to avoid a LOR, fuse_vnop_rename returns ERELOOKUP to
* restart the syscall.
*
* To verify that the race is hit, watch the fusefs:fusefs:vnops:erelookup
* dtrace probe while the test is running. On my system, that probe fires more
* than 100 times per second during this test.
*/
TEST_F(Rename, erelookup)
{
const char FULLDST[] = "mountpoint/dstdir/dst";
const char RELDSTDIR[] = "dstdir";
const char RELDST[] = "dst";
const char FULLSRC[] = "mountpoint/src";
const char RELSRC[] = "src";
pthread_t th0;
uint64_t ino = 42;
uint64_t dst_dir_ino = 43;
uint32_t mode = S_IFDIR | 0644;
struct timespec now, timeout;
EXPECT_LOOKUP(FUSE_ROOT_ID, RELSRC)
.WillRepeatedly(Invoke(
ReturnImmediate([=](auto i __unused, auto& out) {
SET_OUT_HEADER_LEN(out, entry);
out.body.entry.attr.mode = mode;
out.body.entry.nodeid = ino;
out.body.entry.attr.nlink = 2;
out.body.entry.attr_valid = UINT64_MAX;
out.body.entry.attr.uid = 0;
out.body.entry.attr.gid = 0;
out.body.entry.entry_valid = UINT64_MAX;
}))
);
EXPECT_LOOKUP(FUSE_ROOT_ID, RELDSTDIR)
.WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out)
{
SET_OUT_HEADER_LEN(out, entry);
out.body.entry.nodeid = dst_dir_ino;
out.body.entry.entry_valid = UINT64_MAX;
out.body.entry.attr_valid = UINT64_MAX;
out.body.entry.attr.mode = S_IFDIR | 0755;
out.body.entry.attr.ino = dst_dir_ino;
out.body.entry.attr.uid = geteuid();
out.body.entry.attr.gid = getegid();
})));
EXPECT_LOOKUP(dst_dir_ino, RELDST)
.WillRepeatedly(Invoke(ReturnErrno(ENOENT)));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_SETATTR &&
in.header.nodeid == ino);
}, Eq(true)),
_)
).WillRepeatedly(Invoke(ReturnErrno(EIO)));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_RENAME);
}, Eq(true)),
_)
).WillRepeatedly(Invoke(ReturnErrno(EIO)));
ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, (void*)FULLSRC))
<< strerror(errno);
ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &timeout));
timeout.tv_nsec += NAP_NS;
do {
ASSERT_EQ(-1, rename(FULLSRC, FULLDST));
EXPECT_EQ(EIO, errno);
ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &now));
} while (timespeccmp(&now, &timeout, <));
stopit = 1;
pthread_join(th0, NULL);
}
TEST_F(Rename, exdev)
{
const char FULLB[] = "mountpoint/src";