linux: suppress reclaim lockdep in zfs_inactive via rwlock wrappers

kswapd can enter zfs_inactive() from inode reclaim while holding
fs_reclaim. The z_teardown_inactive_lock still serializes teardown,
but the reclaim-thread acquire/release pair can produce a lockdep
cycle through zfs_zinactive() and zfs_rmnode().

Add Linux rwlock nolockdep wrappers alongside the existing rwlock
macros and use them only for the reclaim-thread
z_teardown_inactive_lock acquire/release in zfs_inactive(). Keep
the real rwsem semantics unchanged and leave CONFIG_LOCKDEP
handling in the platform rwlock layer.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Closes #18505
This commit is contained in:
ZhengYuan Huang
2026-05-14 12:53:14 +08:00
committed by Brian Behlendorf
parent 8b24164f29
commit be6b6ea8c6
2 changed files with 124 additions and 42 deletions
+100 -36
View File
@@ -30,7 +30,6 @@
#include <linux/sched.h>
/*
 * Lock type stored in a krwlock_t's rw_type field by spl_rw_set_type().
 */
typedef enum {
	RW_DRIVER	= 2,
	RW_DEFAULT	= 4,
	/* The *_maybe() lockdep helpers act only on locks of this type. */
	RW_NOLOCKDEP	= 5
} krw_type_t;
@@ -75,20 +74,35 @@ spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
{
rwp->rw_type = type;
}
/*
 * Unconditional lockdep-off hook for the rwlock macros: simply forwards
 * to lockdep_off().  Callers pair it with spl_rw_lockdep_on().
 */
static inline void
spl_rw_lockdep_off(void)
{
	lockdep_off();
}
/*
 * Unconditional lockdep-on hook for the rwlock macros: simply forwards
 * to lockdep_on().  Counterpart of spl_rw_lockdep_off().
 */
static inline void
spl_rw_lockdep_on(void)
{
	lockdep_on();
}
/*
 * Invoke the lockdep-off hook only when rwp is non-NULL and its rw_type
 * is RW_NOLOCKDEP; for any other lock this is meant to do nothing.
 *
 * NOTE(review): these lines come from a diff whose +/- markers were
 * lost.  The direct lockdep_off() call looks like the removed line and
 * spl_rw_lockdep_off() like its replacement -- confirm against the
 * real header before relying on this text.
 */
static inline void
spl_rw_lockdep_off_maybe(krwlock_t *rwp) \
{ \
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
lockdep_off(); \
spl_rw_lockdep_off(); \
}
/*
 * Invoke the lockdep-on hook only when rwp is non-NULL and its rw_type
 * is RW_NOLOCKDEP; counterpart of spl_rw_lockdep_off_maybe().
 *
 * NOTE(review): these lines come from a diff whose +/- markers were
 * lost.  The direct lockdep_on() call looks like the removed line and
 * spl_rw_lockdep_on() like its replacement -- confirm against the
 * real header before relying on this text.
 */
static inline void
spl_rw_lockdep_on_maybe(krwlock_t *rwp) \
{ \
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
lockdep_on(); \
spl_rw_lockdep_on(); \
}
#else /* CONFIG_LOCKDEP */
/*
 * Lockdep is not configured: the type setter and every lockdep on/off
 * helper expand to nothing, so the rwlock macros need no #ifdefs.
 */
#define spl_rw_set_type(rwp, type)
#define spl_rw_lockdep_off()
#define spl_rw_lockdep_on()
#define spl_rw_lockdep_off_maybe(rwp)
#define spl_rw_lockdep_on_maybe(rwp)
#endif /* CONFIG_LOCKDEP */
@@ -117,6 +131,56 @@ RW_READ_HELD(krwlock_t *rwp)
* will be correctly located in the users code which is important
* for the built in kernel lock analysis tools
*/
/*
 * Non-blocking acquire of the semaphore behind rwp in mode rw, with no
 * lockdep toggling of its own.  Evaluates to whatever the trylock call
 * returned.  When the RW_WRITER trylock returns non-zero the owner is
 * recorded with spl_rw_set_owner().  Any mode other than RW_READER or
 * RW_WRITER trips VERIFY(0).
 */
#define spl_rw_tryenter_impl(rwp, rw) /* CSTYLED */ \
({ \
	int _try_rc_ = 0; \
 \
	switch (rw) { \
	case RW_WRITER: \
		_try_rc_ = down_write_trylock(SEM(rwp)); \
		if (_try_rc_ != 0) \
			spl_rw_set_owner(rwp); \
		break; \
	case RW_READER: \
		_try_rc_ = down_read_trylock(SEM(rwp)); \
		break; \
	default: \
		VERIFY(0); \
	} \
	_try_rc_; \
})
/*
 * Acquire of the semaphore behind rwp in mode rw, with no lockdep
 * toggling of its own.  RW_READER takes it with down_read(); RW_WRITER
 * takes it with down_write() and then records the owner.  Any other
 * mode trips VERIFY(0).
 */
#define spl_rw_enter_impl(rwp, rw) /* CSTYLED */ \
({ \
	switch (rw) { \
	case RW_WRITER: \
		down_write(SEM(rwp)); \
		spl_rw_set_owner(rwp); \
		break; \
	case RW_READER: \
		down_read(SEM(rwp)); \
		break; \
	default: \
		VERIFY(0); \
	} \
})
/*
 * Release rwp with no lockdep toggling of its own.  If the lock is
 * write-held the owner is cleared before up_write(); otherwise the
 * lock is asserted to be read-held and released with up_read().
 */
#define spl_rw_exit_impl(rwp) /* CSTYLED */ \
({ \
	if (!RW_WRITE_HELD(rwp)) { \
		ASSERT(RW_READ_HELD(rwp)); \
		up_read(SEM(rwp)); \
	} else { \
		spl_rw_clear_owner(rwp); \
		up_write(SEM(rwp)); \
	} \
})
/*
 * Downgrade step with no lockdep toggling of its own: the recorded
 * owner is cleared first, then downgrade_write() is called on the
 * semaphore behind rwp.
 */
#define spl_rw_downgrade_impl(rwp) /* CSTYLED */ \
({ \
	spl_rw_clear_owner(rwp); \
	downgrade_write(SEM(rwp)); \
})
#define rw_init(rwp, name, type, arg) /* CSTYLED */ \
({ \
static struct lock_class_key __key; \
@@ -140,60 +204,60 @@ RW_READ_HELD(krwlock_t *rwp)
/*
 * rw_tryenter() - try to take rwp in mode rw (RW_READER or RW_WRITER)
 * without blocking; evaluates to the trylock result.  The attempt is
 * bracketed by spl_rw_lockdep_off_maybe()/spl_rw_lockdep_on_maybe().
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * lost, so the removed open-coded switch and the added
 * spl_rw_tryenter_impl() call both appear below.  Confirm against the
 * real header before relying on this text.
 */
#define rw_tryenter(rwp, rw) /* CSTYLED */ \
({ \
int _rc_ = 0; \
\
spl_rw_lockdep_off_maybe(rwp); \
switch (rw) { \
case RW_READER: \
_rc_ = down_read_trylock(SEM(rwp)); \
break; \
case RW_WRITER: \
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
spl_rw_set_owner(rwp); \
break; \
default: \
VERIFY(0); \
} \
int _rc_ = spl_rw_tryenter_impl(rwp, rw); \
spl_rw_lockdep_on_maybe(rwp); \
_rc_; \
})
/*
 * Variant of rw_tryenter() that calls the unconditional lockdep hooks
 * around the attempt, whatever type the lock was given.  Evaluates to
 * the result of spl_rw_tryenter_impl().
 */
#define rw_tryenter_nolockdep(rwp, rw) /* CSTYLED */ \
({ \
	int _nolockdep_rc_; \
 \
	spl_rw_lockdep_off(); \
	_nolockdep_rc_ = spl_rw_tryenter_impl(rwp, rw); \
	spl_rw_lockdep_on(); \
	_nolockdep_rc_; \
})
/*
 * rw_enter() - take rwp in mode rw (RW_READER or RW_WRITER).  The
 * acquire is bracketed by spl_rw_lockdep_off_maybe()/
 * spl_rw_lockdep_on_maybe().
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * lost, so the removed open-coded switch and the added
 * spl_rw_enter_impl() call both appear below.  Confirm against the
 * real header before relying on this text.
 */
#define rw_enter(rwp, rw) /* CSTYLED */ \
({ \
spl_rw_lockdep_off_maybe(rwp); \
switch (rw) { \
case RW_READER: \
down_read(SEM(rwp)); \
break; \
case RW_WRITER: \
down_write(SEM(rwp)); \
spl_rw_set_owner(rwp); \
break; \
default: \
VERIFY(0); \
} \
spl_rw_enter_impl(rwp, rw); \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Variant of rw_enter() that calls the unconditional lockdep hooks
 * around the acquire, whatever type the lock was given.  Release the
 * lock with rw_exit_nolockdep() so both halves of the pair match.
 */
#define rw_enter_nolockdep(rwp, rw) /* CSTYLED */ \
({ \
	spl_rw_lockdep_off(); \
	spl_rw_enter_impl(rwp, rw); \
	spl_rw_lockdep_on(); \
})
/*
 * rw_exit() - release rwp, whether it is held as writer or as reader.
 * The release is bracketed by spl_rw_lockdep_off_maybe()/
 * spl_rw_lockdep_on_maybe().
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * lost, so the removed open-coded if/else and the added
 * spl_rw_exit_impl() call both appear below.  Confirm against the
 * real header before relying on this text.
 */
#define rw_exit(rwp) /* CSTYLED */ \
({ \
spl_rw_lockdep_off_maybe(rwp); \
if (RW_WRITE_HELD(rwp)) { \
spl_rw_clear_owner(rwp); \
up_write(SEM(rwp)); \
} else { \
ASSERT(RW_READ_HELD(rwp)); \
up_read(SEM(rwp)); \
} \
spl_rw_exit_impl(rwp); \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Variant of rw_exit() that calls the unconditional lockdep hooks
 * around the release, whatever type the lock was given.  Intended as
 * the release half for rw_enter_nolockdep()/rw_tryenter_nolockdep().
 */
#define rw_exit_nolockdep(rwp) /* CSTYLED */ \
({ \
	spl_rw_lockdep_off(); \
	spl_rw_exit_impl(rwp); \
	spl_rw_lockdep_on(); \
})
/*
 * rw_downgrade() - clear the recorded owner and call downgrade_write()
 * on the semaphore behind rwp.  The operation is bracketed by
 * spl_rw_lockdep_off_maybe()/spl_rw_lockdep_on_maybe().
 *
 * NOTE(review): this span comes from a diff whose +/- markers were
 * lost, so the removed open-coded statements and the added
 * spl_rw_downgrade_impl() call both appear below.  Confirm against
 * the real header before relying on this text.
 */
#define rw_downgrade(rwp) /* CSTYLED */ \
({ \
spl_rw_lockdep_off_maybe(rwp); \
spl_rw_clear_owner(rwp); \
downgrade_write(SEM(rwp)); \
spl_rw_downgrade_impl(rwp); \
spl_rw_lockdep_on_maybe(rwp); \
})
/*
 * Variant of rw_downgrade() that calls the unconditional lockdep hooks
 * around the downgrade, whatever type the lock was given.
 */
#define rw_downgrade_nolockdep(rwp) /* CSTYLED */ \
({ \
	spl_rw_lockdep_off(); \
	spl_rw_downgrade_impl(rwp); \
	spl_rw_lockdep_on(); \
})
#endif /* _SPL_RWLOCK_H */
+24 -6
View File
@@ -4078,18 +4078,32 @@ zfs_inactive(struct inode *ip)
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
krwlock_t *zti_lock = &zfsvfs->z_teardown_inactive_lock;
uint64_t atime[2];
int error;
int need_unlock = 0;
boolean_t no_lockdep = B_FALSE;
/* Only read lock if we haven't already write locked, e.g. rollback */
if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
if (!RW_WRITE_HELD(zti_lock)) {
need_unlock = 1;
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
/*
* kswapd reaches evict_inode() with fs_reclaim held. Suppress
* lockdep only for this reclaim-thread acquire/release pair.
*/
no_lockdep = current_is_reclaim_thread();
if (no_lockdep)
rw_enter_nolockdep(zti_lock, RW_READER);
else
rw_enter(zti_lock, RW_READER);
}
if (zp->z_sa_hdl == NULL) {
if (need_unlock)
rw_exit(&zfsvfs->z_teardown_inactive_lock);
if (need_unlock) {
if (no_lockdep)
rw_exit_nolockdep(zti_lock);
else
rw_exit(zti_lock);
}
return;
}
@@ -4115,8 +4129,12 @@ zfs_inactive(struct inode *ip)
}
zfs_zinactive(zp);
if (need_unlock)
rw_exit(&zfsvfs->z_teardown_inactive_lock);
if (need_unlock) {
if (no_lockdep)
rw_exit_nolockdep(zti_lock);
else
rw_exit(zti_lock);
}
}
/*