linux: suppress reclaim lockdep in zfs_inactive via rwlock wrappers
kswapd can enter zfs_inactive() from inode reclaim while holding fs_reclaim. The z_teardown_inactive_lock still serializes teardown, but the reclaim-thread acquire/release pair can produce a lockdep cycle through zfs_zinactive() and zfs_rmnode().

Add Linux rwlock nolockdep wrappers alongside the existing rwlock macros and use them only for the reclaim-thread z_teardown_inactive_lock acquire/release in zfs_inactive(). Keep the real rwsem semantics unchanged and leave CONFIG_LOCKDEP handling in the platform rwlock layer.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Closes #18505
This commit is contained in:
committed by
Brian Behlendorf
parent
8b24164f29
commit
be6b6ea8c6
@@ -30,7 +30,6 @@
|
||||
#include <linux/sched.h>
|
||||
|
||||
/*
 * Lock type tags. spl_rw_set_type() stores one of these in rwp->rw_type,
 * and the spl_rw_lockdep_off_maybe()/spl_rw_lockdep_on_maybe() helpers
 * only act when that field equals RW_NOLOCKDEP.
 */
typedef enum {
|
||||
RW_DRIVER = 2,
|
||||
RW_DEFAULT = 4,
|
||||
RW_NOLOCKDEP = 5
|
||||
} krw_type_t;
|
||||
@@ -75,20 +74,35 @@ spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
|
||||
{
|
||||
rwp->rw_type = type;
|
||||
}
|
||||
|
||||
/*
 * Unconditional wrapper around lockdep_off(). Unlike
 * spl_rw_lockdep_off_maybe() it takes no lock argument and does not look
 * at rw_type. The "#else" CONFIG_LOCKDEP branch further down defines this
 * name as an empty macro instead.
 */
static inline void
|
||||
spl_rw_lockdep_off(void)
|
||||
{
|
||||
lockdep_off();
|
||||
}
|
||||
|
||||
/*
 * Unconditional wrapper around lockdep_on(); the counterpart of
 * spl_rw_lockdep_off(). The "#else" CONFIG_LOCKDEP branch further down
 * defines this name as an empty macro instead.
 */
static inline void
|
||||
spl_rw_lockdep_on(void)
|
||||
{
|
||||
lockdep_on();
|
||||
}
|
||||
|
||||
static inline void
|
||||
spl_rw_lockdep_off_maybe(krwlock_t *rwp) \
|
||||
{ \
|
||||
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
|
||||
lockdep_off(); \
|
||||
spl_rw_lockdep_off(); \
|
||||
}
|
||||
static inline void
|
||||
spl_rw_lockdep_on_maybe(krwlock_t *rwp) \
|
||||
{ \
|
||||
if (rwp && rwp->rw_type == RW_NOLOCKDEP) \
|
||||
lockdep_on(); \
|
||||
spl_rw_lockdep_on(); \
|
||||
}
|
||||
#else /* CONFIG_LOCKDEP */
|
||||
#define spl_rw_set_type(rwp, type)
|
||||
#define spl_rw_lockdep_off()
|
||||
#define spl_rw_lockdep_on()
|
||||
#define spl_rw_lockdep_off_maybe(rwp)
|
||||
#define spl_rw_lockdep_on_maybe(rwp)
|
||||
#endif /* CONFIG_LOCKDEP */
|
||||
@@ -117,6 +131,56 @@ RW_READ_HELD(krwlock_t *rwp)
|
||||
* will be correctly located in the users code which is important
|
||||
* for the built in kernel lock analysis tools
|
||||
*/
|
||||
/*
 * Try-acquire body used by both rw_tryenter() and rw_tryenter_nolockdep().
 * The statement expression evaluates to the return value of
 * down_read_trylock() (RW_READER) or down_write_trylock() (RW_WRITER) on
 * SEM(rwp). A successful writer acquire also records the owner through
 * spl_rw_set_owner(). Any other rw value trips VERIFY(0). This macro does
 * not call any of the spl_rw_lockdep_* helpers itself; callers bracket it.
 */
#define spl_rw_tryenter_impl(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
int _rc_ = 0; \
|
||||
\
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
_rc_ = down_read_trylock(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
/*
 * Acquire body used by both rw_enter() and rw_enter_nolockdep().
 * RW_READER calls down_read() on SEM(rwp); RW_WRITER calls down_write()
 * and then records the owner through spl_rw_set_owner(). Any other rw
 * value trips VERIFY(0). This macro does not call any of the
 * spl_rw_lockdep_* helpers itself; callers bracket it.
 */
#define spl_rw_enter_impl(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
down_read(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
down_write(SEM(rwp)); \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
})
|
||||
|
||||
/*
 * Release body used by both rw_exit() and rw_exit_nolockdep().
 * If RW_WRITE_HELD(rwp) is true, the owner is cleared with
 * spl_rw_clear_owner() before up_write(). Otherwise the lock is asserted
 * to be read-held and released with up_read(). This macro does not call
 * any of the spl_rw_lockdep_* helpers itself; callers bracket it.
 */
#define spl_rw_exit_impl(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
if (RW_WRITE_HELD(rwp)) { \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
up_write(SEM(rwp)); \
|
||||
} else { \
|
||||
ASSERT(RW_READ_HELD(rwp)); \
|
||||
up_read(SEM(rwp)); \
|
||||
} \
|
||||
})
|
||||
|
||||
/*
 * Downgrade body used by both rw_downgrade() and rw_downgrade_nolockdep().
 * Clears the recorded owner with spl_rw_clear_owner() and then calls
 * downgrade_write() on SEM(rwp). This macro does not call any of the
 * spl_rw_lockdep_* helpers itself; callers bracket it.
 */
#define spl_rw_downgrade_impl(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
downgrade_write(SEM(rwp)); \
|
||||
})
|
||||
|
||||
#define rw_init(rwp, name, type, arg) /* CSTYLED */ \
|
||||
({ \
|
||||
static struct lock_class_key __key; \
|
||||
@@ -140,60 +204,60 @@ RW_READ_HELD(krwlock_t *rwp)
|
||||
|
||||
#define rw_tryenter(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
int _rc_ = 0; \
|
||||
\
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
_rc_ = down_read_trylock(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
if ((_rc_ = down_write_trylock(SEM(rwp)))) \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
int _rc_ = spl_rw_tryenter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
/*
 * Variant of rw_tryenter() that brackets spl_rw_tryenter_impl() with the
 * unconditional spl_rw_lockdep_off()/spl_rw_lockdep_on() pair rather than
 * the *_maybe helpers, so it does not check rwp->rw_type. Evaluates to the
 * value produced by spl_rw_tryenter_impl(). That macro declares its own
 * _rc_ inside its statement expression, which shadows the _rc_ declared
 * here while the initializer is evaluated.
 */
#define rw_tryenter_nolockdep(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
int _rc_ = spl_rw_tryenter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on(); \
|
||||
_rc_; \
|
||||
})
|
||||
|
||||
#define rw_enter(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
switch (rw) { \
|
||||
case RW_READER: \
|
||||
down_read(SEM(rwp)); \
|
||||
break; \
|
||||
case RW_WRITER: \
|
||||
down_write(SEM(rwp)); \
|
||||
spl_rw_set_owner(rwp); \
|
||||
break; \
|
||||
default: \
|
||||
VERIFY(0); \
|
||||
} \
|
||||
spl_rw_enter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
/*
 * Variant of rw_enter() that brackets spl_rw_enter_impl() with the
 * unconditional spl_rw_lockdep_off()/spl_rw_lockdep_on() pair rather than
 * the *_maybe helpers, so it does not check rwp->rw_type.
 */
#define rw_enter_nolockdep(rwp, rw) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_enter_impl(rwp, rw); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#define rw_exit(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
if (RW_WRITE_HELD(rwp)) { \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
up_write(SEM(rwp)); \
|
||||
} else { \
|
||||
ASSERT(RW_READ_HELD(rwp)); \
|
||||
up_read(SEM(rwp)); \
|
||||
} \
|
||||
spl_rw_exit_impl(rwp); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
/*
 * Variant of rw_exit() that brackets spl_rw_exit_impl() with the
 * unconditional spl_rw_lockdep_off()/spl_rw_lockdep_on() pair rather than
 * the *_maybe helpers, so it does not check rwp->rw_type.
 */
#define rw_exit_nolockdep(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_exit_impl(rwp); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#define rw_downgrade(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off_maybe(rwp); \
|
||||
spl_rw_clear_owner(rwp); \
|
||||
downgrade_write(SEM(rwp)); \
|
||||
spl_rw_downgrade_impl(rwp); \
|
||||
spl_rw_lockdep_on_maybe(rwp); \
|
||||
})
|
||||
|
||||
/*
 * Variant of rw_downgrade() that brackets spl_rw_downgrade_impl() with the
 * unconditional spl_rw_lockdep_off()/spl_rw_lockdep_on() pair rather than
 * the *_maybe helpers, so it does not check rwp->rw_type.
 */
#define rw_downgrade_nolockdep(rwp) /* CSTYLED */ \
|
||||
({ \
|
||||
spl_rw_lockdep_off(); \
|
||||
spl_rw_downgrade_impl(rwp); \
|
||||
spl_rw_lockdep_on(); \
|
||||
})
|
||||
|
||||
#endif /* _SPL_RWLOCK_H */
|
||||
|
||||
@@ -4078,18 +4078,32 @@ zfs_inactive(struct inode *ip)
|
||||
{
|
||||
znode_t *zp = ITOZ(ip);
|
||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||
krwlock_t *zti_lock = &zfsvfs->z_teardown_inactive_lock;
|
||||
uint64_t atime[2];
|
||||
int error;
|
||||
int need_unlock = 0;
|
||||
boolean_t no_lockdep = B_FALSE;
|
||||
|
||||
/* Only read lock if we haven't already write locked, e.g. rollback */
|
||||
if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
|
||||
if (!RW_WRITE_HELD(zti_lock)) {
|
||||
need_unlock = 1;
|
||||
rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
|
||||
/*
|
||||
* kswapd reaches evict_inode() with fs_reclaim held. Suppress
|
||||
* lockdep only for this reclaim-thread acquire/release pair.
|
||||
*/
|
||||
no_lockdep = current_is_reclaim_thread();
|
||||
if (no_lockdep)
|
||||
rw_enter_nolockdep(zti_lock, RW_READER);
|
||||
else
|
||||
rw_enter(zti_lock, RW_READER);
|
||||
}
|
||||
if (zp->z_sa_hdl == NULL) {
|
||||
if (need_unlock)
|
||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||
if (need_unlock) {
|
||||
if (no_lockdep)
|
||||
rw_exit_nolockdep(zti_lock);
|
||||
else
|
||||
rw_exit(zti_lock);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -4115,8 +4129,12 @@ zfs_inactive(struct inode *ip)
|
||||
}
|
||||
|
||||
zfs_zinactive(zp);
|
||||
if (need_unlock)
|
||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||
if (need_unlock) {
|
||||
if (no_lockdep)
|
||||
rw_exit_nolockdep(zti_lock);
|
||||
else
|
||||
rw_exit(zti_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user