zap: add zap_cursor_init_by_dnode() & rework cursor resource lifetime
This commit adds zap_cursor_init_by_dnode() (and
zap_cursor_init_serialized_by_dnode()), which allow the target ZAP to
provided via an existing dnode rather than the traditional objset+object
pair.
This requires some reorganisation of the way that zap_cursor_t is
initialised. Up until now, zap_cursor_init() has merely stored the
objset, object, serialized form and prefetch flag, and left it until
zap_cursor_retrieve() to actually call zap_lock(). This makes a
_by_dnode() form complicated, because it is a held resource that needs
to be released, but might not be used if zap_cursor_retrieve() is not
called. So there's a bunch of state tracking required.
However, all cursor users immediately follow zap_cursor_init() with
zap_cursor_retrieve(), so there's nothing gained by delaying holds. This
allows us to simplify things, by calling zap_lock() directly in
zap_cursor_init() and retaining it until zap_cursor_fini().
This does however means the _init() functions are now fallible, and can
return an error. This adds complexity to most of the call sites, which
are typically in a for loop of the form:
for (zap_cursor_init(...);
zap_cursor_retrieve(...) == 0;
zap_cursor_advance(...))
To avoid needing to make significant changes at every call site, a
failed _init() call will also zero the cursor struct. If the caller
doesn't check the return and continues to zap_cursor_retrieve(), they
will get an EIO return, and zap_cursor_fini() will just return.
The existing zc_objset and zc_zapobj fields are retained to support
source backcompat for Lustre, which inspects them directly.
Sponsored-by: TrueNAS
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Signed-off-by: Rob Norris <rob.norris@truenas.com>
Closes #18603
This commit is contained in:
committed by
Brian Behlendorf
parent
68980eb105
commit
efda1093ff
+13
-6
@@ -443,16 +443,20 @@ void zap_attribute_free(zap_attribute_t *attrp);
|
||||
|
||||
struct zap;
|
||||
struct zap_leaf;
|
||||
|
||||
typedef struct zap_cursor {
|
||||
/* This structure is opaque! */
|
||||
objset_t *zc_objset;
|
||||
struct zap *zc_zap;
|
||||
struct zap_leaf *zc_leaf;
|
||||
uint64_t zc_zapobj;
|
||||
uint64_t zc_serialized;
|
||||
uint64_t zc_hash;
|
||||
uint32_t zc_cd;
|
||||
boolean_t zc_prefetch;
|
||||
/*
|
||||
* Legacy fields to main source compat with Lustre, which accesses
|
||||
* them directly. Not to be used in new code!
|
||||
*/
|
||||
objset_t *zc_objset;
|
||||
uint64_t zc_zapobj;
|
||||
} zap_cursor_t;
|
||||
|
||||
/*
|
||||
@@ -460,14 +464,15 @@ typedef struct zap_cursor {
|
||||
* The entire zapobj will be prefetched. You must call zap_cursor_fini the
|
||||
* cursor when you are done with it.
|
||||
*/
|
||||
void zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
|
||||
int zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj);
|
||||
int zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn);
|
||||
void zap_cursor_fini(zap_cursor_t *zc);
|
||||
|
||||
/*
|
||||
* Initialize a cursor at the beginning, but request that we not prefetch
|
||||
* the entire ZAP object.
|
||||
*/
|
||||
void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
|
||||
int zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
|
||||
uint64_t zapobj);
|
||||
|
||||
/*
|
||||
@@ -477,8 +482,10 @@ void zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os,
|
||||
* zapobj (ie. zap_cursor_init_serialized(..., 0) is equivalent to
|
||||
* zap_cursor_init(...).)
|
||||
*/
|
||||
void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,
|
||||
int zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os,
|
||||
uint64_t zapobj, uint64_t serialized);
|
||||
int zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
|
||||
uint64_t serialized);
|
||||
|
||||
/*
|
||||
* Get the attribute currently pointed to by the cursor. Returns
|
||||
|
||||
+75
-43
@@ -1072,53 +1072,100 @@ zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
|
||||
|
||||
/* zap_cursor */
|
||||
|
||||
static void
|
||||
zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
|
||||
static int
|
||||
zap_cursor_init_by_dnode_impl(zap_cursor_t *zc, dnode_t *dn,
|
||||
uint64_t serialized, boolean_t prefetch)
|
||||
{
|
||||
zc->zc_objset = os;
|
||||
zc->zc_zap = NULL;
|
||||
zc->zc_leaf = NULL;
|
||||
zc->zc_zapobj = zapobj;
|
||||
zc->zc_serialized = serialized;
|
||||
zc->zc_hash = 0;
|
||||
zc->zc_cd = 0;
|
||||
|
||||
int err = zap_lock_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
zc, &zc->zc_zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
zc->zc_prefetch = prefetch;
|
||||
zc->zc_objset = dn->dn_objset;
|
||||
zc->zc_zapobj = dn->dn_object;
|
||||
|
||||
int hb = zap_hashbits(zc->zc_zap);
|
||||
zc->zc_hash = serialized << (64 - hb);
|
||||
zc->zc_cd = serialized >> hb;
|
||||
if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
|
||||
zc->zc_cd = 0;
|
||||
|
||||
/*
|
||||
* Drop ZAP read lock, but keep the hold, so the holds on the
|
||||
* underlying dnode and header dbuf are maintained.
|
||||
*/
|
||||
rw_exit(&zc->zc_zap->zap_rwlock);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
static int
|
||||
zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
|
||||
uint64_t serialized, uint32_t prefetch)
|
||||
{
|
||||
dnode_t *dn = NULL;
|
||||
int err = dnode_hold(os, zapobj, FTAG, &dn);
|
||||
if (err != 0) {
|
||||
zc->zc_zap = NULL;
|
||||
zc->zc_leaf = NULL;
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = zap_cursor_init_by_dnode_impl(zc, dn, serialized, prefetch);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
|
||||
{
|
||||
zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE);
|
||||
return (zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE));
|
||||
}
|
||||
|
||||
void
|
||||
int
|
||||
zap_cursor_init_by_dnode(zap_cursor_t *zc, dnode_t *dn)
|
||||
{
|
||||
return (zap_cursor_init_by_dnode_impl(zc, dn, 0, B_TRUE));
|
||||
}
|
||||
|
||||
int
|
||||
zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
|
||||
{
|
||||
zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE);
|
||||
return (zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE));
|
||||
}
|
||||
|
||||
void
|
||||
int
|
||||
zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
|
||||
uint64_t serialized)
|
||||
{
|
||||
zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE);
|
||||
return (zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE));
|
||||
}
|
||||
|
||||
int
|
||||
zap_cursor_init_serialized_by_dnode(zap_cursor_t *zc, dnode_t *dn,
|
||||
uint64_t serialized)
|
||||
{
|
||||
return (zap_cursor_init_by_dnode_impl(zc, dn, serialized, B_TRUE));
|
||||
}
|
||||
|
||||
void
|
||||
zap_cursor_fini(zap_cursor_t *zc)
|
||||
{
|
||||
if (zc->zc_zap) {
|
||||
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
|
||||
zap_unlock(zc->zc_zap, NULL);
|
||||
zc->zc_zap = NULL;
|
||||
}
|
||||
if (zc->zc_leaf) {
|
||||
rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
|
||||
zap_put_leaf(zc->zc_leaf);
|
||||
zc->zc_leaf = NULL;
|
||||
}
|
||||
zc->zc_objset = NULL;
|
||||
if (zc->zc_zap) {
|
||||
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
|
||||
zap_unlock(zc->zc_zap, zc);
|
||||
}
|
||||
memset(zc, 0, sizeof (zap_cursor_t));
|
||||
}
|
||||
|
||||
int
|
||||
@@ -1126,30 +1173,15 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (zc->zc_zap == NULL)
|
||||
/* zap_cursor_init failed, cursor is invalid */
|
||||
return (SET_ERROR(EIO));
|
||||
|
||||
if (zc->zc_hash == -1ULL)
|
||||
return (SET_ERROR(ENOENT));
|
||||
|
||||
if (zc->zc_zap == NULL) {
|
||||
int hb;
|
||||
err = zap_lock(zc->zc_objset, zc->zc_zapobj, NULL,
|
||||
RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
|
||||
|
||||
/*
|
||||
* To support zap_cursor_init_serialized, advance, retrieve,
|
||||
* we must add to the existing zc_cd, which may already
|
||||
* be 1 due to the zap_cursor_advance.
|
||||
*/
|
||||
ASSERT0(zc->zc_hash);
|
||||
hb = zap_hashbits(zc->zc_zap);
|
||||
zc->zc_hash = zc->zc_serialized << (64 - hb);
|
||||
zc->zc_cd += zc->zc_serialized >> hb;
|
||||
if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
|
||||
zc->zc_cd = 0;
|
||||
} else {
|
||||
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
|
||||
}
|
||||
if (!zc->zc_zap->zap_ismicro) {
|
||||
err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
|
||||
} else {
|
||||
@@ -1184,6 +1216,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
|
||||
err = SET_ERROR(ENOENT);
|
||||
}
|
||||
}
|
||||
|
||||
rw_exit(&zc->zc_zap->zap_rwlock);
|
||||
return (err);
|
||||
}
|
||||
@@ -1199,10 +1232,9 @@ zap_cursor_advance(zap_cursor_t *zc)
|
||||
uint64_t
|
||||
zap_cursor_serialize(zap_cursor_t *zc)
|
||||
{
|
||||
if (zc->zc_hash == -1ULL)
|
||||
if (zc->zc_zap == NULL || zc->zc_hash == -1ULL)
|
||||
return (-1ULL);
|
||||
if (zc->zc_zap == NULL)
|
||||
return (zc->zc_serialized);
|
||||
|
||||
ASSERT0((zc->zc_hash & zap_maxcd(zc->zc_zap)));
|
||||
ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user