libsa/zfs: further improve handling of stale labels
Fix two problems with 6dd0803ffd. The first problem is that the situation when a newer label was read before a stale one was handled differently from the reverse order case. The second problem is that vdev_free() would free the fully initialized leaf vdev that carried the stale label. In the case when a vdev carries a stale label, but is still referenced by a different label with a new configuration, we don't want to free it, but rather insert it into the new configuration. o Provide a helper function nvlist_find_vdev_guid() that checks for the presence of a certain GUID in a label. o In the top level vdev store the GUID of the vdev used to instantiate the top vdev. o Cover all possible cases in the block in vdev_probe() where we encounter a known configuration. Make the diagnostic print more informative and look the same regardless of probe order. Make this whole block easier to read by reducing one level of indentation for the price of a single comparison at runtime. Reviewed by: mav, imp Differential Revision: https://reviews.freebsd.org/D51913 Fixes: 6dd0803ffd
This commit is contained in:
+109
-18
@@ -833,6 +833,14 @@ vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf,
|
|||||||
return (kid->v_read(kid, bp, buf, offset, bytes));
|
return (kid->v_read(kid, bp, buf, offset, bytes));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* List of vdevs that were fully initialized from their own label, but later a
|
||||||
|
* newer label was found that obsoleted the stale label, freeing its
|
||||||
|
* configuration tree. We keep those vdevs around, since a new configuration
|
||||||
|
* may include them.
|
||||||
|
*/
|
||||||
|
static vdev_list_t orphans = STAILQ_HEAD_INITIALIZER(orphans);
|
||||||
|
|
||||||
static vdev_t *
|
static vdev_t *
|
||||||
vdev_find(vdev_list_t *list, uint64_t guid)
|
vdev_find(vdev_list_t *list, uint64_t guid)
|
||||||
{
|
{
|
||||||
@@ -854,6 +862,11 @@ vdev_create(uint64_t guid, vdev_read_t *_read)
|
|||||||
vdev_t *vdev;
|
vdev_t *vdev;
|
||||||
vdev_indirect_config_t *vic;
|
vdev_indirect_config_t *vic;
|
||||||
|
|
||||||
|
if ((vdev = vdev_find(&orphans, guid))) {
|
||||||
|
STAILQ_REMOVE(&orphans, vdev, vdev, v_childlink);
|
||||||
|
return (vdev);
|
||||||
|
}
|
||||||
|
|
||||||
vdev = calloc(1, sizeof(vdev_t));
|
vdev = calloc(1, sizeof(vdev_t));
|
||||||
if (vdev != NULL) {
|
if (vdev != NULL) {
|
||||||
STAILQ_INIT(&vdev->v_children);
|
STAILQ_INIT(&vdev->v_children);
|
||||||
@@ -1101,8 +1114,8 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
|
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t label_guid,
|
||||||
const nvlist_t *nvlist)
|
uint64_t txg, const nvlist_t *nvlist)
|
||||||
{
|
{
|
||||||
vdev_t *top_vdev, *vdev;
|
vdev_t *top_vdev, *vdev;
|
||||||
nvlist_t **kids = NULL;
|
nvlist_t **kids = NULL;
|
||||||
@@ -1116,6 +1129,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
|
|||||||
return (rc);
|
return (rc);
|
||||||
top_vdev->v_spa = spa;
|
top_vdev->v_spa = spa;
|
||||||
top_vdev->v_top = top_vdev;
|
top_vdev->v_top = top_vdev;
|
||||||
|
top_vdev->v_label = label_guid;
|
||||||
top_vdev->v_txg = txg;
|
top_vdev->v_txg = txg;
|
||||||
(void )vdev_insert(spa->spa_root_vdev, top_vdev);
|
(void )vdev_insert(spa->spa_root_vdev, top_vdev);
|
||||||
}
|
}
|
||||||
@@ -1160,12 +1174,14 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t txg,
|
|||||||
static int
|
static int
|
||||||
vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
|
vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
|
||||||
{
|
{
|
||||||
uint64_t top_guid, txg;
|
uint64_t top_guid, label_guid, txg;
|
||||||
nvlist_t *vdevs;
|
nvlist_t *vdevs;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64,
|
||||||
NULL, &top_guid, NULL) ||
|
NULL, &top_guid, NULL) ||
|
||||||
|
nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
|
||||||
|
NULL, &label_guid, NULL) != 0 ||
|
||||||
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
|
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
|
||||||
NULL, &txg, NULL) != 0 ||
|
NULL, &txg, NULL) != 0 ||
|
||||||
nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
|
nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
|
||||||
@@ -1174,7 +1190,7 @@ vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
|
|||||||
return (ENOENT);
|
return (ENOENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = vdev_from_nvlist(spa, top_guid, txg, vdevs);
|
rc = vdev_from_nvlist(spa, top_guid, label_guid, txg, vdevs);
|
||||||
nvlist_destroy(vdevs);
|
nvlist_destroy(vdevs);
|
||||||
return (rc);
|
return (rc);
|
||||||
}
|
}
|
||||||
@@ -1271,6 +1287,9 @@ vdev_free(struct vdev *vdev)
|
|||||||
|
|
||||||
STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe)
|
STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe)
|
||||||
vdev_free(kid);
|
vdev_free(kid);
|
||||||
|
if (vdev->v_phys_read != NULL)
|
||||||
|
STAILQ_INSERT_HEAD(&orphans, vdev, v_childlink);
|
||||||
|
else
|
||||||
free(vdev);
|
free(vdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1323,7 +1342,7 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
|
|||||||
* XXXGL: how can this happen?
|
* XXXGL: how can this happen?
|
||||||
*/
|
*/
|
||||||
if (vdev == NULL)
|
if (vdev == NULL)
|
||||||
rc = vdev_from_nvlist(spa, guid, 0, kids[i]);
|
rc = vdev_from_nvlist(spa, guid, 0, 0, kids[i]);
|
||||||
else
|
else
|
||||||
rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid,
|
rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid,
|
||||||
kids[i]);
|
kids[i]);
|
||||||
@@ -1344,6 +1363,53 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
|
|||||||
return (rc);
|
return (rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Recursively search the descendants of a vdev configuration nvlist for a
 * vdev with the given GUID.
 *
 * The ZPOOL_CONFIG_CHILDREN array of 'nvlist' is looked up; every child's
 * ZPOOL_CONFIG_GUID is compared against 'guid', and on mismatch the search
 * descends into that child.  Only children are examined: the GUID of
 * 'nvlist' itself is never read here.
 *
 * Returns true if a descendant carries 'guid', false otherwise (including
 * when 'nvlist' has no children array).  The child nvlists and the array
 * obtained from nvlist_find() are released before returning.
 */
static bool
|
||||||
|
nvlist_find_child_guid(const nvlist_t *nvlist, uint64_t guid)
|
||||||
|
{
|
||||||
|
nvlist_t **kids = NULL;
|
||||||
|
int nkids, i;
|
||||||
|
bool rv = false;
|
||||||
|
|
||||||
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
|
||||||
|
&nkids, &kids, NULL) != 0)
|
||||||
|
nkids = 0; /* no children array: both loops below become no-ops */
|
||||||
|
|
||||||
|
for (i = 0; i < nkids; i++) {
|
||||||
|
uint64_t kid_guid;
|
||||||
|
|
||||||
|
if (nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
|
||||||
|
NULL, &kid_guid, NULL) != 0)
|
||||||
|
break; /* child without a GUID: stop scanning the remaining children */
|
||||||
|
if (kid_guid == guid)
|
||||||
|
rv = true;
|
||||||
|
else
|
||||||
|
rv = nvlist_find_child_guid(kids[i], guid); /* descend into this child */
|
||||||
|
if (rv)
|
||||||
|
break; /* found: no need to look at further siblings */
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nkids; i++) /* release all children, not only the ones visited */
|
||||||
|
nvlist_destroy(kids[i]);
|
||||||
|
free(kids);
|
||||||
|
|
||||||
|
return (rv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Check whether a vdev with the given GUID is present in a label.
 *
 * Looks up the ZPOOL_CONFIG_VDEV_TREE nvlist inside the label 'nvlist' and
 * searches its descendants with nvlist_find_child_guid().
 *
 * Returns false if the label has no vdev tree or the GUID is not found
 * among the tree's children; true otherwise.  The vdev tree nvlist obtained
 * from nvlist_find() is destroyed before returning.
 */
static bool
|
||||||
|
nvlist_find_vdev_guid(const nvlist_t *nvlist, uint64_t guid)
|
||||||
|
{
|
||||||
|
nvlist_t *vdevs;
|
||||||
|
bool rv;
|
||||||
|
|
||||||
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL,
|
||||||
|
&vdevs, NULL) != 0)
|
||||||
|
return (false); /* no vdev tree in this label: nothing to search */
|
||||||
|
rv = nvlist_find_child_guid(vdevs, guid);
|
||||||
|
nvlist_destroy(vdevs);
|
||||||
|
|
||||||
|
return (rv);
|
||||||
|
}
|
||||||
|
|
||||||
static spa_t *
|
static spa_t *
|
||||||
spa_find_by_guid(uint64_t guid)
|
spa_find_by_guid(uint64_t guid)
|
||||||
{
|
{
|
||||||
@@ -2012,7 +2078,7 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
|
|||||||
{
|
{
|
||||||
vdev_t vtmp;
|
vdev_t vtmp;
|
||||||
spa_t *spa;
|
spa_t *spa;
|
||||||
vdev_t *vdev;
|
vdev_t *vdev, *top;
|
||||||
nvlist_t *nvl;
|
nvlist_t *nvl;
|
||||||
uint64_t val;
|
uint64_t val;
|
||||||
uint64_t guid, pool_guid, top_guid, txg;
|
uint64_t guid, pool_guid, top_guid, txg;
|
||||||
@@ -2109,20 +2175,45 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv,
|
|||||||
nvlist_destroy(nvl);
|
nvlist_destroy(nvl);
|
||||||
return (ENOMEM);
|
return (ENOMEM);
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
struct vdev *kid;
|
|
||||||
|
|
||||||
STAILQ_FOREACH(kid, &spa->spa_root_vdev->v_children,
|
/*
|
||||||
v_childlink)
|
* Check if configuration is already known. If configuration is known
|
||||||
if (kid->v_guid == top_guid && kid->v_txg < txg) {
|
* and txg numbers don't match, we got 2x2 scenarios here. First, is
|
||||||
printf("ZFS: pool %s vdev %s ignoring stale "
|
* the label being read right now _newer_ than the one read before.
|
||||||
"label from txg 0x%jx, using 0x%jx@0x%jx\n",
|
* Second, is the vdev that provided the stale label _present_ in the
|
||||||
spa->spa_name, kid->v_name,
|
* newer configuration. If neither is true, we completely ignore the
|
||||||
kid->v_txg, guid, txg);
|
* label.
|
||||||
STAILQ_REMOVE(&spa->spa_root_vdev->v_children,
|
*/
|
||||||
kid, vdev, v_childlink);
|
STAILQ_FOREACH(top, &spa->spa_root_vdev->v_children, v_childlink)
|
||||||
vdev_free(kid);
|
if (top->v_guid == top_guid) {
|
||||||
|
bool newer, present;
|
||||||
|
|
||||||
|
if (top->v_txg == txg)
|
||||||
break;
|
break;
|
||||||
|
newer = (top->v_txg < txg);
|
||||||
|
present = newer ?
|
||||||
|
nvlist_find_vdev_guid(nvl, top->v_label) :
|
||||||
|
(vdev_find(&top->v_children, guid) != NULL);
|
||||||
|
printf("ZFS: pool %s vdev %s %s stale label from "
|
||||||
|
"0x%jx@0x%jx, %s 0x%jx@0x%jx\n",
|
||||||
|
spa->spa_name, top->v_name,
|
||||||
|
present ? "using" : "ignoring",
|
||||||
|
newer ? top->v_label : guid,
|
||||||
|
newer ? top->v_txg : txg,
|
||||||
|
present ? "referred by" : "using",
|
||||||
|
newer ? guid : top->v_label,
|
||||||
|
newer ? txg : top->v_txg);
|
||||||
|
if (newer) {
|
||||||
|
STAILQ_REMOVE(&spa->spa_root_vdev->v_children,
|
||||||
|
top, vdev, v_childlink);
|
||||||
|
vdev_free(top);
|
||||||
|
break;
|
||||||
|
} else if (present) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
nvlist_destroy(nvl);
|
||||||
|
return (EIO);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2024,7 +2024,8 @@ typedef struct vdev {
|
|||||||
vdev_list_t v_children; /* children of this vdev */
|
vdev_list_t v_children; /* children of this vdev */
|
||||||
const char *v_name; /* vdev name */
|
const char *v_name; /* vdev name */
|
||||||
uint64_t v_guid; /* vdev guid */
|
uint64_t v_guid; /* vdev guid */
|
||||||
uint64_t v_txg; /* most recent transaction */
|
uint64_t v_label; /* label instantiated from (top vdev) */
|
||||||
|
uint64_t v_txg; /* most recent transaction (top vdev) */
|
||||||
uint64_t v_id; /* index in parent */
|
uint64_t v_id; /* index in parent */
|
||||||
uint64_t v_psize; /* physical device capacity */
|
uint64_t v_psize; /* physical device capacity */
|
||||||
int v_ashift; /* offset to block shift */
|
int v_ashift; /* offset to block shift */
|
||||||
|
|||||||
Reference in New Issue
Block a user