Parallelize metaslab_sync_done() calls
Some of our random write benchmarks on a fragmented pool show that the single-threaded portion of the sync process (txg_sync_thread) can use up to 45% of CPU time. Most of it is consumed by metaslab_sync() and metaslab_sync_done(), during which time the pool is not doing anything else. While metaslab_sync() is not trivial to parallelize because there is a single spacemap log, metaslab_sync_done() does only per-metaslab accounting, so its calls can run in parallel. Even better, we can run them while waiting for the vdev label update and cache flush I/Os. With this patch, a similar test on my test system, randomly writing 12 files of 100GB each with 4KB blocks, shows an IOPS increase from 176K to 220K. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com> Closes #18622
This commit is contained in:
@@ -592,6 +592,7 @@ extern boolean_t vdev_log_state_valid(vdev_t *vd);
|
||||
extern int vdev_load(vdev_t *vd);
|
||||
extern int vdev_dtl_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
/*
 * Dispatches one metaslab_sync_done() task per metaslab found on vd's
 * metaslab list for TXG_CLEAN(txg).
 */
extern void vdev_sync_dispatch(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
|
||||
extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg);
|
||||
|
||||
+4
-3
@@ -11019,6 +11019,10 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
|
||||
}
|
||||
|
||||
for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
|
||||
vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
|
||||
vdev_sync_dispatch(vd, txg);
|
||||
|
||||
spa_sync_rewrite_vdev_config(spa, tx);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
@@ -11043,9 +11047,6 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
|
||||
dsl_pool_sync_done(dp, txg);
|
||||
|
||||
/*
|
||||
* Update usable space statistics.
|
||||
*/
|
||||
while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
|
||||
!= NULL)
|
||||
vdev_sync_done(vd, txg);
|
||||
|
||||
+26
-4
@@ -4246,17 +4246,39 @@ vdev_remove_empty_log(vdev_t *vd, uint64_t txg)
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
/*
 * Task callback that runs metaslab_sync_done() for a single metaslab.
 *
 * arg is the metaslab_t pointer supplied at dispatch time.  Only that one
 * pointer is passed in, so the txg is taken from spa_syncing_txg() on the
 * pool reached through the metaslab's group and vdev.
 */
static void
|
||||
metaslab_sync_done_task(void *arg)
|
||||
{
|
||||
metaslab_t *msp = arg;
|
||||
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
|
||||
metaslab_sync_done(msp, spa_syncing_txg(spa));
|
||||
}
|
||||
|
||||
/*
 * Dispatch a metaslab_sync_done_task() to spa_sync_tq for every metaslab
 * on vd's vdev_ms_list for TXG_CLEAN(txg).
 *
 * The list is only walked here (txg_list_head/txg_list_next); no entry is
 * removed.  The return value of taskq_dispatch() is discarded.
 *
 * NOTE(review): the dispatched task reads the txg via spa_syncing_txg()
 * rather than receiving this function's txg argument -- presumably the
 * two agree when this is called from spa_sync(); confirm.
 */
void
|
||||
vdev_sync_dispatch(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
ASSERT(vdev_is_concrete(vd));
|
||||
|
||||
/* One task per metaslab on this txg's clean list. */
for (metaslab_t *msp = txg_list_head(&vd->vdev_ms_list, TXG_CLEAN(txg));
|
||||
msp; msp = txg_list_next(&vd->vdev_ms_list, msp, TXG_CLEAN(txg))) {
|
||||
(void) taskq_dispatch(spa->spa_sync_tq,
|
||||
metaslab_sync_done_task, msp, TQ_SLEEP);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vdev_sync_done(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
metaslab_t *msp;
|
||||
boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));
|
||||
|
||||
ASSERT(vdev_is_concrete(vd));
|
||||
|
||||
while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
|
||||
!= NULL)
|
||||
metaslab_sync_done(msp, txg);
|
||||
taskq_wait(vd->vdev_spa->spa_sync_tq);
|
||||
|
||||
while (txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)) != NULL)
|
||||
;
|
||||
|
||||
if (reassess) {
|
||||
metaslab_sync_reassess(vd->vdev_mg);
|
||||
|
||||
Reference in New Issue
Block a user