Parallelize metaslab_sync_done() calls

Some of our random write benchmarks on a fragmented pool show that
single-threaded portion of sync process (txg_sync_thread) can use
up to 45% of CPU time.  Most of it is consumed by metaslab_sync()
and metaslab_sync_done(), during which time the pool is not doing
anything else.

While metaslab_sync() is not trivial to parallelize due to having
single spacemap log, metaslab_sync_done() is doing only per-metaslab
accounting and they can run in parallel.  Even better, we can run
them while waiting for vdev label update and cache flush I/Os.

With this patch on my test system similar test randomly writing 12
100GB files with 4KB blocks shows IOPS increase from 176K to 220K.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <alexander.motin@TrueNAS.com>
Closes #18622
This commit is contained in:
Alexander Motin
2026-06-04 16:25:40 -04:00
committed by GitHub
parent e03375947c
commit 5fea0c838a
3 changed files with 31 additions and 7 deletions
+1
View File
@@ -592,6 +592,7 @@ extern boolean_t vdev_log_state_valid(vdev_t *vd);
extern int vdev_load(vdev_t *vd); extern int vdev_load(vdev_t *vd);
extern int vdev_dtl_load(vdev_t *vd); extern int vdev_dtl_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg); extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_dispatch(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg); extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg); extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg);
extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg); extern void vdev_dirty_leaves(vdev_t *vd, int flags, uint64_t txg);
+4 -3
View File
@@ -11019,6 +11019,10 @@ spa_sync(spa_t *spa, uint64_t txg)
ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]); ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
} }
for (vd = txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd;
vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
vdev_sync_dispatch(vd, txg);
spa_sync_rewrite_vdev_config(spa, tx); spa_sync_rewrite_vdev_config(spa, tx);
dmu_tx_commit(tx); dmu_tx_commit(tx);
@@ -11043,9 +11047,6 @@ spa_sync(spa_t *spa, uint64_t txg)
dsl_pool_sync_done(dp, txg); dsl_pool_sync_done(dp, txg);
/*
* Update usable space statistics.
*/
while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
!= NULL) != NULL)
vdev_sync_done(vd, txg); vdev_sync_done(vd, txg);
+26 -4
View File
@@ -4246,17 +4246,39 @@ vdev_remove_empty_log(vdev_t *vd, uint64_t txg)
dmu_tx_commit(tx); dmu_tx_commit(tx);
} }
static void
metaslab_sync_done_task(void *arg)
{
metaslab_t *msp = arg;
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
metaslab_sync_done(msp, spa_syncing_txg(spa));
}
void
vdev_sync_dispatch(vdev_t *vd, uint64_t txg)
{
spa_t *spa = vd->vdev_spa;
ASSERT(vdev_is_concrete(vd));
for (metaslab_t *msp = txg_list_head(&vd->vdev_ms_list, TXG_CLEAN(txg));
msp; msp = txg_list_next(&vd->vdev_ms_list, msp, TXG_CLEAN(txg))) {
(void) taskq_dispatch(spa->spa_sync_tq,
metaslab_sync_done_task, msp, TQ_SLEEP);
}
}
void void
vdev_sync_done(vdev_t *vd, uint64_t txg) vdev_sync_done(vdev_t *vd, uint64_t txg)
{ {
metaslab_t *msp;
boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg)); boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));
ASSERT(vdev_is_concrete(vd)); ASSERT(vdev_is_concrete(vd));
while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) taskq_wait(vd->vdev_spa->spa_sync_tq);
!= NULL)
metaslab_sync_done(msp, txg); while (txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)) != NULL)
;
if (reassess) { if (reassess) {
metaslab_sync_reassess(vd->vdev_mg); metaslab_sync_reassess(vd->vdev_mg);