Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ CONTRIBUTORS:
Felix Schmidt <felixschmidt20@aol.com>
Feng Sun <loyou85@gmail.com>
Finix Yan <yancw@info2soft.com>
Francesco Conti <Pesc0@users.noreply.github.com>
Francesco Mazzoli <f@mazzo.li>
Frederik Wessels <wessels147@gmail.com>
Friedrich Weber <f.weber@proxmox.com>
Expand Down
82 changes: 64 additions & 18 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -10734,10 +10734,16 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
* Rewrite the vdev configuration (which includes the uberblock) to
* commit the transaction group.
*
* If there are no dirty vdevs, we sync the uberblock to a few random
* top-level vdevs that are known to be visible in the config cache
* (see spa_vdev_add() for a complete description). If there *are* dirty
* vdevs, sync the uberblock to all vdevs.
* If there are no dirty vdevs, we sync the uberblock to (in this
* order of preference):
* - all vdevs touched by the current txg
* - special vdevs
* - a few random top-level vdevs that are known to be visible in
* the config cache (see spa_vdev_add() for a complete description).
*
* This allows rotational drives to stay asleep when they are not in use.
*
* If there *are* dirty vdevs, sync the uberblock to all vdevs.
*/
static void
spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx)
Expand All @@ -10755,29 +10761,69 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx)
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

if (list_is_empty(&spa->spa_config_dirty_list)) {
vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL };
int svdcount = 0;
int children = rvd->vdev_children;
int c0 = random_in_range(children);
uint64_t children = rvd->vdev_children;
vdev_t **svd = kmem_alloc(sizeof(vdev_t *) * children, KM_SLEEP);
uint64_t svdcount = 0;

for (int c = 0; c < children; c++) {
vdev_t *vd =
rvd->vdev_child[(c0 + c) % children];

/* Stop when revisiting the first vdev */
if (c > 0 && svd[0] == vd)
break;
// Find all dirty top-level vdevs
for (uint64_t c = 0; c < children; c++) {
vdev_t *vd = rvd->vdev_child[c];

if (vd->vdev_ms_array == 0 ||
vd->vdev_islog ||
!vdev_is_concrete(vd))
continue;

svd[svdcount++] = vd;
if (svdcount == SPA_SYNC_MIN_VDEVS)
break;
if (txg_list_member(&spa->spa_vdev_txg_list,
vd, TXG_CLEAN(txg)))
svd[svdcount++] = vd;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need or want to sync all dirty vdevs. The previous code synced only 3 random vdevs at a time, and that was fine. A sync means several full-stroke head seeks and cache flushes, which are expensive.

}

// If none were dirty but the pool has special
// vdevs, select those.
if (svdcount == 0 && spa_has_special(spa)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure that we can get svdcount == 0 here or below, since it would mean we haven't written anything anywhere, unless the spa_vdev_txg_list for TXG_CLEAN(txg) does something unexpected here. What we could do instead, though, is append random special (and normal) vdevs to the list (without duplication) if there are fewer than 3 (SPA_SYNC_MIN_VDEVS).

for (uint64_t c = 0; c < children; c++) {
vdev_t *vd = rvd->vdev_child[c];

if (vd->vdev_ms_array == 0 ||
vd->vdev_islog ||
!vdev_is_concrete(vd))
continue;

if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL)
svd[svdcount++] = vd;
}
}

/*
* If none were dirty and pool does not have
* special vdevs: randomly select up to
* SPA_SYNC_MIN_VDEVS top-level vdevs.
*/
if (svdcount == 0) {
int c0 = random_in_range(children);

for (uint64_t c = 0; c < children; c++) {
vdev_t *vd = rvd->vdev_child[(c0 + c) % children];

// Stop when revisiting the first vdev
if (c > 0 && svd[0] == vd)
break;

if (vd->vdev_ms_array == 0 ||
vd->vdev_islog ||
!vdev_is_concrete(vd))
continue;

svd[svdcount++] = vd;

if (svdcount >= SPA_SYNC_MIN_VDEVS)
break;
}
}

error = vdev_config_sync(svd, svdcount, txg);
kmem_free(svd, sizeof(vdev_t *) * children);
} else {
error = vdev_config_sync(rvd->vdev_child,
rvd->vdev_children, txg);
Expand Down
Loading