Lines Matching +full:config +full:- +full:space
1 // SPDX-License-Identifier: CDDL-1.0
10 * or https://opensource.org/licenses/CDDL-1.0.
30 * A storage pool checkpoint can be thought of as a pool-wide snapshot or
36 * zpool on-disk features. If a pool has a checkpoint that is no longer
41 * - The pool has a new feature flag and a new entry in the MOS. The feature
43 * until the checkpoint is fully discarded. The entry in the MOS config
49 * - Each vdev contains a vdev-wide space map while the pool has a checkpoint,
50 * which persists until the checkpoint is fully discarded. The space map
55 * - Each metaslab's ms_sm space map behaves the same as without the
58 * ALLOCATED in the metaslab's space map and they are added as FREE in the
59 * vdev's checkpoint space map.
61 * - Each uberblock has a field (ub_checkpoint_txg) which holds the txg that
66 * - To create a checkpoint, we first wait for the current TXG to be synced,
69 * uberblock in MOS config, increment the feature flag for the checkpoint
76 * - When a checkpoint exists, we need to ensure that the blocks that
86 * and not-checkpointed blocks.
91 * checkpointed extents will remain allocated in the metaslab's ms_sm space
97 * - To discard the checkpoint we use an early synctask to delete the
98 * checkpointed uberblock from the MOS config, set spa_checkpoint_txg to 0,
99 * and wakeup the discarding zthr thread (an open-context async thread).
111 * top-level vdevs, a final synctask runs that decrements the feature flag.
113 * - To rewind to the checkpoint, we first use the current uberblock and
115 * config. After we retrieve the checkpointed uberblock, we use it as the
128 * - In the hypothetical event that we take a checkpoint, remove a vdev,
132 * can change the config:
135 * - As most of the checkpoint logic is implemented in the SPA and doesn't
136 * distinguish datasets when it comes to space accounting, having a
154 * prefetching of the checkpoint space map done on each vdev while
157 * The reason it exists is because top-level vdevs with long checkpoint
158 * space maps can potentially take up a lot of memory depending on the
177 pcs->pcs_state = CS_CHECKPOINT_DISCARDING; in spa_checkpoint_get_stats()
179 pcs->pcs_state = CS_CHECKPOINT_EXISTS; in spa_checkpoint_get_stats()
181 pcs->pcs_space = spa->spa_checkpoint_info.sci_dspace; in spa_checkpoint_get_stats()
182 pcs->pcs_start_time = spa->spa_checkpoint_info.sci_timestamp; in spa_checkpoint_get_stats()
192 spa->spa_checkpoint_info.sci_timestamp = 0; in spa_checkpoint_discard_complete_sync()
211 vdev_t *vd = sdc->sdc_vd; in spa_checkpoint_discard_sync_callback()
212 metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; in spa_checkpoint_discard_sync_callback()
213 uint64_t end = sme->sme_offset + sme->sme_run; in spa_checkpoint_discard_sync_callback()
215 if (sdc->sdc_entry_limit == 0) in spa_checkpoint_discard_sync_callback()
219 * Since the space map is not condensed, we know that in spa_checkpoint_discard_sync_callback()
224 * the checkpoint's space map entries should not cross in spa_checkpoint_discard_sync_callback()
226 * that handles metaslab-crossing segments in the future. in spa_checkpoint_discard_sync_callback()
228 VERIFY3U(sme->sme_type, ==, SM_FREE); in spa_checkpoint_discard_sync_callback()
229 VERIFY3U(sme->sme_offset, >=, ms->ms_start); in spa_checkpoint_discard_sync_callback()
230 VERIFY3U(end, <=, ms->ms_start + ms->ms_size); in spa_checkpoint_discard_sync_callback()
238 mutex_enter(&ms->ms_lock); in spa_checkpoint_discard_sync_callback()
239 if (zfs_range_tree_is_empty(ms->ms_freeing)) in spa_checkpoint_discard_sync_callback()
240 vdev_dirty(vd, VDD_METASLAB, ms, sdc->sdc_txg); in spa_checkpoint_discard_sync_callback()
241 zfs_range_tree_add(ms->ms_freeing, sme->sme_offset, sme->sme_run); in spa_checkpoint_discard_sync_callback()
242 mutex_exit(&ms->ms_lock); in spa_checkpoint_discard_sync_callback()
244 ASSERT3U(vd->vdev_spa->spa_checkpoint_info.sci_dspace, >=, in spa_checkpoint_discard_sync_callback()
245 sme->sme_run); in spa_checkpoint_discard_sync_callback()
246 ASSERT3U(vd->vdev_stat.vs_checkpoint_space, >=, sme->sme_run); in spa_checkpoint_discard_sync_callback()
248 vd->vdev_spa->spa_checkpoint_info.sci_dspace -= sme->sme_run; in spa_checkpoint_discard_sync_callback()
249 vd->vdev_stat.vs_checkpoint_space -= sme->sme_run; in spa_checkpoint_discard_sync_callback()
250 sdc->sdc_entry_limit--; in spa_checkpoint_discard_sync_callback()
259 vdev_t *rvd = spa->spa_root_vdev; in spa_checkpoint_accounting_verify()
263 for (uint64_t c = 0; c < rvd->vdev_children; c++) { in spa_checkpoint_accounting_verify()
264 vdev_t *vd = rvd->vdev_child[c]; in spa_checkpoint_accounting_verify()
266 if (vd->vdev_checkpoint_sm != NULL) { in spa_checkpoint_accounting_verify()
268 -space_map_allocated(vd->vdev_checkpoint_sm); in spa_checkpoint_accounting_verify()
270 vd->vdev_stat.vs_checkpoint_space; in spa_checkpoint_accounting_verify()
274 ASSERT0(vd->vdev_stat.vs_checkpoint_space); in spa_checkpoint_accounting_verify()
277 ASSERT3U(spa->spa_checkpoint_info.sci_dspace, ==, ckpoint_sm_space_sum); in spa_checkpoint_accounting_verify()
288 * The space map callback is applied only to non-debug entries. in spa_checkpoint_discard_thread_sync()
290 * number of non-debug entries, we want to ensure that we only in spa_checkpoint_discard_thread_sync()
291 * read what we prefetched from open-context. in spa_checkpoint_discard_thread_sync()
293 * Thus, we set the maximum entries that the space map callback in spa_checkpoint_discard_thread_sync()
299 * entry is two-word. in spa_checkpoint_discard_thread_sync()
305 * Iterate from the end of the space map towards the beginning, in spa_checkpoint_discard_thread_sync()
307 * space map. The iteration stops if one of the following in spa_checkpoint_discard_thread_sync()
310 * 1] We reached the beginning of the space map. At this point in spa_checkpoint_discard_thread_sync()
311 * the space map should be completely empty and in spa_checkpoint_discard_thread_sync()
313 * The next step would be to free and close the space map in spa_checkpoint_discard_thread_sync()
318 * space map entries in memory) and space_map_incremental_destroy in spa_checkpoint_discard_thread_sync()
320 * in the space map that will be cleared in a future invocation in spa_checkpoint_discard_thread_sync()
325 sdc.sdc_txg = tx->tx_txg; in spa_checkpoint_discard_thread_sync()
329 space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t); in spa_checkpoint_discard_thread_sync()
331 error = space_map_incremental_destroy(vd->vdev_checkpoint_sm, in spa_checkpoint_discard_thread_sync()
335 space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t); in spa_checkpoint_discard_thread_sync()
338 spa_checkpoint_accounting_verify(vd->vdev_spa); in spa_checkpoint_discard_thread_sync()
342 "deleted %llu words - %llu words are left", in spa_checkpoint_discard_thread_sync()
343 (u_longlong_t)tx->tx_txg, (longlong_t)vd->vdev_id, in spa_checkpoint_discard_thread_sync()
344 (u_longlong_t)(words_before - words_after), in spa_checkpoint_discard_thread_sync()
351 "space map of vdev %llu\n", in spa_checkpoint_discard_thread_sync()
352 (longlong_t)error, vd->vdev_id); in spa_checkpoint_discard_thread_sync()
355 ASSERT0(space_map_allocated(vd->vdev_checkpoint_sm)); in spa_checkpoint_discard_thread_sync()
356 ASSERT0(space_map_length(vd->vdev_checkpoint_sm)); in spa_checkpoint_discard_thread_sync()
358 space_map_free(vd->vdev_checkpoint_sm, tx); in spa_checkpoint_discard_thread_sync()
359 space_map_close(vd->vdev_checkpoint_sm); in spa_checkpoint_discard_thread_sync()
360 vd->vdev_checkpoint_sm = NULL; in spa_checkpoint_discard_thread_sync()
362 VERIFY0(zap_remove(spa_meta_objset(vd->vdev_spa), in spa_checkpoint_discard_thread_sync()
363 vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, tx)); in spa_checkpoint_discard_thread_sync()
370 vdev_t *rvd = spa->spa_root_vdev; in spa_checkpoint_discard_is_done()
375 for (uint64_t c = 0; c < rvd->vdev_children; c++) { in spa_checkpoint_discard_is_done()
376 if (rvd->vdev_child[c]->vdev_checkpoint_sm != NULL) in spa_checkpoint_discard_is_done()
378 ASSERT0(rvd->vdev_child[c]->vdev_stat.vs_checkpoint_space); in spa_checkpoint_discard_is_done()
403 vdev_t *rvd = spa->spa_root_vdev; in spa_checkpoint_discard_thread()
405 for (uint64_t c = 0; c < rvd->vdev_children; c++) { in spa_checkpoint_discard_thread()
406 vdev_t *vd = rvd->vdev_child[c]; in spa_checkpoint_discard_thread()
408 while (vd->vdev_checkpoint_sm != NULL) { in spa_checkpoint_discard_thread()
409 space_map_t *checkpoint_sm = vd->vdev_checkpoint_sm; in spa_checkpoint_discard_thread()
416 ASSERT3P(vd->vdev_ops, !=, &vdev_indirect_ops); in spa_checkpoint_discard_thread()
421 space_map_length(checkpoint_sm) - size; in spa_checkpoint_discard_thread()
424 * Ensure that the part of the space map that will in spa_checkpoint_discard_thread()
429 checkpoint_sm->sm_dbuf, offset, size, in spa_checkpoint_discard_thread()
433 "while prefetching checkpoint space map " in spa_checkpoint_discard_thread()
435 error, vd->vdev_id); in spa_checkpoint_discard_thread()
438 VERIFY0(dsl_sync_task(spa->spa_name, NULL, in spa_checkpoint_discard_thread()
447 VERIFY0(spa->spa_checkpoint_info.sci_dspace); in spa_checkpoint_discard_thread()
448 VERIFY0(dsl_sync_task(spa->spa_name, NULL, in spa_checkpoint_discard_thread()
458 spa_t *spa = dmu_tx_pool(tx)->dp_spa; in spa_checkpoint_check()
466 if (spa->spa_removing_phys.sr_state == DSS_SCANNING) in spa_checkpoint_check()
469 if (spa->spa_raidz_expand != NULL) in spa_checkpoint_check()
472 if (spa->spa_checkpoint_txg != 0) in spa_checkpoint_check()
486 spa_t *spa = dp->dp_spa; in spa_checkpoint_sync()
487 uberblock_t checkpoint = spa->spa_ubsync; in spa_checkpoint_sync()
495 ASSERT0(spa->spa_checkpoint_info.sci_timestamp); in spa_checkpoint_sync()
496 ASSERT0(spa->spa_checkpoint_info.sci_dspace); in spa_checkpoint_sync()
503 ASSERT3U(checkpoint.ub_txg, ==, spa->spa_syncing_txg - 1); in spa_checkpoint_sync()
515 spa->spa_checkpoint_txg = checkpoint.ub_txg; in spa_checkpoint_sync()
516 spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; in spa_checkpoint_sync()
519 VERIFY0(zap_add(spa->spa_dsl_pool->dp_meta_objset, in spa_checkpoint_sync()
528 * space maps and uberblock). in spa_checkpoint_sync()
549 mutex_enter(&spa->spa_vdev_top_lock); in spa_checkpoint()
572 mutex_exit(&spa->spa_vdev_top_lock); in spa_checkpoint()
582 spa_t *spa = dmu_tx_pool(tx)->dp_spa; in spa_checkpoint_discard_check()
587 if (spa->spa_checkpoint_txg == 0) in spa_checkpoint_discard_check()
600 spa_t *spa = dmu_tx_pool(tx)->dp_spa; in spa_checkpoint_discard_sync()
605 spa->spa_checkpoint_txg = 0; in spa_checkpoint_discard_sync()
607 zthr_wakeup(spa->spa_checkpoint_discard_zthr); in spa_checkpoint_discard_sync()
623 * ms_checkpointing and vdev_checkpoint_sm) and re-create any in spa_checkpoint_discard()
624 * space maps that the discarding open-context thread has in spa_checkpoint_discard()
638 "Limit for memory used in prefetching the checkpoint space map done "