1 // SPDX-License-Identifier: CDDL-1.0
10 * or https://opensource.org/licenses/CDDL-1.0.
44 * This file contains all the routines used when modifying on-disk SPA state.
114 * moves it to some Illumos-specific place, or removes it entirely.
120 * prevent CPU-intensive kernel threads from affecting latency on interactive
167 * are so high frequency and short-lived that the taskq itself can become a
169 * additional degree of parallelism specified by the number of threads per-
222 * This is used by zdb to analyze non-idle pools.
244 * A non-zero value for zfs_max_missing_tvds means that we allow importing
245 * pools with missing top-level vdevs. This is strictly intended for advanced
247 * missing devices can only be imported read-only for safety reasons, and their
248 * fail-mode will be automatically set to "continue".
269 * might be incomplete or out-dated.
272 * an out-dated cachefile where a device removal was not registered.
275 * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available
278 uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1;
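A quick note on the default above: SPA_DVAS_PER_BP is 3 in current OpenZFS (a block pointer carries at most three DVAs), so zfs_max_missing_tvds_cachefile works out to 2. With no more than two top-level vdevs missing, at least one of the up-to-three copies of each MOS block should still be readable, which is the rationale the surrounding comment gives for this default.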
294 * points for the livelist to be removed - used to test condense/destroy races
406 vdev_t *rvd = spa->spa_root_vdev; in spa_prop_get_config()
407 dsl_pool_t *pool = spa->spa_dsl_pool; in spa_prop_get_config()
413 ASSERT(MUTEX_HELD(&spa->spa_props_lock)); in spa_prop_get_config()
430 size - alloc, src); in spa_prop_get_config()
432 spa->spa_checkpoint_info.sci_dspace, src); in spa_prop_get_config()
456 rvd->vdev_state, src); in spa_prop_get_config()
477 if (pool->dp_free_dir != NULL) { in spa_prop_get_config()
479 dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, in spa_prop_get_config()
486 if (pool->dp_leak_dir != NULL) { in spa_prop_get_config()
488 dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, in spa_prop_get_config()
498 if (spa->spa_comment != NULL) { in spa_prop_get_config()
499 spa_prop_add_list(nv, ZPOOL_PROP_COMMENT, spa->spa_comment, in spa_prop_get_config()
503 if (spa->spa_compatibility != NULL) { in spa_prop_get_config()
505 spa->spa_compatibility, 0, ZPROP_SRC_LOCAL); in spa_prop_get_config()
508 if (spa->spa_root != NULL) in spa_prop_get_config()
509 spa_prop_add_list(nv, ZPOOL_PROP_ALTROOT, spa->spa_root, in spa_prop_get_config()
528 if ((dp = list_head(&spa->spa_config_list)) != NULL) { in spa_prop_get_config()
529 if (dp->scd_path == NULL) { in spa_prop_get_config()
532 } else if (strcmp(dp->scd_path, spa_config_path) != 0) { in spa_prop_get_config()
534 dp->scd_path, 0, ZPROP_SRC_LOCAL); in spa_prop_get_config()
545 objset_t *mos = spa->spa_meta_objset; in spa_prop_get()
554 mutex_enter(&spa->spa_props_lock); in spa_prop_get()
562 if (mos == NULL || spa->spa_pool_props_object == 0) in spa_prop_get()
568 for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); in spa_prop_get()
576 if ((prop = zpool_name_to_prop(za->za_name)) == in spa_prop_get()
577 ZPOOL_PROP_INVAL && !zfs_prop_user(za->za_name)) in spa_prop_get()
580 switch (za->za_integer_length) { in spa_prop_get()
583 if (za->za_first_integer != in spa_prop_get()
591 za->za_first_integer, FTAG, &ds); in spa_prop_get()
601 intval = za->za_first_integer; in spa_prop_get()
613 strval = kmem_alloc(za->za_num_integers, KM_SLEEP); in spa_prop_get()
614 err = zap_lookup(mos, spa->spa_pool_props_object, in spa_prop_get()
615 za->za_name, 1, za->za_num_integers, strval); in spa_prop_get()
617 kmem_free(strval, za->za_num_integers); in spa_prop_get()
624 spa_prop_add_user(nv, za->za_name, strval, in spa_prop_get()
627 kmem_free(strval, za->za_num_integers); in spa_prop_get()
636 mutex_exit(&spa->spa_props_lock); in spa_prop_get()
741 spa->spa_hostid = hostid; in spa_prop_validate()
762 if (!vdev_is_bootable(spa->spa_root_vdev)) { in spa_prop_validate()
802 * the user to change the in-core failmode property in spa_prop_validate()
810 spa->spa_failmode = intval; in spa_prop_validate()
831 ASSERT(slash != NULL); in spa_prop_validate()
889 dp->scd_path = spa_strdup(spa_config_path); in spa_configfile_set()
891 dp->scd_path = NULL; in spa_configfile_set()
893 dp->scd_path = spa_strdup(cachefile); in spa_configfile_set()
895 list_insert_head(&spa->spa_config_list, dp); in spa_configfile_set()
930 ASSERT(zpool_prop_feature(nvpair_name(elem))); in spa_prop_set()
945 error = dsl_sync_task(spa->spa_name, NULL, in spa_prop_set()
958 return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, in spa_prop_set()
971 if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { in spa_prop_clear_bootfs()
972 VERIFY(zap_remove(spa->spa_meta_objset, in spa_prop_clear_bootfs()
973 spa->spa_pool_props_object, in spa_prop_clear_bootfs()
975 spa->spa_bootfs = 0; in spa_prop_clear_bootfs()
983 spa_t *spa = dmu_tx_pool(tx)->dp_spa; in spa_change_guid_check()
984 vdev_t *rvd = spa->spa_root_vdev; in spa_change_guid_check()
994 vdev_state = rvd->vdev_state; in spa_change_guid_check()
1009 spa_t *spa = dmu_tx_pool(tx)->dp_spa; in spa_change_guid_sync()
1011 vdev_t *rvd = spa->spa_root_vdev; in spa_change_guid_sync()
1016 rvd->vdev_guid = *newguid; in spa_change_guid_sync()
1017 rvd->vdev_guid_sum += (*newguid - oldguid); in spa_change_guid_sync()
1027 * re-import a pool built from a clone of our own vdevs. We will modify
1044 mutex_enter(&spa->spa_vdev_top_lock); in spa_change_guid()
1062 error = dsl_sync_task(spa->spa_name, spa_change_guid_check, in spa_change_guid()
1071 vdev_clear_kobj_evt(spa->spa_root_vdev); in spa_change_guid()
1072 for (int i = 0; i < spa->spa_l2cache.sav_count; i++) in spa_change_guid()
1073 vdev_clear_kobj_evt(spa->spa_l2cache.sav_vdevs[i]); in spa_change_guid()
1081 mutex_exit(&spa->spa_vdev_top_lock); in spa_change_guid()
1099 ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, in spa_error_entry_compare()
1107 * re-initializes them in the process.
1112 ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); in spa_get_errlists()
1114 memcpy(last, &spa->spa_errlist_last, sizeof (avl_tree_t)); in spa_get_errlists()
1115 memcpy(scrub, &spa->spa_errlist_scrub, sizeof (avl_tree_t)); in spa_get_errlists()
1117 avl_create(&spa->spa_errlist_scrub, in spa_get_errlists()
1120 avl_create(&spa->spa_errlist_last, in spa_get_errlists()
1129 enum zti_modes mode = ztip->zti_mode; in spa_taskqs_init()
1130 uint_t value = ztip->zti_value; in spa_taskqs_init()
1131 uint_t count = ztip->zti_count; in spa_taskqs_init()
1132 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; in spa_taskqs_init()
1149 count = MIN(count, spa->spa_alloc_count); in spa_taskqs_init()
1150 while (spa->spa_alloc_count % count != 0 && in spa_taskqs_init()
1151 spa->spa_alloc_count < count * 2) in spa_taskqs_init()
1152 count--; in spa_taskqs_init()
1180 * ------- ------- ------- ------- ------- in spa_taskqs_init()
1193 count--; in spa_taskqs_init()
1201 tqs->stqs_count = 0; in spa_taskqs_init()
1202 tqs->stqs_taskq = NULL; in spa_taskqs_init()
1213 tqs->stqs_count = count; in spa_taskqs_init()
1214 tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); in spa_taskqs_init()
1228 if (zio_taskq_sysdc && spa->spa_proc != &p0) { in spa_taskqs_init()
1231 spa->spa_proc, zio_taskq_basedc, flags); in spa_taskqs_init()
1243 INT_MAX, spa->spa_proc, flags); in spa_taskqs_init()
1248 tqs->stqs_taskq[i] = tq; in spa_taskqs_init()
1255 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; in spa_taskqs_fini()
1257 if (tqs->stqs_taskq == NULL) { in spa_taskqs_fini()
1258 ASSERT3U(tqs->stqs_count, ==, 0); in spa_taskqs_fini()
1262 for (uint_t i = 0; i < tqs->stqs_count; i++) { in spa_taskqs_fini()
1263 ASSERT3P(tqs->stqs_taskq[i], !=, NULL); in spa_taskqs_fini()
1264 taskq_destroy(tqs->stqs_taskq[i]); in spa_taskqs_fini()
1267 kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); in spa_taskqs_fini()
1268 tqs->stqs_taskq = NULL; in spa_taskqs_fini()
1285 * 'null' can only be set on the high-priority queues (queue selection for
1286 * high-priority queues will fall back to the regular queue if the high-pri
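The fallback mentioned above can be shown with a small sketch. This is not the actual dispatch code, only the idea, and it assumes the standard queue ordering in which each *_HIGH queue immediately follows its regular counterpart:

/*
 * Illustrative only: if a high-priority queue was configured as 'null'
 * (so it has no taskqs), selection drops back to the matching regular
 * queue. The function name is hypothetical.
 */
static zio_taskq_type_t
example_pick_queue(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
{
	if ((q == ZIO_TASKQ_ISSUE_HIGH || q == ZIO_TASKQ_INTR_HIGH) &&
	    spa->spa_zio_taskq[t][q].stqs_count == 0)
		q--;	/* e.g. ZIO_TASKQ_ISSUE_HIGH -> ZIO_TASKQ_ISSUE */
	return (q);
}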
1340 /* Per-mode setup */ in spa_taskq_param_set()
1410 * Can only null the high-priority queues; the general- in spa_taskq_param_set()
1460 if (zti->zti_mode == ZTI_MODE_FIXED) in spa_taskq_param_get()
1462 modes[zti->zti_mode], zti->zti_count, in spa_taskq_param_get()
1463 zti->zti_value); in spa_taskq_param_get()
1466 modes[zti->zti_mode]); in spa_taskq_param_get()
1484 return (-err); in spa_taskq_read_param_set()
1498 return (-err); in spa_taskq_write_param_set()
1507 * On FreeBSD load-time parameters can be set up before malloc() is available,
1520 if (err || req->newptr == NULL) in spa_taskq_read_param()
1533 if (err || req->newptr == NULL) in spa_taskq_write_param()
1549 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; in spa_taskq_dispatch()
1552 ASSERT3P(tqs->stqs_taskq, !=, NULL); in spa_taskq_dispatch()
1553 ASSERT3U(tqs->stqs_count, !=, 0); in spa_taskq_dispatch()
1560 ASSERT(zio); in spa_taskq_dispatch()
1561 ASSERT(taskq_empty_ent(&zio->io_tqent)); in spa_taskq_dispatch()
1563 if (tqs->stqs_count == 1) { in spa_taskq_dispatch()
1564 tq = tqs->stqs_taskq[0]; in spa_taskq_dispatch()
1567 tq = tqs->stqs_taskq[zio->io_allocator % tqs->stqs_count]; in spa_taskq_dispatch()
1569 tq = tqs->stqs_taskq[((uint64_t)gethrtime()) % tqs->stqs_count]; in spa_taskq_dispatch()
1573 &zio->io_tqent); in spa_taskq_dispatch()
1596 CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, in spa_thread()
1597 spa->spa_name); in spa_thread()
1599 ASSERT(curproc != &p0); in spa_thread()
1600 (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), in spa_thread()
1601 "zpool-%s", spa->spa_name); in spa_thread()
1602 (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); in spa_thread()
1609 mutex_enter(&curproc->p_lock); in spa_thread()
1613 curthread->t_bind_pset = zio_taskq_psrset_bind; in spa_thread()
1617 "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); in spa_thread()
1620 mutex_exit(&curproc->p_lock); in spa_thread()
1632 spa->spa_proc = curproc; in spa_thread()
1633 spa->spa_did = curthread->t_did; in spa_thread()
1637 mutex_enter(&spa->spa_proc_lock); in spa_thread()
1638 ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); in spa_thread()
1640 spa->spa_proc_state = SPA_PROC_ACTIVE; in spa_thread()
1641 cv_broadcast(&spa->spa_proc_cv); in spa_thread()
1644 while (spa->spa_proc_state == SPA_PROC_ACTIVE) in spa_thread()
1645 cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); in spa_thread()
1646 CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); in spa_thread()
1648 ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); in spa_thread()
1649 spa->spa_proc_state = SPA_PROC_GONE; in spa_thread()
1650 spa->spa_proc = &p0; in spa_thread()
1651 cv_broadcast(&spa->spa_proc_cv); in spa_thread()
1654 mutex_enter(&curproc->p_lock); in spa_thread()
1668 ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); in spa_activate()
1670 spa->spa_state = POOL_STATE_ACTIVE; in spa_activate()
1671 spa->spa_final_txg = UINT64_MAX; in spa_activate()
1672 spa->spa_mode = mode; in spa_activate()
1673 spa->spa_read_spacemaps = spa_mode_readable_spacemaps; in spa_activate()
1675 spa->spa_normal_class = metaslab_class_create(spa, "normal", in spa_activate()
1677 spa->spa_log_class = metaslab_class_create(spa, "log", msp, B_TRUE); in spa_activate()
1678 spa->spa_embedded_log_class = metaslab_class_create(spa, in spa_activate()
1680 spa->spa_special_class = metaslab_class_create(spa, "special", in spa_activate()
1682 spa->spa_dedup_class = metaslab_class_create(spa, "dedup", in spa_activate()
1686 mutex_enter(&spa->spa_proc_lock); in spa_activate()
1687 ASSERT(spa->spa_proc_state == SPA_PROC_NONE); in spa_activate()
1688 ASSERT(spa->spa_proc == &p0); in spa_activate()
1689 spa->spa_did = 0; in spa_activate()
1693 if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { in spa_activate()
1696 spa->spa_proc_state = SPA_PROC_CREATED; in spa_activate()
1697 while (spa->spa_proc_state == SPA_PROC_CREATED) { in spa_activate()
1698 cv_wait(&spa->spa_proc_cv, in spa_activate()
1699 &spa->spa_proc_lock); in spa_activate()
1701 ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); in spa_activate()
1702 ASSERT(spa->spa_proc != &p0); in spa_activate()
1703 ASSERT(spa->spa_did != 0); in spa_activate()
1708 spa->spa_name); in spa_activate()
1713 mutex_exit(&spa->spa_proc_lock); in spa_activate()
1716 if (spa->spa_proc == &p0) { in spa_activate()
1721 spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, in spa_activate()
1725 list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), in spa_activate()
1727 list_create(&spa->spa_evicting_os_list, sizeof (objset_t), in spa_activate()
1729 list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), in spa_activate()
1732 txg_list_create(&spa->spa_vdev_txg_list, spa, in spa_activate()
1735 avl_create(&spa->spa_errlist_scrub, in spa_activate()
1738 avl_create(&spa->spa_errlist_last, in spa_activate()
1741 avl_create(&spa->spa_errlist_healed, in spa_activate()
1747 spa_keystore_init(&spa->spa_keystore); in spa_activate()
1750 * This taskq is used to perform zvol-minor-related tasks in spa_activate()
1764 spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri, in spa_activate()
1770 spa->spa_metaslab_taskq = taskq_create("z_metaslab", in spa_activate()
1779 spa->spa_prefetch_taskq = taskq_create("z_prefetch", 100, in spa_activate()
1786 spa->spa_upgrade_taskq = taskq_create("z_upgrade", 100, in spa_activate()
1796 ASSERT(spa->spa_sync_on == B_FALSE); in spa_deactivate()
1797 ASSERT(spa->spa_dsl_pool == NULL); in spa_deactivate()
1798 ASSERT(spa->spa_root_vdev == NULL); in spa_deactivate()
1799 ASSERT(spa->spa_async_zio_root == NULL); in spa_deactivate()
1800 ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); in spa_deactivate()
1804 if (spa->spa_zvol_taskq) { in spa_deactivate()
1805 taskq_destroy(spa->spa_zvol_taskq); in spa_deactivate()
1806 spa->spa_zvol_taskq = NULL; in spa_deactivate()
1809 if (spa->spa_metaslab_taskq) { in spa_deactivate()
1810 taskq_destroy(spa->spa_metaslab_taskq); in spa_deactivate()
1811 spa->spa_metaslab_taskq = NULL; in spa_deactivate()
1814 if (spa->spa_prefetch_taskq) { in spa_deactivate()
1815 taskq_destroy(spa->spa_prefetch_taskq); in spa_deactivate()
1816 spa->spa_prefetch_taskq = NULL; in spa_deactivate()
1819 if (spa->spa_upgrade_taskq) { in spa_deactivate()
1820 taskq_destroy(spa->spa_upgrade_taskq); in spa_deactivate()
1821 spa->spa_upgrade_taskq = NULL; in spa_deactivate()
1824 txg_list_destroy(&spa->spa_vdev_txg_list); in spa_deactivate()
1826 list_destroy(&spa->spa_config_dirty_list); in spa_deactivate()
1827 list_destroy(&spa->spa_evicting_os_list); in spa_deactivate()
1828 list_destroy(&spa->spa_state_dirty_list); in spa_deactivate()
1830 taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); in spa_deactivate()
1839 ASSERT3P(spa->spa_txg_zio[i], !=, NULL); in spa_deactivate()
1840 VERIFY0(zio_wait(spa->spa_txg_zio[i])); in spa_deactivate()
1841 spa->spa_txg_zio[i] = NULL; in spa_deactivate()
1844 metaslab_class_destroy(spa->spa_normal_class); in spa_deactivate()
1845 spa->spa_normal_class = NULL; in spa_deactivate()
1847 metaslab_class_destroy(spa->spa_log_class); in spa_deactivate()
1848 spa->spa_log_class = NULL; in spa_deactivate()
1850 metaslab_class_destroy(spa->spa_embedded_log_class); in spa_deactivate()
1851 spa->spa_embedded_log_class = NULL; in spa_deactivate()
1853 metaslab_class_destroy(spa->spa_special_class); in spa_deactivate()
1854 spa->spa_special_class = NULL; in spa_deactivate()
1856 metaslab_class_destroy(spa->spa_dedup_class); in spa_deactivate()
1857 spa->spa_dedup_class = NULL; in spa_deactivate()
1864 avl_destroy(&spa->spa_errlist_scrub); in spa_deactivate()
1865 avl_destroy(&spa->spa_errlist_last); in spa_deactivate()
1866 avl_destroy(&spa->spa_errlist_healed); in spa_deactivate()
1868 spa_keystore_fini(&spa->spa_keystore); in spa_deactivate()
1870 spa->spa_state = POOL_STATE_UNINITIALIZED; in spa_deactivate()
1872 mutex_enter(&spa->spa_proc_lock); in spa_deactivate()
1873 if (spa->spa_proc_state != SPA_PROC_NONE) { in spa_deactivate()
1874 ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); in spa_deactivate()
1875 spa->spa_proc_state = SPA_PROC_DEACTIVATE; in spa_deactivate()
1876 cv_broadcast(&spa->spa_proc_cv); in spa_deactivate()
1877 while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { in spa_deactivate()
1878 ASSERT(spa->spa_proc != &p0); in spa_deactivate()
1879 cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); in spa_deactivate()
1881 ASSERT(spa->spa_proc_state == SPA_PROC_GONE); in spa_deactivate()
1882 spa->spa_proc_state = SPA_PROC_NONE; in spa_deactivate()
1884 ASSERT(spa->spa_proc == &p0); in spa_deactivate()
1885 mutex_exit(&spa->spa_proc_lock); in spa_deactivate()
1892 if (spa->spa_did != 0) { in spa_deactivate()
1893 thread_join(spa->spa_did); in spa_deactivate()
1894 spa->spa_did = 0; in spa_deactivate()
1918 if ((*vdp)->vdev_ops->vdev_op_leaf) in spa_config_parse()
1943 ASSERT(*vdp != NULL); in spa_config_parse()
1957 if (!spa->spa_sync_on) in spa_should_flush_logs_on_unload()
1976 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); in spa_unload_log_sm_flush_all()
1979 ASSERT3U(spa->spa_log_flushall_txg, ==, 0); in spa_unload_log_sm_flush_all()
1980 spa->spa_log_flushall_txg = dmu_tx_get_txg(tx); in spa_unload_log_sm_flush_all()
1983 txg_wait_synced(spa_get_dsl(spa), spa->spa_log_flushall_txg); in spa_unload_log_sm_flush_all()
1993 while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg, in spa_unload_log_sm_metadata()
1995 VERIFY0(sls->sls_mscount); in spa_unload_log_sm_metadata()
1999 while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) { in spa_unload_log_sm_metadata()
2000 VERIFY0(e->lse_mscount); in spa_unload_log_sm_metadata()
2004 spa->spa_unflushed_stats.sus_nblocks = 0; in spa_unload_log_sm_metadata()
2005 spa->spa_unflushed_stats.sus_memused = 0; in spa_unload_log_sm_metadata()
2006 spa->spa_unflushed_stats.sus_blocklimit = 0; in spa_unload_log_sm_metadata()
2012 if (spa->spa_condense_zthr != NULL) { in spa_destroy_aux_threads()
2013 zthr_destroy(spa->spa_condense_zthr); in spa_destroy_aux_threads()
2014 spa->spa_condense_zthr = NULL; in spa_destroy_aux_threads()
2016 if (spa->spa_checkpoint_discard_zthr != NULL) { in spa_destroy_aux_threads()
2017 zthr_destroy(spa->spa_checkpoint_discard_zthr); in spa_destroy_aux_threads()
2018 spa->spa_checkpoint_discard_zthr = NULL; in spa_destroy_aux_threads()
2020 if (spa->spa_livelist_delete_zthr != NULL) { in spa_destroy_aux_threads()
2021 zthr_destroy(spa->spa_livelist_delete_zthr); in spa_destroy_aux_threads()
2022 spa->spa_livelist_delete_zthr = NULL; in spa_destroy_aux_threads()
2024 if (spa->spa_livelist_condense_zthr != NULL) { in spa_destroy_aux_threads()
2025 zthr_destroy(spa->spa_livelist_condense_zthr); in spa_destroy_aux_threads()
2026 spa->spa_livelist_condense_zthr = NULL; in spa_destroy_aux_threads()
2028 if (spa->spa_raidz_expand_zthr != NULL) { in spa_destroy_aux_threads()
2029 zthr_destroy(spa->spa_raidz_expand_zthr); in spa_destroy_aux_threads()
2030 spa->spa_raidz_expand_zthr = NULL; in spa_destroy_aux_threads()
2040 ASSERT(MUTEX_HELD(&spa_namespace_lock) || in spa_unload()
2041 spa->spa_export_thread == curthread); in spa_unload()
2042 ASSERT(spa_state(spa) != POOL_STATE_UNINITIALIZED); in spa_unload()
2054 if (spa->spa_final_txg == UINT64_MAX) { in spa_unload()
2069 if (spa->spa_root_vdev) { in spa_unload()
2070 vdev_t *root_vdev = spa->spa_root_vdev; in spa_unload()
2080 spa->spa_final_txg = spa_last_synced_txg(spa) + in spa_unload()
2088 if (spa->spa_sync_on) { in spa_unload()
2089 txg_sync_stop(spa->spa_dsl_pool); in spa_unload()
2090 spa->spa_sync_on = B_FALSE; in spa_unload()
2097 taskq_wait(spa->spa_metaslab_taskq); in spa_unload()
2099 if (spa->spa_mmp.mmp_thread) in spa_unload()
2105 if (spa->spa_async_zio_root != NULL) { in spa_unload()
2107 (void) zio_wait(spa->spa_async_zio_root[i]); in spa_unload()
2108 kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); in spa_unload()
2109 spa->spa_async_zio_root = NULL; in spa_unload()
2112 if (spa->spa_vdev_removal != NULL) { in spa_unload()
2113 spa_vdev_removal_destroy(spa->spa_vdev_removal); in spa_unload()
2114 spa->spa_vdev_removal = NULL; in spa_unload()
2121 bpobj_close(&spa->spa_deferred_bpobj); in spa_unload()
2128 if (spa->spa_root_vdev) in spa_unload()
2129 vdev_free(spa->spa_root_vdev); in spa_unload()
2130 ASSERT(spa->spa_root_vdev == NULL); in spa_unload()
2135 if (spa->spa_dsl_pool) { in spa_unload()
2136 dsl_pool_close(spa->spa_dsl_pool); in spa_unload()
2137 spa->spa_dsl_pool = NULL; in spa_unload()
2138 spa->spa_meta_objset = NULL; in spa_unload()
2150 if (spa->spa_spares.sav_vdevs) { in spa_unload()
2151 for (int i = 0; i < spa->spa_spares.sav_count; i++) in spa_unload()
2152 vdev_free(spa->spa_spares.sav_vdevs[i]); in spa_unload()
2153 kmem_free(spa->spa_spares.sav_vdevs, in spa_unload()
2154 spa->spa_spares.sav_count * sizeof (void *)); in spa_unload()
2155 spa->spa_spares.sav_vdevs = NULL; in spa_unload()
2157 if (spa->spa_spares.sav_config) { in spa_unload()
2158 nvlist_free(spa->spa_spares.sav_config); in spa_unload()
2159 spa->spa_spares.sav_config = NULL; in spa_unload()
2161 spa->spa_spares.sav_count = 0; in spa_unload()
2163 if (spa->spa_l2cache.sav_vdevs) { in spa_unload()
2164 for (int i = 0; i < spa->spa_l2cache.sav_count; i++) { in spa_unload()
2165 vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); in spa_unload()
2166 vdev_free(spa->spa_l2cache.sav_vdevs[i]); in spa_unload()
2168 kmem_free(spa->spa_l2cache.sav_vdevs, in spa_unload()
2169 spa->spa_l2cache.sav_count * sizeof (void *)); in spa_unload()
2170 spa->spa_l2cache.sav_vdevs = NULL; in spa_unload()
2172 if (spa->spa_l2cache.sav_config) { in spa_unload()
2173 nvlist_free(spa->spa_l2cache.sav_config); in spa_unload()
2174 spa->spa_l2cache.sav_config = NULL; in spa_unload()
2176 spa->spa_l2cache.sav_count = 0; in spa_unload()
2178 spa->spa_async_suspended = 0; in spa_unload()
2180 spa->spa_indirect_vdevs_loaded = B_FALSE; in spa_unload()
2182 if (spa->spa_comment != NULL) { in spa_unload()
2183 spa_strfree(spa->spa_comment); in spa_unload()
2184 spa->spa_comment = NULL; in spa_unload()
2186 if (spa->spa_compatibility != NULL) { in spa_unload()
2187 spa_strfree(spa->spa_compatibility); in spa_unload()
2188 spa->spa_compatibility = NULL; in spa_unload()
2191 spa->spa_raidz_expand = NULL; in spa_unload()
2192 spa->spa_checkpoint_txg = 0; in spa_unload()
2198 * Load (or re-load) the current list of vdevs describing the active spares for
2201 * then re-generate a more complete list including status information.
2223 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); in spa_load_spares()
2228 if (spa->spa_spares.sav_vdevs) { in spa_load_spares()
2229 for (i = 0; i < spa->spa_spares.sav_count; i++) { in spa_load_spares()
2230 vd = spa->spa_spares.sav_vdevs[i]; in spa_load_spares()
2233 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, in spa_load_spares()
2234 B_FALSE)) != NULL && tvd->vdev_isspare) in spa_load_spares()
2240 kmem_free(spa->spa_spares.sav_vdevs, in spa_load_spares()
2241 spa->spa_spares.sav_count * sizeof (void *)); in spa_load_spares()
2244 if (spa->spa_spares.sav_config == NULL) in spa_load_spares()
2247 VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, in spa_load_spares()
2250 spa->spa_spares.sav_count = (int)nspares; in spa_load_spares()
2251 spa->spa_spares.sav_vdevs = NULL; in spa_load_spares()
2265 spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), in spa_load_spares()
2267 for (i = 0; i < spa->spa_spares.sav_count; i++) { in spa_load_spares()
2270 ASSERT(vd != NULL); in spa_load_spares()
2272 spa->spa_spares.sav_vdevs[i] = vd; in spa_load_spares()
2274 if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, in spa_load_spares()
2276 if (!tvd->vdev_isspare) in spa_load_spares()
2296 vd->vdev_top = vd; in spa_load_spares()
2297 vd->vdev_aux = &spa->spa_spares; in spa_load_spares()
2310 fnvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES); in spa_load_spares()
2312 spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), in spa_load_spares()
2314 for (i = 0; i < spa->spa_spares.sav_count; i++) in spa_load_spares()
2316 spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); in spa_load_spares()
2317 fnvlist_add_nvlist_array(spa->spa_spares.sav_config, in spa_load_spares()
2319 spa->spa_spares.sav_count); in spa_load_spares()
2320 for (i = 0; i < spa->spa_spares.sav_count; i++) in spa_load_spares()
2322 kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); in spa_load_spares()
2326 * Load (or re-load) the current list of vdevs describing the active l2cache for
2329 * then re-generate a more complete list including status information.
2331 * not re-opened.
2341 spa_aux_vdev_t *sav = &spa->spa_l2cache; in spa_load_l2cache()
2356 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); in spa_load_l2cache()
2358 oldvdevs = sav->sav_vdevs; in spa_load_l2cache()
2359 oldnvdevs = sav->sav_count; in spa_load_l2cache()
2360 sav->sav_vdevs = NULL; in spa_load_l2cache()
2361 sav->sav_count = 0; in spa_load_l2cache()
2363 if (sav->sav_config == NULL) { in spa_load_l2cache()
2369 VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, in spa_load_l2cache()
2382 if (vd != NULL && guid == vd->vdev_guid) { in spa_load_l2cache()
2398 ASSERT(vd != NULL); in spa_load_l2cache()
2407 vd->vdev_top = vd; in spa_load_l2cache()
2408 vd->vdev_aux = sav; in spa_load_l2cache()
2431 sav->sav_vdevs = newvdevs; in spa_load_l2cache()
2432 sav->sav_count = (int)nl2cache; in spa_load_l2cache()
2438 fnvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE); in spa_load_l2cache()
2440 if (sav->sav_count > 0) in spa_load_l2cache()
2441 l2cache = kmem_alloc(sav->sav_count * sizeof (void *), in spa_load_l2cache()
2443 for (i = 0; i < sav->sav_count; i++) in spa_load_l2cache()
2445 sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); in spa_load_l2cache()
2446 fnvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, in spa_load_l2cache()
2447 (const nvlist_t * const *)l2cache, sav->sav_count); in spa_load_l2cache()
2459 ASSERT(vd->vdev_isl2cache); in spa_load_l2cache()
2461 if (spa_l2cache_exists(vd->vdev_guid, &pool) && in spa_load_l2cache()
2472 for (i = 0; i < sav->sav_count; i++) in spa_load_l2cache()
2474 if (sav->sav_count) in spa_load_l2cache()
2475 kmem_free(l2cache, sav->sav_count * sizeof (void *)); in spa_load_l2cache()
2487 error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); in load_nvlist()
2491 nvsize = *(uint64_t *)db->db_data; in load_nvlist()
2495 error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, in load_nvlist()
2505 * Concrete top-level vdevs that are not missing and are not logs. At every
2511 vdev_t *rvd = spa->spa_root_vdev; in spa_healthy_core_tvds()
2514 for (uint64_t i = 0; i < rvd->vdev_children; i++) { in spa_healthy_core_tvds()
2515 vdev_t *vd = rvd->vdev_child[i]; in spa_healthy_core_tvds()
2516 if (vd->vdev_islog) in spa_healthy_core_tvds()
2532 for (uint64_t c = 0; c < vd->vdev_children; c++) in spa_check_removed()
2533 spa_check_removed(vd->vdev_child[c]); in spa_check_removed()
2535 if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && in spa_check_removed()
2537 zfs_post_autoreplace(vd->vdev_spa, vd); in spa_check_removed()
2538 spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); in spa_check_removed()
2545 vdev_t *rvd = spa->spa_root_vdev; in spa_check_for_missing_logs()
2552 if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { in spa_check_for_missing_logs()
2556 child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *), in spa_check_for_missing_logs()
2560 for (uint64_t c = 0; c < rvd->vdev_children; c++) { in spa_check_for_missing_logs()
2561 vdev_t *tvd = rvd->vdev_child[c]; in spa_check_for_missing_logs()
2568 if (tvd->vdev_islog && in spa_check_for_missing_logs()
2569 tvd->vdev_state == VDEV_STATE_CANT_OPEN) { in spa_check_for_missing_logs()
2578 fnvlist_add_nvlist(spa->spa_load_info, in spa_check_for_missing_logs()
2585 kmem_free(child, rvd->vdev_children * sizeof (char **)); in spa_check_for_missing_logs()
2593 for (uint64_t c = 0; c < rvd->vdev_children; c++) { in spa_check_for_missing_logs()
2594 vdev_t *tvd = rvd->vdev_child[c]; in spa_check_for_missing_logs()
2596 if (tvd->vdev_islog && in spa_check_for_missing_logs()
2597 tvd->vdev_state == VDEV_STATE_CANT_OPEN) { in spa_check_for_missing_logs()
2619 switch (spa->spa_log_state) { in spa_check_logs()
2625 rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, in spa_check_logs()
2640 vdev_t *rvd = spa->spa_root_vdev; in spa_passivate_log()
2643 ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); in spa_passivate_log()
2645 for (int c = 0; c < rvd->vdev_children; c++) { in spa_passivate_log()
2646 vdev_t *tvd = rvd->vdev_child[c]; in spa_passivate_log()
2648 if (tvd->vdev_islog) { in spa_passivate_log()
2649 ASSERT3P(tvd->vdev_log_mg, ==, NULL); in spa_passivate_log()
2650 metaslab_group_passivate(tvd->vdev_mg); in spa_passivate_log()
2664 vdev_t *rvd = spa->spa_root_vdev; in spa_activate_log()
2666 ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); in spa_activate_log()
2668 for (int c = 0; c < rvd->vdev_children; c++) { in spa_activate_log()
2669 vdev_t *tvd = rvd->vdev_child[c]; in spa_activate_log()
2671 if (tvd->vdev_islog) { in spa_activate_log()
2672 ASSERT3P(tvd->vdev_log_mg, ==, NULL); in spa_activate_log()
2673 metaslab_group_activate(tvd->vdev_mg); in spa_activate_log()
2691 txg_wait_synced(spa->spa_dsl_pool, 0); in spa_reset_logs()
2699 for (int i = 0; i < sav->sav_count; i++) in spa_aux_check_removed()
2700 spa_check_removed(sav->sav_vdevs[i]); in spa_aux_check_removed()
2706 spa_t *spa = zio->io_spa; in spa_claim_notify()
2708 if (zio->io_error) in spa_claim_notify()
2711 mutex_enter(&spa->spa_props_lock); /* any mutex will do */ in spa_claim_notify()
2712 if (spa->spa_claim_max_txg < BP_GET_LOGICAL_BIRTH(zio->io_bp)) in spa_claim_notify()
2713 spa->spa_claim_max_txg = BP_GET_LOGICAL_BIRTH(zio->io_bp); in spa_claim_notify()
2714 mutex_exit(&spa->spa_props_lock); in spa_claim_notify()
2726 blkptr_t *bp = zio->io_bp; in spa_load_verify_done()
2727 spa_load_error_t *sle = zio->io_private; in spa_load_verify_done()
2729 int error = zio->io_error; in spa_load_verify_done()
2730 spa_t *spa = zio->io_spa; in spa_load_verify_done()
2732 abd_free(zio->io_abd); in spa_load_verify_done()
2736 atomic_inc_64(&sle->sle_meta_count); in spa_load_verify_done()
2738 atomic_inc_64(&sle->sle_data_count); in spa_load_verify_done()
2741 mutex_enter(&spa->spa_scrub_lock); in spa_load_verify_done()
2742 spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp); in spa_load_verify_done()
2743 cv_broadcast(&spa->spa_scrub_io_cv); in spa_load_verify_done()
2744 mutex_exit(&spa->spa_scrub_lock); in spa_load_verify_done()
2760 spa_load_error_t *sle = rio->io_private; in spa_load_verify_cb()
2779 atomic_inc_64(&sle->sle_meta_count); in spa_load_verify_cb()
2783 if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) || in spa_load_verify_cb()
2788 (!spa_load_verify_data || !sle->sle_verify_data)) in spa_load_verify_cb()
2795 mutex_enter(&spa->spa_scrub_lock); in spa_load_verify_cb()
2796 while (spa->spa_load_verify_bytes >= maxinflight_bytes) in spa_load_verify_cb()
2797 cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); in spa_load_verify_cb()
2798 spa->spa_load_verify_bytes += size; in spa_load_verify_cb()
2799 mutex_exit(&spa->spa_scrub_lock); in spa_load_verify_cb()
2802 spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, in spa_load_verify_cb()
2828 zpool_get_load_policy(spa->spa_config, &policy); in spa_load_verify()
2834 dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); in spa_load_verify()
2835 error = dmu_objset_find_dp(spa->spa_dsl_pool, in spa_load_verify()
2836 spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, in spa_load_verify()
2838 dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); in spa_load_verify()
2853 if (spa->spa_extreme_rewind) { in spa_load_verify()
2861 error = traverse_pool(spa, spa->spa_verify_min_txg, in spa_load_verify()
2867 ASSERT0(spa->spa_load_verify_bytes); in spa_load_verify()
2869 spa->spa_load_meta_errors = sle.sle_meta_count; in spa_load_verify()
2870 spa->spa_load_data_errors = sle.sle_data_count; in spa_load_verify()
2884 spa->spa_load_txg = spa->spa_uberblock.ub_txg; in spa_load_verify()
2885 spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; in spa_load_verify()
2887 loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; in spa_load_verify()
2888 fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME, in spa_load_verify()
2889 spa->spa_load_txg_ts); in spa_load_verify()
2890 fnvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME, in spa_load_verify()
2892 fnvlist_add_uint64(spa->spa_load_info, in spa_load_verify()
2894 fnvlist_add_uint64(spa->spa_load_info, in spa_load_verify()
2897 spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; in spa_load_verify()
2918 (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, in spa_prop_find()
2928 int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, in spa_dir_prop()
2949 return (spa->spa_livelists_to_delete != 0); in spa_livelist_delete_check()
2964 zio_free(spa, tx->tx_txg, bp); in delete_blkptr_cb()
2965 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, in delete_blkptr_cb()
2966 -bp_get_dsize_sync(spa, bp), in delete_blkptr_cb()
2967 -BP_GET_PSIZE(bp), -BP_GET_UCSIZE(bp), tx); in delete_blkptr_cb()
2981 *llp = za->za_first_integer; in dsl_get_next_livelist_obj()
2988 * context: freeing block pointers and updating the pool-wide data
3002 spa_t *spa = sda->spa; in sublist_delete_sync()
3003 dsl_deadlist_t *ll = sda->ll; in sublist_delete_sync()
3004 uint64_t key = sda->key; in sublist_delete_sync()
3005 bplist_t *to_free = sda->to_free; in sublist_delete_sync()
3021 spa_t *spa = lda->spa; in livelist_delete_sync()
3022 uint64_t ll_obj = lda->ll_obj; in livelist_delete_sync()
3023 uint64_t zap_obj = lda->zap_obj; in livelist_delete_sync()
3024 objset_t *mos = spa->spa_meta_objset; in livelist_delete_sync()
3037 spa->spa_livelists_to_delete = 0; in livelist_delete_sync()
3046 * the pool-wide livelist data.
3053 objset_t *mos = spa->spa_meta_objset; in spa_livelist_delete_cb()
3054 uint64_t zap_obj = spa->spa_livelists_to_delete; in spa_livelist_delete_cb()
3070 int err = dsl_process_sub_livelist(&dle->dle_bpobj, &to_free, in spa_livelist_delete_cb()
3076 .key = dle->dle_mintxg, in spa_livelist_delete_cb()
3081 (u_longlong_t)dle->dle_bpobj.bpo_object, in spa_livelist_delete_cb()
3082 (u_longlong_t)ll_obj, (longlong_t)count - 1); in spa_livelist_delete_cb()
3109 ASSERT3P(spa->spa_livelist_delete_zthr, ==, NULL); in spa_start_livelist_destroy_thread()
3110 spa->spa_livelist_delete_zthr = in spa_start_livelist_destroy_thread()
3125 ASSERT(tx == NULL); in livelist_track_new_cb()
3128 bplist_append(lna->frees, bp); in livelist_track_new_cb()
3130 bplist_append(lna->allocs, bp); in livelist_track_new_cb()
3147 spa_t *spa = lca->spa; in spa_livelist_condense_sync()
3149 dsl_dataset_t *ds = spa->spa_to_condense.ds; in spa_livelist_condense_sync()
3152 if (spa->spa_to_condense.cancelled) { in spa_livelist_condense_sync()
3157 dsl_deadlist_entry_t *first = spa->spa_to_condense.first; in spa_livelist_condense_sync()
3158 dsl_deadlist_entry_t *next = spa->spa_to_condense.next; in spa_livelist_condense_sync()
3159 dsl_deadlist_t *ll = &ds->ds_dir->dd_livelist; in spa_livelist_condense_sync()
3169 uint64_t first_obj = first->dle_bpobj.bpo_object; in spa_livelist_condense_sync()
3170 uint64_t next_obj = next->dle_bpobj.bpo_object; in spa_livelist_condense_sync()
3171 uint64_t cur_first_size = first->dle_bpobj.bpo_phys->bpo_num_blkptrs; in spa_livelist_condense_sync()
3172 uint64_t cur_next_size = next->dle_bpobj.bpo_phys->bpo_num_blkptrs; in spa_livelist_condense_sync()
3176 .allocs = &lca->to_keep, in spa_livelist_condense_sync()
3180 if (cur_first_size > lca->first_size) { in spa_livelist_condense_sync()
3181 VERIFY0(livelist_bpobj_iterate_from_nofree(&first->dle_bpobj, in spa_livelist_condense_sync()
3182 livelist_track_new_cb, &new_bps, lca->first_size)); in spa_livelist_condense_sync()
3184 if (cur_next_size > lca->next_size) { in spa_livelist_condense_sync()
3185 VERIFY0(livelist_bpobj_iterate_from_nofree(&next->dle_bpobj, in spa_livelist_condense_sync()
3186 livelist_track_new_cb, &new_bps, lca->next_size)); in spa_livelist_condense_sync()
3190 ASSERT(bpobj_is_empty(&first->dle_bpobj)); in spa_livelist_condense_sync()
3191 dsl_deadlist_remove_entry(ll, next->dle_mintxg, tx); in spa_livelist_condense_sync()
3193 bplist_iterate(&lca->to_keep, dsl_deadlist_insert_alloc_cb, ll, tx); in spa_livelist_condense_sync()
3200 "(%llu blkptrs) and bpobj %llu (%llu blkptrs) -> bpobj %llu " in spa_livelist_condense_sync()
3201 "(%llu blkptrs)", (u_longlong_t)tx->tx_txg, dsname, in spa_livelist_condense_sync()
3202 (u_longlong_t)ds->ds_object, (u_longlong_t)first_obj, in spa_livelist_condense_sync()
3205 (u_longlong_t)first->dle_bpobj.bpo_object, in spa_livelist_condense_sync()
3206 (u_longlong_t)first->dle_bpobj.bpo_phys->bpo_num_blkptrs); in spa_livelist_condense_sync()
3208 dmu_buf_rele(ds->ds_dbuf, spa); in spa_livelist_condense_sync()
3209 spa->spa_to_condense.ds = NULL; in spa_livelist_condense_sync()
3210 bplist_clear(&lca->to_keep); in spa_livelist_condense_sync()
3211 bplist_destroy(&lca->to_keep); in spa_livelist_condense_sync()
3213 spa->spa_to_condense.syncing = B_FALSE; in spa_livelist_condense_sync()
3224 dsl_deadlist_entry_t *first = spa->spa_to_condense.first; in spa_livelist_condense_cb()
3225 dsl_deadlist_entry_t *next = spa->spa_to_condense.next; in spa_livelist_condense_cb()
3230 bplist_create(&lca->to_keep); in spa_livelist_condense_cb()
3247 int err = dsl_process_sub_livelist(&first->dle_bpobj, &lca->to_keep, t, in spa_livelist_condense_cb()
3250 err = dsl_process_sub_livelist(&next->dle_bpobj, &lca->to_keep, in spa_livelist_condense_cb()
3258 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); in spa_livelist_condense_cb()
3267 spa->spa_to_condense.syncing = B_TRUE; in spa_livelist_condense_cb()
3268 lca->spa = spa; in spa_livelist_condense_cb()
3269 lca->first_size = first_size; in spa_livelist_condense_cb()
3270 lca->next_size = next_size; in spa_livelist_condense_cb()
3283 ASSERT(err != 0); in spa_livelist_condense_cb()
3284 bplist_clear(&lca->to_keep); in spa_livelist_condense_cb()
3285 bplist_destroy(&lca->to_keep); in spa_livelist_condense_cb()
3287 dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf, spa); in spa_livelist_condense_cb()
3288 spa->spa_to_condense.ds = NULL; in spa_livelist_condense_cb()
3302 if ((spa->spa_to_condense.ds != NULL) && in spa_livelist_condense_cb_check()
3303 (spa->spa_to_condense.syncing == B_FALSE) && in spa_livelist_condense_cb_check()
3304 (spa->spa_to_condense.cancelled == B_FALSE)) { in spa_livelist_condense_cb_check()
3313 spa->spa_to_condense.ds = NULL; in spa_start_livelist_condensing_thread()
3314 spa->spa_to_condense.first = NULL; in spa_start_livelist_condensing_thread()
3315 spa->spa_to_condense.next = NULL; in spa_start_livelist_condensing_thread()
3316 spa->spa_to_condense.syncing = B_FALSE; in spa_start_livelist_condensing_thread()
3317 spa->spa_to_condense.cancelled = B_FALSE; in spa_start_livelist_condensing_thread()
3319 ASSERT3P(spa->spa_livelist_condense_zthr, ==, NULL); in spa_start_livelist_condensing_thread()
3320 spa->spa_livelist_condense_zthr = in spa_start_livelist_condensing_thread()
3329 ASSERT(spa_writeable(spa)); in spa_spawn_aux_threads()
3336 ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); in spa_spawn_aux_threads()
3337 spa->spa_checkpoint_discard_zthr = in spa_spawn_aux_threads()
3344 * Fix up config after a partly-completed split. This is done with the
3345 * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off
3393 /* attempt to re-online it */ in spa_try_repair()
3394 vd[i]->vdev_offline = B_FALSE; in spa_try_repair()
3399 vdev_reopen(spa->spa_root_vdev); in spa_try_repair()
3404 vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) in spa_try_repair()
3419 vdev_reopen(spa->spa_root_vdev); in spa_try_repair()
3431 spa->spa_load_state = state; in spa_load()
3436 gethrestime(&spa->spa_loaded_ts); in spa_load()
3444 spa->spa_minref = zfs_refcount_count(&spa->spa_refcount); in spa_load()
3447 spa->spa_loaded_ts.tv_sec = 0; in spa_load()
3448 spa->spa_loaded_ts.tv_nsec = 0; in spa_load()
3455 spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; in spa_load()
3456 spa->spa_ena = 0; in spa_load()
3466 * Count the number of per-vdev ZAPs associated with all of the vdevs in the
3468 * spa's per-vdev ZAP list.
3473 spa_t *spa = vd->vdev_spa; in vdev_count_verify_zaps()
3476 if (spa_feature_is_active(vd->vdev_spa, SPA_FEATURE_AVZ_V2) && in vdev_count_verify_zaps()
3477 vd->vdev_root_zap != 0) { in vdev_count_verify_zaps()
3479 ASSERT0(zap_lookup_int(spa->spa_meta_objset, in vdev_count_verify_zaps()
3480 spa->spa_all_vdev_zaps, vd->vdev_root_zap)); in vdev_count_verify_zaps()
3482 if (vd->vdev_top_zap != 0) { in vdev_count_verify_zaps()
3484 ASSERT0(zap_lookup_int(spa->spa_meta_objset, in vdev_count_verify_zaps()
3485 spa->spa_all_vdev_zaps, vd->vdev_top_zap)); in vdev_count_verify_zaps()
3487 if (vd->vdev_leaf_zap != 0) { in vdev_count_verify_zaps()
3489 ASSERT0(zap_lookup_int(spa->spa_meta_objset, in vdev_count_verify_zaps()
3490 spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); in vdev_count_verify_zaps()
3493 for (uint64_t i = 0; i < vd->vdev_children; i++) { in vdev_count_verify_zaps()
3494 total += vdev_count_verify_zaps(vd->vdev_child[i]); in vdev_count_verify_zaps()
3530 * Disable the MMP activity check - This is used by zdb which in spa_activity_check_required()
3533 if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) in spa_activity_check_required()
3539 if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) in spa_activity_check_required()
3548 if (tryconfig_txg && tryconfig_txg == ub->ub_txg && in spa_activity_check_required()
3549 tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp && in spa_activity_check_required()
3575 * Nanoseconds the activity check must watch for changes on-disk.
3595 ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100); in spa_activity_check_duration()
3616 ub->ub_mmp_delay) * import_intervals); in spa_activity_check_duration()
3622 (u_longlong_t)ub->ub_mmp_delay, in spa_activity_check_duration()
3627 * zfs-0.7 compatibility case in spa_activity_check_duration()
3631 ub->ub_mmp_delay) * import_intervals); in spa_activity_check_duration()
3636 (u_longlong_t)ub->ub_mmp_delay, in spa_activity_check_duration()
3641 zfs_dbgmsg("pool last imported on non-MMP aware " in spa_activity_check_duration()
3655 * 0 - no activity detected
3656 * EREMOTEIO - remote activity detected
3657 * EINTR - user canceled the operation
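As a minimal sketch of how a caller might react to these three outcomes (illustrative names, not the real import path; the argument list mirrors the spa_activity_check() calls seen elsewhere in this listing):

static int
example_mmp_gate(spa_t *spa, uberblock_t *ub, nvlist_t *config)
{
	int error = spa_activity_check(spa, ub, config, B_TRUE);

	if (error == 0)
		return (0);		/* no activity detected, proceed */
	if (error == EREMOTEIO)
		return (error);		/* pool is in use on another host */
	/* EINTR: the user canceled the wait */
	return (error);
}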
3663 uint64_t txg = ub->ub_txg; in spa_activity_check()
3664 uint64_t timestamp = ub->ub_timestamp; in spa_activity_check()
3665 uint64_t mmp_config = ub->ub_mmp_config; in spa_activity_check()
3670 vdev_t *rvd = spa->spa_root_vdev; in spa_activity_check()
3701 /* Add a small random factor in case of simultaneous imports (0-25%) */ in spa_activity_check()
3716 (u_longlong_t)NSEC2MSEC(import_expire - now)); in spa_activity_check()
3721 NSEC2SEC(import_expire - gethrtime())); in spa_activity_check()
3726 if (txg != ub->ub_txg || timestamp != ub->ub_timestamp || in spa_activity_check()
3732 (u_longlong_t)txg, (u_longlong_t)ub->ub_txg, in spa_activity_check()
3734 (u_longlong_t)ub->ub_timestamp, in spa_activity_check()
3736 (u_longlong_t)ub->ub_mmp_config); in spa_activity_check()
3748 if (error != -1) { in spa_activity_check()
3762 * spa->spa_load_info nvlist. If the remote hostname or hostid are in spa_activity_check()
3766 * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) in spa_activity_check()
3767 * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool in spa_activity_check()
3768 * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool in spa_activity_check()
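These keys land in spa->spa_load_info, which is handed back with the import failure. A minimal consumer-side sketch, assuming the caller already holds that nvlist (the function name is hypothetical):

static void
example_report_mmp(nvlist_t *info)
{
	uint64_t state, hostid;

	if (nvlist_lookup_uint64(info, ZPOOL_CONFIG_MMP_STATE, &state) != 0)
		return;		/* mandatory key is missing */

	/* hostid (and hostname) are only added when the remote values exist */
	if (nvlist_lookup_uint64(info, ZPOOL_CONFIG_MMP_HOSTID, &hostid) == 0)
		(void) printf("pool appears active on hostid %llx\n",
		    (u_longlong_t)hostid);
}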
3778 fnvlist_add_string(spa->spa_load_info, in spa_activity_check()
3785 fnvlist_add_uint64(spa->spa_load_info, in spa_activity_check()
3790 fnvlist_add_uint64(spa->spa_load_info, in spa_activity_check()
3792 fnvlist_add_uint64(spa->spa_load_info, in spa_activity_check()
3811 ASSERT(spa_multihost(spa) && spa_suspended(spa)); in spa_mmp_remote_host_activity()
3819 vdev_uberblock_load(spa->spa_root_vdev, &best_ub, &best_label); in spa_mmp_remote_host_activity()
3841 if (best_ub.ub_txg != spa->spa_uberblock.ub_txg || in spa_mmp_remote_host_activity()
3842 best_ub.ub_timestamp != spa->spa_uberblock.ub_timestamp) { in spa_mmp_remote_host_activity()
3845 (u_longlong_t)spa->spa_uberblock.ub_txg, in spa_mmp_remote_host_activity()
3847 (u_longlong_t)spa->spa_uberblock.ub_timestamp, in spa_mmp_remote_host_activity()
3855 return (spa_activity_check(spa, &spa->spa_uberblock, spa->spa_config, in spa_mmp_remote_host_activity()
3877 "See: https://openzfs.github.io/openzfs-docs/msg/" in spa_verify_host()
3878 "ZFS-8000-EY", in spa_verify_host()
3894 nvlist_t *nvtree, *nvl, *config = spa->spa_config;
3906 &spa->spa_ubsync.ub_version) != 0)
3907 spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
3925 if ((spa->spa_load_state == SPA_LOAD_IMPORT ||
3926 spa->spa_load_state == SPA_LOAD_TRYIMPORT) &&
3929 if ((spa->spa_load_state == SPA_LOAD_IMPORT ||
3930 spa->spa_load_state == SPA_LOAD_TRYIMPORT) &&
3939 spa->spa_config_guid = pool_guid;
3941 nvlist_free(spa->spa_load_info);
3942 spa->spa_load_info = fnvlist_alloc();
3944 ASSERT(spa->spa_comment == NULL);
3946 spa->spa_comment = spa_strdup(comment);
3948 ASSERT(spa->spa_compatibility == NULL);
3951 spa->spa_compatibility = spa_strdup(compatibility);
3954 &spa->spa_config_txg);
3957 spa->spa_config_splitting = fnvlist_dup(nvl);
3968 spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *),
3971 spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
3993 ASSERT(spa->spa_root_vdev == rvd);
3994 ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT);
3995 ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT);
3998 ASSERT(spa_guid(spa) == pool_guid);
4014 * spa_missing_tvds_allowed defines how many top-level vdevs can be
4017 if (spa->spa_trust_config) {
4018 spa->spa_missing_tvds_allowed = zfs_max_missing_tvds;
4019 } else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) {
4020 spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile;
4021 } else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) {
4022 spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan;
4024 spa->spa_missing_tvds_allowed = 0;
4027 spa->spa_missing_tvds_allowed =
4028 MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed);
4031 error = vdev_open(spa->spa_root_vdev);
4034 if (spa->spa_missing_tvds != 0) {
4035 spa_load_note(spa, "vdev tree has %lld missing top-level "
4036 "vdevs.", (u_longlong_t)spa->spa_missing_tvds);
4037 if (spa->spa_trust_config && (spa->spa_mode & SPA_MODE_WRITE)) {
4047 spa_load_note(spa, "pools with missing top-level "
4048 "vdevs can only be opened in read-only mode.");
4052 "%lld missing top-level vdevs at this stage.",
4053 (u_longlong_t)spa->spa_missing_tvds_allowed);
4060 if (spa->spa_missing_tvds != 0 || error != 0)
4061 vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2);
4076 vdev_t *rvd = spa->spa_root_vdev;
4087 if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
4100 spa->spa_state = POOL_STATE_ACTIVE;
4101 spa->spa_ubsync = spa->spa_uberblock;
4102 spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
4103 TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
4104 spa->spa_first_txg = spa->spa_last_ubsync_txg ?
4105 spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
4106 spa->spa_claim_max_txg = spa->spa_first_txg;
4107 spa->spa_prev_software_version = ub->ub_software_version;
4113 vdev_t *rvd = spa->spa_root_vdev;
4115 uberblock_t *ub = &spa->spa_uberblock;
4123 * we are opening the checkpointed state read-only, we have
4133 * just in the edge case.
4135 if (ub->ub_checkpoint_txg != 0 &&
4149 if (ub->ub_txg == 0) {
4155 if (spa->spa_load_max_txg != UINT64_MAX) {
4157 (u_longlong_t)spa->spa_load_max_txg);
4160 (u_longlong_t)ub->ub_txg);
4161 if (ub->ub_raidz_reflow_info != 0) {
4175 spa->spa_config);
4177 if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay &&
4180 fnvlist_add_uint64(spa->spa_load_info,
4186 spa_activity_check(spa, ub, spa->spa_config, B_TRUE);
4192 fnvlist_add_uint64(spa->spa_load_info,
4194 fnvlist_add_uint64(spa->spa_load_info,
4195 ZPOOL_CONFIG_MMP_TXG, ub->ub_txg);
4196 fnvlist_add_uint16(spa->spa_load_info,
4204 if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
4207 (u_longlong_t)ub->ub_version);
4211 if (ub->ub_version >= SPA_VERSION_FEATURES) {
4234 * Update our in-core representation with the definitive values
4237 nvlist_free(spa->spa_label_features);
4238 spa->spa_label_features = fnvlist_dup(features);
4248 if (ub->ub_version >= SPA_VERSION_FEATURES) {
4253 for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
4255 nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
4263 fnvlist_add_nvlist(spa->spa_load_info,
4274 if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) {
4276 spa_try_repair(spa, spa->spa_config);
4278 nvlist_free(spa->spa_config_splitting);
4279 spa->spa_config_splitting = NULL;
4294 vdev_t *rvd = spa->spa_root_vdev;
4296 error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
4302 spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
4311 vdev_t *mrvd, *rvd = spa->spa_root_vdev;
4317 if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE)
4330 if (load_nvlist(spa, spa->spa_config_object, &mos_config)
4340 if (spa->spa_load_state == SPA_LOAD_OPEN) {
4381 spa_load_note(spa, "vdev_copy_path_strict failed, falling "
4388 spa->spa_root_vdev = mrvd;
4393 * If 'zpool import' used a cached config, then the on-disk hostid and
4404 fnvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_HOSTID,
4407 fnvlist_add_string(spa->spa_load_info, ZPOOL_CONFIG_HOSTNAME,
4421 if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY,
4425 spa->spa_config_source = SPA_CONFIG_SRC_MOS;
4433 spa->spa_trust_config = B_TRUE;
4451 if (spa->spa_load_state != SPA_LOAD_TRYIMPORT &&
4452 !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) {
4461 if (healthy_tvds_mos - healthy_tvds >=
4464 "top-level vdevs compared to MOS (%lld vs %lld). ",
4485 if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) {
4488 (u_longlong_t)spa->spa_uberblock.ub_guid_sum,
4489 (u_longlong_t)rvd->vdev_guid_sum);
4501 vdev_t *rvd = spa->spa_root_vdev;
4531 vdev_t *rvd = spa->spa_root_vdev;
4538 &spa->spa_feat_for_read_obj, B_TRUE) != 0) {
4543 &spa->spa_feat_for_write_obj, B_TRUE) != 0) {
4548 &spa->spa_feat_desc_obj, B_TRUE) != 0) {
4560 spa->spa_load_state == SPA_LOAD_TRYIMPORT) {
4567 fnvlist_add_nvlist(spa->spa_load_info,
4571 fnvlist_add_nvlist(spa->spa_load_info,
4579 fnvlist_add_boolean(spa->spa_load_info,
4586 * import in read-write mode and (if it is not) whether the
4587 * pool is available for import in read-only mode. If the pool
4588 * is available for import in read-write mode, it is displayed
4590 * in read-only mode, it is displayed as unavailable in
4591 * userland. If the pool is available for import in read-only
4592 * mode but not read-write mode, it is displayed as unavailable
4594 * available for open in read-only mode.
4598 * the pool can be opened read-only before returning to
4610 * Load refcounts for ZFS features from disk into an in-memory
4619 spa->spa_feat_refcount_cache[i] = refcount;
4621 spa->spa_feat_refcount_cache[i] =
4635 &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0)
4646 spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;
4656 vdev_t *rvd = spa->spa_root_vdev;
4658 spa->spa_is_initializing = B_TRUE;
4659 error = dsl_pool_open(spa->spa_dsl_pool);
4660 spa->spa_is_initializing = B_FALSE;
4674 vdev_t *rvd = spa->spa_root_vdev;
4677 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
4679 sizeof (spa->spa_cksum_salt.zcs_bytes),
4680 spa->spa_cksum_salt.zcs_bytes);
4683 (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
4684 sizeof (spa->spa_cksum_salt.zcs_bytes));
4693 error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
4695 spa_load_failed(spa, "error opening deferred-frees bpobj "
4702 * (raid-z deflation). If we have an older pool, this will not
4705 error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE);
4710 &spa->spa_creation_version, B_FALSE);
4718 error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last,
4724 &spa->spa_errlog_scrub, B_FALSE);
4730 &spa->spa_scrubbed_last_txg, B_FALSE);
4739 &spa->spa_livelists_to_delete, B_FALSE);
4747 error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE);
4752 * Load the per-vdev ZAP map. If we have an older pool, this will not
4760 if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) {
4766 &spa->spa_all_vdev_zaps, B_FALSE);
4771 spa->spa_avz_action = AVZ_ACTION_INITIALIZE;
4772 ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev));
4779 * we have orphaned per-vdev ZAPs in the MOS. Defer their
4782 spa->spa_avz_action = AVZ_ACTION_DESTROY;
4787 ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev));
4791 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
4793 error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object,
4801 spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
4803 spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
4804 spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
4805 spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
4807 &spa->spa_dedup_table_quota);
4808 spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
4809 spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim);
4810 spa->spa_autoreplace = (autoreplace != 0);
4814 * If we are importing a pool with missing top-level vdevs,
4818 if (spa->spa_missing_tvds > 0 &&
4819 spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE &&
4820 spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
4823 spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE;
4833 vdev_t *rvd = spa->spa_root_vdev;
4836 * If we're assembling the pool from the split-off vdevs of
4844 error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object,
4849 ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
4850 if (load_nvlist(spa, spa->spa_spares.sav_object,
4851 &spa->spa_spares.sav_config) != 0) {
4860 spa->spa_spares.sav_sync = B_TRUE;
4867 &spa->spa_l2cache.sav_object, B_FALSE);
4871 ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
4872 if (load_nvlist(spa, spa->spa_l2cache.sav_object,
4873 &spa->spa_l2cache.sav_config) != 0) {
4882 spa->spa_l2cache.sav_sync = B_TRUE;
4892 vdev_t *rvd = spa->spa_root_vdev;
4900 (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) {
4901 fnvlist_add_uint64(spa->spa_load_info,
4913 if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
4914 spa_check_removed(spa->spa_root_vdev);
4920 if (spa->spa_load_state != SPA_LOAD_IMPORT) {
4921 spa_aux_check_removed(&spa->spa_spares);
4922 spa_aux_check_removed(&spa->spa_l2cache);
4956 vdev_t *rvd = spa->spa_root_vdev;
4971 vdev_t *rvd = spa->spa_root_vdev;
4985 vdev_t *rvd = spa->spa_root_vdev;
4990 if (spa->spa_missing_tvds != 0) {
5009 vdev_t *rvd = spa->spa_root_vdev;
5015 if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
5041 spa->spa_claiming = B_TRUE;
5044 (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
5048 spa->spa_claiming = B_FALSE;
5057 vdev_t *rvd = spa->spa_root_vdev;
5065 * in-core spa_config and update the disk labels.
5067 if (update_config_cache || config_cache_txg != spa->spa_config_txg ||
5068 spa->spa_load_state == SPA_LOAD_IMPORT ||
5069 spa->spa_load_state == SPA_LOAD_RECOVER ||
5070 (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
5073 for (int c = 0; c < rvd->vdev_children; c++)
5074 if (rvd->vdev_child[c]->vdev_ms_array == 0)
5088 spa_mode_t mode = spa->spa_mode;
5089 int async_suspended = spa->spa_async_suspended;
5100 spa->spa_async_suspended = async_suspended;
5109 ASSERT0(spa->spa_checkpoint_txg);
5110 ASSERT(MUTEX_HELD(&spa_namespace_lock) ||
5111 spa->spa_load_thread == curthread);
5113 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
5126 spa->spa_checkpoint_txg = checkpoint.ub_txg;
5127 spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp;
5137 ASSERT(MUTEX_HELD(&spa_namespace_lock));
5138 ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
5144 * also effectively puts the spa in read-only mode since
5149 spa->spa_trust_config = B_FALSE;
5213 ASSERT(MUTEX_HELD(&spa_namespace_lock));
5214 ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
5216 error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
5230 ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg);
5240 checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1;
5246 spa->spa_uberblock = checkpoint;
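
The txg bump just above is what lets the rewound uberblock win on the next open. As a rough illustration only (hypothetical struct ub and ub_better(); the real selection is done by the on-disk label code and also weighs MMP state), labels keep several uberblocks and the one with the higher (txg, timestamp) pair is preferred, so a checkpointed uberblock adopted with its original, older txg would lose to the newer uberblock already on disk:

/*
 * Sketch only, not spa.c code: why the rewound uberblock's txg must be
 * bumped above the current one before it is adopted.
 */
struct ub {
	unsigned long long txg;
	unsigned long long timestamp;
};

/* Nonzero when 'a' should be preferred over 'b'. */
static int
ub_better(const struct ub *a, const struct ub *b)
{
	if (a->txg != b->txg)
		return (a->txg > b->txg);
	return (a->timestamp > b->timestamp);
}
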
5256 * pool as read-only. Specifically, verifying the checkpointed
5261 vdev_t *rvd = spa->spa_root_vdev;
5266 int children = rvd->vdev_children;
5270 vdev_t *vd = rvd->vdev_child[(c0 + c) % children];
5276 if (vd->vdev_ms_array == 0 || vd->vdev_islog ||
5284 error = vdev_config_sync(svd, svdcount, spa->spa_first_txg);
5286 spa->spa_last_synced_guid = rvd->vdev_guid;
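
The (c0 + c) % children walk above starts at a random child so that repeated calls to vdev_config_sync() spread label writes across different top-level vdevs over time, skipping children that cannot take them (no metaslab array, log or otherwise ineligible vdevs). A minimal self-contained sketch of that selection; pick_label_targets() and struct child are hypothetical, and the kernel uses its own random helper rather than rand():

#include <stdlib.h>

struct child { int eligible; };

static int
pick_label_targets(struct child *kids, int children,
    struct child **out, int want)
{
	int picked = 0;
	int c0 = (children > 0) ? (rand() % children) : 0;

	for (int c = 0; c < children && picked < want; c++) {
		struct child *vd = &kids[(c0 + c) % children];
		if (!vd->eligible)	/* e.g. log, hole, or unwritable */
			continue;
		out[picked++] = vd;
	}
	return (picked);
}
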
5356 (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
5360 ASSERT(MUTEX_HELD(&spa_namespace_lock));
5361 ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
5406 spa->spa_load_thread = curthread;
5419 * from the pool and their contents were re-mapped to other vdevs. Note
5457 * Retrieve the list of auxiliary devices - cache devices and spares -
5494 ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT);
5498 * read-only mode but not read-write mode. We now have enough
5501 error = spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT,
5526 * pool. If we are importing the pool in read-write mode, a few
5530 if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER ||
5531 spa->spa_load_max_txg == UINT64_MAX)) {
5532 uint64_t config_cache_txg = spa->spa_config_txg;
5534 ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT);
5540 if (RRSS_GET_STATE(&spa->spa_uberblock) == RRSS_SCRATCH_VALID)
5550 (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg);
5560 * Kick-off the syncing thread.
5562 spa->spa_sync_on = B_TRUE;
5563 txg_sync_start(spa->spa_dsl_pool);
5574 txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
5591 if (vdev_rebuild_active(spa->spa_root_vdev)) {
5593 } else if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
5594 vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
5628 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
5632 vdev_initialize_restart(spa->spa_root_vdev);
5634 vdev_trim_restart(spa->spa_root_vdev);
5639 zio_handle_import_delay(spa, gethrtime() - load_start);
5647 spa->spa_load_thread = NULL;
5657 spa_mode_t mode = spa->spa_mode;
5662 spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;
5668 (u_longlong_t)spa->spa_load_max_txg);
5690 if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
5691 spa->spa_load_max_txg = spa->spa_load_txg;
5694 spa->spa_load_max_txg = max_request;
5696 spa->spa_extreme_rewind = B_TRUE;
5704 * When attempting checkpoint-rewind on a pool with no
5708 ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
5713 if (spa->spa_root_vdev != NULL)
5714 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
5716 spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
5717 spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;
5734 loadinfo = spa->spa_load_info;
5735 spa->spa_load_info = fnvlist_alloc();
5738 spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
5739 safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
5747 while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg &&
5748 spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) {
5749 if (spa->spa_load_max_txg < safe_rewind_txg)
5750 spa->spa_extreme_rewind = B_TRUE;
5754 spa->spa_extreme_rewind = B_FALSE;
5755 spa->spa_load_max_txg = UINT64_MAX;
5769 spa->spa_load_info);
5772 fnvlist_free(spa->spa_load_info);
5773 spa->spa_load_info = loadinfo;
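
Putting the fragments above together: each failed load lowers the txg ceiling to just below the uberblock that was tried, and once the ceiling drops past the safe window (the last TXG_DEFER_SIZE txgs) extreme rewind is enabled so metadata that may reference recently freed blocks is re-checked. A compact sketch of that control flow; rewind_until_loaded(), try_load() and struct load_state are hypothetical, and min_txg is assumed to be at least 1:

struct load_state {
	unsigned long long tried_txg;	/* txg of the uberblock used */
	int extreme;			/* extreme-rewind mode enabled */
};

static int
rewind_until_loaded(struct load_state *st, unsigned long long max_txg,
    unsigned long long min_txg, unsigned long long safe_txg,
    int (*try_load)(struct load_state *, unsigned long long))
{
	int err = try_load(st, max_txg);

	while (err != 0 && st->tried_txg >= min_txg &&
	    st->tried_txg <= max_txg) {
		if (max_txg < safe_txg)
			st->extreme = 1;
		max_txg = st->tried_txg - 1;	/* retry with an older txg */
		err = try_load(st, max_txg);
	}
	return (err);
}
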
5821 if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
5826 zpool_get_load_policy(nvpolicy ? nvpolicy : spa->spa_config,
5834 spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
5835 spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE;
5864 if (config != NULL && spa->spa_config) {
5865 *config = fnvlist_dup(spa->spa_config);
5868 spa->spa_load_info);
5872 spa->spa_last_open_failed = error;
5883 *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
5891 spa->spa_load_info);
5895 spa->spa_last_open_failed = 0;
5896 spa->spa_last_ubsync_txg = 0;
5897 spa->spa_load_txg = 0;
5936 spa->spa_inject_ref++;
5946 spa->spa_inject_ref--;
5964 ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
5966 if (spa->spa_spares.sav_count == 0)
5970 VERIFY0(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
5990 vs->vs_state = VDEV_STATE_CANT_OPEN;
5991 vs->vs_aux = VDEV_AUX_SPARED;
5993 vs->vs_state =
5994 spa->spa_spares.sav_vdevs[i]->vdev_state;
6014 ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
6016 if (spa->spa_l2cache.sav_count == 0)
6020 VERIFY0(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
6037 for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
6039 spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
6040 vd = spa->spa_l2cache.sav_vdevs[j];
6044 ASSERT(vd != NULL);
6061 if (spa->spa_feat_for_read_obj != 0) {
6062 for (zap_cursor_init(&zc, spa->spa_meta_objset,
6063 spa->spa_feat_for_read_obj);
6066 ASSERT(za->za_integer_length == sizeof (uint64_t) &&
6067 za->za_num_integers == 1);
6068 VERIFY0(nvlist_add_uint64(features, za->za_name,
6069 za->za_first_integer));
6074 if (spa->spa_feat_for_write_obj != 0) {
6075 for (zap_cursor_init(&zc, spa->spa_meta_objset,
6076 spa->spa_feat_for_write_obj);
6079 ASSERT(za->za_integer_length == sizeof (uint64_t) &&
6080 za->za_num_integers == 1);
6081 VERIFY0(nvlist_add_uint64(features, za->za_name,
6082 za->za_first_integer));
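
The two loops above copy the (feature GUID, refcount) pairs out of the features-for-read and features-for-write ZAPs into a single nvlist. For illustration, a hypothetical userland-style consumer of such an nvlist; print_feature_refcounts() is not part of spa.c and only assumes the standard libnvpair iteration API. By the usual feature-flags convention, a refcount of zero means the feature is enabled but not yet active:

#include <libnvpair.h>
#include <stdint.h>
#include <stdio.h>

static void
print_feature_refcounts(nvlist_t *features)
{
	for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
		uint64_t refcount;

		if (nvpair_value_uint64(nvp, &refcount) == 0)
			(void) printf("%s\t%llu\n", nvpair_name(nvp),
			    (unsigned long long)refcount);
	}
}
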
6121 ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
6123 mutex_enter(&spa->spa_feat_stats_lock);
6124 features = spa->spa_feat_stats;
6130 spa->spa_feat_stats = features;
6137 mutex_exit(&spa->spa_feat_stats_lock);
6154 * self-inconsistent.
6161 loadtimes[0] = spa->spa_loaded_ts.tv_sec;
6162 loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
6173 spa->spa_failmode);
6176 spa->spa_suspended);
6216 * specified, as long as they are well-formed.
6228 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
6247 * Set the pending device list so we correctly handle device in-use
6250 sav->sav_pending = dev;
6251 sav->sav_npending = ndev;
6258 if (!vd->vdev_ops->vdev_op_leaf) {
6264 vd->vdev_top = vd;
6269 vd->vdev_guid);
6282 sav->sav_pending = NULL;
6283 sav->sav_npending = 0;
6292 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
6295 &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
6301 &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
6311 if (sav->sav_config != NULL) {
6320 VERIFY0(nvlist_lookup_nvlist_array(sav->sav_config, config,
6330 fnvlist_remove(sav->sav_config, config);
6332 fnvlist_add_nvlist_array(sav->sav_config, config,
6341 sav->sav_config = fnvlist_alloc();
6342 fnvlist_add_nvlist_array(sav->sav_config, config,
6355 spa_aux_vdev_t *sav = &spa->spa_l2cache;
6357 for (i = 0; i < sav->sav_count; i++) {
6360 vd = sav->sav_vdevs[i];
6361 ASSERT(vd != NULL);
6363 if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
6377 if (dcp->cp_crypt != ZIO_CRYPT_OFF &&
6378 dcp->cp_crypt != ZIO_CRYPT_INHERIT &&
6446 spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME;
6486 ASSERT(SPA_VERSION_IS_SUPPORTED(version));
6488 spa->spa_first_txg = txg;
6489 spa->spa_uberblock.ub_txg = txg - 1;
6490 spa->spa_uberblock.ub_version = version;
6491 spa->spa_ubsync = spa->spa_uberblock;
6492 spa->spa_load_state = SPA_LOAD_CREATE;
6493 spa->spa_removing_phys.sr_state = DSS_NONE;
6494 spa->spa_removing_phys.sr_removing_vdev = -1;
6495 spa->spa_removing_phys.sr_prev_indirect_vdev = -1;
6496 spa->spa_indirect_vdevs_loaded = B_TRUE;
6501 spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *),
6504 spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
6516 ASSERT(error != 0 || rvd != NULL);
6517 ASSERT(error != 0 || spa->spa_root_vdev == rvd);
6530 for (int c = 0; error == 0 && c < rvd->vdev_children; c++) {
6531 vdev_t *vd = rvd->vdev_child[c];
6553 spa->spa_spares.sav_config = fnvlist_alloc();
6554 fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
6560 spa->spa_spares.sav_sync = B_TRUE;
6568 VERIFY0(nvlist_alloc(&spa->spa_l2cache.sav_config,
6570 fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
6576 spa->spa_l2cache.sav_sync = B_TRUE;
6579 spa->spa_is_initializing = B_TRUE;
6580 spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg);
6581 spa->spa_is_initializing = B_FALSE;
6599 if (version >= SPA_VERSION_ZPOOL_HISTORY && !spa->spa_history)
6608 spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
6612 if (zap_add(spa->spa_meta_objset,
6614 sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
6618 if (zap_add(spa->spa_meta_objset,
6626 spa->spa_deflate = TRUE;
6627 if (zap_add(spa->spa_meta_objset,
6629 sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
6635 * Create the deferred-free bpobj. Turn off compression
6636 * because sync-to-convergence takes longer if the blocksize
6639 obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx);
6640 dmu_object_set_compress(spa->spa_meta_objset, obj,
6642 if (zap_add(spa->spa_meta_objset,
6647 VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj,
6648 spa->spa_meta_objset, obj));
6653 (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
6654 sizeof (spa->spa_cksum_salt.zcs_bytes));
6659 spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
6660 spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
6661 spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
6662 spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
6663 spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST);
6664 spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM);
6665 spa->spa_dedup_table_quota =
6678 spa->spa_sync_on = B_TRUE;
6692 spa->spa_minref = zfs_refcount_count(&spa->spa_refcount);
6693 spa->spa_load_state = SPA_LOAD_NONE;
6703 * Import a non-root pool into the system.
6738 spa->spa_import_flags = flags;
6741 * Verbatim import - Take a pool and insert it into the namespace
6744 if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
6766 spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT;
6769 spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
6781 fnvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info);
6788 if (spa->spa_spares.sav_config) {
6789 nvlist_free(spa->spa_spares.sav_config);
6790 spa->spa_spares.sav_config = NULL;
6793 if (spa->spa_l2cache.sav_config) {
6794 nvlist_free(spa->spa_l2cache.sav_config);
6795 spa->spa_l2cache.sav_config = NULL;
6822 if (spa->spa_spares.sav_config)
6823 fnvlist_remove(spa->spa_spares.sav_config,
6826 spa->spa_spares.sav_config = fnvlist_alloc();
6827 fnvlist_add_nvlist_array(spa->spa_spares.sav_config,
6833 spa->spa_spares.sav_sync = B_TRUE;
6834 spa->spa_spares.sav_label_sync = B_TRUE;
6838 if (spa->spa_l2cache.sav_config)
6839 fnvlist_remove(spa->spa_l2cache.sav_config,
6842 spa->spa_l2cache.sav_config = fnvlist_alloc();
6843 fnvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
6849 spa->spa_l2cache.sav_sync = B_TRUE;
6850 spa->spa_l2cache.sav_label_sync = B_TRUE;
6856 if (spa->spa_autoreplace) {
6857 spa_aux_check_removed(&spa->spa_spares);
6858 spa_aux_check_removed(&spa->spa_l2cache);
6863 * Update the config cache to include the newly-imported pool.
6907 (void) snprintf(name, MAXPATHLEN, "%s-%llx-%s",
6918 zpool_get_load_policy(spa->spa_config, &policy);
6920 spa->spa_load_max_txg = policy.zlp_txg;
6921 spa->spa_extreme_rewind = B_TRUE;
6931 spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE;
6933 spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
6944 spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
6951 if (spa->spa_root_vdev != NULL) {
6952 config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
6956 spa->spa_uberblock.ub_timestamp);
6958 spa->spa_load_info);
6960 spa->spa_errata);
6967 if ((!error || error == EEXIST) && spa->spa_bootfs) {
6975 spa->spa_bootfs, tmpname) == 0) {
7042 if (spa->spa_is_exporting) {
7047 spa->spa_is_exporting = B_TRUE;
7056 if (spa->spa_zvol_taskq) {
7058 taskq_wait(spa->spa_zvol_taskq);
7061 spa->spa_export_thread = curthread;
7064 if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
7074 if (spa->spa_sync_on) {
7075 txg_wait_synced(spa->spa_dsl_pool, 0);
7084 if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0)) {
7093 * notice the spa->spa_export_thread and wait until we signal
7097 if (spa->spa_sync_on) {
7098 vdev_t *rvd = spa->spa_root_vdev;
7132 spa->spa_state = new_state;
7143 * spa_sync() -> spa_flush_metaslabs() may dirty the final TXGs.
7152 spa->spa_final_txg = spa_last_synced_txg(spa) +
7166 if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
7171 if (oldconfig && spa->spa_config)
7172 *oldconfig = fnvlist_dup(spa->spa_config);
7175 zio_handle_export_delay(spa, gethrtime() - export_start);
7191 spa->spa_is_exporting = B_FALSE;
7192 spa->spa_export_thread = NULL;
7203 spa->spa_is_exporting = B_FALSE;
7204 spa->spa_export_thread = NULL;
7259 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
7274 vdev_t *rvd = spa->spa_root_vdev;
7279 ASSERT(spa_writeable(spa));
7287 spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */
7297 if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
7300 if (vd->vdev_children != 0 &&
7311 rvd->vdev_children)) == 0) {
7332 if (spa->spa_vdev_removal != NULL ||
7333 spa->spa_removing_phys.sr_prev_indirect_vdev != -1) {
7334 for (int c = 0; c < vd->vdev_children; c++) {
7335 tvd = vd->vdev_child[c];
7336 if (spa->spa_vdev_removal != NULL &&
7337 tvd->vdev_ashift != spa->spa_max_ashift) {
7348 if (tvd->vdev_ops == &vdev_mirror_ops) {
7350 cid < tvd->vdev_children; cid++) {
7351 vdev_t *cvd = tvd->vdev_child[cid];
7352 if (!cvd->vdev_ops->vdev_op_leaf) {
7361 if (check_ashift && spa->spa_max_ashift == spa->spa_min_ashift) {
7362 for (int c = 0; c < vd->vdev_children; c++) {
7363 tvd = vd->vdev_child[c];
7364 if (tvd->vdev_ashift != spa->spa_max_ashift) {
7371 for (int c = 0; c < vd->vdev_children; c++) {
7372 tvd = vd->vdev_child[c];
7374 tvd->vdev_id = rvd->vdev_children;
7380 spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
7383 spa->spa_spares.sav_sync = B_TRUE;
7387 spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
7390 spa->spa_l2cache.sav_sync = B_TRUE;
7400 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
7401 dsl_sync_task_nowait(spa->spa_dsl_pool, spa_draid_feature_incr,
7415 * spa_load() checks for added-but-not-initialized vdevs, so that
7448 * draid2:6d:10c:2s-0 DEGRADED 0 0 0
7449 * scsi-0QEMU_QEMU_HARDDISK_d1 ONLINE 0 0 0
7450 * scsi-0QEMU_QEMU_HARDDISK_d2 ONLINE 0 0 0
7451 * scsi-0QEMU_QEMU_HARDDISK_d3 ONLINE 0 0 0
7452 * scsi-0QEMU_QEMU_HARDDISK_d4 ONLINE 0 0 0
7453 * scsi-0QEMU_QEMU_HARDDISK_d5 ONLINE 0 0 0
7454 * scsi-0QEMU_QEMU_HARDDISK_d6 ONLINE 0 0 0
7455 * scsi-0QEMU_QEMU_HARDDISK_d7 ONLINE 0 0 0
7456 * scsi-0QEMU_QEMU_HARDDISK_d8 ONLINE 0 0 0
7457 * scsi-0QEMU_QEMU_HARDDISK_d9 ONLINE 0 0 0
7458 * spare-9 DEGRADED 0 0 0
7459 * replacing-0 DEGRADED 0 93 0
7460 * scsi-0QEMU_QEMU_HARDDISK_d10-part1/old UNAVAIL 0 0 0
7461 * spare-1 DEGRADED 0 0 0
7462 * scsi-0QEMU_QEMU_HARDDISK_d10 REMOVED 0 0 0
7463 * draid2-0-0 ONLINE 0 0 0
7464 * draid2-0-1 ONLINE 0 0 0
7466 * draid2-0-0 INUSE currently in use
7467 * draid2-0-1 INUSE currently in use
7482 ppvd = pvd->vdev_parent;
7496 if ((ppvd->vdev_ops == &vdev_spare_ops) ||
7497 (ppvd->vdev_ops == &vdev_draid_spare_ops))
7498 if (pvd->vdev_ops == &vdev_replacing_ops)
7499 if (newvd->vdev_isspare)
7507 * a mirror, a raidz, or a leaf device that is also a top-level (e.g. a
7528 vdev_t *rvd = spa->spa_root_vdev;
7535 ASSERT(spa_writeable(spa));
7541 ASSERT(MUTEX_HELD(&spa_namespace_lock));
7563 if (spa->spa_vdev_removal != NULL) {
7571 boolean_t raidz = oldvd->vdev_ops == &vdev_raidz_ops;
7580 if (spa->spa_raidz_expand != NULL) {
7584 } else if (!oldvd->vdev_ops->vdev_op_leaf) {
7591 pvd = oldvd->vdev_parent;
7597 if (newrootvd->vdev_children != 1)
7600 newvd = newrootvd->vdev_child[0];
7602 if (!newvd->vdev_ops->vdev_op_leaf)
7611 if ((oldvd->vdev_top->vdev_alloc_bias != VDEV_BIAS_NONE ||
7612 oldvd->vdev_top->vdev_islog) && newvd->vdev_isspare) {
7619 if (newvd->vdev_ops == &vdev_draid_spare_ops &&
7620 oldvd->vdev_top != vdev_draid_spare_get_parent(newvd)) {
7631 if (pvd->vdev_top != NULL)
7632 tvd = pvd->vdev_top;
7634 if (tvd->vdev_ops != &vdev_mirror_ops &&
7635 tvd->vdev_ops != &vdev_root_ops &&
7636 tvd->vdev_ops != &vdev_draid_ops) {
7647 if (pvd->vdev_ops != &vdev_mirror_ops &&
7648 pvd->vdev_ops != &vdev_root_ops &&
7658 if (pvd->vdev_ops == &vdev_spare_ops &&
7659 oldvd->vdev_isspare &&
7660 !spa_has_spare(spa, newvd->vdev_guid))
7668 * the same (spare replaces spare, non-spare replaces
7669 * non-spare).
7671 if (pvd->vdev_ops == &vdev_replacing_ops &&
7674 } else if (pvd->vdev_ops == &vdev_spare_ops &&
7675 newvd->vdev_isspare != oldvd->vdev_isspare) {
7685 if (newvd->vdev_isspare)
7694 vdev_t *min_vdev = raidz ? oldvd->vdev_child[0] : oldvd;
7695 if (newvd->vdev_asize < vdev_get_min_asize(min_vdev))
7700 * than the top-level vdev.
7702 if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) {
7708 * RAIDZ-expansion-specific checks.
7717 for (int i = 0; i < oldvd->vdev_children; i++) {
7718 if (vdev_is_dead(oldvd->vdev_child[i]) ||
7719 !oldvd->vdev_child[i]->vdev_ops->vdev_op_leaf) {
7723 /* Also fail if reserved boot area is in-use */
7724 if (vdev_check_boot_reserve(spa, oldvd->vdev_child[i])
7736 * move it to a spa_strdup-ed string.
7738 char *tmp = kmem_asprintf("raidz%u-%u",
7739 (uint_t)vdev_get_nparity(oldvd), (uint_t)oldvd->vdev_id);
7743 oldvdpath = spa_strdup(oldvd->vdev_path);
7745 newvdpath = spa_strdup(newvd->vdev_path);
7748 * If this is an in-place replacement, update oldvd's path and devid
7752 spa_strfree(oldvd->vdev_path);
7753 oldvd->vdev_path = kmem_alloc(strlen(newvdpath) + 5,
7755 (void) sprintf(oldvd->vdev_path, "%s/old",
7757 if (oldvd->vdev_devid != NULL) {
7758 spa_strfree(oldvd->vdev_devid);
7759 oldvd->vdev_devid = NULL;
7762 oldvdpath = spa_strdup(oldvd->vdev_path);
7769 if (!raidz && pvd->vdev_ops != pvops) {
7771 ASSERT(pvd->vdev_ops == pvops);
7772 ASSERT(oldvd->vdev_parent == pvd);
7775 ASSERT(pvd->vdev_top->vdev_parent == rvd);
7781 newvd->vdev_id = pvd->vdev_children;
7782 newvd->vdev_crtxg = oldvd->vdev_crtxg;
7790 tvd = newvd->vdev_top;
7791 ASSERT(pvd->vdev_top == tvd);
7792 ASSERT(tvd->vdev_parent == rvd);
7798 * for any dmu_sync-ed blocks. It will propagate upward when
7817 tvd->vdev_rz_expanding = B_TRUE;
7822 dmu_tx_t *tx = dmu_tx_create_assigned(spa->spa_dsl_pool,
7824 dsl_sync_task_nowait(spa->spa_dsl_pool, vdev_raidz_attach_sync,
7829 dtl_max_txg - TXG_INITIAL);
7831 if (newvd->vdev_isspare) {
7836 newvd_isspare = newvd->vdev_isspare;
7845 * We do this to ensure that dmu_sync-ed blocks have been
7849 newvd->vdev_rebuild_txg = txg;
7853 newvd->vdev_resilver_txg = txg;
7860 dsl_scan_restart_resilver(spa->spa_dsl_pool,
7866 if (spa->spa_bootfs)
7899 vdev_t *rvd __maybe_unused = spa->spa_root_vdev;
7905 ASSERT(spa_writeable(spa));
7925 ASSERT(MUTEX_HELD(&spa_namespace_lock));
7935 if (!vd->vdev_ops->vdev_op_leaf)
7938 pvd = vd->vdev_parent;
7942 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
7948 * ask to detach C, which would leave us with just A -- not what
7950 * parent/child relationship hasn't changed -- in this example,
7953 if (pvd->vdev_guid != pguid && pguid != 0)
7959 if (replace_done && pvd->vdev_ops != &vdev_replacing_ops &&
7960 pvd->vdev_ops != &vdev_spare_ops)
7963 ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
7969 if (pvd->vdev_ops != &vdev_replacing_ops &&
7970 pvd->vdev_ops != &vdev_mirror_ops &&
7971 pvd->vdev_ops != &vdev_spare_ops)
7981 ASSERT(pvd->vdev_children >= 2);
7988 if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 &&
7989 vd->vdev_path != NULL) {
7990 size_t len = strlen(vd->vdev_path);
7992 for (int c = 0; c < pvd->vdev_children; c++) {
7993 cvd = pvd->vdev_child[c];
7995 if (cvd == vd || cvd->vdev_path == NULL)
7998 if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
7999 strcmp(cvd->vdev_path + len, "/old") == 0) {
8000 spa_strfree(cvd->vdev_path);
8001 cvd->vdev_path = spa_strdup(vd->vdev_path);
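
The strncmp/strcmp pair above recognizes the sibling that an in-place replacement left behind under "<path>/old", so that detaching the original device lets the survivor take the plain name back. The same test as a tiny standalone helper; is_old_twin() is hypothetical, not a kernel function:

#include <string.h>

static int
is_old_twin(const char *sibling_path, const char *detached_path)
{
	size_t len = strlen(detached_path);

	/* sibling is "<detached_path>/old" */
	return (strncmp(sibling_path, detached_path, len) == 0 &&
	    strcmp(sibling_path + len, "/old") == 0);
}
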
8014 if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0) {
8015 vdev_t *last_cvd = pvd->vdev_child[pvd->vdev_children - 1];
8017 if (last_cvd->vdev_isspare &&
8018 last_cvd->vdev_ops != &vdev_draid_spare_ops) {
8027 * But if we can't do it, don't treat the error as fatal --
8042 cvd = pvd->vdev_child[pvd->vdev_children - 1];
8054 ASSERT(cvd->vdev_isspare);
8056 unspare_guid = cvd->vdev_guid;
8058 cvd->vdev_unspare = B_TRUE;
8065 if (pvd->vdev_children == 1) {
8066 if (pvd->vdev_ops == &vdev_spare_ops)
8067 cvd->vdev_unspare = B_FALSE;
8073 * may have been the previous top-level vdev.
8075 tvd = cvd->vdev_top;
8076 ASSERT(tvd->vdev_parent == rvd);
8090 if (spa->spa_autoexpand) {
8099 * vd->vdev_detached is set and free vd's DTL object in syncing context.
8103 vdpath = spa_strdup(vd->vdev_path ? vd->vdev_path : "none");
8105 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
8106 vd->vdev_detached = B_TRUE;
8131 if (altspa->spa_state != POOL_STATE_ACTIVE ||
8159 ASSERT(MUTEX_HELD(&spa_namespace_lock));
8165 if (vd == NULL || vd->vdev_detached) {
8168 } else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
8175 mutex_enter(&vd->vdev_initialize_lock);
8186 (vd->vdev_initialize_thread != NULL ||
8187 vd->vdev_top->vdev_removing || vd->vdev_top->vdev_rz_expanding)) {
8188 mutex_exit(&vd->vdev_initialize_lock);
8191 (vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE &&
8192 vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) {
8193 mutex_exit(&vd->vdev_initialize_lock);
8196 vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
8197 mutex_exit(&vd->vdev_initialize_lock);
8200 vd->vdev_initialize_thread != NULL) {
8201 mutex_exit(&vd->vdev_initialize_lock);
8221 mutex_exit(&vd->vdev_initialize_lock);
8265 txg_wait_synced(spa->spa_dsl_pool, 0);
8277 ASSERT(MUTEX_HELD(&spa_namespace_lock));
8283 if (vd == NULL || vd->vdev_detached) {
8286 } else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
8292 } else if (!vd->vdev_has_trim) {
8295 } else if (secure && !vd->vdev_has_securetrim) {
8299 mutex_enter(&vd->vdev_trim_lock);
8309 (vd->vdev_trim_thread != NULL || vd->vdev_top->vdev_removing ||
8310 vd->vdev_top->vdev_rz_expanding)) {
8311 mutex_exit(&vd->vdev_trim_lock);
8314 (vd->vdev_trim_state != VDEV_TRIM_ACTIVE &&
8315 vd->vdev_trim_state != VDEV_TRIM_SUSPENDED)) {
8316 mutex_exit(&vd->vdev_trim_lock);
8319 vd->vdev_trim_state != VDEV_TRIM_ACTIVE) {
8320 mutex_exit(&vd->vdev_trim_lock);
8337 mutex_exit(&vd->vdev_trim_lock);
8386 txg_wait_synced(spa->spa_dsl_pool, 0);
8411 ASSERT(spa_writeable(spa));
8415 ASSERT(MUTEX_HELD(&spa_namespace_lock));
8447 rvd = spa->spa_root_vdev;
8449 for (c = 0; c < rvd->vdev_children; c++) {
8450 vdev_t *vd = rvd->vdev_child[c];
8453 if (vd->vdev_islog || (vd->vdev_ops != &vdev_indirect_ops &&
8462 if (children != (lastlog != 0 ? lastlog : rvd->vdev_children))
8481 if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole ||
8482 spa->spa_root_vdev->vdev_child[c]->vdev_islog) {
8491 if (spa->spa_root_vdev->vdev_child[c]->vdev_ops ==
8510 if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops ||
8511 vml[c]->vdev_islog ||
8513 vml[c]->vdev_isspare ||
8514 vml[c]->vdev_isl2cache ||
8516 vml[c]->vdev_children != 0 ||
8517 vml[c]->vdev_state != VDEV_STATE_HEALTHY ||
8518 c != spa->spa_root_vdev->vdev_child[c]->vdev_id) {
8531 vml[c]->vdev_top->vdev_ms_array);
8533 vml[c]->vdev_top->vdev_ms_shift);
8535 vml[c]->vdev_top->vdev_asize);
8537 vml[c]->vdev_top->vdev_ashift);
8539 /* transfer per-vdev ZAPs */
8540 ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0);
8542 ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap));
8544 ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0);
8547 vml[c]->vdev_parent->vdev_top_zap));
8559 vml[c]->vdev_offline = B_TRUE;
8561 vdev_reopen(spa->spa_root_vdev);
8571 mutex_enter(&spa->spa_props_lock);
8572 fnvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl);
8573 mutex_exit(&spa->spa_props_lock);
8574 spa->spa_config_splitting = nvl;
8575 vdev_config_dirty(spa->spa_root_vdev);
8582 fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg);
8591 newspa->spa_avz_action = AVZ_ACTION_REBUILD;
8592 newspa->spa_config_txg = spa->spa_config_txg;
8618 if (vml[c] != NULL && vml[c]->vdev_ops != &vdev_indirect_ops) {
8619 mutex_enter(&vml[c]->vdev_initialize_lock);
8622 mutex_exit(&vml[c]->vdev_initialize_lock);
8624 mutex_enter(&vml[c]->vdev_trim_lock);
8626 mutex_exit(&vml[c]->vdev_trim_lock);
8636 newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
8637 newspa->spa_is_splitting = B_TRUE;
8645 if (newspa->spa_root_vdev != NULL) {
8646 newspa->spa_config_splitting = fnvlist_alloc();
8647 fnvlist_add_uint64(newspa->spa_config_splitting,
8649 spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
8663 vdev_config_dirty(newspa->spa_root_vdev);
8673 tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
8678 if (vml[c] != NULL && vml[c]->vdev_ops != &vdev_indirect_ops) {
8679 vdev_t *tvd = vml[c]->vdev_top;
8688 &tvd->vdev_dtl_list, vml[c], t);
8694 "vdev=%s", vml[c]->vdev_path);
8699 spa->spa_avz_action = AVZ_ACTION_REBUILD;
8700 vdev_config_dirty(spa->spa_root_vdev);
8701 spa->spa_config_splitting = NULL;
8714 newspa->spa_is_splitting = B_FALSE;
8731 /* re-online all offlined disks */
8734 vml[c]->vdev_offline = B_FALSE;
8742 vdev_reopen(spa->spa_root_vdev);
8744 nvlist_free(spa->spa_config_splitting);
8745 spa->spa_config_splitting = NULL;
8761 for (int c = 0; c < vd->vdev_children; c++) {
8762 oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
8775 if (vd->vdev_ops == &vdev_replacing_ops) {
8776 ASSERT(vd->vdev_children > 1);
8778 newvd = vd->vdev_child[vd->vdev_children - 1];
8779 oldvd = vd->vdev_child[0];
8791 if (vd->vdev_ops == &vdev_spare_ops) {
8792 vdev_t *first = vd->vdev_child[0];
8793 vdev_t *last = vd->vdev_child[vd->vdev_children - 1];
8795 if (last->vdev_unspare) {
8798 } else if (first->vdev_unspare) {
8820 if (vd->vdev_children > 2) {
8821 newvd = vd->vdev_child[1];
8823 if (newvd->vdev_isspare && last->vdev_isspare &&
8842 while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
8843 pvd = vd->vdev_parent;
8844 ppvd = pvd->vdev_parent;
8845 guid = vd->vdev_guid;
8846 pguid = pvd->vdev_guid;
8847 ppguid = ppvd->vdev_guid;
8854 if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
8855 ppvd->vdev_children == 2) {
8856 ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
8857 sguid = ppvd->vdev_child[1]->vdev_guid;
8859 ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd));
8888 ASSERT(spa_writeable(spa));
8895 if (!vd->vdev_ops->vdev_op_leaf)
8899 if (strcmp(value, vd->vdev_path) != 0) {
8900 spa_strfree(vd->vdev_path);
8901 vd->vdev_path = spa_strdup(value);
8905 if (vd->vdev_fru == NULL) {
8906 vd->vdev_fru = spa_strdup(value);
8908 } else if (strcmp(value, vd->vdev_fru) != 0) {
8909 spa_strfree(vd->vdev_fru);
8910 vd->vdev_fru = spa_strdup(value);
8938 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
8940 if (dsl_scan_resilvering(spa->spa_dsl_pool))
8943 return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));
8949 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
8950 if (dsl_scan_resilvering(spa->spa_dsl_pool))
8953 return (dsl_scan_cancel(spa->spa_dsl_pool));
8966 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
8983 !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
8992 return (dsl_scan(spa->spa_dsl_pool, func, txgstart, txgend));
9004 if (vd->vdev_remove_wanted) {
9005 vd->vdev_remove_wanted = B_FALSE;
9006 vd->vdev_delayed_close = B_FALSE;
9015 vd->vdev_stat.vs_read_errors = 0;
9016 vd->vdev_stat.vs_write_errors = 0;
9017 vd->vdev_stat.vs_checksum_errors = 0;
9019 vdev_state_dirty(vd->vdev_top);
9025 for (int c = 0; c < vd->vdev_children; c++)
9026 spa_async_remove(spa, vd->vdev_child[c], by_kernel);
9032 if (vd->vdev_fault_wanted) {
9034 vd->vdev_fault_wanted = B_FALSE;
9040 if (!vd->vdev_top->vdev_islog && vd->vdev_aux == NULL &&
9048 for (int c = 0; c < vd->vdev_children; c++)
9049 spa_async_fault_vdev(vd->vdev_child[c], suspend);
9055 if (!spa->spa_autoexpand)
9058 for (int c = 0; c < vd->vdev_children; c++) {
9059 vdev_t *cvd = vd->vdev_child[c];
9063 if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
9066 spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_AUTOEXPAND);
9073 dsl_pool_t *dp = spa->spa_dsl_pool;
9076 ASSERT(spa->spa_sync_on);
9078 mutex_enter(&spa->spa_async_lock);
9079 tasks = spa->spa_async_tasks;
9080 spa->spa_async_tasks = 0;
9081 mutex_exit(&spa->spa_async_lock);
9113 (u_longlong_t)(new_space - old_space));
9125 spa_async_remove(spa, spa->spa_root_vdev, by_kernel);
9126 for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
9127 spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i],
9129 for (int i = 0; i < spa->spa_spares.sav_count; i++)
9130 spa_async_remove(spa, spa->spa_spares.sav_vdevs[i],
9137 spa_async_autoexpand(spa, spa->spa_root_vdev);
9147 spa_async_fault_vdev(spa->spa_root_vdev, &suspend);
9166 !vdev_rebuild_active(spa->spa_root_vdev) &&
9168 !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
9174 vdev_initialize_restart(spa->spa_root_vdev);
9182 vdev_trim_restart(spa->spa_root_vdev);
9220 mutex_enter(&spa->spa_async_lock);
9221 spa->spa_async_thread = NULL;
9222 cv_broadcast(&spa->spa_async_cv);
9223 mutex_exit(&spa->spa_async_lock);
9230 mutex_enter(&spa->spa_async_lock);
9231 spa->spa_async_suspended++;
9232 while (spa->spa_async_thread != NULL)
9233 cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
9234 mutex_exit(&spa->spa_async_lock);
9238 zthr_t *condense_thread = spa->spa_condense_zthr;
9242 zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr;
9246 zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
9250 zthr_t *ll_delete_thread = spa->spa_livelist_delete_zthr;
9254 zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
9262 mutex_enter(&spa->spa_async_lock);
9263 ASSERT(spa->spa_async_suspended != 0);
9264 spa->spa_async_suspended--;
9265 mutex_exit(&spa->spa_async_lock);
9268 zthr_t *condense_thread = spa->spa_condense_zthr;
9272 zthr_t *raidz_expand_thread = spa->spa_raidz_expand_zthr;
9276 zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr;
9280 zthr_t *ll_delete_thread = spa->spa_livelist_delete_zthr;
9284 zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr;
9296 non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
9297 config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
9298 if (spa->spa_ccw_fail_time == 0) {
9302 (gethrtime() - spa->spa_ccw_fail_time) <
9312 mutex_enter(&spa->spa_async_lock);
9314 !spa->spa_async_suspended &&
9315 spa->spa_async_thread == NULL)
9316 spa->spa_async_thread = thread_create(NULL, 0,
9318 mutex_exit(&spa->spa_async_lock);
9324 zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task);
9325 mutex_enter(&spa->spa_async_lock);
9326 spa->spa_async_tasks |= task;
9327 mutex_exit(&spa->spa_async_lock);
9333 return (spa->spa_async_tasks);
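
The async-request path above is a simple bitmask handshake: requesters OR task bits into spa_async_tasks under spa_async_lock, and the worker snapshots and clears the word in one critical section, so duplicate requests collapse into a single pass and none are lost. A POSIX-threads sketch of that pattern; async_request() and async_take() are hypothetical, and in the kernel the dispatch of the worker thread is additionally gated by the suspend counter seen above:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t async_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t async_tasks;		/* pending task bits */

static void
async_request(uint64_t task)
{
	pthread_mutex_lock(&async_lock);
	async_tasks |= task;		/* duplicates simply re-set the bit */
	pthread_mutex_unlock(&async_lock);
}

static uint64_t
async_take(void)			/* called by the worker thread */
{
	pthread_mutex_lock(&async_lock);
	uint64_t tasks = async_tasks;
	async_tasks = 0;		/* the worker now owns this batch */
	pthread_mutex_unlock(&async_lock);
	return (tasks);
}
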
9369 zio_nowait(zio_free_sync(pio, pio->io_spa, dmu_tx_get_txg(tx), bp,
9370 pio->io_flags));
9378 ASSERT(!bp_freed);
9408 * would be considered a no-op as spa_deferred_bpobj should
9412 * immediately) for the edge-case scenario where we just
9417 VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
9435 * saves us a pre-read to get data we don't actually care about.
9442 memset(packed + nvsize, 0, bufsize - nvsize);
9444 dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
9448 VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
9450 *(uint64_t *)db->db_data = nvsize;
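
The memset/dmu_write/bonus sequence above writes whole blocks on purpose: padding the packed nvlist out to a block multiple means the write never has to read back a partial trailing block just to preserve bytes nobody cares about, while the true packed size is kept separately in the bonus buffer. A generic, userland-flavoured sketch of the padding step; pad_to_block() is hypothetical:

#include <stdlib.h>
#include <string.h>

static void *
pad_to_block(const void *data, size_t size, size_t blocksize,
    size_t *padded_size)
{
	size_t bufsize = ((size + blocksize - 1) / blocksize) * blocksize;
	char *buf = malloc(bufsize);

	if (buf == NULL)
		return (NULL);
	memcpy(buf, data, size);
	memset(buf + size, 0, bufsize - size);	/* zero the padding */
	*padded_size = bufsize;
	return (buf);
}
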
9462 if (!sav->sav_sync)
9470 if (sav->sav_object == 0) {
9471 sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
9474 VERIFY(zap_update(spa->spa_meta_objset,
9476 &sav->sav_object, tx) == 0);
9480 if (sav->sav_count == 0) {
9484 list = kmem_alloc(sav->sav_count*sizeof (void *), KM_SLEEP);
9485 for (i = 0; i < sav->sav_count; i++)
9486 list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
9489 (const nvlist_t * const *)list, sav->sav_count);
9490 for (i = 0; i < sav->sav_count; i++)
9492 kmem_free(list, sav->sav_count * sizeof (void *));
9495 spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
9498 sav->sav_sync = B_FALSE;
9502 * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t.
9503 * The all-vdev ZAP must be empty.
9508 spa_t *spa = vd->vdev_spa;
9510 if (vd->vdev_root_zap != 0 &&
9512 VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
9513 vd->vdev_root_zap, tx));
9515 if (vd->vdev_top_zap != 0) {
9516 VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
9517 vd->vdev_top_zap, tx));
9519 if (vd->vdev_leaf_zap != 0) {
9520 VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
9521 vd->vdev_leaf_zap, tx));
9523 for (uint64_t i = 0; i < vd->vdev_children; i++) {
9524 spa_avz_build(vd->vdev_child[i], avz, tx);
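
spa_avz_build() above is a depth-first walk: every vdev may own a root, top, and leaf ZAP object, and rebuilding the all-vdev ZAP amounts to recording each nonzero object number while recursing into the children. A self-contained sketch of that shape; struct node and collect_zaps() are hypothetical, and the real code records entries with zap_add_int() as shown:

struct node {
	unsigned long long root_zap, top_zap, leaf_zap;
	struct node **child;
	unsigned long long children;
};

static void
collect_zaps(const struct node *n, void (*record)(unsigned long long))
{
	if (n->root_zap != 0)
		record(n->root_zap);
	if (n->top_zap != 0)
		record(n->top_zap);
	if (n->leaf_zap != 0)
		record(n->leaf_zap);
	for (unsigned long long i = 0; i < n->children; i++)
		collect_zaps(n->child[i], record);
}
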
9534 * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS,
9535 * its config may not be dirty but we still need to build per-vdev ZAPs.
9539 if (list_is_empty(&spa->spa_config_dirty_list) &&
9540 spa->spa_avz_action == AVZ_ACTION_NONE)
9545 ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE ||
9546 spa->spa_avz_action == AVZ_ACTION_INITIALIZE ||
9547 spa->spa_all_vdev_zaps != 0);
9549 if (spa->spa_avz_action == AVZ_ACTION_REBUILD) {
9551 uint64_t new_avz = zap_create(spa->spa_meta_objset,
9553 spa_avz_build(spa->spa_root_vdev, new_avz, tx);
9559 for (zap_cursor_init(&zc, spa->spa_meta_objset,
9560 spa->spa_all_vdev_zaps);
9563 uint64_t vdzap = za->za_first_integer;
9564 if (zap_lookup_int(spa->spa_meta_objset, new_avz,
9570 VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap,
9579 VERIFY0(zap_destroy(spa->spa_meta_objset,
9580 spa->spa_all_vdev_zaps, tx));
9583 VERIFY0(zap_update(spa->spa_meta_objset,
9587 spa->spa_all_vdev_zaps = new_avz;
9588 } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) {
9593 for (zap_cursor_init(&zc, spa->spa_meta_objset,
9594 spa->spa_all_vdev_zaps);
9597 uint64_t zap = za->za_first_integer;
9598 VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx));
9605 VERIFY0(zap_destroy(spa->spa_meta_objset,
9606 spa->spa_all_vdev_zaps, tx));
9607 VERIFY0(zap_remove(spa->spa_meta_objset,
9609 spa->spa_all_vdev_zaps = 0;
9612 if (spa->spa_all_vdev_zaps == 0) {
9613 spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset,
9617 spa->spa_avz_action = AVZ_ACTION_NONE;
9620 vdev_construct_zaps(spa->spa_root_vdev, tx);
9622 config = spa_config_generate(spa, spa->spa_root_vdev,
9629 if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version)
9631 spa->spa_uberblock.ub_version);
9635 nvlist_free(spa->spa_config_syncing);
9636 spa->spa_config_syncing = config;
9638 spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
9646 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
9651 ASSERT(tx->tx_txg != TXG_INITIAL);
9653 ASSERT(SPA_VERSION_IS_SUPPORTED(version));
9654 ASSERT(version >= spa_version(spa));
9656 spa->spa_uberblock.ub_version = version;
9657 vdev_config_dirty(spa->spa_root_vdev);
9669 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
9670 objset_t *mos = spa->spa_meta_objset;
9673 mutex_enter(&spa->spa_props_lock);
9696 * 'altroot' is a non-persistent property. It should
9699 ASSERT(spa->spa_root != NULL);
9705 * 'readonly' and 'cachefile' are also non-persistent
9711 if (spa->spa_comment != NULL)
9712 spa_strfree(spa->spa_comment);
9713 spa->spa_comment = spa_strdup(strval);
9722 if (tx->tx_txg != TXG_INITIAL) {
9723 vdev_config_dirty(spa->spa_root_vdev);
9731 if (spa->spa_compatibility != NULL)
9732 spa_strfree(spa->spa_compatibility);
9733 spa->spa_compatibility = spa_strdup(strval);
9737 if (tx->tx_txg != TXG_INITIAL) {
9738 vdev_config_dirty(spa->spa_root_vdev);
9756 ASSERT(zpool_prop_feature(elemname));
9764 if (spa->spa_pool_props_object == 0) {
9765 spa->spa_pool_props_object =
9781 ASSERT(proptype == PROP_TYPE_STRING);
9786 spa->spa_pool_props_object,
9790 spa->spa_pool_props_object,
9805 spa->spa_pool_props_object, propname,
9813 spa->spa_delegation = intval;
9816 spa->spa_bootfs = intval;
9819 spa->spa_failmode = intval;
9822 spa->spa_autotrim = intval;
9827 spa->spa_autoexpand = intval;
9828 if (tx->tx_txg != TXG_INITIAL)
9833 spa->spa_multihost = intval;
9836 spa->spa_dedup_table_quota = intval;
9842 ASSERT(0); /* not allowed */
9848 mutex_exit(&spa->spa_props_lock);
9852 * Perform one-time upgrade on-disk changes. spa_version() does not
9864 dsl_pool_t *dp = spa->spa_dsl_pool;
9865 rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
9867 if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
9868 spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
9872 spa->spa_minref += 3;
9875 if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
9876 spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
9880 if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES &&
9881 spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) {
9885 spa->spa_minref += 3;
9888 if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
9889 spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
9899 if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
9914 if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
9916 VERIFY0(zap_add(spa->spa_meta_objset,
9918 sizeof (spa->spa_cksum_salt.zcs_bytes),
9919 spa->spa_cksum_salt.zcs_bytes, tx));
9922 rrw_exit(&dp->dp_config_rwlock, FTAG);
9928 vdev_indirect_mapping_t *vim __maybe_unused = vd->vdev_indirect_mapping;
9929 vdev_indirect_births_t *vib __maybe_unused = vd->vdev_indirect_births;
9931 if (vd->vdev_ops == &vdev_indirect_ops) {
9932 ASSERT(vim != NULL);
9933 ASSERT(vib != NULL);
9939 ASSERT(vd->vdev_obsolete_sm != NULL);
9940 ASSERT(vd->vdev_removing ||
9941 vd->vdev_ops == &vdev_indirect_ops);
9942 ASSERT(vdev_indirect_mapping_num_entries(vim) > 0);
9943 ASSERT(vdev_indirect_mapping_bytes_mapped(vim) > 0);
9945 space_map_object(vd->vdev_obsolete_sm));
9947 space_map_allocated(vd->vdev_obsolete_sm));
9949 ASSERT(vd->vdev_obsolete_segments != NULL);
9956 ASSERT0(zfs_range_tree_space(vd->vdev_obsolete_segments));
9960 * Set the top-level vdev's max queue depth. Evaluate each top-level's
9967 ASSERT(spa_writeable(spa));
9977 ASSERT(spa_writeable(spa));
9979 vdev_t *rvd = spa->spa_root_vdev;
9980 for (int c = 0; c < rvd->vdev_children; c++) {
9981 vdev_t *vd = rvd->vdev_child[c];
9994 objset_t *mos = spa->spa_meta_objset;
9995 dsl_pool_t *dp = spa->spa_dsl_pool;
9996 uint64_t txg = tx->tx_txg;
9997 bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
10000 int pass = ++spa->spa_sync_pass;
10003 spa_sync_aux_dev(spa, &spa->spa_spares, tx,
10005 spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
10026 &spa->spa_deferred_bpobj, tx);
10039 while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
10045 * dsl_pool_sync() -> dp_sync_tasks may have dirtied
10047 * be a no-op. So we must sync the config to the MOS
10048 * before checking for no-op.
10074 BP_GET_LOGICAL_BIRTH(&spa->spa_uberblock.ub_rootbp) < txg &&
10078 * TXG is a no-op. Avoid syncing deferred frees, so
10079 * that we can keep this TXG as a no-op.
10081 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
10082 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
10083 ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg));
10084 ASSERT(txg_list_empty(&dp->dp_early_sync_tasks, txg));
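
The spa_sync_pass counter above belongs to an iterate-to-convergence loop: syncing dirty state can itself dirty more state (frees update space maps, which dirty metaslabs, and so on), so sync passes repeat within one txg until a pass ends with nothing newly dirtied, and later passes defer more of the optional work. A bare-bones sketch of that loop shape, with all names hypothetical:

static void
iterate_to_convergence(void *st, void (*sync_pass)(void *, int),
    int (*still_dirty)(void *))
{
	int pass = 0;

	do {
		sync_pass(st, ++pass);	/* later passes defer more work */
	} while (still_dirty(st));
}
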
10097 * top-level vdevs that are known to be visible in the config cache
10104 vdev_t *rvd = spa->spa_root_vdev;
10105 uint64_t txg = tx->tx_txg;
10116 if (list_is_empty(&spa->spa_config_dirty_list)) {
10119 int children = rvd->vdev_children;
10124 rvd->vdev_child[(c0 + c) % children];
10130 if (vd->vdev_ms_array == 0 ||
10131 vd->vdev_islog ||
10141 error = vdev_config_sync(rvd->vdev_child,
10142 rvd->vdev_children, txg);
10146 spa->spa_last_synced_guid = rvd->vdev_guid;
10172 (void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]);
10173 spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
10188 spa->spa_syncing_txg = txg;
10189 spa->spa_sync_pass = 0;
10196 while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
10198 if (vd->vdev_aux == NULL) {
10212 while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
10221 dsl_pool_t *dp = spa->spa_dsl_pool;
10224 spa->spa_sync_starttime = gethrtime();
10225 taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
10226 spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
10228 NSEC_TO_TICK(spa->spa_deadman_synctime));
10232 * set spa_deflate if we have no raid-z vdevs.
10234 if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
10235 spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
10236 vdev_t *rvd = spa->spa_root_vdev;
10239 for (i = 0; i < rvd->vdev_children; i++) {
10240 vd = rvd->vdev_child[i];
10241 if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
10244 if (i == rvd->vdev_children) {
10245 spa->spa_deflate = TRUE;
10246 VERIFY0(zap_add(spa->spa_meta_objset,
10248 sizeof (uint64_t), 1, &spa->spa_deflate, tx));
10259 if (!list_is_empty(&spa->spa_config_dirty_list)) {
10262 * the number of ZAPs in the per-vdev ZAP list. This only gets
10268 ASSERT0(zap_count(spa->spa_meta_objset,
10269 spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count));
10270 ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==,
10275 if (spa->spa_vdev_removal != NULL) {
10276 ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]);
10282 taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
10283 spa->spa_deadman_tqid = 0;
10288 while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
10295 if (spa->spa_config_syncing != NULL) {
10296 spa_config_set(spa, spa->spa_config_syncing);
10297 spa->spa_config_txg = txg;
10298 spa->spa_config_syncing = NULL;
10306 while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
10310 metaslab_class_evict_old(spa->spa_normal_class, txg);
10311 metaslab_class_evict_old(spa->spa_log_class, txg);
10313 metaslab_class_evict_old(spa->spa_special_class, txg);
10314 metaslab_class_evict_old(spa->spa_dedup_class, txg);
10327 ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
10328 ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
10329 ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
10334 spa->spa_sync_pass = 0;
10342 spa->spa_ubsync = spa->spa_uberblock;
10381 ASSERT(spa->spa_sync_tq == NULL);
10382 ASSERT3S(spa->spa_alloc_count, <=, boot_ncpus);
10385 * - do not allow more allocators than cpus.
10386 * - there may be more cpus than allocators.
10387 * - do not allow more sync taskq threads than allocators or cpus.
10389 int nthreads = spa->spa_alloc_count;
10390 spa->spa_syncthreads = kmem_zalloc(sizeof (spa_syncthread_info_t) *
10393 spa->spa_sync_tq = taskq_create_synced(name, nthreads, minclsyspri,
10395 VERIFY(spa->spa_sync_tq != NULL);
10398 spa_syncthread_info_t *ti = spa->spa_syncthreads;
10400 ti->sti_thread = kthreads[i];
10401 ti->sti_allocator = i;
10405 return (spa->spa_sync_tq);
10411 ASSERT(spa->spa_sync_tq != NULL);
10413 taskq_wait(spa->spa_sync_tq);
10414 taskq_destroy(spa->spa_sync_tq);
10415 kmem_free(spa->spa_syncthreads,
10416 sizeof (spa_syncthread_info_t) * spa->spa_alloc_count);
10417 spa->spa_sync_tq = NULL;
10425 if (spa->spa_alloc_count == 1)
10428 mutex_enter(&spa->spa_allocs_use->sau_lock);
10429 uint_t r = spa->spa_allocs_use->sau_rotor;
10431 if (++r == spa->spa_alloc_count)
10433 } while (spa->spa_allocs_use->sau_inuse[r]);
10434 spa->spa_allocs_use->sau_inuse[r] = B_TRUE;
10435 spa->spa_allocs_use->sau_rotor = r;
10436 mutex_exit(&spa->spa_allocs_use->sau_lock);
10438 spa_syncthread_info_t *ti = spa->spa_syncthreads;
10439 for (i = 0; i < spa->spa_alloc_count; i++, ti++) {
10440 if (ti->sti_thread == curthread) {
10441 ti->sti_allocator = r;
10445 ASSERT3S(i, <, spa->spa_alloc_count);
10452 if (spa->spa_alloc_count > 1)
10453 spa->spa_allocs_use->sau_inuse[allocator] = B_FALSE;
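
The acquire/release pair above hands each syncing thread its own allocator slot: a rotor scans the in-use array for a free entry under sau_lock, claims it, and leaves the cursor just past the claimed slot so the next caller starts there. A POSIX-threads sketch; alloc_slot() and free_slot() are hypothetical, and as in the kernel code callers must guarantee a free slot exists, which holds here because there are never more sync threads than allocators:

#include <pthread.h>
#include <stdbool.h>

#define	NSLOTS	4

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static bool slot_inuse[NSLOTS];
static unsigned slot_rotor;

static unsigned
alloc_slot(void)
{
	pthread_mutex_lock(&slot_lock);
	unsigned r = slot_rotor;
	do {
		if (++r == NSLOTS)
			r = 0;
	} while (slot_inuse[r]);	/* terminates: a free slot is guaranteed */
	slot_inuse[r] = true;
	slot_rotor = r;			/* next caller starts past this slot */
	pthread_mutex_unlock(&slot_lock);
	return (r);
}

static void
free_slot(unsigned r)
{
	pthread_mutex_lock(&slot_lock);
	slot_inuse[r] = false;
	pthread_mutex_unlock(&slot_lock);
}
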
10459 zbookmark_phys_t *bm = &zio->io_bookmark;
10460 spa_t *spa = zio->io_spa;
10462 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
10471 ASSERT(spa != NULL);
10472 ASSERT(bm != NULL);
10479 if (spa->spa_sync_tq != NULL) {
10480 spa_syncthread_info_t *ti = spa->spa_syncthreads;
10481 for (int i = 0; i < spa->spa_alloc_count; i++, ti++) {
10482 if (ti->sti_thread == curthread) {
10483 zio->io_allocator = ti->sti_allocator;
10496 uint64_t hv = cityhash4(bm->zb_objset, bm->zb_object, bm->zb_level,
10497 bm->zb_blkid >> 20);
10499 zio->io_allocator = (uint_t)hv % spa->spa_alloc_count;
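
Outside of a sync thread the allocator is picked by hashing the bookmark, as shown above with cityhash4(): shifting zb_blkid right by 20 keeps long runs of adjacent blocks from one object on the same allocator, while different objects and datasets spread across allocators. A standalone sketch of the same idea using a splitmix64-style mixer instead of cityhash4(); mix64() and pick_allocator() are hypothetical:

#include <stdint.h>

static uint64_t
mix64(uint64_t x)
{
	/* splitmix64 finalizer; any decent 64-bit mixer works here. */
	x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
	x ^= x >> 27; x *= 0x94d049bb133111ebULL;
	x ^= x >> 31;
	return (x);
}

static unsigned
pick_allocator(uint64_t objset, uint64_t object, uint64_t level,
    uint64_t blkid, unsigned alloc_count)
{
	uint64_t hv = mix64(objset) ^
	    mix64(object + 0x9e3779b97f4a7c15ULL) ^
	    mix64(level + 1) ^
	    mix64(blkid >> 20);	/* adjacent blocks share an allocator */

	return ((unsigned)(hv % alloc_count));
}
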
10533 if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
10548 if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
10552 for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
10553 vd = spa->spa_l2cache.sav_vdevs[i];
10554 if (vd->vdev_guid == guid)
10558 for (i = 0; i < spa->spa_spares.sav_count; i++) {
10559 vd = spa->spa_spares.sav_vdevs[i];
10560 if (vd->vdev_guid == guid)
10571 ASSERT(spa_writeable(spa));
10576 * This should only be called for a non-faulted pool, and since a
10580 ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
10581 ASSERT3U(version, >=, spa->spa_uberblock.ub_version);
10583 spa->spa_uberblock.ub_version = version;
10584 vdev_config_dirty(spa->spa_root_vdev);
10598 for (i = 0; i < sav->sav_count; i++)
10599 if (sav->sav_vdevs[i]->vdev_guid == guid)
10602 for (i = 0; i < sav->sav_npending; i++) {
10603 if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
10614 return (spa_has_aux_vdev(spa, guid, &spa->spa_l2cache));
10620 return (spa_has_aux_vdev(spa, guid, &spa->spa_spares));
10632 spa_aux_vdev_t *sav = &spa->spa_spares;
10634 for (i = 0; i < sav->sav_count; i++) {
10635 if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
10647 vdev_t *rvd = spa->spa_root_vdev;
10650 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
10651 vdev_t *vd = rvd->vdev_child[c];
10654 m += vd->vdev_ms_count;
10660 * Notify any waiting threads that some activity has switched from being in-
10661 * progress to not-in-progress so that the thread can wake up and determine
10671 mutex_enter(&spa->spa_activities_lock);
10672 cv_broadcast(&spa->spa_activities_cv);
10673 mutex_exit(&spa->spa_activities_lock);
10683 mutex_enter(&spa->spa_activities_lock);
10684 spa->spa_waiters_cancel = B_TRUE;
10685 cv_broadcast(&spa->spa_activities_cv);
10686 while (spa->spa_waiters != 0)
10687 cv_wait(&spa->spa_waiters_cv, &spa->spa_activities_lock);
10688 spa->spa_waiters_cancel = B_FALSE;
10689 mutex_exit(&spa->spa_activities_lock);
10696 spa_t *spa = vd->vdev_spa;
10698 ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER));
10699 ASSERT(MUTEX_HELD(&spa->spa_activities_lock));
10700 ASSERT(activity == ZPOOL_WAIT_INITIALIZE ||
10704 &vd->vdev_initialize_lock : &vd->vdev_trim_lock;
10706 mutex_exit(&spa->spa_activities_lock);
10708 mutex_enter(&spa->spa_activities_lock);
10711 (vd->vdev_initialize_state == VDEV_INITIALIZE_ACTIVE) :
10712 (vd->vdev_trim_state == VDEV_TRIM_ACTIVE);
10718 for (int i = 0; i < vd->vdev_children; i++) {
10719 if (spa_vdev_activity_in_progress_impl(vd->vdev_child[i],
10737 mutex_exit(&spa->spa_activities_lock);
10739 mutex_enter(&spa->spa_activities_lock);
10744 if (vd == NULL || !vd->vdev_ops->vdev_op_leaf) {
10749 vd = spa->spa_root_vdev;
10760 * ---------------------------
10764 * in-memory representation of the relevant on-disk state which can be used to
10765 * determine whether or not the activity is in progress. The in-memory state and
10771 * When the state is checked, both the activity-specific lock (if there is one)
10772 * and spa_activities_lock are held. In some cases, the activity-specific lock
10775 * thread releases the activity-specific lock and, if the activity is in
10781 * needs to hold its activity-specific lock when updating the state, and this
10792 * activity-specific lock. The order in which spa_activities_lock and the
10795 * completing thread calls spa_notify_waiters with the activity-specific lock
10796 * held, then the waiting thread must also acquire the activity-specific lock
10806 ASSERT(MUTEX_HELD(&spa->spa_activities_lock));
10818 !bpobj_is_empty(&spa->spa_dsl_pool->dp_free_bpobj)) ||
10828 mutex_exit(&spa->spa_activities_lock);
10830 mutex_enter(&spa->spa_activities_lock);
10832 *in_progress = vdev_replace_in_progress(spa->spa_root_vdev);
10836 *in_progress = (spa->spa_removing_phys.sr_state ==
10840 *in_progress = vdev_rebuild_active(spa->spa_root_vdev);
10847 dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
10849 is_scrub = (scn->scn_phys.scn_func == POOL_SCAN_SCRUB);
10850 scanning = (scn->scn_phys.scn_state == DSS_SCANNING);
10858 vdev_raidz_expand_t *vre = spa->spa_raidz_expand;
10859 *in_progress = (vre != NULL && vre->vre_state == DSS_SCANNING);
10903 mutex_enter(&spa->spa_activities_lock);
10904 spa->spa_waiters++;
10913 if (error || !in_progress || spa->spa_waiters_cancel)
10918 if (cv_wait_sig(&spa->spa_activities_cv,
10919 &spa->spa_activities_lock) == 0) {
10925 spa->spa_waiters--;
10926 cv_signal(&spa->spa_waiters_cv);
10927 mutex_exit(&spa->spa_activities_lock);
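
The waiter protocol excerpted above re-checks the in-progress predicate under spa_activities_lock on every wakeup, so a broadcast from spa_notify_waiters() can never slip in between the check and the sleep, and a cancel flag plus a waiter count let teardown flush everyone out before the pool goes away. A POSIX-threads sketch of the same protocol; all names are hypothetical, and the kernel versions additionally handle signals and per-vdev locks:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t act_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t act_cv = PTHREAD_COND_INITIALIZER;
static pthread_cond_t drain_cv = PTHREAD_COND_INITIALIZER;
static int waiters;
static bool cancel;

static void
wait_for_activity(bool (*in_progress)(void))
{
	pthread_mutex_lock(&act_lock);
	waiters++;
	while (in_progress() && !cancel)
		pthread_cond_wait(&act_cv, &act_lock);	/* re-check on wake */
	waiters--;
	pthread_cond_signal(&drain_cv);
	pthread_mutex_unlock(&act_lock);
}

static void
notify_waiters(void)			/* an activity changed state */
{
	pthread_mutex_lock(&act_lock);
	pthread_cond_broadcast(&act_cv);
	pthread_mutex_unlock(&act_lock);
}

static void
wake_and_drain_waiters(void)		/* e.g. at export time */
{
	pthread_mutex_lock(&act_lock);
	cancel = true;
	pthread_cond_broadcast(&act_cv);
	while (waiters != 0)
		pthread_cond_wait(&drain_cv, &act_lock);
	cancel = false;
	pthread_mutex_unlock(&act_lock);
}
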
10963 ev->resource = resource;
10976 zfs_zevent_post(ev->resource, NULL, zfs_zevent_post_cb);
11076 "Allow importing pool with up to this number of missing top-level "
11077 "vdevs (in read-only mode)");