Lines Matching +full:max +full:- +full:rt

1 // SPDX-License-Identifier: CDDL-1.0
10 * or https://opensource.org/licenses/CDDL-1.0.
45 * DMU to keep more data in-core, and also to waste more I/O bandwidth
50 * Enabled whenever we want to stress test the use of double-word
59 * or two indirect blocks (16K-32K, rather than 128K).
83 * Iterate through the space map, invoking the callback on each (non-debug)
89 uint64_t blksz = sm->sm_blksz; in space_map_iterate()
95 dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, end, in space_map_iterate()
103 error = dmu_buf_hold(sm->sm_os, space_map_object(sm), in space_map_iterate()
108 uint64_t *block_start = db->db_data; in space_map_iterate()
109 uint64_t block_length = MIN(end - block_base, blksz); in space_map_iterate()
115 ASSERT3U(blksz, ==, db->db_size); in space_map_iterate()
128 * at the end of space map blocks in-order in space_map_iterate()
129 * to not split a double-word entry in the in space_map_iterate()
153 /* it is a two-word entry */ in space_map_iterate()
167 uint64_t entry_offset = (raw_offset << sm->sm_shift) + in space_map_iterate()
168 sm->sm_start; in space_map_iterate()
169 uint64_t entry_run = raw_run << sm->sm_shift; in space_map_iterate()
171 VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift)); in space_map_iterate()
172 VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift)); in space_map_iterate()
173 ASSERT3U(entry_offset, >=, sm->sm_start); in space_map_iterate()
174 ASSERT3U(entry_offset, <, sm->sm_start + sm->sm_size); in space_map_iterate()
175 ASSERT3U(entry_run, <=, sm->sm_size); in space_map_iterate()
177 sm->sm_start + sm->sm_size); in space_map_iterate()
215 sm->sm_phys->smp_length - sizeof (uint64_t); in space_map_reversed_last_block_entries()
216 error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset, in space_map_reversed_last_block_entries()
221 ASSERT3U(sm->sm_object, ==, db->db_object); in space_map_reversed_last_block_entries()
222 ASSERT3U(sm->sm_blksz, ==, db->db_size); in space_map_reversed_last_block_entries()
223 ASSERT3U(bufsz, >=, db->db_size); in space_map_reversed_last_block_entries()
226 uint64_t *words = db->db_data; in space_map_reversed_last_block_entries()
228 (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t); in space_map_reversed_last_block_entries()
233 uint64_t j = n - 1; in space_map_reversed_last_block_entries()
240 * words of the double-word entry in the right in space_map_reversed_last_block_entries()
244 buf[j - 1] = entry; in space_map_reversed_last_block_entries()
250 j -= 2; in space_map_reversed_last_block_entries()
255 j--; in space_map_reversed_last_block_entries()
263 ASSERT3S(j, ==, -1); in space_map_reversed_last_block_entries()
270 * Note: This function performs destructive actions - specifically
279 uint64_t bufsz = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE); in space_map_incremental_destroy()
282 dmu_buf_will_dirty(sm->sm_dbuf, tx); in space_map_incremental_destroy()
287 * approach is that we don't have any field on-disk that points in space_map_incremental_destroy()
297 * can't distinguish two-word space map entries from their second in space_map_incremental_destroy()
327 sm->sm_phys->smp_length -= sizeof (uint64_t); in space_map_incremental_destroy()
357 (raw_offset << sm->sm_shift) + sm->sm_start; in space_map_incremental_destroy()
358 uint64_t entry_run = raw_run << sm->sm_shift; in space_map_incremental_destroy()
360 VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift)); in space_map_incremental_destroy()
361 VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift)); in space_map_incremental_destroy()
362 VERIFY3U(entry_offset, >=, sm->sm_start); in space_map_incremental_destroy()
363 VERIFY3U(entry_offset, <, sm->sm_start + sm->sm_size); in space_map_incremental_destroy()
364 VERIFY3U(entry_run, <=, sm->sm_size); in space_map_incremental_destroy()
366 sm->sm_start + sm->sm_size); in space_map_incremental_destroy()
379 sm->sm_phys->smp_alloc -= entry_run; in space_map_incremental_destroy()
381 sm->sm_phys->smp_alloc += entry_run; in space_map_incremental_destroy()
382 sm->sm_phys->smp_length -= words * sizeof (uint64_t); in space_map_incremental_destroy()
405 if (sme->sme_type == smla->smla_type) { in space_map_load_callback()
406 VERIFY3U(zfs_range_tree_space(smla->smla_rt) + sme->sme_run, <=, in space_map_load_callback()
407 smla->smla_sm->sm_size); in space_map_load_callback()
408 zfs_range_tree_add(smla->smla_rt, sme->sme_offset, in space_map_load_callback()
409 sme->sme_run); in space_map_load_callback()
411 zfs_range_tree_remove(smla->smla_rt, sme->sme_offset, in space_map_load_callback()
412 sme->sme_run); in space_map_load_callback()
423 space_map_load_length(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, in space_map_load_length() argument
428 VERIFY0(zfs_range_tree_space(rt)); in space_map_load_length()
431 zfs_range_tree_add(rt, sm->sm_start, sm->sm_size); in space_map_load_length()
433 smla.smla_rt = rt; in space_map_load_length()
440 zfs_range_tree_vacate(rt, NULL, NULL); in space_map_load_length()
450 space_map_load(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype) in space_map_load() argument
452 return (space_map_load_length(sm, rt, maptype, space_map_length(sm))); in space_map_load()
458 if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) in space_map_histogram_clear()
461 memset(sm->sm_phys->smp_histogram, 0, in space_map_histogram_clear()
462 sizeof (sm->sm_phys->smp_histogram)); in space_map_histogram_clear()
466 space_map_histogram_verify(space_map_t *sm, zfs_range_tree_t *rt) in space_map_histogram_verify() argument
469 * Verify that the in-core range tree does not have any in space_map_histogram_verify()
472 for (int i = 0; i < sm->sm_shift; i++) { in space_map_histogram_verify()
473 if (rt->rt_histogram[i] != 0) in space_map_histogram_verify()
480 space_map_histogram_add(space_map_t *sm, zfs_range_tree_t *rt, dmu_tx_t *tx) in space_map_histogram_add() argument
487 if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) in space_map_histogram_add()
490 dmu_buf_will_dirty(sm->sm_dbuf, tx); in space_map_histogram_add()
492 ASSERT(space_map_histogram_verify(sm, rt)); in space_map_histogram_add()
496 * between 2^sm_shift to 2^(32+sm_shift-1). The range tree, in space_map_histogram_add()
501 for (int i = sm->sm_shift; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { in space_map_histogram_add()
505 * 2^(32+sm_shift-1), we need to normalize the values in in space_map_histogram_add()
512 ASSERT3U(i, >=, idx + sm->sm_shift); in space_map_histogram_add()
513 sm->sm_phys->smp_histogram[idx] += in space_map_histogram_add()
514 rt->rt_histogram[i] << (i - idx - sm->sm_shift); in space_map_histogram_add()
519 * larger than the max bucket size into the last bucket. in space_map_histogram_add()
521 if (idx < SPACE_MAP_HISTOGRAM_SIZE - 1) { in space_map_histogram_add()
522 ASSERT3U(idx + sm->sm_shift, ==, i); in space_map_histogram_add()
532 dmu_buf_will_dirty(sm->sm_dbuf, tx); in space_map_write_intro_debug()
536 SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) | in space_map_write_intro_debug()
539 dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_length, in space_map_write_intro_debug()
542 sm->sm_phys->smp_length += sizeof (dentry); in space_map_write_intro_debug()
570 ASSERT3U(db->db_size, ==, sm->sm_blksz); in space_map_write_seg()
572 uint64_t *block_base = db->db_data; in space_map_write_seg()
573 uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t)); in space_map_write_seg()
575 (sm->sm_phys->smp_length - db->db_offset) / sizeof (uint64_t); in space_map_write_seg()
579 uint64_t size = (rend - rstart) >> sm->sm_shift; in space_map_write_seg()
580 uint64_t start = (rstart - sm->sm_start) >> sm->sm_shift; in space_map_write_seg()
583 ASSERT3U(rstart, >=, sm->sm_start); in space_map_write_seg()
584 ASSERT3U(rstart, <, sm->sm_start + sm->sm_size); in space_map_write_seg()
585 ASSERT3U(rend - rstart, <=, sm->sm_size); in space_map_write_seg()
586 ASSERT3U(rend, <=, sm->sm_start + sm->sm_size); in space_map_write_seg()
598 uint64_t next_word_offset = sm->sm_phys->smp_length; in space_map_write_seg()
599 VERIFY0(dmu_buf_hold(sm->sm_os, in space_map_write_seg()
607 ASSERT3U(db->db_size, ==, sm->sm_blksz); in space_map_write_seg()
609 block_base = db->db_data; in space_map_write_seg()
612 (db->db_size / sizeof (uint64_t)); in space_map_write_seg()
616 * If we are writing a two-word entry and we only have one in space_map_write_seg()
618 * entry and write the two-word entry in the next block. in space_map_write_seg()
628 sm->sm_phys->smp_length += sizeof (uint64_t); in space_map_write_seg()
655 panic("%d-word space map entries are not supported", in space_map_write_seg()
659 sm->sm_phys->smp_length += words * sizeof (uint64_t); in space_map_write_seg()
662 size -= run_len; in space_map_write_seg()
673 space_map_write_impl(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, in space_map_write_impl() argument
676 spa_t *spa = tx->tx_pool->dp_spa; in space_map_write_impl()
686 uint64_t initial_objsize = sm->sm_phys->smp_length; in space_map_write_impl()
688 space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID); in space_map_write_impl()
697 uint64_t next_word_offset = sm->sm_phys->smp_length; in space_map_write_impl()
698 VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm), in space_map_write_impl()
700 ASSERT3U(db->db_size, ==, sm->sm_blksz); in space_map_write_impl()
704 zfs_btree_t *t = &rt->rt_root; in space_map_write_impl()
708 uint64_t offset = (zfs_rs_get_start(rs, rt) - sm->sm_start) >> in space_map_write_impl()
709 sm->sm_shift; in space_map_write_impl()
710 uint64_t length = (zfs_rs_get_end(rs, rt) - in space_map_write_impl()
711 zfs_rs_get_start(rs, rt)) >> sm->sm_shift; in space_map_write_impl()
715 * We only write two-word entries when both of the following in space_map_write_impl()
719 * [2] The offset or run is too big for a single-word entry, in space_map_write_impl()
724 * we write two-word entries occasionally when the feature is in space_map_write_impl()
736 space_map_write_seg(sm, zfs_rs_get_start(rs, rt), in space_map_write_impl()
737 zfs_rs_get_end(rs, rt), maptype, vdev_id, words, &db, in space_map_write_impl()
750 ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_length); in space_map_write_impl()
760 space_map_write(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype, in space_map_write() argument
763 ASSERT(dsl_pool_sync_context(dmu_objset_pool(sm->sm_os))); in space_map_write()
766 dmu_buf_will_dirty(sm->sm_dbuf, tx); in space_map_write()
769 * This field is no longer necessary since the in-core space map in space_map_write()
773 sm->sm_phys->smp_object = sm->sm_object; in space_map_write()
775 if (zfs_range_tree_is_empty(rt)) { in space_map_write()
776 VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object); in space_map_write()
781 sm->sm_phys->smp_alloc += zfs_range_tree_space(rt); in space_map_write()
783 sm->sm_phys->smp_alloc -= zfs_range_tree_space(rt); in space_map_write()
785 uint64_t nodes = zfs_btree_numnodes(&rt->rt_root); in space_map_write()
786 uint64_t rt_space = zfs_range_tree_space(rt); in space_map_write()
788 space_map_write_impl(sm, rt, maptype, vdev_id, tx); in space_map_write()
794 VERIFY3U(nodes, ==, zfs_btree_numnodes(&rt->rt_root)); in space_map_write()
795 VERIFY3U(zfs_range_tree_space(rt), ==, rt_space); in space_map_write()
804 error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf); in space_map_open_impl()
808 dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks); in space_map_open_impl()
809 sm->sm_phys = sm->sm_dbuf->db_data; in space_map_open_impl()
826 sm->sm_start = start; in space_map_open()
827 sm->sm_size = size; in space_map_open()
828 sm->sm_shift = shift; in space_map_open()
829 sm->sm_os = os; in space_map_open()
830 sm->sm_object = object; in space_map_open()
831 sm->sm_blksz = 0; in space_map_open()
832 sm->sm_dbuf = NULL; in space_map_open()
833 sm->sm_phys = NULL; in space_map_open()
851 if (sm->sm_dbuf != NULL) in space_map_close()
852 dmu_buf_rele(sm->sm_dbuf, sm); in space_map_close()
853 sm->sm_dbuf = NULL; in space_map_close()
854 sm->sm_phys = NULL; in space_map_close()
862 objset_t *os = sm->sm_os; in space_map_truncate()
870 dmu_object_info_from_db(sm->sm_dbuf, &doi); in space_map_truncate()
876 * free and re-allocate its object with the updated sizes. in space_map_truncate()
887 (u_longlong_t)sm->sm_object, in space_map_truncate()
892 dmu_buf_rele(sm->sm_dbuf, sm); in space_map_truncate()
894 sm->sm_object = space_map_alloc(sm->sm_os, blocksize, tx); in space_map_truncate()
897 VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx)); in space_map_truncate()
904 memset(sm->sm_phys->smp_histogram, 0, in space_map_truncate()
905 sizeof (sm->sm_phys->smp_histogram)); in space_map_truncate()
908 dmu_buf_will_dirty(sm->sm_dbuf, tx); in space_map_truncate()
909 sm->sm_phys->smp_length = 0; in space_map_truncate()
910 sm->sm_phys->smp_alloc = 0; in space_map_truncate()
957 space_map_free_obj(sm->sm_os, space_map_object(sm), tx); in space_map_free()
958 sm->sm_object = 0; in space_map_free()
962 * Given a range tree, it makes a worst-case estimate of how much
967 space_map_estimate_optimal_size(space_map_t *sm, zfs_range_tree_t *rt, in space_map_estimate_optimal_size() argument
970 spa_t *spa = dmu_objset_spa(sm->sm_os); in space_map_estimate_optimal_size()
971 uint64_t shift = sm->sm_shift; in space_map_estimate_optimal_size()
972 uint64_t *histogram = rt->rt_histogram; in space_map_estimate_optimal_size()
977 * range tree would have on-disk as a space map, we iterate through in space_map_estimate_optimal_size()
980 * Note that this is a highest-bound/worst-case estimate for the in space_map_estimate_optimal_size()
985 * of a block attempting to write a two-word entry. in space_map_estimate_optimal_size()
994 * of entries in [2^i, (2^(i+1))-1] of that range_tree. Given in space_map_estimate_optimal_size()
996 * can be represented using a single-word entry, ones that can in space_map_estimate_optimal_size()
997 * be represented with a double-word entry, and ones that can in space_map_estimate_optimal_size()
998 * only be represented with multiple two-word entries. in space_map_estimate_optimal_size()
1001 * are only two groups: single-word entry buckets and multiple in space_map_estimate_optimal_size()
1002 * single-word entry buckets. The information below assumes in space_map_estimate_optimal_size()
1003 * two-word entries enabled, but it can easily be applied when in space_map_estimate_optimal_size()
1007 * single-word entry we look at the maximum run that such entry in space_map_estimate_optimal_size()
1011 * maximum run that can be represented by a single-word entry, in space_map_estimate_optimal_size()
1013 * SM_RUN_BITS + shift - 1. in space_map_estimate_optimal_size()
1016 * double-word entry, we follow the same approach. Finally, any in space_map_estimate_optimal_size()
1017 * bucket higher than that are represented with multiple two-word in space_map_estimate_optimal_size()
1019 * segments can be represented with a single two-word entry is X, in space_map_estimate_optimal_size()
1020 * then bucket X+1 will need 2 two-word entries for each of its in space_map_estimate_optimal_size()
1025 * the example with the one-word entry, the maximum run that can in space_map_estimate_optimal_size()
1026 * be represented in a one-word entry 2^(SM_RUN_BITS + shift) is in space_map_estimate_optimal_size()
1027 * not part of bucket SM_RUN_BITS + shift - 1. Thus, segments of in space_map_estimate_optimal_size()
1029 * we start counting two-word entries and this is one more reason in space_map_estimate_optimal_size()
1037 (vdev_id == SM_NO_VDEVID && sm->sm_size < SM_OFFSET_MAX)) { in space_map_estimate_optimal_size()
1041 * assume the worst-case of every single word entry being in space_map_estimate_optimal_size()
1049 uint64_t single_entry_max_bucket = SM_RUN_BITS + shift - 1; in space_map_estimate_optimal_size()
1057 1ULL << (idx - single_entry_max_bucket); in space_map_estimate_optimal_size()
1067 uint64_t double_entry_max_bucket = SM2_RUN_BITS + shift - 1; in space_map_estimate_optimal_size()
1073 entries_for_seg = 1ULL << (idx - double_entry_max_bucket); in space_map_estimate_optimal_size()
1083 size += ((size / sm->sm_blksz) + 1) * sizeof (uint64_t); in space_map_estimate_optimal_size()
1091 return (sm != NULL ? sm->sm_object : 0); in space_map_object()
1097 return (sm != NULL ? sm->sm_phys->smp_alloc : 0); in space_map_allocated()
1103 return (sm != NULL ? sm->sm_phys->smp_length : 0); in space_map_length()
1111 return (DIV_ROUND_UP(space_map_length(sm), sm->sm_blksz)); in space_map_nblocks()