Lines Matching +full:write +full:- +full:1 +full:- +full:bps

1 // SPDX-License-Identifier: CDDL-1.0
10 * or https://opensource.org/licenses/CDDL-1.0.
55 * - Deduplication is automatic and Block Cloning is not - one has to use a
57 * - Deduplication keeps all data blocks in its table, even those referenced
62 * - Deduplication needs data to work - one needs to pass real data to the
63 * write(2) syscall, so hash can be calculated. Block Cloning doesn't require
65 * neither the cost of reading the data, nor the cost of writing the data -
67 * - If the D (dedup) bit is not set in the block pointer, it means that
75 * - The BRT entry is much smaller than the DDT entry - for BRT we only store
77 * - Dedup keys are cryptographic hashes, so two blocks that are close to each
79 * The BRT entry keys are offsets into a single top-level VDEV, so data blocks
81 * - Scrub will only do a single pass over a block that is referenced multiple
86 * - Deduplication requires cryptographically strong hash as a checksum or
93 * is a small number of top-level VDEVs and a large number of blocks stored in
95 * maintaining one BRT for each top-level VDEV, so we can then have only offset
108 * top-level VDEV into 16MB regions. For each region we maintain a counter that
110 * creates the entries count array of 16bit numbers for each top-level VDEV.
112 * same transaction group as the BRT updates to keep everything in-sync. We can
114 * 1TB VDEV the array requires only 128kB of memory (we may decide to decrease
129 * is not yet implemented - for now we will update entire array if there was
142 * references? To avoid this dilemma BRT cooperates with DDT - if a given block
177 * block-aligned or we will return an error so the upper layer can
179 * Using copy_file_range(2) will call OS-independent zfs_clone_range() function.
182 * function from the source file. Once we have BPs from the source file we call
184 * allocates BPs for us. We iterate over all source BPs. If the given BP is
185 * a hole or an embedded block, we just copy BP as-is. If it points to a real
189 * We use this pending list to keep track of all BPs that got new references
193 * - The block we want to clone may have been created within the same
196 * - The block we want to clone may have been modified within the same
198 * - A block may be cloned multiple times during one transaction group (that's
199 * why pending list is actually a tree and not an append-only list - this
202 * - A block may be cloned and freed within the same transaction group
204 * - A block may be cloned and within the same transaction group the clone
206 * - A file might have been deleted, but the caller still has a file descriptor
218 * all the new clones to the BRT table - we load BRT entries and update
220 * function. This function will sync all dirty per-top-level-vdev BRTs,
225 * Every clone operation is divided into chunks (similar to write) and each
227 * how many BPs we can fit into a single ZIL entry.
229 * as when we log clone operations we cannot use the source object - it may
230 * reside on a different dataset, so we log BPs we want to clone.
237 * entries, we will bump reference counters for their BPs in the BRT. Then
251 static int brt_zap_prefetch = 1;
307 #define BRTSTAT_BUMP(stat) wmsum_add(&brt_sums.stat, 1)
315 rw_enter(&spa->spa_brt_lock, RW_READER); in brt_rlock()
321 rw_enter(&spa->spa_brt_lock, RW_WRITER); in brt_wlock()
327 rw_exit(&spa->spa_brt_lock); in brt_unlock()
334 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_entcount_get()
336 if (unlikely(brtvd->bv_need_byteswap)) { in brt_vdev_entcount_get()
337 return (BSWAP_16(brtvd->bv_entcount[idx])); in brt_vdev_entcount_get()
339 return (brtvd->bv_entcount[idx]); in brt_vdev_entcount_get()
347 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_entcount_set()
349 if (unlikely(brtvd->bv_need_byteswap)) { in brt_vdev_entcount_set()
350 brtvd->bv_entcount[idx] = BSWAP_16(entcnt); in brt_vdev_entcount_set()
352 brtvd->bv_entcount[idx] = entcnt; in brt_vdev_entcount_set()
361 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_entcount_inc()
366 brt_vdev_entcount_set(brtvd, idx, entcnt + 1); in brt_vdev_entcount_inc()
374 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_entcount_dec()
379 brt_vdev_entcount_set(brtvd, idx, entcnt - 1); in brt_vdev_entcount_dec()
388 uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); in brt_vdev_dump()
391 (u_longlong_t)brtvd->bv_vdevid, in brt_vdev_dump()
392 brtvd->bv_meta_dirty, brtvd->bv_entcount_dirty, in brt_vdev_dump()
393 (u_longlong_t)brtvd->bv_size, in brt_vdev_dump()
394 (u_longlong_t)brtvd->bv_totalcount, in brt_vdev_dump()
397 if (brtvd->bv_totalcount > 0) { in brt_vdev_dump()
399 for (idx = 0; idx < brtvd->bv_size; idx++) { in brt_vdev_dump()
407 if (brtvd->bv_entcount_dirty) { in brt_vdev_dump()
410 bitmap = kmem_alloc(nblocks + 1, KM_SLEEP); in brt_vdev_dump()
413 BT_TEST(brtvd->bv_bitmap, idx) ? 'x' : '.'; in brt_vdev_dump()
417 kmem_free(bitmap, nblocks + 1); in brt_vdev_dump()
428 if (vdevid < spa->spa_brt_nvdevs) { in brt_vdev()
429 brtvd = spa->spa_brt_vdevs[vdevid]; in brt_vdev()
434 if (vdevid >= spa->spa_brt_nvdevs) in brt_vdev()
435 brt_vdevs_expand(spa, vdevid + 1); in brt_vdev()
436 brtvd = spa->spa_brt_vdevs[vdevid]; in brt_vdev()
447 ASSERT(brtvd->bv_initiated); in brt_vdev_create()
448 ASSERT0(brtvd->bv_mos_brtvdev); in brt_vdev_create()
449 ASSERT0(brtvd->bv_mos_entries); in brt_vdev_create()
451 uint64_t mos_entries = zap_create_flags(spa->spa_meta_objset, 0, in brt_vdev_create()
455 VERIFY0(dnode_hold(spa->spa_meta_objset, mos_entries, brtvd, in brt_vdev_create()
456 &brtvd->bv_mos_entries_dnode)); in brt_vdev_create()
457 rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); in brt_vdev_create()
458 brtvd->bv_mos_entries = mos_entries; in brt_vdev_create()
459 rw_exit(&brtvd->bv_mos_entries_lock); in brt_vdev_create()
461 (u_longlong_t)brtvd->bv_mos_entries); in brt_vdev_create()
468 brtvd->bv_mos_brtvdev = dmu_object_alloc(spa->spa_meta_objset, in brt_vdev_create()
471 VERIFY(brtvd->bv_mos_brtvdev != 0); in brt_vdev_create()
473 (u_longlong_t)brtvd->bv_mos_brtvdev); in brt_vdev_create()
476 (u_longlong_t)brtvd->bv_vdevid); in brt_vdev_create()
477 VERIFY0(zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, name, in brt_vdev_create()
478 sizeof (uint64_t), 1, &brtvd->bv_mos_brtvdev, tx)); in brt_vdev_create()
482 * Activate the endian-fixed feature if this is the first BRT ZAP in brt_vdev_create()
504 ASSERT(RW_WRITE_HELD(&brtvd->bv_lock)); in brt_vdev_realloc()
507 vd = vdev_lookup_top(spa, brtvd->bv_vdevid); in brt_vdev_realloc()
508 size = (vdev_get_min_asize(vd) - 1) / spa->spa_brt_rangesize + 1; in brt_vdev_realloc()
515 if (!brtvd->bv_initiated) { in brt_vdev_realloc()
516 ASSERT0(brtvd->bv_size); in brt_vdev_realloc()
517 ASSERT0P(brtvd->bv_entcount); in brt_vdev_realloc()
518 ASSERT0P(brtvd->bv_bitmap); in brt_vdev_realloc()
520 ASSERT(brtvd->bv_size > 0); in brt_vdev_realloc()
521 ASSERT(brtvd->bv_entcount != NULL); in brt_vdev_realloc()
522 ASSERT(brtvd->bv_bitmap != NULL); in brt_vdev_realloc()
525 * shrinking the on-disk BRT VDEV object. in brt_vdev_realloc()
526 * dmu_free_range(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, in brt_vdev_realloc()
529 ASSERT3U(brtvd->bv_size, <=, size); in brt_vdev_realloc()
531 memcpy(entcount, brtvd->bv_entcount, in brt_vdev_realloc()
532 sizeof (entcount[0]) * MIN(size, brtvd->bv_size)); in brt_vdev_realloc()
533 vmem_free(brtvd->bv_entcount, in brt_vdev_realloc()
534 sizeof (entcount[0]) * brtvd->bv_size); in brt_vdev_realloc()
535 onblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); in brt_vdev_realloc()
536 memcpy(bitmap, brtvd->bv_bitmap, MIN(BT_SIZEOFMAP(nblocks), in brt_vdev_realloc()
538 kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(onblocks)); in brt_vdev_realloc()
541 brtvd->bv_size = size; in brt_vdev_realloc()
542 brtvd->bv_entcount = entcount; in brt_vdev_realloc()
543 brtvd->bv_bitmap = bitmap; in brt_vdev_realloc()
544 if (!brtvd->bv_initiated) { in brt_vdev_realloc()
545 brtvd->bv_need_byteswap = FALSE; in brt_vdev_realloc()
546 brtvd->bv_initiated = TRUE; in brt_vdev_realloc()
548 (u_longlong_t)brtvd->bv_vdevid); in brt_vdev_realloc()
559 ASSERT(!brtvd->bv_initiated); in brt_vdev_load()
560 ASSERT(brtvd->bv_mos_brtvdev != 0); in brt_vdev_load()
562 error = dmu_bonus_hold(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, in brt_vdev_load()
567 bvphys = db->db_data; in brt_vdev_load()
568 if (spa->spa_brt_rangesize == 0) { in brt_vdev_load()
569 spa->spa_brt_rangesize = bvphys->bvp_rangesize; in brt_vdev_load()
571 ASSERT3U(spa->spa_brt_rangesize, ==, bvphys->bvp_rangesize); in brt_vdev_load()
577 ASSERT3U(bvphys->bvp_size, <=, brtvd->bv_size); in brt_vdev_load()
582 error = dmu_read(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, in brt_vdev_load()
583 MIN(brtvd->bv_size, bvphys->bvp_size) * sizeof (uint16_t), in brt_vdev_load()
584 brtvd->bv_entcount, DMU_READ_NO_PREFETCH); in brt_vdev_load()
588 ASSERT(bvphys->bvp_mos_entries != 0); in brt_vdev_load()
589 VERIFY0(dnode_hold(spa->spa_meta_objset, bvphys->bvp_mos_entries, brtvd, in brt_vdev_load()
590 &brtvd->bv_mos_entries_dnode)); in brt_vdev_load()
591 rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); in brt_vdev_load()
592 brtvd->bv_mos_entries = bvphys->bvp_mos_entries; in brt_vdev_load()
593 rw_exit(&brtvd->bv_mos_entries_lock); in brt_vdev_load()
594 brtvd->bv_need_byteswap = in brt_vdev_load()
595 (bvphys->bvp_byteorder != BRT_NATIVE_BYTEORDER); in brt_vdev_load()
596 brtvd->bv_totalcount = bvphys->bvp_totalcount; in brt_vdev_load()
597 brtvd->bv_usedspace = bvphys->bvp_usedspace; in brt_vdev_load()
598 brtvd->bv_savedspace = bvphys->bvp_savedspace; in brt_vdev_load()
603 (u_longlong_t)brtvd->bv_vdevid, in brt_vdev_load()
604 (u_longlong_t)brtvd->bv_mos_brtvdev, in brt_vdev_load()
605 (u_longlong_t)brtvd->bv_mos_entries); in brt_vdev_load()
612 ASSERT(RW_WRITE_HELD(&brtvd->bv_lock)); in brt_vdev_dealloc()
613 ASSERT(brtvd->bv_initiated); in brt_vdev_dealloc()
614 ASSERT0(avl_numnodes(&brtvd->bv_tree)); in brt_vdev_dealloc()
616 vmem_free(brtvd->bv_entcount, sizeof (uint16_t) * brtvd->bv_size); in brt_vdev_dealloc()
617 brtvd->bv_entcount = NULL; in brt_vdev_dealloc()
618 uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); in brt_vdev_dealloc()
619 kmem_free(brtvd->bv_bitmap, BT_SIZEOFMAP(nblocks)); in brt_vdev_dealloc()
620 brtvd->bv_bitmap = NULL; in brt_vdev_dealloc()
622 brtvd->bv_size = 0; in brt_vdev_dealloc()
624 brtvd->bv_initiated = FALSE; in brt_vdev_dealloc()
625 BRT_DEBUG("BRT VDEV %llu deallocated.", (u_longlong_t)brtvd->bv_vdevid); in brt_vdev_dealloc()
634 ASSERT(brtvd->bv_initiated); in brt_vdev_destroy()
635 ASSERT(brtvd->bv_mos_brtvdev != 0); in brt_vdev_destroy()
636 ASSERT(brtvd->bv_mos_entries != 0); in brt_vdev_destroy()
637 ASSERT0(brtvd->bv_totalcount); in brt_vdev_destroy()
638 ASSERT0(brtvd->bv_usedspace); in brt_vdev_destroy()
639 ASSERT0(brtvd->bv_savedspace); in brt_vdev_destroy()
641 uint64_t mos_entries = brtvd->bv_mos_entries; in brt_vdev_destroy()
642 rw_enter(&brtvd->bv_mos_entries_lock, RW_WRITER); in brt_vdev_destroy()
643 brtvd->bv_mos_entries = 0; in brt_vdev_destroy()
644 rw_exit(&brtvd->bv_mos_entries_lock); in brt_vdev_destroy()
645 dnode_rele(brtvd->bv_mos_entries_dnode, brtvd); in brt_vdev_destroy()
646 brtvd->bv_mos_entries_dnode = NULL; in brt_vdev_destroy()
647 ASSERT0(zap_count(spa->spa_meta_objset, mos_entries, &count)); in brt_vdev_destroy()
649 VERIFY0(zap_destroy(spa->spa_meta_objset, mos_entries, tx)); in brt_vdev_destroy()
653 VERIFY0(dmu_object_free(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, in brt_vdev_destroy()
656 (u_longlong_t)brtvd->bv_mos_brtvdev); in brt_vdev_destroy()
657 brtvd->bv_mos_brtvdev = 0; in brt_vdev_destroy()
658 brtvd->bv_entcount_dirty = FALSE; in brt_vdev_destroy()
661 (u_longlong_t)brtvd->bv_vdevid); in brt_vdev_destroy()
662 VERIFY0(zap_remove(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, in brt_vdev_destroy()
666 brtvd->bv_meta_dirty = FALSE; in brt_vdev_destroy()
668 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_vdev_destroy()
670 rw_exit(&brtvd->bv_lock); in brt_vdev_destroy()
682 ASSERT(RW_WRITE_HELD(&spa->spa_brt_lock)); in brt_vdevs_expand()
683 ASSERT3U(nvdevs, >=, spa->spa_brt_nvdevs); in brt_vdevs_expand()
685 if (nvdevs == spa->spa_brt_nvdevs) in brt_vdevs_expand()
688 vdevs = kmem_zalloc(sizeof (*spa->spa_brt_vdevs) * nvdevs, KM_SLEEP); in brt_vdevs_expand()
689 if (spa->spa_brt_nvdevs > 0) { in brt_vdevs_expand()
690 ASSERT(spa->spa_brt_vdevs != NULL); in brt_vdevs_expand()
692 memcpy(vdevs, spa->spa_brt_vdevs, in brt_vdevs_expand()
693 sizeof (*spa->spa_brt_vdevs) * spa->spa_brt_nvdevs); in brt_vdevs_expand()
694 kmem_free(spa->spa_brt_vdevs, in brt_vdevs_expand()
695 sizeof (*spa->spa_brt_vdevs) * spa->spa_brt_nvdevs); in brt_vdevs_expand()
697 spa->spa_brt_vdevs = vdevs; in brt_vdevs_expand()
699 for (uint64_t vdevid = spa->spa_brt_nvdevs; vdevid < nvdevs; vdevid++) { in brt_vdevs_expand()
701 rw_init(&brtvd->bv_lock, NULL, RW_DEFAULT, NULL); in brt_vdevs_expand()
702 brtvd->bv_vdevid = vdevid; in brt_vdevs_expand()
703 brtvd->bv_initiated = FALSE; in brt_vdevs_expand()
704 rw_init(&brtvd->bv_mos_entries_lock, NULL, RW_DEFAULT, NULL); in brt_vdevs_expand()
705 avl_create(&brtvd->bv_tree, brt_entry_compare, in brt_vdevs_expand()
708 avl_create(&brtvd->bv_pending_tree[i], in brt_vdevs_expand()
712 mutex_init(&brtvd->bv_pending_lock, NULL, MUTEX_DEFAULT, NULL); in brt_vdevs_expand()
713 spa->spa_brt_vdevs[vdevid] = brtvd; in brt_vdevs_expand()
717 (u_longlong_t)spa->spa_brt_nvdevs, (u_longlong_t)nvdevs); in brt_vdevs_expand()
718 spa->spa_brt_nvdevs = nvdevs; in brt_vdevs_expand()
724 uint64_t idx = offset / spa->spa_brt_rangesize; in brt_vdev_lookup()
725 if (idx < brtvd->bv_size) { in brt_vdev_lookup()
738 ASSERT(brtvd->bv_initiated); in brt_vdev_addref()
740 brtvd->bv_savedspace += dsize * count; in brt_vdev_addref()
741 brtvd->bv_meta_dirty = TRUE; in brt_vdev_addref()
743 if (bre->bre_count > 0) in brt_vdev_addref()
746 brtvd->bv_usedspace += dsize; in brt_vdev_addref()
748 idx = BRE_OFFSET(bre) / spa->spa_brt_rangesize; in brt_vdev_addref()
749 if (idx >= brtvd->bv_size) { in brt_vdev_addref()
751 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_vdev_addref()
753 rw_exit(&brtvd->bv_lock); in brt_vdev_addref()
756 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_addref()
758 brtvd->bv_totalcount++; in brt_vdev_addref()
760 brtvd->bv_entcount_dirty = TRUE; in brt_vdev_addref()
762 BT_SET(brtvd->bv_bitmap, idx); in brt_vdev_addref()
771 ASSERT(RW_WRITE_HELD(&brtvd->bv_lock)); in brt_vdev_decref()
772 ASSERT(brtvd->bv_initiated); in brt_vdev_decref()
774 brtvd->bv_savedspace -= dsize; in brt_vdev_decref()
775 brtvd->bv_meta_dirty = TRUE; in brt_vdev_decref()
777 if (bre->bre_count > 0) in brt_vdev_decref()
780 brtvd->bv_usedspace -= dsize; in brt_vdev_decref()
782 idx = BRE_OFFSET(bre) / spa->spa_brt_rangesize; in brt_vdev_decref()
783 ASSERT3U(idx, <, brtvd->bv_size); in brt_vdev_decref()
785 ASSERT(brtvd->bv_totalcount > 0); in brt_vdev_decref()
786 brtvd->bv_totalcount--; in brt_vdev_decref()
788 brtvd->bv_entcount_dirty = TRUE; in brt_vdev_decref()
790 BT_SET(brtvd->bv_bitmap, idx); in brt_vdev_decref()
799 ASSERT(brtvd->bv_meta_dirty); in brt_vdev_sync()
800 ASSERT(brtvd->bv_mos_brtvdev != 0); in brt_vdev_sync()
803 VERIFY0(dmu_bonus_hold(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, in brt_vdev_sync()
806 if (brtvd->bv_entcount_dirty) { in brt_vdev_sync()
808 * TODO: Walk brtvd->bv_bitmap and write only the dirty blocks. in brt_vdev_sync()
810 dmu_write(spa->spa_meta_objset, brtvd->bv_mos_brtvdev, 0, in brt_vdev_sync()
811 brtvd->bv_size * sizeof (brtvd->bv_entcount[0]), in brt_vdev_sync()
812 brtvd->bv_entcount, tx); in brt_vdev_sync()
813 uint64_t nblocks = BRT_RANGESIZE_TO_NBLOCKS(brtvd->bv_size); in brt_vdev_sync()
814 memset(brtvd->bv_bitmap, 0, BT_SIZEOFMAP(nblocks)); in brt_vdev_sync()
815 brtvd->bv_entcount_dirty = FALSE; in brt_vdev_sync()
819 bvphys = db->db_data; in brt_vdev_sync()
820 bvphys->bvp_mos_entries = brtvd->bv_mos_entries; in brt_vdev_sync()
821 bvphys->bvp_size = brtvd->bv_size; in brt_vdev_sync()
822 if (brtvd->bv_need_byteswap) { in brt_vdev_sync()
823 bvphys->bvp_byteorder = BRT_NON_NATIVE_BYTEORDER; in brt_vdev_sync()
825 bvphys->bvp_byteorder = BRT_NATIVE_BYTEORDER; in brt_vdev_sync()
827 bvphys->bvp_totalcount = brtvd->bv_totalcount; in brt_vdev_sync()
828 bvphys->bvp_rangesize = spa->spa_brt_rangesize; in brt_vdev_sync()
829 bvphys->bvp_usedspace = brtvd->bv_usedspace; in brt_vdev_sync()
830 bvphys->bvp_savedspace = brtvd->bv_savedspace; in brt_vdev_sync()
833 brtvd->bv_meta_dirty = FALSE; in brt_vdev_sync()
839 if (spa->spa_brt_vdevs == 0) in brt_vdevs_free()
841 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { in brt_vdevs_free()
842 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; in brt_vdevs_free()
843 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_vdevs_free()
844 if (brtvd->bv_initiated) in brt_vdevs_free()
846 rw_exit(&brtvd->bv_lock); in brt_vdevs_free()
847 rw_destroy(&brtvd->bv_lock); in brt_vdevs_free()
848 if (brtvd->bv_mos_entries != 0) in brt_vdevs_free()
849 dnode_rele(brtvd->bv_mos_entries_dnode, brtvd); in brt_vdevs_free()
850 rw_destroy(&brtvd->bv_mos_entries_lock); in brt_vdevs_free()
851 avl_destroy(&brtvd->bv_tree); in brt_vdevs_free()
853 avl_destroy(&brtvd->bv_pending_tree[i]); in brt_vdevs_free()
854 mutex_destroy(&brtvd->bv_pending_lock); in brt_vdevs_free()
857 kmem_free(spa->spa_brt_vdevs, sizeof (*spa->spa_brt_vdevs) * in brt_vdevs_free()
858 spa->spa_brt_nvdevs); in brt_vdevs_free()
865 bre->bre_bp = *bp; in brt_entry_fill()
866 bre->bre_count = 0; in brt_entry_fill()
867 bre->bre_pcount = 0; in brt_entry_fill()
869 *vdevidp = DVA_GET_VDEV(&bp->blk_dva[0]); in brt_entry_fill()
883 if (brtvd->bv_mos_entries == 0) in brt_entry_lookup()
887 return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, in brt_entry_lookup()
888 &off, BRT_KEY_WORDS, sizeof (bre->bre_count), 1, in brt_entry_lookup()
889 &bre->bre_count)); in brt_entry_lookup()
891 return (zap_lookup_uint64_by_dnode(brtvd->bv_mos_entries_dnode, in brt_entry_lookup()
892 &off, BRT_KEY_WORDS, 1, sizeof (bre->bre_count), in brt_entry_lookup()
893 &bre->bre_count)); in brt_entry_lookup()
906 if (spa->spa_brt_nvdevs == 0) in brt_maybe_exists()
909 uint64_t vdevid = DVA_GET_VDEV(&bp->blk_dva[0]); in brt_maybe_exists()
911 if (brtvd == NULL || !brtvd->bv_initiated) in brt_maybe_exists()
920 uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[0]); in brt_maybe_exists()
927 if (spa->spa_brt_nvdevs == 0) in brt_get_dspace()
932 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) in brt_get_dspace()
933 s += spa->spa_brt_vdevs[vdevid]->bv_savedspace; in brt_get_dspace()
941 if (spa->spa_brt_nvdevs == 0) in brt_get_used()
946 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) in brt_get_used()
947 s += spa->spa_brt_vdevs[vdevid]->bv_usedspace; in brt_get_used()
970 brt_stats_t *bs = ksp->ks_data; in brt_kstats_update()
975 bs->brt_addref_entry_not_on_disk.value.ui64 = in brt_kstats_update()
977 bs->brt_addref_entry_on_disk.value.ui64 = in brt_kstats_update()
979 bs->brt_decref_entry_in_memory.value.ui64 = in brt_kstats_update()
981 bs->brt_decref_entry_loaded_from_disk.value.ui64 = in brt_kstats_update()
983 bs->brt_decref_entry_not_in_memory.value.ui64 = in brt_kstats_update()
985 bs->brt_decref_entry_read_lost_race.value.ui64 = in brt_kstats_update()
987 bs->brt_decref_entry_still_referenced.value.ui64 = in brt_kstats_update()
989 bs->brt_decref_free_data_later.value.ui64 = in brt_kstats_update()
991 bs->brt_decref_free_data_now.value.ui64 = in brt_kstats_update()
993 bs->brt_decref_no_entry.value.ui64 = in brt_kstats_update()
1017 brt_ksp->ks_data = &brt_stats; in brt_stat_init()
1018 brt_ksp->ks_update = brt_kstats_update; in brt_stat_init()
1075 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_entry_decref()
1076 ASSERT(brtvd->bv_initiated); in brt_entry_decref()
1077 bre = avl_find(&brtvd->bv_tree, &bre_search, NULL); in brt_entry_decref()
1084 rw_exit(&brtvd->bv_lock); in brt_entry_decref()
1094 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_entry_decref()
1095 racebre = avl_find(&brtvd->bv_tree, &bre_search, &where); in brt_entry_decref()
1105 bre->bre_bp = bre_search.bre_bp; in brt_entry_decref()
1106 bre->bre_count = bre_search.bre_count; in brt_entry_decref()
1107 bre->bre_pcount = 0; in brt_entry_decref()
1108 avl_insert(&brtvd->bv_tree, bre, where); in brt_entry_decref()
1111 if (bre->bre_count == 0) { in brt_entry_decref()
1112 rw_exit(&brtvd->bv_lock); in brt_entry_decref()
1117 bre->bre_pcount--; in brt_entry_decref()
1118 ASSERT(bre->bre_count > 0); in brt_entry_decref()
1119 bre->bre_count--; in brt_entry_decref()
1120 if (bre->bre_count == 0) in brt_entry_decref()
1126 rw_exit(&brtvd->bv_lock); in brt_entry_decref()
1143 rw_enter(&brtvd->bv_lock, RW_READER); in brt_entry_get_refcount()
1144 ASSERT(brtvd->bv_initiated); in brt_entry_get_refcount()
1145 bre = avl_find(&brtvd->bv_tree, &bre_search, NULL); in brt_entry_get_refcount()
1147 rw_exit(&brtvd->bv_lock); in brt_entry_get_refcount()
1156 refcnt = bre->bre_count; in brt_entry_get_refcount()
1157 rw_exit(&brtvd->bv_lock); in brt_entry_get_refcount()
1166 if (!brt_zap_prefetch || brtvd->bv_mos_entries == 0) in brt_prefetch()
1169 uint64_t off = DVA_GET_OFFSET(&bp->blk_dva[0]); in brt_prefetch()
1170 rw_enter(&brtvd->bv_mos_entries_lock, RW_READER); in brt_prefetch()
1171 if (brtvd->bv_mos_entries != 0) { in brt_prefetch()
1172 (void) zap_prefetch_uint64_by_dnode(brtvd->bv_mos_entries_dnode, in brt_prefetch()
1175 rw_exit(&brtvd->bv_mos_entries_lock); in brt_prefetch()
1182 const blkptr_t *bp1 = &bre1->bre_bp, *bp2 = &bre2->bre_bp; in brt_entry_compare()
1184 return (TREE_CMP(DVA_GET_OFFSET(&bp1->blk_dva[0]), in brt_entry_compare()
1185 DVA_GET_OFFSET(&bp2->blk_dva[0]))); in brt_entry_compare()
1198 uint64_t vdevid = DVA_GET_VDEV(&bp->blk_dva[0]); in brt_pending_add()
1200 avl_tree_t *pending_tree = &brtvd->bv_pending_tree[txg & TXG_MASK]; in brt_pending_add()
1203 newbre->bre_bp = *bp; in brt_pending_add()
1204 newbre->bre_count = 0; in brt_pending_add()
1205 newbre->bre_pcount = 1; in brt_pending_add()
1207 mutex_enter(&brtvd->bv_pending_lock); in brt_pending_add()
1213 bre->bre_pcount++; in brt_pending_add()
1215 mutex_exit(&brtvd->bv_pending_lock); in brt_pending_add()
1238 uint64_t vdevid = DVA_GET_VDEV(&bp->blk_dva[0]); in brt_pending_remove()
1241 avl_tree_t *pending_tree = &brtvd->bv_pending_tree[txg & TXG_MASK]; in brt_pending_remove()
1245 mutex_enter(&brtvd->bv_pending_lock); in brt_pending_remove()
1248 ASSERT(bre->bre_pcount > 0); in brt_pending_remove()
1249 bre->bre_pcount--; in brt_pending_remove()
1250 if (bre->bre_pcount == 0) in brt_pending_remove()
1254 mutex_exit(&brtvd->bv_pending_lock); in brt_pending_remove()
1269 ASSERT(avl_is_empty(&brtvd->bv_tree)); in brt_pending_apply_vdev()
1270 avl_swap(&brtvd->bv_tree, &brtvd->bv_pending_tree[txg & TXG_MASK]); in brt_pending_apply_vdev()
1272 for (bre = avl_first(&brtvd->bv_tree); bre; bre = nbre) { in brt_pending_apply_vdev()
1273 nbre = AVL_NEXT(&brtvd->bv_tree, bre); in brt_pending_apply_vdev()
1280 if (BP_GET_DEDUP(&bre->bre_bp)) { in brt_pending_apply_vdev()
1281 while (bre->bre_pcount > 0) { in brt_pending_apply_vdev()
1282 if (!ddt_addref(spa, &bre->bre_bp)) in brt_pending_apply_vdev()
1284 bre->bre_pcount--; in brt_pending_apply_vdev()
1286 if (bre->bre_pcount == 0) { in brt_pending_apply_vdev()
1287 avl_remove(&brtvd->bv_tree, bre); in brt_pending_apply_vdev()
1298 if (brtvd->bv_mos_entries != 0 && in brt_pending_apply_vdev()
1303 brtvd->bv_mos_entries_dnode, &off, in brt_pending_apply_vdev()
1304 BRT_KEY_WORDS, sizeof (bre->bre_count), 1, in brt_pending_apply_vdev()
1305 &bre->bre_count); in brt_pending_apply_vdev()
1308 brtvd->bv_mos_entries_dnode, &off, in brt_pending_apply_vdev()
1309 BRT_KEY_WORDS, 1, sizeof (bre->bre_count), in brt_pending_apply_vdev()
1310 &bre->bre_count); in brt_pending_apply_vdev()
1325 if (avl_is_empty(&brtvd->bv_tree)) in brt_pending_apply_vdev()
1328 if (!brtvd->bv_initiated) { in brt_pending_apply_vdev()
1329 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_pending_apply_vdev()
1331 rw_exit(&brtvd->bv_lock); in brt_pending_apply_vdev()
1339 for (bre = avl_first(&brtvd->bv_tree); bre; in brt_pending_apply_vdev()
1340 bre = AVL_NEXT(&brtvd->bv_tree, bre)) { in brt_pending_apply_vdev()
1342 bp_get_dsize(spa, &bre->bre_bp), bre->bre_pcount); in brt_pending_apply_vdev()
1343 bre->bre_count += bre->bre_pcount; in brt_pending_apply_vdev()
1352 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { in brt_pending_apply()
1353 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; in brt_pending_apply()
1368 if (bre->bre_pcount == 0) { in brt_sync_entry()
1370 } else if (bre->bre_count == 0) { in brt_sync_entry()
1377 BRT_KEY_WORDS, sizeof (bre->bre_count), 1, in brt_sync_entry()
1378 &bre->bre_count, tx)); in brt_sync_entry()
1381 BRT_KEY_WORDS, 1, sizeof (bre->bre_count), in brt_sync_entry()
1382 &bre->bre_count, tx)); in brt_sync_entry()
1393 for (uint64_t vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { in brt_sync_table()
1394 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; in brt_sync_table()
1397 if (!brtvd->bv_meta_dirty) { in brt_sync_table()
1398 ASSERT(!brtvd->bv_entcount_dirty); in brt_sync_table()
1399 ASSERT0(avl_numnodes(&brtvd->bv_tree)); in brt_sync_table()
1404 ASSERT(!brtvd->bv_entcount_dirty || in brt_sync_table()
1405 avl_numnodes(&brtvd->bv_tree) != 0); in brt_sync_table()
1407 if (brtvd->bv_mos_brtvdev == 0) in brt_sync_table()
1411 while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) { in brt_sync_table()
1412 brt_sync_entry(spa, brtvd->bv_mos_entries_dnode, bre, in brt_sync_table()
1421 if (brtvd->bv_totalcount == 0) in brt_sync_table()
1439 for (vdevid = 0; vdevid < spa->spa_brt_nvdevs; vdevid++) { in brt_sync()
1440 if (spa->spa_brt_vdevs[vdevid]->bv_meta_dirty) in brt_sync()
1443 if (vdevid >= spa->spa_brt_nvdevs) { in brt_sync()
1449 tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); in brt_sync()
1457 rw_init(&spa->spa_brt_lock, NULL, RW_DEFAULT, NULL); in brt_alloc()
1458 spa->spa_brt_vdevs = NULL; in brt_alloc()
1459 spa->spa_brt_nvdevs = 0; in brt_alloc()
1460 spa->spa_brt_rangesize = 0; in brt_alloc()
1467 spa->spa_brt_rangesize = BRT_RANGESIZE; in brt_create()
1477 for (uint64_t vdevid = 0; vdevid < spa->spa_root_vdev->vdev_children; in brt_load()
1485 error = zap_lookup(spa->spa_meta_objset, in brt_load()
1486 DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), 1, in brt_load()
1496 brt_vdevs_expand(spa, spa->spa_root_vdev->vdev_children); in brt_load()
1497 brt_vdev_t *brtvd = spa->spa_brt_vdevs[vdevid]; in brt_load()
1498 rw_enter(&brtvd->bv_lock, RW_WRITER); in brt_load()
1499 brtvd->bv_mos_brtvdev = mos_brtvdev; in brt_load()
1501 rw_exit(&brtvd->bv_lock); in brt_load()
1506 if (spa->spa_brt_rangesize == 0) in brt_load()
1507 spa->spa_brt_rangesize = BRT_RANGESIZE; in brt_load()
1515 if (spa->spa_brt_rangesize == 0) in brt_unload()
1518 rw_destroy(&spa->spa_brt_lock); in brt_unload()
1519 spa->spa_brt_rangesize = 0; in brt_unload()