1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek 23 */ 24 25 #ifndef _SYS_BRT_IMPL_H 26 #define _SYS_BRT_IMPL_H 27 28 #ifdef __cplusplus 29 extern "C" { 30 #endif 31 32 /* 33 * BRT - Block Reference Table. 34 */ 35 #define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:" 36 37 /* 38 * We divide each VDEV into 16MB chunks. Each chunk is represented in memory 39 * by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B 40 * Each element in this array represents how many BRT entries do we have in this 41 * chunk of storage. We always load this entire array into memory and update as 42 * needed. By having it in memory we can quickly tell (during zio_free()) if 43 * there are any BRT entries that we might need to update. 44 * 45 * This value cannot be larger than 16MB, at least as long as we support 46 * 512 byte block sizes. With 512 byte block size we can have exactly 47 * 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too 48 * many for a 16bit counter. 49 */ 50 #define BRT_RANGESIZE (16 * 1024 * 1024) 51 _Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX, 52 "BRT_RANGESIZE is too large."); 53 /* 54 * We don't want to update the whole structure every time. Maintain bitmap 55 * of dirty blocks within the regions, so that a single bit represents a 56 * block size of entcounts. For example if we have a 1PB vdev then all 57 * entcounts take 128MB of memory ((64TB / 16MB) * 2B). We can divide this 58 * 128MB array of entcounts into 32kB disk blocks, as we don't want to update 59 * the whole 128MB on disk when we have updated only a single entcount. 60 * We maintain a bitmap where each 32kB disk block within 128MB entcounts array 61 * is represented by a single bit. This gives us 4096 bits. A set bit in the 62 * bitmap means that we had a change in at least one of the 16384 entcounts 63 * that reside on a 32kB disk block (32kB / sizeof (uint16_t)). 64 */ 65 #define BRT_BLOCKSIZE (32 * 1024) 66 #define BRT_RANGESIZE_TO_NBLOCKS(size) \ 67 (((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1) 68 69 #define BRT_LITTLE_ENDIAN 0 70 #define BRT_BIG_ENDIAN 1 71 #ifdef _ZFS_LITTLE_ENDIAN 72 #define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN 73 #define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN 74 #else 75 #define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN 76 #define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN 77 #endif 78 79 typedef struct brt_vdev_phys { 80 uint64_t bvp_mos_entries; 81 uint64_t bvp_size; 82 uint64_t bvp_byteorder; 83 uint64_t bvp_totalcount; 84 uint64_t bvp_rangesize; 85 uint64_t bvp_usedspace; 86 uint64_t bvp_savedspace; 87 } brt_vdev_phys_t; 88 89 typedef struct brt_vdev { 90 /* 91 * VDEV id. 92 */ 93 uint64_t bv_vdevid; 94 /* 95 * Is the structure initiated? 96 * (bv_entcount and bv_bitmap are allocated?) 97 */ 98 boolean_t bv_initiated; 99 /* 100 * Object number in the MOS for the entcount array and brt_vdev_phys. 101 */ 102 uint64_t bv_mos_brtvdev; 103 /* 104 * Object number in the MOS for the entries table. 105 */ 106 uint64_t bv_mos_entries; 107 /* 108 * Entries to sync. 109 */ 110 avl_tree_t bv_tree; 111 /* 112 * Does the bv_entcount[] array needs byte swapping? 113 */ 114 boolean_t bv_need_byteswap; 115 /* 116 * Number of entries in the bv_entcount[] array. 117 */ 118 uint64_t bv_size; 119 /* 120 * This is the array with BRT entry count per BRT_RANGESIZE. 121 */ 122 uint16_t *bv_entcount; 123 /* 124 * Sum of all bv_entcount[]s. 125 */ 126 uint64_t bv_totalcount; 127 /* 128 * Space on disk occupied by cloned blocks (without compression). 129 */ 130 uint64_t bv_usedspace; 131 /* 132 * How much additional space would be occupied without block cloning. 133 */ 134 uint64_t bv_savedspace; 135 /* 136 * brt_vdev_phys needs updating on disk. 137 */ 138 boolean_t bv_meta_dirty; 139 /* 140 * bv_entcount[] needs updating on disk. 141 */ 142 boolean_t bv_entcount_dirty; 143 /* 144 * bv_entcount[] potentially can be a bit too big to sychronize it all 145 * when we just changed few entcounts. The fields below allow us to 146 * track updates to bv_entcount[] array since the last sync. 147 * A single bit in the bv_bitmap represents as many entcounts as can 148 * fit into a single BRT_BLOCKSIZE. 149 * For example we have 65536 entcounts in the bv_entcount array 150 * (so the whole array is 128kB). We updated bv_entcount[2] and 151 * bv_entcount[5]. In that case only first bit in the bv_bitmap will 152 * be set and we will write only first BRT_BLOCKSIZE out of 128kB. 153 */ 154 ulong_t *bv_bitmap; 155 uint64_t bv_nblocks; 156 } brt_vdev_t; 157 158 /* 159 * In-core brt 160 */ 161 typedef struct brt { 162 krwlock_t brt_lock; 163 spa_t *brt_spa; 164 #define brt_mos brt_spa->spa_meta_objset 165 uint64_t brt_rangesize; 166 uint64_t brt_usedspace; 167 uint64_t brt_savedspace; 168 avl_tree_t brt_pending_tree[TXG_SIZE]; 169 kmutex_t brt_pending_lock[TXG_SIZE]; 170 /* Sum of all entries across all bv_trees. */ 171 uint64_t brt_nentries; 172 brt_vdev_t *brt_vdevs; 173 uint64_t brt_nvdevs; 174 } brt_t; 175 176 /* Size of bre_offset / sizeof (uint64_t). */ 177 #define BRT_KEY_WORDS (1) 178 179 /* 180 * In-core brt entry. 181 * On-disk we use bre_offset as the key and bre_refcount as the value. 182 */ 183 typedef struct brt_entry { 184 uint64_t bre_offset; 185 uint64_t bre_refcount; 186 avl_node_t bre_node; 187 } brt_entry_t; 188 189 typedef struct brt_pending_entry { 190 blkptr_t bpe_bp; 191 int bpe_count; 192 avl_node_t bpe_node; 193 } brt_pending_entry_t; 194 195 #ifdef __cplusplus 196 } 197 #endif 198 199 #endif /* _SYS_BRT_IMPL_H */ 200