1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _BCACHEFS_EXTENTS_FORMAT_H 3 #define _BCACHEFS_EXTENTS_FORMAT_H 4 5 /* 6 * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally 7 * preceded by checksum/compression information (bch_extent_crc32 or 8 * bch_extent_crc64). 9 * 10 * One major determining factor in the format of extents is how we handle and 11 * represent extents that have been partially overwritten and thus trimmed: 12 * 13 * If an extent is not checksummed or compressed, when the extent is trimmed we 14 * don't have to remember the extent we originally allocated and wrote: we can 15 * merely adjust ptr->offset to point to the start of the data that is currently 16 * live. The size field in struct bkey records the current (live) size of the 17 * extent, and is also used to mean "size of region on disk that we point to" in 18 * this case. 19 * 20 * Thus an extent that is not checksummed or compressed will consist only of a 21 * list of bch_extent_ptrs, with none of the fields in 22 * bch_extent_crc32/bch_extent_crc64. 23 * 24 * When an extent is checksummed or compressed, it's not possible to read only 25 * the data that is currently live: we have to read the entire extent that was 26 * originally written, and then return only the part of the extent that is 27 * currently live. 28 * 29 * Thus, in addition to the current size of the extent in struct bkey, we need 30 * to store the size of the originally allocated space - this is the 31 * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, 32 * when the extent is trimmed, instead of modifying the offset field of the 33 * pointer, we keep a second smaller offset field - "offset into the original 34 * extent of the currently live region". 35 * 36 * The other major determining factor is replication and data migration: 37 * 38 * Each pointer may have its own bch_extent_crc32/64. When doing a replicated 39 * write, we will initially write all the replicas in the same format, with the 40 * same checksum type and compression format - however, when copygc runs later (or 41 * tiering/cache promotion, anything that moves data), it is not in general 42 * going to rewrite all the pointers at once - one of the replicas may be in a 43 * bucket on one device that has very little fragmentation while another lives 44 * in a bucket that has become heavily fragmented, and thus is being rewritten 45 * sooner than the rest. 46 * 47 * Thus it will only move a subset of the pointers (or in the case of 48 * tiering/cache promotion perhaps add a single pointer without dropping any 49 * current pointers), and if the extent has been partially overwritten it must 50 * write only the currently live portion (or copygc would not be able to reduce 51 * fragmentation!) - which necessitates a different bch_extent_crc format for 52 * the new pointer. 53 * 54 * But in the interests of space efficiency, we don't want to store one 55 * bch_extent_crc for each pointer if we don't have to. 56 * 57 * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and 58 * bch_extent_ptrs appended arbitrarily one after the other. We determine the 59 * type of a given entry with a scheme similar to utf8 (except we're encoding a 60 * type, not a size), encoding the type in the position of the first set bit: 61 * 62 * bch_extent_crc32 - 0b1 63 * bch_extent_ptr - 0b10 64 * bch_extent_crc64 - 0b100 65 * 66 * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and 67 * bch_extent_crc64 is the least constrained). 68 * 69 * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, 70 * until the next bch_extent_crc32/64. 71 * 72 * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer 73 * is neither checksummed nor compressed. 74 */ 75 76 #define BCH_EXTENT_ENTRY_TYPES() \ 77 x(ptr, 0) \ 78 x(crc32, 1) \ 79 x(crc64, 2) \ 80 x(crc128, 3) \ 81 x(stripe_ptr, 4) \ 82 x(rebalance, 5) 83 #define BCH_EXTENT_ENTRY_MAX 6 84 85 enum bch_extent_entry_type { 86 #define x(f, n) BCH_EXTENT_ENTRY_##f = n, 87 BCH_EXTENT_ENTRY_TYPES() 88 #undef x 89 }; 90 91 /* Compressed/uncompressed size are stored biased by 1: */ 92 struct bch_extent_crc32 { 93 #if defined(__LITTLE_ENDIAN_BITFIELD) 94 __u32 type:2, 95 _compressed_size:7, 96 _uncompressed_size:7, 97 offset:7, 98 _unused:1, 99 csum_type:4, 100 compression_type:4; 101 __u32 csum; 102 #elif defined (__BIG_ENDIAN_BITFIELD) 103 __u32 csum; 104 __u32 compression_type:4, 105 csum_type:4, 106 _unused:1, 107 offset:7, 108 _uncompressed_size:7, 109 _compressed_size:7, 110 type:2; 111 #endif 112 } __packed __aligned(8); 113 114 #define CRC32_SIZE_MAX (1U << 7) 115 #define CRC32_NONCE_MAX 0 116 117 struct bch_extent_crc64 { 118 #if defined(__LITTLE_ENDIAN_BITFIELD) 119 __u64 type:3, 120 _compressed_size:9, 121 _uncompressed_size:9, 122 offset:9, 123 nonce:10, 124 csum_type:4, 125 compression_type:4, 126 csum_hi:16; 127 #elif defined (__BIG_ENDIAN_BITFIELD) 128 __u64 csum_hi:16, 129 compression_type:4, 130 csum_type:4, 131 nonce:10, 132 offset:9, 133 _uncompressed_size:9, 134 _compressed_size:9, 135 type:3; 136 #endif 137 __u64 csum_lo; 138 } __packed __aligned(8); 139 140 #define CRC64_SIZE_MAX (1U << 9) 141 #define CRC64_NONCE_MAX ((1U << 10) - 1) 142 143 struct bch_extent_crc128 { 144 #if defined(__LITTLE_ENDIAN_BITFIELD) 145 __u64 type:4, 146 _compressed_size:13, 147 _uncompressed_size:13, 148 offset:13, 149 nonce:13, 150 csum_type:4, 151 compression_type:4; 152 #elif defined (__BIG_ENDIAN_BITFIELD) 153 __u64 compression_type:4, 154 csum_type:4, 155 nonce:13, 156 offset:13, 157 _uncompressed_size:13, 158 _compressed_size:13, 159 type:4; 160 #endif 161 struct bch_csum csum; 162 } __packed __aligned(8); 163 164 #define CRC128_SIZE_MAX (1U << 13) 165 #define CRC128_NONCE_MAX ((1U << 13) - 1) 166 167 /* 168 * @reservation - pointer hasn't been written to, just reserved 169 */ 170 struct bch_extent_ptr { 171 #if defined(__LITTLE_ENDIAN_BITFIELD) 172 __u64 type:1, 173 cached:1, 174 unused:1, 175 unwritten:1, 176 offset:44, /* 8 petabytes */ 177 dev:8, 178 gen:8; 179 #elif defined (__BIG_ENDIAN_BITFIELD) 180 __u64 gen:8, 181 dev:8, 182 offset:44, 183 unwritten:1, 184 unused:1, 185 cached:1, 186 type:1; 187 #endif 188 } __packed __aligned(8); 189 190 struct bch_extent_stripe_ptr { 191 #if defined(__LITTLE_ENDIAN_BITFIELD) 192 __u64 type:5, 193 block:8, 194 redundancy:4, 195 idx:47; 196 #elif defined (__BIG_ENDIAN_BITFIELD) 197 __u64 idx:47, 198 redundancy:4, 199 block:8, 200 type:5; 201 #endif 202 }; 203 204 struct bch_extent_rebalance { 205 #if defined(__LITTLE_ENDIAN_BITFIELD) 206 __u64 type:6, 207 unused:34, 208 compression:8, /* enum bch_compression_opt */ 209 target:16; 210 #elif defined (__BIG_ENDIAN_BITFIELD) 211 __u64 target:16, 212 compression:8, 213 unused:34, 214 type:6; 215 #endif 216 }; 217 218 union bch_extent_entry { 219 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64 220 unsigned long type; 221 #elif __BITS_PER_LONG == 32 222 struct { 223 unsigned long pad; 224 unsigned long type; 225 }; 226 #else 227 #error edit for your odd byteorder. 228 #endif 229 230 #define x(f, n) struct bch_extent_##f f; 231 BCH_EXTENT_ENTRY_TYPES() 232 #undef x 233 }; 234 235 struct bch_btree_ptr { 236 struct bch_val v; 237 238 __u64 _data[0]; 239 struct bch_extent_ptr start[]; 240 } __packed __aligned(8); 241 242 struct bch_btree_ptr_v2 { 243 struct bch_val v; 244 245 __u64 mem_ptr; 246 __le64 seq; 247 __le16 sectors_written; 248 __le16 flags; 249 struct bpos min_key; 250 __u64 _data[0]; 251 struct bch_extent_ptr start[]; 252 } __packed __aligned(8); 253 254 LE16_BITMASK(BTREE_PTR_RANGE_UPDATED, struct bch_btree_ptr_v2, flags, 0, 1); 255 256 struct bch_extent { 257 struct bch_val v; 258 259 __u64 _data[0]; 260 union bch_extent_entry start[]; 261 } __packed __aligned(8); 262 263 /* Maximum size (in u64s) a single pointer could be: */ 264 #define BKEY_EXTENT_PTR_U64s_MAX\ 265 ((sizeof(struct bch_extent_crc128) + \ 266 sizeof(struct bch_extent_ptr)) / sizeof(__u64)) 267 268 /* Maximum possible size of an entire extent value: */ 269 #define BKEY_EXTENT_VAL_U64s_MAX \ 270 (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) 271 272 /* * Maximum possible size of an entire extent, key + value: */ 273 #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) 274 275 /* Btree pointers don't carry around checksums: */ 276 #define BKEY_BTREE_PTR_VAL_U64s_MAX \ 277 ((sizeof(struct bch_btree_ptr_v2) + \ 278 sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64)) 279 #define BKEY_BTREE_PTR_U64s_MAX \ 280 (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) 281 282 struct bch_reservation { 283 struct bch_val v; 284 285 __le32 generation; 286 __u8 nr_replicas; 287 __u8 pad[3]; 288 } __packed __aligned(8); 289 290 struct bch_inline_data { 291 struct bch_val v; 292 u8 data[]; 293 }; 294 295 #endif /* _BCACHEFS_EXTENTS_FORMAT_H */ 296