1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _BCACHEFS_EXTENTS_FORMAT_H 3 #define _BCACHEFS_EXTENTS_FORMAT_H 4 5 /* 6 * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally 7 * preceded by checksum/compression information (bch_extent_crc32 or 8 * bch_extent_crc64). 9 * 10 * One major determining factor in the format of extents is how we handle and 11 * represent extents that have been partially overwritten and thus trimmed: 12 * 13 * If an extent is not checksummed or compressed, when the extent is trimmed we 14 * don't have to remember the extent we originally allocated and wrote: we can 15 * merely adjust ptr->offset to point to the start of the data that is currently 16 * live. The size field in struct bkey records the current (live) size of the 17 * extent, and is also used to mean "size of region on disk that we point to" in 18 * this case. 19 * 20 * Thus an extent that is not checksummed or compressed will consist only of a 21 * list of bch_extent_ptrs, with none of the fields in 22 * bch_extent_crc32/bch_extent_crc64. 23 * 24 * When an extent is checksummed or compressed, it's not possible to read only 25 * the data that is currently live: we have to read the entire extent that was 26 * originally written, and then return only the part of the extent that is 27 * currently live. 28 * 29 * Thus, in addition to the current size of the extent in struct bkey, we need 30 * to store the size of the originally allocated space - this is the 31 * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, 32 * when the extent is trimmed, instead of modifying the offset field of the 33 * pointer, we keep a second smaller offset field - "offset into the original 34 * extent of the currently live region". 35 * 36 * The other major determining factor is replication and data migration: 37 * 38 * Each pointer may have its own bch_extent_crc32/64. 
When doing a replicated 39 * write, we will initially write all the replicas in the same format, with the 40 * same checksum type and compression format - however, when copygc runs later (or 41 * tiering/cache promotion, anything that moves data), it is not in general 42 * going to rewrite all the pointers at once - one of the replicas may be in a 43 * bucket on one device that has very little fragmentation while another lives 44 * in a bucket that has become heavily fragmented, and thus is being rewritten 45 * sooner than the rest. 46 * 47 * Thus it will only move a subset of the pointers (or in the case of 48 * tiering/cache promotion perhaps add a single pointer without dropping any 49 * current pointers), and if the extent has been partially overwritten it must 50 * write only the currently live portion (or copygc would not be able to reduce 51 * fragmentation!) - which necessitates a different bch_extent_crc format for 52 * the new pointer. 53 * 54 * But in the interests of space efficiency, we don't want to store one 55 * bch_extent_crc for each pointer if we don't have to. 56 * 57 * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and 58 * bch_extent_ptrs appended arbitrarily one after the other. We determine the 59 * type of a given entry with a scheme similar to utf8 (except we're encoding a 60 * type, not a size), encoding the type in the position of the first set bit: 61 * 62 * bch_extent_crc32 - 0b1 63 * bch_extent_ptr - 0b10 64 * bch_extent_crc64 - 0b100 65 * 66 * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and 67 * bch_extent_crc64 is the least constrained). 68 * 69 * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, 70 * until the next bch_extent_crc32/64. 71 * 72 * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer 73 * is neither checksummed nor compressed. 
 */

/*
 * All known extent-entry types, x(name, discriminant).  The discriminant
 * is part of the on-disk format: existing values must never be changed
 * or reused.
 */
#define BCH_EXTENT_ENTRY_TYPES()		\
	x(ptr,			0)		\
	x(crc32,		1)		\
	x(crc64,		2)		\
	x(crc128,		3)		\
	x(stripe_ptr,		4)		\
	x(rebalance,		5)		\
	x(flags,		6)
#define BCH_EXTENT_ENTRY_MAX	7

enum bch_extent_entry_type {
#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};

/*
 * Smallest checksum entry: 32-bit checksum, no nonce.
 *
 * Compressed/uncompressed size are stored biased by 1 (so a 7-bit field
 * covers sizes up to CRC32_SIZE_MAX).  The entry type is encoded in the
 * position of the first set bit of @type, per the scheme described in
 * the comment at the top of this file.
 *
 * Field order is mirrored between the two endianness branches so the
 * on-disk bit layout is identical on little- and big-endian hosts.
 */
struct bch_extent_crc32 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u32			type:2,
				_compressed_size:7,
				_uncompressed_size:7,
				offset:7,
				_unused:1,
				csum_type:4,
				compression_type:4;
	__u32			csum;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u32			csum;
	__u32			compression_type:4,
				csum_type:4,
				_unused:1,
				offset:7,
				_uncompressed_size:7,
				_compressed_size:7,
				type:2;
#endif
} __packed __aligned(8);

/* Limits implied by the 7-bit size fields / absent nonce field above: */
#define CRC32_SIZE_MAX		(1U << 7)
#define CRC32_NONCE_MAX		0

/*
 * Mid-sized checksum entry: 9-bit sizes/offset, 10-bit nonce, and an
 * 80-bit checksum split across csum_hi (high 16 bits, by name) and
 * csum_lo.
 */
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:3,
				_compressed_size:9,
				_uncompressed_size:9,
				offset:9,
				nonce:10,
				csum_type:4,
				compression_type:4,
				csum_hi:16;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			csum_hi:16,
				compression_type:4,
				csum_type:4,
				nonce:10,
				offset:9,
				_uncompressed_size:9,
				_compressed_size:9,
				type:3;
#endif
	__u64			csum_lo;
} __packed __aligned(8);

#define CRC64_SIZE_MAX		(1U << 9)
#define CRC64_NONCE_MAX		((1U << 10) - 1)

/*
 * Largest checksum entry: 13-bit sizes/offset/nonce and a full
 * struct bch_csum (declared elsewhere) for the checksum itself.
 */
struct bch_extent_crc128 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:4,
				_compressed_size:13,
				_uncompressed_size:13,
				offset:13,
				nonce:13,
				csum_type:4,
				compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			compression_type:4,
				csum_type:4,
				nonce:13,
				offset:13,
				_uncompressed_size:13,
				_compressed_size:13,
				type:4;
#endif
	struct bch_csum		csum;
} __packed __aligned(8);

#define CRC128_SIZE_MAX		(1U << 13)
#define CRC128_NONCE_MAX	((1U << 13) - 1)

/*
 * A single replica pointer: device + bucket-relative location + generation.
 *
 * @reservation - pointer hasn't been written to, just reserved
 */
struct bch_extent_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:1,
				cached:1,
				unused:1,
				unwritten:1,
				offset:44, /* 8 petabytes */
				dev:8,
				gen:8;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			gen:8,
				dev:8,
				offset:44,
				unwritten:1,
				unused:1,
				cached:1,
				type:1;
#endif
} __packed __aligned(8);

/*
 * Erasure-coding entry: locates this extent's data within a stripe
 * (stripe @idx, @block within it, with @redundancy).
 * NOTE(review): unlike the other entries this one is not marked
 * __packed __aligned(8) — presumably intentional, but worth confirming.
 */
struct bch_extent_stripe_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:5,
				block:8,
				redundancy:4,
				idx:47;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			idx:47,
				redundancy:4,
				block:8,
				type:5;
#endif
};

/* Per-extent flag bits, x(name, bit): on-disk, do not renumber. */
#define BCH_EXTENT_FLAGS()		\
	x(poisoned,		0)

enum bch_extent_flags_e {
#define x(n, v) BCH_EXTENT_FLAG_##n = v,
	BCH_EXTENT_FLAGS()
#undef x
};

/* Extent entry holding the BCH_EXTENT_FLAGS() bits in @flags. */
struct bch_extent_flags {
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u64			type:7,
				flags:57;
#elif defined (__BIG_ENDIAN_BITFIELD)
	__u64			flags:57,
				type:7;
#endif
};

/* bch_extent_rebalance: */
#include "rebalance_format.h"

/*
 * One entry in an extent value; the active member is determined by the
 * position of the first set bit of @type (see comment at top of file).
 * @type must overlay the low-order bits of the first word, hence the
 * padding word on 32-bit big-endian.
 */
union bch_extent_entry {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
	unsigned long			type;
#elif __BITS_PER_LONG == 32
	struct {
		unsigned long		pad;
		unsigned long		type;
	};
#else
#error edit for your odd byteorder.
#endif

/* One member per entry type, generated from BCH_EXTENT_ENTRY_TYPES(). */
#define x(f, n) struct bch_extent_##f f;
	BCH_EXTENT_ENTRY_TYPES()
#undef x
};

/*
 * Btree node pointer, v1: just a list of replica pointers.
 * _data[0] is a zero-length array sharing start[]'s address —
 * presumably for word-granular access to the value; confirm with users.
 */
struct bch_btree_ptr {
	struct bch_val		v;

	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);

/*
 * Btree node pointer, v2: adds a sequence number, sectors_written,
 * flags, and the node's min_key ahead of the replica pointers.
 * @mem_ptr is host-native (not __le64) — in-memory scratch, not
 * interpreted as part of the on-disk format here.
 */
struct bch_btree_ptr_v2 {
	struct bch_val		v;

	__u64			mem_ptr;
	__le64			seq;
	__le16			sectors_written;
	__le16			flags;
	struct bpos		min_key;
	__u64			_data[0];
	struct bch_extent_ptr	start[];
} __packed __aligned(8);

LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);

/*
 * An extent value: a variable-length list of bch_extent_entry, walked
 * via the type-in-first-set-bit scheme described at the top of file.
 */
struct bch_extent {
	struct bch_val		v;

	__u64			_data[0];
	union bch_extent_entry	start[];
} __packed __aligned(8);

/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
	((sizeof(struct bch_extent_crc128) +			\
	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))

/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX				\
	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))

/* Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX		(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)

/* Btree pointers don't carry around checksums: */
#define BKEY_BTREE_PTR_VAL_U64s_MAX				\
	((sizeof(struct bch_btree_ptr_v2) +			\
	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
#define BKEY_BTREE_PTR_U64s_MAX					\
	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)

/*
 * Space reservation: a value type with no pointers — space has been
 * reserved (for @nr_replicas replicas) but nothing written yet.
 */
struct bch_reservation {
	struct bch_val		v;

	__le32			generation;
	__u8			nr_replicas;
	__u8			pad[3];
} __packed __aligned(8);

/* File data stored inline in the value itself, no pointers at all. */
struct bch_inline_data {
	struct bch_val		v;
	u8			data[];
};

#endif /* _BCACHEFS_EXTENTS_FORMAT_H */