/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _LINUX_BCACHE_H
#define _LINUX_BCACHE_H

/*
 * Bcache on disk data structures
 */

#include <linux/types.h>

#define BITMASK(name, type, field, offset, size)		\
static inline __u64 name(const type *k)			\
{ return (k->field >> offset) & ~(~0ULL << size); }		\
								\
static inline void SET_##name(type *k, __u64 v)			\
{								\
	k->field &= ~(~(~0ULL << size) << offset);		\
	k->field |= (v & ~(~0ULL << size)) << offset;		\
}

/* Btree keys - all units are in sectors */

struct bkey {
	__u64	high;
	__u64	low;
	__u64	ptr[];
};

#define KEY_FIELD(name, field, offset, size)				\
	BITMASK(name, struct bkey, field, offset, size)

#define PTR_FIELD(name, offset, size)					\
static inline __u64 name(const struct bkey *k, unsigned int i)		\
{ return (k->ptr[i] >> offset) & ~(~0ULL << size); }			\
									\
static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)	\
{									\
	k->ptr[i] &= ~(~(~0ULL << size) << offset);			\
	k->ptr[i] |= (v & ~(~0ULL << size)) << offset;			\
}

#define KEY_SIZE_BITS		16
#define KEY_MAX_U64S		8

KEY_FIELD(KEY_PTRS,	high, 60, 3)
KEY_FIELD(__PAD0,	high, 58, 2)
KEY_FIELD(KEY_CSUM,	high, 56, 2)
KEY_FIELD(__PAD1,	high, 55, 1)
KEY_FIELD(KEY_DIRTY,	high, 36, 1)

KEY_FIELD(KEY_SIZE,	high, 20, KEY_SIZE_BITS)
KEY_FIELD(KEY_INODE,	high, 0,  20)

/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */

static inline __u64 KEY_OFFSET(const struct bkey *k)
{
	return k->low;
}

static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
{
	k->low = v;
}
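/*
 * Illustrative sketch, not part of the on-disk format: how the accessors
 * generated by KEY_FIELD()/BITMASK() above fit together. The helper name
 * example_pack_key() is hypothetical.
 */
static inline void example_pack_key(struct bkey *k)
{
	k->high = 0;
	k->low	= 0;
	SET_KEY_INODE(k, 5);		/* 20-bit field at bits 0..19 of high */
	SET_KEY_SIZE(k, 32);		/* 16-bit field at bits 20..35, in sectors */
	SET_KEY_OFFSET(k, 1024);	/* the offset occupies all of low */
}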
/*
 * The high bit being set is a relic from when we used it to do binary
 * searches - it told you where a key started. It's not used anymore,
 * and can probably be safely dropped.
 */
#define KEY(inode, offset, size)					\
((struct bkey) {							\
	.high = (1ULL << 63) | ((__u64) (size) << 20) | (inode),	\
	.low = (offset)							\
})

#define ZERO_KEY			KEY(0, 0, 0)

#define MAX_KEY_INODE			(~(~0 << 20))
#define MAX_KEY_OFFSET			(~0ULL >> 1)
#define MAX_KEY				KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)

#define KEY_START(k)			(KEY_OFFSET(k) - KEY_SIZE(k))
#define START_KEY(k)			KEY(KEY_INODE(k), KEY_START(k), 0)

#define PTR_DEV_BITS			12

PTR_FIELD(PTR_DEV,			51, PTR_DEV_BITS)
PTR_FIELD(PTR_OFFSET,			8,  43)
PTR_FIELD(PTR_GEN,			0,  8)

#define PTR_CHECK_DEV			((1 << PTR_DEV_BITS) - 1)

#define MAKE_PTR(gen, offset, dev)					\
	((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)

/* Bkey utility code */

static inline unsigned long bkey_u64s(const struct bkey *k)
{
	return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
}

static inline unsigned long bkey_bytes(const struct bkey *k)
{
	return bkey_u64s(k) * sizeof(__u64);
}

#define bkey_copy(_dest, _src)	unsafe_memcpy(_dest, _src, bkey_bytes(_src), \
					/* bkey is always padded */)

static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
	SET_KEY_INODE(dest, KEY_INODE(src));
	SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}

static inline struct bkey *bkey_next(const struct bkey *k)
{
	__u64 *d = (void *) k;

	return (struct bkey *) (d + bkey_u64s(k));
}

static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
{
	__u64 *d = (void *) k;

	return (struct bkey *) (d + nr_keys);
}

/* Enough for a key with 6 pointers */
#define BKEY_PAD		8

#define BKEY_PADDED(key)					\
	union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
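/*
 * Illustrative sketch, not part of the on-disk format: assembling a key
 * with one pointer on the stack. BKEY_PADDED() reserves room for the
 * flexible ptr[] array, and KEY() leaves KEY_PTRS at zero, so the pointer
 * count is set explicitly. example_make_extent() is a hypothetical name.
 */
static inline unsigned long example_make_extent(void)
{
	BKEY_PADDED(k) tmp;

	/* inode 1, extent ending at sector 128, 8 sectors long */
	tmp.k = KEY(1, 128, 8);
	/* generation 0, cache offset 2048 sectors, device 0 in the set */
	tmp.k.ptr[0] = MAKE_PTR(0, 2048, 0);
	SET_KEY_PTRS(&tmp.k, 1);

	/* 2 header u64s + 1 pointer = 24 bytes on disk */
	return bkey_bytes(&tmp.k);
}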
/* Superblock */

/* Version 0: Cache device
 * Version 1: Backing device
 * Version 2: Seed pointer into btree node checksum
 * Version 3: Cache device with new UUID format
 * Version 4: Backing device with data offset
 * Version 5: Cache device with features support
 * Version 6: Backing device with features support
 */
#define BCACHE_SB_VERSION_CDEV			0
#define BCACHE_SB_VERSION_BDEV			1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID	3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET	4
#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES	5
#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES	6
#define BCACHE_SB_MAX_VERSION			6

#define SB_SECTOR		8
#define SB_OFFSET		(SB_SECTOR << SECTOR_SHIFT)
#define SB_SIZE			4096
#define SB_LABEL_SIZE		32
#define SB_JOURNAL_BUCKETS	256U
/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
#define MAX_CACHES_PER_SET	8

#define BDEV_DATA_START_DEFAULT	16	/* sectors */

struct cache_sb_disk {
	__le64			csum;
	__le64			offset;	/* sector where this sb was written */
	__le64			version;

	__u8			magic[16];

	__u8			uuid[16];
	union {
		__u8		set_uuid[16];
		__le64		set_magic;
	};
	__u8			label[SB_LABEL_SIZE];

	__le64			flags;
	__le64			seq;

	__le64			feature_compat;
	__le64			feature_incompat;
	__le64			feature_ro_compat;

	__le64			pad[5];

	union {
	struct {
		/* Cache devices */
		__le64		nbuckets;	/* device size */

		__le16		block_size;	/* sectors */
		__le16		bucket_size;	/* sectors */

		__le16		nr_in_set;
		__le16		nr_this_dev;
	};
	struct {
		/* Backing devices */
		__le64		data_offset;

		/*
		 * block_size from the cache device section is still used by
		 * backing devices, so don't add anything here until we fix
		 * things to not need it for backing devices anymore
		 */
	};
	};

	__le32			last_mount;	/* time overflow in y2106 */

	__le16			first_bucket;
	union {
		__le16		njournal_buckets;
		__le16		keys;
	};
	__le64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
	__le16			obso_bucket_size_hi;	/* obsoleted */
};

/*
 * This is the in-memory bcache super block.
 * NOTE: cache_sb does NOT map exactly to cache_sb_disk; member sizes,
 * ordering and even the whole struct size may differ from cache_sb_disk.
 */
struct cache_sb {
	__u64			offset;	/* sector where this sb was written */
	__u64			version;

	__u8			magic[16];

	__u8			uuid[16];
	union {
		__u8		set_uuid[16];
		__u64		set_magic;
	};
	__u8			label[SB_LABEL_SIZE];

	__u64			flags;
	__u64			seq;

	__u64			feature_compat;
	__u64			feature_incompat;
	__u64			feature_ro_compat;

	union {
	struct {
		/* Cache devices */
		__u64		nbuckets;	/* device size */

		__u16		block_size;	/* sectors */
		__u16		nr_in_set;
		__u16		nr_this_dev;
		__u32		bucket_size;	/* sectors */
	};
	struct {
		/* Backing devices */
		__u64		data_offset;

		/*
		 * block_size from the cache device section is still used by
		 * backing devices, so don't add anything here until we fix
		 * things to not need it for backing devices anymore
		 */
	};
	};

	__u32			last_mount;	/* time overflow in y2106 */

	__u16			first_bucket;
	union {
		__u16		njournal_buckets;
		__u16		keys;
	};
	__u64			d[SB_JOURNAL_BUCKETS];	/* journal buckets */
};

static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
{
	return sb->version == BCACHE_SB_VERSION_BDEV
		|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET
		|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES;
}

BITMASK(CACHE_SYNC,			struct cache_sb, flags, 0, 1);
BITMASK(CACHE_DISCARD,			struct cache_sb, flags, 1, 1);
BITMASK(CACHE_REPLACEMENT,		struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU		0U
#define CACHE_REPLACEMENT_FIFO		1U
#define CACHE_REPLACEMENT_RANDOM	2U

BITMASK(BDEV_CACHE_MODE,		struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH		0U
#define CACHE_MODE_WRITEBACK		1U
#define CACHE_MODE_WRITEAROUND		2U
#define CACHE_MODE_NONE			3U

BITMASK(BDEV_STATE,			struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE			0U
#define BDEV_STATE_CLEAN		1U
#define BDEV_STATE_DIRTY		2U
#define BDEV_STATE_STALE		3U

/*
 * Magic numbers
 *
 * The various other data structures have their own magic numbers, which are
 * xored with the first part of the cache set's UUID
 */

#define JSET_MAGIC		0x245235c1a3625032ULL
#define PSET_MAGIC		0x6750e15f87337f91ULL
#define BSET_MAGIC		0x90135c78b99e07f5ULL

static inline __u64 jset_magic(struct cache_sb *sb)
{
	return sb->set_magic ^ JSET_MAGIC;
}

static inline __u64 pset_magic(struct cache_sb *sb)
{
	return sb->set_magic ^ PSET_MAGIC;
}

static inline __u64 bset_magic(struct cache_sb *sb)
{
	return sb->set_magic ^ BSET_MAGIC;
}
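/*
 * Illustrative sketch, hypothetical helper: because each structure's magic
 * constant is xored with this cache set's set_magic, validating data read
 * from disk is a single comparison, and stale data left over from a
 * different cache set fails the check.
 */
static inline _Bool example_jset_magic_ok(struct cache_sb *sb, __u64 magic)
{
	/* magic would be the value read from a journal entry's header */
	return magic == jset_magic(sb);
}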
/*
 * Journal
 *
 * On disk format for a journal entry:
 * seq is monotonically increasing; every journal entry has its own unique
 * sequence number.
 *
 * last_seq is the oldest journal entry that still has keys the btree hasn't
 * flushed to disk yet.
 *
 * version is for on disk format changes.
 */

#define BCACHE_JSET_VERSION_UUIDv1	1
#define BCACHE_JSET_VERSION_UUID	1	/* Always latest UUID format */
#define BCACHE_JSET_VERSION		1

struct jset {
	__u64			csum;
	__u64			magic;
	__u64			seq;
	__u32			version;
	__u32			keys;

	__u64			last_seq;

	BKEY_PADDED(uuid_bucket);
	BKEY_PADDED(btree_root);
	__u16			btree_level;
	__u16			pad[3];

	__u64			prio_bucket[MAX_CACHES_PER_SET];

	union {
		struct bkey	start[0];
		__u64		d[0];
	};
};

/* Bucket prios/gens */

struct prio_set {
	__u64			csum;
	__u64			magic;
	__u64			seq;
	__u32			version;
	__u32			pad;

	__u64			next_bucket;

	struct bucket_disk {
		__u16		prio;
		__u8		gen;
	} __attribute((packed)) data[];
};

/* UUIDS - per backing device/flash only volume metadata */

struct uuid_entry {
	union {
		struct {
			__u8	uuid[16];
			__u8	label[32];
			__u32	first_reg; /* time overflow in y2106 */
			__u32	last_reg;
			__u32	invalidated;

			__u32	flags;
			/* Size of flash only volumes */
			__u64	sectors;
		};

		__u8		pad[128];
	};
};

BITMASK(UUID_FLASH_ONLY,	struct uuid_entry, flags, 0, 1);

/* Btree nodes */

/* Version 1: Seed pointer into btree node checksum
 */
#define BCACHE_BSET_CSUM	1
#define BCACHE_BSET_VERSION	1

/*
 * Btree nodes
 *
 * On disk a btree node is a list/log of these; within each set the keys are
 * sorted
 */
struct bset {
	__u64			csum;
	__u64			magic;
	__u64			seq;
	__u32			version;
	__u32			keys;

	union {
		struct bkey	start[0];
		__u64		d[0];
	};
};

/* OBSOLETE */

/* UUIDS - per backing device/flash only volume metadata */

struct uuid_entry_v0 {
	__u8		uuid[16];
	__u8		label[32];
	__u32		first_reg;
	__u32		last_reg;
	__u32		invalidated;
	__u32		pad;
};

#endif /* _LINUX_BCACHE_H */