1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_inode.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_btree.h"
20 #include "xfs_btree_staging.h"
21 #include "xfs_metafile.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_trace.h"
25 #include "xfs_cksum.h"
26 #include "xfs_error.h"
27 #include "xfs_extent_busy.h"
28 #include "xfs_rtgroup.h"
29 #include "xfs_bmap.h"
30 #include "xfs_health.h"
31 #include "xfs_buf_mem.h"
32 #include "xfs_btree_mem.h"
33
34 static struct kmem_cache *xfs_rtrmapbt_cur_cache;
35
36 /*
37 * Realtime Reverse Map btree.
38 *
39 * This is a btree used to track the owner(s) of a given extent in the realtime
40 * device. See the comments in xfs_rmap_btree.c for more information.
41 *
42 * This tree is basically the same as the regular rmap btree except that it
43 * is rooted in an inode and does not live in free space.
44 */
45
46 static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(struct xfs_btree_cur * cur)47 xfs_rtrmapbt_dup_cursor(
48 struct xfs_btree_cur *cur)
49 {
50 return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
51 }
52
53 STATIC int
xfs_rtrmapbt_get_minrecs(struct xfs_btree_cur * cur,int level)54 xfs_rtrmapbt_get_minrecs(
55 struct xfs_btree_cur *cur,
56 int level)
57 {
58 if (level == cur->bc_nlevels - 1) {
59 struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
60
61 return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
62 level == 0) / 2;
63 }
64
65 return cur->bc_mp->m_rtrmap_mnr[level != 0];
66 }
67
68 STATIC int
xfs_rtrmapbt_get_maxrecs(struct xfs_btree_cur * cur,int level)69 xfs_rtrmapbt_get_maxrecs(
70 struct xfs_btree_cur *cur,
71 int level)
72 {
73 if (level == cur->bc_nlevels - 1) {
74 struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
75
76 return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
77 level == 0);
78 }
79
80 return cur->bc_mp->m_rtrmap_mxr[level != 0];
81 }
82
83 /* Calculate number of records in the ondisk realtime rmap btree inode root. */
84 unsigned int
xfs_rtrmapbt_droot_maxrecs(unsigned int blocklen,bool leaf)85 xfs_rtrmapbt_droot_maxrecs(
86 unsigned int blocklen,
87 bool leaf)
88 {
89 blocklen -= sizeof(struct xfs_rtrmap_root);
90
91 if (leaf)
92 return blocklen / sizeof(struct xfs_rmap_rec);
93 return blocklen / (2 * sizeof(struct xfs_rmap_key) +
94 sizeof(xfs_rtrmap_ptr_t));
95 }
96
97 /*
98 * Get the maximum records we could store in the on-disk format.
99 *
100 * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
101 * for the root node this checks the available space in the dinode fork
102 * so that we can resize the in-memory buffer to match it. After a
103 * resize to the maximum size this function returns the same value
104 * as xfs_rtrmapbt_get_maxrecs for the root node, too.
105 */
106 STATIC int
xfs_rtrmapbt_get_dmaxrecs(struct xfs_btree_cur * cur,int level)107 xfs_rtrmapbt_get_dmaxrecs(
108 struct xfs_btree_cur *cur,
109 int level)
110 {
111 if (level != cur->bc_nlevels - 1)
112 return cur->bc_mp->m_rtrmap_mxr[level != 0];
113 return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
114 }
115
/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	/* Clear the unwritten bit; keys never encode written status. */
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}
125
/* Derive the low search key for a record: startblock, owner, masked offset. */
STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}
135
/*
 * Derive the high search key for a record, i.e. the key of the last block
 * that the record covers.  Startblock is advanced by (blockcount - 1); the
 * file offset is advanced by the same amount unless the owner is a special
 * (non-inode) owner or the record maps a bmbt block, in which case the offset
 * field carries flags rather than a real file offset.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* Distance from the first to the last mapped block. */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Bump only the offset bits; preserve the flag bits above the mask. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
157
/* Pack the cursor's in-core rmap record into ondisk (big-endian) form. */
STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	/* offset_pack folds the fork/bmbt/unwritten flags into the offset. */
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}
169
/*
 * Initialize the root pointer.  Inode-rooted btrees have no ondisk root
 * pointer, so this is always zero.
 */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}
177
/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
187
188 STATIC int64_t
xfs_rtrmapbt_key_diff(struct xfs_btree_cur * cur,const union xfs_btree_key * key)189 xfs_rtrmapbt_key_diff(
190 struct xfs_btree_cur *cur,
191 const union xfs_btree_key *key)
192 {
193 struct xfs_rmap_irec *rec = &cur->bc_rec.r;
194 const struct xfs_rmap_key *kp = &key->rmap;
195 __u64 x, y;
196 int64_t d;
197
198 d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
199 if (d)
200 return d;
201
202 x = be64_to_cpu(kp->rm_owner);
203 y = rec->rm_owner;
204 if (x > y)
205 return 1;
206 else if (y > x)
207 return -1;
208
209 x = offset_keymask(be64_to_cpu(kp->rm_offset));
210 y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
211 if (x > y)
212 return 1;
213 else if (y > x)
214 return -1;
215 return 0;
216 }
217
/*
 * Compare two ondisk keys, honoring an optional @mask that selects which key
 * fields participate in the comparison.  A nonzero mask field means "compare
 * this field"; a NULL mask compares everything.
 */
STATIC int64_t
xfs_rtrmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		     be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		x = be64_to_cpu(kp1->rm_owner);
		y = be64_to_cpu(kp2->rm_owner);
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		/* Ignore the unwritten bit when ordering keys. */
		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	return 0;
}
261
/*
 * Structural verifier for an ondisk rtrmapbt block: checks the magic, the
 * feature bit, the v5 long-format block header, and that the level and record
 * count are within geometry limits.  Returns the failing address or NULL.
 */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	/* rtrmapbt blocks require the rmapbt feature to be enabled. */
	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}
285
286 static void
xfs_rtrmapbt_read_verify(struct xfs_buf * bp)287 xfs_rtrmapbt_read_verify(
288 struct xfs_buf *bp)
289 {
290 xfs_failaddr_t fa;
291
292 if (!xfs_btree_fsblock_verify_crc(bp))
293 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
294 else {
295 fa = xfs_rtrmapbt_verify(bp);
296 if (fa)
297 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
298 }
299
300 if (bp->b_error)
301 trace_xfs_btree_corrupt(bp, _RET_IP_);
302 }
303
304 static void
xfs_rtrmapbt_write_verify(struct xfs_buf * bp)305 xfs_rtrmapbt_write_verify(
306 struct xfs_buf *bp)
307 {
308 xfs_failaddr_t fa;
309
310 fa = xfs_rtrmapbt_verify(bp);
311 if (fa) {
312 trace_xfs_btree_corrupt(bp, _RET_IP_);
313 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
314 return;
315 }
316 xfs_btree_fsblock_calc_crc(bp);
317
318 }
319
/* Buffer verifier ops for ondisk rtrmapbt blocks (CRC-enabled magic only). */
const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};
327
328 STATIC int
xfs_rtrmapbt_keys_inorder(struct xfs_btree_cur * cur,const union xfs_btree_key * k1,const union xfs_btree_key * k2)329 xfs_rtrmapbt_keys_inorder(
330 struct xfs_btree_cur *cur,
331 const union xfs_btree_key *k1,
332 const union xfs_btree_key *k2)
333 {
334 uint32_t x;
335 uint32_t y;
336 uint64_t a;
337 uint64_t b;
338
339 x = be32_to_cpu(k1->rmap.rm_startblock);
340 y = be32_to_cpu(k2->rmap.rm_startblock);
341 if (x < y)
342 return 1;
343 else if (x > y)
344 return 0;
345 a = be64_to_cpu(k1->rmap.rm_owner);
346 b = be64_to_cpu(k2->rmap.rm_owner);
347 if (a < b)
348 return 1;
349 else if (a > b)
350 return 0;
351 a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
352 b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
353 if (a <= b)
354 return 1;
355 return 0;
356 }
357
358 STATIC int
xfs_rtrmapbt_recs_inorder(struct xfs_btree_cur * cur,const union xfs_btree_rec * r1,const union xfs_btree_rec * r2)359 xfs_rtrmapbt_recs_inorder(
360 struct xfs_btree_cur *cur,
361 const union xfs_btree_rec *r1,
362 const union xfs_btree_rec *r2)
363 {
364 uint32_t x;
365 uint32_t y;
366 uint64_t a;
367 uint64_t b;
368
369 x = be32_to_cpu(r1->rmap.rm_startblock);
370 y = be32_to_cpu(r2->rmap.rm_startblock);
371 if (x < y)
372 return 1;
373 else if (x > y)
374 return 0;
375 a = be64_to_cpu(r1->rmap.rm_owner);
376 b = be64_to_cpu(r2->rmap.rm_owner);
377 if (a < b)
378 return 1;
379 else if (a > b)
380 return 0;
381 a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
382 b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
383 if (a <= b)
384 return 1;
385 return 0;
386 }
387
/* Decide if two keys describe physically adjacent extents. */
STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}
407
/*
 * Slide the node pointer array of an incore root block from its position in
 * an @old_size root to its position in a @new_size root.  The pointer area
 * floats at the end of the block, so it moves whenever the root is resized.
 * memmove is required because the source and destination ranges may overlap.
 */
static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}
423
/*
 * Resize the incore btree root block to hold @new_numrecs records.  Growing
 * reallocates first and then moves the pointer array outward; shrinking moves
 * the pointers inward first and then reallocates, so the pointer data is
 * never moved past the end of a live allocation.  Returns the (possibly
 * relocated) root block, or NULL if the root was freed.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs.  If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header.  Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	/* The new root must still fit inside the inode fork. */
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}
488
/* Ondisk rtrmapbt operations: an overlapping, inode-rooted long-pointer btree. */
const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};
522
/*
 * Allocate a new rt rmap btree cursor for the group's rmap inode.  The inode
 * must already be locked (shared or exclusive); the cursor holds a group
 * reference that is dropped when the cursor is deleted.
 */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	/* Tree height comes from the incore root; +1 for the root level. */
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}
546
547 #ifdef CONFIG_XFS_BTREE_IN_MEM
548 /*
549 * Validate an in-memory realtime rmap btree block. Callers are allowed to
550 * generate an in-memory btree even if the ondisk feature is not enabled.
551 */
/*
 * Structural verifier for an in-memory rtrmapbt block.  Unlike the ondisk
 * verifier, this tolerates a missing rmapbt feature bit, since scrub can
 * build an in-memory btree before the ondisk feature exists; in that case the
 * level is bounded by the theoretical ondisk maximum instead.
 */
static xfs_failaddr_t
xfs_rtrmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rtrmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rtrmapbt_maxlevels_ondisk())
			return __this_address;
	}

	/* In-memory blocks always use the xfile block size. */
	maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}
581
/* Shared read/write verifier for in-memory btree buffers. */
static void
xfs_rtrmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa = xfs_rtrmapbt_mem_verify(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
591
/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
	.name			= "xfs_rtrmapbt_mem",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_mem_rw_verify,
	.verify_write		= xfs_rtrmapbt_mem_rw_verify,
	.verify_struct		= xfs_rtrmapbt_mem_verify,
};
600
/*
 * In-memory rtrmapbt operations.  Record/key layout matches the ondisk tree,
 * but block management is delegated to the generic xfbtree helpers.
 */
const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),

	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_mem_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
};
630
/* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
xfs_rtrmapbt_mem_cursor(
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
	cur->bc_mem.xfbtree = xfbt;
	/* Height comes from the xfbtree itself, not from any inode root. */
	cur->bc_nlevels = xfbt->nlevels;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	return cur;
}
648
/*
 * Create an in-memory realtime rmap btree backed by @btp, owned by rtgroup
 * @rgno.  Returns zero or a negative errno from xfbtree_init.
 */
int
xfs_rtrmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_rgnumber_t		rgno)
{
	xfbt->owner = rgno;
	return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
}
660 #endif /* CONFIG_XFS_BTREE_IN_MEM */
661
/*
 * Install a new rt reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	/* Metadata inodes record their owning group in i_projid. */
	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}
691
692 /* Calculate number of records in a rt reverse mapping btree block. */
693 static inline unsigned int
xfs_rtrmapbt_block_maxrecs(unsigned int blocklen,bool leaf)694 xfs_rtrmapbt_block_maxrecs(
695 unsigned int blocklen,
696 bool leaf)
697 {
698 if (leaf)
699 return blocklen / sizeof(struct xfs_rmap_rec);
700 return blocklen /
701 (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
702 }
703
704 /*
705 * Calculate number of records in an rt reverse mapping btree block.
706 */
707 unsigned int
xfs_rtrmapbt_maxrecs(struct xfs_mount * mp,unsigned int blocklen,bool leaf)708 xfs_rtrmapbt_maxrecs(
709 struct xfs_mount *mp,
710 unsigned int blocklen,
711 bool leaf)
712 {
713 blocklen -= XFS_RTRMAP_BLOCK_LEN;
714 return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
715 }
716
/* Compute the max possible height for realtime reverse mapping btrees. */
unsigned int
xfs_rtrmapbt_maxlevels_ondisk(void)
{
	unsigned long long	max_dblocks;
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	/* Worst case: the smallest CRC-enabled block size. */
	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;

	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rtrmapbt on any rtreflink fs.
	 *
	 * On a reflink filesystem, each block in an rtgroup can have up to
	 * 2^32 (per the refcount record format) owners, which means that
	 * theoretically we could face up to 2^64 rmap records.  However, we're
	 * likely to run out of blocks in the data device long before that
	 * happens, which means that we must compute the max height based on
	 * what the btree will look like if it consumes almost all the blocks
	 * in the data device due to maximal sharing factor.
	 */
	max_dblocks = -1U;	/* max ag count */
	max_dblocks *= XFS_MAX_CRC_AG_BLOCKS;
	return xfs_btree_space_to_height(minrecs, max_dblocks);
}
745
746 int __init
xfs_rtrmapbt_init_cur_cache(void)747 xfs_rtrmapbt_init_cur_cache(void)
748 {
749 xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
750 xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
751 0, 0, NULL);
752
753 if (!xfs_rtrmapbt_cur_cache)
754 return -ENOMEM;
755 return 0;
756 }
757
/* Tear down the rtrmapbt cursor cache. */
void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}
764
/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 *
	 * On a reflink filesystem, each rt block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records.  This makes the computation of
	 * maxlevels based on record count meaningless, so we only consider the
	 * size of the data device.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	if (xfs_has_rtreflink(mp)) {
		/* +1 accounts for the inode root level. */
		mp->m_rtrmap_maxlevels = d_maxlevels + 1;
		return;
	}

	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}
802
/* Calculate the rtrmap btree size (in blocks) needed to hold @len records. */
unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}
811
/*
 * Calculate the maximum rmap btree size, assuming one record per rt block.
 */
static unsigned long long
xfs_rtrmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rtrmap_mxr[0] == 0)
		return 0;

	return xfs_rtrmapbt_calc_size(mp, rtblocks);
}
826
/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}
843
/*
 * Convert on-disk form of btree root to in-memory form.  Keys and records
 * copy straight across; node pointers must be re-addressed because the
 * incore root lays out its pointer area at a different position than the
 * ondisk root does.
 */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy key pairs, then relocated pointers. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: records copy across unchanged. */
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
883
/*
 * Load a realtime reverse mapping btree root in from disk.  Validates the
 * level and the root's space requirement against the data fork size before
 * allocating the incore root.  Returns 0 or -EFSCORRUPTED.
 */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}
922
/*
 * Convert in-memory form of btree root to on-disk form.  The inverse of
 * xfs_rtrmapbt_from_disk: keys and records copy straight across, node
 * pointers are re-addressed for the ondisk pointer-area layout.
 */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		numrecs;
	unsigned int		maxrecs;

	/* Incore roots are never attached to a real disk buffer. */
	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
965
/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	/* The incore root must exist and fit inside the data fork. */
	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}
982
/*
 * Create a realtime rmap btree inode.  Sets the data fork to the metadata
 * btree format and installs an empty incore root block.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}
1010
/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.  Only valid on rtgroup 0, where the rt
 * superblock lives; the mapping covers one rt extent owned by the filesystem.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}
1036