xref: /linux/fs/xfs/libxfs/xfs_rtrmap_btree.c (revision 4a61f12eb11958f157e054d386466627445644cd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_inode.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_btree.h"
20 #include "xfs_btree_staging.h"
21 #include "xfs_metafile.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_trace.h"
25 #include "xfs_cksum.h"
26 #include "xfs_error.h"
27 #include "xfs_extent_busy.h"
28 #include "xfs_rtgroup.h"
29 #include "xfs_bmap.h"
30 #include "xfs_health.h"
31 #include "xfs_buf_mem.h"
32 #include "xfs_btree_mem.h"
33 
34 static struct kmem_cache	*xfs_rtrmapbt_cur_cache;
35 
36 /*
37  * Realtime Reverse Map btree.
38  *
39  * This is a btree used to track the owner(s) of a given extent in the realtime
40  * device.  See the comments in xfs_rmap_btree.c for more information.
41  *
42  * This tree is basically the same as the regular rmap btree except that it
43  * is rooted in an inode and does not live in free space.
44  */
45 
46 static struct xfs_btree_cur *
47 xfs_rtrmapbt_dup_cursor(
48 	struct xfs_btree_cur	*cur)
49 {
50 	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
51 }
52 
53 STATIC int
54 xfs_rtrmapbt_get_minrecs(
55 	struct xfs_btree_cur	*cur,
56 	int			level)
57 {
58 	if (level == cur->bc_nlevels - 1) {
59 		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
60 
61 		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
62 				level == 0) / 2;
63 	}
64 
65 	return cur->bc_mp->m_rtrmap_mnr[level != 0];
66 }
67 
68 STATIC int
69 xfs_rtrmapbt_get_maxrecs(
70 	struct xfs_btree_cur	*cur,
71 	int			level)
72 {
73 	if (level == cur->bc_nlevels - 1) {
74 		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
75 
76 		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
77 				level == 0);
78 	}
79 
80 	return cur->bc_mp->m_rtrmap_mxr[level != 0];
81 }
82 
83 /* Calculate number of records in the ondisk realtime rmap btree inode root. */
84 unsigned int
85 xfs_rtrmapbt_droot_maxrecs(
86 	unsigned int		blocklen,
87 	bool			leaf)
88 {
89 	blocklen -= sizeof(struct xfs_rtrmap_root);
90 
91 	if (leaf)
92 		return blocklen / sizeof(struct xfs_rmap_rec);
93 	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
94 			sizeof(xfs_rtrmap_ptr_t));
95 }
96 
97 /*
98  * Get the maximum records we could store in the on-disk format.
99  *
100  * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
101  * for the root node this checks the available space in the dinode fork
102  * so that we can resize the in-memory buffer to match it.  After a
103  * resize to the maximum size this function returns the same value
104  * as xfs_rtrmapbt_get_maxrecs for the root node, too.
105  */
106 STATIC int
107 xfs_rtrmapbt_get_dmaxrecs(
108 	struct xfs_btree_cur	*cur,
109 	int			level)
110 {
111 	if (level != cur->bc_nlevels - 1)
112 		return cur->bc_mp->m_rtrmap_mxr[level != 0];
113 	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
114 }
115 
116 /*
117  * Convert the ondisk record's offset field into the ondisk key's offset field.
118  * Fork and bmbt are significant parts of the rmap record key, but written
119  * status is merely a record attribute.
120  */
121 static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
122 {
123 	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
124 }
125 
126 STATIC void
127 xfs_rtrmapbt_init_key_from_rec(
128 	union xfs_btree_key		*key,
129 	const union xfs_btree_rec	*rec)
130 {
131 	key->rmap.rm_startblock = rec->rmap.rm_startblock;
132 	key->rmap.rm_owner = rec->rmap.rm_owner;
133 	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
134 }
135 
/*
 * Derive the high key from a record: the last block covered by the mapping,
 * and (for inode owners that are not bmbt blocks) the last logical offset.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* Distance from the first to the last block of the mapping. */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	/* Non-inode owners and bmbt blocks carry no meaningful offset. */
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Advance only the offset bits, preserving the flag bits above them. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
157 
158 STATIC void
159 xfs_rtrmapbt_init_rec_from_cur(
160 	struct xfs_btree_cur	*cur,
161 	union xfs_btree_rec	*rec)
162 {
163 	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
164 	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
165 	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
166 	rec->rmap.rm_offset = cpu_to_be64(
167 			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
168 }
169 
/* The root lives in the inode core, so there is no ondisk root pointer. */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}
177 
/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	/* Strip the unwritten flag; all other bits participate in ordering. */
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
187 
188 STATIC int64_t
189 xfs_rtrmapbt_key_diff(
190 	struct xfs_btree_cur		*cur,
191 	const union xfs_btree_key	*key)
192 {
193 	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
194 	const struct xfs_rmap_key	*kp = &key->rmap;
195 	__u64				x, y;
196 	int64_t				d;
197 
198 	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
199 	if (d)
200 		return d;
201 
202 	x = be64_to_cpu(kp->rm_owner);
203 	y = rec->rm_owner;
204 	if (x > y)
205 		return 1;
206 	else if (y > x)
207 		return -1;
208 
209 	x = offset_keymask(be64_to_cpu(kp->rm_offset));
210 	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
211 	if (x > y)
212 		return 1;
213 	else if (y > x)
214 		return -1;
215 	return 0;
216 }
217 
218 STATIC int64_t
219 xfs_rtrmapbt_diff_two_keys(
220 	struct xfs_btree_cur		*cur,
221 	const union xfs_btree_key	*k1,
222 	const union xfs_btree_key	*k2,
223 	const union xfs_btree_key	*mask)
224 {
225 	const struct xfs_rmap_key	*kp1 = &k1->rmap;
226 	const struct xfs_rmap_key	*kp2 = &k2->rmap;
227 	int64_t				d;
228 	__u64				x, y;
229 
230 	/* Doesn't make sense to mask off the physical space part */
231 	ASSERT(!mask || mask->rmap.rm_startblock);
232 
233 	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
234 		     be32_to_cpu(kp2->rm_startblock);
235 	if (d)
236 		return d;
237 
238 	if (!mask || mask->rmap.rm_owner) {
239 		x = be64_to_cpu(kp1->rm_owner);
240 		y = be64_to_cpu(kp2->rm_owner);
241 		if (x > y)
242 			return 1;
243 		else if (y > x)
244 			return -1;
245 	}
246 
247 	if (!mask || mask->rmap.rm_offset) {
248 		/* Doesn't make sense to allow offset but not owner */
249 		ASSERT(!mask || mask->rmap.rm_owner);
250 
251 		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
252 		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
253 		if (x > y)
254 			return 1;
255 		else if (y > x)
256 			return -1;
257 	}
258 
259 	return 0;
260 }
261 
/*
 * Validate an ondisk rtrmap btree block: magic, feature bit, v5 header,
 * level bound, then generic long-format block checks.  Returns the address
 * of the failing check, or NULL if the block is ok.
 */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	/* Ondisk blocks require the rmapbt feature. */
	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	/* Check numrecs against the per-level maximum. */
	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}
285 
286 static void
287 xfs_rtrmapbt_read_verify(
288 	struct xfs_buf	*bp)
289 {
290 	xfs_failaddr_t	fa;
291 
292 	if (!xfs_btree_fsblock_verify_crc(bp))
293 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
294 	else {
295 		fa = xfs_rtrmapbt_verify(bp);
296 		if (fa)
297 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
298 	}
299 
300 	if (bp->b_error)
301 		trace_xfs_btree_corrupt(bp, _RET_IP_);
302 }
303 
304 static void
305 xfs_rtrmapbt_write_verify(
306 	struct xfs_buf	*bp)
307 {
308 	xfs_failaddr_t	fa;
309 
310 	fa = xfs_rtrmapbt_verify(bp);
311 	if (fa) {
312 		trace_xfs_btree_corrupt(bp, _RET_IP_);
313 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
314 		return;
315 	}
316 	xfs_btree_fsblock_calc_crc(bp);
317 
318 }
319 
/* Buffer ops for ondisk rtrmap btree blocks. */
const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	/* no pre-v5 magic; only the CRC-era magic is valid */
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};
327 
328 STATIC int
329 xfs_rtrmapbt_keys_inorder(
330 	struct xfs_btree_cur		*cur,
331 	const union xfs_btree_key	*k1,
332 	const union xfs_btree_key	*k2)
333 {
334 	uint32_t			x;
335 	uint32_t			y;
336 	uint64_t			a;
337 	uint64_t			b;
338 
339 	x = be32_to_cpu(k1->rmap.rm_startblock);
340 	y = be32_to_cpu(k2->rmap.rm_startblock);
341 	if (x < y)
342 		return 1;
343 	else if (x > y)
344 		return 0;
345 	a = be64_to_cpu(k1->rmap.rm_owner);
346 	b = be64_to_cpu(k2->rmap.rm_owner);
347 	if (a < b)
348 		return 1;
349 	else if (a > b)
350 		return 0;
351 	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
352 	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
353 	if (a <= b)
354 		return 1;
355 	return 0;
356 }
357 
358 STATIC int
359 xfs_rtrmapbt_recs_inorder(
360 	struct xfs_btree_cur		*cur,
361 	const union xfs_btree_rec	*r1,
362 	const union xfs_btree_rec	*r2)
363 {
364 	uint32_t			x;
365 	uint32_t			y;
366 	uint64_t			a;
367 	uint64_t			b;
368 
369 	x = be32_to_cpu(r1->rmap.rm_startblock);
370 	y = be32_to_cpu(r2->rmap.rm_startblock);
371 	if (x < y)
372 		return 1;
373 	else if (x > y)
374 		return 0;
375 	a = be64_to_cpu(r1->rmap.rm_owner);
376 	b = be64_to_cpu(r2->rmap.rm_owner);
377 	if (a < b)
378 		return 1;
379 	else if (a > b)
380 		return 0;
381 	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
382 	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
383 	if (a <= b)
384 		return 1;
385 	return 0;
386 }
387 
388 STATIC enum xbtree_key_contig
389 xfs_rtrmapbt_keys_contiguous(
390 	struct xfs_btree_cur		*cur,
391 	const union xfs_btree_key	*key1,
392 	const union xfs_btree_key	*key2,
393 	const union xfs_btree_key	*mask)
394 {
395 	ASSERT(!mask || mask->rmap.rm_startblock);
396 
397 	/*
398 	 * We only support checking contiguity of the physical space component.
399 	 * If any callers ever need more specificity than that, they'll have to
400 	 * implement it here.
401 	 */
402 	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
403 
404 	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
405 				 be32_to_cpu(key2->rmap.rm_startblock));
406 }
407 
408 static inline void
409 xfs_rtrmapbt_move_ptrs(
410 	struct xfs_mount	*mp,
411 	struct xfs_btree_block	*broot,
412 	short			old_size,
413 	size_t			new_size,
414 	unsigned int		numrecs)
415 {
416 	void			*dptr;
417 	void			*sptr;
418 
419 	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
420 	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
421 	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
422 }
423 
/*
 * Resize the incore btree root to hold @new_numrecs records at the current
 * root level, relocating the node pointer array as needed because it is not
 * adjacent to the keys in the incore root layout.  Returns the (possibly
 * reallocated) root block.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		/* Leaf roots have no pointers, so there is nothing to move. */
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs.  If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header.  Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	/* The resized root must still fit inside the inode fork. */
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}
488 
/* Btree operations for the inode-rooted, ondisk realtime rmap btree. */
const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	/* Blocks come from the rtrmap metadata inode, not free space. */
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};
522 
/* Allocate a new rt rmap btree cursor for the rtgroup's rmap inode. */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	/* Caller must hold the rmap inode locked (shared or exclusive). */
	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	/* The cursor holds a passive reference to the group. */
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	/* Tree height is the incore root's level plus one. */
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}
546 
547 #ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Validate an in-memory realtime rmap btree block.  Callers are allowed to
 * generate an in-memory btree even if the ondisk feature is not enabled.
 */
static xfs_failaddr_t
xfs_rtrmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	/*
	 * Bound the level by the mount geometry when the feature is enabled;
	 * otherwise fall back to the largest possible ondisk height.
	 */
	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rtrmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rtrmapbt_maxlevels_ondisk())
			return __this_address;
	}

	/* In-memory btree blocks are always xfile block sized. */
	maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}
581 
582 static void
583 xfs_rtrmapbt_mem_rw_verify(
584 	struct xfs_buf	*bp)
585 {
586 	xfs_failaddr_t	fa = xfs_rtrmapbt_mem_verify(bp);
587 
588 	if (fa)
589 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
590 }
591 
/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
	.name			= "xfs_rtrmapbt_mem",
	/* no pre-v5 magic; only the CRC-era magic is valid */
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_mem_rw_verify,
	.verify_write		= xfs_rtrmapbt_mem_rw_verify,
	.verify_struct		= xfs_rtrmapbt_mem_verify,
};
600 
/* Btree operations for in-memory (xfbtree-backed) rtrmap btrees. */
const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),

	/* Block management comes from the generic xfbtree helpers. */
	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	/* Record/key handling is shared with the ondisk btree. */
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_mem_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
};
630 
631 /* Create a cursor for an in-memory btree. */
632 struct xfs_btree_cur *
633 xfs_rtrmapbt_mem_cursor(
634 	struct xfs_rtgroup	*rtg,
635 	struct xfs_trans	*tp,
636 	struct xfbtree		*xfbt)
637 {
638 	struct xfs_mount	*mp = rtg_mount(rtg);
639 	struct xfs_btree_cur	*cur;
640 
641 	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
642 			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
643 	cur->bc_mem.xfbtree = xfbt;
644 	cur->bc_nlevels = xfbt->nlevels;
645 	cur->bc_group = xfs_group_hold(rtg_group(rtg));
646 	return cur;
647 }
648 
/* Create an in-memory realtime rmap btree. */
int
xfs_rtrmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_rgnumber_t		rgno)
{
	/* Tag the in-memory tree with the rtgroup number it shadows. */
	xfbt->owner = rgno;
	return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
}
660 #endif /* CONFIG_XFS_BTREE_IN_MEM */
661 
/*
 * Install a new rt reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	/*
	 * NOTE(review): stores the group number in i_projid — presumably the
	 * metadata-directory convention for rtgroup inodes; confirm against
	 * the ondisk format documentation.
	 */
	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}
691 
692 /* Calculate number of records in a rt reverse mapping btree block. */
693 static inline unsigned int
694 xfs_rtrmapbt_block_maxrecs(
695 	unsigned int		blocklen,
696 	bool			leaf)
697 {
698 	if (leaf)
699 		return blocklen / sizeof(struct xfs_rmap_rec);
700 	return blocklen /
701 		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
702 }
703 
704 /*
705  * Calculate number of records in an rt reverse mapping btree block.
706  */
707 unsigned int
708 xfs_rtrmapbt_maxrecs(
709 	struct xfs_mount	*mp,
710 	unsigned int		blocklen,
711 	bool			leaf)
712 {
713 	blocklen -= XFS_RTRMAP_BLOCK_LEN;
714 	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
715 }
716 
717 /* Compute the max possible height for realtime reverse mapping btrees. */
718 unsigned int
719 xfs_rtrmapbt_maxlevels_ondisk(void)
720 {
721 	unsigned int		minrecs[2];
722 	unsigned int		blocklen;
723 
724 	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
725 
726 	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
727 	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;
728 
729 	/* We need at most one record for every block in an rt group. */
730 	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
731 }
732 
733 int __init
734 xfs_rtrmapbt_init_cur_cache(void)
735 {
736 	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
737 			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
738 			0, 0, NULL);
739 
740 	if (!xfs_rtrmapbt_cur_cache)
741 		return -ENOMEM;
742 	return 0;
743 }
744 
/* Tear down the cursor cache; kmem_cache_destroy tolerates NULL. */
void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}
751 
/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	/* No rtrmapbt feature means no tree at all. */
	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}
778 
/* Calculate the rtrmap btree size for some records. */
unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	/* Worst-case tree size assuming minimally-filled blocks. */
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}
787 
788 /*
789  * Calculate the maximum rmap btree size.
790  */
791 static unsigned long long
792 xfs_rtrmapbt_max_size(
793 	struct xfs_mount	*mp,
794 	xfs_rtblock_t		rtblocks)
795 {
796 	/* Bail out if we're uninitialized, which can happen in mkfs. */
797 	if (mp->m_rtrmap_mxr[0] == 0)
798 		return 0;
799 
800 	return xfs_rtrmapbt_calc_size(mp, rtblocks);
801 }
802 
/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 * Returns zero when the feature is disabled.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}
819 
/* Convert on-disk form of btree root to in-memory form. */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;	/* from (ondisk) key pointer */
	__be64			*fpp;	/* from (ondisk) block pointer */
	struct xfs_rmap_key	*tkp;	/* to (incore) key pointer */
	__be64			*tpp;	/* to (incore) block pointer */
	struct xfs_rmap_rec	*frp;	/* from (ondisk) record pointer */
	struct xfs_rmap_rec	*trp;	/* to (incore) record pointer */
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	/* Initialize an incore block header owned by this inode. */
	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key and pointer arrays separately. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		/* Overlapping btree: two keys (low and high) per pointer. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: records only. */
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
859 
/*
 * Load a realtime reverse mapping btree root in from disk.  Returns
 * -EFSCORRUPTED (and marks the inode sick) if the ondisk root does not fit
 * the fork or exceeds the maximum tree height.
 */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/* Reject geometry that cannot possibly fit in the data fork. */
	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	/*
	 * NOTE(review): a NULL return from xfs_broot_alloc leaves the fork
	 * without a root yet still returns 0 — presumably the allocation
	 * cannot fail here; confirm against xfs_broot_alloc's contract.
	 */
	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}
898 
/* Convert in-memory form of btree root to on-disk form. */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;	/* from (incore) key pointer */
	__be64			*fpp;	/* from (incore) block pointer */
	struct xfs_rmap_key	*tkp;	/* to (ondisk) key pointer */
	__be64			*tpp;	/* to (ondisk) block pointer */
	struct xfs_rmap_rec	*frp;	/* from (incore) record pointer */
	struct xfs_rmap_rec	*trp;	/* to (ondisk) record pointer */
	unsigned int		numrecs;
	unsigned int		maxrecs;

	/* The incore root must look like a valid, unlinked iroot block. */
	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key and pointer arrays separately. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		/* Overlapping btree: two keys (low and high) per pointer. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: records only. */
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
941 
/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	/* The incore root must exist and fit within the ondisk fork. */
	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}
958 
/*
 * Create a realtime rmap btree inode.
 *
 * The @init argument is accepted to match the metadata inode creation
 * signature but is not referenced here; the root is always created empty.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	/* The data fork must be empty before we claim it for the btree. */
	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}
986 
/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	/* Map the first rt extent (startblock and offset zero) to the fs. */
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	/* Only rtgroup zero carries a realtime superblock. */
	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}
1012