xref: /linux/fs/xfs/libxfs/xfs_rtrmap_btree.c (revision 6d4933c221958d1e1848d5092a3e3d1c6e4a6f92)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_inode.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_btree.h"
20 #include "xfs_btree_staging.h"
21 #include "xfs_metafile.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_trace.h"
25 #include "xfs_cksum.h"
26 #include "xfs_error.h"
27 #include "xfs_extent_busy.h"
28 #include "xfs_rtgroup.h"
29 #include "xfs_bmap.h"
30 #include "xfs_health.h"
31 
32 static struct kmem_cache	*xfs_rtrmapbt_cur_cache;
33 
34 /*
35  * Realtime Reverse Map btree.
36  *
37  * This is a btree used to track the owner(s) of a given extent in the realtime
38  * device.  See the comments in xfs_rmap_btree.c for more information.
39  *
40  * This tree is basically the same as the regular rmap btree except that it
41  * is rooted in an inode and does not live in free space.
42  */
43 
/*
 * Duplicate an rtrmap btree cursor: build a fresh cursor against the same
 * transaction and realtime group as the original.
 */
static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
}
50 
/*
 * Return the minimum number of records for a block at @level.  The root
 * block lives in the inode fork, so its minimum is derived from however
 * much space the fork currently provides; all other blocks use the
 * precomputed per-level minimums.
 */
STATIC int
xfs_rtrmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		/* Root block: half of what fits in the incore root buffer. */
		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0) / 2;
	}

	return cur->bc_mp->m_rtrmap_mnr[level != 0];
}
65 
/*
 * Return the maximum number of records for a block at @level.  The root
 * block is sized by the incore inode fork root buffer; all other blocks
 * use the precomputed per-level maximums.
 */
STATIC int
xfs_rtrmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0);
	}

	return cur->bc_mp->m_rtrmap_mxr[level != 0];
}
80 
81 /* Calculate number of records in the ondisk realtime rmap btree inode root. */
82 unsigned int
83 xfs_rtrmapbt_droot_maxrecs(
84 	unsigned int		blocklen,
85 	bool			leaf)
86 {
87 	blocklen -= sizeof(struct xfs_rtrmap_root);
88 
89 	if (leaf)
90 		return blocklen / sizeof(struct xfs_rmap_rec);
91 	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
92 			sizeof(xfs_rtrmap_ptr_t));
93 }
94 
/*
 * Get the maximum records we could store in the on-disk format.
 *
 * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
 * for the root node this checks the available space in the dinode fork
 * so that we can resize the in-memory buffer to match it.  After a
 * resize to the maximum size this function returns the same value
 * as xfs_rtrmapbt_get_maxrecs for the root node, too.
 */
STATIC int
xfs_rtrmapbt_get_dmaxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level != cur->bc_nlevels - 1)
		return cur->bc_mp->m_rtrmap_mxr[level != 0];
	/* Root: whatever fits in the ondisk data fork. */
	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
}
113 
/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	/* Strip the UNWRITTEN bit; everything else participates in the key. */
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}
123 
/* Construct the low key for a record: its starting block, owner and offset. */
STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}
133 
/*
 * Construct the high key for a record: the key of the last block that the
 * record maps.  Both startblock and (for inode-owned data fork mappings)
 * the file offset are advanced by blockcount - 1.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* Distance from the first to the last mapped block. */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	/*
	 * Non-inode owners and bmbt blocks do not carry a meaningful file
	 * offset, so leave rm_offset unadjusted for those.
	 */
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Advance only the offset portion, preserving the flag bits. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
155 
/* Pack the cursor's incore rmap record into ondisk (big-endian) form. */
STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	/* Offset, fork and state flags are packed into one 64-bit field. */
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}
167 
/*
 * Initialize a pointer to the root block.  Inode-rooted btrees have no
 * ondisk root pointer, so zero it.
 */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}
175 
/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}
185 
/*
 * Compare an ondisk key against the cursor's incore record.  Returns a
 * positive value if the key sorts after the record, negative if before,
 * and zero if they match.  Comparison order: startblock, owner, then the
 * masked offset.
 */
STATIC int64_t
xfs_rtrmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	/* Owners are 64-bit, so compare rather than subtract. */
	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	/* Offsets compare with the written-state bit masked off. */
	x = offset_keymask(be64_to_cpu(kp->rm_offset));
	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}
215 
/*
 * Compare two ondisk keys, optionally restricting the comparison to the
 * key fields selected by @mask.  Returns positive if @k1 sorts after @k2,
 * negative if before, zero if equal (with respect to the mask).
 */
STATIC int64_t
xfs_rtrmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		     be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		x = be64_to_cpu(kp1->rm_owner);
		y = be64_to_cpu(kp2->rm_owner);
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	return 0;
}
259 
/*
 * Structure verifier for rtrmap btree blocks: check the magic, feature
 * bit, v5 header fields, and that the level and record count are sane.
 * Returns the failing address, or NULL if the block verifies cleanly.
 */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	/* Verify sibling pointers and that numrecs fits the level. */
	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}
283 
284 static void
285 xfs_rtrmapbt_read_verify(
286 	struct xfs_buf	*bp)
287 {
288 	xfs_failaddr_t	fa;
289 
290 	if (!xfs_btree_fsblock_verify_crc(bp))
291 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
292 	else {
293 		fa = xfs_rtrmapbt_verify(bp);
294 		if (fa)
295 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
296 	}
297 
298 	if (bp->b_error)
299 		trace_xfs_btree_corrupt(bp, _RET_IP_);
300 }
301 
302 static void
303 xfs_rtrmapbt_write_verify(
304 	struct xfs_buf	*bp)
305 {
306 	xfs_failaddr_t	fa;
307 
308 	fa = xfs_rtrmapbt_verify(bp);
309 	if (fa) {
310 		trace_xfs_btree_corrupt(bp, _RET_IP_);
311 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
312 		return;
313 	}
314 	xfs_btree_fsblock_calc_crc(bp);
315 
316 }
317 
/* Buffer ops for rtrmap btree blocks; only the CRC-era magic is valid. */
const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};
325 
326 STATIC int
327 xfs_rtrmapbt_keys_inorder(
328 	struct xfs_btree_cur		*cur,
329 	const union xfs_btree_key	*k1,
330 	const union xfs_btree_key	*k2)
331 {
332 	uint32_t			x;
333 	uint32_t			y;
334 	uint64_t			a;
335 	uint64_t			b;
336 
337 	x = be32_to_cpu(k1->rmap.rm_startblock);
338 	y = be32_to_cpu(k2->rmap.rm_startblock);
339 	if (x < y)
340 		return 1;
341 	else if (x > y)
342 		return 0;
343 	a = be64_to_cpu(k1->rmap.rm_owner);
344 	b = be64_to_cpu(k2->rmap.rm_owner);
345 	if (a < b)
346 		return 1;
347 	else if (a > b)
348 		return 0;
349 	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
350 	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
351 	if (a <= b)
352 		return 1;
353 	return 0;
354 }
355 
356 STATIC int
357 xfs_rtrmapbt_recs_inorder(
358 	struct xfs_btree_cur		*cur,
359 	const union xfs_btree_rec	*r1,
360 	const union xfs_btree_rec	*r2)
361 {
362 	uint32_t			x;
363 	uint32_t			y;
364 	uint64_t			a;
365 	uint64_t			b;
366 
367 	x = be32_to_cpu(r1->rmap.rm_startblock);
368 	y = be32_to_cpu(r2->rmap.rm_startblock);
369 	if (x < y)
370 		return 1;
371 	else if (x > y)
372 		return 0;
373 	a = be64_to_cpu(r1->rmap.rm_owner);
374 	b = be64_to_cpu(r2->rmap.rm_owner);
375 	if (a < b)
376 		return 1;
377 	else if (a > b)
378 		return 0;
379 	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
380 	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
381 	if (a <= b)
382 		return 1;
383 	return 0;
384 }
385 
/*
 * Decide whether two keys describe contiguous physical space.  Only the
 * startblock component is examined.
 */
STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}
405 
/*
 * Move the node-block pointer array when the incore root block is resized.
 * The pointers sit at the end of the root buffer (not butted up against
 * the keys), so a size change shifts their location.  memmove handles the
 * overlapping ranges in either direction.
 */
static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}
421 
/*
 * Resize the incore btree root block to hold @new_numrecs records.
 *
 * When growing, we must realloc first and then shift the pointer array
 * outward; when shrinking, we must shift the pointers inward before the
 * realloc so no data is lost.  A new size of zero frees the root buffer
 * entirely.  Returns the (possibly moved) root block.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs.  If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header.  Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	/* The resized root must still fit in the ondisk inode fork. */
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}
486 
/* Btree operations for the realtime rmap btree (inode-rooted, overlapping). */
const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};
520 
/*
 * Allocate a new rt rmap btree cursor for the given rtgroup's rmap inode.
 * The rmap inode must be ILOCKed (shared or exclusive) by the caller.
 */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	/* Hold a group reference for the lifetime of the cursor. */
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	/* Tree height = incore root level + 1. */
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}
544 
/*
 * Install a new rt reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	/* Stamp the group number into the inode's project id field. */
	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}
574 
575 /* Calculate number of records in a rt reverse mapping btree block. */
576 static inline unsigned int
577 xfs_rtrmapbt_block_maxrecs(
578 	unsigned int		blocklen,
579 	bool			leaf)
580 {
581 	if (leaf)
582 		return blocklen / sizeof(struct xfs_rmap_rec);
583 	return blocklen /
584 		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
585 }
586 
/*
 * Calculate the maximum number of records in an rt reverse mapping btree
 * block, after accounting for the ondisk block header.
 */
unsigned int
xfs_rtrmapbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_RTRMAP_BLOCK_LEN;
	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
}
599 
/* Compute the max possible height for realtime reverse mapping btrees. */
unsigned int
xfs_rtrmapbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	/* Worst case: the smallest CRC-enabled block size. */
	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;

	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;

	/* We need at most one record for every block in an rt group. */
	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
}
615 
/*
 * Create the kmem cache for rtrmap btree cursors, sized for the maximum
 * possible tree height.  Returns 0 or -ENOMEM.
 */
int __init
xfs_rtrmapbt_init_cur_cache(void)
{
	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rtrmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}
627 
/* Tear down the rtrmap btree cursor cache. */
void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}
634 
/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}
661 
/* Calculate the rtrmap btree size (in blocks) needed to hold @len records. */
static unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}
670 
/*
 * Calculate the maximum rmap btree size for @rtblocks worth of records.
 */
static unsigned long long
xfs_rtrmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rtrmap_mxr[0] == 0)
		return 0;

	return xfs_rtrmapbt_calc_size(mp, rtblocks);
}
685 
/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}
702 
/*
 * Convert on-disk form of btree root to in-memory form.  Copies the level
 * and record count, then the records (leaf) or the keys and pointers
 * (node) from the ondisk root @dblock into the incore root @rblock.
 */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy both key arrays and the pointer array. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		/* Two keys (low and high) per record in an overlapped btree. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records only. */
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
742 
/*
 * Load a realtime reverse mapping btree root in from disk.  Validates the
 * ondisk level and record count against the fork size before allocating
 * the incore root.  Returns 0 or -EFSCORRUPTED.
 */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/* Reject roots that are too tall or too big for the fork. */
	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}
781 
/*
 * Convert in-memory form of btree root to on-disk form.  Mirror image of
 * xfs_rtrmapbt_from_disk: copies level and numrecs, then the records
 * (leaf) or keys and pointers (node) into the ondisk root @dblock.
 */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		numrecs;
	unsigned int		maxrecs;

	/* Sanity-check the incore root before writing it out. */
	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy both key arrays and the pointer array. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		/* Two keys (low and high) per record in an overlapped btree. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records only. */
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
824 
/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	/* The incore root must exist and fit in the ondisk fork. */
	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}
841 
/*
 * Create a realtime rmap btree inode: set the fork format and install an
 * empty incore root block, then log the change.  Always returns 0.
 *
 * NOTE(review): the @init parameter is not referenced in this function —
 * presumably kept for interface parity with other metadata-inode create
 * helpers; confirm against the callers.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}
869 
/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.  Only valid on rtgroup 0 of filesystems
 * with a realtime superblock.  Returns 0 or a negative errno.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	/* rm_startblock/rm_offset default to 0: the superblock extent. */
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}
895