xref: /linux/fs/xfs/libxfs/xfs_rtrmap_btree.c (revision f33659e8a114e2c17108227d30a2bdf398e39bdb)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_inode.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_btree.h"
20 #include "xfs_btree_staging.h"
21 #include "xfs_metafile.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_trace.h"
25 #include "xfs_cksum.h"
26 #include "xfs_error.h"
27 #include "xfs_extent_busy.h"
28 #include "xfs_rtgroup.h"
29 #include "xfs_bmap.h"
30 
31 static struct kmem_cache	*xfs_rtrmapbt_cur_cache;
32 
33 /*
34  * Realtime Reverse Map btree.
35  *
36  * This is a btree used to track the owner(s) of a given extent in the realtime
37  * device.  See the comments in xfs_rmap_btree.c for more information.
38  *
39  * This tree is basically the same as the regular rmap btree except that it
40  * is rooted in an inode and does not live in free space.
41  */
42 
/*
 * Duplicate an rtrmap btree cursor: create a fresh cursor attached to the
 * same transaction and the same realtime group as the original.
 */
static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
}
49 
50 STATIC int
51 xfs_rtrmapbt_get_minrecs(
52 	struct xfs_btree_cur	*cur,
53 	int			level)
54 {
55 	if (level == cur->bc_nlevels - 1) {
56 		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
57 
58 		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
59 				level == 0) / 2;
60 	}
61 
62 	return cur->bc_mp->m_rtrmap_mnr[level != 0];
63 }
64 
65 STATIC int
66 xfs_rtrmapbt_get_maxrecs(
67 	struct xfs_btree_cur	*cur,
68 	int			level)
69 {
70 	if (level == cur->bc_nlevels - 1) {
71 		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
72 
73 		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
74 				level == 0);
75 	}
76 
77 	return cur->bc_mp->m_rtrmap_mxr[level != 0];
78 }
79 
80 /* Calculate number of records in the ondisk realtime rmap btree inode root. */
81 unsigned int
82 xfs_rtrmapbt_droot_maxrecs(
83 	unsigned int		blocklen,
84 	bool			leaf)
85 {
86 	blocklen -= sizeof(struct xfs_rtrmap_root);
87 
88 	if (leaf)
89 		return blocklen / sizeof(struct xfs_rmap_rec);
90 	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
91 			sizeof(xfs_rtrmap_ptr_t));
92 }
93 
94 /*
95  * Get the maximum records we could store in the on-disk format.
96  *
97  * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
98  * for the root node this checks the available space in the dinode fork
99  * so that we can resize the in-memory buffer to match it.  After a
100  * resize to the maximum size this function returns the same value
101  * as xfs_rtrmapbt_get_maxrecs for the root node, too.
102  */
103 STATIC int
104 xfs_rtrmapbt_get_dmaxrecs(
105 	struct xfs_btree_cur	*cur,
106 	int			level)
107 {
108 	if (level != cur->bc_nlevels - 1)
109 		return cur->bc_mp->m_rtrmap_mxr[level != 0];
110 	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
111 }
112 
113 /*
114  * Convert the ondisk record's offset field into the ondisk key's offset field.
115  * Fork and bmbt are significant parts of the rmap record key, but written
116  * status is merely a record attribute.
117  */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	/* Mask off the unwritten bit; it is a record attribute, not a key. */
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}
122 
/* Construct the low key (record start) from an ondisk rmap record. */
STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}
132 
/*
 * Construct the high key (last block covered) from an ondisk rmap record.
 * Both the startblock and, for inode owners, the file offset are advanced
 * by blockcount - 1.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* Distance from the first to the last block of the record. */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	/* Non-inode owners and bmbt blocks carry no file offset to adjust. */
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Advance only the offset bits, preserving the flag bits. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}
154 
/* Convert the cursor's in-memory rmap record into the ondisk record format. */
STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	/* Offset and flag bits are packed into a single 64-bit field. */
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}
166 
/*
 * Initialize a pointer to the root.  This btree is rooted in an inode fork
 * (XFS_BTREE_TYPE_INODE), so the root pointer is simply zeroed.
 */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}
174 
175 /*
176  * Mask the appropriate parts of the ondisk key field for a key comparison.
177  * Fork and bmbt are significant parts of the rmap record key, but written
178  * status is merely a record attribute.
179  */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;	/* drop the written-state bit */
}
184 
185 STATIC int64_t
186 xfs_rtrmapbt_key_diff(
187 	struct xfs_btree_cur		*cur,
188 	const union xfs_btree_key	*key)
189 {
190 	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
191 	const struct xfs_rmap_key	*kp = &key->rmap;
192 	__u64				x, y;
193 	int64_t				d;
194 
195 	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
196 	if (d)
197 		return d;
198 
199 	x = be64_to_cpu(kp->rm_owner);
200 	y = rec->rm_owner;
201 	if (x > y)
202 		return 1;
203 	else if (y > x)
204 		return -1;
205 
206 	x = offset_keymask(be64_to_cpu(kp->rm_offset));
207 	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
208 	if (x > y)
209 		return 1;
210 	else if (y > x)
211 		return -1;
212 	return 0;
213 }
214 
215 STATIC int64_t
216 xfs_rtrmapbt_diff_two_keys(
217 	struct xfs_btree_cur		*cur,
218 	const union xfs_btree_key	*k1,
219 	const union xfs_btree_key	*k2,
220 	const union xfs_btree_key	*mask)
221 {
222 	const struct xfs_rmap_key	*kp1 = &k1->rmap;
223 	const struct xfs_rmap_key	*kp2 = &k2->rmap;
224 	int64_t				d;
225 	__u64				x, y;
226 
227 	/* Doesn't make sense to mask off the physical space part */
228 	ASSERT(!mask || mask->rmap.rm_startblock);
229 
230 	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
231 		     be32_to_cpu(kp2->rm_startblock);
232 	if (d)
233 		return d;
234 
235 	if (!mask || mask->rmap.rm_owner) {
236 		x = be64_to_cpu(kp1->rm_owner);
237 		y = be64_to_cpu(kp2->rm_owner);
238 		if (x > y)
239 			return 1;
240 		else if (y > x)
241 			return -1;
242 	}
243 
244 	if (!mask || mask->rmap.rm_offset) {
245 		/* Doesn't make sense to allow offset but not owner */
246 		ASSERT(!mask || mask->rmap.rm_owner);
247 
248 		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
249 		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
250 		if (x > y)
251 			return 1;
252 		else if (y > x)
253 			return -1;
254 	}
255 
256 	return 0;
257 }
258 
/*
 * Structural verifier for rtrmap btree blocks: checks magic, feature bit,
 * the v5 long-format block header, and the level/numrecs geometry.
 * Returns the failing code address, or NULL if the block is sane.
 */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	/* NOTE(review): gated on the rmapbt feature, not rtrmapbt -- this
	 * mirrors the predicate used in xfs_iformat_rtrmap below. */
	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	/* Finally check numrecs against the per-level maximum. */
	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}
282 
/*
 * Read verifier: check the CRC first, then the block structure.  Any
 * failure is recorded in bp->b_error by xfs_verifier_error and traced.
 */
static void
xfs_rtrmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_fsblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rtrmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}
300 
301 static void
302 xfs_rtrmapbt_write_verify(
303 	struct xfs_buf	*bp)
304 {
305 	xfs_failaddr_t	fa;
306 
307 	fa = xfs_rtrmapbt_verify(bp);
308 	if (fa) {
309 		trace_xfs_btree_corrupt(bp, _RET_IP_);
310 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
311 		return;
312 	}
313 	xfs_btree_fsblock_calc_crc(bp);
314 
315 }
316 
/* Buffer verifier operations for ondisk rtrmap btree blocks. */
const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	/* First magic slot is unused; only the CRC magic is recognized. */
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};
324 
325 STATIC int
326 xfs_rtrmapbt_keys_inorder(
327 	struct xfs_btree_cur		*cur,
328 	const union xfs_btree_key	*k1,
329 	const union xfs_btree_key	*k2)
330 {
331 	uint32_t			x;
332 	uint32_t			y;
333 	uint64_t			a;
334 	uint64_t			b;
335 
336 	x = be32_to_cpu(k1->rmap.rm_startblock);
337 	y = be32_to_cpu(k2->rmap.rm_startblock);
338 	if (x < y)
339 		return 1;
340 	else if (x > y)
341 		return 0;
342 	a = be64_to_cpu(k1->rmap.rm_owner);
343 	b = be64_to_cpu(k2->rmap.rm_owner);
344 	if (a < b)
345 		return 1;
346 	else if (a > b)
347 		return 0;
348 	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
349 	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
350 	if (a <= b)
351 		return 1;
352 	return 0;
353 }
354 
355 STATIC int
356 xfs_rtrmapbt_recs_inorder(
357 	struct xfs_btree_cur		*cur,
358 	const union xfs_btree_rec	*r1,
359 	const union xfs_btree_rec	*r2)
360 {
361 	uint32_t			x;
362 	uint32_t			y;
363 	uint64_t			a;
364 	uint64_t			b;
365 
366 	x = be32_to_cpu(r1->rmap.rm_startblock);
367 	y = be32_to_cpu(r2->rmap.rm_startblock);
368 	if (x < y)
369 		return 1;
370 	else if (x > y)
371 		return 0;
372 	a = be64_to_cpu(r1->rmap.rm_owner);
373 	b = be64_to_cpu(r2->rmap.rm_owner);
374 	if (a < b)
375 		return 1;
376 	else if (a > b)
377 		return 0;
378 	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
379 	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
380 	if (a <= b)
381 		return 1;
382 	return 0;
383 }
384 
/* Decide whether the physical space covered by two keys is contiguous. */
STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}
404 
/*
 * Move the node-pointer array of an incore root block when the root buffer
 * is resized.  The pointer array's position depends on the buffer size
 * (xfs_rtrmap_broot_ptr_addr takes the size), so growing or shrinking the
 * root requires memmoving all numrecs pointers to their new location.
 */
static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;	/* destination: pointers at new size */
	void			*sptr;	/* source: pointers at old size */

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}
420 
/*
 * Resize the incore btree root buffer so that it can hold @new_numrecs
 * records, relocating the node-pointer array as needed, and return the
 * (possibly reallocated) root block.  A new_numrecs of zero frees the root.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		/* Growing: realloc first, then slide the pointers outward. */
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs.  If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header.  Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

	/* Both grow and shrink paths converge here for the final check. */
out_broot:
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}
485 
/* Btree operations table for the inode-rooted realtime rmap btree. */
const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	/* Blocks come from and return to the metadata file block allocator. */
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};
518 
519 /* Allocate a new rt rmap btree cursor. */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	/* Caller must already hold the rmap inode locked, shared or excl. */
	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	/* The btree lives in the data fork of the rtgroup's rmap inode. */
	cur->bc_ino.ip = ip;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	/* Tree height is the incore root's level plus the root level itself. */
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}
542 
543 /*
544  * Install a new rt reverse mapping btree root.  Caller is responsible for
545  * invalidating and freeing the old btree blocks.
546  */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	/* Only staging cursors with a metadata-btree fake root may commit. */
	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	/* Stamp the owning rt group's number into the inode's project id. */
	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	/* Flip the cursor from staging mode to a live tree cursor. */
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}
572 
573 /* Calculate number of records in a rt reverse mapping btree block. */
574 static inline unsigned int
575 xfs_rtrmapbt_block_maxrecs(
576 	unsigned int		blocklen,
577 	bool			leaf)
578 {
579 	if (leaf)
580 		return blocklen / sizeof(struct xfs_rmap_rec);
581 	return blocklen /
582 		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
583 }
584 
/*
 * Calculate the number of rmap records that fit in an ondisk btree block
 * of the given size, once the block header has been excluded.
 */
588 unsigned int
589 xfs_rtrmapbt_maxrecs(
590 	struct xfs_mount	*mp,
591 	unsigned int		blocklen,
592 	bool			leaf)
593 {
594 	blocklen -= XFS_RTRMAP_BLOCK_LEN;
595 	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
596 }
597 
598 /* Compute the max possible height for realtime reverse mapping btrees. */
599 unsigned int
600 xfs_rtrmapbt_maxlevels_ondisk(void)
601 {
602 	unsigned int		minrecs[2];
603 	unsigned int		blocklen;
604 
605 	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
606 
607 	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
608 	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;
609 
610 	/* We need at most one record for every block in an rt group. */
611 	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
612 }
613 
614 int __init
615 xfs_rtrmapbt_init_cur_cache(void)
616 {
617 	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
618 			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
619 			0, 0, NULL);
620 
621 	if (!xfs_rtrmapbt_cur_cache)
622 		return -ENOMEM;
623 	return 0;
624 }
625 
626 void
627 xfs_rtrmapbt_destroy_cur_cache(void)
628 {
629 	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
630 	xfs_rtrmapbt_cur_cache = NULL;
631 }
632 
633 /* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	/* Without the feature, there is no tree and hence no height. */
	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}
659 
660 /* Calculate the rtrmap btree size for some records. */
static unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	/* Defer to the generic btree geometry code with our minrecs table. */
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}
668 
669 /*
670  * Calculate the maximum rmap btree size.
671  */
672 static unsigned long long
673 xfs_rtrmapbt_max_size(
674 	struct xfs_mount	*mp,
675 	xfs_rtblock_t		rtblocks)
676 {
677 	/* Bail out if we're uninitialized, which can happen in mkfs. */
678 	if (mp->m_rtrmap_mxr[0] == 0)
679 		return 0;
680 
681 	return xfs_rtrmapbt_calc_size(mp, rtblocks);
682 }
683 
684 /*
685  * Figure out how many blocks to reserve and how many are used by this btree.
686  */
687 xfs_filblks_t
688 xfs_rtrmapbt_calc_reserves(
689 	struct xfs_mount	*mp)
690 {
691 	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;
692 
693 	if (!xfs_has_rtrmapbt(mp))
694 		return 0;
695 
696 	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
697 	return max_t(xfs_filblks_t, blocks / 100,
698 			xfs_rtrmapbt_max_size(mp, blocks));
699 }
700 
701 /* Convert on-disk form of btree root to in-memory form. */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;	/* from: ondisk keys */
	__be64			*fpp;	/* from: ondisk pointers */
	struct xfs_rmap_key	*tkp;	/* to: incore keys */
	__be64			*tpp;	/* to: incore pointers */
	struct xfs_rmap_rec	*frp;	/* from: ondisk records */
	struct xfs_rmap_rec	*trp;	/* to: incore records */
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key pairs and the pointer array. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		/* Overlapping btree: each entry carries a low and a high key. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records straight across. */
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
740 
741 /* Load a realtime reverse mapping btree root in from disk. */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount))
		return -EFSCORRUPTED;

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/* Reject geometry that cannot fit in the ondisk fork area. */
	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize)
		return -EFSCORRUPTED;

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	/* Only populate the incore root if an allocation was made. */
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}
775 
776 /* Convert in-memory form of btree root to on-disk form. */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;	/* from: incore keys */
	__be64			*fpp;	/* from: incore pointers */
	struct xfs_rmap_key	*tkp;	/* to: ondisk keys */
	__be64			*tpp;	/* to: ondisk pointers */
	struct xfs_rmap_rec	*frp;	/* from: incore records */
	struct xfs_rmap_rec	*trp;	/* to: ondisk records */
	unsigned int		numrecs;
	unsigned int		maxrecs;

	/* Sanity-check the incore root before serializing it. */
	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key pairs and the pointer array. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		/* Overlapping btree: each entry carries a low and a high key. */
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records straight across. */
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}
818 
819 /* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	/* The incore root must exist and fit within the ondisk fork area. */
	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}
835