xref: /linux/fs/xfs/libxfs/xfs_rtgroup.c (revision 76d3be00df91a56f7c05142ed500f8f8544d5457)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2022-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_bit.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_btree.h"
15 #include "xfs_alloc_btree.h"
16 #include "xfs_rmap_btree.h"
17 #include "xfs_alloc.h"
18 #include "xfs_ialloc.h"
19 #include "xfs_rmap.h"
20 #include "xfs_ag.h"
21 #include "xfs_ag_resv.h"
22 #include "xfs_health.h"
23 #include "xfs_error.h"
24 #include "xfs_bmap.h"
25 #include "xfs_defer.h"
26 #include "xfs_log_format.h"
27 #include "xfs_trans.h"
28 #include "xfs_trace.h"
29 #include "xfs_inode.h"
30 #include "xfs_icache.h"
31 #include "xfs_buf_item.h"
32 #include "xfs_rtgroup.h"
33 #include "xfs_rtbitmap.h"
34 #include "xfs_metafile.h"
35 #include "xfs_metadir.h"
36 
37 int
38 xfs_rtgroup_alloc(
39 	struct xfs_mount	*mp,
40 	xfs_rgnumber_t		rgno,
41 	xfs_rgnumber_t		rgcount,
42 	xfs_rtbxlen_t		rextents)
43 {
44 	struct xfs_rtgroup	*rtg;
45 	int			error;
46 
47 	rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL);
48 	if (!rtg)
49 		return -ENOMEM;
50 
51 	error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
52 	if (error)
53 		goto out_free_rtg;
54 	return 0;
55 
56 out_free_rtg:
57 	kfree(rtg);
58 	return error;
59 }
60 
/*
 * Free the incore rtgroup numbered @rgno by handing it to xfs_group_free()
 * as an XG_TYPE_RTG group.  NULL is passed for xfs_group_free()'s final
 * argument.
 */
void
xfs_rtgroup_free(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL);
}
68 
69 /* Free a range of incore rtgroup objects. */
70 void
71 xfs_free_rtgroups(
72 	struct xfs_mount	*mp,
73 	xfs_rgnumber_t		first_rgno,
74 	xfs_rgnumber_t		end_rgno)
75 {
76 	xfs_rgnumber_t		rgno;
77 
78 	for (rgno = first_rgno; rgno < end_rgno; rgno++)
79 		xfs_rtgroup_free(mp, rgno);
80 }
81 
82 /* Initialize some range of incore rtgroup objects. */
83 int
84 xfs_initialize_rtgroups(
85 	struct xfs_mount	*mp,
86 	xfs_rgnumber_t		first_rgno,
87 	xfs_rgnumber_t		end_rgno,
88 	xfs_rtbxlen_t		rextents)
89 {
90 	xfs_rgnumber_t		index;
91 	int			error;
92 
93 	if (first_rgno >= end_rgno)
94 		return 0;
95 
96 	for (index = first_rgno; index < end_rgno; index++) {
97 		error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents);
98 		if (error)
99 			goto out_unwind_new_rtgs;
100 	}
101 
102 	return 0;
103 
104 out_unwind_new_rtgs:
105 	xfs_free_rtgroups(mp, first_rgno, index);
106 	return error;
107 }
108 
109 /* Compute the number of rt extents in this realtime group. */
110 xfs_rtxnum_t
111 __xfs_rtgroup_extents(
112 	struct xfs_mount	*mp,
113 	xfs_rgnumber_t		rgno,
114 	xfs_rgnumber_t		rgcount,
115 	xfs_rtbxlen_t		rextents)
116 {
117 	ASSERT(rgno < rgcount);
118 	if (rgno == rgcount - 1)
119 		return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents);
120 
121 	ASSERT(xfs_has_rtgroups(mp));
122 	return mp->m_sb.sb_rgextents;
123 }
124 
/*
 * Compute the number of rt extents in realtime group @rgno using the group
 * count and rt extent count currently recorded in the incore superblock.
 */
xfs_rtxnum_t
xfs_rtgroup_extents(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount,
			mp->m_sb.sb_rextents);
}
133 
134 /*
135  * Update the rt extent count of the previous tail rtgroup if it changed during
136  * recovery (i.e. recovery of a growfs).
137  */
138 int
139 xfs_update_last_rtgroup_size(
140 	struct xfs_mount	*mp,
141 	xfs_rgnumber_t		prev_rgcount)
142 {
143 	struct xfs_rtgroup	*rtg;
144 
145 	ASSERT(prev_rgcount > 0);
146 
147 	rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1);
148 	if (!rtg)
149 		return -EFSCORRUPTED;
150 	rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
151 			mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
152 	xfs_rtgroup_rele(rtg);
153 	return 0;
154 }
155 
156 /* Lock metadata inodes associated with this rt group. */
157 void
158 xfs_rtgroup_lock(
159 	struct xfs_rtgroup	*rtg,
160 	unsigned int		rtglock_flags)
161 {
162 	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
163 	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
164 	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));
165 
166 	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
167 		/*
168 		 * Lock both realtime free space metadata inodes for a freespace
169 		 * update.
170 		 */
171 		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
172 		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
173 	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
174 		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
175 	}
176 }
177 
178 /* Unlock metadata inodes associated with this rt group. */
179 void
180 xfs_rtgroup_unlock(
181 	struct xfs_rtgroup	*rtg,
182 	unsigned int		rtglock_flags)
183 {
184 	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
185 	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
186 	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));
187 
188 	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
189 		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
190 		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
191 	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
192 		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
193 	}
194 }
195 
196 /*
197  * Join realtime group metadata inodes to the transaction.  The ILOCKs will be
198  * released on transaction commit.
199  */
200 void
201 xfs_rtgroup_trans_join(
202 	struct xfs_trans	*tp,
203 	struct xfs_rtgroup	*rtg,
204 	unsigned int		rtglock_flags)
205 {
206 	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
207 	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));
208 
209 	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
210 		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
211 				XFS_ILOCK_EXCL);
212 		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
213 				XFS_ILOCK_EXCL);
214 	}
215 }
216 
217 #ifdef CONFIG_PROVE_LOCKING
/* Lockdep class assigned to the ILOCK of every rtgroup metadata inode. */
static struct lock_class_key xfs_rtginode_lock_class;
219 
220 static int
221 xfs_rtginode_ilock_cmp_fn(
222 	const struct lockdep_map	*m1,
223 	const struct lockdep_map	*m2)
224 {
225 	const struct xfs_inode *ip1 =
226 		container_of(m1, struct xfs_inode, i_lock.dep_map);
227 	const struct xfs_inode *ip2 =
228 		container_of(m2, struct xfs_inode, i_lock.dep_map);
229 
230 	if (ip1->i_projid < ip2->i_projid)
231 		return -1;
232 	if (ip1->i_projid > ip2->i_projid)
233 		return 1;
234 	return 0;
235 }
236 
/*
 * Lockdep print function for rtgroup metadata inode ILOCKs: appends the
 * inode's i_projid, labelled as the rtgroup number, to the current log line.
 */
static inline void
xfs_rtginode_ilock_print_fn(
	const struct lockdep_map	*m)
{
	const struct xfs_inode *ip =
		container_of(m, struct xfs_inode, i_lock.dep_map);

	printk(KERN_CONT " rgno=%u", ip->i_projid);
}
246 
/*
 * Most of the time each of the RTG inode locks are only taken one at a time.
 * But when committing deferred ops, more than one of a kind can be taken.
 * However, deferred rt ops will be committed in rgno order so there is no
 * potential for deadlocks.  The code here is needed to tell lockdep about this
 * order.
 *
 * The inode's ILOCK is placed in the rtginode lock class with @type as its
 * subclass, and the comparison/print callbacks are registered.  @rgno is not
 * used directly here; the callbacks read the group number from i_projid.
 */
static inline void
xfs_rtginode_lockdep_setup(
	struct xfs_inode	*ip,
	xfs_rgnumber_t		rgno,
	enum xfs_rtg_inodes	type)
{
	lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class,
			type);
	lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn,
			xfs_rtginode_ilock_print_fn);
}
265 #else
/* Without CONFIG_PROVE_LOCKING there is nothing to set up. */
#define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
267 #endif /* CONFIG_PROVE_LOCKING */
268 
/* Per-type description of an rtgroup metadata inode. */
struct xfs_rtginode_ops {
	const char		*name;	/* short name */

	/* Metafile type passed to the helpers that load or create the inode. */
	enum xfs_metafile_type	metafile_type;

	/*
	 * Does the fs have this feature?  May be NULL, in which case the
	 * inode type is treated as always enabled.
	 */
	bool			(*enabled)(struct xfs_mount *mp);

	/* Create this rtgroup metadata inode and initialize it. */
	int			(*create)(struct xfs_rtgroup *rtg,
					  struct xfs_inode *ip,
					  struct xfs_trans *tp,
					  bool init);
};
283 
/*
 * Table of rtgroup metadata inode types, indexed by enum xfs_rtg_inodes.
 * Neither entry sets ->enabled, so both types are always considered enabled.
 */
static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
	[XFS_RTGI_BITMAP] = {
		.name		= "bitmap",
		.metafile_type	= XFS_METAFILE_RTBITMAP,
		.create		= xfs_rtbitmap_create,
	},
	[XFS_RTGI_SUMMARY] = {
		.name		= "summary",
		.metafile_type	= XFS_METAFILE_RTSUMMARY,
		.create		= xfs_rtsummary_create,
	},
};
296 
/*
 * Return the shortname of this rtgroup inode.  @type indexes the ops table
 * directly and is not range-checked here.
 */
const char *
xfs_rtginode_name(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].name;
}
304 
/*
 * Return the metafile type of this rtgroup inode.  @type indexes the ops
 * table directly and is not range-checked here.
 */
enum xfs_metafile_type
xfs_rtginode_metafile_type(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].metafile_type;
}
312 
313 /* Should this rtgroup inode be present? */
314 bool
315 xfs_rtginode_enabled(
316 	struct xfs_rtgroup	*rtg,
317 	enum xfs_rtg_inodes	type)
318 {
319 	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
320 
321 	if (!ops->enabled)
322 		return true;
323 	return ops->enabled(rtg_mount(rtg));
324 }
325 
/*
 * Load an existing rtgroup inode into the rtgroup structure.
 *
 * Returns 0 without loading anything if this inode type is not enabled, or if
 * the fs lacks rtgroups and @type is neither the bitmap nor the summary.
 * Otherwise returns 0 with the inode stored in rtg->rtg_inodes[type], or a
 * negative errno.
 */
int
xfs_rtginode_load(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type,
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_inode	*ip;
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
	int			error;

	if (!xfs_rtginode_enabled(rtg, type))
		return 0;

	if (!xfs_has_rtgroups(mp)) {
		xfs_ino_t	ino;

		/* Without rtgroups, the inode number comes from the superblock. */
		switch (type) {
		case XFS_RTGI_BITMAP:
			ino = mp->m_sb.sb_rbmino;
			break;
		case XFS_RTGI_SUMMARY:
			ino = mp->m_sb.sb_rsumino;
			break;
		default:
			/* None of the other types exist on !rtgroups */
			return 0;
		}

		error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type,
				&ip);
	} else {
		const char	*path;

		/* With rtgroups, look the inode up by path under m_rtdirip. */
		if (!mp->m_rtdirip)
			return -EFSCORRUPTED;

		path = xfs_rtginode_path(rtg_rgno(rtg), type);
		if (!path)
			return -ENOMEM;
		error = xfs_metadir_load(tp, mp->m_rtdirip, path,
				ops->metafile_type, &ip);
		/* The path is only needed for the lookup itself. */
		kfree(path);
	}

	if (error)
		return error;

	/* Only extents- or btree-format data forks are acceptable. */
	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
		xfs_irele(ip);
		return -EFSCORRUPTED;
	}

	/* The inode's project id must match the rtgroup number. */
	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
		xfs_irele(ip);
		return -EFSCORRUPTED;
	}

	xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type);
	rtg->rtg_inodes[type] = ip;
	return 0;
}
390 
391 /* Release an rtgroup metadata inode. */
392 void
393 xfs_rtginode_irele(
394 	struct xfs_inode	**ipp)
395 {
396 	if (*ipp)
397 		xfs_irele(*ipp);
398 	*ipp = NULL;
399 }
400 
401 /* Add a metadata inode for a realtime rmap btree. */
402 int
403 xfs_rtginode_create(
404 	struct xfs_rtgroup		*rtg,
405 	enum xfs_rtg_inodes		type,
406 	bool				init)
407 {
408 	const struct xfs_rtginode_ops	*ops = &xfs_rtginode_ops[type];
409 	struct xfs_mount		*mp = rtg_mount(rtg);
410 	struct xfs_metadir_update	upd = {
411 		.dp			= mp->m_rtdirip,
412 		.metafile_type		= ops->metafile_type,
413 	};
414 	int				error;
415 
416 	if (!xfs_rtginode_enabled(rtg, type))
417 		return 0;
418 
419 	if (!mp->m_rtdirip)
420 		return -EFSCORRUPTED;
421 
422 	upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
423 	if (!upd.path)
424 		return -ENOMEM;
425 
426 	error = xfs_metadir_start_create(&upd);
427 	if (error)
428 		goto out_path;
429 
430 	error = xfs_metadir_create(&upd, S_IFREG);
431 	if (error)
432 		return error;
433 
434 	xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);
435 
436 	upd.ip->i_projid = rtg_rgno(rtg);
437 	error = ops->create(rtg, upd.ip, upd.tp, init);
438 	if (error)
439 		goto out_cancel;
440 
441 	error = xfs_metadir_commit(&upd);
442 	if (error)
443 		goto out_path;
444 
445 	kfree(upd.path);
446 	xfs_finish_inode_setup(upd.ip);
447 	rtg->rtg_inodes[type] = upd.ip;
448 	return 0;
449 
450 out_cancel:
451 	xfs_metadir_cancel(&upd, error);
452 	/* Have to finish setting up the inode to ensure it's deleted. */
453 	if (upd.ip) {
454 		xfs_finish_inode_setup(upd.ip);
455 		xfs_irele(upd.ip);
456 	}
457 out_path:
458 	kfree(upd.path);
459 	return error;
460 }
461 
462 /* Create the parent directory for all rtgroup inodes and load it. */
463 int
464 xfs_rtginode_mkdir_parent(
465 	struct xfs_mount	*mp)
466 {
467 	if (!mp->m_metadirip)
468 		return -EFSCORRUPTED;
469 
470 	return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
471 }
472 
473 /* Load the parent directory of all rtgroup inodes. */
474 int
475 xfs_rtginode_load_parent(
476 	struct xfs_trans	*tp)
477 {
478 	struct xfs_mount	*mp = tp->t_mountp;
479 
480 	if (!mp->m_metadirip)
481 		return -EFSCORRUPTED;
482 
483 	return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
484 			XFS_METAFILE_DIR, &mp->m_rtdirip);
485 }
486 
/*
 * Check superblock fields for a read or a write.
 *
 * Returns NULL if the magic number is valid, the pad field is zero and
 * everything after the rtsb structure in the buffer is zero; otherwise returns
 * the address of the check that failed.
 */
static xfs_failaddr_t
xfs_rtsb_verify_common(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;

	if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
		return __this_address;
	if (rsb->rsb_pad)
		return __this_address;

	/* Everything to the end of the fs block must be zero */
	if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
		return __this_address;

	return NULL;
}
505 
/*
 * Check superblock fields for a read or revalidation.
 *
 * On top of the common checks, the label, uuid and metadata uuid in the rt
 * superblock must match the incore primary superblock.  Returns NULL if
 * everything matches, otherwise the address of the check that failed.
 */
static inline xfs_failaddr_t
xfs_rtsb_verify_all(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;
	struct xfs_mount	*mp = bp->b_mount;
	xfs_failaddr_t		fa;

	fa = xfs_rtsb_verify_common(bp);
	if (fa)
		return fa;

	if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
		return  __this_address;

	return NULL;
}
528 
529 static void
530 xfs_rtsb_read_verify(
531 	struct xfs_buf		*bp)
532 {
533 	xfs_failaddr_t		fa;
534 
535 	if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
536 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
537 		return;
538 	}
539 
540 	fa = xfs_rtsb_verify_all(bp);
541 	if (fa)
542 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
543 }
544 
545 static void
546 xfs_rtsb_write_verify(
547 	struct xfs_buf		*bp)
548 {
549 	xfs_failaddr_t		fa;
550 
551 	fa = xfs_rtsb_verify_common(bp);
552 	if (fa) {
553 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
554 		return;
555 	}
556 
557 	xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
558 }
559 
/*
 * Buffer verifier operations for the realtime superblock.
 *
 * NOTE(review): the first .magic slot is zero and only the second holds
 * XFS_RTSB_MAGIC; presumably the first slot is for a format variant that does
 * not exist for this block type -- confirm against xfs_verify_magic().
 */
const struct xfs_buf_ops xfs_rtsb_buf_ops = {
	.name		= "xfs_rtsb",
	.magic		= { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
	.verify_read	= xfs_rtsb_read_verify,
	.verify_write	= xfs_rtsb_write_verify,
	.verify_struct	= xfs_rtsb_verify_all,
};
567 
568 /* Update a realtime superblock from the primary fs super */
569 void
570 xfs_update_rtsb(
571 	struct xfs_buf		*rtsb_bp,
572 	const struct xfs_buf	*sb_bp)
573 {
574 	const struct xfs_dsb	*dsb = sb_bp->b_addr;
575 	struct xfs_rtsb		*rsb = rtsb_bp->b_addr;
576 	const uuid_t		*meta_uuid;
577 
578 	rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);
579 
580 	rsb->rsb_pad = 0;
581 	memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);
582 
583 	memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));
584 
585 	/*
586 	 * The metadata uuid is the fs uuid if the metauuid feature is not
587 	 * enabled.
588 	 */
589 	if (dsb->sb_features_incompat &
590 				cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
591 		meta_uuid = &dsb->sb_meta_uuid;
592 	else
593 		meta_uuid = &dsb->sb_uuid;
594 	memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
595 }
596 
597 /*
598  * Update the realtime superblock from a filesystem superblock and log it to
599  * the given transaction.
600  */
601 struct xfs_buf *
602 xfs_log_rtsb(
603 	struct xfs_trans	*tp,
604 	const struct xfs_buf	*sb_bp)
605 {
606 	struct xfs_buf		*rtsb_bp;
607 
608 	if (!xfs_has_rtsb(tp->t_mountp))
609 		return NULL;
610 
611 	rtsb_bp = xfs_trans_getrtsb(tp);
612 	if (!rtsb_bp) {
613 		/*
614 		 * It's possible for the rtgroups feature to be enabled but
615 		 * there is no incore rt superblock buffer if the rt geometry
616 		 * was specified at mkfs time but the rt section has not yet
617 		 * been attached.  In this case, rblocks must be zero.
618 		 */
619 		ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
620 		return NULL;
621 	}
622 
623 	xfs_update_rtsb(rtsb_bp, sb_bp);
624 	xfs_trans_ordered_buf(tp, rtsb_bp);
625 	return rtsb_bp;
626 }
627