xref: /linux/fs/xfs/xfs_rtalloc.c (revision 02091cbe9cc4f18167208eec1d6de636cc731817)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_bmap.h"
16 #include "xfs_bmap_btree.h"
17 #include "xfs_trans.h"
18 #include "xfs_trans_space.h"
19 #include "xfs_icache.h"
20 #include "xfs_rtalloc.h"
21 #include "xfs_sb.h"
22 
23 /*
24  * Read and return the summary information for a given extent size,
25  * bitmap block combination.
26  * Keeps track of a current summary block, so we don't keep reading
27  * it from the buffer cache.
28  */
29 static int
30 xfs_rtget_summary(
31 	xfs_mount_t	*mp,		/* file system mount structure */
32 	xfs_trans_t	*tp,		/* transaction pointer */
33 	int		log,		/* log2 of extent size */
34 	xfs_rtblock_t	bbno,		/* bitmap block number */
35 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
36 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
37 	xfs_suminfo_t	*sum)		/* out: summary info for this block */
38 {
39 	return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
40 }
41 
42 /*
43  * Return whether there are any free extents in the size range given
44  * by low and high, for the bitmap block bbno.
45  */
46 STATIC int				/* error */
47 xfs_rtany_summary(
48 	xfs_mount_t	*mp,		/* file system mount structure */
49 	xfs_trans_t	*tp,		/* transaction pointer */
50 	int		low,		/* low log2 extent size */
51 	int		high,		/* high log2 extent size */
52 	xfs_rtblock_t	bbno,		/* bitmap block number */
53 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
54 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
55 	int		*stat)		/* out: any good extents here? */
56 {
57 	int		error;		/* error value */
58 	int		log;		/* loop counter, log2 of ext. size */
59 	xfs_suminfo_t	sum;		/* summary data */
60 
61 	/* There are no extents at levels < m_rsum_cache[bbno]. */
62 	if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
63 		low = mp->m_rsum_cache[bbno];
64 
65 	/*
66 	 * Loop over logs of extent sizes.
67 	 */
68 	for (log = low; log <= high; log++) {
69 		/*
70 		 * Get one summary datum.
71 		 */
72 		error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum);
73 		if (error) {
74 			return error;
75 		}
76 		/*
77 		 * If there are any, return success.
78 		 */
79 		if (sum) {
80 			*stat = 1;
81 			goto out;
82 		}
83 	}
84 	/*
85 	 * Found nothing, return failure.
86 	 */
87 	*stat = 0;
88 out:
89 	/* There were no extents at levels < log. */
90 	if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
91 		mp->m_rsum_cache[bbno] = log;
92 	return 0;
93 }
94 
95 
96 /*
97  * Copy and transform the summary file, given the old and new
98  * parameters in the mount structures.
99  */
100 STATIC int				/* error */
101 xfs_rtcopy_summary(
102 	xfs_mount_t	*omp,		/* old file system mount point */
103 	xfs_mount_t	*nmp,		/* new file system mount point */
104 	xfs_trans_t	*tp)		/* transaction pointer */
105 {
106 	xfs_rtblock_t	bbno;		/* bitmap block number */
107 	struct xfs_buf	*bp;		/* summary buffer */
108 	int		error;		/* error return value */
109 	int		log;		/* summary level number (log length) */
110 	xfs_suminfo_t	sum;		/* summary data */
111 	xfs_fsblock_t	sumbno;		/* summary block number */
112 
113 	bp = NULL;
114 	for (log = omp->m_rsumlevels - 1; log >= 0; log--) {
115 		for (bbno = omp->m_sb.sb_rbmblocks - 1;
116 		     (xfs_srtblock_t)bbno >= 0;
117 		     bbno--) {
118 			error = xfs_rtget_summary(omp, tp, log, bbno, &bp,
119 				&sumbno, &sum);
120 			if (error)
121 				return error;
122 			if (sum == 0)
123 				continue;
124 			error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum,
125 				&bp, &sumbno);
126 			if (error)
127 				return error;
128 			error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum,
129 				&bp, &sumbno);
130 			if (error)
131 				return error;
132 			ASSERT(sum > 0);
133 		}
134 	}
135 	return 0;
136 }
137 /*
138  * Mark an extent specified by start and len allocated.
139  * Updates all the summary information as well as the bitmap.
140  */
141 STATIC int				/* error */
142 xfs_rtallocate_range(
143 	xfs_mount_t	*mp,		/* file system mount point */
144 	xfs_trans_t	*tp,		/* transaction pointer */
145 	xfs_rtblock_t	start,		/* start block to allocate */
146 	xfs_extlen_t	len,		/* length to allocate */
147 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
148 	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
149 {
150 	xfs_rtblock_t	end;		/* end of the allocated extent */
151 	int		error;		/* error value */
152 	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */
153 	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */
154 
155 	end = start + len - 1;
156 	/*
157 	 * Assume we're allocating out of the middle of a free extent.
158 	 * We need to find the beginning and end of the extent so we can
159 	 * properly update the summary.
160 	 */
161 	error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
162 	if (error) {
163 		return error;
164 	}
165 	/*
166 	 * Find the next allocated block (end of free extent).
167 	 */
168 	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
169 		&postblock);
170 	if (error) {
171 		return error;
172 	}
173 	/*
174 	 * Decrement the summary information corresponding to the entire
175 	 * (old) free extent.
176 	 */
177 	error = xfs_rtmodify_summary(mp, tp,
178 		XFS_RTBLOCKLOG(postblock + 1 - preblock),
179 		XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
180 	if (error) {
181 		return error;
182 	}
183 	/*
184 	 * If there are blocks not being allocated at the front of the
185 	 * old extent, add summary data for them to be free.
186 	 */
187 	if (preblock < start) {
188 		error = xfs_rtmodify_summary(mp, tp,
189 			XFS_RTBLOCKLOG(start - preblock),
190 			XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
191 		if (error) {
192 			return error;
193 		}
194 	}
195 	/*
196 	 * If there are blocks not being allocated at the end of the
197 	 * old extent, add summary data for them to be free.
198 	 */
199 	if (postblock > end) {
200 		error = xfs_rtmodify_summary(mp, tp,
201 			XFS_RTBLOCKLOG(postblock - end),
202 			XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb);
203 		if (error) {
204 			return error;
205 		}
206 	}
207 	/*
208 	 * Modify the bitmap to mark this extent allocated.
209 	 */
210 	error = xfs_rtmodify_range(mp, tp, start, len, 0);
211 	return error;
212 }
213 
214 /*
215  * Attempt to allocate an extent minlen<=len<=maxlen starting from
216  * bitmap block bbno.  If we don't get maxlen then use prod to trim
217  * the length, if given.  Returns error; returns starting block in *rtblock.
218  * The lengths are all in rtextents.
219  */
220 STATIC int				/* error */
221 xfs_rtallocate_extent_block(
222 	xfs_mount_t	*mp,		/* file system mount point */
223 	xfs_trans_t	*tp,		/* transaction pointer */
224 	xfs_rtblock_t	bbno,		/* bitmap block number */
225 	xfs_extlen_t	minlen,		/* minimum length to allocate */
226 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
227 	xfs_extlen_t	*len,		/* out: actual length allocated */
228 	xfs_rtblock_t	*nextp,		/* out: next block to try */
229 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
230 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
231 	xfs_extlen_t	prod,		/* extent product factor */
232 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
233 {
234 	xfs_rtblock_t	besti;		/* best rtblock found so far */
235 	xfs_rtblock_t	bestlen;	/* best length found so far */
236 	xfs_rtblock_t	end;		/* last rtblock in chunk */
237 	int		error;		/* error value */
238 	xfs_rtblock_t	i;		/* current rtblock trying */
239 	xfs_rtblock_t	next;		/* next rtblock to try */
240 	int		stat;		/* status from internal calls */
241 
242 	/*
243 	 * Loop over all the extents starting in this bitmap block,
244 	 * looking for one that's long enough.
245 	 */
246 	for (i = XFS_BLOCKTOBIT(mp, bbno), besti = -1, bestlen = 0,
247 		end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
248 	     i <= end;
249 	     i++) {
250 		/* Make sure we don't scan off the end of the rt volume. */
251 		maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
252 
253 		/*
254 		 * See if there's a free extent of maxlen starting at i.
255 		 * If it's not so then next will contain the first non-free.
256 		 */
257 		error = xfs_rtcheck_range(mp, tp, i, maxlen, 1, &next, &stat);
258 		if (error) {
259 			return error;
260 		}
261 		if (stat) {
262 			/*
263 			 * i for maxlen is all free, allocate and return that.
264 			 */
265 			error = xfs_rtallocate_range(mp, tp, i, maxlen, rbpp,
266 				rsb);
267 			if (error) {
268 				return error;
269 			}
270 			*len = maxlen;
271 			*rtblock = i;
272 			return 0;
273 		}
274 		/*
275 		 * In the case where we have a variable-sized allocation
276 		 * request, figure out how big this free piece is,
277 		 * and if it's big enough for the minimum, and the best
278 		 * so far, remember it.
279 		 */
280 		if (minlen < maxlen) {
281 			xfs_rtblock_t	thislen;	/* this extent size */
282 
283 			thislen = next - i;
284 			if (thislen >= minlen && thislen > bestlen) {
285 				besti = i;
286 				bestlen = thislen;
287 			}
288 		}
289 		/*
290 		 * If not done yet, find the start of the next free space.
291 		 */
292 		if (next < end) {
293 			error = xfs_rtfind_forw(mp, tp, next, end, &i);
294 			if (error) {
295 				return error;
296 			}
297 		} else
298 			break;
299 	}
300 	/*
301 	 * Searched the whole thing & didn't find a maxlen free extent.
302 	 */
303 	if (minlen < maxlen && besti != -1) {
304 		xfs_extlen_t	p;	/* amount to trim length by */
305 
306 		/*
307 		 * If size should be a multiple of prod, make that so.
308 		 */
309 		if (prod > 1) {
310 			div_u64_rem(bestlen, prod, &p);
311 			if (p)
312 				bestlen -= p;
313 		}
314 
315 		/*
316 		 * Allocate besti for bestlen & return that.
317 		 */
318 		error = xfs_rtallocate_range(mp, tp, besti, bestlen, rbpp, rsb);
319 		if (error) {
320 			return error;
321 		}
322 		*len = bestlen;
323 		*rtblock = besti;
324 		return 0;
325 	}
326 	/*
327 	 * Allocation failed.  Set *nextp to the next block to try.
328 	 */
329 	*nextp = next;
330 	*rtblock = NULLRTBLOCK;
331 	return 0;
332 }
333 
334 /*
335  * Allocate an extent of length minlen<=len<=maxlen, starting at block
336  * bno.  If we don't get maxlen then use prod to trim the length, if given.
337  * Returns error; returns starting block in *rtblock.
338  * The lengths are all in rtextents.
339  */
340 STATIC int				/* error */
341 xfs_rtallocate_extent_exact(
342 	xfs_mount_t	*mp,		/* file system mount point */
343 	xfs_trans_t	*tp,		/* transaction pointer */
344 	xfs_rtblock_t	bno,		/* starting block number to allocate */
345 	xfs_extlen_t	minlen,		/* minimum length to allocate */
346 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
347 	xfs_extlen_t	*len,		/* out: actual length allocated */
348 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
349 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
350 	xfs_extlen_t	prod,		/* extent product factor */
351 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
352 {
353 	int		error;		/* error value */
354 	xfs_extlen_t	i;		/* extent length trimmed due to prod */
355 	int		isfree;		/* extent is free */
356 	xfs_rtblock_t	next;		/* next block to try (dummy) */
357 
358 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
359 	/*
360 	 * Check if the range in question (for maxlen) is free.
361 	 */
362 	error = xfs_rtcheck_range(mp, tp, bno, maxlen, 1, &next, &isfree);
363 	if (error) {
364 		return error;
365 	}
366 	if (isfree) {
367 		/*
368 		 * If it is, allocate it and return success.
369 		 */
370 		error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
371 		if (error) {
372 			return error;
373 		}
374 		*len = maxlen;
375 		*rtblock = bno;
376 		return 0;
377 	}
378 	/*
379 	 * If not, allocate what there is, if it's at least minlen.
380 	 */
381 	maxlen = next - bno;
382 	if (maxlen < minlen) {
383 		/*
384 		 * Failed, return failure status.
385 		 */
386 		*rtblock = NULLRTBLOCK;
387 		return 0;
388 	}
389 	/*
390 	 * Trim off tail of extent, if prod is specified.
391 	 */
392 	if (prod > 1 && (i = maxlen % prod)) {
393 		maxlen -= i;
394 		if (maxlen < minlen) {
395 			/*
396 			 * Now we can't do it, return failure status.
397 			 */
398 			*rtblock = NULLRTBLOCK;
399 			return 0;
400 		}
401 	}
402 	/*
403 	 * Allocate what we can and return it.
404 	 */
405 	error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
406 	if (error) {
407 		return error;
408 	}
409 	*len = maxlen;
410 	*rtblock = bno;
411 	return 0;
412 }
413 
414 /*
415  * Allocate an extent of length minlen<=len<=maxlen, starting as near
416  * to bno as possible.  If we don't get maxlen then use prod to trim
417  * the length, if given.  The lengths are all in rtextents.
418  */
419 STATIC int				/* error */
420 xfs_rtallocate_extent_near(
421 	xfs_mount_t	*mp,		/* file system mount point */
422 	xfs_trans_t	*tp,		/* transaction pointer */
423 	xfs_rtblock_t	bno,		/* starting block number to allocate */
424 	xfs_extlen_t	minlen,		/* minimum length to allocate */
425 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
426 	xfs_extlen_t	*len,		/* out: actual length allocated */
427 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
428 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
429 	xfs_extlen_t	prod,		/* extent product factor */
430 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
431 {
432 	int		any;		/* any useful extents from summary */
433 	xfs_rtblock_t	bbno;		/* bitmap block number */
434 	int		error;		/* error value */
435 	int		i;		/* bitmap block offset (loop control) */
436 	int		j;		/* secondary loop control */
437 	int		log2len;	/* log2 of minlen */
438 	xfs_rtblock_t	n;		/* next block to try */
439 	xfs_rtblock_t	r;		/* result block */
440 
441 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
442 	/*
443 	 * If the block number given is off the end, silently set it to
444 	 * the last block.
445 	 */
446 	if (bno >= mp->m_sb.sb_rextents)
447 		bno = mp->m_sb.sb_rextents - 1;
448 
449 	/* Make sure we don't run off the end of the rt volume. */
450 	maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
451 	if (maxlen < minlen) {
452 		*rtblock = NULLRTBLOCK;
453 		return 0;
454 	}
455 
456 	/*
457 	 * Try the exact allocation first.
458 	 */
459 	error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, len,
460 		rbpp, rsb, prod, &r);
461 	if (error) {
462 		return error;
463 	}
464 	/*
465 	 * If the exact allocation worked, return that.
466 	 */
467 	if (r != NULLRTBLOCK) {
468 		*rtblock = r;
469 		return 0;
470 	}
471 	bbno = XFS_BITTOBLOCK(mp, bno);
472 	i = 0;
473 	ASSERT(minlen != 0);
474 	log2len = xfs_highbit32(minlen);
475 	/*
476 	 * Loop over all bitmap blocks (bbno + i is current block).
477 	 */
478 	for (;;) {
479 		/*
480 		 * Get summary information of extents of all useful levels
481 		 * starting in this bitmap block.
482 		 */
483 		error = xfs_rtany_summary(mp, tp, log2len, mp->m_rsumlevels - 1,
484 			bbno + i, rbpp, rsb, &any);
485 		if (error) {
486 			return error;
487 		}
488 		/*
489 		 * If there are any useful extents starting here, try
490 		 * allocating one.
491 		 */
492 		if (any) {
493 			/*
494 			 * On the positive side of the starting location.
495 			 */
496 			if (i >= 0) {
497 				/*
498 				 * Try to allocate an extent starting in
499 				 * this block.
500 				 */
501 				error = xfs_rtallocate_extent_block(mp, tp,
502 					bbno + i, minlen, maxlen, len, &n, rbpp,
503 					rsb, prod, &r);
504 				if (error) {
505 					return error;
506 				}
507 				/*
508 				 * If it worked, return it.
509 				 */
510 				if (r != NULLRTBLOCK) {
511 					*rtblock = r;
512 					return 0;
513 				}
514 			}
515 			/*
516 			 * On the negative side of the starting location.
517 			 */
518 			else {		/* i < 0 */
519 				/*
520 				 * Loop backwards through the bitmap blocks from
521 				 * the starting point-1 up to where we are now.
522 				 * There should be an extent which ends in this
523 				 * bitmap block and is long enough.
524 				 */
525 				for (j = -1; j > i; j--) {
526 					/*
527 					 * Grab the summary information for
528 					 * this bitmap block.
529 					 */
530 					error = xfs_rtany_summary(mp, tp,
531 						log2len, mp->m_rsumlevels - 1,
532 						bbno + j, rbpp, rsb, &any);
533 					if (error) {
534 						return error;
535 					}
536 					/*
537 					 * If there's no extent given in the
538 					 * summary that means the extent we
539 					 * found must carry over from an
540 					 * earlier block.  If there is an
541 					 * extent given, we've already tried
542 					 * that allocation, don't do it again.
543 					 */
544 					if (any)
545 						continue;
546 					error = xfs_rtallocate_extent_block(mp,
547 						tp, bbno + j, minlen, maxlen,
548 						len, &n, rbpp, rsb, prod, &r);
549 					if (error) {
550 						return error;
551 					}
552 					/*
553 					 * If it works, return the extent.
554 					 */
555 					if (r != NULLRTBLOCK) {
556 						*rtblock = r;
557 						return 0;
558 					}
559 				}
560 				/*
561 				 * There weren't intervening bitmap blocks
562 				 * with a long enough extent, or the
563 				 * allocation didn't work for some reason
564 				 * (i.e. it's a little * too short).
565 				 * Try to allocate from the summary block
566 				 * that we found.
567 				 */
568 				error = xfs_rtallocate_extent_block(mp, tp,
569 					bbno + i, minlen, maxlen, len, &n, rbpp,
570 					rsb, prod, &r);
571 				if (error) {
572 					return error;
573 				}
574 				/*
575 				 * If it works, return the extent.
576 				 */
577 				if (r != NULLRTBLOCK) {
578 					*rtblock = r;
579 					return 0;
580 				}
581 			}
582 		}
583 		/*
584 		 * Loop control.  If we were on the positive side, and there's
585 		 * still more blocks on the negative side, go there.
586 		 */
587 		if (i > 0 && (int)bbno - i >= 0)
588 			i = -i;
589 		/*
590 		 * If positive, and no more negative, but there are more
591 		 * positive, go there.
592 		 */
593 		else if (i > 0 && (int)bbno + i < mp->m_sb.sb_rbmblocks - 1)
594 			i++;
595 		/*
596 		 * If negative or 0 (just started), and there are positive
597 		 * blocks to go, go there.  The 0 case moves to block 1.
598 		 */
599 		else if (i <= 0 && (int)bbno - i < mp->m_sb.sb_rbmblocks - 1)
600 			i = 1 - i;
601 		/*
602 		 * If negative or 0 and there are more negative blocks,
603 		 * go there.
604 		 */
605 		else if (i <= 0 && (int)bbno + i > 0)
606 			i--;
607 		/*
608 		 * Must be done.  Return failure.
609 		 */
610 		else
611 			break;
612 	}
613 	*rtblock = NULLRTBLOCK;
614 	return 0;
615 }
616 
617 /*
618  * Allocate an extent of length minlen<=len<=maxlen, with no position
619  * specified.  If we don't get maxlen then use prod to trim
620  * the length, if given.  The lengths are all in rtextents.
621  */
622 STATIC int				/* error */
623 xfs_rtallocate_extent_size(
624 	xfs_mount_t	*mp,		/* file system mount point */
625 	xfs_trans_t	*tp,		/* transaction pointer */
626 	xfs_extlen_t	minlen,		/* minimum length to allocate */
627 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
628 	xfs_extlen_t	*len,		/* out: actual length allocated */
629 	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
630 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
631 	xfs_extlen_t	prod,		/* extent product factor */
632 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
633 {
634 	int		error;		/* error value */
635 	int		i;		/* bitmap block number */
636 	int		l;		/* level number (loop control) */
637 	xfs_rtblock_t	n;		/* next block to be tried */
638 	xfs_rtblock_t	r;		/* result block number */
639 	xfs_suminfo_t	sum;		/* summary information for extents */
640 
641 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
642 	ASSERT(maxlen != 0);
643 
644 	/*
645 	 * Loop over all the levels starting with maxlen.
646 	 * At each level, look at all the bitmap blocks, to see if there
647 	 * are extents starting there that are long enough (>= maxlen).
648 	 * Note, only on the initial level can the allocation fail if
649 	 * the summary says there's an extent.
650 	 */
651 	for (l = xfs_highbit32(maxlen); l < mp->m_rsumlevels; l++) {
652 		/*
653 		 * Loop over all the bitmap blocks.
654 		 */
655 		for (i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
656 			/*
657 			 * Get the summary for this level/block.
658 			 */
659 			error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
660 				&sum);
661 			if (error) {
662 				return error;
663 			}
664 			/*
665 			 * Nothing there, on to the next block.
666 			 */
667 			if (!sum)
668 				continue;
669 			/*
670 			 * Try allocating the extent.
671 			 */
672 			error = xfs_rtallocate_extent_block(mp, tp, i, maxlen,
673 				maxlen, len, &n, rbpp, rsb, prod, &r);
674 			if (error) {
675 				return error;
676 			}
677 			/*
678 			 * If it worked, return that.
679 			 */
680 			if (r != NULLRTBLOCK) {
681 				*rtblock = r;
682 				return 0;
683 			}
684 			/*
685 			 * If the "next block to try" returned from the
686 			 * allocator is beyond the next bitmap block,
687 			 * skip to that bitmap block.
688 			 */
689 			if (XFS_BITTOBLOCK(mp, n) > i + 1)
690 				i = XFS_BITTOBLOCK(mp, n) - 1;
691 		}
692 	}
693 	/*
694 	 * Didn't find any maxlen blocks.  Try smaller ones, unless
695 	 * we're asking for a fixed size extent.
696 	 */
697 	if (minlen > --maxlen) {
698 		*rtblock = NULLRTBLOCK;
699 		return 0;
700 	}
701 	ASSERT(minlen != 0);
702 	ASSERT(maxlen != 0);
703 
704 	/*
705 	 * Loop over sizes, from maxlen down to minlen.
706 	 * This time, when we do the allocations, allow smaller ones
707 	 * to succeed.
708 	 */
709 	for (l = xfs_highbit32(maxlen); l >= xfs_highbit32(minlen); l--) {
710 		/*
711 		 * Loop over all the bitmap blocks, try an allocation
712 		 * starting in that block.
713 		 */
714 		for (i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
715 			/*
716 			 * Get the summary information for this level/block.
717 			 */
718 			error =	xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
719 						  &sum);
720 			if (error) {
721 				return error;
722 			}
723 			/*
724 			 * If nothing there, go on to next.
725 			 */
726 			if (!sum)
727 				continue;
728 			/*
729 			 * Try the allocation.  Make sure the specified
730 			 * minlen/maxlen are in the possible range for
731 			 * this summary level.
732 			 */
733 			error = xfs_rtallocate_extent_block(mp, tp, i,
734 					XFS_RTMAX(minlen, 1 << l),
735 					XFS_RTMIN(maxlen, (1 << (l + 1)) - 1),
736 					len, &n, rbpp, rsb, prod, &r);
737 			if (error) {
738 				return error;
739 			}
740 			/*
741 			 * If it worked, return that extent.
742 			 */
743 			if (r != NULLRTBLOCK) {
744 				*rtblock = r;
745 				return 0;
746 			}
747 			/*
748 			 * If the "next block to try" returned from the
749 			 * allocator is beyond the next bitmap block,
750 			 * skip to that bitmap block.
751 			 */
752 			if (XFS_BITTOBLOCK(mp, n) > i + 1)
753 				i = XFS_BITTOBLOCK(mp, n) - 1;
754 		}
755 	}
756 	/*
757 	 * Got nothing, return failure.
758 	 */
759 	*rtblock = NULLRTBLOCK;
760 	return 0;
761 }
762 
763 /*
764  * Allocate space to the bitmap or summary file, and zero it, for growfs.
765  */
766 STATIC int
767 xfs_growfs_rt_alloc(
768 	struct xfs_mount	*mp,		/* file system mount point */
769 	xfs_extlen_t		oblocks,	/* old count of blocks */
770 	xfs_extlen_t		nblocks,	/* new count of blocks */
771 	struct xfs_inode	*ip)		/* inode (bitmap/summary) */
772 {
773 	xfs_fileoff_t		bno;		/* block number in file */
774 	struct xfs_buf		*bp;	/* temporary buffer for zeroing */
775 	xfs_daddr_t		d;		/* disk block address */
776 	int			error;		/* error return value */
777 	xfs_fsblock_t		fsbno;		/* filesystem block for bno */
778 	struct xfs_bmbt_irec	map;		/* block map output */
779 	int			nmap;		/* number of block maps */
780 	int			resblks;	/* space reservation */
781 	enum xfs_blft		buf_type;
782 	struct xfs_trans	*tp;
783 
784 	if (ip == mp->m_rsumip)
785 		buf_type = XFS_BLFT_RTSUMMARY_BUF;
786 	else
787 		buf_type = XFS_BLFT_RTBITMAP_BUF;
788 
789 	/*
790 	 * Allocate space to the file, as necessary.
791 	 */
792 	while (oblocks < nblocks) {
793 		resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
794 		/*
795 		 * Reserve space & log for one extent added to the file.
796 		 */
797 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks,
798 				0, 0, &tp);
799 		if (error)
800 			return error;
801 		/*
802 		 * Lock the inode.
803 		 */
804 		xfs_ilock(ip, XFS_ILOCK_EXCL);
805 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
806 
807 		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
808 				XFS_IEXT_ADD_NOSPLIT_CNT);
809 		if (error == -EFBIG)
810 			error = xfs_iext_count_upgrade(tp, ip,
811 					XFS_IEXT_ADD_NOSPLIT_CNT);
812 		if (error)
813 			goto out_trans_cancel;
814 
815 		/*
816 		 * Allocate blocks to the bitmap file.
817 		 */
818 		nmap = 1;
819 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
820 					XFS_BMAPI_METADATA, 0, &map, &nmap);
821 		if (!error && nmap < 1)
822 			error = -ENOSPC;
823 		if (error)
824 			goto out_trans_cancel;
825 		/*
826 		 * Free any blocks freed up in the transaction, then commit.
827 		 */
828 		error = xfs_trans_commit(tp);
829 		if (error)
830 			return error;
831 		/*
832 		 * Now we need to clear the allocated blocks.
833 		 * Do this one block per transaction, to keep it simple.
834 		 */
835 		for (bno = map.br_startoff, fsbno = map.br_startblock;
836 		     bno < map.br_startoff + map.br_blockcount;
837 		     bno++, fsbno++) {
838 			/*
839 			 * Reserve log for one block zeroing.
840 			 */
841 			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero,
842 					0, 0, 0, &tp);
843 			if (error)
844 				return error;
845 			/*
846 			 * Lock the bitmap inode.
847 			 */
848 			xfs_ilock(ip, XFS_ILOCK_EXCL);
849 			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
850 			/*
851 			 * Get a buffer for the block.
852 			 */
853 			d = XFS_FSB_TO_DADDR(mp, fsbno);
854 			error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
855 					mp->m_bsize, 0, &bp);
856 			if (error)
857 				goto out_trans_cancel;
858 
859 			xfs_trans_buf_set_type(tp, bp, buf_type);
860 			bp->b_ops = &xfs_rtbuf_ops;
861 			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
862 			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
863 			/*
864 			 * Commit the transaction.
865 			 */
866 			error = xfs_trans_commit(tp);
867 			if (error)
868 				return error;
869 		}
870 		/*
871 		 * Go on to the next extent, if any.
872 		 */
873 		oblocks = map.br_startoff + map.br_blockcount;
874 	}
875 
876 	return 0;
877 
878 out_trans_cancel:
879 	xfs_trans_cancel(tp);
880 	return error;
881 }
882 
883 static void
884 xfs_alloc_rsum_cache(
885 	xfs_mount_t	*mp,		/* file system mount structure */
886 	xfs_extlen_t	rbmblocks)	/* number of rt bitmap blocks */
887 {
888 	/*
889 	 * The rsum cache is initialized to all zeroes, which is trivially a
890 	 * lower bound on the minimum level with any free extents. We can
891 	 * continue without the cache if it couldn't be allocated.
892 	 */
893 	mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
894 	if (!mp->m_rsum_cache)
895 		xfs_warn(mp, "could not allocate realtime summary cache");
896 }
897 
898 /*
899  * Visible (exported) functions.
900  */
901 
902 /*
903  * Grow the realtime area of the filesystem.
904  */
905 int
906 xfs_growfs_rt(
907 	xfs_mount_t	*mp,		/* mount point for filesystem */
908 	xfs_growfs_rt_t	*in)		/* growfs rt input struct */
909 {
910 	xfs_rtblock_t	bmbno;		/* bitmap block number */
911 	struct xfs_buf	*bp;		/* temporary buffer */
912 	int		error;		/* error return value */
913 	xfs_mount_t	*nmp;		/* new (fake) mount structure */
914 	xfs_rfsblock_t	nrblocks;	/* new number of realtime blocks */
915 	xfs_extlen_t	nrbmblocks;	/* new number of rt bitmap blocks */
916 	xfs_rtblock_t	nrextents;	/* new number of realtime extents */
917 	uint8_t		nrextslog;	/* new log2 of sb_rextents */
918 	xfs_extlen_t	nrsumblocks;	/* new number of summary blocks */
919 	uint		nrsumlevels;	/* new rt summary levels */
920 	uint		nrsumsize;	/* new size of rt summary, bytes */
921 	xfs_sb_t	*nsbp;		/* new superblock */
922 	xfs_extlen_t	rbmblocks;	/* current number of rt bitmap blocks */
923 	xfs_extlen_t	rsumblocks;	/* current number of rt summary blks */
924 	xfs_sb_t	*sbp;		/* old superblock */
925 	xfs_fsblock_t	sumbno;		/* summary block number */
926 	uint8_t		*rsum_cache;	/* old summary cache */
927 
928 	sbp = &mp->m_sb;
929 
930 	if (!capable(CAP_SYS_ADMIN))
931 		return -EPERM;
932 
933 	/* Needs to have been mounted with an rt device. */
934 	if (!XFS_IS_REALTIME_MOUNT(mp))
935 		return -EINVAL;
936 	/*
937 	 * Mount should fail if the rt bitmap/summary files don't load, but
938 	 * we'll check anyway.
939 	 */
940 	if (!mp->m_rbmip || !mp->m_rsumip)
941 		return -EINVAL;
942 
943 	/* Shrink not supported. */
944 	if (in->newblocks <= sbp->sb_rblocks)
945 		return -EINVAL;
946 
947 	/* Can only change rt extent size when adding rt volume. */
948 	if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
949 		return -EINVAL;
950 
951 	/* Range check the extent size. */
952 	if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
953 	    XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
954 		return -EINVAL;
955 
956 	/* Unsupported realtime features. */
957 	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
958 		return -EOPNOTSUPP;
959 
960 	nrblocks = in->newblocks;
961 	error = xfs_sb_validate_fsb_count(sbp, nrblocks);
962 	if (error)
963 		return error;
964 	/*
965 	 * Read in the last block of the device, make sure it exists.
966 	 */
967 	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
968 				XFS_FSB_TO_BB(mp, nrblocks - 1),
969 				XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
970 	if (error)
971 		return error;
972 	xfs_buf_relse(bp);
973 
974 	/*
975 	 * Calculate new parameters.  These are the final values to be reached.
976 	 */
977 	nrextents = nrblocks;
978 	do_div(nrextents, in->extsize);
979 	nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
980 	nrextslog = xfs_highbit32(nrextents);
981 	nrsumlevels = nrextslog + 1;
982 	nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
983 	nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
984 	nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
985 	/*
986 	 * New summary size can't be more than half the size of
987 	 * the log.  This prevents us from getting a log overflow,
988 	 * since we'll log basically the whole summary file at once.
989 	 */
990 	if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
991 		return -EINVAL;
992 	/*
993 	 * Get the old block counts for bitmap and summary inodes.
994 	 * These can't change since other growfs callers are locked out.
995 	 */
996 	rbmblocks = XFS_B_TO_FSB(mp, mp->m_rbmip->i_disk_size);
997 	rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumip->i_disk_size);
998 	/*
999 	 * Allocate space to the bitmap and summary files, as necessary.
1000 	 */
1001 	error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
1002 	if (error)
1003 		return error;
1004 	error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
1005 	if (error)
1006 		return error;
1007 
1008 	rsum_cache = mp->m_rsum_cache;
1009 	if (nrbmblocks != sbp->sb_rbmblocks)
1010 		xfs_alloc_rsum_cache(mp, nrbmblocks);
1011 
1012 	/*
1013 	 * Allocate a new (fake) mount/sb.
1014 	 */
1015 	nmp = kmem_alloc(sizeof(*nmp), 0);
1016 	/*
1017 	 * Loop over the bitmap blocks.
1018 	 * We will do everything one bitmap block at a time.
1019 	 * Skip the current block if it is exactly full.
1020 	 * This also deals with the case where there were no rtextents before.
1021 	 */
1022 	for (bmbno = sbp->sb_rbmblocks -
1023 		     ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
1024 	     bmbno < nrbmblocks;
1025 	     bmbno++) {
1026 		struct xfs_trans	*tp;
1027 		xfs_rfsblock_t		nrblocks_step;
1028 
1029 		*nmp = *mp;
1030 		nsbp = &nmp->m_sb;
1031 		/*
1032 		 * Calculate new sb and mount fields for this round.
1033 		 */
1034 		nsbp->sb_rextsize = in->extsize;
1035 		nsbp->sb_rbmblocks = bmbno + 1;
1036 		nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
1037 				nsbp->sb_rextsize;
1038 		nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
1039 		nsbp->sb_rextents = nsbp->sb_rblocks;
1040 		do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
1041 		ASSERT(nsbp->sb_rextents != 0);
1042 		nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
1043 		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
1044 		nrsumsize =
1045 			(uint)sizeof(xfs_suminfo_t) * nrsumlevels *
1046 			nsbp->sb_rbmblocks;
1047 		nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
1048 		nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
1049 		/*
1050 		 * Start a transaction, get the log reservation.
1051 		 */
1052 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0,
1053 				&tp);
1054 		if (error)
1055 			break;
1056 		/*
1057 		 * Lock out other callers by grabbing the bitmap inode lock.
1058 		 */
1059 		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
1060 		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
1061 		/*
1062 		 * Update the bitmap inode's size ondisk and incore.  We need
1063 		 * to update the incore size so that inode inactivation won't
1064 		 * punch what it thinks are "posteof" blocks.
1065 		 */
1066 		mp->m_rbmip->i_disk_size =
1067 			nsbp->sb_rbmblocks * nsbp->sb_blocksize;
1068 		i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_disk_size);
1069 		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
1070 		/*
1071 		 * Get the summary inode into the transaction.
1072 		 */
1073 		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
1074 		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
1075 		/*
1076 		 * Update the summary inode's size.  We need to update the
1077 		 * incore size so that inode inactivation won't punch what it
1078 		 * thinks are "posteof" blocks.
1079 		 */
1080 		mp->m_rsumip->i_disk_size = nmp->m_rsumsize;
1081 		i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_disk_size);
1082 		xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
1083 		/*
1084 		 * Copy summary data from old to new sizes.
1085 		 * Do this when the real size (not block-aligned) changes.
1086 		 */
1087 		if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks ||
1088 		    mp->m_rsumlevels != nmp->m_rsumlevels) {
1089 			error = xfs_rtcopy_summary(mp, nmp, tp);
1090 			if (error)
1091 				goto error_cancel;
1092 		}
1093 		/*
1094 		 * Update superblock fields.
1095 		 */
1096 		if (nsbp->sb_rextsize != sbp->sb_rextsize)
1097 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSIZE,
1098 				nsbp->sb_rextsize - sbp->sb_rextsize);
1099 		if (nsbp->sb_rbmblocks != sbp->sb_rbmblocks)
1100 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
1101 				nsbp->sb_rbmblocks - sbp->sb_rbmblocks);
1102 		if (nsbp->sb_rblocks != sbp->sb_rblocks)
1103 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBLOCKS,
1104 				nsbp->sb_rblocks - sbp->sb_rblocks);
1105 		if (nsbp->sb_rextents != sbp->sb_rextents)
1106 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS,
1107 				nsbp->sb_rextents - sbp->sb_rextents);
1108 		if (nsbp->sb_rextslog != sbp->sb_rextslog)
1109 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
1110 				nsbp->sb_rextslog - sbp->sb_rextslog);
1111 		/*
1112 		 * Free new extent.
1113 		 */
1114 		bp = NULL;
1115 		error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
1116 			nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
1117 		if (error) {
1118 error_cancel:
1119 			xfs_trans_cancel(tp);
1120 			break;
1121 		}
1122 		/*
1123 		 * Mark more blocks free in the superblock.
1124 		 */
1125 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS,
1126 			nsbp->sb_rextents - sbp->sb_rextents);
1127 		/*
1128 		 * Update mp values into the real mp structure.
1129 		 */
1130 		mp->m_rsumlevels = nrsumlevels;
1131 		mp->m_rsumsize = nrsumsize;
1132 
1133 		error = xfs_trans_commit(tp);
1134 		if (error)
1135 			break;
1136 
1137 		/* Ensure the mount RT feature flag is now set. */
1138 		mp->m_features |= XFS_FEAT_REALTIME;
1139 	}
1140 	if (error)
1141 		goto out_free;
1142 
1143 	/* Update secondary superblocks now the physical grow has completed */
1144 	error = xfs_update_secondary_sbs(mp);
1145 
1146 out_free:
1147 	/*
1148 	 * Free the fake mp structure.
1149 	 */
1150 	kmem_free(nmp);
1151 
1152 	/*
1153 	 * If we had to allocate a new rsum_cache, we either need to free the
1154 	 * old one (if we succeeded) or free the new one and restore the old one
1155 	 * (if there was an error).
1156 	 */
1157 	if (rsum_cache != mp->m_rsum_cache) {
1158 		if (error) {
1159 			kmem_free(mp->m_rsum_cache);
1160 			mp->m_rsum_cache = rsum_cache;
1161 		} else {
1162 			kmem_free(rsum_cache);
1163 		}
1164 	}
1165 
1166 	return error;
1167 }
1168 
1169 /*
1170  * Allocate an extent in the realtime subvolume, with the usual allocation
1171  * parameters.  The length units are all in realtime extents, as is the
1172  * result block number.
1173  */
1174 int					/* error */
1175 xfs_rtallocate_extent(
1176 	xfs_trans_t	*tp,		/* transaction pointer */
1177 	xfs_rtblock_t	bno,		/* starting block number to allocate */
1178 	xfs_extlen_t	minlen,		/* minimum length to allocate */
1179 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
1180 	xfs_extlen_t	*len,		/* out: actual length allocated */
1181 	int		wasdel,		/* was a delayed allocation extent */
1182 	xfs_extlen_t	prod,		/* extent product factor */
1183 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
1184 {
1185 	xfs_mount_t	*mp = tp->t_mountp;
1186 	int		error;		/* error value */
1187 	xfs_rtblock_t	r;		/* result allocated block */
1188 	xfs_fsblock_t	sb;		/* summary file block number */
1189 	struct xfs_buf	*sumbp;		/* summary file block buffer */
1190 
1191 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
1192 	ASSERT(minlen > 0 && minlen <= maxlen);
1193 
1194 	/*
1195 	 * If prod is set then figure out what to do to minlen and maxlen.
1196 	 */
1197 	if (prod > 1) {
1198 		xfs_extlen_t	i;
1199 
1200 		if ((i = maxlen % prod))
1201 			maxlen -= i;
1202 		if ((i = minlen % prod))
1203 			minlen += prod - i;
1204 		if (maxlen < minlen) {
1205 			*rtblock = NULLRTBLOCK;
1206 			return 0;
1207 		}
1208 	}
1209 
1210 retry:
1211 	sumbp = NULL;
1212 	if (bno == 0) {
1213 		error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len,
1214 				&sumbp,	&sb, prod, &r);
1215 	} else {
1216 		error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen,
1217 				len, &sumbp, &sb, prod, &r);
1218 	}
1219 
1220 	if (error)
1221 		return error;
1222 
1223 	/*
1224 	 * If it worked, update the superblock.
1225 	 */
1226 	if (r != NULLRTBLOCK) {
1227 		long	slen = (long)*len;
1228 
1229 		ASSERT(*len >= minlen && *len <= maxlen);
1230 		if (wasdel)
1231 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FREXTENTS, -slen);
1232 		else
1233 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, -slen);
1234 	} else if (prod > 1) {
1235 		prod = 1;
1236 		goto retry;
1237 	}
1238 
1239 	*rtblock = r;
1240 	return 0;
1241 }
1242 
1243 /*
1244  * Initialize realtime fields in the mount structure.
1245  */
1246 int				/* error */
1247 xfs_rtmount_init(
1248 	struct xfs_mount	*mp)	/* file system mount structure */
1249 {
1250 	struct xfs_buf		*bp;	/* buffer for last block of subvolume */
1251 	struct xfs_sb		*sbp;	/* filesystem superblock copy in mount */
1252 	xfs_daddr_t		d;	/* address of last block of subvolume */
1253 	int			error;
1254 
1255 	sbp = &mp->m_sb;
1256 	if (sbp->sb_rblocks == 0)
1257 		return 0;
1258 	if (mp->m_rtdev_targp == NULL) {
1259 		xfs_warn(mp,
1260 	"Filesystem has a realtime volume, use rtdev=device option");
1261 		return -ENODEV;
1262 	}
1263 	mp->m_rsumlevels = sbp->sb_rextslog + 1;
1264 	mp->m_rsumsize =
1265 		(uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
1266 		sbp->sb_rbmblocks;
1267 	mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
1268 	mp->m_rbmip = mp->m_rsumip = NULL;
1269 	/*
1270 	 * Check that the realtime section is an ok size.
1271 	 */
1272 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
1273 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
1274 		xfs_warn(mp, "realtime mount -- %llu != %llu",
1275 			(unsigned long long) XFS_BB_TO_FSB(mp, d),
1276 			(unsigned long long) mp->m_sb.sb_rblocks);
1277 		return -EFBIG;
1278 	}
1279 	error = xfs_buf_read_uncached(mp->m_rtdev_targp,
1280 					d - XFS_FSB_TO_BB(mp, 1),
1281 					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
1282 	if (error) {
1283 		xfs_warn(mp, "realtime device size check failed");
1284 		return error;
1285 	}
1286 	xfs_buf_relse(bp);
1287 	return 0;
1288 }
1289 
1290 static int
1291 xfs_rtalloc_count_frextent(
1292 	struct xfs_mount		*mp,
1293 	struct xfs_trans		*tp,
1294 	const struct xfs_rtalloc_rec	*rec,
1295 	void				*priv)
1296 {
1297 	uint64_t			*valp = priv;
1298 
1299 	*valp += rec->ar_extcount;
1300 	return 0;
1301 }
1302 
1303 /*
1304  * Reinitialize the number of free realtime extents from the realtime bitmap.
1305  * Callers must ensure that there is no other activity in the filesystem.
1306  */
1307 int
1308 xfs_rtalloc_reinit_frextents(
1309 	struct xfs_mount	*mp)
1310 {
1311 	uint64_t		val = 0;
1312 	int			error;
1313 
1314 	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
1315 	error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent,
1316 			&val);
1317 	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
1318 	if (error)
1319 		return error;
1320 
1321 	spin_lock(&mp->m_sb_lock);
1322 	mp->m_sb.sb_frextents = val;
1323 	spin_unlock(&mp->m_sb_lock);
1324 	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
1325 	return 0;
1326 }
1327 
1328 /*
1329  * Read in the bmbt of an rt metadata inode so that we never have to load them
1330  * at runtime.  This enables the use of shared ILOCKs for rtbitmap scans.  Use
1331  * an empty transaction to avoid deadlocking on loops in the bmbt.
1332  */
1333 static inline int
1334 xfs_rtmount_iread_extents(
1335 	struct xfs_inode	*ip,
1336 	unsigned int		lock_class)
1337 {
1338 	struct xfs_trans	*tp;
1339 	int			error;
1340 
1341 	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
1342 	if (error)
1343 		return error;
1344 
1345 	xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
1346 
1347 	error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1348 	if (error)
1349 		goto out_unlock;
1350 
1351 	if (xfs_inode_has_attr_fork(ip)) {
1352 		error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
1353 		if (error)
1354 			goto out_unlock;
1355 	}
1356 
1357 out_unlock:
1358 	xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
1359 	xfs_trans_cancel(tp);
1360 	return error;
1361 }
1362 
1363 /*
1364  * Get the bitmap and summary inodes and the summary cache into the mount
1365  * structure at mount time.
1366  */
1367 int					/* error */
1368 xfs_rtmount_inodes(
1369 	xfs_mount_t	*mp)		/* file system mount structure */
1370 {
1371 	int		error;		/* error return value */
1372 	xfs_sb_t	*sbp;
1373 
1374 	sbp = &mp->m_sb;
1375 	error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
1376 	if (error)
1377 		return error;
1378 	ASSERT(mp->m_rbmip != NULL);
1379 
1380 	error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
1381 	if (error)
1382 		goto out_rele_bitmap;
1383 
1384 	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
1385 	if (error)
1386 		goto out_rele_bitmap;
1387 	ASSERT(mp->m_rsumip != NULL);
1388 
1389 	error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
1390 	if (error)
1391 		goto out_rele_summary;
1392 
1393 	xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
1394 	return 0;
1395 
1396 out_rele_summary:
1397 	xfs_irele(mp->m_rsumip);
1398 out_rele_bitmap:
1399 	xfs_irele(mp->m_rbmip);
1400 	return error;
1401 }
1402 
1403 void
1404 xfs_rtunmount_inodes(
1405 	struct xfs_mount	*mp)
1406 {
1407 	kmem_free(mp->m_rsum_cache);
1408 	if (mp->m_rbmip)
1409 		xfs_irele(mp->m_rbmip);
1410 	if (mp->m_rsumip)
1411 		xfs_irele(mp->m_rsumip);
1412 }
1413 
1414 /*
1415  * Pick an extent for allocation at the start of a new realtime file.
1416  * Use the sequence number stored in the atime field of the bitmap inode.
1417  * Translate this to a fraction of the rtextents, and return the product
1418  * of rtextents and the fraction.
1419  * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...
1420  */
1421 int					/* error */
1422 xfs_rtpick_extent(
1423 	xfs_mount_t	*mp,		/* file system mount point */
1424 	xfs_trans_t	*tp,		/* transaction pointer */
1425 	xfs_extlen_t	len,		/* allocation length (rtextents) */
1426 	xfs_rtblock_t	*pick)		/* result rt extent */
1427 {
1428 	xfs_rtblock_t	b;		/* result block */
1429 	int		log2;		/* log of sequence number */
1430 	uint64_t	resid;		/* residual after log removed */
1431 	uint64_t	seq;		/* sequence number of file creation */
1432 	uint64_t	*seqp;		/* pointer to seqno in inode */
1433 
1434 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
1435 
1436 	seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
1437 	if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) {
1438 		mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1439 		*seqp = 0;
1440 	}
1441 	seq = *seqp;
1442 	if ((log2 = xfs_highbit64(seq)) == -1)
1443 		b = 0;
1444 	else {
1445 		resid = seq - (1ULL << log2);
1446 		b = (mp->m_sb.sb_rextents * ((resid << 1) + 1ULL)) >>
1447 		    (log2 + 1);
1448 		if (b >= mp->m_sb.sb_rextents)
1449 			div64_u64_rem(b, mp->m_sb.sb_rextents, &b);
1450 		if (b + len > mp->m_sb.sb_rextents)
1451 			b = mp->m_sb.sb_rextents - len;
1452 	}
1453 	*seqp = seq + 1;
1454 	xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
1455 	*pick = b;
1456 	return 0;
1457 }
1458