xref: /linux/fs/xfs/scrub/tempfile.c (revision d85fe250f2eb61e19029e9e0d30095c5f646e2f2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_bmap.h"
18 #include "xfs_bmap_btree.h"
19 #include "xfs_trans_space.h"
20 #include "xfs_dir2.h"
21 #include "xfs_exchrange.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_defer.h"
24 #include "xfs_symlink_remote.h"
25 #include "scrub/scrub.h"
26 #include "scrub/common.h"
27 #include "scrub/repair.h"
28 #include "scrub/trace.h"
29 #include "scrub/tempfile.h"
30 #include "scrub/tempexch.h"
31 #include "scrub/xfile.h"
32 
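/*
 * Rough sketch of how a repair function might drive these helpers
 * (hypothetical order; each repair supplies its own details and error
 * handling):
 *
 *	xrep_tempfile_create(sc, S_IFREG);
 *	...stage new contents with xrep_tempfile_prealloc() and
 *	   xrep_tempfile_copyin()...
 *	xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, &tx);
 *	xrep_tempexch_contents(sc, &tx);
 *	xrep_tempfile_rele(sc);
 *
 * After the exchange, the tempfile holds the old (damaged) mappings; since
 * it sits on the unlinked list, releasing it reclaims that space.
 */
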
33 /*
34  * Create a temporary file for reconstructing metadata, with the intention of
35  * atomically exchanging the temporary file's contents with the file that's
36  * being repaired.
37  */
38 int
39 xrep_tempfile_create(
40 	struct xfs_scrub	*sc,
41 	uint16_t		mode)
42 {
43 	struct xfs_mount	*mp = sc->mp;
44 	struct xfs_trans	*tp = NULL;
45 	struct xfs_dquot	*udqp = NULL;
46 	struct xfs_dquot	*gdqp = NULL;
47 	struct xfs_dquot	*pdqp = NULL;
48 	struct xfs_trans_res	*tres;
49 	struct xfs_inode	*dp = mp->m_rootip;
50 	xfs_ino_t		ino;
51 	unsigned int		resblks;
52 	bool			is_dir = S_ISDIR(mode);
53 	int			error;
54 
55 	if (xfs_is_shutdown(mp))
56 		return -EIO;
57 	if (xfs_is_readonly(mp))
58 		return -EROFS;
59 
60 	ASSERT(sc->tp == NULL);
61 	ASSERT(sc->tempip == NULL);
62 
63 	/*
64 	 * Make sure that we have allocated dquot(s) on disk.  The temporary
65 	 * inode should be completely root owned so that we don't fail due to
66 	 * quota limits.
67 	 */
68 	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
69 			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
70 	if (error)
71 		return error;
72 
73 	if (is_dir) {
74 		resblks = XFS_MKDIR_SPACE_RES(mp, 0);
75 		tres = &M_RES(mp)->tr_mkdir;
76 	} else {
77 		resblks = XFS_IALLOC_SPACE_RES(mp);
78 		tres = &M_RES(mp)->tr_create_tmpfile;
79 	}
80 
81 	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
82 			&tp);
83 	if (error)
84 		goto out_release_dquots;
85 
86 	/* Allocate inode, set up directory. */
87 	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
88 	if (error)
89 		goto out_trans_cancel;
90 	error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
91 			0, false, &sc->tempip);
92 	if (error)
93 		goto out_trans_cancel;
94 
95 	/* Change the ownership of the inode to root. */
96 	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
97 	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
98 	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100 
101 	/*
102 	 * Mark our temporary file as private so that LSMs and the ACL code
103 	 * don't try to add their own metadata or reason about these files.
104 	 * The file should never be exposed to userspace.
105 	 */
106 	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107 	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
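	/*
	 * Note that xrep_is_tempfile() at the end of this file identifies
	 * repair tempfiles by exactly this combination: S_PRIVATE set and
	 * IOP_XATTR cleared.
	 */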
108 
109 	if (is_dir) {
110 		error = xfs_dir_init(tp, sc->tempip, dp);
111 		if (error)
112 			goto out_trans_cancel;
113 	} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114 		/*
115 		 * Initialize the temporary symlink with a meaningless target
116 		 * that won't trip the verifiers.  Repair must rewrite the
117 		 * target with meaningful content before swapping with the file
118 		 * being repaired.  A single-byte target will not write a
119 		 * remote target block, so the owner is irrelevant.
120 		 */
121 		error = xfs_symlink_write_target(tp, sc->tempip,
122 				sc->tempip->i_ino, ".", 1, 0, 0);
123 		if (error)
124 			goto out_trans_cancel;
125 	}
126 
127 	/*
128 	 * Attach the dquot(s) to the inode and modify it incore.
129 	 * The IDs of the inode cannot have changed because the new
130 	 * inode has been locked ever since it was created.
131 	 */
132 	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
133 
134 	/*
135 	 * Put our temp file on the unlinked list so it's purged automatically.
136 	 * All file-based metadata being reconstructed using this file must be
137 	 * atomically exchanged with the original file because the contents
138 	 * here will be purged when the inode is dropped or log recovery cleans
139 	 * out the unlinked list.
140 	 */
141 	error = xfs_iunlink(tp, sc->tempip);
142 	if (error)
143 		goto out_trans_cancel;
144 
145 	error = xfs_trans_commit(tp);
146 	if (error)
147 		goto out_release_inode;
148 
149 	trace_xrep_tempfile_create(sc);
150 
151 	xfs_qm_dqrele(udqp);
152 	xfs_qm_dqrele(gdqp);
153 	xfs_qm_dqrele(pdqp);
154 
155 	/* Finish setting up the incore / vfs context. */
156 	xfs_setup_iops(sc->tempip);
157 	xfs_finish_inode_setup(sc->tempip);
158 
159 	sc->temp_ilock_flags = 0;
160 	return error;
161 
162 out_trans_cancel:
163 	xfs_trans_cancel(tp);
164 out_release_inode:
165 	/*
166 	 * Wait until after the current transaction is aborted to finish the
167 	 * setup of the inode and release the inode.  This prevents recursive
168 	 * transactions and deadlocks from xfs_inactive.
169 	 */
170 	if (sc->tempip) {
171 		xfs_finish_inode_setup(sc->tempip);
172 		xchk_irele(sc, sc->tempip);
173 	}
174 out_release_dquots:
175 	xfs_qm_dqrele(udqp);
176 	xfs_qm_dqrele(gdqp);
177 	xfs_qm_dqrele(pdqp);
178 
179 	return error;
180 }
181 
182 /* Take IOLOCK_EXCL on the temporary file, maybe. */
183 bool
184 xrep_tempfile_iolock_nowait(
185 	struct xfs_scrub	*sc)
186 {
187 	if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
188 		sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
189 		return true;
190 	}
191 
192 	return false;
193 }
194 
195 /*
196  * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
197  * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
198  * to avoid deadlocks and lockdep complaints.
199  */
200 int
201 xrep_tempfile_iolock_polled(
202 	struct xfs_scrub	*sc)
203 {
204 	int			error = 0;
205 
206 	while (!xrep_tempfile_iolock_nowait(sc)) {
207 		if (xchk_should_terminate(sc, &error))
208 			return error;
209 		delay(1);
210 	}
211 
212 	return 0;
213 }
214 
215 /* Release IOLOCK_EXCL on the temporary file. */
216 void
217 xrep_tempfile_iounlock(
218 	struct xfs_scrub	*sc)
219 {
220 	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
221 	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
222 }
223 
224 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
225 void
226 xrep_tempfile_ilock(
227 	struct xfs_scrub	*sc)
228 {
229 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
230 	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
231 }
232 
233 /* Try to grab ILOCK_EXCL on the temporary file. */
234 bool
235 xrep_tempfile_ilock_nowait(
236 	struct xfs_scrub	*sc)
237 {
238 	if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
239 		sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
240 		return true;
241 	}
242 
243 	return false;
244 }
245 
246 /* Unlock ILOCK_EXCL on the temporary file after an update. */
247 void
248 xrep_tempfile_iunlock(
249 	struct xfs_scrub	*sc)
250 {
251 	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
252 	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
253 }
254 
255 /*
256  * Begin the process of making changes to both the file being scrubbed and
257  * the temporary file by taking ILOCK_EXCL on both.
258  */
259 void
260 xrep_tempfile_ilock_both(
261 	struct xfs_scrub	*sc)
262 {
263 	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
264 	sc->ilock_flags |= XFS_ILOCK_EXCL;
265 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
266 }
267 
268 /* Unlock ILOCK_EXCL on both files. */
269 void
270 xrep_tempfile_iunlock_both(
271 	struct xfs_scrub	*sc)
272 {
273 	xrep_tempfile_iunlock(sc);
274 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
275 }
276 
277 /* Release the temporary file. */
278 void
279 xrep_tempfile_rele(
280 	struct xfs_scrub	*sc)
281 {
282 	if (!sc->tempip)
283 		return;
284 
285 	if (sc->temp_ilock_flags) {
286 		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
287 		sc->temp_ilock_flags = 0;
288 	}
289 
290 	xchk_irele(sc, sc->tempip);
291 	sc->tempip = NULL;
292 }
293 
294 /*
295  * Make sure that the given range of the data fork of the temporary file is
296  * mapped to written blocks.  The caller must ensure that both inodes are
297  * joined to the transaction.
298  */
299 int
300 xrep_tempfile_prealloc(
301 	struct xfs_scrub	*sc,
302 	xfs_fileoff_t		off,
303 	xfs_filblks_t		len)
304 {
305 	struct xfs_bmbt_irec	map;
306 	xfs_fileoff_t		end = off + len;
307 	int			error;
308 
309 	ASSERT(sc->tempip != NULL);
310 	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
311 
312 	for (; off < end; off = map.br_startoff + map.br_blockcount) {
313 		int		nmaps = 1;
314 
315 		/*
316 		 * If we have a real extent mapping this block then we're
317 		 * in ok shape.
318 		 */
319 		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
320 				XFS_DATA_FORK);
321 		if (error)
322 			return error;
323 		if (nmaps == 0) {
324 			ASSERT(nmaps != 0);
325 			return -EFSCORRUPTED;
326 		}
327 
328 		if (xfs_bmap_is_written_extent(&map))
329 			continue;
330 
331 		/*
332 		 * If we find a delalloc reservation then something is very
333 		 * very wrong.  Bail out.
334 		 */
335 		if (map.br_startblock == DELAYSTARTBLOCK)
336 			return -EFSCORRUPTED;
337 
338 		/*
339 		 * Make sure this block has a real zeroed extent allocated to
340 		 * it.
341 		 */
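		/*
		 * XFS_BMAPI_CONVERT converts unwritten (preallocated)
		 * mappings to written ones, and XFS_BMAPI_ZERO tells the
		 * allocator to zero the blocks it hands back.
		 */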
342 		nmaps = 1;
343 		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
344 				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
345 				&nmaps);
346 		if (error)
347 			return error;
348 		if (nmaps != 1)
349 			return -EFSCORRUPTED;
350 
351 		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
352 
353 		/* Commit new extent and all deferred work. */
354 		error = xfs_defer_finish(&sc->tp);
355 		if (error)
356 			return error;
357 	}
358 
359 	return 0;
360 }
361 
362 /*
363  * Write data to each block of a file.  The given range of the tempfile's data
364  * fork must already be populated with written extents.
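 *
 * A rough usage sketch (hypothetical; transaction setup and error handling
 * omitted) is to map the range first and then fill it:
 *
 *	error = xrep_tempfile_prealloc(sc, 0, len);
 *	if (!error)
 *		error = xrep_tempfile_copyin(sc, 0, len, my_prep_fn, my_data);
 *
 * where my_prep_fn and my_data are supplied by the specific repair code.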
365  */
366 int
367 xrep_tempfile_copyin(
368 	struct xfs_scrub	*sc,
369 	xfs_fileoff_t		off,
370 	xfs_filblks_t		len,
371 	xrep_tempfile_copyin_fn	prep_fn,
372 	void			*data)
373 {
374 	LIST_HEAD(buffers_list);
375 	struct xfs_mount	*mp = sc->mp;
376 	struct xfs_buf		*bp;
377 	xfs_fileoff_t		flush_mask;
378 	xfs_fileoff_t		end = off + len;
379 	loff_t			pos = XFS_FSB_TO_B(mp, off);
380 	int			error = 0;
381 
382 	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
383 
384 	/* Flush buffers to disk every 512K */
385 	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
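	/*
	 * Worked example (assuming 4k filesystem blocks): 1U << 19 is 512K,
	 * XFS_B_TO_FSBT() turns that into 128 blocks, so flush_mask is 127
	 * and the delwri list is submitted every 128 file blocks.
	 */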
386 
387 	for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
388 		struct xfs_bmbt_irec	map;
389 		int			nmaps = 1;
390 
391 		/* Read block mapping for this file block. */
392 		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
393 		if (error)
394 			goto out_err;
395 		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
396 			error = -EFSCORRUPTED;
397 			goto out_err;
398 		}
399 
400 		/* Get the metadata buffer for this offset in the file. */
401 		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
402 				XFS_FSB_TO_DADDR(mp, map.br_startblock),
403 				mp->m_bsize, 0, &bp);
404 		if (error)
405 			goto out_err;
406 
407 		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
408 
409 		/* Read in a block's worth of data from the xfile. */
410 		error = prep_fn(sc, bp, data);
411 		if (error) {
412 			xfs_trans_brelse(sc->tp, bp);
413 			goto out_err;
414 		}
415 
416 		/* Queue buffer, and flush if we have too much dirty data. */
417 		xfs_buf_delwri_queue_here(bp, &buffers_list);
418 		xfs_trans_brelse(sc->tp, bp);
419 
420 		if (!(off & flush_mask)) {
421 			error = xfs_buf_delwri_submit(&buffers_list);
422 			if (error)
423 				goto out_err;
424 		}
425 	}
426 
427 	/*
428 	 * Write the new blocks to disk.  If the ordered list isn't empty after
429 	 * that, then something went wrong and we have to fail.  This should
430 	 * never happen, but we'll check anyway.
431 	 */
432 	error = xfs_buf_delwri_submit(&buffers_list);
433 	if (error)
434 		goto out_err;
435 
436 	if (!list_empty(&buffers_list)) {
437 		ASSERT(list_empty(&buffers_list));
438 		error = -EIO;
439 		goto out_err;
440 	}
441 
442 	return 0;
443 
444 out_err:
445 	xfs_buf_delwri_cancel(&buffers_list);
446 	return error;
447 }
448 
449 /*
450  * Set the temporary file's size.  Caller must join the tempfile to the scrub
451  * transaction and is responsible for adjusting block mappings as needed.
452  */
453 int
454 xrep_tempfile_set_isize(
455 	struct xfs_scrub	*sc,
456 	unsigned long long	isize)
457 {
458 	if (sc->tempip->i_disk_size == isize)
459 		return 0;
460 
461 	sc->tempip->i_disk_size = isize;
462 	i_size_write(VFS_I(sc->tempip), isize);
463 	return xrep_tempfile_roll_trans(sc);
464 }
465 
466 /*
467  * Roll a repair transaction involving the temporary file.  Caller must join
468  * both the temporary file and the file being scrubbed to the transaction.
469  * On success, this function returns with both inodes joined to a new scrub
470  * transaction; on failure, it returns the usual negative errno.
471  */
472 int
473 xrep_tempfile_roll_trans(
474 	struct xfs_scrub	*sc)
475 {
476 	int			error;
477 
478 	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
479 	error = xrep_roll_trans(sc);
480 	if (error)
481 		return error;
482 
483 	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
484 	return 0;
485 }
486 
487 /* Enable file content exchanges. */
488 int
489 xrep_tempexch_enable(
490 	struct xfs_scrub	*sc)
491 {
492 	if (sc->flags & XREP_FSGATES_EXCHANGE_RANGE)
493 		return 0;
494 
495 	if (!xfs_has_exchange_range(sc->mp))
496 		return -EOPNOTSUPP;
497 
498 	trace_xchk_fsgates_enable(sc, XREP_FSGATES_EXCHANGE_RANGE);
499 
500 	sc->flags |= XREP_FSGATES_EXCHANGE_RANGE;
501 	return 0;
502 }
503 
504 /*
505  * Fill out the mapping exchange request in preparation for atomically
506  * committing the contents of a metadata file that we've rebuilt in the temp
507  * file.
508  */
509 STATIC int
510 xrep_tempexch_prep_request(
511 	struct xfs_scrub	*sc,
512 	int			whichfork,
513 	struct xrep_tempexch	*tx)
514 {
515 	struct xfs_exchmaps_req	*req = &tx->req;
516 
517 	memset(tx, 0, sizeof(struct xrep_tempexch));
518 
519 	/* COW forks don't exist on disk. */
520 	if (whichfork == XFS_COW_FORK) {
521 		ASSERT(0);
522 		return -EINVAL;
523 	}
524 
525 	/* Both files should have the relevant forks. */
526 	if (!xfs_ifork_ptr(sc->ip, whichfork) ||
527 	    !xfs_ifork_ptr(sc->tempip, whichfork)) {
528 		ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
529 		ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
530 		return -EINVAL;
531 	}
532 
533 	/* Exchange all mappings in both forks. */
534 	req->ip1 = sc->tempip;
535 	req->ip2 = sc->ip;
536 	req->startoff1 = 0;
537 	req->startoff2 = 0;
538 	switch (whichfork) {
539 	case XFS_ATTR_FORK:
540 		req->flags |= XFS_EXCHMAPS_ATTR_FORK;
541 		break;
542 	case XFS_DATA_FORK:
543 		/* Always exchange sizes when exchanging data fork mappings. */
544 		req->flags |= XFS_EXCHMAPS_SET_SIZES;
545 		break;
546 	}
547 	req->blockcount = XFS_MAX_FILEOFF;
548 
549 	return 0;
550 }
551 
552 /*
553  * Fill out the mapping exchange resource estimation structures in preparation
554  * for exchanging the contents of a metadata file that we've rebuilt in the
555  * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
556  */
557 STATIC int
558 xrep_tempexch_estimate(
559 	struct xfs_scrub	*sc,
560 	struct xrep_tempexch	*tx)
561 {
562 	struct xfs_exchmaps_req	*req = &tx->req;
563 	struct xfs_ifork	*ifp;
564 	struct xfs_ifork	*tifp;
565 	int			whichfork = xfs_exchmaps_reqfork(req);
566 	int			state = 0;
567 
568 	/*
569 	 * The exchmaps code only knows how to exchange file fork space
570 	 * mappings.  Any fork data in local format must be promoted to a
571 	 * single block before the exchange can take place.
572 	 */
573 	ifp = xfs_ifork_ptr(sc->ip, whichfork);
574 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
575 		state |= 1;
576 
577 	tifp = xfs_ifork_ptr(sc->tempip, whichfork);
578 	if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
579 		state |= 2;
580 
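	/*
	 * state encodes the two fork formats: 0 means both forks hold mapped
	 * extents, 1 means only the file being repaired is in local format,
	 * 2 means only the tempfile is, and 3 means both are.
	 */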
581 	switch (state) {
582 	case 0:
583 		/* Both files have mapped extents; use the regular estimate. */
584 		return xfs_exchrange_estimate(req);
585 	case 1:
586 		/*
587 		 * The file being repaired is in local format, but the temp
588 		 * file has mapped extents.  To perform the exchange, the file
589 		 * being repaired must have its shortform data converted to an
590 		 * ondisk block so that the forks will be in extents format.
591 		 * We need one resblk for the conversion; the number of
592 		 * exchanges is (worst case) the temporary file's extent count
593 		 * plus the block we converted.
594 		 */
595 		req->ip1_bcount = sc->tempip->i_nblocks;
596 		req->ip2_bcount = 1;
597 		req->nr_exchanges = 1 + tifp->if_nextents;
598 		req->resblks = 1;
599 		break;
600 	case 2:
601 		/*
602 		 * The temporary file is in local format, but the file being
603 		 * repaired has mapped extents.  To perform the exchange, the
604 		 * temp file must have its shortform data converted to an
605 		 * ondisk block, and the fork changed to extents format.  We
606 		 * need one resblk for the conversion; the number of exchanges
607 		 * is (worst case) the extent count of the file being repaired
608 		 * plus the block we converted.
609 		 */
610 		req->ip1_bcount = 1;
611 		req->ip2_bcount = sc->ip->i_nblocks;
612 		req->nr_exchanges = 1 + ifp->if_nextents;
613 		req->resblks = 1;
614 		break;
615 	case 3:
616 		/*
617 		 * Both forks are in local format.  To perform the exchange,
618 		 * both files must have their shortform data converted to
619 		 * fsblocks, and both forks must be converted to extents
620 		 * format.  We need two resblks for the two conversions, and
621 		 * the number of exchanges is 1 since there's only one block at
622 		 * fileoff 0.  Presumably, the caller could not exchange the
623 		 * two inode fork areas directly.
624 		 */
625 		req->ip1_bcount = 1;
626 		req->ip2_bcount = 1;
627 		req->nr_exchanges = 1;
628 		req->resblks = 2;
629 		break;
630 	}
631 
632 	return xfs_exchmaps_estimate_overhead(req);
633 }
634 
635 /*
636  * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
637  * this if quota enforcement is disabled or if both inodes' dquots are the
638  * same.
640  */
641 STATIC int
642 xrep_tempexch_reserve_quota(
643 	struct xfs_scrub		*sc,
644 	const struct xrep_tempexch	*tx)
645 {
646 	struct xfs_trans		*tp = sc->tp;
647 	const struct xfs_exchmaps_req	*req = &tx->req;
648 	int64_t				ddelta, rdelta;
649 	int				error;
650 
651 	/*
652 	 * Don't bother with a quota reservation if we're not enforcing them
653 	 * or the two inodes have the same dquots.
654 	 */
655 	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
656 	    (req->ip1->i_udquot == req->ip2->i_udquot &&
657 	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
658 	     req->ip1->i_pdquot == req->ip2->i_pdquot))
659 		return 0;
660 
661 	/*
662 	 * Quota reservation for each file comes from two sources.  First, we
663 	 * need to account for any net gain in mapped blocks during the
664 	 * exchange.  Second, we need reservation for the gross gain in mapped
665 	 * blocks so that we don't trip over any quota block reservation
666 	 * assertions.  We must reserve the gross gain because the quota code
667 	 * subtracts from bcount the number of blocks that we unmap; it does
668 	 * not add that quantity back to the quota block reservation.
669 	 */
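	/*
	 * Hypothetical example: if ip1 maps 10 blocks in the range and ip2
	 * maps 25, then ip1 gets a reservation of (25 - 10) + 10 = 25 blocks
	 * and ip2 gets 0 + 25 = 25 blocks.
	 */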
670 	ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
671 	rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
672 	error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
673 			ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
674 			true);
675 	if (error)
676 		return error;
677 
678 	ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
679 	rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
680 	return xfs_trans_reserve_quota_nblks(tp, req->ip2,
681 			ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
682 			true);
683 }
684 
685 /*
686  * Prepare an existing transaction for an atomic file contents exchange.
687  *
688  * This function fills out the mapping exchange request and resource estimation
689  * structures in preparation for exchanging the contents of a metadata file
690  * that has been rebuilt in the temp file.  Next, it reserves space and quota
691  * for the transaction.
692  *
693  * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
694  * file.  The caller must join both inodes to the transaction with no unlock
695  * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
696  * use this when those ILOCKs cannot be dropped.
697  */
698 int
699 xrep_tempexch_trans_reserve(
700 	struct xfs_scrub	*sc,
701 	int			whichfork,
702 	struct xrep_tempexch	*tx)
703 {
704 	int			error;
705 
706 	ASSERT(sc->tp != NULL);
707 	xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
708 	xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
709 
710 	error = xrep_tempexch_prep_request(sc, whichfork, tx);
711 	if (error)
712 		return error;
713 
714 	error = xfs_exchmaps_estimate(&tx->req);
715 	if (error)
716 		return error;
717 
718 	error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
719 	if (error)
720 		return error;
721 
722 	return xrep_tempexch_reserve_quota(sc, tx);
723 }
724 
725 /*
726  * Create a new transaction for a file contents exchange.
727  *
728  * This function fills out the mapping exchange request and resource
729  * estimation structures in preparation for exchanging the contents of a
730  * metadata file that has been rebuilt in the temp file.  Next, it reserves
731  * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
732  * reserves quota for the transaction.
733  *
734  * The caller is responsible for dropping both ILOCKs when appropriate.
735  */
736 int
737 xrep_tempexch_trans_alloc(
738 	struct xfs_scrub	*sc,
739 	int			whichfork,
740 	struct xrep_tempexch	*tx)
741 {
742 	unsigned int		flags = 0;
743 	int			error;
744 
745 	ASSERT(sc->tp == NULL);
746 
747 	error = xrep_tempexch_prep_request(sc, whichfork, tx);
748 	if (error)
749 		return error;
750 
751 	error = xrep_tempexch_estimate(sc, tx);
752 	if (error)
753 		return error;
754 
755 	if (xfs_has_lazysbcount(sc->mp))
756 		flags |= XFS_TRANS_RES_FDBLKS;
757 
758 	error = xrep_tempexch_enable(sc);
759 	if (error)
760 		return error;
761 
762 	error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
763 			tx->req.resblks, 0, flags, &sc->tp);
764 	if (error)
765 		return error;
766 
767 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
768 	sc->ilock_flags |= XFS_ILOCK_EXCL;
769 	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
770 
771 	return xrep_tempexch_reserve_quota(sc, tx);
772 }
773 
774 /*
775  * Exchange file mappings (and hence file contents) between the file being
776  * repaired and the temporary file.  Returns with both inodes locked and joined
777  * to a clean scrub transaction.
778  */
779 int
780 xrep_tempexch_contents(
781 	struct xfs_scrub	*sc,
782 	struct xrep_tempexch	*tx)
783 {
784 	int			error;
785 
786 	ASSERT(sc->flags & XREP_FSGATES_EXCHANGE_RANGE);
787 
788 	xfs_exchange_mappings(sc->tp, &tx->req);
789 	error = xfs_defer_finish(&sc->tp);
790 	if (error)
791 		return error;
792 
793 	/*
794 	 * If we exchanged the ondisk sizes of two metadata files, we must
795 	 * exchange the incore sizes as well.
796 	 */
797 	if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
798 		loff_t	temp;
799 
800 		temp = i_size_read(VFS_I(sc->ip));
801 		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
802 		i_size_write(VFS_I(sc->tempip), temp);
803 	}
804 
805 	return 0;
806 }
807 
808 /*
809  * Write local format data from one of the temporary file's forks into the same
810  * fork of the file being repaired, and exchange the file sizes, if appropriate.
811  * Caller must ensure that the file being repaired has enough fork space to
812  * hold all the bytes.
813  */
814 void
815 xrep_tempfile_copyout_local(
816 	struct xfs_scrub	*sc,
817 	int			whichfork)
818 {
819 	struct xfs_ifork	*temp_ifp;
820 	struct xfs_ifork	*ifp;
821 	unsigned int		ilog_flags = XFS_ILOG_CORE;
822 
823 	temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
824 	ifp = xfs_ifork_ptr(sc->ip, whichfork);
825 
826 	ASSERT(temp_ifp != NULL);
827 	ASSERT(ifp != NULL);
828 	ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
829 	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
830 
831 	switch (whichfork) {
832 	case XFS_DATA_FORK:
833 		ASSERT(sc->tempip->i_disk_size <=
834 					xfs_inode_data_fork_size(sc->ip));
835 		break;
836 	case XFS_ATTR_FORK:
837 		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
838 		break;
839 	default:
840 		ASSERT(0);
841 		return;
842 	}
843 
844 	/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
845 	xfs_idestroy_fork(ifp);
846 	xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
847 			temp_ifp->if_bytes);
848 
849 	if (whichfork == XFS_DATA_FORK) {
850 		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
851 		sc->ip->i_disk_size = sc->tempip->i_disk_size;
852 	}
853 
854 	ilog_flags |= xfs_ilog_fdata(whichfork);
855 	xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
856 }
857 
858 /* Decide if a given XFS inode is a temporary file for a repair. */
859 bool
860 xrep_is_tempfile(
861 	const struct xfs_inode	*ip)
862 {
863 	const struct inode	*inode = &ip->i_vnode;
864 
865 	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
866 		return true;
867 
868 	return false;
869 }
870