xref: /linux/fs/xfs/scrub/tempfile.c (revision c5288cda69ee2d8607f5026bd599a5cebf0ee783)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_bmap.h"
18 #include "xfs_bmap_btree.h"
19 #include "xfs_trans_space.h"
20 #include "xfs_dir2.h"
21 #include "xfs_exchrange.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_defer.h"
24 #include "xfs_symlink_remote.h"
25 #include "scrub/scrub.h"
26 #include "scrub/common.h"
27 #include "scrub/repair.h"
28 #include "scrub/trace.h"
29 #include "scrub/tempfile.h"
30 #include "scrub/tempexch.h"
31 #include "scrub/xfile.h"
32 
33 /*
34  * Create a temporary file for reconstructing metadata, with the intention of
35  * atomically exchanging the temporary file's contents with the file that's
36  * being repaired.
37  */
38 int
39 xrep_tempfile_create(
40 	struct xfs_scrub	*sc,
41 	uint16_t		mode)
42 {
43 	struct xfs_mount	*mp = sc->mp;
44 	struct xfs_trans	*tp = NULL;
45 	struct xfs_dquot	*udqp = NULL;
46 	struct xfs_dquot	*gdqp = NULL;
47 	struct xfs_dquot	*pdqp = NULL;
48 	struct xfs_trans_res	*tres;
49 	struct xfs_inode	*dp = mp->m_rootip;
50 	xfs_ino_t		ino;
51 	unsigned int		resblks;
52 	bool			is_dir = S_ISDIR(mode);
53 	int			error;
54 
55 	if (xfs_is_shutdown(mp))
56 		return -EIO;
57 	if (xfs_is_readonly(mp))
58 		return -EROFS;
59 
60 	ASSERT(sc->tp == NULL);
61 	ASSERT(sc->tempip == NULL);
62 
63 	/*
64 	 * Make sure that we have allocated dquot(s) on disk.  The temporary
65 	 * inode should be completely root owned so that we don't fail due to
66 	 * quota limits.
67 	 */
68 	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
69 			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
70 	if (error)
71 		return error;
72 
73 	if (is_dir) {
74 		resblks = xfs_mkdir_space_res(mp, 0);
75 		tres = &M_RES(mp)->tr_mkdir;
76 	} else {
77 		resblks = XFS_IALLOC_SPACE_RES(mp);
78 		tres = &M_RES(mp)->tr_create_tmpfile;
79 	}
80 
81 	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
82 			&tp);
83 	if (error)
84 		goto out_release_dquots;
85 
86 	/* Allocate inode, set up directory. */
87 	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
88 	if (error)
89 		goto out_trans_cancel;
90 	error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
91 			0, false, &sc->tempip);
92 	if (error)
93 		goto out_trans_cancel;
94 
95 	/* Change the ownership of the inode to root. */
96 	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
97 	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
98 	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100 
101 	/*
102 	 * Mark our temporary file as private so that LSMs and the ACL code
103 	 * don't try to add their own metadata or reason about these files.
104 	 * The file should never be exposed to userspace.
105 	 */
106 	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107 	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
108 
109 	if (is_dir) {
110 		error = xfs_dir_init(tp, sc->tempip, dp);
111 		if (error)
112 			goto out_trans_cancel;
113 	} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114 		/*
115 		 * Initialize the temporary symlink with a meaningless target
116 		 * that won't trip the verifiers.  Repair must rewrite the
117 		 * target with meaningful content before swapping with the file
118 		 * being repaired.  A single-byte target will not write a
119 		 * remote target block, so the owner is irrelevant.
120 		 */
121 		error = xfs_symlink_write_target(tp, sc->tempip,
122 				sc->tempip->i_ino, ".", 1, 0, 0);
123 		if (error)
124 			goto out_trans_cancel;
125 	}
126 
127 	/*
128 	 * Attach the dquot(s) to the inodes and modify them incore.
129 	 * These ids of the inode couldn't have changed since the new
130 	 * inode has been locked ever since it was created.
131 	 */
132 	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
133 
134 	/*
135 	 * Put our temp file on the unlinked list so it's purged automatically.
136 	 * All file-based metadata being reconstructed using this file must be
137 	 * atomically exchanged with the original file because the contents
138 	 * here will be purged when the inode is dropped or log recovery cleans
139 	 * out the unlinked list.
140 	 */
141 	error = xfs_iunlink(tp, sc->tempip);
142 	if (error)
143 		goto out_trans_cancel;
144 
145 	error = xfs_trans_commit(tp);
146 	if (error)
147 		goto out_release_inode;
148 
149 	trace_xrep_tempfile_create(sc);
150 
151 	xfs_qm_dqrele(udqp);
152 	xfs_qm_dqrele(gdqp);
153 	xfs_qm_dqrele(pdqp);
154 
155 	/* Finish setting up the incore / vfs context. */
156 	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
157 	xfs_setup_iops(sc->tempip);
158 	xfs_finish_inode_setup(sc->tempip);
159 
160 	sc->temp_ilock_flags = 0;
161 	return error;
162 
163 out_trans_cancel:
164 	xfs_trans_cancel(tp);
165 out_release_inode:
166 	/*
167 	 * Wait until after the current transaction is aborted to finish the
168 	 * setup of the inode and release the inode.  This prevents recursive
169 	 * transactions and deadlocks from xfs_inactive.
170 	 */
171 	if (sc->tempip) {
172 		xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
173 		xfs_finish_inode_setup(sc->tempip);
174 		xchk_irele(sc, sc->tempip);
175 	}
176 out_release_dquots:
177 	xfs_qm_dqrele(udqp);
178 	xfs_qm_dqrele(gdqp);
179 	xfs_qm_dqrele(pdqp);
180 
181 	return error;
182 }
183 
184 /* Take IOLOCK_EXCL on the temporary file, maybe. */
185 bool
186 xrep_tempfile_iolock_nowait(
187 	struct xfs_scrub	*sc)
188 {
189 	if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
190 		sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
191 		return true;
192 	}
193 
194 	return false;
195 }
196 
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
 * to avoid deadlocks and lockdep complaints.
 *
 * Keeps retrying the trylock, pausing between attempts, until the lock is
 * obtained (returns 0) or xchk_should_terminate() tells us to stop, in which
 * case the error code that it set is returned.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}
}
216 
217 /* Release IOLOCK_EXCL on the temporary file. */
218 void
219 xrep_tempfile_iounlock(
220 	struct xfs_scrub	*sc)
221 {
222 	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
223 	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
224 }
225 
226 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
227 void
228 xrep_tempfile_ilock(
229 	struct xfs_scrub	*sc)
230 {
231 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
232 	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
233 }
234 
235 /* Try to grab ILOCK_EXCL on the temporary file. */
236 bool
237 xrep_tempfile_ilock_nowait(
238 	struct xfs_scrub	*sc)
239 {
240 	if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
241 		sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
242 		return true;
243 	}
244 
245 	return false;
246 }
247 
248 /* Unlock ILOCK_EXCL on the temporary file after an update. */
249 void
250 xrep_tempfile_iunlock(
251 	struct xfs_scrub	*sc)
252 {
253 	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
254 	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
255 }
256 
257 /*
258  * Begin the process of making changes to both the file being scrubbed and
259  * the temporary file by taking ILOCK_EXCL on both.
260  */
261 void
262 xrep_tempfile_ilock_both(
263 	struct xfs_scrub	*sc)
264 {
265 	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
266 	sc->ilock_flags |= XFS_ILOCK_EXCL;
267 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
268 }
269 
270 /* Unlock ILOCK_EXCL on both files. */
271 void
272 xrep_tempfile_iunlock_both(
273 	struct xfs_scrub	*sc)
274 {
275 	xrep_tempfile_iunlock(sc);
276 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
277 }
278 
279 /* Release the temporary file. */
280 void
281 xrep_tempfile_rele(
282 	struct xfs_scrub	*sc)
283 {
284 	if (!sc->tempip)
285 		return;
286 
287 	if (sc->temp_ilock_flags) {
288 		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
289 		sc->temp_ilock_flags = 0;
290 	}
291 
292 	xchk_irele(sc, sc->tempip);
293 	sc->tempip = NULL;
294 }
295 
296 /*
297  * Make sure that the given range of the data fork of the temporary file is
298  * mapped to written blocks.  The caller must ensure that both inodes are
299  * joined to the transaction.
300  */
301 int
302 xrep_tempfile_prealloc(
303 	struct xfs_scrub	*sc,
304 	xfs_fileoff_t		off,
305 	xfs_filblks_t		len)
306 {
307 	struct xfs_bmbt_irec	map;
308 	xfs_fileoff_t		end = off + len;
309 	int			error;
310 
311 	ASSERT(sc->tempip != NULL);
312 	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
313 
314 	for (; off < end; off = map.br_startoff + map.br_blockcount) {
315 		int		nmaps = 1;
316 
317 		/*
318 		 * If we have a real extent mapping this block then we're
319 		 * in ok shape.
320 		 */
321 		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
322 				XFS_DATA_FORK);
323 		if (error)
324 			return error;
325 		if (nmaps == 0) {
326 			ASSERT(nmaps != 0);
327 			return -EFSCORRUPTED;
328 		}
329 
330 		if (xfs_bmap_is_written_extent(&map))
331 			continue;
332 
333 		/*
334 		 * If we find a delalloc reservation then something is very
335 		 * very wrong.  Bail out.
336 		 */
337 		if (map.br_startblock == DELAYSTARTBLOCK)
338 			return -EFSCORRUPTED;
339 
340 		/*
341 		 * Make sure this block has a real zeroed extent allocated to
342 		 * it.
343 		 */
344 		nmaps = 1;
345 		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
346 				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
347 				&nmaps);
348 		if (error)
349 			return error;
350 		if (nmaps != 1)
351 			return -EFSCORRUPTED;
352 
353 		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
354 
355 		/* Commit new extent and all deferred work. */
356 		error = xfs_defer_finish(&sc->tp);
357 		if (error)
358 			return error;
359 	}
360 
361 	return 0;
362 }
363 
364 /*
365  * Write data to each block of a file.  The given range of the tempfile's data
366  * fork must already be populated with written extents.
367  */
368 int
369 xrep_tempfile_copyin(
370 	struct xfs_scrub	*sc,
371 	xfs_fileoff_t		off,
372 	xfs_filblks_t		len,
373 	xrep_tempfile_copyin_fn	prep_fn,
374 	void			*data)
375 {
376 	LIST_HEAD(buffers_list);
377 	struct xfs_mount	*mp = sc->mp;
378 	struct xfs_buf		*bp;
379 	xfs_fileoff_t		flush_mask;
380 	xfs_fileoff_t		end = off + len;
381 	loff_t			pos = XFS_FSB_TO_B(mp, off);
382 	int			error = 0;
383 
384 	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
385 
386 	/* Flush buffers to disk every 512K */
387 	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
388 
389 	for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
390 		struct xfs_bmbt_irec	map;
391 		int			nmaps = 1;
392 
393 		/* Read block mapping for this file block. */
394 		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
395 		if (error)
396 			goto out_err;
397 		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
398 			error = -EFSCORRUPTED;
399 			goto out_err;
400 		}
401 
402 		/* Get the metadata buffer for this offset in the file. */
403 		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
404 				XFS_FSB_TO_DADDR(mp, map.br_startblock),
405 				mp->m_bsize, 0, &bp);
406 		if (error)
407 			goto out_err;
408 
409 		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
410 
411 		/* Read in a block's worth of data from the xfile. */
412 		error = prep_fn(sc, bp, data);
413 		if (error) {
414 			xfs_trans_brelse(sc->tp, bp);
415 			goto out_err;
416 		}
417 
418 		/* Queue buffer, and flush if we have too much dirty data. */
419 		xfs_buf_delwri_queue_here(bp, &buffers_list);
420 		xfs_trans_brelse(sc->tp, bp);
421 
422 		if (!(off & flush_mask)) {
423 			error = xfs_buf_delwri_submit(&buffers_list);
424 			if (error)
425 				goto out_err;
426 		}
427 	}
428 
429 	/*
430 	 * Write the new blocks to disk.  If the ordered list isn't empty after
431 	 * that, then something went wrong and we have to fail.  This should
432 	 * never happen, but we'll check anyway.
433 	 */
434 	error = xfs_buf_delwri_submit(&buffers_list);
435 	if (error)
436 		goto out_err;
437 
438 	if (!list_empty(&buffers_list)) {
439 		ASSERT(list_empty(&buffers_list));
440 		error = -EIO;
441 		goto out_err;
442 	}
443 
444 	return 0;
445 
446 out_err:
447 	xfs_buf_delwri_cancel(&buffers_list);
448 	return error;
449 }
450 
451 /*
452  * Set the temporary file's size.  Caller must join the tempfile to the scrub
453  * transaction and is responsible for adjusting block mappings as needed.
454  */
455 int
456 xrep_tempfile_set_isize(
457 	struct xfs_scrub	*sc,
458 	unsigned long long	isize)
459 {
460 	if (sc->tempip->i_disk_size == isize)
461 		return 0;
462 
463 	sc->tempip->i_disk_size = isize;
464 	i_size_write(VFS_I(sc->tempip), isize);
465 	return xrep_tempfile_roll_trans(sc);
466 }
467 
468 /*
469  * Roll a repair transaction involving the temporary file.  Caller must join
470  * both the temporary file and the file being scrubbed to the transaction.
471  * This function return with both inodes joined to a new scrub transaction,
472  * or the usual negative errno.
473  */
474 int
475 xrep_tempfile_roll_trans(
476 	struct xfs_scrub	*sc)
477 {
478 	int			error;
479 
480 	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
481 	error = xrep_roll_trans(sc);
482 	if (error)
483 		return error;
484 
485 	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
486 	return 0;
487 }
488 
489 /*
490  * Fill out the mapping exchange request in preparation for atomically
491  * committing the contents of a metadata file that we've rebuilt in the temp
492  * file.
493  */
494 STATIC int
495 xrep_tempexch_prep_request(
496 	struct xfs_scrub	*sc,
497 	int			whichfork,
498 	struct xrep_tempexch	*tx)
499 {
500 	struct xfs_exchmaps_req	*req = &tx->req;
501 
502 	memset(tx, 0, sizeof(struct xrep_tempexch));
503 
504 	/* COW forks don't exist on disk. */
505 	if (whichfork == XFS_COW_FORK) {
506 		ASSERT(0);
507 		return -EINVAL;
508 	}
509 
510 	/* Both files should have the relevant forks. */
511 	if (!xfs_ifork_ptr(sc->ip, whichfork) ||
512 	    !xfs_ifork_ptr(sc->tempip, whichfork)) {
513 		ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
514 		ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
515 		return -EINVAL;
516 	}
517 
518 	/* Exchange all mappings in both forks. */
519 	req->ip1 = sc->tempip;
520 	req->ip2 = sc->ip;
521 	req->startoff1 = 0;
522 	req->startoff2 = 0;
523 	switch (whichfork) {
524 	case XFS_ATTR_FORK:
525 		req->flags |= XFS_EXCHMAPS_ATTR_FORK;
526 		break;
527 	case XFS_DATA_FORK:
528 		/* Always exchange sizes when exchanging data fork mappings. */
529 		req->flags |= XFS_EXCHMAPS_SET_SIZES;
530 		break;
531 	}
532 	req->blockcount = XFS_MAX_FILEOFF;
533 
534 	return 0;
535 }
536 
537 /*
538  * Fill out the mapping exchange resource estimation structures in preparation
539  * for exchanging the contents of a metadata file that we've rebuilt in the
540  * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
541  */
542 STATIC int
543 xrep_tempexch_estimate(
544 	struct xfs_scrub	*sc,
545 	struct xrep_tempexch	*tx)
546 {
547 	struct xfs_exchmaps_req	*req = &tx->req;
548 	struct xfs_ifork	*ifp;
549 	struct xfs_ifork	*tifp;
550 	int			whichfork = xfs_exchmaps_reqfork(req);
551 	int			state = 0;
552 
553 	/*
554 	 * The exchmaps code only knows how to exchange file fork space
555 	 * mappings.  Any fork data in local format must be promoted to a
556 	 * single block before the exchange can take place.
557 	 */
558 	ifp = xfs_ifork_ptr(sc->ip, whichfork);
559 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
560 		state |= 1;
561 
562 	tifp = xfs_ifork_ptr(sc->tempip, whichfork);
563 	if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
564 		state |= 2;
565 
566 	switch (state) {
567 	case 0:
568 		/* Both files have mapped extents; use the regular estimate. */
569 		return xfs_exchrange_estimate(req);
570 	case 1:
571 		/*
572 		 * The file being repaired is in local format, but the temp
573 		 * file has mapped extents.  To perform the exchange, the file
574 		 * being repaired must have its shorform data converted to an
575 		 * ondisk block so that the forks will be in extents format.
576 		 * We need one resblk for the conversion; the number of
577 		 * exchanges is (worst case) the temporary file's extent count
578 		 * plus the block we converted.
579 		 */
580 		req->ip1_bcount = sc->tempip->i_nblocks;
581 		req->ip2_bcount = 1;
582 		req->nr_exchanges = 1 + tifp->if_nextents;
583 		req->resblks = 1;
584 		break;
585 	case 2:
586 		/*
587 		 * The temporary file is in local format, but the file being
588 		 * repaired has mapped extents.  To perform the exchange, the
589 		 * temp file must have its shortform data converted to an
590 		 * ondisk block, and the fork changed to extents format.  We
591 		 * need one resblk for the conversion; the number of exchanges
592 		 * is (worst case) the extent count of the file being repaired
593 		 * plus the block we converted.
594 		 */
595 		req->ip1_bcount = 1;
596 		req->ip2_bcount = sc->ip->i_nblocks;
597 		req->nr_exchanges = 1 + ifp->if_nextents;
598 		req->resblks = 1;
599 		break;
600 	case 3:
601 		/*
602 		 * Both forks are in local format.  To perform the exchange,
603 		 * both files must have their shortform data converted to
604 		 * fsblocks, and both forks must be converted to extents
605 		 * format.  We need two resblks for the two conversions, and
606 		 * the number of exchanges is 1 since there's only one block at
607 		 * fileoff 0.  Presumably, the caller could not exchange the
608 		 * two inode fork areas directly.
609 		 */
610 		req->ip1_bcount = 1;
611 		req->ip2_bcount = 1;
612 		req->nr_exchanges = 1;
613 		req->resblks = 2;
614 		break;
615 	}
616 
617 	return xfs_exchmaps_estimate_overhead(req);
618 }
619 
620 /*
621  * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
622  * this if quota enforcement is disabled or if both inodes' dquots are the
623  * same.  The qretry structure must be initialized to zeroes before the first
624  * call to this function.
625  */
626 STATIC int
627 xrep_tempexch_reserve_quota(
628 	struct xfs_scrub		*sc,
629 	const struct xrep_tempexch	*tx)
630 {
631 	struct xfs_trans		*tp = sc->tp;
632 	const struct xfs_exchmaps_req	*req = &tx->req;
633 	int64_t				ddelta, rdelta;
634 	int				error;
635 
636 	/*
637 	 * Don't bother with a quota reservation if we're not enforcing them
638 	 * or the two inodes have the same dquots.
639 	 */
640 	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
641 	    (req->ip1->i_udquot == req->ip2->i_udquot &&
642 	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
643 	     req->ip1->i_pdquot == req->ip2->i_pdquot))
644 		return 0;
645 
646 	/*
647 	 * Quota reservation for each file comes from two sources.  First, we
648 	 * need to account for any net gain in mapped blocks during the
649 	 * exchange.  Second, we need reservation for the gross gain in mapped
650 	 * blocks so that we don't trip over any quota block reservation
651 	 * assertions.  We must reserve the gross gain because the quota code
652 	 * subtracts from bcount the number of blocks that we unmap; it does
653 	 * not add that quantity back to the quota block reservation.
654 	 */
655 	ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
656 	rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
657 	error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
658 			ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
659 			true);
660 	if (error)
661 		return error;
662 
663 	ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
664 	rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
665 	return xfs_trans_reserve_quota_nblks(tp, req->ip2,
666 			ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
667 			true);
668 }
669 
670 /*
671  * Prepare an existing transaction for an atomic file contents exchange.
672  *
673  * This function fills out the mapping exchange request and resource estimation
674  * structures in preparation for exchanging the contents of a metadata file
675  * that has been rebuilt in the temp file.  Next, it reserves space and quota
676  * for the transaction.
677  *
678  * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
679  * file.  The caller must join both inodes to the transaction with no unlock
680  * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
681  * use this when those ILOCKs cannot be dropped.
682  */
683 int
684 xrep_tempexch_trans_reserve(
685 	struct xfs_scrub	*sc,
686 	int			whichfork,
687 	struct xrep_tempexch	*tx)
688 {
689 	int			error;
690 
691 	ASSERT(sc->tp != NULL);
692 	xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
693 	xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
694 
695 	error = xrep_tempexch_prep_request(sc, whichfork, tx);
696 	if (error)
697 		return error;
698 
699 	error = xfs_exchmaps_estimate(&tx->req);
700 	if (error)
701 		return error;
702 
703 	error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
704 	if (error)
705 		return error;
706 
707 	return xrep_tempexch_reserve_quota(sc, tx);
708 }
709 
710 /*
711  * Create a new transaction for a file contents exchange.
712  *
713  * This function fills out the mapping excahange request and resource
714  * estimation structures in preparation for exchanging the contents of a
715  * metadata file that has been rebuilt in the temp file.  Next, it reserves
716  * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
717  * reserves quota for the transaction.
718  *
719  * The caller is responsible for dropping both ILOCKs when appropriate.
720  */
721 int
722 xrep_tempexch_trans_alloc(
723 	struct xfs_scrub	*sc,
724 	int			whichfork,
725 	struct xrep_tempexch	*tx)
726 {
727 	unsigned int		flags = 0;
728 	int			error;
729 
730 	ASSERT(sc->tp == NULL);
731 	ASSERT(xfs_has_exchange_range(sc->mp));
732 
733 	error = xrep_tempexch_prep_request(sc, whichfork, tx);
734 	if (error)
735 		return error;
736 
737 	error = xrep_tempexch_estimate(sc, tx);
738 	if (error)
739 		return error;
740 
741 	if (xfs_has_lazysbcount(sc->mp))
742 		flags |= XFS_TRANS_RES_FDBLKS;
743 
744 	error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
745 			tx->req.resblks, 0, flags, &sc->tp);
746 	if (error)
747 		return error;
748 
749 	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
750 	sc->ilock_flags |= XFS_ILOCK_EXCL;
751 	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
752 
753 	return xrep_tempexch_reserve_quota(sc, tx);
754 }
755 
756 /*
757  * Exchange file mappings (and hence file contents) between the file being
758  * repaired and the temporary file.  Returns with both inodes locked and joined
759  * to a clean scrub transaction.
760  */
761 int
762 xrep_tempexch_contents(
763 	struct xfs_scrub	*sc,
764 	struct xrep_tempexch	*tx)
765 {
766 	int			error;
767 
768 	ASSERT(xfs_has_exchange_range(sc->mp));
769 
770 	xfs_exchange_mappings(sc->tp, &tx->req);
771 	error = xfs_defer_finish(&sc->tp);
772 	if (error)
773 		return error;
774 
775 	/*
776 	 * If we exchanged the ondisk sizes of two metadata files, we must
777 	 * exchanged the incore sizes as well.
778 	 */
779 	if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
780 		loff_t	temp;
781 
782 		temp = i_size_read(VFS_I(sc->ip));
783 		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
784 		i_size_write(VFS_I(sc->tempip), temp);
785 	}
786 
787 	return 0;
788 }
789 
790 /*
791  * Write local format data from one of the temporary file's forks into the same
792  * fork of file being repaired, and exchange the file sizes, if appropriate.
793  * Caller must ensure that the file being repaired has enough fork space to
794  * hold all the bytes.
795  */
796 void
797 xrep_tempfile_copyout_local(
798 	struct xfs_scrub	*sc,
799 	int			whichfork)
800 {
801 	struct xfs_ifork	*temp_ifp;
802 	struct xfs_ifork	*ifp;
803 	unsigned int		ilog_flags = XFS_ILOG_CORE;
804 
805 	temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
806 	ifp = xfs_ifork_ptr(sc->ip, whichfork);
807 
808 	ASSERT(temp_ifp != NULL);
809 	ASSERT(ifp != NULL);
810 	ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
811 	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
812 
813 	switch (whichfork) {
814 	case XFS_DATA_FORK:
815 		ASSERT(sc->tempip->i_disk_size <=
816 					xfs_inode_data_fork_size(sc->ip));
817 		break;
818 	case XFS_ATTR_FORK:
819 		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
820 		break;
821 	default:
822 		ASSERT(0);
823 		return;
824 	}
825 
826 	/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
827 	xfs_idestroy_fork(ifp);
828 	xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
829 			temp_ifp->if_bytes);
830 
831 	if (whichfork == XFS_DATA_FORK) {
832 		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
833 		sc->ip->i_disk_size = sc->tempip->i_disk_size;
834 	}
835 
836 	ilog_flags |= xfs_ilog_fdata(whichfork);
837 	xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
838 }
839 
840 /* Decide if a given XFS inode is a temporary file for a repair. */
841 bool
842 xrep_is_tempfile(
843 	const struct xfs_inode	*ip)
844 {
845 	const struct inode	*inode = &ip->i_vnode;
846 
847 	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
848 		return true;
849 
850 	return false;
851 }
852