// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_defer.h"
#include "xfs_symlink_remote.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/xfile.h"

/*
 * Create a temporary file for reconstructing metadata, with the intention of
 * atomically exchanging the temporary file's contents with the file that's
 * being repaired.
 */
int
xrep_tempfile_create(
	struct xfs_scrub	*sc,
	uint16_t		mode)
{
	struct xfs_icreate_args	args = {
		.pip		= sc->mp->m_rootip,
		.mode		= mode,
		.flags		= XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
	};
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = NULL;
	struct xfs_dquot	*udqp;
	struct xfs_dquot	*gdqp;
	struct xfs_dquot	*pdqp;
	struct xfs_trans_res	*tres;
	struct xfs_inode	*dp = mp->m_rootip;
	xfs_ino_t		ino;
	unsigned int		resblks;
	bool			is_dir = S_ISDIR(mode);
	int			error;

	if (xfs_is_shutdown(mp))
		return -EIO;
	if (xfs_is_readonly(mp))
		return -EROFS;

	ASSERT(sc->tp == NULL);
	ASSERT(sc->tempip == NULL);

	/*
	 * Make sure that we have allocated dquot(s) on disk.  The temporary
	 * inode should be completely root owned so that we don't fail due to
	 * quota limits.
	 */
	error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
	if (error)
		return error;

	if (is_dir) {
		resblks = xfs_mkdir_space_res(mp, 0);
		tres = &M_RES(mp)->tr_mkdir;
	} else {
		resblks = XFS_IALLOC_SPACE_RES(mp);
		tres = &M_RES(mp)->tr_create_tmpfile;
	}

	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
			&tp);
	if (error)
		goto out_release_dquots;

	/* Allocate inode, set up directory. */
	error = xfs_dialloc(&tp, &args, &ino);
	if (error)
		goto out_trans_cancel;
	error = xfs_icreate(tp, ino, &args, &sc->tempip);
	if (error)
		goto out_trans_cancel;

	/* We don't touch file data, so drop the realtime flags. */
	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);

	/*
	 * Mark our temporary file as private so that LSMs and the ACL code
	 * don't try to add their own metadata or reason about these files.
	 * The file should never be exposed to userspace.
	 */
	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;

	if (is_dir) {
		error = xfs_dir_init(tp, sc->tempip, dp);
		if (error)
			goto out_trans_cancel;
	} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
		/*
		 * Initialize the temporary symlink with a meaningless target
		 * that won't trip the verifiers.  Repair must rewrite the
		 * target with meaningful content before swapping with the file
		 * being repaired.  A single-byte target will not write a
		 * remote target block, so the owner is irrelevant.
		 */
		error = xfs_symlink_write_target(tp, sc->tempip,
				sc->tempip->i_ino, ".", 1, 0, 0);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * The inode's IDs cannot have changed since the new inode has
	 * been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);

	/*
	 * Put our temp file on the unlinked list so it's purged automatically.
	 * All file-based metadata being reconstructed using this file must be
	 * atomically exchanged with the original file because the contents
	 * here will be purged when the inode is dropped or log recovery cleans
	 * out the unlinked list.
	 */
	error = xfs_iunlink(tp, sc->tempip);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_release_inode;

	trace_xrep_tempfile_create(sc);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	/* Finish setting up the incore / vfs context. */
	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
	xfs_setup_iops(sc->tempip);
	xfs_finish_inode_setup(sc->tempip);

	sc->temp_ilock_flags = 0;
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to finish the
	 * setup of the inode and release the inode.  This prevents recursive
	 * transactions and deadlocks from xfs_inactive.
	 */
	if (sc->tempip) {
		xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
		xfs_finish_inode_setup(sc->tempip);
		xchk_irele(sc, sc->tempip);
	}
out_release_dquots:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	return error;
}
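
/*
 * In broad strokes, a repair that stages new metadata in the temporary file
 * will create it, fill the relevant fork (for example via
 * xrep_tempfile_prealloc and xrep_tempfile_copyin below), and then atomically
 * exchange those mappings with the file being repaired using the
 * xrep_tempexch_* helpers later in this file.  The tempfile stays on the
 * unlinked list throughout, so whatever is left in it is reclaimed once it
 * is released.
 */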

/* Take IOLOCK_EXCL on the temporary file, maybe. */
bool
xrep_tempfile_iolock_nowait(
	struct xfs_scrub	*sc)
{
	if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
		sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
		return true;
	}

	return false;
}

/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
 * to avoid deadlocks and lockdep complaints.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	while (!xrep_tempfile_iolock_nowait(sc)) {
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}

	return 0;
}

/* Release IOLOCK_EXCL on the temporary file. */
void
xrep_tempfile_iounlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
}

/* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
void
xrep_tempfile_ilock(
	struct xfs_scrub	*sc)
{
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
}

/* Try to grab ILOCK_EXCL on the temporary file. */
bool
xrep_tempfile_ilock_nowait(
	struct xfs_scrub	*sc)
{
	if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
		sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
		return true;
	}

	return false;
}

/* Unlock ILOCK_EXCL on the temporary file after an update. */
void
xrep_tempfile_iunlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}

/*
 * Begin the process of making changes to both the file being scrubbed and
 * the temporary file by taking ILOCK_EXCL on both.
 */
void
xrep_tempfile_ilock_both(
	struct xfs_scrub	*sc)
{
	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
}

/* Unlock ILOCK_EXCL on both files. */
void
xrep_tempfile_iunlock_both(
	struct xfs_scrub	*sc)
{
	xrep_tempfile_iunlock(sc);
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
}

/* Release the temporary file. */
void
xrep_tempfile_rele(
	struct xfs_scrub	*sc)
{
	if (!sc->tempip)
		return;

	if (sc->temp_ilock_flags) {
		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
		sc->temp_ilock_flags = 0;
	}

	xchk_irele(sc, sc->tempip);
	sc->tempip = NULL;
}

/*
 * Make sure that the given range of the data fork of the temporary file is
 * mapped to written blocks.  The caller must ensure that both inodes are
 * joined to the transaction.
 */
int
xrep_tempfile_prealloc(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		end = off + len;
	int			error;

	ASSERT(sc->tempip != NULL);
	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));

	for (; off < end; off = map.br_startoff + map.br_blockcount) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		if (xfs_bmap_is_written_extent(&map))
			continue;

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/*
		 * Make sure this block has a real zeroed extent allocated to
		 * it.
		 */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
				&nmaps);
		if (error)
			return error;
		if (nmaps != 1)
			return -EFSCORRUPTED;

		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);

		/* Commit new extent and all deferred work. */
		error = xfs_defer_finish(&sc->tp);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Write data to each block of a file.  The given range of the tempfile's data
 * fork must already be populated with written extents.
 */
int
xrep_tempfile_copyin(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	xrep_tempfile_copyin_fn	prep_fn,
	void			*data)
{
	LIST_HEAD(buffers_list);
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	xfs_fileoff_t		flush_mask;
	xfs_fileoff_t		end = off + len;
	loff_t			pos = XFS_FSB_TO_B(mp, off);
	int			error = 0;

	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));

	/* Flush buffers to disk every 512K */
	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
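	/*
	 * For example, with a 4096-byte block size, 512K works out to a
	 * flush_mask of 127, so the loop below pushes the delwri list
	 * roughly every 128 blocks copied.
	 */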

	for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
		struct xfs_bmbt_irec	map;
		int			nmaps = 1;

		/* Read block mapping for this file block. */
		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
		if (error)
			goto out_err;
		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
			error = -EFSCORRUPTED;
			goto out_err;
		}

		/* Get the metadata buffer for this offset in the file. */
		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
				XFS_FSB_TO_DADDR(mp, map.br_startblock),
				mp->m_bsize, 0, &bp);
		if (error)
			goto out_err;

		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);

		/* Read in a block's worth of data from the xfile. */
		error = prep_fn(sc, bp, data);
		if (error) {
			xfs_trans_brelse(sc->tp, bp);
			goto out_err;
		}

		/* Queue buffer, and flush if we have too much dirty data. */
		xfs_buf_delwri_queue_here(bp, &buffers_list);
		xfs_trans_brelse(sc->tp, bp);

		if (!(off & flush_mask)) {
			error = xfs_buf_delwri_submit(&buffers_list);
			if (error)
				goto out_err;
		}
	}

	/*
	 * Write the new blocks to disk.  If the ordered list isn't empty after
	 * that, then something went wrong and we have to fail.  This should
	 * never happen, but we'll check anyway.
	 */
	error = xfs_buf_delwri_submit(&buffers_list);
	if (error)
		goto out_err;

	if (!list_empty(&buffers_list)) {
		ASSERT(list_empty(&buffers_list));
		error = -EIO;
		goto out_err;
	}

	return 0;

out_err:
	xfs_buf_delwri_cancel(&buffers_list);
	return error;
}

/*
 * Set the temporary file's size.  Caller must join the tempfile to the scrub
 * transaction and is responsible for adjusting block mappings as needed.
 */
int
xrep_tempfile_set_isize(
	struct xfs_scrub	*sc,
	unsigned long long	isize)
{
	if (sc->tempip->i_disk_size == isize)
		return 0;

	sc->tempip->i_disk_size = isize;
	i_size_write(VFS_I(sc->tempip), isize);
	return xrep_tempfile_roll_trans(sc);
}

/*
 * Roll a repair transaction involving the temporary file.  Caller must join
 * both the temporary file and the file being scrubbed to the transaction.
 * This function returns with both inodes joined to a new scrub transaction,
 * or the usual negative errno.
 */
int
xrep_tempfile_roll_trans(
	struct xfs_scrub	*sc)
{
	int			error;

	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
	error = xrep_roll_trans(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
	return 0;
}

/*
 * Fill out the mapping exchange request in preparation for atomically
 * committing the contents of a metadata file that we've rebuilt in the temp
 * file.
 */
STATIC int
xrep_tempexch_prep_request(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xrep_tempexch	*tx)
{
	struct xfs_exchmaps_req	*req = &tx->req;

	memset(tx, 0, sizeof(struct xrep_tempexch));

	/* COW forks don't exist on disk. */
	if (whichfork == XFS_COW_FORK) {
		ASSERT(0);
		return -EINVAL;
	}

	/* Both files should have the relevant forks. */
	if (!xfs_ifork_ptr(sc->ip, whichfork) ||
	    !xfs_ifork_ptr(sc->tempip, whichfork)) {
		ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
		ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
		return -EINVAL;
	}

	/* Exchange all mappings in both forks. */
	req->ip1 = sc->tempip;
	req->ip2 = sc->ip;
	req->startoff1 = 0;
	req->startoff2 = 0;
	switch (whichfork) {
	case XFS_ATTR_FORK:
		req->flags |= XFS_EXCHMAPS_ATTR_FORK;
		break;
	case XFS_DATA_FORK:
		/* Always exchange sizes when exchanging data fork mappings. */
		req->flags |= XFS_EXCHMAPS_SET_SIZES;
		break;
	}
	req->blockcount = XFS_MAX_FILEOFF;

	return 0;
}

/*
 * Fill out the mapping exchange resource estimation structures in preparation
 * for exchanging the contents of a metadata file that we've rebuilt in the
 * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
 */
STATIC int
xrep_tempexch_estimate(
	struct xfs_scrub	*sc,
	struct xrep_tempexch	*tx)
{
	struct xfs_exchmaps_req	*req = &tx->req;
	struct xfs_ifork	*ifp;
	struct xfs_ifork	*tifp;
	int			whichfork = xfs_exchmaps_reqfork(req);
	int			state = 0;

	/*
	 * The exchmaps code only knows how to exchange file fork space
	 * mappings.  Any fork data in local format must be promoted to a
	 * single block before the exchange can take place.
	 */
	ifp = xfs_ifork_ptr(sc->ip, whichfork);
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
		state |= 1;

	tifp = xfs_ifork_ptr(sc->tempip, whichfork);
	if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
		state |= 2;

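	/*
	 * state is a small bitmask: bit 0 set means the file being repaired
	 * has a local-format fork, bit 1 set means the temp file does, so
	 * the switch below covers the four possible combinations.
	 */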
	switch (state) {
	case 0:
		/* Both files have mapped extents; use the regular estimate. */
		return xfs_exchrange_estimate(req);
	case 1:
		/*
		 * The file being repaired is in local format, but the temp
		 * file has mapped extents.  To perform the exchange, the file
		 * being repaired must have its shortform data converted to an
		 * ondisk block so that the forks will be in extents format.
		 * We need one resblk for the conversion; the number of
		 * exchanges is (worst case) the temporary file's extent count
		 * plus the block we converted.
		 */
		req->ip1_bcount = sc->tempip->i_nblocks;
		req->ip2_bcount = 1;
		req->nr_exchanges = 1 + tifp->if_nextents;
		req->resblks = 1;
		break;
	case 2:
		/*
		 * The temporary file is in local format, but the file being
		 * repaired has mapped extents.  To perform the exchange, the
		 * temp file must have its shortform data converted to an
		 * ondisk block, and the fork changed to extents format.  We
		 * need one resblk for the conversion; the number of exchanges
		 * is (worst case) the extent count of the file being repaired
		 * plus the block we converted.
		 */
		req->ip1_bcount = 1;
		req->ip2_bcount = sc->ip->i_nblocks;
		req->nr_exchanges = 1 + ifp->if_nextents;
		req->resblks = 1;
		break;
	case 3:
		/*
		 * Both forks are in local format.  To perform the exchange,
		 * both files must have their shortform data converted to
		 * fsblocks, and both forks must be converted to extents
		 * format.  We need two resblks for the two conversions, and
		 * the number of exchanges is 1 since there's only one block at
		 * fileoff 0.  Presumably, the caller could not exchange the
		 * two inode fork areas directly.
		 */
		req->ip1_bcount = 1;
		req->ip2_bcount = 1;
		req->nr_exchanges = 1;
		req->resblks = 2;
		break;
	}

	return xfs_exchmaps_estimate_overhead(req);
}

/*
 * Obtain a quota reservation to make sure we don't hit EDQUOT.  We can skip
 * this if quota enforcement is disabled or if both inodes' dquots are the
 * same.  The qretry structure must be initialized to zeroes before the first
 * call to this function.
 */
STATIC int
xrep_tempexch_reserve_quota(
	struct xfs_scrub	*sc,
	const struct xrep_tempexch *tx)
{
	struct xfs_trans	*tp = sc->tp;
	const struct xfs_exchmaps_req *req = &tx->req;
	int64_t			ddelta, rdelta;
	int			error;

	/*
	 * Don't bother with a quota reservation if we're not enforcing them
	 * or the two inodes have the same dquots.
	 */
	if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
	    (req->ip1->i_udquot == req->ip2->i_udquot &&
	     req->ip1->i_gdquot == req->ip2->i_gdquot &&
	     req->ip1->i_pdquot == req->ip2->i_pdquot))
		return 0;

	/*
	 * Quota reservation for each file comes from two sources.  First, we
	 * need to account for any net gain in mapped blocks during the
	 * exchange.  Second, we need reservation for the gross gain in mapped
	 * blocks so that we don't trip over any quota block reservation
	 * assertions.  We must reserve the gross gain because the quota code
	 * subtracts from bcount the number of blocks that we unmap; it does
	 * not add that quantity back to the quota block reservation.
	 */
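	/*
	 * Illustrative numbers: if ip1 contributes 10 blocks to the exchange
	 * and ip2 contributes 25, then ddelta below is 15 and we reserve
	 * 15 + 10 = 25 blocks against ip1's dquots; in the other direction
	 * ddelta is 0 and we reserve 0 + 25 = 25 against ip2's dquots.
	 */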
	ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
	rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
	error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
			ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
			true);
	if (error)
		return error;

	ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
	rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
	return xfs_trans_reserve_quota_nblks(tp, req->ip2,
			ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
			true);
}

/*
 * Prepare an existing transaction for an atomic file contents exchange.
 *
 * This function fills out the mapping exchange request and resource estimation
 * structures in preparation for exchanging the contents of a metadata file
 * that has been rebuilt in the temp file.  Next, it reserves space and quota
 * for the transaction.
 *
 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
 * file.  The caller must join both inodes to the transaction with no unlock
 * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
 * use this when those ILOCKs cannot be dropped.
 */
int
xrep_tempexch_trans_reserve(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xrep_tempexch	*tx)
{
	int			error;

	ASSERT(sc->tp != NULL);
	xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);

	error = xrep_tempexch_prep_request(sc, whichfork, tx);
	if (error)
		return error;

	error = xfs_exchmaps_estimate(&tx->req);
	if (error)
		return error;

	error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
	if (error)
		return error;

	return xrep_tempexch_reserve_quota(sc, tx);
}

/*
 * Create a new transaction for a file contents exchange.
 *
 * This function fills out the mapping exchange request and resource
 * estimation structures in preparation for exchanging the contents of a
 * metadata file that has been rebuilt in the temp file.  Next, it reserves
 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
 * reserves quota for the transaction.
 *
 * The caller is responsible for dropping both ILOCKs when appropriate.
 */
int
xrep_tempexch_trans_alloc(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xrep_tempexch	*tx)
{
	unsigned int		flags = 0;
	int			error;

	ASSERT(sc->tp == NULL);
	ASSERT(xfs_has_exchange_range(sc->mp));

	error = xrep_tempexch_prep_request(sc, whichfork, tx);
	if (error)
		return error;

	error = xrep_tempexch_estimate(sc, tx);
	if (error)
		return error;

	if (xfs_has_lazysbcount(sc->mp))
		flags |= XFS_TRANS_RES_FDBLKS;

	error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
			tx->req.resblks, 0, flags, &sc->tp);
	if (error)
		return error;

	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);

	return xrep_tempexch_reserve_quota(sc, tx);
}

/*
 * Exchange file mappings (and hence file contents) between the file being
 * repaired and the temporary file.  Returns with both inodes locked and joined
 * to a clean scrub transaction.
 */
int
xrep_tempexch_contents(
	struct xfs_scrub	*sc,
	struct xrep_tempexch	*tx)
{
	int			error;

	ASSERT(xfs_has_exchange_range(sc->mp));

	xfs_exchange_mappings(sc->tp, &tx->req);
	error = xfs_defer_finish(&sc->tp);
	if (error)
		return error;

	/*
	 * If we exchanged the ondisk sizes of two metadata files, we must
	 * exchange the incore sizes as well.
	 */
	if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
		loff_t	temp;

		temp = i_size_read(VFS_I(sc->ip));
		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
		i_size_write(VFS_I(sc->tempip), temp);
	}

	return 0;
}

/*
 * Write local format data from one of the temporary file's forks into the
 * same fork of the file being repaired, and exchange the file sizes, if
 * appropriate.  Caller must ensure that the file being repaired has enough
 * fork space to hold all the bytes.
 */
void
xrep_tempfile_copyout_local(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*temp_ifp;
	struct xfs_ifork	*ifp;
	unsigned int		ilog_flags = XFS_ILOG_CORE;

	temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
	ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(temp_ifp != NULL);
	ASSERT(ifp != NULL);
	ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	switch (whichfork) {
	case XFS_DATA_FORK:
		ASSERT(sc->tempip->i_disk_size <=
				xfs_inode_data_fork_size(sc->ip));
		break;
	case XFS_ATTR_FORK:
		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
		break;
	default:
		ASSERT(0);
		return;
	}

	/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
	xfs_idestroy_fork(ifp);
	xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
			temp_ifp->if_bytes);

	if (whichfork == XFS_DATA_FORK) {
		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
		sc->ip->i_disk_size = sc->tempip->i_disk_size;
	}

	ilog_flags |= xfs_ilog_fdata(whichfork);
	xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
}

/* Decide if a given XFS inode is a temporary file for a repair. */
bool
xrep_is_tempfile(
	const struct xfs_inode	*ip)
{
	const struct inode	*inode = &ip->i_vnode;

	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
		return true;

	return false;
}