1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_bmap.h"
18 #include "xfs_bmap_btree.h"
19 #include "xfs_trans_space.h"
20 #include "xfs_dir2.h"
21 #include "xfs_exchrange.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_defer.h"
24 #include "xfs_symlink_remote.h"
25 #include "xfs_metafile.h"
26 #include "scrub/scrub.h"
27 #include "scrub/common.h"
28 #include "scrub/repair.h"
29 #include "scrub/trace.h"
30 #include "scrub/tempfile.h"
31 #include "scrub/tempexch.h"
32 #include "scrub/xfile.h"
33
34 /*
35 * Create a temporary file for reconstructing metadata, with the intention of
36 * atomically exchanging the temporary file's contents with the file that's
37 * being repaired.
38 */
39 int
xrep_tempfile_create(struct xfs_scrub * sc,uint16_t mode)40 xrep_tempfile_create(
41 struct xfs_scrub *sc,
42 uint16_t mode)
43 {
44 struct xfs_icreate_args args = {
45 .pip = sc->mp->m_rootip,
46 .mode = mode,
47 .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
48 };
49 struct xfs_mount *mp = sc->mp;
50 struct xfs_trans *tp = NULL;
51 struct xfs_dquot *udqp;
52 struct xfs_dquot *gdqp;
53 struct xfs_dquot *pdqp;
54 struct xfs_trans_res *tres;
55 struct xfs_inode *dp = mp->m_rootip;
56 xfs_ino_t ino;
57 unsigned int resblks;
58 bool is_dir = S_ISDIR(mode);
59 int error;
60
61 if (xfs_is_shutdown(mp))
62 return -EIO;
63 if (xfs_is_readonly(mp))
64 return -EROFS;
65
66 ASSERT(sc->tp == NULL);
67 ASSERT(sc->tempip == NULL);
68
69 /*
70 * Make sure that we have allocated dquot(s) on disk. The temporary
71 * inode should be completely root owned so that we don't fail due to
72 * quota limits.
73 */
74 error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
75 if (error)
76 return error;
77
78 if (is_dir) {
79 resblks = xfs_mkdir_space_res(mp, 0);
80 tres = &M_RES(mp)->tr_mkdir;
81 } else {
82 resblks = XFS_IALLOC_SPACE_RES(mp);
83 tres = &M_RES(mp)->tr_create_tmpfile;
84 }
85
86 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
87 &tp);
88 if (error)
89 goto out_release_dquots;
90
91 /* Allocate inode, set up directory. */
92 error = xfs_dialloc(&tp, &args, &ino);
93 if (error)
94 goto out_trans_cancel;
95 error = xfs_icreate(tp, ino, &args, &sc->tempip);
96 if (error)
97 goto out_trans_cancel;
98
99 /* We don't touch file data, so drop the realtime flags. */
100 sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
101 xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
102
103 /*
104 * Mark our temporary file as private so that LSMs and the ACL code
105 * don't try to add their own metadata or reason about these files.
106 * The file should never be exposed to userspace.
107 */
108 VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
109 VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
110
111 if (is_dir) {
112 error = xfs_dir_init(tp, sc->tempip, dp);
113 if (error)
114 goto out_trans_cancel;
115 } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
116 /*
117 * Initialize the temporary symlink with a meaningless target
118 * that won't trip the verifiers. Repair must rewrite the
119 * target with meaningful content before swapping with the file
120 * being repaired. A single-byte target will not write a
121 * remote target block, so the owner is irrelevant.
122 */
123 error = xfs_symlink_write_target(tp, sc->tempip,
124 sc->tempip->i_ino, ".", 1, 0, 0);
125 if (error)
126 goto out_trans_cancel;
127 }
128
129 /*
130 * Attach the dquot(s) to the inodes and modify them incore.
131 * These ids of the inode couldn't have changed since the new
132 * inode has been locked ever since it was created.
133 */
134 xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
135
136 /*
137 * Put our temp file on the unlinked list so it's purged automatically.
138 * All file-based metadata being reconstructed using this file must be
139 * atomically exchanged with the original file because the contents
140 * here will be purged when the inode is dropped or log recovery cleans
141 * out the unlinked list.
142 */
143 error = xfs_iunlink(tp, sc->tempip);
144 if (error)
145 goto out_trans_cancel;
146
147 error = xfs_trans_commit(tp);
148 if (error)
149 goto out_release_inode;
150
151 trace_xrep_tempfile_create(sc);
152
153 xfs_qm_dqrele(udqp);
154 xfs_qm_dqrele(gdqp);
155 xfs_qm_dqrele(pdqp);
156
157 /* Finish setting up the incore / vfs context. */
158 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
159 xfs_setup_iops(sc->tempip);
160 xfs_finish_inode_setup(sc->tempip);
161
162 sc->temp_ilock_flags = 0;
163 return error;
164
165 out_trans_cancel:
166 xfs_trans_cancel(tp);
167 out_release_inode:
168 /*
169 * Wait until after the current transaction is aborted to finish the
170 * setup of the inode and release the inode. This prevents recursive
171 * transactions and deadlocks from xfs_inactive.
172 */
173 if (sc->tempip) {
174 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
175 xfs_finish_inode_setup(sc->tempip);
176 xchk_irele(sc, sc->tempip);
177 }
178 out_release_dquots:
179 xfs_qm_dqrele(udqp);
180 xfs_qm_dqrele(gdqp);
181 xfs_qm_dqrele(pdqp);
182
183 return error;
184 }
185
186 /*
187 * Move sc->tempip from the regular directory tree to the metadata directory
188 * tree if sc->ip is part of the metadata directory tree and tempip has an
189 * eligible file mode.
190 *
191 * Temporary files have to be created before we even know which inode we're
192 * going to scrub, so we assume that they will be part of the regular directory
193 * tree. If it turns out that we're actually scrubbing a file from the
194 * metadata directory tree, we have to subtract the temp file from the root
195 * dquots and detach the dquots prior to setting the METADATA iflag. However,
196 * the scrub setup functions grab sc->ip and create sc->tempip before we
197 * actually get around to checking if the file mode is the right type for the
198 * scrubber.
199 */
200 int
xrep_tempfile_adjust_directory_tree(struct xfs_scrub * sc)201 xrep_tempfile_adjust_directory_tree(
202 struct xfs_scrub *sc)
203 {
204 int error;
205
206 if (!sc->tempip)
207 return 0;
208
209 ASSERT(sc->tp == NULL);
210 ASSERT(!xfs_is_metadir_inode(sc->tempip));
211
212 if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
213 return 0;
214 if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) &&
215 !S_ISREG(VFS_I(sc->tempip)->i_mode))
216 return 0;
217
218 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
219 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
220
221 error = xchk_trans_alloc(sc, 0);
222 if (error)
223 goto out_iolock;
224
225 xrep_tempfile_ilock(sc);
226 xfs_trans_ijoin(sc->tp, sc->tempip, 0);
227
228 /* Metadir files are not accounted in quota, so drop icount */
229 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L);
230 xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN);
231
232 error = xrep_trans_commit(sc);
233 if (error)
234 goto out_ilock;
235
236 xfs_iflags_set(sc->tempip, XFS_IRECOVERY);
237 xfs_qm_dqdetach(sc->tempip);
238 out_ilock:
239 xrep_tempfile_iunlock(sc);
240 out_iolock:
241 xrep_tempfile_iounlock(sc);
242 return error;
243 }
244
245 /*
246 * Remove this temporary file from the metadata directory tree so that it can
247 * be inactivated the normal way.
248 */
249 STATIC int
xrep_tempfile_remove_metadir(struct xfs_scrub * sc)250 xrep_tempfile_remove_metadir(
251 struct xfs_scrub *sc)
252 {
253 int error;
254
255 if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip))
256 return 0;
257
258 ASSERT(sc->tp == NULL);
259
260 xfs_iflags_clear(sc->tempip, XFS_IRECOVERY);
261
262 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
263 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
264
265 error = xchk_trans_alloc(sc, 0);
266 if (error)
267 goto out_iolock;
268
269 xrep_tempfile_ilock(sc);
270 xfs_trans_ijoin(sc->tp, sc->tempip, 0);
271
272 xfs_metafile_clear_iflag(sc->tp, sc->tempip);
273
274 /* Non-metadir files are accounted in quota, so bump bcount/icount */
275 error = xfs_qm_dqattach_locked(sc->tempip, false);
276 if (error)
277 goto out_cancel;
278
279 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L);
280 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT,
281 sc->tempip->i_nblocks);
282 error = xrep_trans_commit(sc);
283 goto out_ilock;
284
285 out_cancel:
286 xchk_trans_cancel(sc);
287 out_ilock:
288 xrep_tempfile_iunlock(sc);
289 out_iolock:
290 xrep_tempfile_iounlock(sc);
291 return error;
292 }
293
294 /* Take IOLOCK_EXCL on the temporary file, maybe. */
295 bool
xrep_tempfile_iolock_nowait(struct xfs_scrub * sc)296 xrep_tempfile_iolock_nowait(
297 struct xfs_scrub *sc)
298 {
299 if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
300 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
301 return true;
302 }
303
304 return false;
305 }
306
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we poll with a
 * trylock to avoid deadlocks and lockdep complaints.  Returns 0 once the lock
 * is held, or the error set by xchk_should_terminate if we are told to stop.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			ret = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;
		if (xchk_should_terminate(sc, &ret))
			return ret;
		delay(1);
	}
}
326
327 /* Release IOLOCK_EXCL on the temporary file. */
328 void
xrep_tempfile_iounlock(struct xfs_scrub * sc)329 xrep_tempfile_iounlock(
330 struct xfs_scrub *sc)
331 {
332 xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
333 sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
334 }
335
336 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
337 void
xrep_tempfile_ilock(struct xfs_scrub * sc)338 xrep_tempfile_ilock(
339 struct xfs_scrub *sc)
340 {
341 sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
342 xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
343 }
344
345 /* Try to grab ILOCK_EXCL on the temporary file. */
346 bool
xrep_tempfile_ilock_nowait(struct xfs_scrub * sc)347 xrep_tempfile_ilock_nowait(
348 struct xfs_scrub *sc)
349 {
350 if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
351 sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
352 return true;
353 }
354
355 return false;
356 }
357
358 /* Unlock ILOCK_EXCL on the temporary file after an update. */
359 void
xrep_tempfile_iunlock(struct xfs_scrub * sc)360 xrep_tempfile_iunlock(
361 struct xfs_scrub *sc)
362 {
363 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
364 sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
365 }
366
367 /*
368 * Begin the process of making changes to both the file being scrubbed and
369 * the temporary file by taking ILOCK_EXCL on both.
370 */
371 void
xrep_tempfile_ilock_both(struct xfs_scrub * sc)372 xrep_tempfile_ilock_both(
373 struct xfs_scrub *sc)
374 {
375 xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
376 sc->ilock_flags |= XFS_ILOCK_EXCL;
377 sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
378 }
379
380 /* Unlock ILOCK_EXCL on both files. */
381 void
xrep_tempfile_iunlock_both(struct xfs_scrub * sc)382 xrep_tempfile_iunlock_both(
383 struct xfs_scrub *sc)
384 {
385 xrep_tempfile_iunlock(sc);
386 xchk_iunlock(sc, XFS_ILOCK_EXCL);
387 }
388
389 /* Release the temporary file. */
390 void
xrep_tempfile_rele(struct xfs_scrub * sc)391 xrep_tempfile_rele(
392 struct xfs_scrub *sc)
393 {
394 if (!sc->tempip)
395 return;
396
397 if (sc->temp_ilock_flags) {
398 xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
399 sc->temp_ilock_flags = 0;
400 }
401
402 xrep_tempfile_remove_metadir(sc);
403 xchk_irele(sc, sc->tempip);
404 sc->tempip = NULL;
405 }
406
407 /*
408 * Make sure that the given range of the data fork of the temporary file is
409 * mapped to written blocks. The caller must ensure that both inodes are
410 * joined to the transaction.
411 */
412 int
xrep_tempfile_prealloc(struct xfs_scrub * sc,xfs_fileoff_t off,xfs_filblks_t len)413 xrep_tempfile_prealloc(
414 struct xfs_scrub *sc,
415 xfs_fileoff_t off,
416 xfs_filblks_t len)
417 {
418 struct xfs_bmbt_irec map;
419 xfs_fileoff_t end = off + len;
420 int error;
421
422 ASSERT(sc->tempip != NULL);
423 ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
424
425 for (; off < end; off = map.br_startoff + map.br_blockcount) {
426 int nmaps = 1;
427
428 /*
429 * If we have a real extent mapping this block then we're
430 * in ok shape.
431 */
432 error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
433 XFS_DATA_FORK);
434 if (error)
435 return error;
436 if (nmaps == 0) {
437 ASSERT(nmaps != 0);
438 return -EFSCORRUPTED;
439 }
440
441 if (xfs_bmap_is_written_extent(&map))
442 continue;
443
444 /*
445 * If we find a delalloc reservation then something is very
446 * very wrong. Bail out.
447 */
448 if (map.br_startblock == DELAYSTARTBLOCK)
449 return -EFSCORRUPTED;
450
451 /*
452 * Make sure this block has a real zeroed extent allocated to
453 * it.
454 */
455 nmaps = 1;
456 error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
457 XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
458 &nmaps);
459 if (error)
460 return error;
461 if (nmaps != 1)
462 return -EFSCORRUPTED;
463
464 trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
465
466 /* Commit new extent and all deferred work. */
467 error = xfs_defer_finish(&sc->tp);
468 if (error)
469 return error;
470 }
471
472 return 0;
473 }
474
475 /*
476 * Write data to each block of a file. The given range of the tempfile's data
477 * fork must already be populated with written extents.
478 */
479 int
xrep_tempfile_copyin(struct xfs_scrub * sc,xfs_fileoff_t off,xfs_filblks_t len,xrep_tempfile_copyin_fn prep_fn,void * data)480 xrep_tempfile_copyin(
481 struct xfs_scrub *sc,
482 xfs_fileoff_t off,
483 xfs_filblks_t len,
484 xrep_tempfile_copyin_fn prep_fn,
485 void *data)
486 {
487 LIST_HEAD(buffers_list);
488 struct xfs_mount *mp = sc->mp;
489 struct xfs_buf *bp;
490 xfs_fileoff_t flush_mask;
491 xfs_fileoff_t end = off + len;
492 loff_t pos = XFS_FSB_TO_B(mp, off);
493 int error = 0;
494
495 ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
496
497 /* Flush buffers to disk every 512K */
498 flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
499
500 for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
501 struct xfs_bmbt_irec map;
502 int nmaps = 1;
503
504 /* Read block mapping for this file block. */
505 error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
506 if (error)
507 goto out_err;
508 if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
509 error = -EFSCORRUPTED;
510 goto out_err;
511 }
512
513 /* Get the metadata buffer for this offset in the file. */
514 error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
515 XFS_FSB_TO_DADDR(mp, map.br_startblock),
516 mp->m_bsize, 0, &bp);
517 if (error)
518 goto out_err;
519
520 trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
521
522 /* Read in a block's worth of data from the xfile. */
523 error = prep_fn(sc, bp, data);
524 if (error) {
525 xfs_trans_brelse(sc->tp, bp);
526 goto out_err;
527 }
528
529 /* Queue buffer, and flush if we have too much dirty data. */
530 xfs_buf_delwri_queue_here(bp, &buffers_list);
531 xfs_trans_brelse(sc->tp, bp);
532
533 if (!(off & flush_mask)) {
534 error = xfs_buf_delwri_submit(&buffers_list);
535 if (error)
536 goto out_err;
537 }
538 }
539
540 /*
541 * Write the new blocks to disk. If the ordered list isn't empty after
542 * that, then something went wrong and we have to fail. This should
543 * never happen, but we'll check anyway.
544 */
545 error = xfs_buf_delwri_submit(&buffers_list);
546 if (error)
547 goto out_err;
548
549 if (!list_empty(&buffers_list)) {
550 ASSERT(list_empty(&buffers_list));
551 error = -EIO;
552 goto out_err;
553 }
554
555 return 0;
556
557 out_err:
558 xfs_buf_delwri_cancel(&buffers_list);
559 return error;
560 }
561
562 /*
563 * Set the temporary file's size. Caller must join the tempfile to the scrub
564 * transaction and is responsible for adjusting block mappings as needed.
565 */
566 int
xrep_tempfile_set_isize(struct xfs_scrub * sc,unsigned long long isize)567 xrep_tempfile_set_isize(
568 struct xfs_scrub *sc,
569 unsigned long long isize)
570 {
571 if (sc->tempip->i_disk_size == isize)
572 return 0;
573
574 sc->tempip->i_disk_size = isize;
575 i_size_write(VFS_I(sc->tempip), isize);
576 return xrep_tempfile_roll_trans(sc);
577 }
578
579 /*
580 * Roll a repair transaction involving the temporary file. Caller must join
581 * both the temporary file and the file being scrubbed to the transaction.
582 * This function return with both inodes joined to a new scrub transaction,
583 * or the usual negative errno.
584 */
585 int
xrep_tempfile_roll_trans(struct xfs_scrub * sc)586 xrep_tempfile_roll_trans(
587 struct xfs_scrub *sc)
588 {
589 int error;
590
591 xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
592 error = xrep_roll_trans(sc);
593 if (error)
594 return error;
595
596 xfs_trans_ijoin(sc->tp, sc->tempip, 0);
597 return 0;
598 }
599
600 /*
601 * Fill out the mapping exchange request in preparation for atomically
602 * committing the contents of a metadata file that we've rebuilt in the temp
603 * file.
604 */
605 STATIC int
xrep_tempexch_prep_request(struct xfs_scrub * sc,int whichfork,struct xrep_tempexch * tx)606 xrep_tempexch_prep_request(
607 struct xfs_scrub *sc,
608 int whichfork,
609 struct xrep_tempexch *tx)
610 {
611 struct xfs_exchmaps_req *req = &tx->req;
612
613 memset(tx, 0, sizeof(struct xrep_tempexch));
614
615 /* COW forks don't exist on disk. */
616 if (whichfork == XFS_COW_FORK) {
617 ASSERT(0);
618 return -EINVAL;
619 }
620
621 /* Both files should have the relevant forks. */
622 if (!xfs_ifork_ptr(sc->ip, whichfork) ||
623 !xfs_ifork_ptr(sc->tempip, whichfork)) {
624 ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
625 ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
626 return -EINVAL;
627 }
628
629 /* Exchange all mappings in both forks. */
630 req->ip1 = sc->tempip;
631 req->ip2 = sc->ip;
632 req->startoff1 = 0;
633 req->startoff2 = 0;
634 switch (whichfork) {
635 case XFS_ATTR_FORK:
636 req->flags |= XFS_EXCHMAPS_ATTR_FORK;
637 break;
638 case XFS_DATA_FORK:
639 /* Always exchange sizes when exchanging data fork mappings. */
640 req->flags |= XFS_EXCHMAPS_SET_SIZES;
641 break;
642 }
643 req->blockcount = XFS_MAX_FILEOFF;
644
645 return 0;
646 }
647
648 /*
649 * Fill out the mapping exchange resource estimation structures in preparation
650 * for exchanging the contents of a metadata file that we've rebuilt in the
651 * temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
652 */
653 STATIC int
xrep_tempexch_estimate(struct xfs_scrub * sc,struct xrep_tempexch * tx)654 xrep_tempexch_estimate(
655 struct xfs_scrub *sc,
656 struct xrep_tempexch *tx)
657 {
658 struct xfs_exchmaps_req *req = &tx->req;
659 struct xfs_ifork *ifp;
660 struct xfs_ifork *tifp;
661 int whichfork = xfs_exchmaps_reqfork(req);
662 int state = 0;
663
664 /*
665 * The exchmaps code only knows how to exchange file fork space
666 * mappings. Any fork data in local format must be promoted to a
667 * single block before the exchange can take place.
668 */
669 ifp = xfs_ifork_ptr(sc->ip, whichfork);
670 if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
671 state |= 1;
672
673 tifp = xfs_ifork_ptr(sc->tempip, whichfork);
674 if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
675 state |= 2;
676
677 switch (state) {
678 case 0:
679 /* Both files have mapped extents; use the regular estimate. */
680 return xfs_exchrange_estimate(req);
681 case 1:
682 /*
683 * The file being repaired is in local format, but the temp
684 * file has mapped extents. To perform the exchange, the file
685 * being repaired must have its shorform data converted to an
686 * ondisk block so that the forks will be in extents format.
687 * We need one resblk for the conversion; the number of
688 * exchanges is (worst case) the temporary file's extent count
689 * plus the block we converted.
690 */
691 req->ip1_bcount = sc->tempip->i_nblocks;
692 req->ip2_bcount = 1;
693 req->nr_exchanges = 1 + tifp->if_nextents;
694 req->resblks = 1;
695 break;
696 case 2:
697 /*
698 * The temporary file is in local format, but the file being
699 * repaired has mapped extents. To perform the exchange, the
700 * temp file must have its shortform data converted to an
701 * ondisk block, and the fork changed to extents format. We
702 * need one resblk for the conversion; the number of exchanges
703 * is (worst case) the extent count of the file being repaired
704 * plus the block we converted.
705 */
706 req->ip1_bcount = 1;
707 req->ip2_bcount = sc->ip->i_nblocks;
708 req->nr_exchanges = 1 + ifp->if_nextents;
709 req->resblks = 1;
710 break;
711 case 3:
712 /*
713 * Both forks are in local format. To perform the exchange,
714 * both files must have their shortform data converted to
715 * fsblocks, and both forks must be converted to extents
716 * format. We need two resblks for the two conversions, and
717 * the number of exchanges is 1 since there's only one block at
718 * fileoff 0. Presumably, the caller could not exchange the
719 * two inode fork areas directly.
720 */
721 req->ip1_bcount = 1;
722 req->ip2_bcount = 1;
723 req->nr_exchanges = 1;
724 req->resblks = 2;
725 break;
726 }
727
728 return xfs_exchmaps_estimate_overhead(req);
729 }
730
731 /*
732 * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
733 * this if quota enforcement is disabled or if both inodes' dquots are the
734 * same. The qretry structure must be initialized to zeroes before the first
735 * call to this function.
736 */
737 STATIC int
xrep_tempexch_reserve_quota(struct xfs_scrub * sc,const struct xrep_tempexch * tx)738 xrep_tempexch_reserve_quota(
739 struct xfs_scrub *sc,
740 const struct xrep_tempexch *tx)
741 {
742 struct xfs_trans *tp = sc->tp;
743 const struct xfs_exchmaps_req *req = &tx->req;
744 int64_t ddelta, rdelta;
745 int error;
746
747 /*
748 * Don't bother with a quota reservation if we're not enforcing them
749 * or the two inodes have the same dquots.
750 */
751 if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
752 (req->ip1->i_udquot == req->ip2->i_udquot &&
753 req->ip1->i_gdquot == req->ip2->i_gdquot &&
754 req->ip1->i_pdquot == req->ip2->i_pdquot))
755 return 0;
756
757 /*
758 * Quota reservation for each file comes from two sources. First, we
759 * need to account for any net gain in mapped blocks during the
760 * exchange. Second, we need reservation for the gross gain in mapped
761 * blocks so that we don't trip over any quota block reservation
762 * assertions. We must reserve the gross gain because the quota code
763 * subtracts from bcount the number of blocks that we unmap; it does
764 * not add that quantity back to the quota block reservation.
765 */
766 ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
767 rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
768 error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
769 ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
770 true);
771 if (error)
772 return error;
773
774 ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
775 rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
776 return xfs_trans_reserve_quota_nblks(tp, req->ip2,
777 ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
778 true);
779 }
780
781 /*
782 * Prepare an existing transaction for an atomic file contents exchange.
783 *
784 * This function fills out the mapping exchange request and resource estimation
785 * structures in preparation for exchanging the contents of a metadata file
786 * that has been rebuilt in the temp file. Next, it reserves space and quota
787 * for the transaction.
788 *
789 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
790 * file. The caller must join both inodes to the transaction with no unlock
791 * flags, and is responsible for dropping both ILOCKs when appropriate. Only
792 * use this when those ILOCKs cannot be dropped.
793 */
794 int
xrep_tempexch_trans_reserve(struct xfs_scrub * sc,int whichfork,struct xrep_tempexch * tx)795 xrep_tempexch_trans_reserve(
796 struct xfs_scrub *sc,
797 int whichfork,
798 struct xrep_tempexch *tx)
799 {
800 int error;
801
802 ASSERT(sc->tp != NULL);
803 xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
804 xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
805
806 error = xrep_tempexch_prep_request(sc, whichfork, tx);
807 if (error)
808 return error;
809
810 error = xfs_exchmaps_estimate(&tx->req);
811 if (error)
812 return error;
813
814 error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
815 if (error)
816 return error;
817
818 return xrep_tempexch_reserve_quota(sc, tx);
819 }
820
821 /*
822 * Create a new transaction for a file contents exchange.
823 *
824 * This function fills out the mapping excahange request and resource
825 * estimation structures in preparation for exchanging the contents of a
826 * metadata file that has been rebuilt in the temp file. Next, it reserves
827 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
828 * reserves quota for the transaction.
829 *
830 * The caller is responsible for dropping both ILOCKs when appropriate.
831 */
832 int
xrep_tempexch_trans_alloc(struct xfs_scrub * sc,int whichfork,struct xrep_tempexch * tx)833 xrep_tempexch_trans_alloc(
834 struct xfs_scrub *sc,
835 int whichfork,
836 struct xrep_tempexch *tx)
837 {
838 unsigned int flags = 0;
839 int error;
840
841 ASSERT(sc->tp == NULL);
842 ASSERT(xfs_has_exchange_range(sc->mp));
843
844 error = xrep_tempexch_prep_request(sc, whichfork, tx);
845 if (error)
846 return error;
847
848 error = xrep_tempexch_estimate(sc, tx);
849 if (error)
850 return error;
851
852 if (xfs_has_lazysbcount(sc->mp))
853 flags |= XFS_TRANS_RES_FDBLKS;
854
855 error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
856 tx->req.resblks, 0, flags, &sc->tp);
857 if (error)
858 return error;
859
860 sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
861 sc->ilock_flags |= XFS_ILOCK_EXCL;
862 xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
863
864 return xrep_tempexch_reserve_quota(sc, tx);
865 }
866
867 /*
868 * Exchange file mappings (and hence file contents) between the file being
869 * repaired and the temporary file. Returns with both inodes locked and joined
870 * to a clean scrub transaction.
871 */
872 int
xrep_tempexch_contents(struct xfs_scrub * sc,struct xrep_tempexch * tx)873 xrep_tempexch_contents(
874 struct xfs_scrub *sc,
875 struct xrep_tempexch *tx)
876 {
877 int error;
878
879 ASSERT(xfs_has_exchange_range(sc->mp));
880
881 xfs_exchange_mappings(sc->tp, &tx->req);
882 error = xfs_defer_finish(&sc->tp);
883 if (error)
884 return error;
885
886 /*
887 * If we exchanged the ondisk sizes of two metadata files, we must
888 * exchanged the incore sizes as well.
889 */
890 if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
891 loff_t temp;
892
893 temp = i_size_read(VFS_I(sc->ip));
894 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
895 i_size_write(VFS_I(sc->tempip), temp);
896 }
897
898 return 0;
899 }
900
901 /*
902 * Write local format data from one of the temporary file's forks into the same
903 * fork of file being repaired, and exchange the file sizes, if appropriate.
904 * Caller must ensure that the file being repaired has enough fork space to
905 * hold all the bytes.
906 */
907 void
xrep_tempfile_copyout_local(struct xfs_scrub * sc,int whichfork)908 xrep_tempfile_copyout_local(
909 struct xfs_scrub *sc,
910 int whichfork)
911 {
912 struct xfs_ifork *temp_ifp;
913 struct xfs_ifork *ifp;
914 unsigned int ilog_flags = XFS_ILOG_CORE;
915
916 temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
917 ifp = xfs_ifork_ptr(sc->ip, whichfork);
918
919 ASSERT(temp_ifp != NULL);
920 ASSERT(ifp != NULL);
921 ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
922 ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
923
924 switch (whichfork) {
925 case XFS_DATA_FORK:
926 ASSERT(sc->tempip->i_disk_size <=
927 xfs_inode_data_fork_size(sc->ip));
928 break;
929 case XFS_ATTR_FORK:
930 ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
931 break;
932 default:
933 ASSERT(0);
934 return;
935 }
936
937 /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
938 xfs_idestroy_fork(ifp);
939 xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
940 temp_ifp->if_bytes);
941
942 if (whichfork == XFS_DATA_FORK) {
943 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
944 sc->ip->i_disk_size = sc->tempip->i_disk_size;
945 }
946
947 ilog_flags |= xfs_ilog_fdata(whichfork);
948 xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
949 }
950
951 /* Decide if a given XFS inode is a temporary file for a repair. */
952 bool
xrep_is_tempfile(const struct xfs_inode * ip)953 xrep_is_tempfile(
954 const struct xfs_inode *ip)
955 {
956 const struct inode *inode = &ip->i_vnode;
957 struct xfs_mount *mp = ip->i_mount;
958
959 /*
960 * Files in the metadata directory tree also have S_PRIVATE set and
961 * IOP_XATTR unset, so we must distinguish them separately. We (ab)use
962 * the IRECOVERY flag to mark temporary metadir inodes knowing that the
963 * end of log recovery clears IRECOVERY, so the only ones that can
964 * exist during online repair are the ones we create.
965 */
966 if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
967 return __xfs_iflags_test(ip, XFS_IRECOVERY);
968
969 if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
970 return true;
971
972 return false;
973 }
974