// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_health.h"
#include "xfs_exchmaps_item.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_symlink_remote.h"

struct kmem_cache	*xfs_exchmaps_intent_cache;

/* bmbt mappings adjacent to a pair of records. */
struct xfs_exchmaps_adjacent {
	struct xfs_bmbt_irec	left1;
	struct xfs_bmbt_irec	right1;
	struct xfs_bmbt_irec	left2;
	struct xfs_bmbt_irec	right2;
};

#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to reset reflink flag / CoW fork state after an exchange. */

/*
 * If the reflink flag is set on either inode, make sure it has an incore CoW
 * fork, since all reflink inodes must have them.  If there's a CoW fork and
 * it has mappings in it, make sure the inodes are tagged appropriately so
 * that speculative preallocations can be GC'd if we run low on space.
 */
static inline void
xfs_exchmaps_ensure_cowfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*cfork;

	if (xfs_is_reflink_inode(ip))
		xfs_ifork_init_cow(ip);

	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
	if (!cfork)
		return;
	if (cfork->if_bytes > 0)
		xfs_inode_set_cowblocks_tag(ip);
	else
		xfs_inode_clear_cowblocks_tag(ip);
}

/*
 * Adjust the on-disk inode size upwards if needed so that we never add
 * mappings into the file past EOF.  This is crucial so that log recovery
 * won't get confused by the sudden appearance of post-eof mappings.
 */
STATIC void
xfs_exchmaps_update_size(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	xfs_fsize_t		new_isize)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_fsize_t		len;

	if (new_isize < 0)
		return;

	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
		  new_isize);

	if (len <= ip->i_disk_size)
		return;

	trace_xfs_exchmaps_update_inode_size(ip, len);

	ip->i_disk_size = len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
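
/*
 * Worked example of the size update above (illustrative, not from the
 * original source): with 4k blocks, a mapping covering fsblocks [3, 5) ends
 * at byte 20480.  Given new_isize = 18000 we take len = min(20480, 18000) =
 * 18000, and only if the current i_disk_size is smaller (say 12000) does the
 * on-disk size get bumped to 18000.
 */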

/* Advance the incore state tracking after exchanging a mapping. */
static inline void
xmi_advance(
	struct xfs_exchmaps_intent	*xmi,
	const struct xfs_bmbt_irec	*irec)
{
	xmi->xmi_startoff1 += irec->br_blockcount;
	xmi->xmi_startoff2 += irec->br_blockcount;
	xmi->xmi_blockcount -= irec->br_blockcount;
}

/* Do we still have more mappings to exchange? */
static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_blockcount > 0;
}

/* Do we have post-operation cleanups to perform? */
static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
				 __XFS_EXCHMAPS_INO2_SHORTFORM);
}

/* Check all mappings to make sure we can actually exchange them. */
int
xfs_exchmaps_check_forks(
	struct xfs_mount	*mp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_ifork	*ifp1, *ifp2;
	int			whichfork = xfs_exchmaps_reqfork(req);

	/* No fork? */
	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
	if (!ifp1 || !ifp2)
		return -EINVAL;

	/* We don't know how to exchange local format forks. */
	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	return 0;
}

#ifdef CONFIG_XFS_QUOTA
/* Log the actual updates to the quota accounting. */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec	*irec1,
	struct xfs_bmbt_irec	*irec2)
{
	int64_t			ip1_delta = 0, ip2_delta = 0;
	unsigned int		qflag;

	qflag = XFS_IS_REALTIME_INODE(xmi->xmi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
}
#else
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
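
/*
 * Worked example of the quota deltas above (illustrative): if irec1 is a
 * real 8-block mapping moving from file1 to file2 and irec2 is a hole, then
 * ip1_delta = -8 and ip2_delta = +8, i.e. the block quota simply follows the
 * physical blocks to their new owner.
 */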

/* Decide if we want to skip this mapping from file1. */
static inline bool
xfs_exchmaps_can_skip_mapping(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = xmi->xmi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written mappings. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align
	 * with rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = min(irec->br_blockcount,
					  new_end - irec->br_startoff);
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the
	 *   mapping down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				       mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
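
/*
 * Worked example of the rtx trimming above (illustrative, assuming
 * sb_rextsize == 8): an unwritten mapping at br_startoff = 5 with
 * br_blockcount = 20 is trimmed to end at the rtx boundary at 8 (blockcount
 * 3) and exchanged.  A later unwritten mapping starting at 16 with
 * blockcount 12 is trimmed down to the boundary at 24 (blockcount 8) and
 * skipped, leaving the sub-rtx tail at offset 24 to be examined (and
 * exchanged) on its own in a later iteration.
 */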

/*
 * Walk forward through the file ranges in @xmi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and xmi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them
 * as the left records in @adj (if provided) so that the simulation phase
 * can avoid an extra lookup.
 */
static int
xfs_exchmaps_find_mappings(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec	*irec1,
	struct xfs_bmbt_irec	*irec2,
	struct xfs_exchmaps_adjacent	*adj)
{
	int			nimaps;
	int			bmap_flags;
	int			error;

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));

	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
		/* Read mapping from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
				xmi->xmi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != xmi->xmi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc
			 * mapping or something that doesn't match what we
			 * asked for, since the caller flushed both inodes
			 * and we hold the ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
			continue;
		}

		/* Read mapping from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != xmi->xmi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc
			 * mapping or something that doesn't match what we
			 * asked for, since the caller flushed both inodes
			 * and we hold the ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only exchange as many blocks as the smaller of the
		 * two mappings.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);

		/* We found something to exchange, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two mappings pointing to the same physical block must not
		 * have different states; that's filesystem corruption.  Move
		 * on to the next mapping if they're both holes or both point
		 * to the same physical space extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(xmi->xmi_ip1,
					xfs_exchmaps_whichfork(xmi));
			xfs_bmap_mark_sick(xmi->xmi_ip2,
					xfs_exchmaps_whichfork(xmi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}

/* Exchange these two mappings. */
static void
xfs_exchmaps_one_step(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec	*irec1,
	struct xfs_bmbt_irec	*irec2)
{
	int			whichfork = xfs_exchmaps_whichfork(xmi);

	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We exchange the file offsets between the two
	 * maps and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information exchanged.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);

	/* Make sure we're not adding mappings past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
				xmi->xmi_isize1);
		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
				xmi->xmi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the XMD item, and if *blockcount is nonzero, it
	 * will log a new XMI item for the remainder and call us back.
	 */
	xmi_advance(xmi, irec1);
}
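
/*
 * Illustrative example of the unmap/swap/map sequence above: if irec1 maps
 * file1 offset 10 to block 100 and irec2 maps file2 offset 50 to block 200
 * (both 5 blocks long), then after swapping the start offsets and re-adding
 * the opposite records, file1 offset 10 points at block 200 and file2
 * offset 50 points at block 100.
 */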

/* Convert inode2's leaf attr fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_attr_to_sf(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}

/* Convert inode2's block dir fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_dir_to_sf(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	int			size;
	int			error = 0;

	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
		return error;

	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino,
			&bp);
	if (error)
		return error;

	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}

/* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_exchmaps_link_to_sf(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_inode	*ip = xmi->xmi_ip2;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char			*buf;
	int			error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmalloc(ip->i_disk_size + 1,
			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kfree(buf);
	return error;
}

/* Clear the reflink flag after an exchange. */
static inline void
xfs_exchmaps_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Finish whatever work might come after an exchange operation. */
static int
xfs_exchmaps_do_postop_work(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
		int		error = 0;

		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
			error = xfs_exchmaps_attr_to_sf(tp, xmi);
		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_dir_to_sf(tp, xmi);
		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_link_to_sf(tp, xmi);
		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
		if (error)
			return error;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	return 0;
}

/* Finish one step in a mapping exchange operation, possibly relogging. */
int
xfs_exchmaps_finish_one(
	struct xfs_trans	*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_bmbt_irec	irec1, irec2;
	int			error;

	if (xmi_has_more_exchange_work(xmi)) {
		/*
		 * If the operation state says that some range of the files
		 * has not yet been exchanged, look for mappings in that range
		 * to exchange.  If we find some mappings, exchange them.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (xmi_has_more_exchange_work(xmi))
			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * exchange and either we just exchanged the last mappings in
		 * the range or we didn't find anything to exchange, update
		 * the ondisk file sizes.
		 */
		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
		    !xmi_has_more_exchange_work(xmi)) {
			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;

			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
		}
	} else if (xmi_has_postop_work(xmi)) {
		/*
		 * Now that we're finished with the exchange operation,
		 * complete the post-op cleanup work.
		 */
		error = xfs_exchmaps_do_postop_work(tp, xmi);
		if (error)
			return error;
	}

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the exchange work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
	}

	return 0;
}
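
/*
 * Sketch of the relogging contract above (illustrative): defer ops or log
 * recovery calls xfs_exchmaps_finish_one() once per transaction roll, and
 * -EAGAIN means "log a new intent item for the remaining work and call me
 * again".  An exchange covering three mapping pairs therefore typically runs
 * as three small transactions rather than one large one.
 */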

/*
 * Compute the amount of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_exchmaps_bmbt_blocks(
	struct xfs_mount	*mp,
	const struct xfs_exchmaps_req	*req)
{
	return howmany_64(req->nr_exchanges,
				XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
			XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
}
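
/*
 * Example of the reservation arithmetic above (illustrative): if a block can
 * hold 100 contiguous bmbt records and we estimate 250 exchanges, we reserve
 * howmany_64(250, 100) = 3 times the per-extent-add worst case, on the
 * theory that each new leaf block's worth of records may force a full split.
 */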

/* Compute the space we should reserve for the rmap btree expansions. */
static inline uint64_t
xfs_exchmaps_rmapbt_blocks(
	struct xfs_mount	*mp,
	const struct xfs_exchmaps_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
				XFS_RTRMAPADD_SPACE_RES(mp);

	return howmany_64(req->nr_exchanges,
				XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
			XFS_RMAPADD_SPACE_RES(mp);
}

/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
int
xfs_exchmaps_estimate_overhead(
	struct xfs_exchmaps_req	*req)
{
	struct xfs_mount	*mp = req->ip1->i_mount;
	xfs_filblks_t		bmbt_blocks;
	xfs_filblks_t		rmapbt_blocks;
	xfs_filblks_t		resblks = req->resblks;

	/*
	 * Compute the number of bmbt and rmapbt blocks we might need to
	 * handle the estimated number of exchanges.
	 */
	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);

	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);

	/* Make sure the change in file block count doesn't overflow. */
	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
		return -EFBIG;
	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
		return -EFBIG;

	/*
	 * Add together the number of blocks we need to handle btree growth,
	 * once per file, then add that to the number of blocks we need to
	 * reserve for this transaction.
	 */
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;

	/* Can't actually reserve more than UINT_MAX blocks. */
	if (resblks > UINT_MAX)
		return -ENOSPC;

	req->resblks = resblks;
	trace_xfs_exchmaps_final_estimate(req);
	return 0;
}

/* Decide if we can merge two real mappings. */
static inline bool
xmi_can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Don't merge delalloc reservations or other unreal mappings. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
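
/*
 * Illustrative example: mappings (startoff 0, startblock 100, len 4) and
 * (startoff 4, startblock 104, len 4) in the same state are contiguous both
 * logically and physically, so they merge into a single 8-block record,
 * provided the combined length stays within XFS_MAX_BMBT_EXTLEN.
 */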

/*
 * Decide if we can merge three mappings.  The caller must ensure that none
 * of the three mappings is a hole or a delalloc reservation.
 */
static inline bool
xmi_can_merge_all(
	const struct xfs_bmbt_irec	*l,
	const struct xfs_bmbt_irec	*m,
	const struct xfs_bmbt_irec	*r)
{
	xfs_filblks_t		new_len;

	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
	return new_len <= XFS_MAX_BMBT_EXTLEN;
}

#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)

/* Estimate the effect of a single exchange on mapping count. */
static inline int
xmi_delta_nextents_step(
	struct xfs_mount	*mp,
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*curr,
	const struct xfs_bmbt_irec	*new,
	const struct xfs_bmbt_irec	*right)
{
	bool			lhole, rhole, chole, nhole;
	unsigned int		state = 0;
	int			ret = 0;

	lhole = left->br_startblock == HOLESTARTBLOCK;
	rhole = right->br_startblock == HOLESTARTBLOCK;
	chole = curr->br_startblock == HOLESTARTBLOCK;
	nhole = new->br_startblock == HOLESTARTBLOCK;

	if (chole)
		state |= CHOLE;
	if (!lhole && !chole && xmi_can_merge(left, curr))
		state |= CLEFT_CONTIG;
	if (!rhole && !chole && xmi_can_merge(curr, right))
		state |= CRIGHT_CONTIG;
	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
	    !xmi_can_merge_all(left, curr, right))
		state &= ~CRIGHT_CONTIG;

	if (nhole)
		state |= NHOLE;
	if (!lhole && !nhole && xmi_can_merge(left, new))
		state |= NLEFT_CONTIG;
	if (!rhole && !nhole && xmi_can_merge(new, right))
		state |= NRIGHT_CONTIG;
	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
	    !xmi_can_merge_all(left, new, right))
		state &= ~NRIGHT_CONTIG;

	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
	case CLEFT_CONTIG | CRIGHT_CONTIG:
		/*
		 * left/curr/right are the same mapping, so deleting curr
		 * causes 2 new mappings to be created.
		 */
		ret += 2;
		break;
	case 0:
		/*
		 * curr is not contiguous with any mapping, so we remove curr
		 * completely
		 */
		ret--;
		break;
	case CHOLE:
		/* hole, do nothing */
		break;
	case CLEFT_CONTIG:
	case CRIGHT_CONTIG:
		/* trim either left or right, no change */
		break;
	}

	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
	case NLEFT_CONTIG | NRIGHT_CONTIG:
		/*
		 * left/curr/right will become the same mapping, so adding
		 * curr causes the deletion of right.
		 */
		ret--;
		break;
	case 0:
		/* new is not contiguous with any mapping */
		ret++;
		break;
	case NHOLE:
		/* hole, do nothing. */
		break;
	case NLEFT_CONTIG:
	case NRIGHT_CONTIG:
		/* new is absorbed into left or right, no change */
		break;
	}

	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
			state);
	return ret;
}
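
/*
 * Worked example of the state machine above (illustrative): suppose curr is
 * contiguous with both left and right (CBOTH_CONTIG), so removing it splits
 * one record into two (ret += 2), while the incoming new mapping is a hole
 * (NHOLE), which contributes nothing; the step then reports a net mapping
 * count delta of +2 for this file.
 */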

/* Make sure we don't overflow the extent (mapping) counters. */
static inline int
xmi_ensure_delta_nextents(
	struct xfs_exchmaps_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			whichfork = xfs_exchmaps_reqfork(req);
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	uint64_t		new_nextents;
	xfs_extnum_t		max_nextents;

	if (delta < 0)
		return 0;

	/*
	 * It's always an error if the delta causes integer overflow.  delta
	 * needs an explicit cast here to avoid warnings about implicit casts
	 * coded into the overflow check.
	 */
	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
			       &new_nextents))
		return -EFBIG;

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
	    new_nextents > 10)
		return -EFBIG;

	/*
	 * We always promote both inodes to have large extent counts if the
	 * superblock feature is enabled, so we only need to check against the
	 * theoretical maximum.
	 */
	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
					     whichfork);
	if (new_nextents > max_nextents)
		return -EFBIG;

	return 0;
}

/* Find the next mapping after irec. */
static inline int
xmi_next(
	struct xfs_inode	*ip,
	int			bmap_flags,
	const struct xfs_bmbt_irec	*irec,
	struct xfs_bmbt_irec	*nrec)
{
	xfs_fileoff_t		off;
	xfs_filblks_t		blockcount;
	int			nimaps = 1;
	int			error;

	off = irec->br_startoff + irec->br_blockcount;
	blockcount = XFS_MAX_FILEOFF - off;
	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
	if (error)
		return error;
	if (nrec->br_startblock == DELAYSTARTBLOCK ||
	    nrec->br_startoff != off) {
		/*
		 * If we don't get the mapping we want, return a zero-length
		 * mapping, which our estimator function will pretend is a
		 * hole.  We shouldn't get delalloc reservations.
		 */
		nrec->br_startblock = HOLESTARTBLOCK;
	}

	return 0;
}

int __init
xfs_exchmaps_intent_init_cache(void)
{
	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
			sizeof(struct xfs_exchmaps_intent),
			0, 0, NULL);

	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_exchmaps_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_exchmaps_intent_cache);
	xfs_exchmaps_intent_cache = NULL;
}

/*
 * Decide if we will exchange the reflink flags between the two files after
 * the exchange.  The only time we want to do this is if we're exchanging all
 * mappings under EOF and the inode reflink flags have different states.
 */
static inline bool
xmi_can_exchange_reflink_flags(
	const struct xfs_exchmaps_req	*req,
	unsigned int		reflink_state)
{
	struct xfs_mount	*mp = req->ip1->i_mount;

	if (hweight32(reflink_state) != 1)
		return false;
	if (req->startoff1 != 0 || req->startoff2 != 0)
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
		return false;
	return true;
}
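
/*
 * Example of the reflink_state encoding (illustrative): ip1's flag sits in
 * bit 0 and ip2's in bit 1, so hweight32() == 1 means exactly one file is
 * reflinked and the flags can usefully be exchanged; state 0x3 (both) or
 * 0x0 (neither) leaves the flags alone.
 */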

/* Allocate and initialize a new incore intent item from a request. */
struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;
	unsigned int		rs = 0;

	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&xmi->xmi_list);
	xmi->xmi_ip1 = req->ip1;
	xmi->xmi_ip2 = req->ip2;
	xmi->xmi_startoff1 = req->startoff1;
	xmi->xmi_startoff2 = req->startoff2;
	xmi->xmi_blockcount = req->blockcount;
	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;

	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
		return xmi;
	}

	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
		xmi->xmi_isize1 = req->ip2->i_disk_size;
		xmi->xmi_isize2 = req->ip1->i_disk_size;
	}

	/* Record the state of each inode's reflink flag before the op. */
	if (xfs_is_reflink_inode(req->ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(req->ip2))
		rs |= 2;

	/*
	 * Figure out if we're clearing the reflink flags (which effectively
	 * exchanges them) after the operation.
	 */
	if (xmi_can_exchange_reflink_flags(req, rs)) {
		if (rs & 1)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
		if (rs & 2)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;

	return xmi;
}

/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_exchmaps_estimate(
	struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_bmbt_irec	irec1, irec2;
	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t		ip1_blocks = 0, ip2_blocks = 0;
	int64_t			d_nexts1, d_nexts2;
	int			bmap_flags;
	int			error;

	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
	xmi = xfs_exchmaps_init_intent(req);

	/*
	 * To guard against the possibility of overflowing the extent
	 * counters, we have to estimate an upper bound on the potential
	 * increase in that counter.  We can split the mapping at each end of
	 * the range, and for each step of the exchange we can split the
	 * mapping that we're working on if the mappings do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (xmi_has_more_exchange_work(xmi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * exchange.  Because we're simulating the exchange, pass in
		 * adj to capture skipped mappings for correct estimation of
		 * bmbt record merges.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!xmi_has_more_exchange_work(xmi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next mappings from both files. */
		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/* Now pretend we exchanged the mappings. */
		if (xmi_can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (xmi_can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		xmi_advance(xmi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the exchange operation will not overflow.
	 */
	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = xmi_ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_exchmaps_initial_estimate(req);
	error = xfs_exchmaps_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
	return error;
}
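
/*
 * On the initial padding of 3 above (illustrative): exchanging a range can
 * split the mappings that cross each end of the range (two extra records),
 * plus the mapping being worked on in any single step (one more), so each
 * file's extent count delta starts at three before the per-step deltas from
 * xmi_delta_nextents_step() are accumulated.
 */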

/* Set the reflink flag before an operation. */
static inline void
xfs_exchmaps_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/*
 * If either file has shared blocks and we're exchanging data forks, we must
 * flag the other file as having shared blocks so that we get the shared-block
 * rmap functions if we need to fix up the rmaps.
 */
void
xfs_exchmaps_ensure_reflink(
	struct xfs_trans	*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	unsigned int		rs = 0;

	if (xfs_is_reflink_inode(xmi->xmi_ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(xmi->xmi_ip2))
		rs |= 2;

	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);

	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
}

/* Set the large extent count flag before an operation if needed. */
static inline void
xfs_exchmaps_ensure_large_extent_counts(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	if (xfs_inode_has_large_extent_counts(ip))
		return;

	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Widen the extent counter fields of both inodes if necessary. */
void
xfs_exchmaps_upgrade_extent_counts(
	struct xfs_trans	*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	if (!xfs_has_large_extent_counts(tp->t_mountp))
		return;

	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
}

/*
 * Schedule the exchange of a range of mappings from one inode to another.
 *
 * The use of file mapping exchange log intent items ensures the operation
 * can be resumed even if the system goes down.  The caller must commit the
 * transaction to start the work.
 *
 * The caller must ensure the inodes are joined to the transaction and
 * ILOCKed; they will still be joined to the transaction at exit.
 */
void
xfs_exchange_mappings(
	struct xfs_trans	*tp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;

	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);

	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
	ASSERT(xfs_has_exchange_range(tp->t_mountp));

	if (req->blockcount == 0)
		return;

	xmi = xfs_exchmaps_init_intent(req);
	xfs_exchmaps_defer_add(tp, xmi);
	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
}
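
/*
 * Minimal caller sketch (illustrative only, not part of this file): a caller
 * is expected to build a request, size the reservation, and schedule the
 * exchange inside one transaction, roughly:
 *
 *	struct xfs_exchmaps_req	req = {
 *		.ip1		= ip1,
 *		.ip2		= ip2,
 *		.startoff1	= 0,
 *		.startoff2	= 0,
 *		.blockcount	= XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip1))),
 *		.flags		= XFS_EXCHMAPS_SET_SIZES,
 *	};
 *
 *	error = xfs_exchmaps_estimate(&req);	// fills nr_exchanges, resblks
 *	// ...allocate a transaction with req.resblks, then join and ILOCK
 *	// both inodes...
 *	xfs_exchange_mappings(tp, &req);
 *	error = xfs_trans_commit(tp);
 */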