// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_health.h"
#include "xfs_exchmaps_item.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_symlink_remote.h"

struct kmem_cache	*xfs_exchmaps_intent_cache;

/* bmbt mappings adjacent to a pair of records. */
struct xfs_exchmaps_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to reset reflink flag / CoW fork state after an exchange. */

/*
 * If the reflink flag is set on either inode, make sure it has an incore CoW
 * fork, since all reflink inodes must have them.  If there's a CoW fork and it
 * has mappings in it, make sure the inodes are tagged appropriately so that
 * speculative preallocations can be GC'd if we run low on space.
 */
static inline void
xfs_exchmaps_ensure_cowfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*cfork;

	if (xfs_is_reflink_inode(ip))
		xfs_ifork_init_cow(ip);

	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
	if (!cfork)
		return;
	if (cfork->if_bytes > 0)
		xfs_inode_set_cowblocks_tag(ip);
	else
		xfs_inode_clear_cowblocks_tag(ip);
}

/*
 * Adjust the on-disk inode size upwards if needed so that we never add
 * mappings into the file past EOF.  This is crucial so that log recovery won't
 * get confused by the sudden appearance of post-eof mappings.
 */
STATIC void
xfs_exchmaps_update_size(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	xfs_fsize_t		new_isize)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_fsize_t		len;

	if (new_isize < 0)
		return;

	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
		  new_isize);

	if (len <= ip->i_disk_size)
		return;

	trace_xfs_exchmaps_update_inode_size(ip, len);

	ip->i_disk_size = len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
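
/*
 * Worked example for the sizing logic above (hypothetical numbers): on a
 * filesystem with 4096-byte blocks, a mapping with br_startoff = 10 and
 * br_blockcount = 6 ends at XFS_FSB_TO_B(mp, 16) = 65536 bytes.  With
 * new_isize = 60000 and i_disk_size = 40960:
 *
 *	len = min(65536, 60000) = 60000
 *
 * 60000 > 40960, so the ondisk size grows to 60000 and the mapping no
 * longer ends past EOF.
 */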

/* Advance the incore state tracking after exchanging a mapping. */
static inline void
xmi_advance(
	struct xfs_exchmaps_intent	*xmi,
	const struct xfs_bmbt_irec	*irec)
{
	xmi->xmi_startoff1 += irec->br_blockcount;
	xmi->xmi_startoff2 += irec->br_blockcount;
	xmi->xmi_blockcount -= irec->br_blockcount;
}

/* Do we still have more mappings to exchange? */
static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_blockcount > 0;
}

/* Do we have post-operation cleanups to perform? */
static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
				 __XFS_EXCHMAPS_INO2_SHORTFORM);
}

/* Check all mappings to make sure we can actually exchange them. */
int
xfs_exchmaps_check_forks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_ifork		*ifp1, *ifp2;
	int				whichfork = xfs_exchmaps_reqfork(req);

	/* No fork? */
	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
	if (!ifp1 || !ifp2)
		return -EINVAL;

	/* We don't know how to exchange local format forks. */
	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	return 0;
}

#ifdef CONFIG_XFS_QUOTA
/* Log the actual updates to the quota accounting. */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				ip1_delta = 0, ip2_delta = 0;
	unsigned int			qflag;

	qflag = XFS_IS_REALTIME_INODE(xmi->xmi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
}
#else
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
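
/*
 * Sketch of the quota accounting above, with made-up mappings: if irec1
 * is a real 8-block extent leaving file1 and irec2 is a hole, then
 * ip1_delta = -8 and ip2_delta = +8, so file1's dquot gives up eight
 * blocks and file2's dquot absorbs them; qflag selects the realtime
 * counter when file1 is a realtime inode.
 */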

/* Decide if we want to skip this mapping from file1. */
static inline bool
xfs_exchmaps_can_skip_mapping(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = xmi->xmi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written mappings. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = min(irec->br_blockcount,
					  new_end - irec->br_startoff);
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
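
/*
 * Worked example of the rtx trimming above, assuming sb_rextsize = 8 and
 * an unwritten mapping with br_startoff = 5 and br_blockcount = 20:
 *
 *  - First call: startoff 5 is unaligned, so the mapping is trimmed to
 *    end at the next rtx boundary (blocks 5-7) and is exchanged.
 *  - Second call: startoff 8 is aligned, blockcount 17 is not, and
 *    17 > 8, so the mapping is trimmed down to the boundary at block 24
 *    (blocks 8-23) and skipped.
 *  - Third call: only block 24 remains; it is shorter than one rtx, so
 *    it falls through and is exchanged.
 */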

/*
 * Walk forward through the file ranges in @xmi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and xmi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 */
static int
xfs_exchmaps_find_mappings(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_exchmaps_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));

	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
		/* Read mapping from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
				xmi->xmi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != xmi->xmi_startoff1) {
			/*
			 * We should never get no mapping, a delalloc mapping,
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
			continue;
		}

		/* Read mapping from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != xmi->xmi_startoff2) {
			/*
			 * We should never get no mapping, a delalloc mapping,
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only exchange as many blocks as the smaller of the
		 * two mappings.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);

		/* We found something to exchange, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two mappings pointing to the same physical block must not
		 * have different states; that's filesystem corruption.  Move
		 * on to the next mapping if they're both holes or both point
		 * to the same physical space extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(xmi->xmi_ip1,
					xfs_exchmaps_whichfork(xmi));
			xfs_bmap_mark_sick(xmi->xmi_ip2,
					xfs_exchmaps_whichfork(xmi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}

/* Exchange these two mappings. */
static void
xfs_exchmaps_one_step(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_exchmaps_whichfork(xmi);

	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We exchange the file offsets between the two
	 * maps and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information exchanged.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);

	/* Make sure we're not adding mappings past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
				xmi->xmi_isize1);
		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
				xmi->xmi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the XMD item, and if *blockcount is nonzero, it
	 * will log a new XMI item for the remainder and call us back.
	 */
	xmi_advance(xmi, irec1);
}
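
/*
 * Concrete sketch of one exchange step, with hypothetical numbers: if
 * irec1 maps file1 offset 0 to fsblock 100 for 4 blocks and irec2 maps
 * file2 offset 8 to fsblock 200 for 4 blocks, both mappings are
 * unmapped, the br_startoff fields are swapped, and the remap leaves
 * file1 offset 0 pointing at fsblock 200 and file2 offset 8 pointing at
 * fsblock 100.
 */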

/* Convert inode2's leaf attr fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}

/* Convert inode2's block dir fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	bool			isblock;
	int			size;
	int			error;

	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
	if (error)
		return error;

	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}

/* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_exchmaps_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_inode		*ip = xmi->xmi_ip2;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char				*buf;
	int				error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmalloc(ip->i_disk_size + 1, GFP_KERNEL | __GFP_NOLOCKDEP);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kfree(buf);
	return error;
}

/* Clear the reflink flag after an exchange. */
static inline void
xfs_exchmaps_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Finish whatever work might come after an exchange operation. */
static int
xfs_exchmaps_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
		int			error = 0;

		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
			error = xfs_exchmaps_attr_to_sf(tp, xmi);
		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_dir_to_sf(tp, xmi);
		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_link_to_sf(tp, xmi);
		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
		if (error)
			return error;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	return 0;
}

/* Finish one step in a mapping exchange operation, possibly relogging. */
int
xfs_exchmaps_finish_one(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (xmi_has_more_exchange_work(xmi)) {
		/*
		 * If the operation state says that some range of the files
		 * has not yet been exchanged, look for mappings in that range
		 * to exchange.  If we find some mappings, exchange them.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (xmi_has_more_exchange_work(xmi))
			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * exchange and either we just exchanged the last mappings in
		 * the range or we didn't find anything to exchange, update the
		 * ondisk file sizes.
		 */
		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
		    !xmi_has_more_exchange_work(xmi)) {
			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;

			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
		}
	} else if (xmi_has_postop_work(xmi)) {
		/*
		 * Now that we're finished with the exchange operation,
		 * complete the post-op cleanup work.
		 */
		error = xfs_exchmaps_do_postop_work(tp, xmi);
		if (error)
			return error;
	}

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the exchange work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
	}

	return 0;
}

/*
 * Compute the amount of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_exchmaps_bmbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
			XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
}
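
/*
 * For illustration, with hypothetical geometry: if one bmbt block can
 * hold 250 contiguous mappings and XFS_EXTENTADD_SPACE_RES(mp, fork)
 * works out to 5 blocks per potential split, then 1000 estimated
 * exchanges reserve howmany_64(1000, 250) * 5 = 20 blocks per file.
 */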

/* Compute the space we should reserve for the rmap btree expansions. */
static inline uint64_t
xfs_exchmaps_rmapbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return 0;

	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
			XFS_RMAPADD_SPACE_RES(mp);
}

/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
int
xfs_exchmaps_estimate_overhead(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_mount		*mp = req->ip1->i_mount;
	xfs_filblks_t			bmbt_blocks;
	xfs_filblks_t			rmapbt_blocks;
	xfs_filblks_t			resblks = req->resblks;

	/*
	 * Compute the number of bmbt and rmapbt blocks we might need to handle
	 * the estimated number of exchanges.
	 */
	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);

	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);

	/* Make sure the change in file block count doesn't overflow. */
	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
		return -EFBIG;
	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
		return -EFBIG;

	/*
	 * Add together the number of blocks we need to handle btree growth
	 * for both files, then add it to the number of blocks we need to
	 * reserve for this transaction.
	 */
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;

	/* Can't actually reserve more than UINT_MAX blocks. */
	if (resblks > UINT_MAX)
		return -ENOSPC;

	req->resblks = resblks;
	trace_xfs_exchmaps_final_estimate(req);
	return 0;
}

/* Decide if we can merge two real mappings. */
static inline bool
xmi_can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Only merge real mappings; this also rejects delalloc. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state			  == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}

/*
 * Decide if we can merge three mappings.  The caller must ensure that none
 * of the three mappings are holes or delalloc reservations.
 */
static inline bool
xmi_can_merge_all(
	const struct xfs_bmbt_irec	*l,
	const struct xfs_bmbt_irec	*m,
	const struct xfs_bmbt_irec	*r)
{
	xfs_filblks_t			new_len;

	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
	return new_len <= XFS_MAX_BMBT_EXTLEN;
}

#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
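
/*
 * The C* flags describe how the outgoing (current) mapping relates to
 * its left and right neighbors before the exchange; the N* flags
 * describe the incoming (new) mapping afterwards.  For example
 * (hypothetical), state == (CHOLE | NLEFT_CONTIG | NRIGHT_CONTIG) means
 * a hole is being filled by a mapping that bridges both neighbors, so
 * the three records merge into one and the fork nets one fewer mapping.
 */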

/* Estimate the effect of a single exchange on mapping count. */
static inline int
xmi_delta_nextents_step(
	struct xfs_mount		*mp,
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*curr,
	const struct xfs_bmbt_irec	*new,
	const struct xfs_bmbt_irec	*right)
{
	bool				lhole, rhole, chole, nhole;
	unsigned int			state = 0;
	int				ret = 0;

	lhole = left->br_startblock == HOLESTARTBLOCK;
	rhole = right->br_startblock == HOLESTARTBLOCK;
	chole = curr->br_startblock == HOLESTARTBLOCK;
	nhole = new->br_startblock == HOLESTARTBLOCK;

	if (chole)
		state |= CHOLE;
	if (!lhole && !chole && xmi_can_merge(left, curr))
		state |= CLEFT_CONTIG;
	if (!rhole && !chole && xmi_can_merge(curr, right))
		state |= CRIGHT_CONTIG;
	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
	    !xmi_can_merge_all(left, curr, right))
		state &= ~CRIGHT_CONTIG;

	if (nhole)
		state |= NHOLE;
	if (!lhole && !nhole && xmi_can_merge(left, new))
		state |= NLEFT_CONTIG;
	if (!rhole && !nhole && xmi_can_merge(new, right))
		state |= NRIGHT_CONTIG;
	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
	    !xmi_can_merge_all(left, new, right))
		state &= ~NRIGHT_CONTIG;

	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
	case CLEFT_CONTIG | CRIGHT_CONTIG:
		/*
		 * left/curr/right are the same mapping, so deleting curr
		 * causes 2 new mappings to be created.
		 */
		ret += 2;
		break;
	case 0:
		/*
		 * curr is not contiguous with any mapping, so we remove curr
		 * completely.
		 */
		ret--;
		break;
	case CHOLE:
		/* hole, do nothing */
		break;
	case CLEFT_CONTIG:
	case CRIGHT_CONTIG:
		/* trim either left or right, no change */
		break;
	}

	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
	case NLEFT_CONTIG | NRIGHT_CONTIG:
		/*
		 * left/new/right will become the same mapping, so adding
		 * new causes the deletion of right.
		 */
		ret--;
		break;
	case 0:
		/* new is not contiguous with any mapping */
		ret++;
		break;
	case NHOLE:
		/* hole, do nothing. */
		break;
	case NLEFT_CONTIG:
	case NRIGHT_CONTIG:
		/* new is absorbed into left or right, no change */
		break;
	}

	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
			state);
	return ret;
}
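
/*
 * Worked example of the evaluation above (hypothetical state): if curr
 * is contiguous with both neighbors (CLEFT_CONTIG | CRIGHT_CONTIG) and
 * the incoming mapping is a hole (NHOLE), the first switch adds two
 * because deleting curr splits the merged run, and the second switch
 * adds nothing, so this step reports a net increase of two mappings.
 */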

/* Make sure we don't overflow the extent (mapping) counters. */
static inline int
xmi_ensure_delta_nextents(
	struct xfs_exchmaps_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			whichfork = xfs_exchmaps_reqfork(req);
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	uint64_t		new_nextents;
	xfs_extnum_t		max_nextents;

	if (delta < 0)
		return 0;

	/*
	 * It's always an error if the delta causes integer overflow.  delta
	 * needs an explicit cast here to avoid warnings about implicit casts
	 * coded into the overflow check.
	 */
	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
				&new_nextents))
		return -EFBIG;

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
	    new_nextents > 10)
		return -EFBIG;

	/*
	 * We always promote both inodes to have large extent counts if the
	 * superblock feature is enabled, so we only need to check against the
	 * theoretical maximum.
	 */
	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
					     whichfork);
	if (new_nextents > max_nextents)
		return -EFBIG;

	return 0;
}

/* Find the next mapping after irec. */
static inline int
xmi_next(
	struct xfs_inode		*ip,
	int				bmap_flags,
	const struct xfs_bmbt_irec	*irec,
	struct xfs_bmbt_irec		*nrec)
{
	xfs_fileoff_t			off;
	xfs_filblks_t			blockcount;
	int				nimaps = 1;
	int				error;

	off = irec->br_startoff + irec->br_blockcount;
	blockcount = XFS_MAX_FILEOFF - off;
	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
	if (error)
		return error;
	if (nrec->br_startblock == DELAYSTARTBLOCK ||
	    nrec->br_startoff != off) {
		/*
		 * If we don't get the mapping we want, synthesize a hole
		 * mapping so that the estimator function treats this range
		 * as a hole.  We shouldn't get delalloc reservations.
		 */
		nrec->br_startblock = HOLESTARTBLOCK;
	}

	return 0;
}

int __init
xfs_exchmaps_intent_init_cache(void)
{
	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
			sizeof(struct xfs_exchmaps_intent),
			0, 0, NULL);

	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_exchmaps_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_exchmaps_intent_cache);
	xfs_exchmaps_intent_cache = NULL;
}

/*
 * Decide if we will exchange the reflink flags between the two files after the
 * exchange.  The only time we want to do this is if we're exchanging all
 * mappings under EOF and the inode reflink flags have different states.
 */
static inline bool
xmi_can_exchange_reflink_flags(
	const struct xfs_exchmaps_req	*req,
	unsigned int			reflink_state)
{
	struct xfs_mount		*mp = req->ip1->i_mount;

	if (hweight32(reflink_state) != 1)
		return false;
	if (req->startoff1 != 0 || req->startoff2 != 0)
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
		return false;
	return true;
}
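
/*
 * Example (hypothetical): if only file1 is a reflink inode (rs == 1)
 * and the request spans block 0 through EOF of both files, this returns
 * true and the flags will effectively be exchanged; if both files are
 * reflink inodes (rs == 3), hweight32() is 2 and there is nothing to
 * exchange, so this returns false.
 */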

/* Allocate and initialize a new incore intent item from a request. */
struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;
	unsigned int			rs = 0;

	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&xmi->xmi_list);
	xmi->xmi_ip1 = req->ip1;
	xmi->xmi_ip2 = req->ip2;
	xmi->xmi_startoff1 = req->startoff1;
	xmi->xmi_startoff2 = req->startoff2;
	xmi->xmi_blockcount = req->blockcount;
	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;

	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
		return xmi;
	}

	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
		xmi->xmi_isize1 = req->ip2->i_disk_size;
		xmi->xmi_isize2 = req->ip1->i_disk_size;
	}

	/* Record the state of each inode's reflink flag before the op. */
	if (xfs_is_reflink_inode(req->ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(req->ip2))
		rs |= 2;

	/*
	 * Figure out if we're clearing the reflink flags (which effectively
	 * exchanges them) after the operation.
	 */
	if (xmi_can_exchange_reflink_flags(req, rs)) {
		if (rs & 1)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
		if (rs & 2)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;

	return xmi;
}

/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_exchmaps_estimate(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
	xmi = xfs_exchmaps_init_intent(req);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the mapping at each end of the range, and for
	 * each step of the exchange we can split the mapping that we're
	 * working on if the mappings do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (xmi_has_more_exchange_work(xmi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * exchange.  Because we're simulating the exchange, pass in
		 * adj to capture skipped mappings for correct estimation of
		 * bmbt record merges.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!xmi_has_more_exchange_work(xmi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next mappings from both files. */
		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/* Now pretend we exchanged the mappings. */
		if (xmi_can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (xmi_can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		xmi_advance(xmi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the exchange operation will not overflow.
	 */
	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = xmi_ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_exchmaps_initial_estimate(req);
	error = xfs_exchmaps_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
	return error;
}

/* Set the reflink flag before an operation. */
static inline void
xfs_exchmaps_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/*
 * If either file has shared blocks and we're exchanging data forks, we must
 * flag the other file as having shared blocks so that we get the shared-block
 * rmap functions if we need to fix up the rmaps.
 */
void
xfs_exchmaps_ensure_reflink(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	unsigned int				rs = 0;

	if (xfs_is_reflink_inode(xmi->xmi_ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(xmi->xmi_ip2))
		rs |= 2;

	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);

	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
}

/* Set the large extent count flag before an operation if needed. */
static inline void
xfs_exchmaps_ensure_large_extent_counts(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	if (xfs_inode_has_large_extent_counts(ip))
		return;

	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Widen the extent counter fields of both inodes if necessary. */
void
xfs_exchmaps_upgrade_extent_counts(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	if (!xfs_has_large_extent_counts(tp->t_mountp))
		return;

	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
}

/*
 * Schedule the exchange of a range of mappings from one inode to another.
 *
 * The use of file mapping exchange log intent items ensures the operation can
 * be resumed even if the system goes down.  The caller must commit the
 * transaction to start the work.
 *
 * The caller must ensure that the inodes are joined to the transaction and
 * ILOCKed; they will still be joined to the transaction at exit.
 */
void
xfs_exchange_mappings(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;

	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);

	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
	ASSERT(xfs_has_exchange_range(tp->t_mountp));

	if (req->blockcount == 0)
		return;

	xmi = xfs_exchmaps_init_intent(req);
	xfs_exchmaps_defer_add(tp, xmi);
	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
}
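
/*
 * Hedged usage sketch, not part of this file's call graph; transaction
 * setup is abbreviated and all values are hypothetical.  A caller might
 * drive a whole-file data fork exchange roughly like this:
 *
 *	struct xfs_exchmaps_req	req = {
 *		.ip1		= ip1,
 *		.ip2		= ip2,
 *		.startoff1	= 0,
 *		.startoff2	= 0,
 *		.blockcount	= XFS_B_TO_FSB(mp, ip1->i_disk_size),
 *		.flags		= XFS_EXCHMAPS_SET_SIZES,
 *	};
 *
 *	error = xfs_exchmaps_estimate(&req);
 *	(allocate a transaction with req.resblks, then join and ILOCK
 *	 both inodes)
 *	xfs_exchange_mappings(tp, &req);
 *	error = xfs_trans_commit(tp);
 */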