xref: /linux/fs/xfs/libxfs/xfs_exchmaps.c (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_defer.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_bmap.h"
17 #include "xfs_icache.h"
18 #include "xfs_quota.h"
19 #include "xfs_exchmaps.h"
20 #include "xfs_trace.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_trans_space.h"
23 #include "xfs_error.h"
24 #include "xfs_errortag.h"
25 #include "xfs_health.h"
26 #include "xfs_exchmaps_item.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_attr_leaf.h"
30 #include "xfs_attr.h"
31 #include "xfs_dir2_priv.h"
32 #include "xfs_dir2.h"
33 #include "xfs_symlink_remote.h"
34 
/*
 * Slab cache that backs incore exchange-mappings intent structures.  It is
 * created by xfs_exchmaps_intent_init_cache() and torn down by
 * xfs_exchmaps_intent_destroy_cache().
 */
struct kmem_cache	*xfs_exchmaps_intent_cache;
36 
37 /* bmbt mappings adjacent to a pair of records. */
38 struct xfs_exchmaps_adjacent {
39 	struct xfs_bmbt_irec		left1;
40 	struct xfs_bmbt_irec		right1;
41 	struct xfs_bmbt_irec		left2;
42 	struct xfs_bmbt_irec		right2;
43 };
44 
/*
 * Initializer for struct xfs_exchmaps_adjacent that starts every neighbour
 * record out as a hole (br_startblock == HOLESTARTBLOCK).
 */
#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}
51 
52 /* Information to reset reflink flag / CoW fork state after an exchange. */
53 
54 /*
55  * If the reflink flag is set on either inode, make sure it has an incore CoW
56  * fork, since all reflink inodes must have them.  If there's a CoW fork and it
57  * has mappings in it, make sure the inodes are tagged appropriately so that
58  * speculative preallocations can be GC'd if we run low of space.
59  */
60 static inline void
xfs_exchmaps_ensure_cowfork(struct xfs_inode * ip)61 xfs_exchmaps_ensure_cowfork(
62 	struct xfs_inode	*ip)
63 {
64 	struct xfs_ifork	*cfork;
65 
66 	if (xfs_is_reflink_inode(ip))
67 		xfs_ifork_init_cow(ip);
68 
69 	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
70 	if (!cfork)
71 		return;
72 	if (cfork->if_bytes > 0)
73 		xfs_inode_set_cowblocks_tag(ip);
74 	else
75 		xfs_inode_clear_cowblocks_tag(ip);
76 }
77 
78 /*
79  * Adjust the on-disk inode size upwards if needed so that we never add
80  * mappings into the file past EOF.  This is crucial so that log recovery won't
81  * get confused by the sudden appearance of post-eof mappings.
82  */
83 STATIC void
xfs_exchmaps_update_size(struct xfs_trans * tp,struct xfs_inode * ip,struct xfs_bmbt_irec * imap,xfs_fsize_t new_isize)84 xfs_exchmaps_update_size(
85 	struct xfs_trans	*tp,
86 	struct xfs_inode	*ip,
87 	struct xfs_bmbt_irec	*imap,
88 	xfs_fsize_t		new_isize)
89 {
90 	struct xfs_mount	*mp = tp->t_mountp;
91 	xfs_fsize_t		len;
92 
93 	if (new_isize < 0)
94 		return;
95 
96 	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
97 		  new_isize);
98 
99 	if (len <= ip->i_disk_size)
100 		return;
101 
102 	trace_xfs_exchmaps_update_inode_size(ip, len);
103 
104 	ip->i_disk_size = len;
105 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
106 }
107 
108 /* Advance the incore state tracking after exchanging a mapping. */
109 static inline void
xmi_advance(struct xfs_exchmaps_intent * xmi,const struct xfs_bmbt_irec * irec)110 xmi_advance(
111 	struct xfs_exchmaps_intent	*xmi,
112 	const struct xfs_bmbt_irec	*irec)
113 {
114 	xmi->xmi_startoff1 += irec->br_blockcount;
115 	xmi->xmi_startoff2 += irec->br_blockcount;
116 	xmi->xmi_blockcount -= irec->br_blockcount;
117 }
118 
119 /* Do we still have more mappings to exchange? */
120 static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent * xmi)121 xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
122 {
123 	return xmi->xmi_blockcount > 0;
124 }
125 
126 /* Do we have post-operation cleanups to perform? */
127 static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent * xmi)128 xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
129 {
130 	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
131 				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
132 				 __XFS_EXCHMAPS_INO2_SHORTFORM);
133 }
134 
135 /* Check all mappings to make sure we can actually exchange them. */
136 int
xfs_exchmaps_check_forks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)137 xfs_exchmaps_check_forks(
138 	struct xfs_mount		*mp,
139 	const struct xfs_exchmaps_req	*req)
140 {
141 	struct xfs_ifork		*ifp1, *ifp2;
142 	int				whichfork = xfs_exchmaps_reqfork(req);
143 
144 	/* No fork? */
145 	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
146 	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
147 	if (!ifp1 || !ifp2)
148 		return -EINVAL;
149 
150 	/* We don't know how to exchange local format forks. */
151 	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
152 	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
153 		return -EINVAL;
154 
155 	return 0;
156 }
157 
#ifdef CONFIG_XFS_QUOTA
/*
 * Log the quota accounting changes caused by exchanging @irec1 (currently
 * mapped in file1) with @irec2 (currently mapped in file2).  Only real
 * extents move blocks between the two files; each such extent is debited from
 * the file that loses it and credited to the file that gains it.  The counter
 * to adjust (realtime vs. regular block count) is picked from file1.
 */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				ip1_delta = 0;
	int64_t				ip2_delta = 0;
	unsigned int			qflag = XFS_TRANS_DQ_BCOUNT;

	if (XFS_IS_REALTIME_INODE(xmi->xmi_ip1))
		qflag = XFS_TRANS_DQ_RTBCOUNT;

	/* file1's extent moves over to file2 */
	if (xfs_bmap_is_real_extent(irec1)) {
		ip2_delta += irec1->br_blockcount;
		ip1_delta -= irec1->br_blockcount;
	}

	/* file2's extent moves over to file1 */
	if (xfs_bmap_is_real_extent(irec2)) {
		ip2_delta -= irec2->br_blockcount;
		ip1_delta += irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
}
#else
/* Without quota support there is nothing to account for. */
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
189 
190 /* Decide if we want to skip this mapping from file1. */
191 static inline bool
xfs_exchmaps_can_skip_mapping(struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec)192 xfs_exchmaps_can_skip_mapping(
193 	struct xfs_exchmaps_intent	*xmi,
194 	struct xfs_bmbt_irec		*irec)
195 {
196 	struct xfs_mount		*mp = xmi->xmi_ip1->i_mount;
197 
198 	/* Do not skip this mapping if the caller did not tell us to. */
199 	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
200 		return false;
201 
202 	/* Do not skip mapped, written mappings. */
203 	if (xfs_bmap_is_written_extent(irec))
204 		return false;
205 
206 	/*
207 	 * The mapping is unwritten or a hole.  It cannot be a delalloc
208 	 * reservation because we already excluded those.  It cannot be an
209 	 * unwritten extent with dirty page cache because we flushed the page
210 	 * cache.  For files where the allocation unit is 1FSB (files on the
211 	 * data dev, rt files if the extent size is 1FSB), we can safely
212 	 * skip this mapping.
213 	 */
214 	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
215 		return true;
216 
217 	/*
218 	 * For a realtime file with a multi-fsb allocation unit, the decision
219 	 * is trickier because we can only swap full allocation units.
220 	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
221 	 * partially written, but they can also appear for preallocations.
222 	 *
223 	 * If the mapping is a hole, skip it entirely.  Holes should align with
224 	 * rtx boundaries.
225 	 */
226 	if (!xfs_bmap_is_real_extent(irec))
227 		return true;
228 
229 	/*
230 	 * All mappings below this point are unwritten.
231 	 *
232 	 * - If the beginning is not aligned to an rtx, trim the end of the
233 	 *   mapping so that it does not cross an rtx boundary, and swap it.
234 	 *
235 	 * - If both ends are aligned to an rtx, skip the entire mapping.
236 	 */
237 	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
238 		xfs_fileoff_t	new_end;
239 
240 		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
241 		irec->br_blockcount = min(irec->br_blockcount,
242 					  new_end - irec->br_startoff);
243 		return false;
244 	}
245 	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
246 		return true;
247 
248 	/*
249 	 * All mappings below this point are unwritten, start on an rtx
250 	 * boundary, and do not end on an rtx boundary.
251 	 *
252 	 * - If the mapping is longer than one rtx, trim the end of the mapping
253 	 *   down to an rtx boundary and skip it.
254 	 *
255 	 * - The mapping is shorter than one rtx.  Swap it.
256 	 */
257 	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
258 		xfs_fileoff_t	new_end;
259 
260 		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
261 				mp->m_sb.sb_rextsize);
262 		irec->br_blockcount = new_end - irec->br_startoff;
263 		return true;
264 	}
265 
266 	return false;
267 }
268 
269 /*
270  * Walk forward through the file ranges in @xmi until we find two different
271  * mappings to exchange.  If there is work to do, return the mappings;
272  * otherwise we've reached the end of the range and xmi_blockcount will be
273  * zero.
274  *
275  * If the walk skips over a pair of mappings to the same storage, save them as
276  * the left records in @adj (if provided) so that the simulation phase can
277  * avoid an extra lookup.
278   */
279 static int
xfs_exchmaps_find_mappings(struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec1,struct xfs_bmbt_irec * irec2,struct xfs_exchmaps_adjacent * adj)280 xfs_exchmaps_find_mappings(
281 	struct xfs_exchmaps_intent	*xmi,
282 	struct xfs_bmbt_irec		*irec1,
283 	struct xfs_bmbt_irec		*irec2,
284 	struct xfs_exchmaps_adjacent	*adj)
285 {
286 	int				nimaps;
287 	int				bmap_flags;
288 	int				error;
289 
290 	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));
291 
292 	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
293 		/* Read mapping from the first file */
294 		nimaps = 1;
295 		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
296 				xmi->xmi_blockcount, irec1, &nimaps,
297 				bmap_flags);
298 		if (error)
299 			return error;
300 		if (nimaps != 1 ||
301 		    irec1->br_startblock == DELAYSTARTBLOCK ||
302 		    irec1->br_startoff != xmi->xmi_startoff1) {
303 			/*
304 			 * We should never get no mapping or a delalloc mapping
305 			 * or something that doesn't match what we asked for,
306 			 * since the caller flushed both inodes and we hold the
307 			 * ILOCKs for both inodes.
308 			 */
309 			ASSERT(0);
310 			return -EINVAL;
311 		}
312 
313 		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
314 			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
315 			continue;
316 		}
317 
318 		/* Read mapping from the second file */
319 		nimaps = 1;
320 		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
321 				irec1->br_blockcount, irec2, &nimaps,
322 				bmap_flags);
323 		if (error)
324 			return error;
325 		if (nimaps != 1 ||
326 		    irec2->br_startblock == DELAYSTARTBLOCK ||
327 		    irec2->br_startoff != xmi->xmi_startoff2) {
328 			/*
329 			 * We should never get no mapping or a delalloc mapping
330 			 * or something that doesn't match what we asked for,
331 			 * since the caller flushed both inodes and we hold the
332 			 * ILOCKs for both inodes.
333 			 */
334 			ASSERT(0);
335 			return -EINVAL;
336 		}
337 
338 		/*
339 		 * We can only exchange as many blocks as the smaller of the
340 		 * two mapping maps.
341 		 */
342 		irec1->br_blockcount = min(irec1->br_blockcount,
343 					   irec2->br_blockcount);
344 
345 		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
346 		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);
347 
348 		/* We found something to exchange, so return it. */
349 		if (irec1->br_startblock != irec2->br_startblock)
350 			return 0;
351 
352 		/*
353 		 * Two mappings pointing to the same physical block must not
354 		 * have different states; that's filesystem corruption.  Move
355 		 * on to the next mapping if they're both holes or both point
356 		 * to the same physical space extent.
357 		 */
358 		if (irec1->br_state != irec2->br_state) {
359 			xfs_bmap_mark_sick(xmi->xmi_ip1,
360 					xfs_exchmaps_whichfork(xmi));
361 			xfs_bmap_mark_sick(xmi->xmi_ip2,
362 					xfs_exchmaps_whichfork(xmi));
363 			return -EFSCORRUPTED;
364 		}
365 
366 		/*
367 		 * Save the mappings if we're estimating work and skipping
368 		 * these identical mappings.
369 		 */
370 		if (adj) {
371 			memcpy(&adj->left1, irec1, sizeof(*irec1));
372 			memcpy(&adj->left2, irec2, sizeof(*irec2));
373 		}
374 	}
375 
376 	return 0;
377 }
378 
379 /* Exchange these two mappings. */
380 static void
xfs_exchmaps_one_step(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec1,struct xfs_bmbt_irec * irec2)381 xfs_exchmaps_one_step(
382 	struct xfs_trans		*tp,
383 	struct xfs_exchmaps_intent	*xmi,
384 	struct xfs_bmbt_irec		*irec1,
385 	struct xfs_bmbt_irec		*irec2)
386 {
387 	int				whichfork = xfs_exchmaps_whichfork(xmi);
388 
389 	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);
390 
391 	/* Remove both mappings. */
392 	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
393 	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);
394 
395 	/*
396 	 * Re-add both mappings.  We exchange the file offsets between the two
397 	 * maps and add the opposite map, which has the effect of filling the
398 	 * logical offsets we just unmapped, but with with the physical mapping
399 	 * information exchanged.
400 	 */
401 	swap(irec1->br_startoff, irec2->br_startoff);
402 	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
403 	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);
404 
405 	/* Make sure we're not adding mappings past EOF. */
406 	if (whichfork == XFS_DATA_FORK) {
407 		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
408 				xmi->xmi_isize1);
409 		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
410 				xmi->xmi_isize2);
411 	}
412 
413 	/*
414 	 * Advance our cursor and exit.   The caller (either defer ops or log
415 	 * recovery) will log the XMD item, and if *blockcount is nonzero, it
416 	 * will log a new XMI item for the remainder and call us back.
417 	 */
418 	xmi_advance(xmi, irec1);
419 }
420 
421 /* Convert inode2's leaf attr fork back to shortform, if possible.. */
422 STATIC int
xfs_exchmaps_attr_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)423 xfs_exchmaps_attr_to_sf(
424 	struct xfs_trans		*tp,
425 	struct xfs_exchmaps_intent	*xmi)
426 {
427 	struct xfs_da_args	args = {
428 		.dp		= xmi->xmi_ip2,
429 		.geo		= tp->t_mountp->m_attr_geo,
430 		.whichfork	= XFS_ATTR_FORK,
431 		.trans		= tp,
432 		.owner		= xmi->xmi_ip2->i_ino,
433 	};
434 	struct xfs_buf		*bp;
435 	int			forkoff;
436 	int			error;
437 
438 	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
439 		return 0;
440 
441 	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
442 			&bp);
443 	if (error)
444 		return error;
445 
446 	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
447 	if (forkoff == 0)
448 		return 0;
449 
450 	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
451 }
452 
453 /* Convert inode2's block dir fork back to shortform, if possible.. */
454 STATIC int
xfs_exchmaps_dir_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)455 xfs_exchmaps_dir_to_sf(
456 	struct xfs_trans		*tp,
457 	struct xfs_exchmaps_intent	*xmi)
458 {
459 	struct xfs_da_args	args = {
460 		.dp		= xmi->xmi_ip2,
461 		.geo		= tp->t_mountp->m_dir_geo,
462 		.whichfork	= XFS_DATA_FORK,
463 		.trans		= tp,
464 		.owner		= xmi->xmi_ip2->i_ino,
465 	};
466 	struct xfs_dir2_sf_hdr	sfh;
467 	struct xfs_buf		*bp;
468 	int			size;
469 	int			error = 0;
470 
471 	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
472 		return error;
473 
474 	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
475 	if (error)
476 		return error;
477 
478 	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
479 	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
480 		return 0;
481 
482 	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
483 }
484 
485 /* Convert inode2's remote symlink target back to shortform, if possible. */
486 STATIC int
xfs_exchmaps_link_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)487 xfs_exchmaps_link_to_sf(
488 	struct xfs_trans		*tp,
489 	struct xfs_exchmaps_intent	*xmi)
490 {
491 	struct xfs_inode		*ip = xmi->xmi_ip2;
492 	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
493 	char				*buf;
494 	int				error;
495 
496 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
497 	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
498 		return 0;
499 
500 	/* Read the current symlink target into a buffer. */
501 	buf = kmalloc(ip->i_disk_size + 1,
502 			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
503 	if (!buf) {
504 		ASSERT(0);
505 		return -ENOMEM;
506 	}
507 
508 	error = xfs_symlink_remote_read(ip, buf);
509 	if (error)
510 		goto free;
511 
512 	/* Remove the blocks. */
513 	error = xfs_symlink_remote_truncate(tp, ip);
514 	if (error)
515 		goto free;
516 
517 	/* Convert fork to local format and log our changes. */
518 	xfs_idestroy_fork(ifp);
519 	ifp->if_bytes = 0;
520 	ifp->if_format = XFS_DINODE_FMT_LOCAL;
521 	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
522 	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
523 free:
524 	kfree(buf);
525 	return error;
526 }
527 
528 /* Clear the reflink flag after an exchange. */
529 static inline void
xfs_exchmaps_clear_reflink(struct xfs_trans * tp,struct xfs_inode * ip)530 xfs_exchmaps_clear_reflink(
531 	struct xfs_trans	*tp,
532 	struct xfs_inode	*ip)
533 {
534 	trace_xfs_reflink_unset_inode_flag(ip);
535 
536 	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
537 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
538 }
539 
540 /* Finish whatever work might come after an exchange operation. */
541 static int
xfs_exchmaps_do_postop_work(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)542 xfs_exchmaps_do_postop_work(
543 	struct xfs_trans		*tp,
544 	struct xfs_exchmaps_intent	*xmi)
545 {
546 	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
547 		int			error = 0;
548 
549 		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
550 			error = xfs_exchmaps_attr_to_sf(tp, xmi);
551 		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
552 			error = xfs_exchmaps_dir_to_sf(tp, xmi);
553 		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
554 			error = xfs_exchmaps_link_to_sf(tp, xmi);
555 		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
556 		if (error)
557 			return error;
558 	}
559 
560 	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
561 		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
562 		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
563 	}
564 
565 	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
566 		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
567 		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
568 	}
569 
570 	return 0;
571 }
572 
573 /* Finish one step in a mapping exchange operation, possibly relogging. */
574 int
xfs_exchmaps_finish_one(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)575 xfs_exchmaps_finish_one(
576 	struct xfs_trans		*tp,
577 	struct xfs_exchmaps_intent	*xmi)
578 {
579 	struct xfs_bmbt_irec		irec1, irec2;
580 	int				error;
581 
582 	if (xmi_has_more_exchange_work(xmi)) {
583 		/*
584 		 * If the operation state says that some range of the files
585 		 * have not yet been exchanged, look for mappings in that range
586 		 * to exchange.  If we find some mappings, exchange them.
587 		 */
588 		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
589 		if (error)
590 			return error;
591 
592 		if (xmi_has_more_exchange_work(xmi))
593 			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);
594 
595 		/*
596 		 * If the caller asked us to exchange the file sizes after the
597 		 * exchange and either we just exchanged the last mappings in
598 		 * the range or we didn't find anything to exchange, update the
599 		 * ondisk file sizes.
600 		 */
601 		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
602 		    !xmi_has_more_exchange_work(xmi)) {
603 			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
604 			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;
605 
606 			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
607 			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
608 		}
609 	} else if (xmi_has_postop_work(xmi)) {
610 		/*
611 		 * Now that we're finished with the exchange operation,
612 		 * complete the post-op cleanup work.
613 		 */
614 		error = xfs_exchmaps_do_postop_work(tp, xmi);
615 		if (error)
616 			return error;
617 	}
618 
619 	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
620 		return -EIO;
621 
622 	/* If we still have work to do, ask for a new transaction. */
623 	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
624 		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
625 		return -EAGAIN;
626 	}
627 
628 	/*
629 	 * If we reach here, we've finished all the exchange work and the post
630 	 * operation work.  The last thing we need to do before returning to
631 	 * the caller is to make sure that COW forks are set up correctly.
632 	 */
633 	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
634 		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
635 		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
636 	}
637 
638 	return 0;
639 }
640 
641 /*
642  * Compute the amount of bmbt blocks we should reserve for each file.  In the
643  * worst case, each exchange will fill a hole with a new mapping, which could
644  * result in a btree split every time we add a new leaf block.
645  */
646 static inline uint64_t
xfs_exchmaps_bmbt_blocks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)647 xfs_exchmaps_bmbt_blocks(
648 	struct xfs_mount		*mp,
649 	const struct xfs_exchmaps_req	*req)
650 {
651 	return howmany_64(req->nr_exchanges,
652 					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
653 			XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
654 }
655 
656 /* Compute the space we should reserve for the rmap btree expansions. */
657 static inline uint64_t
xfs_exchmaps_rmapbt_blocks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)658 xfs_exchmaps_rmapbt_blocks(
659 	struct xfs_mount		*mp,
660 	const struct xfs_exchmaps_req	*req)
661 {
662 	if (!xfs_has_rmapbt(mp))
663 		return 0;
664 	if (XFS_IS_REALTIME_INODE(req->ip1))
665 		return howmany_64(req->nr_exchanges,
666 					XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
667 			XFS_RTRMAPADD_SPACE_RES(mp);
668 
669 	return howmany_64(req->nr_exchanges,
670 					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
671 			XFS_RMAPADD_SPACE_RES(mp);
672 }
673 
674 /* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
675 int
xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req * req)676 xfs_exchmaps_estimate_overhead(
677 	struct xfs_exchmaps_req		*req)
678 {
679 	struct xfs_mount		*mp = req->ip1->i_mount;
680 	xfs_filblks_t			bmbt_blocks;
681 	xfs_filblks_t			rmapbt_blocks;
682 	xfs_filblks_t			resblks = req->resblks;
683 
684 	/*
685 	 * Compute the number of bmbt and rmapbt blocks we might need to handle
686 	 * the estimated number of exchanges.
687 	 */
688 	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
689 	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);
690 
691 	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);
692 
693 	/* Make sure the change in file block count doesn't overflow. */
694 	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
695 		return -EFBIG;
696 	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
697 		return -EFBIG;
698 
699 	/*
700 	 * Add together the number of blocks we need to handle btree growth,
701 	 * then add it to the number of blocks we need to reserve to this
702 	 * transaction.
703 	 */
704 	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
705 		return -ENOSPC;
706 	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
707 		return -ENOSPC;
708 	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
709 		return -ENOSPC;
710 	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
711 		return -ENOSPC;
712 
713 	/* Can't actually reserve more than UINT_MAX blocks. */
714 	if (req->resblks > UINT_MAX)
715 		return -ENOSPC;
716 
717 	req->resblks = resblks;
718 	trace_xfs_exchmaps_final_estimate(req);
719 	return 0;
720 }
721 
722 /* Decide if we can merge two real mappings. */
723 static inline bool
xmi_can_merge(const struct xfs_bmbt_irec * b1,const struct xfs_bmbt_irec * b2)724 xmi_can_merge(
725 	const struct xfs_bmbt_irec	*b1,
726 	const struct xfs_bmbt_irec	*b2)
727 {
728 	/* Don't merge holes. */
729 	if (b1->br_startblock == HOLESTARTBLOCK ||
730 	    b2->br_startblock == HOLESTARTBLOCK)
731 		return false;
732 
733 	/* We don't merge holes. */
734 	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
735 		return false;
736 
737 	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
738 	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
739 	    b1->br_state			  == b2->br_state &&
740 	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
741 		return true;
742 
743 	return false;
744 }
745 
746 /*
747  * Decide if we can merge three mappings.  Caller must ensure all three
748  * mappings must not be holes or delalloc reservations.
749  */
750 static inline bool
xmi_can_merge_all(const struct xfs_bmbt_irec * l,const struct xfs_bmbt_irec * m,const struct xfs_bmbt_irec * r)751 xmi_can_merge_all(
752 	const struct xfs_bmbt_irec	*l,
753 	const struct xfs_bmbt_irec	*m,
754 	const struct xfs_bmbt_irec	*r)
755 {
756 	xfs_filblks_t			new_len;
757 
758 	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
759 	return new_len <= XFS_MAX_BMBT_EXTLEN;
760 }
761 
/*
 * State bits used by xmi_delta_nextents_step().  The C* bits describe the
 * mapping that is currently in place ("curr"): whether it can merge with its
 * left or right neighbour, or is a hole.
 */
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

/* The N* bits describe the same properties for the incoming "new" mapping. */
#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
771 
772 /* Estimate the effect of a single exchange on mapping count. */
773 static inline int
xmi_delta_nextents_step(struct xfs_mount * mp,const struct xfs_bmbt_irec * left,const struct xfs_bmbt_irec * curr,const struct xfs_bmbt_irec * new,const struct xfs_bmbt_irec * right)774 xmi_delta_nextents_step(
775 	struct xfs_mount		*mp,
776 	const struct xfs_bmbt_irec	*left,
777 	const struct xfs_bmbt_irec	*curr,
778 	const struct xfs_bmbt_irec	*new,
779 	const struct xfs_bmbt_irec	*right)
780 {
781 	bool				lhole, rhole, chole, nhole;
782 	unsigned int			state = 0;
783 	int				ret = 0;
784 
785 	lhole = left->br_startblock == HOLESTARTBLOCK;
786 	rhole = right->br_startblock == HOLESTARTBLOCK;
787 	chole = curr->br_startblock == HOLESTARTBLOCK;
788 	nhole = new->br_startblock == HOLESTARTBLOCK;
789 
790 	if (chole)
791 		state |= CHOLE;
792 	if (!lhole && !chole && xmi_can_merge(left, curr))
793 		state |= CLEFT_CONTIG;
794 	if (!rhole && !chole && xmi_can_merge(curr, right))
795 		state |= CRIGHT_CONTIG;
796 	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
797 	    !xmi_can_merge_all(left, curr, right))
798 		state &= ~CRIGHT_CONTIG;
799 
800 	if (nhole)
801 		state |= NHOLE;
802 	if (!lhole && !nhole && xmi_can_merge(left, new))
803 		state |= NLEFT_CONTIG;
804 	if (!rhole && !nhole && xmi_can_merge(new, right))
805 		state |= NRIGHT_CONTIG;
806 	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
807 	    !xmi_can_merge_all(left, new, right))
808 		state &= ~NRIGHT_CONTIG;
809 
810 	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
811 	case CLEFT_CONTIG | CRIGHT_CONTIG:
812 		/*
813 		 * left/curr/right are the same mapping, so deleting curr
814 		 * causes 2 new mappings to be created.
815 		 */
816 		ret += 2;
817 		break;
818 	case 0:
819 		/*
820 		 * curr is not contiguous with any mapping, so we remove curr
821 		 * completely
822 		 */
823 		ret--;
824 		break;
825 	case CHOLE:
826 		/* hole, do nothing */
827 		break;
828 	case CLEFT_CONTIG:
829 	case CRIGHT_CONTIG:
830 		/* trim either left or right, no change */
831 		break;
832 	}
833 
834 	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
835 	case NLEFT_CONTIG | NRIGHT_CONTIG:
836 		/*
837 		 * left/curr/right will become the same mapping, so adding
838 		 * curr causes the deletion of right.
839 		 */
840 		ret--;
841 		break;
842 	case 0:
843 		/* new is not contiguous with any mapping */
844 		ret++;
845 		break;
846 	case NHOLE:
847 		/* hole, do nothing. */
848 		break;
849 	case NLEFT_CONTIG:
850 	case NRIGHT_CONTIG:
851 		/* new is absorbed into left or right, no change */
852 		break;
853 	}
854 
855 	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
856 			state);
857 	return ret;
858 }
859 
860 /* Make sure we don't overflow the extent (mapping) counters. */
861 static inline int
xmi_ensure_delta_nextents(struct xfs_exchmaps_req * req,struct xfs_inode * ip,int64_t delta)862 xmi_ensure_delta_nextents(
863 	struct xfs_exchmaps_req	*req,
864 	struct xfs_inode	*ip,
865 	int64_t			delta)
866 {
867 	struct xfs_mount	*mp = ip->i_mount;
868 	int			whichfork = xfs_exchmaps_reqfork(req);
869 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
870 	uint64_t		new_nextents;
871 	xfs_extnum_t		max_nextents;
872 
873 	if (delta < 0)
874 		return 0;
875 
876 	/*
877 	 * It's always an error if the delta causes integer overflow.  delta
878 	 * needs an explicit cast here to avoid warnings about implicit casts
879 	 * coded into the overflow check.
880 	 */
881 	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
882 				&new_nextents))
883 		return -EFBIG;
884 
885 	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
886 	    new_nextents > 10)
887 		return -EFBIG;
888 
889 	/*
890 	 * We always promote both inodes to have large extent counts if the
891 	 * superblock feature is enabled, so we only need to check against the
892 	 * theoretical maximum.
893 	 */
894 	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
895 					     whichfork);
896 	if (new_nextents > max_nextents)
897 		return -EFBIG;
898 
899 	return 0;
900 }
901 
902 /* Find the next mapping after irec. */
903 static inline int
xmi_next(struct xfs_inode * ip,int bmap_flags,const struct xfs_bmbt_irec * irec,struct xfs_bmbt_irec * nrec)904 xmi_next(
905 	struct xfs_inode		*ip,
906 	int				bmap_flags,
907 	const struct xfs_bmbt_irec	*irec,
908 	struct xfs_bmbt_irec		*nrec)
909 {
910 	xfs_fileoff_t			off;
911 	xfs_filblks_t			blockcount;
912 	int				nimaps = 1;
913 	int				error;
914 
915 	off = irec->br_startoff + irec->br_blockcount;
916 	blockcount = XFS_MAX_FILEOFF - off;
917 	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
918 	if (error)
919 		return error;
920 	if (nrec->br_startblock == DELAYSTARTBLOCK ||
921 	    nrec->br_startoff != off) {
922 		/*
923 		 * If we don't get the mapping we want, return a zero-length
924 		 * mapping, which our estimator function will pretend is a hole.
925 		 * We shouldn't get delalloc reservations.
926 		 */
927 		nrec->br_startblock = HOLESTARTBLOCK;
928 	}
929 
930 	return 0;
931 }
932 
933 int __init
xfs_exchmaps_intent_init_cache(void)934 xfs_exchmaps_intent_init_cache(void)
935 {
936 	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
937 			sizeof(struct xfs_exchmaps_intent),
938 			0, 0, NULL);
939 
940 	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
941 }
942 
943 void
xfs_exchmaps_intent_destroy_cache(void)944 xfs_exchmaps_intent_destroy_cache(void)
945 {
946 	kmem_cache_destroy(xfs_exchmaps_intent_cache);
947 	xfs_exchmaps_intent_cache = NULL;
948 }
949 
950 /*
951  * Decide if we will exchange the reflink flags between the two files after the
952  * exchange.  The only time we want to do this is if we're exchanging all
953  * mappings under EOF and the inode reflink flags have different states.
954  */
955 static inline bool
xmi_can_exchange_reflink_flags(const struct xfs_exchmaps_req * req,unsigned int reflink_state)956 xmi_can_exchange_reflink_flags(
957 	const struct xfs_exchmaps_req	*req,
958 	unsigned int			reflink_state)
959 {
960 	struct xfs_mount		*mp = req->ip1->i_mount;
961 
962 	if (hweight32(reflink_state) != 1)
963 		return false;
964 	if (req->startoff1 != 0 || req->startoff2 != 0)
965 		return false;
966 	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
967 		return false;
968 	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
969 		return false;
970 	return true;
971 }
972 
973 
974 /* Allocate and initialize a new incore intent item from a request. */
975 struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(const struct xfs_exchmaps_req * req)976 xfs_exchmaps_init_intent(
977 	const struct xfs_exchmaps_req	*req)
978 {
979 	struct xfs_exchmaps_intent	*xmi;
980 	unsigned int			rs = 0;
981 
982 	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
983 			GFP_NOFS | __GFP_NOFAIL);
984 	INIT_LIST_HEAD(&xmi->xmi_list);
985 	xmi->xmi_ip1 = req->ip1;
986 	xmi->xmi_ip2 = req->ip2;
987 	xmi->xmi_startoff1 = req->startoff1;
988 	xmi->xmi_startoff2 = req->startoff2;
989 	xmi->xmi_blockcount = req->blockcount;
990 	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
991 	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;
992 
993 	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
994 		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
995 		return xmi;
996 	}
997 
998 	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
999 		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
1000 		xmi->xmi_isize1 = req->ip2->i_disk_size;
1001 		xmi->xmi_isize2 = req->ip1->i_disk_size;
1002 	}
1003 
1004 	/* Record the state of each inode's reflink flag before the op. */
1005 	if (xfs_is_reflink_inode(req->ip1))
1006 		rs |= 1;
1007 	if (xfs_is_reflink_inode(req->ip2))
1008 		rs |= 2;
1009 
1010 	/*
1011 	 * Figure out if we're clearing the reflink flags (which effectively
1012 	 * exchanges them) after the operation.
1013 	 */
1014 	if (xmi_can_exchange_reflink_flags(req, rs)) {
1015 		if (rs & 1)
1016 			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
1017 		if (rs & 2)
1018 			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
1019 	}
1020 
1021 	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
1022 	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
1023 		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
1024 
1025 	return xmi;
1026 }
1027 
1028 /*
1029  * Estimate the number of exchange operations and the number of file blocks
1030  * in each file that will be affected by the exchange operation.
1031  */
1032 int
xfs_exchmaps_estimate(struct xfs_exchmaps_req * req)1033 xfs_exchmaps_estimate(
1034 	struct xfs_exchmaps_req		*req)
1035 {
1036 	struct xfs_exchmaps_intent	*xmi;
1037 	struct xfs_bmbt_irec		irec1, irec2;
1038 	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
1039 	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
1040 	int64_t				d_nexts1, d_nexts2;
1041 	int				bmap_flags;
1042 	int				error;
1043 
1044 	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));
1045 
1046 	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
1047 	xmi = xfs_exchmaps_init_intent(req);
1048 
1049 	/*
1050 	 * To guard against the possibility of overflowing the extent counters,
1051 	 * we have to estimate an upper bound on the potential increase in that
1052 	 * counter.  We can split the mapping at each end of the range, and for
1053 	 * each step of the exchange we can split the mapping that we're
1054 	 * working on if the mappings do not align.
1055 	 */
1056 	d_nexts1 = d_nexts2 = 3;
1057 
1058 	while (xmi_has_more_exchange_work(xmi)) {
1059 		/*
1060 		 * Walk through the file ranges until we find something to
1061 		 * exchange.  Because we're simulating the exchange, pass in
1062 		 * adj to capture skipped mappings for correct estimation of
1063 		 * bmbt record merges.
1064 		 */
1065 		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
1066 		if (error)
1067 			goto out_free;
1068 		if (!xmi_has_more_exchange_work(xmi))
1069 			break;
1070 
1071 		/* Update accounting. */
1072 		if (xfs_bmap_is_real_extent(&irec1))
1073 			ip1_blocks += irec1.br_blockcount;
1074 		if (xfs_bmap_is_real_extent(&irec2))
1075 			ip2_blocks += irec2.br_blockcount;
1076 		req->nr_exchanges++;
1077 
1078 		/* Read the next mappings from both files. */
1079 		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
1080 		if (error)
1081 			goto out_free;
1082 
1083 		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
1084 		if (error)
1085 			goto out_free;
1086 
1087 		/* Update extent count deltas. */
1088 		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
1089 				&adj.left1, &irec1, &irec2, &adj.right1);
1090 
1091 		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
1092 				&adj.left2, &irec2, &irec1, &adj.right2);
1093 
1094 		/* Now pretend we exchanged the mappings. */
1095 		if (xmi_can_merge(&adj.left2, &irec1))
1096 			adj.left2.br_blockcount += irec1.br_blockcount;
1097 		else
1098 			memcpy(&adj.left2, &irec1, sizeof(irec1));
1099 
1100 		if (xmi_can_merge(&adj.left1, &irec2))
1101 			adj.left1.br_blockcount += irec2.br_blockcount;
1102 		else
1103 			memcpy(&adj.left1, &irec2, sizeof(irec2));
1104 
1105 		xmi_advance(xmi, &irec1);
1106 	}
1107 
1108 	/* Account for the blocks that are being exchanged. */
1109 	if (XFS_IS_REALTIME_INODE(req->ip1) &&
1110 	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
1111 		req->ip1_rtbcount = ip1_blocks;
1112 		req->ip2_rtbcount = ip2_blocks;
1113 	} else {
1114 		req->ip1_bcount = ip1_blocks;
1115 		req->ip2_bcount = ip2_blocks;
1116 	}
1117 
1118 	/*
1119 	 * Make sure that both forks have enough slack left in their extent
1120 	 * counters that the exchange operation will not overflow.
1121 	 */
1122 	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
1123 	if (req->ip1 == req->ip2) {
1124 		error = xmi_ensure_delta_nextents(req, req->ip1,
1125 				d_nexts1 + d_nexts2);
1126 	} else {
1127 		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
1128 		if (error)
1129 			goto out_free;
1130 		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
1131 	}
1132 	if (error)
1133 		goto out_free;
1134 
1135 	trace_xfs_exchmaps_initial_estimate(req);
1136 	error = xfs_exchmaps_estimate_overhead(req);
1137 out_free:
1138 	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
1139 	return error;
1140 }
1141 
1142 /* Set the reflink flag before an operation. */
1143 static inline void
xfs_exchmaps_set_reflink(struct xfs_trans * tp,struct xfs_inode * ip)1144 xfs_exchmaps_set_reflink(
1145 	struct xfs_trans	*tp,
1146 	struct xfs_inode	*ip)
1147 {
1148 	trace_xfs_reflink_set_inode_flag(ip);
1149 
1150 	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
1151 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1152 }
1153 
1154 /*
1155  * If either file has shared blocks and we're exchanging data forks, we must
1156  * flag the other file as having shared blocks so that we get the shared-block
1157  * rmap functions if we need to fix up the rmaps.
1158  */
1159 void
xfs_exchmaps_ensure_reflink(struct xfs_trans * tp,const struct xfs_exchmaps_intent * xmi)1160 xfs_exchmaps_ensure_reflink(
1161 	struct xfs_trans			*tp,
1162 	const struct xfs_exchmaps_intent	*xmi)
1163 {
1164 	unsigned int				rs = 0;
1165 
1166 	if (xfs_is_reflink_inode(xmi->xmi_ip1))
1167 		rs |= 1;
1168 	if (xfs_is_reflink_inode(xmi->xmi_ip2))
1169 		rs |= 2;
1170 
1171 	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
1172 		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);
1173 
1174 	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
1175 		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
1176 }
1177 
1178 /* Set the large extent count flag before an operation if needed. */
1179 static inline void
xfs_exchmaps_ensure_large_extent_counts(struct xfs_trans * tp,struct xfs_inode * ip)1180 xfs_exchmaps_ensure_large_extent_counts(
1181 	struct xfs_trans	*tp,
1182 	struct xfs_inode	*ip)
1183 {
1184 	if (xfs_inode_has_large_extent_counts(ip))
1185 		return;
1186 
1187 	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1188 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1189 }
1190 
1191 /* Widen the extent counter fields of both inodes if necessary. */
1192 void
xfs_exchmaps_upgrade_extent_counts(struct xfs_trans * tp,const struct xfs_exchmaps_intent * xmi)1193 xfs_exchmaps_upgrade_extent_counts(
1194 	struct xfs_trans			*tp,
1195 	const struct xfs_exchmaps_intent	*xmi)
1196 {
1197 	if (!xfs_has_large_extent_counts(tp->t_mountp))
1198 		return;
1199 
1200 	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
1201 	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
1202 }
1203 
1204 /*
1205  * Schedule an exchange a range of mappings from one inode to another.
1206  *
1207  * The use of file mapping exchange log intent items ensures the operation can
1208  * be resumed even if the system goes down.  The caller must commit the
1209  * transaction to start the work.
1210  *
1211  * The caller must ensure the inodes must be joined to the transaction and
1212  * ILOCKd; they will still be joined to the transaction at exit.
1213  */
1214 void
xfs_exchange_mappings(struct xfs_trans * tp,const struct xfs_exchmaps_req * req)1215 xfs_exchange_mappings(
1216 	struct xfs_trans		*tp,
1217 	const struct xfs_exchmaps_req	*req)
1218 {
1219 	struct xfs_exchmaps_intent	*xmi;
1220 
1221 	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);
1222 
1223 	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
1224 	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
1225 	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
1226 	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
1227 		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
1228 	ASSERT(xfs_has_exchange_range(tp->t_mountp));
1229 
1230 	if (req->blockcount == 0)
1231 		return;
1232 
1233 	xmi = xfs_exchmaps_init_intent(req);
1234 	xfs_exchmaps_defer_add(tp, xmi);
1235 	xfs_exchmaps_ensure_reflink(tp, xmi);
1236 	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
1237 }
1238