// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
 *            Akira Fujita <a-fujita@rs.jp.nec.com>
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "ext4_extents.h"

#include <trace/events/ext4.h>

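/*
 * Implementation of EXT4_IOC_MOVE_EXT, used by online defragmentation
 * tools such as e4defrag: the blocks of an original file are exchanged,
 * one extent at a time, with the (presumably less fragmented) blocks of
 * a donor file, while preserving the original file's contents.
 */
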
struct mext_data {
	struct inode *orig_inode;	/* Origin file inode */
	struct inode *donor_inode;	/* Donor file inode */
	struct ext4_map_blocks orig_map;/* Origin file's move mapping */
	ext4_lblk_t donor_lblk;		/* Start block of the donor file */
};

/**
 * ext4_double_down_write_data_sem() - write lock two inodes' i_data_sem
 * @first: inode to be locked
 * @second: inode to be locked
 *
 * Acquire the write locks of i_data_sem of the two inodes in ascending
 * address order to avoid an ABBA deadlock.
 */
void
ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
{
	if (first < second) {
		down_write(&EXT4_I(first)->i_data_sem);
		down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
	} else {
		down_write(&EXT4_I(second)->i_data_sem);
		down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
	}
}

/**
 * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
 *
 * @orig_inode:		original inode whose i_data_sem is released first
 * @donor_inode:	donor inode whose i_data_sem is released second
 *
 * Release the write locks of i_data_sem of the two inodes (orig and donor).
 */
void
ext4_double_up_write_data_sem(struct inode *orig_inode,
			      struct inode *donor_inode)
{
	up_write(&EXT4_I(orig_inode)->i_data_sem);
	up_write(&EXT4_I(donor_inode)->i_data_sem);
}

/* Grab and lock folio on both @inode1 and @inode2 by inode order. */
static int mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
				  pgoff_t index1, pgoff_t index2, size_t len,
				  struct folio *folio[2])
{
	struct address_space *mapping[2];
	unsigned int flags;
	fgf_t fgp_flags = FGP_WRITEBEGIN;

	BUG_ON(!inode1 || !inode2);
	if (inode1 < inode2) {
		mapping[0] = inode1->i_mapping;
		mapping[1] = inode2->i_mapping;
	} else {
		swap(index1, index2);
		mapping[0] = inode2->i_mapping;
		mapping[1] = inode1->i_mapping;
	}

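	/*
	 * Take the folio references inside a NOFS allocation scope so that
	 * memory reclaim cannot recurse into the filesystem while we are
	 * grabbing and locking the folios.
	 */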
	flags = memalloc_nofs_save();
	fgp_flags |= fgf_set_order(len);
	folio[0] = __filemap_get_folio(mapping[0], index1, fgp_flags,
			mapping_gfp_mask(mapping[0]));
	if (IS_ERR(folio[0])) {
		memalloc_nofs_restore(flags);
		return PTR_ERR(folio[0]);
	}

	folio[1] = __filemap_get_folio(mapping[1], index2, fgp_flags,
			mapping_gfp_mask(mapping[1]));
	memalloc_nofs_restore(flags);
	if (IS_ERR(folio[1])) {
		folio_unlock(folio[0]);
		folio_put(folio[0]);
		return PTR_ERR(folio[1]);
	}
	/*
	 * __filemap_get_folio() may not wait on the folio's writeback if
	 * the BDI does not demand it. But it is reasonable to be very
	 * conservative here and explicitly wait on the folio's writeback.
	 */
	folio_wait_writeback(folio[0]);
	folio_wait_writeback(folio[1]);
	if (inode1 > inode2)
		swap(folio[0], folio[1]);

	return 0;
}

static void mext_folio_double_unlock(struct folio *folio[2])
{
	folio_unlock(folio[0]);
	folio_put(folio[0]);
	folio_unlock(folio[1]);
	folio_put(folio[1]);
}

/* Force folio buffers uptodate w/o dropping folio's lock */
static int mext_folio_mkuptodate(struct folio *folio, size_t from, size_t to)
{
	struct inode *inode = folio->mapping->host;
	sector_t block;
	struct buffer_head *bh, *head;
	unsigned int blocksize, block_start, block_end;
	int nr = 0;
	bool partial = false;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(folio_test_writeback(folio));

	if (folio_test_uptodate(folio))
		return 0;

	blocksize = i_blocksize(inode);
	head = folio_buffers(folio);
	if (!head)
		head = create_empty_buffers(folio, blocksize, 0);

	block = folio_pos(folio) >> inode->i_blkbits;
	block_end = 0;
	bh = head;
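	/*
	 * First pass: map each buffer overlapping [from, to) and submit
	 * asynchronous reads for those that are not uptodate; unmapped
	 * blocks (holes) are zeroed and marked uptodate in place.
	 */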
	do {
		block_start = block_end;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (!buffer_uptodate(bh))
				partial = true;
			continue;
		}
		if (buffer_uptodate(bh))
			continue;
		if (!buffer_mapped(bh)) {
			int err = ext4_get_block(inode, block, bh, 0);
			if (err)
				return err;
			if (!buffer_mapped(bh)) {
				folio_zero_range(folio, block_start, blocksize);
				set_buffer_uptodate(bh);
				continue;
			}
		}
		lock_buffer(bh);
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);
			continue;
		}
		ext4_read_bh_nowait(bh, 0, NULL, false);
		nr++;
	} while (block++, (bh = bh->b_this_page) != head);

	/* No io required */
	if (!nr)
		goto out;

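	/*
	 * Second pass: wait for the reads submitted above and fail with
	 * -EIO if any buffer in the range is still not uptodate.
	 */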
	bh = head;
	do {
		if (bh_offset(bh) + blocksize <= from)
			continue;
		if (bh_offset(bh) >= to)
			break;
		wait_on_buffer(bh);
		if (buffer_uptodate(bh))
			continue;
		return -EIO;
	} while ((bh = bh->b_this_page) != head);
out:
	if (!partial)
		folio_mark_uptodate(folio);
	return 0;
}

enum mext_move_type {MEXT_SKIP_EXTENT, MEXT_MOVE_EXTENT, MEXT_COPY_DATA};

/*
 * Begin moving an extent between the origin inode and the donor inode:
 * lock one folio for each inode and re-check the mapping status of the
 * candidate moving extent.
 */
static int mext_move_begin(struct mext_data *mext, struct folio *folio[2],
			   enum mext_move_type *move_type)
{
	struct inode *orig_inode = mext->orig_inode;
	struct inode *donor_inode = mext->donor_inode;
	unsigned int blkbits = orig_inode->i_blkbits;
	struct ext4_map_blocks donor_map = {0};
	loff_t orig_pos, donor_pos;
	size_t move_len;
	int ret;

	orig_pos = ((loff_t)mext->orig_map.m_lblk) << blkbits;
	donor_pos = ((loff_t)mext->donor_lblk) << blkbits;
	ret = mext_folio_double_lock(orig_inode, donor_inode,
			orig_pos >> PAGE_SHIFT, donor_pos >> PAGE_SHIFT,
			((size_t)mext->orig_map.m_len) << blkbits, folio);
	if (ret)
		return ret;

	/*
	 * Check the origin inode's mapping information again under the
	 * folio lock, as we do not hold the i_data_sem at all times, and
	 * it may change during a concurrent write-back operation.
	 */
	if (mext->orig_map.m_seq != READ_ONCE(EXT4_I(orig_inode)->i_es_seq)) {
		ret = -ESTALE;
		goto error;
	}

	/* Adjust the moving length according to the shorter folio. */
	move_len = umin(folio_pos(folio[0]) + folio_size(folio[0]) - orig_pos,
			folio_pos(folio[1]) + folio_size(folio[1]) - donor_pos);
	move_len >>= blkbits;
	if (move_len < mext->orig_map.m_len)
		mext->orig_map.m_len = move_len;

	donor_map.m_lblk = mext->donor_lblk;
	donor_map.m_len = mext->orig_map.m_len;
	donor_map.m_flags = 0;
	ret = ext4_map_blocks(NULL, donor_inode, &donor_map, 0);
	if (ret < 0)
		goto error;

	/* Adjust the moving length according to the donor mapping length. */
	mext->orig_map.m_len = donor_map.m_len;

	/* Skip moving if the donor range is a hole or a delalloc extent. */
	if (!(donor_map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN)))
		*move_type = MEXT_SKIP_EXTENT;
	/* If both mapping ranges are unwritten, no need to copy data. */
	else if ((mext->orig_map.m_flags & EXT4_MAP_UNWRITTEN) &&
		 (donor_map.m_flags & EXT4_MAP_UNWRITTEN))
		*move_type = MEXT_MOVE_EXTENT;
	else
		*move_type = MEXT_COPY_DATA;

	return 0;
error:
	mext_folio_double_unlock(folio);
	return ret;
}

/*
 * Re-map the original inode's folio buffers to the newly moved blocks
 * and commit the entire written range.
 */
static int mext_folio_mkwrite(struct inode *inode, struct folio *folio,
			      size_t from, size_t to)
{
	unsigned int blocksize = i_blocksize(inode);
	struct buffer_head *bh, *head;
	size_t block_start, block_end;
	sector_t block;
	int ret;

	head = folio_buffers(folio);
	if (!head)
		head = create_empty_buffers(folio, blocksize, 0);

	block = folio_pos(folio) >> inode->i_blkbits;
	block_end = 0;
	bh = head;
	do {
		block_start = block_end;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to)
			continue;

		ret = ext4_get_block(inode, block, bh, 0);
		if (ret)
			return ret;
	} while (block++, (bh = bh->b_this_page) != head);

	block_commit_write(folio, from, to);
	return 0;
}

/*
 * Save the data from the original inode's extent blocks, replace one
 * folio-size-aligned original inode extent with one whole or partial
 * donor inode extent, and then write the saved data out to the new
 * original inode blocks. Pass the replaced block count out through
 * @m_len. Return 0 on success, and an error code otherwise.
 */
static int mext_move_extent(struct mext_data *mext, u64 *m_len)
{
	struct inode *orig_inode = mext->orig_inode;
	struct inode *donor_inode = mext->donor_inode;
	struct ext4_map_blocks *orig_map = &mext->orig_map;
	unsigned int blkbits = orig_inode->i_blkbits;
	struct folio *folio[2] = {NULL, NULL};
	loff_t from, length;
	enum mext_move_type move_type = 0;
	handle_t *handle;
	u64 r_len = 0;
	unsigned int credits;
	int ret, ret2;

	*m_len = 0;
	trace_ext4_move_extent_enter(orig_inode, orig_map, donor_inode,
				     mext->donor_lblk);
	credits = ext4_chunk_trans_extent(orig_inode, 0) * 2;
	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	ret = mext_move_begin(mext, folio, &move_type);
	if (ret)
		goto stop_handle;

	if (move_type == MEXT_SKIP_EXTENT)
		goto unlock;

	/*
	 * Copy the data. First, read the original inode data into the page
	 * cache. Then, release the existing mapping relationships and swap
	 * the extent. Finally, re-establish the new mapping relationships
	 * and dirty the page cache.
	 */
	if (move_type == MEXT_COPY_DATA) {
		from = offset_in_folio(folio[0],
				((loff_t)orig_map->m_lblk) << blkbits);
		length = ((loff_t)orig_map->m_len) << blkbits;

		ret = mext_folio_mkuptodate(folio[0], from, from + length);
		if (ret)
			goto unlock;
	}

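	/*
	 * Drop the folios' buffers so that the stale block mappings are
	 * gone before the extents are swapped. -EBUSY here is transient;
	 * the caller may retry after forcing a journal commit.
	 */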
	if (!filemap_release_folio(folio[0], 0) ||
	    !filemap_release_folio(folio[1], 0)) {
		ret = -EBUSY;
		goto unlock;
	}

	/* Move extent */
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	*m_len = ext4_swap_extents(handle, orig_inode, donor_inode,
				   orig_map->m_lblk, mext->donor_lblk,
				   orig_map->m_len, 1, &ret);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);

	/* A short swap must not happen after a successful extent swap. */
	if (WARN_ON_ONCE(!ret && (*m_len != orig_map->m_len)))
		ret = -EIO;

	if (!(*m_len) || (move_type == MEXT_MOVE_EXTENT))
		goto unlock;

	/* Copy data */
	length = (*m_len) << blkbits;
	ret2 = mext_folio_mkwrite(orig_inode, folio[0], from, from + length);
	if (ret2) {
		if (!ret)
			ret = ret2;
		goto repair_branches;
	}
	/*
	 * Even in the case of data=writeback it is reasonable to pin the
	 * inode to the transaction, to prevent unexpected data loss.
	 */
	ret2 = ext4_jbd2_inode_add_write(handle, orig_inode,
			((loff_t)orig_map->m_lblk) << blkbits, length);
	if (!ret)
		ret = ret2;
unlock:
	mext_folio_double_unlock(folio);
stop_handle:
	ext4_journal_stop(handle);
out:
	trace_ext4_move_extent_exit(orig_inode, orig_map->m_lblk, donor_inode,
				    mext->donor_lblk, orig_map->m_len, *m_len,
				    move_type, ret);
	return ret;

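/*
 * Re-mapping the copied data failed after the extents were already
 * swapped, so swap them back; otherwise the original inode would point
 * at blocks whose data was never written out. If even the swap-back
 * fails, the data is lost and the filesystem is marked in error.
 */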
repair_branches:
	ret2 = 0;
	ext4_double_down_write_data_sem(orig_inode, donor_inode);
	r_len = ext4_swap_extents(handle, donor_inode, orig_inode,
				  mext->donor_lblk, orig_map->m_lblk,
				  *m_len, 0, &ret2);
	ext4_double_up_write_data_sem(orig_inode, donor_inode);
	if (ret2 || r_len != *m_len) {
		ext4_error_inode_block(orig_inode, (sector_t)(orig_map->m_lblk),
				       EIO, "Unable to copy data block, data will be lost!");
		ret = -EIO;
	}
	*m_len = 0;
	goto unlock;
}

/*
 * Check the validity of the basic filesystem environment and the
 * inodes' support status.
 */
static int mext_check_validity(struct inode *orig_inode,
			       struct inode *donor_inode)
{
	struct super_block *sb = orig_inode->i_sb;

	/* origin and donor should be different inodes */
	if (orig_inode == donor_inode) {
		ext4_debug("ext4 move extent: The argument files should not be same inode [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* origin and donor should belong to the same filesystem */
	if (orig_inode->i_sb != donor_inode->i_sb) {
		ext4_debug("ext4 move extent: The argument files should be in same FS [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	/* Regular file check */
	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
		ext4_debug("ext4 move extent: The argument files should be regular file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if (ext4_has_feature_bigalloc(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "Online defrag not supported with bigalloc");
		return -EOPNOTSUPP;
	}

	if (IS_DAX(orig_inode)) {
		ext4_msg(sb, KERN_ERR,
			 "Online defrag not supported with DAX");
		return -EOPNOTSUPP;
	}

	/*
	 * TODO: it's not obvious how to swap blocks for inodes with full
	 * journaling enabled.
	 */
	if (ext4_should_journal_data(orig_inode) ||
	    ext4_should_journal_data(donor_inode)) {
		ext4_msg(sb, KERN_ERR,
			 "Online defrag not supported with data journaling");
		return -EOPNOTSUPP;
	}

	if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) {
		ext4_msg(sb, KERN_ERR,
			 "Online defrag not supported for encrypted files");
		return -EOPNOTSUPP;
	}

	/* Ext4 move extent supports only extent-based files */
	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS)) ||
	    !(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
		ext4_msg(sb, KERN_ERR,
			 "Online defrag not supported for non-extent files");
		return -EOPNOTSUPP;
	}

	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
		ext4_debug("ext4 move extent: suid or sgid is set to donor file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) {
		ext4_debug("ext4 move extent: donor should not be immutable or append file [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EPERM;
	}

	/* Ext4 move extent does not support swap files */
	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
		ext4_debug("ext4 move extent: The argument files should not be swap files [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -ETXTBSY;
	}

	if (ext4_is_quota_file(orig_inode) || ext4_is_quota_file(donor_inode)) {
		ext4_debug("ext4 move extent: The argument files should not be quota files [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EOPNOTSUPP;
	}

	if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
		ext4_debug("ext4 move extent: File size is 0 byte\n");
		return -EINVAL;
	}

	return 0;
}

/*
 * Check whether the moving range of ext4_move_extents() allows the files
 * to be exchanged with each other, and adjust the length to fit within
 * the file sizes. Return 0 on success, or a negative error value on
 * failure.
 */
static int mext_check_adjust_range(struct inode *orig_inode,
				   struct inode *donor_inode, __u64 orig_start,
				   __u64 donor_start, __u64 *len)
{
	__u64 orig_eof, donor_eof;

	/* Start offsets should be the same */
	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
		ext4_debug("ext4 move extent: orig and donor's start offsets are not aligned [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	if ((orig_start >= EXT_MAX_BLOCKS) ||
	    (donor_start >= EXT_MAX_BLOCKS) ||
	    (*len > EXT_MAX_BLOCKS) ||
	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
		ext4_debug("ext4 move extent: Can't handle over [%u] blocks [ino:orig %lu, donor %lu]\n",
			   EXT_MAX_BLOCKS,
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	orig_eof = EXT4_B_TO_LBLK(orig_inode, i_size_read(orig_inode));
	donor_eof = EXT4_B_TO_LBLK(donor_inode, i_size_read(donor_inode));
	if (orig_eof <= orig_start)
		*len = 0;
	else if (orig_eof < orig_start + *len - 1)
		*len = orig_eof - orig_start;
	if (donor_eof <= donor_start)
		*len = 0;
	else if (donor_eof < donor_start + *len - 1)
		*len = donor_eof - donor_start;
	if (!*len) {
		ext4_debug("ext4 move extent: len should not be 0 [ino:orig %lu, donor %lu]\n",
			   orig_inode->i_ino, donor_inode->i_ino);
		return -EINVAL;
	}

	return 0;
}

/**
 * ext4_move_extents - Exchange the specified range of a file
 *
 * @o_filp:		file structure of the original file
 * @d_filp:		file structure of the donor file
 * @orig_blk:		start offset in blocks of the original file
 * @donor_blk:		start offset in blocks of the donor file
 * @len:		the number of blocks to be moved
 * @moved_len:		the number of blocks actually moved
 *
 * Return 0 and set the moved block length in @moved_len on success,
 * otherwise return an error value.
 */
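/*
 * A minimal sketch of the expected caller, assuming the EXT4_IOC_MOVE_EXT
 * ioctl path in ioctl.c and the struct move_extent layout in ext4.h
 * (donor_fd, orig_start, donor_start, len, moved_len); fd lookup, write
 * access checks, and error handling are omitted:
 *
 *	struct move_extent me;
 *
 *	if (copy_from_user(&me, (struct move_extent __user *)arg,
 *			   sizeof(me)))
 *		return -EFAULT;
 *	err = ext4_move_extents(filp, donor_filp, me.orig_start,
 *				me.donor_start, me.len, &me.moved_len);
 */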
int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
		      __u64 donor_blk, __u64 len, __u64 *moved_len)
{
	struct inode *orig_inode = file_inode(o_filp);
	struct inode *donor_inode = file_inode(d_filp);
	struct mext_data mext;
	struct super_block *sb = orig_inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int retries = 0;
	u64 m_len;
	int ret;

	*moved_len = 0;

	/* Protect orig and donor inodes against a truncate */
	lock_two_nondirectories(orig_inode, donor_inode);

	ret = mext_check_validity(orig_inode, donor_inode);
	if (ret)
		goto out;

	/* Wait for all existing dio workers */
	inode_dio_wait(orig_inode);
	inode_dio_wait(donor_inode);

	/* Check and adjust the specified move_extent range. */
	ret = mext_check_adjust_range(orig_inode, donor_inode, orig_blk,
				      donor_blk, &len);
	if (ret)
		goto out;

	mext.orig_inode = orig_inode;
	mext.donor_inode = donor_inode;
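	/*
	 * Move the range one mapping at a time: each round looks up the
	 * next extent of the origin file and tries to exchange it with
	 * the corresponding donor range.
	 */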
	while (len) {
		mext.orig_map.m_lblk = orig_blk;
		mext.orig_map.m_len = len;
		mext.orig_map.m_flags = 0;
		mext.donor_lblk = donor_blk;

		ret = ext4_map_blocks(NULL, orig_inode, &mext.orig_map, 0);
		if (ret < 0)
			goto out;

		/* Skip moving if it is a hole or a delalloc extent. */
		if (mext.orig_map.m_flags &
		    (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN)) {
			ret = mext_move_extent(&mext, &m_len);
			*moved_len += m_len;
			if (!ret)
				goto next;

			/* Move failed or partially failed. */
			if (m_len) {
				orig_blk += m_len;
				donor_blk += m_len;
				len -= m_len;
			}
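			/*
			 * -ESTALE means the origin mapping changed under
			 * us, so simply look it up again. -ENOSPC and
			 * -EBUSY may be transient: retry a bounded number
			 * of times, after waiting for free space or
			 * forcing a journal commit respectively.
			 */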
			if (ret == -ESTALE)
				continue;
			if (ret == -ENOSPC &&
			    ext4_should_retry_alloc(sb, &retries))
				continue;
			if (ret == -EBUSY &&
			    sbi->s_journal && retries++ < 4 &&
			    jbd2_journal_force_commit_nested(sbi->s_journal))
				continue;

			goto out;
		}
next:
		orig_blk += mext.orig_map.m_len;
		donor_blk += mext.orig_map.m_len;
		len -= mext.orig_map.m_len;
		retries = 0;
	}

out:
	if (*moved_len) {
		ext4_discard_preallocations(orig_inode);
		ext4_discard_preallocations(donor_inode);
	}

	unlock_two_nondirectories(orig_inode, donor_inode);
	return ret;
}
657