xref: /linux/fs/ext4/migrate.c (revision ed3174d93c342b8b2eeba6bbd124707d55304a7b)
1 /*
2  * Copyright IBM Corporation, 2007
3  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of version 2.1 of the GNU Lesser General Public License
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/ext4_jbd2.h>
17 #include <linux/ext4_fs_extents.h>
18 
19 /*
20  * The contiguous blocks details which can be
21  * represented by a single extent
22  */
23 struct list_blocks_struct {
24 	ext4_lblk_t first_block, last_block;
25 	ext4_fsblk_t first_pblock, last_pblock;
26 };
27 
28 static int finish_range(handle_t *handle, struct inode *inode,
29 				struct list_blocks_struct *lb)
30 
31 {
32 	int retval = 0, needed;
33 	struct ext4_extent newext;
34 	struct ext4_ext_path *path;
35 	if (lb->first_pblock == 0)
36 		return 0;
37 
38 	/* Add the extent to temp inode*/
39 	newext.ee_block = cpu_to_le32(lb->first_block);
40 	newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 	ext4_ext_store_pblock(&newext, lb->first_pblock);
42 	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
43 
44 	if (IS_ERR(path)) {
45 		retval = PTR_ERR(path);
46 		goto err_out;
47 	}
48 
49 	/*
50 	 * Calculate the credit needed to inserting this extent
51 	 * Since we are doing this in loop we may accumalate extra
52 	 * credit. But below we try to not accumalate too much
53 	 * of them by restarting the journal.
54 	 */
55 	needed = ext4_ext_calc_credits_for_insert(inode, path);
56 
57 	/*
58 	 * Make sure the credit we accumalated is not really high
59 	 */
60 	if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
61 		retval = ext4_journal_restart(handle, needed);
62 		if (retval)
63 			goto err_out;
64 	} else if (needed) {
65 		retval = ext4_journal_extend(handle, needed);
66 		if (retval) {
67 			/*
68 			 * IF not able to extend the journal restart the journal
69 			 */
70 			retval = ext4_journal_restart(handle, needed);
71 			if (retval)
72 				goto err_out;
73 		}
74 	}
75 	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
76 err_out:
77 	lb->first_pblock = 0;
78 	return retval;
79 }
80 
81 static int update_extent_range(handle_t *handle, struct inode *inode,
82 				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
83 				struct list_blocks_struct *lb)
84 {
85 	int retval;
86 	/*
87 	 * See if we can add on to the existing range (if it exists)
88 	 */
89 	if (lb->first_pblock &&
90 		(lb->last_pblock+1 == pblock) &&
91 		(lb->last_block+1 == blk_num)) {
92 		lb->last_pblock = pblock;
93 		lb->last_block = blk_num;
94 		return 0;
95 	}
96 	/*
97 	 * Start a new range.
98 	 */
99 	retval = finish_range(handle, inode, lb);
100 	lb->first_pblock = lb->last_pblock = pblock;
101 	lb->first_block = lb->last_block = blk_num;
102 
103 	return retval;
104 }
105 
106 static int update_ind_extent_range(handle_t *handle, struct inode *inode,
107 				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
108 				   struct list_blocks_struct *lb)
109 {
110 	struct buffer_head *bh;
111 	__le32 *i_data;
112 	int i, retval = 0;
113 	ext4_lblk_t blk_count = *blk_nump;
114 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
115 
116 	if (!pblock) {
117 		/* Only update the file block number */
118 		*blk_nump += max_entries;
119 		return 0;
120 	}
121 
122 	bh = sb_bread(inode->i_sb, pblock);
123 	if (!bh)
124 		return -EIO;
125 
126 	i_data = (__le32 *)bh->b_data;
127 	for (i = 0; i < max_entries; i++, blk_count++) {
128 		if (i_data[i]) {
129 			retval = update_extent_range(handle, inode,
130 						le32_to_cpu(i_data[i]),
131 						blk_count, lb);
132 			if (retval)
133 				break;
134 		}
135 	}
136 
137 	/* Update the file block number */
138 	*blk_nump = blk_count;
139 	put_bh(bh);
140 	return retval;
141 
142 }
143 
144 static int update_dind_extent_range(handle_t *handle, struct inode *inode,
145 				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
146 				    struct list_blocks_struct *lb)
147 {
148 	struct buffer_head *bh;
149 	__le32 *i_data;
150 	int i, retval = 0;
151 	ext4_lblk_t blk_count = *blk_nump;
152 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
153 
154 	if (!pblock) {
155 		/* Only update the file block number */
156 		*blk_nump += max_entries * max_entries;
157 		return 0;
158 	}
159 	bh = sb_bread(inode->i_sb, pblock);
160 	if (!bh)
161 		return -EIO;
162 
163 	i_data = (__le32 *)bh->b_data;
164 	for (i = 0; i < max_entries; i++) {
165 		if (i_data[i]) {
166 			retval = update_ind_extent_range(handle, inode,
167 						le32_to_cpu(i_data[i]),
168 						&blk_count, lb);
169 			if (retval)
170 				break;
171 		} else {
172 			/* Only update the file block number */
173 			blk_count += max_entries;
174 		}
175 	}
176 
177 	/* Update the file block number */
178 	*blk_nump = blk_count;
179 	put_bh(bh);
180 	return retval;
181 
182 }
183 
184 static int update_tind_extent_range(handle_t *handle, struct inode *inode,
185 				     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
186 				     struct list_blocks_struct *lb)
187 {
188 	struct buffer_head *bh;
189 	__le32 *i_data;
190 	int i, retval = 0;
191 	ext4_lblk_t blk_count = *blk_nump;
192 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
193 
194 	if (!pblock) {
195 		/* Only update the file block number */
196 		*blk_nump += max_entries * max_entries * max_entries;
197 		return 0;
198 	}
199 	bh = sb_bread(inode->i_sb, pblock);
200 	if (!bh)
201 		return -EIO;
202 
203 	i_data = (__le32 *)bh->b_data;
204 	for (i = 0; i < max_entries; i++) {
205 		if (i_data[i]) {
206 			retval = update_dind_extent_range(handle, inode,
207 						le32_to_cpu(i_data[i]),
208 						&blk_count, lb);
209 			if (retval)
210 				break;
211 		} else
212 			/* Only update the file block number */
213 			blk_count += max_entries * max_entries;
214 	}
215 	/* Update the file block number */
216 	*blk_nump = blk_count;
217 	put_bh(bh);
218 	return retval;
219 
220 }
221 
222 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
223 {
224 	int retval = 0, needed;
225 
226 	if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
227 		return 0;
228 	/*
229 	 * We are freeing a blocks. During this we touch
230 	 * superblock, group descriptor and block bitmap.
231 	 * So allocate a credit of 3. We may update
232 	 * quota (user and group).
233 	 */
234 	needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
235 
236 	if (ext4_journal_extend(handle, needed) != 0)
237 		retval = ext4_journal_restart(handle, needed);
238 
239 	return retval;
240 }
241 
242 static int free_dind_blocks(handle_t *handle,
243 				struct inode *inode, __le32 i_data)
244 {
245 	int i;
246 	__le32 *tmp_idata;
247 	struct buffer_head *bh;
248 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
249 
250 	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
251 	if (!bh)
252 		return -EIO;
253 
254 	tmp_idata = (__le32 *)bh->b_data;
255 	for (i = 0; i < max_entries; i++) {
256 		if (tmp_idata[i]) {
257 			extend_credit_for_blkdel(handle, inode);
258 			ext4_free_blocks(handle, inode,
259 					le32_to_cpu(tmp_idata[i]), 1, 1);
260 		}
261 	}
262 	put_bh(bh);
263 	extend_credit_for_blkdel(handle, inode);
264 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
265 	return 0;
266 }
267 
268 static int free_tind_blocks(handle_t *handle,
269 				struct inode *inode, __le32 i_data)
270 {
271 	int i, retval = 0;
272 	__le32 *tmp_idata;
273 	struct buffer_head *bh;
274 	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
275 
276 	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
277 	if (!bh)
278 		return -EIO;
279 
280 	tmp_idata = (__le32 *)bh->b_data;
281 	for (i = 0; i < max_entries; i++) {
282 		if (tmp_idata[i]) {
283 			retval = free_dind_blocks(handle,
284 					inode, tmp_idata[i]);
285 			if (retval) {
286 				put_bh(bh);
287 				return retval;
288 			}
289 		}
290 	}
291 	put_bh(bh);
292 	extend_credit_for_blkdel(handle, inode);
293 	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
294 	return 0;
295 }
296 
297 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
298 {
299 	int retval;
300 
301 	/* ei->i_data[EXT4_IND_BLOCK] */
302 	if (i_data[0]) {
303 		extend_credit_for_blkdel(handle, inode);
304 		ext4_free_blocks(handle, inode,
305 				le32_to_cpu(i_data[0]), 1, 1);
306 	}
307 
308 	/* ei->i_data[EXT4_DIND_BLOCK] */
309 	if (i_data[1]) {
310 		retval = free_dind_blocks(handle, inode, i_data[1]);
311 		if (retval)
312 			return retval;
313 	}
314 
315 	/* ei->i_data[EXT4_TIND_BLOCK] */
316 	if (i_data[2]) {
317 		retval = free_tind_blocks(handle, inode, i_data[2]);
318 		if (retval)
319 			return retval;
320 	}
321 	return 0;
322 }
323 
324 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
325 				struct inode *tmp_inode)
326 {
327 	int retval;
328 	__le32	i_data[3];
329 	struct ext4_inode_info *ei = EXT4_I(inode);
330 	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
331 
332 	/*
333 	 * One credit accounted for writing the
334 	 * i_data field of the original inode
335 	 */
336 	retval = ext4_journal_extend(handle, 1);
337 	if (retval != 0) {
338 		retval = ext4_journal_restart(handle, 1);
339 		if (retval)
340 			goto err_out;
341 	}
342 
343 	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
344 	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
345 	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
346 
347 	down_write(&EXT4_I(inode)->i_data_sem);
348 	/*
349 	 * We have the extent map build with the tmp inode.
350 	 * Now copy the i_data across
351 	 */
352 	ei->i_flags |= EXT4_EXTENTS_FL;
353 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
354 
355 	/*
356 	 * Update i_blocks with the new blocks that got
357 	 * allocated while adding extents for extent index
358 	 * blocks.
359 	 *
360 	 * While converting to extents we need not
361 	 * update the orignal inode i_blocks for extent blocks
362 	 * via quota APIs. The quota update happened via tmp_inode already.
363 	 */
364 	spin_lock(&inode->i_lock);
365 	inode->i_blocks += tmp_inode->i_blocks;
366 	spin_unlock(&inode->i_lock);
367 	up_write(&EXT4_I(inode)->i_data_sem);
368 
369 	/*
370 	 * We mark the inode dirty after, because we decrement the
371 	 * i_blocks when freeing the indirect meta-data blocks
372 	 */
373 	retval = free_ind_block(handle, inode, i_data);
374 	ext4_mark_inode_dirty(handle, inode);
375 
376 err_out:
377 	return retval;
378 }
379 
380 static int free_ext_idx(handle_t *handle, struct inode *inode,
381 					struct ext4_extent_idx *ix)
382 {
383 	int i, retval = 0;
384 	ext4_fsblk_t block;
385 	struct buffer_head *bh;
386 	struct ext4_extent_header *eh;
387 
388 	block = idx_pblock(ix);
389 	bh = sb_bread(inode->i_sb, block);
390 	if (!bh)
391 		return -EIO;
392 
393 	eh = (struct ext4_extent_header *)bh->b_data;
394 	if (eh->eh_depth != 0) {
395 		ix = EXT_FIRST_INDEX(eh);
396 		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
397 			retval = free_ext_idx(handle, inode, ix);
398 			if (retval)
399 				break;
400 		}
401 	}
402 	put_bh(bh);
403 	extend_credit_for_blkdel(handle, inode);
404 	ext4_free_blocks(handle, inode, block, 1, 1);
405 	return retval;
406 }
407 
408 /*
409  * Free the extent meta data blocks only
410  */
411 static int free_ext_block(handle_t *handle, struct inode *inode)
412 {
413 	int i, retval = 0;
414 	struct ext4_inode_info *ei = EXT4_I(inode);
415 	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
416 	struct ext4_extent_idx *ix;
417 	if (eh->eh_depth == 0)
418 		/*
419 		 * No extra blocks allocated for extent meta data
420 		 */
421 		return 0;
422 	ix = EXT_FIRST_INDEX(eh);
423 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
424 		retval = free_ext_idx(handle, inode, ix);
425 		if (retval)
426 			return retval;
427 	}
428 	return retval;
429 
430 }
431 
432 int ext4_ext_migrate(struct inode *inode, struct file *filp,
433 				unsigned int cmd, unsigned long arg)
434 {
435 	handle_t *handle;
436 	int retval = 0, i;
437 	__le32 *i_data;
438 	ext4_lblk_t blk_count = 0;
439 	struct ext4_inode_info *ei;
440 	struct inode *tmp_inode = NULL;
441 	struct list_blocks_struct lb;
442 	unsigned long max_entries;
443 
444 	if (!test_opt(inode->i_sb, EXTENTS))
445 		/*
446 		 * if mounted with noextents we don't allow the migrate
447 		 */
448 		return -EINVAL;
449 
450 	if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
451 		return -EINVAL;
452 
453 	if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
454 		/*
455 		 * don't migrate fast symlink
456 		 */
457 		return retval;
458 
459 	handle = ext4_journal_start(inode,
460 					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
461 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
462 					2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
463 					+ 1);
464 	if (IS_ERR(handle)) {
465 		retval = PTR_ERR(handle);
466 		goto err_out;
467 	}
468 	tmp_inode = ext4_new_inode(handle,
469 				inode->i_sb->s_root->d_inode,
470 				S_IFREG);
471 	if (IS_ERR(tmp_inode)) {
472 		retval = -ENOMEM;
473 		ext4_journal_stop(handle);
474 		tmp_inode = NULL;
475 		goto err_out;
476 	}
477 	i_size_write(tmp_inode, i_size_read(inode));
478 	/*
479 	 * We don't want the inode to be reclaimed
480 	 * if we got interrupted in between. We have
481 	 * this tmp inode carrying reference to the
482 	 * data blocks of the original file. We set
483 	 * the i_nlink to zero at the last stage after
484 	 * switching the original file to extent format
485 	 */
486 	tmp_inode->i_nlink = 1;
487 
488 	ext4_ext_tree_init(handle, tmp_inode);
489 	ext4_orphan_add(handle, tmp_inode);
490 	ext4_journal_stop(handle);
491 
492 	/*
493 	 * start with one credit accounted for
494 	 * superblock modification.
495 	 *
496 	 * For the tmp_inode we already have commited the
497 	 * trascation that created the inode. Later as and
498 	 * when we add extents we extent the journal
499 	 */
500 	/*
501 	 * inode_mutex prevent write and truncate on the file. Read still goes
502 	 * through. We take i_data_sem in ext4_ext_swap_inode_data before we
503 	 * switch the inode format to prevent read.
504 	 */
505 	mutex_lock(&(inode->i_mutex));
506 	handle = ext4_journal_start(inode, 1);
507 
508 	ei = EXT4_I(inode);
509 	i_data = ei->i_data;
510 	memset(&lb, 0, sizeof(lb));
511 
512 	/* 32 bit block address 4 bytes */
513 	max_entries = inode->i_sb->s_blocksize >> 2;
514 	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
515 		if (i_data[i]) {
516 			retval = update_extent_range(handle, tmp_inode,
517 						le32_to_cpu(i_data[i]),
518 						blk_count, &lb);
519 			if (retval)
520 				goto err_out;
521 		}
522 	}
523 	if (i_data[EXT4_IND_BLOCK]) {
524 		retval = update_ind_extent_range(handle, tmp_inode,
525 					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
526 					&blk_count, &lb);
527 			if (retval)
528 				goto err_out;
529 	} else
530 		blk_count +=  max_entries;
531 	if (i_data[EXT4_DIND_BLOCK]) {
532 		retval = update_dind_extent_range(handle, tmp_inode,
533 					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
534 					&blk_count, &lb);
535 			if (retval)
536 				goto err_out;
537 	} else
538 		blk_count += max_entries * max_entries;
539 	if (i_data[EXT4_TIND_BLOCK]) {
540 		retval = update_tind_extent_range(handle, tmp_inode,
541 					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
542 					&blk_count, &lb);
543 			if (retval)
544 				goto err_out;
545 	}
546 	/*
547 	 * Build the last extent
548 	 */
549 	retval = finish_range(handle, tmp_inode, &lb);
550 err_out:
551 	if (retval)
552 		/*
553 		 * Failure case delete the extent information with the
554 		 * tmp_inode
555 		 */
556 		free_ext_block(handle, tmp_inode);
557 	else
558 		retval = ext4_ext_swap_inode_data(handle, inode,
559 							tmp_inode);
560 
561 	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
562 	if (ext4_journal_extend(handle, 1) != 0)
563 		ext4_journal_restart(handle, 1);
564 
565 	/*
566 	 * Mark the tmp_inode as of size zero
567 	 */
568 	i_size_write(tmp_inode, 0);
569 
570 	/*
571 	 * set the  i_blocks count to zero
572 	 * so that the ext4_delete_inode does the
573 	 * right job
574 	 *
575 	 * We don't need to take the i_lock because
576 	 * the inode is not visible to user space.
577 	 */
578 	tmp_inode->i_blocks = 0;
579 
580 	/* Reset the extent details */
581 	ext4_ext_tree_init(handle, tmp_inode);
582 
583 	/*
584 	 * Set the i_nlink to zero so that
585 	 * generic_drop_inode really deletes the
586 	 * inode
587 	 */
588 	tmp_inode->i_nlink = 0;
589 
590 	ext4_journal_stop(handle);
591 	mutex_unlock(&(inode->i_mutex));
592 
593 	if (tmp_inode)
594 		iput(tmp_inode);
595 
596 	return retval;
597 }
598