1 /* 2 * Copyright IBM Corporation, 2007 3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of version 2.1 of the GNU Lesser General Public License 7 * as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 * 13 */ 14 15 #include <linux/slab.h> 16 #include "ext4_jbd2.h" 17 #include "ext4_extents.h" 18 19 /* 20 * The contiguous blocks details which can be 21 * represented by a single extent 22 */ 23 struct migrate_struct { 24 ext4_lblk_t first_block, last_block, curr_block; 25 ext4_fsblk_t first_pblock, last_pblock; 26 }; 27 28 static int finish_range(handle_t *handle, struct inode *inode, 29 struct migrate_struct *lb) 30 31 { 32 int retval = 0, needed; 33 struct ext4_extent newext; 34 struct ext4_ext_path *path; 35 if (lb->first_pblock == 0) 36 return 0; 37 38 /* Add the extent to temp inode*/ 39 newext.ee_block = cpu_to_le32(lb->first_block); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 41 ext4_ext_store_pblock(&newext, lb->first_pblock); 42 /* Locking only for convinience since we are operating on temp inode */ 43 down_write(&EXT4_I(inode)->i_data_sem); 44 path = ext4_find_extent(inode, lb->first_block, NULL, 0); 45 if (IS_ERR(path)) { 46 retval = PTR_ERR(path); 47 path = NULL; 48 goto err_out; 49 } 50 51 /* 52 * Calculate the credit needed to inserting this extent 53 * Since we are doing this in loop we may accumalate extra 54 * credit. But below we try to not accumalate too much 55 * of them by restarting the journal. 56 */ 57 needed = ext4_ext_calc_credits_for_single_extent(inode, 58 lb->last_block - lb->first_block + 1, path); 59 60 /* 61 * Make sure the credit we accumalated is not really high 62 */ 63 if (needed && ext4_handle_has_enough_credits(handle, 64 EXT4_RESERVE_TRANS_BLOCKS)) { 65 up_write((&EXT4_I(inode)->i_data_sem)); 66 retval = ext4_journal_restart(handle, needed); 67 down_write((&EXT4_I(inode)->i_data_sem)); 68 if (retval) 69 goto err_out; 70 } else if (needed) { 71 retval = ext4_journal_extend(handle, needed); 72 if (retval) { 73 /* 74 * IF not able to extend the journal restart the journal 75 */ 76 up_write((&EXT4_I(inode)->i_data_sem)); 77 retval = ext4_journal_restart(handle, needed); 78 down_write((&EXT4_I(inode)->i_data_sem)); 79 if (retval) 80 goto err_out; 81 } 82 } 83 retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); 84 err_out: 85 up_write((&EXT4_I(inode)->i_data_sem)); 86 ext4_ext_drop_refs(path); 87 kfree(path); 88 lb->first_pblock = 0; 89 return retval; 90 } 91 92 static int update_extent_range(handle_t *handle, struct inode *inode, 93 ext4_fsblk_t pblock, struct migrate_struct *lb) 94 { 95 int retval; 96 /* 97 * See if we can add on to the existing range (if it exists) 98 */ 99 if (lb->first_pblock && 100 (lb->last_pblock+1 == pblock) && 101 (lb->last_block+1 == lb->curr_block)) { 102 lb->last_pblock = pblock; 103 lb->last_block = lb->curr_block; 104 lb->curr_block++; 105 return 0; 106 } 107 /* 108 * Start a new range. 109 */ 110 retval = finish_range(handle, inode, lb); 111 lb->first_pblock = lb->last_pblock = pblock; 112 lb->first_block = lb->last_block = lb->curr_block; 113 lb->curr_block++; 114 return retval; 115 } 116 117 static int update_ind_extent_range(handle_t *handle, struct inode *inode, 118 ext4_fsblk_t pblock, 119 struct migrate_struct *lb) 120 { 121 struct buffer_head *bh; 122 __le32 *i_data; 123 int i, retval = 0; 124 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 125 126 bh = sb_bread(inode->i_sb, pblock); 127 if (!bh) 128 return -EIO; 129 130 i_data = (__le32 *)bh->b_data; 131 for (i = 0; i < max_entries; i++) { 132 if (i_data[i]) { 133 retval = update_extent_range(handle, inode, 134 le32_to_cpu(i_data[i]), lb); 135 if (retval) 136 break; 137 } else { 138 lb->curr_block++; 139 } 140 } 141 put_bh(bh); 142 return retval; 143 144 } 145 146 static int update_dind_extent_range(handle_t *handle, struct inode *inode, 147 ext4_fsblk_t pblock, 148 struct migrate_struct *lb) 149 { 150 struct buffer_head *bh; 151 __le32 *i_data; 152 int i, retval = 0; 153 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 154 155 bh = sb_bread(inode->i_sb, pblock); 156 if (!bh) 157 return -EIO; 158 159 i_data = (__le32 *)bh->b_data; 160 for (i = 0; i < max_entries; i++) { 161 if (i_data[i]) { 162 retval = update_ind_extent_range(handle, inode, 163 le32_to_cpu(i_data[i]), lb); 164 if (retval) 165 break; 166 } else { 167 /* Only update the file block number */ 168 lb->curr_block += max_entries; 169 } 170 } 171 put_bh(bh); 172 return retval; 173 174 } 175 176 static int update_tind_extent_range(handle_t *handle, struct inode *inode, 177 ext4_fsblk_t pblock, 178 struct migrate_struct *lb) 179 { 180 struct buffer_head *bh; 181 __le32 *i_data; 182 int i, retval = 0; 183 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 184 185 bh = sb_bread(inode->i_sb, pblock); 186 if (!bh) 187 return -EIO; 188 189 i_data = (__le32 *)bh->b_data; 190 for (i = 0; i < max_entries; i++) { 191 if (i_data[i]) { 192 retval = update_dind_extent_range(handle, inode, 193 le32_to_cpu(i_data[i]), lb); 194 if (retval) 195 break; 196 } else { 197 /* Only update the file block number */ 198 lb->curr_block += max_entries * max_entries; 199 } 200 } 201 put_bh(bh); 202 return retval; 203 204 } 205 206 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 207 { 208 int retval = 0, needed; 209 210 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) 211 return 0; 212 /* 213 * We are freeing a blocks. During this we touch 214 * superblock, group descriptor and block bitmap. 215 * So allocate a credit of 3. We may update 216 * quota (user and group). 217 */ 218 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 219 220 if (ext4_journal_extend(handle, needed) != 0) 221 retval = ext4_journal_restart(handle, needed); 222 223 return retval; 224 } 225 226 static int free_dind_blocks(handle_t *handle, 227 struct inode *inode, __le32 i_data) 228 { 229 int i; 230 __le32 *tmp_idata; 231 struct buffer_head *bh; 232 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 233 234 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 235 if (!bh) 236 return -EIO; 237 238 tmp_idata = (__le32 *)bh->b_data; 239 for (i = 0; i < max_entries; i++) { 240 if (tmp_idata[i]) { 241 extend_credit_for_blkdel(handle, inode); 242 ext4_free_blocks(handle, inode, NULL, 243 le32_to_cpu(tmp_idata[i]), 1, 244 EXT4_FREE_BLOCKS_METADATA | 245 EXT4_FREE_BLOCKS_FORGET); 246 } 247 } 248 put_bh(bh); 249 extend_credit_for_blkdel(handle, inode); 250 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 251 EXT4_FREE_BLOCKS_METADATA | 252 EXT4_FREE_BLOCKS_FORGET); 253 return 0; 254 } 255 256 static int free_tind_blocks(handle_t *handle, 257 struct inode *inode, __le32 i_data) 258 { 259 int i, retval = 0; 260 __le32 *tmp_idata; 261 struct buffer_head *bh; 262 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 263 264 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 265 if (!bh) 266 return -EIO; 267 268 tmp_idata = (__le32 *)bh->b_data; 269 for (i = 0; i < max_entries; i++) { 270 if (tmp_idata[i]) { 271 retval = free_dind_blocks(handle, 272 inode, tmp_idata[i]); 273 if (retval) { 274 put_bh(bh); 275 return retval; 276 } 277 } 278 } 279 put_bh(bh); 280 extend_credit_for_blkdel(handle, inode); 281 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 282 EXT4_FREE_BLOCKS_METADATA | 283 EXT4_FREE_BLOCKS_FORGET); 284 return 0; 285 } 286 287 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 288 { 289 int retval; 290 291 /* ei->i_data[EXT4_IND_BLOCK] */ 292 if (i_data[0]) { 293 extend_credit_for_blkdel(handle, inode); 294 ext4_free_blocks(handle, inode, NULL, 295 le32_to_cpu(i_data[0]), 1, 296 EXT4_FREE_BLOCKS_METADATA | 297 EXT4_FREE_BLOCKS_FORGET); 298 } 299 300 /* ei->i_data[EXT4_DIND_BLOCK] */ 301 if (i_data[1]) { 302 retval = free_dind_blocks(handle, inode, i_data[1]); 303 if (retval) 304 return retval; 305 } 306 307 /* ei->i_data[EXT4_TIND_BLOCK] */ 308 if (i_data[2]) { 309 retval = free_tind_blocks(handle, inode, i_data[2]); 310 if (retval) 311 return retval; 312 } 313 return 0; 314 } 315 316 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 317 struct inode *tmp_inode) 318 { 319 int retval; 320 __le32 i_data[3]; 321 struct ext4_inode_info *ei = EXT4_I(inode); 322 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 323 324 /* 325 * One credit accounted for writing the 326 * i_data field of the original inode 327 */ 328 retval = ext4_journal_extend(handle, 1); 329 if (retval) { 330 retval = ext4_journal_restart(handle, 1); 331 if (retval) 332 goto err_out; 333 } 334 335 i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 336 i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 337 i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 338 339 down_write(&EXT4_I(inode)->i_data_sem); 340 /* 341 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation 342 * happened after we started the migrate. We need to 343 * fail the migrate 344 */ 345 if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { 346 retval = -EAGAIN; 347 up_write(&EXT4_I(inode)->i_data_sem); 348 goto err_out; 349 } else 350 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 351 /* 352 * We have the extent map build with the tmp inode. 353 * Now copy the i_data across 354 */ 355 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 356 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 357 358 /* 359 * Update i_blocks with the new blocks that got 360 * allocated while adding extents for extent index 361 * blocks. 362 * 363 * While converting to extents we need not 364 * update the orignal inode i_blocks for extent blocks 365 * via quota APIs. The quota update happened via tmp_inode already. 366 */ 367 spin_lock(&inode->i_lock); 368 inode->i_blocks += tmp_inode->i_blocks; 369 spin_unlock(&inode->i_lock); 370 up_write(&EXT4_I(inode)->i_data_sem); 371 372 /* 373 * We mark the inode dirty after, because we decrement the 374 * i_blocks when freeing the indirect meta-data blocks 375 */ 376 retval = free_ind_block(handle, inode, i_data); 377 ext4_mark_inode_dirty(handle, inode); 378 379 err_out: 380 return retval; 381 } 382 383 static int free_ext_idx(handle_t *handle, struct inode *inode, 384 struct ext4_extent_idx *ix) 385 { 386 int i, retval = 0; 387 ext4_fsblk_t block; 388 struct buffer_head *bh; 389 struct ext4_extent_header *eh; 390 391 block = ext4_idx_pblock(ix); 392 bh = sb_bread(inode->i_sb, block); 393 if (!bh) 394 return -EIO; 395 396 eh = (struct ext4_extent_header *)bh->b_data; 397 if (eh->eh_depth != 0) { 398 ix = EXT_FIRST_INDEX(eh); 399 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 400 retval = free_ext_idx(handle, inode, ix); 401 if (retval) 402 break; 403 } 404 } 405 put_bh(bh); 406 extend_credit_for_blkdel(handle, inode); 407 ext4_free_blocks(handle, inode, NULL, block, 1, 408 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 409 return retval; 410 } 411 412 /* 413 * Free the extent meta data blocks only 414 */ 415 static int free_ext_block(handle_t *handle, struct inode *inode) 416 { 417 int i, retval = 0; 418 struct ext4_inode_info *ei = EXT4_I(inode); 419 struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 420 struct ext4_extent_idx *ix; 421 if (eh->eh_depth == 0) 422 /* 423 * No extra blocks allocated for extent meta data 424 */ 425 return 0; 426 ix = EXT_FIRST_INDEX(eh); 427 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 428 retval = free_ext_idx(handle, inode, ix); 429 if (retval) 430 return retval; 431 } 432 return retval; 433 } 434 435 int ext4_ext_migrate(struct inode *inode) 436 { 437 handle_t *handle; 438 int retval = 0, i; 439 __le32 *i_data; 440 struct ext4_inode_info *ei; 441 struct inode *tmp_inode = NULL; 442 struct migrate_struct lb; 443 unsigned long max_entries; 444 __u32 goal; 445 uid_t owner[2]; 446 447 /* 448 * If the filesystem does not support extents, or the inode 449 * already is extent-based, error out. 450 */ 451 if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, 452 EXT4_FEATURE_INCOMPAT_EXTENTS) || 453 (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 454 return -EINVAL; 455 456 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 457 /* 458 * don't migrate fast symlink 459 */ 460 return retval; 461 462 /* 463 * Worst case we can touch the allocation bitmaps, a bgd 464 * block, and a block to link in the orphan list. We do need 465 * need to worry about credits for modifying the quota inode. 466 */ 467 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 468 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); 469 470 if (IS_ERR(handle)) { 471 retval = PTR_ERR(handle); 472 return retval; 473 } 474 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 475 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 476 owner[0] = i_uid_read(inode); 477 owner[1] = i_gid_read(inode); 478 tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root), 479 S_IFREG, NULL, goal, owner); 480 if (IS_ERR(tmp_inode)) { 481 retval = PTR_ERR(tmp_inode); 482 ext4_journal_stop(handle); 483 return retval; 484 } 485 i_size_write(tmp_inode, i_size_read(inode)); 486 /* 487 * Set the i_nlink to zero so it will be deleted later 488 * when we drop inode reference. 489 */ 490 clear_nlink(tmp_inode); 491 492 ext4_ext_tree_init(handle, tmp_inode); 493 ext4_orphan_add(handle, tmp_inode); 494 ext4_journal_stop(handle); 495 496 /* 497 * start with one credit accounted for 498 * superblock modification. 499 * 500 * For the tmp_inode we already have committed the 501 * transaction that created the inode. Later as and 502 * when we add extents we extent the journal 503 */ 504 /* 505 * Even though we take i_mutex we can still cause block 506 * allocation via mmap write to holes. If we have allocated 507 * new blocks we fail migrate. New block allocation will 508 * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated 509 * with i_data_sem held to prevent racing with block 510 * allocation. 511 */ 512 down_read(&EXT4_I(inode)->i_data_sem); 513 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 514 up_read((&EXT4_I(inode)->i_data_sem)); 515 516 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 517 if (IS_ERR(handle)) { 518 /* 519 * It is impossible to update on-disk structures without 520 * a handle, so just rollback in-core changes and live other 521 * work to orphan_list_cleanup() 522 */ 523 ext4_orphan_del(NULL, tmp_inode); 524 retval = PTR_ERR(handle); 525 goto out; 526 } 527 528 ei = EXT4_I(inode); 529 i_data = ei->i_data; 530 memset(&lb, 0, sizeof(lb)); 531 532 /* 32 bit block address 4 bytes */ 533 max_entries = inode->i_sb->s_blocksize >> 2; 534 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) { 535 if (i_data[i]) { 536 retval = update_extent_range(handle, tmp_inode, 537 le32_to_cpu(i_data[i]), &lb); 538 if (retval) 539 goto err_out; 540 } else 541 lb.curr_block++; 542 } 543 if (i_data[EXT4_IND_BLOCK]) { 544 retval = update_ind_extent_range(handle, tmp_inode, 545 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb); 546 if (retval) 547 goto err_out; 548 } else 549 lb.curr_block += max_entries; 550 if (i_data[EXT4_DIND_BLOCK]) { 551 retval = update_dind_extent_range(handle, tmp_inode, 552 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb); 553 if (retval) 554 goto err_out; 555 } else 556 lb.curr_block += max_entries * max_entries; 557 if (i_data[EXT4_TIND_BLOCK]) { 558 retval = update_tind_extent_range(handle, tmp_inode, 559 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb); 560 if (retval) 561 goto err_out; 562 } 563 /* 564 * Build the last extent 565 */ 566 retval = finish_range(handle, tmp_inode, &lb); 567 err_out: 568 if (retval) 569 /* 570 * Failure case delete the extent information with the 571 * tmp_inode 572 */ 573 free_ext_block(handle, tmp_inode); 574 else { 575 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); 576 if (retval) 577 /* 578 * if we fail to swap inode data free the extent 579 * details of the tmp inode 580 */ 581 free_ext_block(handle, tmp_inode); 582 } 583 584 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 585 if (ext4_journal_extend(handle, 1) != 0) 586 ext4_journal_restart(handle, 1); 587 588 /* 589 * Mark the tmp_inode as of size zero 590 */ 591 i_size_write(tmp_inode, 0); 592 593 /* 594 * set the i_blocks count to zero 595 * so that the ext4_evict_inode() does the 596 * right job 597 * 598 * We don't need to take the i_lock because 599 * the inode is not visible to user space. 600 */ 601 tmp_inode->i_blocks = 0; 602 603 /* Reset the extent details */ 604 ext4_ext_tree_init(handle, tmp_inode); 605 ext4_journal_stop(handle); 606 out: 607 unlock_new_inode(tmp_inode); 608 iput(tmp_inode); 609 610 return retval; 611 } 612 613 /* 614 * Migrate a simple extent-based inode to use the i_blocks[] array 615 */ 616 int ext4_ind_migrate(struct inode *inode) 617 { 618 struct ext4_extent_header *eh; 619 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 620 struct ext4_inode_info *ei = EXT4_I(inode); 621 struct ext4_extent *ex; 622 unsigned int i, len; 623 ext4_lblk_t start, end; 624 ext4_fsblk_t blk; 625 handle_t *handle; 626 int ret; 627 628 if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, 629 EXT4_FEATURE_INCOMPAT_EXTENTS) || 630 (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 631 return -EINVAL; 632 633 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 634 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) 635 return -EOPNOTSUPP; 636 637 /* 638 * In order to get correct extent info, force all delayed allocation 639 * blocks to be allocated, otherwise delayed allocation blocks may not 640 * be reflected and bypass the checks on extent header. 641 */ 642 if (test_opt(inode->i_sb, DELALLOC)) 643 ext4_alloc_da_blocks(inode); 644 645 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 646 if (IS_ERR(handle)) 647 return PTR_ERR(handle); 648 649 down_write(&EXT4_I(inode)->i_data_sem); 650 ret = ext4_ext_check_inode(inode); 651 if (ret) 652 goto errout; 653 654 eh = ext_inode_hdr(inode); 655 ex = EXT_FIRST_EXTENT(eh); 656 if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS || 657 eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) { 658 ret = -EOPNOTSUPP; 659 goto errout; 660 } 661 if (eh->eh_entries == 0) 662 blk = len = start = end = 0; 663 else { 664 len = le16_to_cpu(ex->ee_len); 665 blk = ext4_ext_pblock(ex); 666 start = le32_to_cpu(ex->ee_block); 667 end = start + len - 1; 668 if (end >= EXT4_NDIR_BLOCKS) { 669 ret = -EOPNOTSUPP; 670 goto errout; 671 } 672 } 673 674 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 675 memset(ei->i_data, 0, sizeof(ei->i_data)); 676 for (i = start; i <= end; i++) 677 ei->i_data[i] = cpu_to_le32(blk++); 678 ext4_mark_inode_dirty(handle, inode); 679 errout: 680 ext4_journal_stop(handle); 681 up_write(&EXT4_I(inode)->i_data_sem); 682 return ret; 683 } 684