1 /* 2 * Copyright IBM Corporation, 2007 3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of version 2.1 of the GNU Lesser General Public License 7 * as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 * 13 */ 14 15 #include <linux/slab.h> 16 #include "ext4_jbd2.h" 17 #include "ext4_extents.h" 18 19 /* 20 * The contiguous blocks details which can be 21 * represented by a single extent 22 */ 23 struct migrate_struct { 24 ext4_lblk_t first_block, last_block, curr_block; 25 ext4_fsblk_t first_pblock, last_pblock; 26 }; 27 28 static int finish_range(handle_t *handle, struct inode *inode, 29 struct migrate_struct *lb) 30 31 { 32 int retval = 0, needed; 33 struct ext4_extent newext; 34 struct ext4_ext_path *path; 35 if (lb->first_pblock == 0) 36 return 0; 37 38 /* Add the extent to temp inode*/ 39 newext.ee_block = cpu_to_le32(lb->first_block); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 41 ext4_ext_store_pblock(&newext, lb->first_pblock); 42 /* Locking only for convinience since we are operating on temp inode */ 43 down_write(&EXT4_I(inode)->i_data_sem); 44 path = ext4_find_extent(inode, lb->first_block, NULL, 0); 45 if (IS_ERR(path)) { 46 retval = PTR_ERR(path); 47 path = NULL; 48 goto err_out; 49 } 50 51 /* 52 * Calculate the credit needed to inserting this extent 53 * Since we are doing this in loop we may accumalate extra 54 * credit. But below we try to not accumalate too much 55 * of them by restarting the journal. 56 */ 57 needed = ext4_ext_calc_credits_for_single_extent(inode, 58 lb->last_block - lb->first_block + 1, path); 59 60 /* 61 * Make sure the credit we accumalated is not really high 62 */ 63 if (needed && ext4_handle_has_enough_credits(handle, 64 EXT4_RESERVE_TRANS_BLOCKS)) { 65 up_write((&EXT4_I(inode)->i_data_sem)); 66 retval = ext4_journal_restart(handle, needed); 67 down_write((&EXT4_I(inode)->i_data_sem)); 68 if (retval) 69 goto err_out; 70 } else if (needed) { 71 retval = ext4_journal_extend(handle, needed); 72 if (retval) { 73 /* 74 * IF not able to extend the journal restart the journal 75 */ 76 up_write((&EXT4_I(inode)->i_data_sem)); 77 retval = ext4_journal_restart(handle, needed); 78 down_write((&EXT4_I(inode)->i_data_sem)); 79 if (retval) 80 goto err_out; 81 } 82 } 83 retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); 84 err_out: 85 up_write((&EXT4_I(inode)->i_data_sem)); 86 ext4_ext_drop_refs(path); 87 kfree(path); 88 lb->first_pblock = 0; 89 return retval; 90 } 91 92 static int update_extent_range(handle_t *handle, struct inode *inode, 93 ext4_fsblk_t pblock, struct migrate_struct *lb) 94 { 95 int retval; 96 /* 97 * See if we can add on to the existing range (if it exists) 98 */ 99 if (lb->first_pblock && 100 (lb->last_pblock+1 == pblock) && 101 (lb->last_block+1 == lb->curr_block)) { 102 lb->last_pblock = pblock; 103 lb->last_block = lb->curr_block; 104 lb->curr_block++; 105 return 0; 106 } 107 /* 108 * Start a new range. 109 */ 110 retval = finish_range(handle, inode, lb); 111 lb->first_pblock = lb->last_pblock = pblock; 112 lb->first_block = lb->last_block = lb->curr_block; 113 lb->curr_block++; 114 return retval; 115 } 116 117 static int update_ind_extent_range(handle_t *handle, struct inode *inode, 118 ext4_fsblk_t pblock, 119 struct migrate_struct *lb) 120 { 121 struct buffer_head *bh; 122 __le32 *i_data; 123 int i, retval = 0; 124 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 125 126 bh = sb_bread(inode->i_sb, pblock); 127 if (!bh) 128 return -EIO; 129 130 i_data = (__le32 *)bh->b_data; 131 for (i = 0; i < max_entries; i++) { 132 if (i_data[i]) { 133 retval = update_extent_range(handle, inode, 134 le32_to_cpu(i_data[i]), lb); 135 if (retval) 136 break; 137 } else { 138 lb->curr_block++; 139 } 140 } 141 put_bh(bh); 142 return retval; 143 144 } 145 146 static int update_dind_extent_range(handle_t *handle, struct inode *inode, 147 ext4_fsblk_t pblock, 148 struct migrate_struct *lb) 149 { 150 struct buffer_head *bh; 151 __le32 *i_data; 152 int i, retval = 0; 153 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 154 155 bh = sb_bread(inode->i_sb, pblock); 156 if (!bh) 157 return -EIO; 158 159 i_data = (__le32 *)bh->b_data; 160 for (i = 0; i < max_entries; i++) { 161 if (i_data[i]) { 162 retval = update_ind_extent_range(handle, inode, 163 le32_to_cpu(i_data[i]), lb); 164 if (retval) 165 break; 166 } else { 167 /* Only update the file block number */ 168 lb->curr_block += max_entries; 169 } 170 } 171 put_bh(bh); 172 return retval; 173 174 } 175 176 static int update_tind_extent_range(handle_t *handle, struct inode *inode, 177 ext4_fsblk_t pblock, 178 struct migrate_struct *lb) 179 { 180 struct buffer_head *bh; 181 __le32 *i_data; 182 int i, retval = 0; 183 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 184 185 bh = sb_bread(inode->i_sb, pblock); 186 if (!bh) 187 return -EIO; 188 189 i_data = (__le32 *)bh->b_data; 190 for (i = 0; i < max_entries; i++) { 191 if (i_data[i]) { 192 retval = update_dind_extent_range(handle, inode, 193 le32_to_cpu(i_data[i]), lb); 194 if (retval) 195 break; 196 } else { 197 /* Only update the file block number */ 198 lb->curr_block += max_entries * max_entries; 199 } 200 } 201 put_bh(bh); 202 return retval; 203 204 } 205 206 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 207 { 208 int retval = 0, needed; 209 210 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) 211 return 0; 212 /* 213 * We are freeing a blocks. During this we touch 214 * superblock, group descriptor and block bitmap. 215 * So allocate a credit of 3. We may update 216 * quota (user and group). 217 */ 218 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 219 220 if (ext4_journal_extend(handle, needed) != 0) 221 retval = ext4_journal_restart(handle, needed); 222 223 return retval; 224 } 225 226 static int free_dind_blocks(handle_t *handle, 227 struct inode *inode, __le32 i_data) 228 { 229 int i; 230 __le32 *tmp_idata; 231 struct buffer_head *bh; 232 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 233 234 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 235 if (!bh) 236 return -EIO; 237 238 tmp_idata = (__le32 *)bh->b_data; 239 for (i = 0; i < max_entries; i++) { 240 if (tmp_idata[i]) { 241 extend_credit_for_blkdel(handle, inode); 242 ext4_free_blocks(handle, inode, NULL, 243 le32_to_cpu(tmp_idata[i]), 1, 244 EXT4_FREE_BLOCKS_METADATA | 245 EXT4_FREE_BLOCKS_FORGET); 246 } 247 } 248 put_bh(bh); 249 extend_credit_for_blkdel(handle, inode); 250 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 251 EXT4_FREE_BLOCKS_METADATA | 252 EXT4_FREE_BLOCKS_FORGET); 253 return 0; 254 } 255 256 static int free_tind_blocks(handle_t *handle, 257 struct inode *inode, __le32 i_data) 258 { 259 int i, retval = 0; 260 __le32 *tmp_idata; 261 struct buffer_head *bh; 262 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 263 264 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 265 if (!bh) 266 return -EIO; 267 268 tmp_idata = (__le32 *)bh->b_data; 269 for (i = 0; i < max_entries; i++) { 270 if (tmp_idata[i]) { 271 retval = free_dind_blocks(handle, 272 inode, tmp_idata[i]); 273 if (retval) { 274 put_bh(bh); 275 return retval; 276 } 277 } 278 } 279 put_bh(bh); 280 extend_credit_for_blkdel(handle, inode); 281 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, 282 EXT4_FREE_BLOCKS_METADATA | 283 EXT4_FREE_BLOCKS_FORGET); 284 return 0; 285 } 286 287 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 288 { 289 int retval; 290 291 /* ei->i_data[EXT4_IND_BLOCK] */ 292 if (i_data[0]) { 293 extend_credit_for_blkdel(handle, inode); 294 ext4_free_blocks(handle, inode, NULL, 295 le32_to_cpu(i_data[0]), 1, 296 EXT4_FREE_BLOCKS_METADATA | 297 EXT4_FREE_BLOCKS_FORGET); 298 } 299 300 /* ei->i_data[EXT4_DIND_BLOCK] */ 301 if (i_data[1]) { 302 retval = free_dind_blocks(handle, inode, i_data[1]); 303 if (retval) 304 return retval; 305 } 306 307 /* ei->i_data[EXT4_TIND_BLOCK] */ 308 if (i_data[2]) { 309 retval = free_tind_blocks(handle, inode, i_data[2]); 310 if (retval) 311 return retval; 312 } 313 return 0; 314 } 315 316 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 317 struct inode *tmp_inode) 318 { 319 int retval; 320 __le32 i_data[3]; 321 struct ext4_inode_info *ei = EXT4_I(inode); 322 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 323 324 /* 325 * One credit accounted for writing the 326 * i_data field of the original inode 327 */ 328 retval = ext4_journal_extend(handle, 1); 329 if (retval) { 330 retval = ext4_journal_restart(handle, 1); 331 if (retval) 332 goto err_out; 333 } 334 335 i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 336 i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 337 i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 338 339 down_write(&EXT4_I(inode)->i_data_sem); 340 /* 341 * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation 342 * happened after we started the migrate. We need to 343 * fail the migrate 344 */ 345 if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { 346 retval = -EAGAIN; 347 up_write(&EXT4_I(inode)->i_data_sem); 348 goto err_out; 349 } else 350 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 351 /* 352 * We have the extent map build with the tmp inode. 353 * Now copy the i_data across 354 */ 355 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 356 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 357 358 /* 359 * Update i_blocks with the new blocks that got 360 * allocated while adding extents for extent index 361 * blocks. 362 * 363 * While converting to extents we need not 364 * update the original inode i_blocks for extent blocks 365 * via quota APIs. The quota update happened via tmp_inode already. 366 */ 367 spin_lock(&inode->i_lock); 368 inode->i_blocks += tmp_inode->i_blocks; 369 spin_unlock(&inode->i_lock); 370 up_write(&EXT4_I(inode)->i_data_sem); 371 372 /* 373 * We mark the inode dirty after, because we decrement the 374 * i_blocks when freeing the indirect meta-data blocks 375 */ 376 retval = free_ind_block(handle, inode, i_data); 377 ext4_mark_inode_dirty(handle, inode); 378 379 err_out: 380 return retval; 381 } 382 383 static int free_ext_idx(handle_t *handle, struct inode *inode, 384 struct ext4_extent_idx *ix) 385 { 386 int i, retval = 0; 387 ext4_fsblk_t block; 388 struct buffer_head *bh; 389 struct ext4_extent_header *eh; 390 391 block = ext4_idx_pblock(ix); 392 bh = sb_bread(inode->i_sb, block); 393 if (!bh) 394 return -EIO; 395 396 eh = (struct ext4_extent_header *)bh->b_data; 397 if (eh->eh_depth != 0) { 398 ix = EXT_FIRST_INDEX(eh); 399 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 400 retval = free_ext_idx(handle, inode, ix); 401 if (retval) 402 break; 403 } 404 } 405 put_bh(bh); 406 extend_credit_for_blkdel(handle, inode); 407 ext4_free_blocks(handle, inode, NULL, block, 1, 408 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 409 return retval; 410 } 411 412 /* 413 * Free the extent meta data blocks only 414 */ 415 static int free_ext_block(handle_t *handle, struct inode *inode) 416 { 417 int i, retval = 0; 418 struct ext4_inode_info *ei = EXT4_I(inode); 419 struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 420 struct ext4_extent_idx *ix; 421 if (eh->eh_depth == 0) 422 /* 423 * No extra blocks allocated for extent meta data 424 */ 425 return 0; 426 ix = EXT_FIRST_INDEX(eh); 427 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 428 retval = free_ext_idx(handle, inode, ix); 429 if (retval) 430 return retval; 431 } 432 return retval; 433 } 434 435 int ext4_ext_migrate(struct inode *inode) 436 { 437 handle_t *handle; 438 int retval = 0, i; 439 __le32 *i_data; 440 struct ext4_inode_info *ei; 441 struct inode *tmp_inode = NULL; 442 struct migrate_struct lb; 443 unsigned long max_entries; 444 __u32 goal; 445 uid_t owner[2]; 446 447 /* 448 * If the filesystem does not support extents, or the inode 449 * already is extent-based, error out. 450 */ 451 if (!ext4_has_feature_extents(inode->i_sb) || 452 (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 453 return -EINVAL; 454 455 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 456 /* 457 * don't migrate fast symlink 458 */ 459 return retval; 460 461 /* 462 * Worst case we can touch the allocation bitmaps, a bgd 463 * block, and a block to link in the orphan list. We do need 464 * need to worry about credits for modifying the quota inode. 465 */ 466 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 467 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); 468 469 if (IS_ERR(handle)) { 470 retval = PTR_ERR(handle); 471 return retval; 472 } 473 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 474 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 475 owner[0] = i_uid_read(inode); 476 owner[1] = i_gid_read(inode); 477 tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root), 478 S_IFREG, NULL, goal, owner); 479 if (IS_ERR(tmp_inode)) { 480 retval = PTR_ERR(tmp_inode); 481 ext4_journal_stop(handle); 482 return retval; 483 } 484 i_size_write(tmp_inode, i_size_read(inode)); 485 /* 486 * Set the i_nlink to zero so it will be deleted later 487 * when we drop inode reference. 488 */ 489 clear_nlink(tmp_inode); 490 491 ext4_ext_tree_init(handle, tmp_inode); 492 ext4_orphan_add(handle, tmp_inode); 493 ext4_journal_stop(handle); 494 495 /* 496 * start with one credit accounted for 497 * superblock modification. 498 * 499 * For the tmp_inode we already have committed the 500 * transaction that created the inode. Later as and 501 * when we add extents we extent the journal 502 */ 503 /* 504 * Even though we take i_mutex we can still cause block 505 * allocation via mmap write to holes. If we have allocated 506 * new blocks we fail migrate. New block allocation will 507 * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated 508 * with i_data_sem held to prevent racing with block 509 * allocation. 510 */ 511 down_read(&EXT4_I(inode)->i_data_sem); 512 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 513 up_read((&EXT4_I(inode)->i_data_sem)); 514 515 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 516 if (IS_ERR(handle)) { 517 /* 518 * It is impossible to update on-disk structures without 519 * a handle, so just rollback in-core changes and live other 520 * work to orphan_list_cleanup() 521 */ 522 ext4_orphan_del(NULL, tmp_inode); 523 retval = PTR_ERR(handle); 524 goto out; 525 } 526 527 ei = EXT4_I(inode); 528 i_data = ei->i_data; 529 memset(&lb, 0, sizeof(lb)); 530 531 /* 32 bit block address 4 bytes */ 532 max_entries = inode->i_sb->s_blocksize >> 2; 533 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) { 534 if (i_data[i]) { 535 retval = update_extent_range(handle, tmp_inode, 536 le32_to_cpu(i_data[i]), &lb); 537 if (retval) 538 goto err_out; 539 } else 540 lb.curr_block++; 541 } 542 if (i_data[EXT4_IND_BLOCK]) { 543 retval = update_ind_extent_range(handle, tmp_inode, 544 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb); 545 if (retval) 546 goto err_out; 547 } else 548 lb.curr_block += max_entries; 549 if (i_data[EXT4_DIND_BLOCK]) { 550 retval = update_dind_extent_range(handle, tmp_inode, 551 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb); 552 if (retval) 553 goto err_out; 554 } else 555 lb.curr_block += max_entries * max_entries; 556 if (i_data[EXT4_TIND_BLOCK]) { 557 retval = update_tind_extent_range(handle, tmp_inode, 558 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb); 559 if (retval) 560 goto err_out; 561 } 562 /* 563 * Build the last extent 564 */ 565 retval = finish_range(handle, tmp_inode, &lb); 566 err_out: 567 if (retval) 568 /* 569 * Failure case delete the extent information with the 570 * tmp_inode 571 */ 572 free_ext_block(handle, tmp_inode); 573 else { 574 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); 575 if (retval) 576 /* 577 * if we fail to swap inode data free the extent 578 * details of the tmp inode 579 */ 580 free_ext_block(handle, tmp_inode); 581 } 582 583 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 584 if (ext4_journal_extend(handle, 1) != 0) 585 ext4_journal_restart(handle, 1); 586 587 /* 588 * Mark the tmp_inode as of size zero 589 */ 590 i_size_write(tmp_inode, 0); 591 592 /* 593 * set the i_blocks count to zero 594 * so that the ext4_evict_inode() does the 595 * right job 596 * 597 * We don't need to take the i_lock because 598 * the inode is not visible to user space. 599 */ 600 tmp_inode->i_blocks = 0; 601 602 /* Reset the extent details */ 603 ext4_ext_tree_init(handle, tmp_inode); 604 ext4_journal_stop(handle); 605 out: 606 unlock_new_inode(tmp_inode); 607 iput(tmp_inode); 608 609 return retval; 610 } 611 612 /* 613 * Migrate a simple extent-based inode to use the i_blocks[] array 614 */ 615 int ext4_ind_migrate(struct inode *inode) 616 { 617 struct ext4_extent_header *eh; 618 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 619 struct ext4_inode_info *ei = EXT4_I(inode); 620 struct ext4_extent *ex; 621 unsigned int i, len; 622 ext4_lblk_t start, end; 623 ext4_fsblk_t blk; 624 handle_t *handle; 625 int ret; 626 627 if (!ext4_has_feature_extents(inode->i_sb) || 628 (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 629 return -EINVAL; 630 631 if (ext4_has_feature_bigalloc(inode->i_sb)) 632 return -EOPNOTSUPP; 633 634 /* 635 * In order to get correct extent info, force all delayed allocation 636 * blocks to be allocated, otherwise delayed allocation blocks may not 637 * be reflected and bypass the checks on extent header. 638 */ 639 if (test_opt(inode->i_sb, DELALLOC)) 640 ext4_alloc_da_blocks(inode); 641 642 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 643 if (IS_ERR(handle)) 644 return PTR_ERR(handle); 645 646 down_write(&EXT4_I(inode)->i_data_sem); 647 ret = ext4_ext_check_inode(inode); 648 if (ret) 649 goto errout; 650 651 eh = ext_inode_hdr(inode); 652 ex = EXT_FIRST_EXTENT(eh); 653 if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS || 654 eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) { 655 ret = -EOPNOTSUPP; 656 goto errout; 657 } 658 if (eh->eh_entries == 0) 659 blk = len = start = end = 0; 660 else { 661 len = le16_to_cpu(ex->ee_len); 662 blk = ext4_ext_pblock(ex); 663 start = le32_to_cpu(ex->ee_block); 664 end = start + len - 1; 665 if (end >= EXT4_NDIR_BLOCKS) { 666 ret = -EOPNOTSUPP; 667 goto errout; 668 } 669 } 670 671 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 672 memset(ei->i_data, 0, sizeof(ei->i_data)); 673 for (i = start; i <= end; i++) 674 ei->i_data[i] = cpu_to_le32(blk++); 675 ext4_mark_inode_dirty(handle, inode); 676 errout: 677 ext4_journal_stop(handle); 678 up_write(&EXT4_I(inode)->i_data_sem); 679 return ret; 680 } 681