1 /* 2 * Copyright IBM Corporation, 2007 3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of version 2.1 of the GNU Lesser General Public License 7 * as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 * 13 */ 14 15 #include <linux/module.h> 16 #include <linux/ext4_jbd2.h> 17 #include <linux/ext4_fs_extents.h> 18 19 /* 20 * The contiguous blocks details which can be 21 * represented by a single extent 22 */ 23 struct list_blocks_struct { 24 ext4_lblk_t first_block, last_block; 25 ext4_fsblk_t first_pblock, last_pblock; 26 }; 27 28 static int finish_range(handle_t *handle, struct inode *inode, 29 struct list_blocks_struct *lb) 30 31 { 32 int retval = 0, needed; 33 struct ext4_extent newext; 34 struct ext4_ext_path *path; 35 if (lb->first_pblock == 0) 36 return 0; 37 38 /* Add the extent to temp inode*/ 39 newext.ee_block = cpu_to_le32(lb->first_block); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 41 ext4_ext_store_pblock(&newext, lb->first_pblock); 42 path = ext4_ext_find_extent(inode, lb->first_block, NULL); 43 44 if (IS_ERR(path)) { 45 retval = PTR_ERR(path); 46 goto err_out; 47 } 48 49 /* 50 * Calculate the credit needed to inserting this extent 51 * Since we are doing this in loop we may accumalate extra 52 * credit. But below we try to not accumalate too much 53 * of them by restarting the journal. 54 */ 55 needed = ext4_ext_calc_credits_for_insert(inode, path); 56 57 /* 58 * Make sure the credit we accumalated is not really high 59 */ 60 if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { 61 retval = ext4_journal_restart(handle, needed); 62 if (retval) 63 goto err_out; 64 } else if (needed) { 65 retval = ext4_journal_extend(handle, needed); 66 if (retval) { 67 /* 68 * IF not able to extend the journal restart the journal 69 */ 70 retval = ext4_journal_restart(handle, needed); 71 if (retval) 72 goto err_out; 73 } 74 } 75 retval = ext4_ext_insert_extent(handle, inode, path, &newext); 76 err_out: 77 lb->first_pblock = 0; 78 return retval; 79 } 80 81 static int update_extent_range(handle_t *handle, struct inode *inode, 82 ext4_fsblk_t pblock, ext4_lblk_t blk_num, 83 struct list_blocks_struct *lb) 84 { 85 int retval; 86 /* 87 * See if we can add on to the existing range (if it exists) 88 */ 89 if (lb->first_pblock && 90 (lb->last_pblock+1 == pblock) && 91 (lb->last_block+1 == blk_num)) { 92 lb->last_pblock = pblock; 93 lb->last_block = blk_num; 94 return 0; 95 } 96 /* 97 * Start a new range. 98 */ 99 retval = finish_range(handle, inode, lb); 100 lb->first_pblock = lb->last_pblock = pblock; 101 lb->first_block = lb->last_block = blk_num; 102 103 return retval; 104 } 105 106 static int update_ind_extent_range(handle_t *handle, struct inode *inode, 107 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 108 struct list_blocks_struct *lb) 109 { 110 struct buffer_head *bh; 111 __le32 *i_data; 112 int i, retval = 0; 113 ext4_lblk_t blk_count = *blk_nump; 114 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 115 116 if (!pblock) { 117 /* Only update the file block number */ 118 *blk_nump += max_entries; 119 return 0; 120 } 121 122 bh = sb_bread(inode->i_sb, pblock); 123 if (!bh) 124 return -EIO; 125 126 i_data = (__le32 *)bh->b_data; 127 for (i = 0; i < max_entries; i++, blk_count++) { 128 if (i_data[i]) { 129 retval = update_extent_range(handle, inode, 130 le32_to_cpu(i_data[i]), 131 blk_count, lb); 132 if (retval) 133 break; 134 } 135 } 136 137 /* Update the file block number */ 138 *blk_nump = blk_count; 139 put_bh(bh); 140 return retval; 141 142 } 143 144 static int update_dind_extent_range(handle_t *handle, struct inode *inode, 145 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 146 struct list_blocks_struct *lb) 147 { 148 struct buffer_head *bh; 149 __le32 *i_data; 150 int i, retval = 0; 151 ext4_lblk_t blk_count = *blk_nump; 152 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 153 154 if (!pblock) { 155 /* Only update the file block number */ 156 *blk_nump += max_entries * max_entries; 157 return 0; 158 } 159 bh = sb_bread(inode->i_sb, pblock); 160 if (!bh) 161 return -EIO; 162 163 i_data = (__le32 *)bh->b_data; 164 for (i = 0; i < max_entries; i++) { 165 if (i_data[i]) { 166 retval = update_ind_extent_range(handle, inode, 167 le32_to_cpu(i_data[i]), 168 &blk_count, lb); 169 if (retval) 170 break; 171 } else { 172 /* Only update the file block number */ 173 blk_count += max_entries; 174 } 175 } 176 177 /* Update the file block number */ 178 *blk_nump = blk_count; 179 put_bh(bh); 180 return retval; 181 182 } 183 184 static int update_tind_extent_range(handle_t *handle, struct inode *inode, 185 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 186 struct list_blocks_struct *lb) 187 { 188 struct buffer_head *bh; 189 __le32 *i_data; 190 int i, retval = 0; 191 ext4_lblk_t blk_count = *blk_nump; 192 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 193 194 if (!pblock) { 195 /* Only update the file block number */ 196 *blk_nump += max_entries * max_entries * max_entries; 197 return 0; 198 } 199 bh = sb_bread(inode->i_sb, pblock); 200 if (!bh) 201 return -EIO; 202 203 i_data = (__le32 *)bh->b_data; 204 for (i = 0; i < max_entries; i++) { 205 if (i_data[i]) { 206 retval = update_dind_extent_range(handle, inode, 207 le32_to_cpu(i_data[i]), 208 &blk_count, lb); 209 if (retval) 210 break; 211 } else 212 /* Only update the file block number */ 213 blk_count += max_entries * max_entries; 214 } 215 /* Update the file block number */ 216 *blk_nump = blk_count; 217 put_bh(bh); 218 return retval; 219 220 } 221 222 static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) 223 { 224 int retval = 0, needed; 225 226 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 227 return 0; 228 /* 229 * We are freeing a blocks. During this we touch 230 * superblock, group descriptor and block bitmap. 231 * So allocate a credit of 3. We may update 232 * quota (user and group). 233 */ 234 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 235 236 if (ext4_journal_extend(handle, needed) != 0) 237 retval = ext4_journal_restart(handle, needed); 238 239 return retval; 240 } 241 242 static int free_dind_blocks(handle_t *handle, 243 struct inode *inode, __le32 i_data) 244 { 245 int i; 246 __le32 *tmp_idata; 247 struct buffer_head *bh; 248 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 249 250 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 251 if (!bh) 252 return -EIO; 253 254 tmp_idata = (__le32 *)bh->b_data; 255 for (i = 0; i < max_entries; i++) { 256 if (tmp_idata[i]) { 257 extend_credit_for_blkdel(handle, inode); 258 ext4_free_blocks(handle, inode, 259 le32_to_cpu(tmp_idata[i]), 1, 1); 260 } 261 } 262 put_bh(bh); 263 extend_credit_for_blkdel(handle, inode); 264 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 265 return 0; 266 } 267 268 static int free_tind_blocks(handle_t *handle, 269 struct inode *inode, __le32 i_data) 270 { 271 int i, retval = 0; 272 __le32 *tmp_idata; 273 struct buffer_head *bh; 274 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 275 276 bh = sb_bread(inode->i_sb, le32_to_cpu(i_data)); 277 if (!bh) 278 return -EIO; 279 280 tmp_idata = (__le32 *)bh->b_data; 281 for (i = 0; i < max_entries; i++) { 282 if (tmp_idata[i]) { 283 retval = free_dind_blocks(handle, 284 inode, tmp_idata[i]); 285 if (retval) { 286 put_bh(bh); 287 return retval; 288 } 289 } 290 } 291 put_bh(bh); 292 extend_credit_for_blkdel(handle, inode); 293 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 294 return 0; 295 } 296 297 static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) 298 { 299 int retval; 300 301 /* ei->i_data[EXT4_IND_BLOCK] */ 302 if (i_data[0]) { 303 extend_credit_for_blkdel(handle, inode); 304 ext4_free_blocks(handle, inode, 305 le32_to_cpu(i_data[0]), 1, 1); 306 } 307 308 /* ei->i_data[EXT4_DIND_BLOCK] */ 309 if (i_data[1]) { 310 retval = free_dind_blocks(handle, inode, i_data[1]); 311 if (retval) 312 return retval; 313 } 314 315 /* ei->i_data[EXT4_TIND_BLOCK] */ 316 if (i_data[2]) { 317 retval = free_tind_blocks(handle, inode, i_data[2]); 318 if (retval) 319 return retval; 320 } 321 return 0; 322 } 323 324 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 325 struct inode *tmp_inode) 326 { 327 int retval; 328 __le32 i_data[3]; 329 struct ext4_inode_info *ei = EXT4_I(inode); 330 struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode); 331 332 /* 333 * One credit accounted for writing the 334 * i_data field of the original inode 335 */ 336 retval = ext4_journal_extend(handle, 1); 337 if (retval != 0) { 338 retval = ext4_journal_restart(handle, 1); 339 if (retval) 340 goto err_out; 341 } 342 343 i_data[0] = ei->i_data[EXT4_IND_BLOCK]; 344 i_data[1] = ei->i_data[EXT4_DIND_BLOCK]; 345 i_data[2] = ei->i_data[EXT4_TIND_BLOCK]; 346 347 down_write(&EXT4_I(inode)->i_data_sem); 348 /* 349 * We have the extent map build with the tmp inode. 350 * Now copy the i_data across 351 */ 352 ei->i_flags |= EXT4_EXTENTS_FL; 353 memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); 354 355 /* 356 * Update i_blocks with the new blocks that got 357 * allocated while adding extents for extent index 358 * blocks. 359 * 360 * While converting to extents we need not 361 * update the orignal inode i_blocks for extent blocks 362 * via quota APIs. The quota update happened via tmp_inode already. 363 */ 364 spin_lock(&inode->i_lock); 365 inode->i_blocks += tmp_inode->i_blocks; 366 spin_unlock(&inode->i_lock); 367 up_write(&EXT4_I(inode)->i_data_sem); 368 369 /* 370 * We mark the inode dirty after, because we decrement the 371 * i_blocks when freeing the indirect meta-data blocks 372 */ 373 retval = free_ind_block(handle, inode, i_data); 374 ext4_mark_inode_dirty(handle, inode); 375 376 err_out: 377 return retval; 378 } 379 380 static int free_ext_idx(handle_t *handle, struct inode *inode, 381 struct ext4_extent_idx *ix) 382 { 383 int i, retval = 0; 384 ext4_fsblk_t block; 385 struct buffer_head *bh; 386 struct ext4_extent_header *eh; 387 388 block = idx_pblock(ix); 389 bh = sb_bread(inode->i_sb, block); 390 if (!bh) 391 return -EIO; 392 393 eh = (struct ext4_extent_header *)bh->b_data; 394 if (eh->eh_depth != 0) { 395 ix = EXT_FIRST_INDEX(eh); 396 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 397 retval = free_ext_idx(handle, inode, ix); 398 if (retval) 399 break; 400 } 401 } 402 put_bh(bh); 403 extend_credit_for_blkdel(handle, inode); 404 ext4_free_blocks(handle, inode, block, 1, 1); 405 return retval; 406 } 407 408 /* 409 * Free the extent meta data blocks only 410 */ 411 static int free_ext_block(handle_t *handle, struct inode *inode) 412 { 413 int i, retval = 0; 414 struct ext4_inode_info *ei = EXT4_I(inode); 415 struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data; 416 struct ext4_extent_idx *ix; 417 if (eh->eh_depth == 0) 418 /* 419 * No extra blocks allocated for extent meta data 420 */ 421 return 0; 422 ix = EXT_FIRST_INDEX(eh); 423 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) { 424 retval = free_ext_idx(handle, inode, ix); 425 if (retval) 426 return retval; 427 } 428 return retval; 429 430 } 431 432 int ext4_ext_migrate(struct inode *inode, struct file *filp, 433 unsigned int cmd, unsigned long arg) 434 { 435 handle_t *handle; 436 int retval = 0, i; 437 __le32 *i_data; 438 ext4_lblk_t blk_count = 0; 439 struct ext4_inode_info *ei; 440 struct inode *tmp_inode = NULL; 441 struct list_blocks_struct lb; 442 unsigned long max_entries; 443 444 if (!test_opt(inode->i_sb, EXTENTS)) 445 /* 446 * if mounted with noextents we don't allow the migrate 447 */ 448 return -EINVAL; 449 450 if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 451 return -EINVAL; 452 453 if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) 454 /* 455 * don't migrate fast symlink 456 */ 457 return retval; 458 459 handle = ext4_journal_start(inode, 460 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 461 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 462 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 463 + 1); 464 if (IS_ERR(handle)) { 465 retval = PTR_ERR(handle); 466 goto err_out; 467 } 468 tmp_inode = ext4_new_inode(handle, 469 inode->i_sb->s_root->d_inode, 470 S_IFREG); 471 if (IS_ERR(tmp_inode)) { 472 retval = -ENOMEM; 473 ext4_journal_stop(handle); 474 tmp_inode = NULL; 475 goto err_out; 476 } 477 i_size_write(tmp_inode, i_size_read(inode)); 478 /* 479 * We don't want the inode to be reclaimed 480 * if we got interrupted in between. We have 481 * this tmp inode carrying reference to the 482 * data blocks of the original file. We set 483 * the i_nlink to zero at the last stage after 484 * switching the original file to extent format 485 */ 486 tmp_inode->i_nlink = 1; 487 488 ext4_ext_tree_init(handle, tmp_inode); 489 ext4_orphan_add(handle, tmp_inode); 490 ext4_journal_stop(handle); 491 492 /* 493 * start with one credit accounted for 494 * superblock modification. 495 * 496 * For the tmp_inode we already have commited the 497 * trascation that created the inode. Later as and 498 * when we add extents we extent the journal 499 */ 500 /* 501 * inode_mutex prevent write and truncate on the file. Read still goes 502 * through. We take i_data_sem in ext4_ext_swap_inode_data before we 503 * switch the inode format to prevent read. 504 */ 505 mutex_lock(&(inode->i_mutex)); 506 handle = ext4_journal_start(inode, 1); 507 508 ei = EXT4_I(inode); 509 i_data = ei->i_data; 510 memset(&lb, 0, sizeof(lb)); 511 512 /* 32 bit block address 4 bytes */ 513 max_entries = inode->i_sb->s_blocksize >> 2; 514 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 515 if (i_data[i]) { 516 retval = update_extent_range(handle, tmp_inode, 517 le32_to_cpu(i_data[i]), 518 blk_count, &lb); 519 if (retval) 520 goto err_out; 521 } 522 } 523 if (i_data[EXT4_IND_BLOCK]) { 524 retval = update_ind_extent_range(handle, tmp_inode, 525 le32_to_cpu(i_data[EXT4_IND_BLOCK]), 526 &blk_count, &lb); 527 if (retval) 528 goto err_out; 529 } else 530 blk_count += max_entries; 531 if (i_data[EXT4_DIND_BLOCK]) { 532 retval = update_dind_extent_range(handle, tmp_inode, 533 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 534 &blk_count, &lb); 535 if (retval) 536 goto err_out; 537 } else 538 blk_count += max_entries * max_entries; 539 if (i_data[EXT4_TIND_BLOCK]) { 540 retval = update_tind_extent_range(handle, tmp_inode, 541 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 542 &blk_count, &lb); 543 if (retval) 544 goto err_out; 545 } 546 /* 547 * Build the last extent 548 */ 549 retval = finish_range(handle, tmp_inode, &lb); 550 err_out: 551 if (retval) 552 /* 553 * Failure case delete the extent information with the 554 * tmp_inode 555 */ 556 free_ext_block(handle, tmp_inode); 557 else 558 retval = ext4_ext_swap_inode_data(handle, inode, 559 tmp_inode); 560 561 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 562 if (ext4_journal_extend(handle, 1) != 0) 563 ext4_journal_restart(handle, 1); 564 565 /* 566 * Mark the tmp_inode as of size zero 567 */ 568 i_size_write(tmp_inode, 0); 569 570 /* 571 * set the i_blocks count to zero 572 * so that the ext4_delete_inode does the 573 * right job 574 * 575 * We don't need to take the i_lock because 576 * the inode is not visible to user space. 577 */ 578 tmp_inode->i_blocks = 0; 579 580 /* Reset the extent details */ 581 ext4_ext_tree_init(handle, tmp_inode); 582 583 /* 584 * Set the i_nlink to zero so that 585 * generic_drop_inode really deletes the 586 * inode 587 */ 588 tmp_inode->i_nlink = 0; 589 590 ext4_journal_stop(handle); 591 mutex_unlock(&(inode->i_mutex)); 592 593 if (tmp_inode) 594 iput(tmp_inode); 595 596 return retval; 597 } 598