/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"

static int ext4_ext_truncate_extend_restart(handle_t *handle,
					    struct inode *inode,
					    int needed)
{
	int err;

	if (!ext4_handle_valid(handle))
		return 0;
	if (handle->h_buffer_credits > needed)
		return 0;
	err = ext4_journal_extend(handle, needed);
	if (err <= 0)
		return err;
	err = ext4_truncate_restart_trans(handle, inode, needed);
	if (err == 0)
		err = -EAGAIN;

	return err;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	if (path->p_bh) {
		/* path points to block */
		return ext4_journal_get_write_access(handle, path->p_bh);
	}
	/* path points to leaf/index in inode body */
	/* we use in-core data, no need to protect them */
	return 0;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	int err;
	if (path->p_bh) {
		/* path points to block */
		err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
	}
	return err;
}
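/*
 * ext4_ext_find_goal:
 * pick a physical block to aim for when allocating near @block.
 * Prefer continuing the extent found in @path; failing that, use the
 * index block itself, and as a last resort fall back to the inode's
 * block group (plus a per-process "colour" offset, unless delayed
 * allocation is enabled, in which case mballoc picks the block).
 */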
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
				       struct ext4_ext_path *path,
				       ext4_lblk_t block)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_fsblk_t bg_start;
	ext4_fsblk_t last_block;
	ext4_grpblk_t colour;
	ext4_group_t block_group;
	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
	int depth;

	if (path) {
		struct ext4_extent *ex;
		depth = path->p_depth;

		/* try to predict block placement */
		ex = path[depth].p_ext;
		if (ex)
			return (ext4_ext_pblock(ex) +
				(block - le32_to_cpu(ex->ee_block)));

		/* it looks like index is empty;
		 * try to find starting block from index itself */
		if (path[depth].p_bh)
			return path[depth].p_bh->b_blocknr;
	}

	/* OK. use inode's group */
	block_group = ei->i_block_group;
	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
		/*
		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
		 * block groups per flexgroup, reserve the first block
		 * group for directories and special files.  Regular
		 * files will start at the second block group.  This
		 * tends to speed up directory access and improves
		 * fsck times.
		 */
		block_group &= ~(flex_size-1);
		if (S_ISREG(inode->i_mode))
			block_group++;
	}
	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

	/*
	 * If we are doing delayed allocation, we don't need to take
	 * colour into account.
	 */
	if (test_opt(inode->i_sb, DELALLOC))
		return bg_start;

	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
		colour = (current->pid % 16) *
			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
	else
		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
	return bg_start + colour + block;
}

/*
 * Allocation for a metadata block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path,
			struct ext4_extent *ex, int *err)
{
	ext4_fsblk_t goal, newblock;

	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
	newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
	return newblock;
}

static inline int ext4_ext_space_block(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent);
	if (!check) {
#ifdef AGGRESSIVE_TEST
		if (size > 6)
			size = 6;
#endif
	}
	return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent_idx);
	if (!check) {
#ifdef AGGRESSIVE_TEST
		if (size > 5)
			size = 5;
#endif
	}
	return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);
	if (!check) {
#ifdef AGGRESSIVE_TEST
		if (size > 3)
			size = 3;
#endif
	}
	return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent_idx);
	if (!check) {
#ifdef AGGRESSIVE_TEST
		if (size > 4)
			size = 4;
#endif
	}
	return size;
}
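/*
 * For illustration: with 4 KiB blocks, the header and each entry are
 * 12 bytes, so a full block holds (4096 - 12) / 12 = 340 extents or
 * index entries, while the 60-byte i_data area in the inode holds
 * (60 - 12) / 12 = 4 entries.  (Numbers assume the on-disk structure
 * sizes above; AGGRESSIVE_TEST shrinks the limits artificially to
 * exercise tree growth.)
 */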
/*
 * Calculate the number of metadata blocks needed
 * to allocate @blocks
 * Worst case is one block per extent
 */
int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int idxs, num = 0;

	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
		/ sizeof(struct ext4_extent_idx));

	/*
	 * If the new delayed allocation block is contiguous with the
	 * previous da block, it can share index blocks with the
	 * previous block, so we only need to allocate a new index
	 * block every idxs leaf blocks.  At idxs**2 blocks, we need
	 * an additional index block, and at idxs**3 blocks, yet
	 * another index block.
	 */
	if (ei->i_da_metadata_calc_len &&
	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
		if ((ei->i_da_metadata_calc_len % idxs) == 0)
			num++;
		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
			num++;
		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
			num++;
			ei->i_da_metadata_calc_len = 0;
		} else
			ei->i_da_metadata_calc_len++;
		ei->i_da_metadata_calc_last_lblock++;
		return num;
	}

	/*
	 * In the worst case we need a new set of index blocks at
	 * every level of the inode's extent tree.
	 */
	ei->i_da_metadata_calc_len = 1;
	ei->i_da_metadata_calc_last_lblock = lblock;
	return ext_depth(inode) + 1;
}

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int max;

	if (depth == ext_depth(inode)) {
		if (depth == 0)
			max = ext4_ext_space_root(inode, 1);
		else
			max = ext4_ext_space_root_idx(inode, 1);
	} else {
		if (depth == 0)
			max = ext4_ext_space_block(inode, 1);
		else
			max = ext4_ext_space_block_idx(inode, 1);
	}

	return max;
}

static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
	ext4_fsblk_t block = ext4_ext_pblock(ext);
	int len = ext4_ext_get_actual_len(ext);

	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}

static int ext4_valid_extent_idx(struct inode *inode,
				struct ext4_extent_idx *ext_idx)
{
	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);

	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}

static int ext4_valid_extent_entries(struct inode *inode,
				struct ext4_extent_header *eh,
				int depth)
{
	struct ext4_extent *ext;
	struct ext4_extent_idx *ext_idx;
	unsigned short entries;
	if (eh->eh_entries == 0)
		return 1;

	entries = le16_to_cpu(eh->eh_entries);

	if (depth == 0) {
		/* leaf entries */
		ext = EXT_FIRST_EXTENT(eh);
		while (entries) {
			if (!ext4_valid_extent(inode, ext))
				return 0;
			ext++;
			entries--;
		}
	} else {
		ext_idx = EXT_FIRST_INDEX(eh);
		while (entries) {
			if (!ext4_valid_extent_idx(inode, ext_idx))
				return 0;
			ext_idx++;
			entries--;
		}
	}
	return 1;
}
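/*
 * Sanity-check an extent tree header (and its entries) before use:
 * verify the magic, the expected depth, the entry counts, and that
 * every referenced block lands inside the filesystem.  On failure the
 * inode is flagged as corrupted via ext4_error_inode() and -EIO is
 * returned.
 */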
static int __ext4_ext_check(const char *function, unsigned int line,
			    struct inode *inode, struct ext4_extent_header *eh,
			    int depth)
{
	const char *error_msg;
	int max = 0;

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	if (!ext4_valid_extent_entries(inode, eh, depth)) {
		error_msg = "invalid extent entries";
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error_inode(inode, function, line, 0,
			"bad header/extent: %s - magic %x, "
			"entries %u, max %u(%u), depth %u(%u)",
			error_msg, le16_to_cpu(eh->eh_magic),
			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
			max, le16_to_cpu(eh->eh_depth), depth);

	return -EIO;
}

#define ext4_ext_check(inode, eh, depth)	\
	__ext4_ext_check(__func__, __LINE__, inode, eh, depth)

int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
}

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug("path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
			ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
				  ext4_idx_pblock(path->p_idx));
		} else if (path->p_ext) {
			ext_debug("  %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_uninitialized(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
		} else
			ext_debug("  []");
	}
	ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug("\n");
}
#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#endif

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
	int depth = path->p_depth;
	int i;

	for (i = 0; i <= depth; i++, path++)
		if (path->p_bh) {
			brelse(path->p_bh);
			path->p_bh = NULL;
		}
}
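/*
 * Both binary searches below maintain the same invariant: entries are
 * sorted by starting logical block, l starts at the second entry, and
 * on exit l points at the first entry whose start is greater than
 * @block, so l - 1 is the rightmost entry that starts at or before
 * @block.  For example (illustrative), over entries starting at 0,
 * 100 and 200, a search for block 150 leaves l at the 200 entry and
 * picks the 100 entry.
 */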
/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;


	ext_debug("binsearch for %u(idx):  ", block);

	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_LAST_INDEX(eh);
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
				m, le32_to_cpu(m->ei_block),
				r, le32_to_cpu(r->ei_block));
	}

	path->p_idx = l - 1;
	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
			if (k != 0 &&
			    le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
				printk(KERN_DEBUG "%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
					   <= le32_to_cpu(ix[-1].ei_block));
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif

}

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
		 * this leaf is empty:
		 * we get such a leaf in split/add case
		 */
		return;
	}

	ext_debug("binsearch for %u:  ", block);

	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_LAST_EXTENT(eh);

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
				m, le32_to_cpu(m->ee_block),
				r, le32_to_cpu(r->ee_block));
	}

	path->p_ext = l - 1;
	ext_debug("  -> %d:%llu:[%d]%d ",
			le32_to_cpu(path->p_ext->ee_block),
			ext4_ext_pblock(path->p_ext),
			ext4_ext_is_uninitialized(path->p_ext),
			ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
					  <= le32_to_cpu(ex[-1].ee_block));
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif

}

int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = EXT4_EXT_MAGIC;
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
	ext4_mark_inode_dirty(handle, inode);
	ext4_ext_invalidate_cache(inode);
	return 0;
}
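/*
 * ext4_ext_find_extent:
 * walk the tree from the root down to the leaf that should contain
 * @block, recording the header, index/extent entry and buffer of every
 * level in a path array (allocated with room for depth + 2 levels to
 * allow for growth).  A path passed in by the caller is reused;
 * otherwise one is allocated.  The caller owns the result and must
 * release it with ext4_ext_drop_refs() plus kfree() when done.
 */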
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
					struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	short int depth, i, ppos = 0, alloc = 0;

	eh = ext_inode_hdr(inode);
	depth = ext_depth(inode);

	/* account possible depth increase */
	if (!path) {
		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
				GFP_NOFS);
		if (!path)
			return ERR_PTR(-ENOMEM);
		alloc = 1;
	}
	path[0].p_hdr = eh;
	path[0].p_bh = NULL;

	i = depth;
	/* walk through the tree */
	while (i) {
		int need_to_validate = 0;

		ext_debug("depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

		ext4_ext_binsearch_idx(inode, path + ppos, block);
		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = sb_getblk(inode->i_sb, path[ppos].p_block);
		if (unlikely(!bh))
			goto err;
		if (!bh_uptodate_or_lock(bh)) {
			if (bh_submit_read(bh) < 0) {
				put_bh(bh);
				goto err;
			}
			/* validate the extent entries */
			need_to_validate = 1;
		}
		eh = ext_block_hdr(bh);
		ppos++;
		if (unlikely(ppos > depth)) {
			put_bh(bh);
			EXT4_ERROR_INODE(inode,
					 "ppos %d > depth %d", ppos, depth);
			goto err;
		}
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
		i--;

		if (need_to_validate && ext4_ext_check(inode, eh, i))
			goto err;
	}

	path[ppos].p_depth = i;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);
	/* if not an empty leaf */
	if (path[ppos].p_ext)
		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);
	if (alloc)
		kfree(path);
	return ERR_PTR(-EIO);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				 struct ext4_ext_path *curp,
				 int logical, ext4_fsblk_t ptr)
{
	struct ext4_extent_idx *ix;
	int len, err;

	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		return err;

	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d == ei_block %d!",
				 logical, le32_to_cpu(curp->p_idx->ei_block));
		return -EIO;
	}
	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
			len = (len - 1) * sizeof(struct ext4_extent_idx);
			len = len < 0 ? 0 : len;
			ext_debug("insert new index %d after: %llu. "
					"move %d from 0x%p to 0x%p\n",
					logical, ptr, len,
					(curp->p_idx + 1), (curp->p_idx + 2));
			memmove(curp->p_idx + 2, curp->p_idx + 1, len);
		}
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		len = len * sizeof(struct ext4_extent_idx);
		len = len < 0 ? 0 : len;
		ext_debug("insert new index %d before: %llu. "
				"move %d from 0x%p to 0x%p\n",
				logical, ptr, len,
				curp->p_idx, (curp->p_idx + 1));
		memmove(curp->p_idx + 1, curp->p_idx, len);
		ix = curp->p_idx;
	}

	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);

	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
			     > le16_to_cpu(curp->p_hdr->eh_max))) {
		EXT4_ERROR_INODE(inode,
				 "eh_entries %d > eh_max %d!",
				 le16_to_cpu(curp->p_hdr->eh_entries),
				 le16_to_cpu(curp->p_hdr->eh_max));
		return -EIO;
	}
	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
		return -EIO;
	}

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
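/*
 * For instance (illustrative), splitting a depth-1 tree at the third
 * of four leaf extents moves the last two extents into a freshly
 * allocated leaf and hooks that leaf into the free index slot above:
 *
 *	before:  root -> [A B C D]
 *	after:   root -> [A B] [C D]
 *	                  old   new
 */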
static int ext4_ext_split(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_extent *newext, int at)
{
	struct buffer_head *bh = NULL;
	int depth = ext_depth(inode);
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	struct ext4_extent *ex;
	int i = at, k, m, a;
	ext4_fsblk_t newblock, oldblock;
	__le32 border;
	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
	int err = 0;

	/* make decision: where to split? */
	/* FIXME: now decision is simplest: at current extent */

	/* if current leaf will be split, then we should use
	 * border from split point */
	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
		return -EIO;
	}
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
		ext_debug("leaf will be split."
				" next leaf starts at %d\n",
				  le32_to_cpu(border));
	} else {
		border = newext->ee_block;
		ext_debug("leaf will be added."
				" next leaf starts at %d\n",
				le32_to_cpu(border));
	}

	/*
	 * If error occurs, then we break processing
	 * and mark filesystem read-only. index won't
	 * be inserted and tree will be in consistent
	 * state. Next mount will repair buffers too.
	 */

	/*
	 * Get array to track all allocated blocks.
	 * We need this to handle errors and free blocks
	 * upon them.
	 */
	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	if (unlikely(newblock == 0)) {
		EXT4_ERROR_INODE(inode, "newblock == 0!");
		err = -EIO;
		goto cleanup;
	}
	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		goto cleanup;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err)
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;
	ex = EXT_FIRST_EXTENT(neh);

	/* move remainder of path[depth] to the new leaf */
	if (unlikely(path[depth].p_hdr->eh_entries !=
		     path[depth].p_hdr->eh_max)) {
		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
				 le16_to_cpu(path[depth].p_hdr->eh_entries),
				 le16_to_cpu(path[depth].p_hdr->eh_max));
		err = -EIO;
		goto cleanup;
	}
	/* start copy from next extent */
	/* TODO: we could do it by single memmove */
	m = 0;
	path[depth].p_ext++;
	while (path[depth].p_ext <=
			EXT_MAX_EXTENT(path[depth].p_hdr)) {
		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(path[depth].p_ext->ee_block),
				ext4_ext_pblock(path[depth].p_ext),
				ext4_ext_is_uninitialized(path[depth].p_ext),
				ext4_ext_get_actual_len(path[depth].p_ext),
				newblock);
		/*memmove(ex++, path[depth].p_ext++,
				sizeof(struct ext4_extent));
		neh->eh_entries++;*/
		path[depth].p_ext++;
		m++;
	}
	if (m) {
		memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
		le16_add_cpu(&neh->eh_entries, m);
	}

	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	if (m) {
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto cleanup;
		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto cleanup;

	}

	/* create intermediate indexes */
	k = depth - at - 1;
	if (unlikely(k < 0)) {
		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
		err = -EIO;
		goto cleanup;
	}
	if (k)
		ext_debug("create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
		bh = sb_getblk(inode->i_sb, newblock);
		if (!bh) {
			err = -EIO;
			goto cleanup;
		}
		lock_buffer(bh);

		err = ext4_journal_get_create_access(handle, bh);
		if (err)
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
		neh->eh_depth = cpu_to_le16(depth - i);
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
		ext4_idx_store_pblock(fidx, oldblock);

		ext_debug("int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);
		/* copy indexes */
		m = 0;
		path[i].p_idx++;

		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
					EXT_LAST_INDEX(path[i].p_hdr))) {
			EXT4_ERROR_INODE(inode,
					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
					 le32_to_cpu(path[i].p_ext->ee_block));
			err = -EIO;
			goto cleanup;
		}
		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
			ext_debug("%d: move %d:%llu in new index %llu\n", i,
					le32_to_cpu(path[i].p_idx->ei_block),
					ext4_idx_pblock(path[i].p_idx),
					newblock);
			/*memmove(++fidx, path[i].p_idx++,
					sizeof(struct ext4_extent_idx));
			neh->eh_entries++;
			BUG_ON(neh->eh_entries > neh->eh_max);*/
			path[i].p_idx++;
			m++;
		}
		if (m) {
			memmove(++fidx, path[i].p_idx - m,
				sizeof(struct ext4_extent_idx) * m);
			le16_add_cpu(&neh->eh_entries, m);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
		}
	}
	kfree(ablocks);

	return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
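/*
 * Schematically (illustrative): the root stored in the inode's i_data
 * is copied verbatim into a new block, and the root is rewritten as a
 * single index entry pointing at that block, so the tree depth grows
 * by one while every on-disk pointer below stays valid:
 *
 *	inode[root]	becomes		inode[idx] -> block[old root copy]
 */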
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
					struct ext4_ext_path *path,
					struct ext4_extent *newext)
{
	struct ext4_ext_path *curp = path;
	struct ext4_extent_header *neh;
	struct buffer_head *bh;
	ext4_fsblk_t newblock;
	int err = 0;

	newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
	if (newblock == 0)
		return err;

	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		ext4_std_error(inode->i_sb, err);
		return err;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
		unlock_buffer(bh);
		goto out;
	}

	/* move top-level index/leaf into new block */
	memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* old root could have indexes or leaves
	 * so calculate eh_max the right way */
	if (ext_depth(inode))
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
	else
		neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

	/* create index in new top-level index: num,max,pointer */
	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		goto out;

	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
	curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
	curp->p_hdr->eh_entries = cpu_to_le16(1);
	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);

	if (path[0].p_hdr->eh_depth)
		curp->p_idx->ei_block =
			EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
	else
		curp->p_idx->ei_block =
			EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
	ext4_idx_store_pblock(curp->p_idx, newblock);

	neh = ext_inode_hdr(inode);
	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
	err = ext4_ext_dirty(handle, inode, curp);
out:
	brelse(bh);

	return err;
}
/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
					struct ext4_ext_path *path,
					struct ext4_extent *newext)
{
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up the tree and look for free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

	/* we use already allocated block for index block,
	 * so subsequent data blocks should be contiguous */
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found index with free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, path, newext, i);
		if (err)
			goto out;

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, path, newext);
		if (err)
			goto out;

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}

		/*
		 * only first (depth 0 -> 1) produces free space;
		 * in all other cases we have to split the grown tree
		 */
		depth = ext_depth(inode);
		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
			/* now we need to split */
			goto repeat;
		}
	}

out:
	return err;
}
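/*
 * Worked example (illustrative) for the two neighbour searches below:
 * with extents [0..9] -> P and [20..29] -> Q and *logical == 15,
 * searching left yields logical 9 / physical P + 9, while searching
 * right yields logical 20 / physical Q.
 */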
/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_left(struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth, ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EIO;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
					 *logical, le32_to_cpu(ex->ee_block));
			return -EIO;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
				  ix != NULL ? ix->ei_block : 0,
				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
				    EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
				  depth);
				return -EIO;
			}
		}
		return 0;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EIO;
	}

	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext4_ext_pblock(ex) + ee_len - 1;
	return 0;
}
/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct buffer_head *bh = NULL;
	struct ext4_extent_header *eh;
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	ext4_fsblk_t block;
	int depth;	/* Note, NOT eh_depth; depth from top of tree */
	int ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EIO;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "first_extent(path[%d].p_hdr) != ex",
					 depth);
			return -EIO;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
						 "ix != EXT_FIRST_INDEX *logical %d!",
						 *logical);
				return -EIO;
			}
		}
		*logical = le32_to_cpu(ex->ee_block);
		*phys = ext4_ext_pblock(ex);
		return 0;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EIO;
	}

	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
		ex++;
		*logical = le32_to_cpu(ex->ee_block);
		*phys = ext4_ext_pblock(ex);
		return 0;
	}

	/* go up and search for index to the right */
	while (--depth >= 0) {
		ix = path[depth].p_idx;
		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
			goto got_index;
	}

	/* we've gone up to the root and found no index to the right */
	return 0;

got_index:
	/* we've found index to the right, let's
	 * follow it and find the closest allocated
	 * block to the right */
	ix++;
	block = ext4_idx_pblock(ix);
	while (++depth < path->p_depth) {
		bh = sb_bread(inode->i_sb, block);
		if (bh == NULL)
			return -EIO;
		eh = ext_block_hdr(bh);
		/* subtract from p_depth to get proper eh_depth */
		if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
			put_bh(bh);
			return -EIO;
		}
		ix = EXT_FIRST_INDEX(eh);
		block = ext4_idx_pblock(ix);
		put_bh(bh);
	}

	bh = sb_bread(inode->i_sb, block);
	if (bh == NULL)
		return -EIO;
	eh = ext_block_hdr(bh);
	if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
		put_bh(bh);
		return -EIO;
	}
	ex = EXT_FIRST_EXTENT(eh);
	*logical = le32_to_cpu(ex->ee_block);
	*phys = ext4_ext_pblock(ex);
	put_bh(bh);
	return 0;
}
/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
static ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	if (depth == 0 && path->p_ext == NULL)
		return EXT_MAX_BLOCK;

	while (depth >= 0) {
		if (depth == path->p_depth) {
			/* leaf */
			if (path[depth].p_ext !=
					EXT_LAST_EXTENT(path[depth].p_hdr))
				return le32_to_cpu(path[depth].p_ext[1].ee_block);
		} else {
			/* index */
			if (path[depth].p_idx !=
					EXT_LAST_INDEX(path[depth].p_hdr))
				return le32_to_cpu(path[depth].p_idx[1].ei_block);
		}
		depth--;
	}

	return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCK
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
					    struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	/* zero-tree has no leaf blocks at all */
	if (depth == 0)
		return EXT_MAX_BLOCK;

	/* go to index block */
	depth--;

	while (depth >= 0) {
		if (path[depth].p_idx !=
				EXT_LAST_INDEX(path[depth].p_hdr))
			return (ext4_lblk_t)
				le32_to_cpu(path[depth].p_idx[1].ei_block);
		depth--;
	}

	return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_correct_indexes:
 * if leaf gets modified and modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct tree in all cases?
 */
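/*
 * E.g. (illustrative): if the first extent of a leaf now starts at
 * logical block 50 instead of 40, the parent index entry that points
 * at the leaf must also say 50, and so on up the path as long as the
 * updated index is itself the first entry of its block.
 */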
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
				    struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	__le32 border;
	int k, err = 0;

	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;

	if (unlikely(ex == NULL || eh == NULL)) {
		EXT4_ERROR_INODE(inode,
				 "ex %p == NULL or eh %p == NULL", ex, eh);
		return -EIO;
	}

	if (depth == 0) {
		/* there is no tree at all */
		return 0;
	}

	if (ex != EXT_FIRST_EXTENT(eh)) {
		/* we correct tree if first leaf got modified only */
		return 0;
	}

	/*
	 * TODO: we need correction if border is smaller than current one
	 */
	k = depth - 1;
	border = path[depth].p_ext->ee_block;
	err = ext4_ext_get_access(handle, inode, path + k);
	if (err)
		return err;
	path[k].p_idx->ei_block = border;
	err = ext4_ext_dirty(handle, inode, path + k);
	if (err)
		return err;

	while (k--) {
		/* change all left-side indexes */
		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
			break;
		err = ext4_ext_get_access(handle, inode, path + k);
		if (err)
			break;
		path[k].p_idx->ei_block = border;
		err = ext4_ext_dirty(handle, inode, path + k);
		if (err)
			break;
	}

	return err;
}

int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
				struct ext4_extent *ex2)
{
	unsigned short ext1_ee_len, ext2_ee_len, max_len;

	/*
	 * Make sure that either both extents are uninitialized, or
	 * both are _not_.
	 */
	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
		return 0;

	if (ext4_ext_is_uninitialized(ex1))
		max_len = EXT_UNINIT_MAX_LEN;
	else
		max_len = EXT_INIT_MAX_LEN;

	ext1_ee_len = ext4_ext_get_actual_len(ex1);
	ext2_ee_len = ext4_ext_get_actual_len(ex2);

	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
			le32_to_cpu(ex2->ee_block))
		return 0;

	/*
	 * To allow future support for preallocated extents to be added
	 * as an RO_COMPAT feature, refuse to merge two extents if
	 * this can result in the top bit of ee_len being set.
	 */
	if (ext1_ee_len + ext2_ee_len > max_len)
		return 0;
#ifdef AGGRESSIVE_TEST
	if (ext1_ee_len >= 4)
		return 0;
#endif

	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
		return 1;
	return 0;
}
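/*
 * For example (illustrative): [lblock 0, len 10, pblock 100] and
 * [lblock 10, len 5, pblock 110] are mergeable into one 15-block
 * extent, since both the logical and physical ranges are contiguous
 * and the combined length stays below the 32768-block cap for an
 * initialized extent (32767 for an uninitialized one).
 */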
/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth, len;
	int merge_done = 0;
	int uninitialized = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	while (ex < EXT_LAST_EXTENT(eh)) {
		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
			break;
		/* merge with next extent! */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
				+ ext4_ext_get_actual_len(ex + 1));
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);

		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
			len = (EXT_LAST_EXTENT(eh) - ex - 1)
				* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
		le16_add_cpu(&eh->eh_entries, -1);
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
	}

	return merge_done;
}

/*
 * check if a portion of the "newext" extent overlaps with an
 * existing extent.
 *
 * If there is an overlap discovered, it updates the length of the newext
 * such that there will be no overlap, and then returns 1.
 * If there is no overlap found, it returns 0.
 */
static unsigned int ext4_ext_check_overlap(struct inode *inode,
					   struct ext4_extent *newext,
					   struct ext4_ext_path *path)
{
	ext4_lblk_t b1, b2;
	unsigned int depth, len1;
	unsigned int ret = 0;

	b1 = le32_to_cpu(newext->ee_block);
	len1 = ext4_ext_get_actual_len(newext);
	depth = ext_depth(inode);
	if (!path[depth].p_ext)
		goto out;
	b2 = le32_to_cpu(path[depth].p_ext->ee_block);

	/*
	 * get the next allocated block if the extent in the path
	 * is before the requested block(s)
	 */
	if (b2 < b1) {
		b2 = ext4_ext_next_allocated_block(path);
		if (b2 == EXT_MAX_BLOCK)
			goto out;
	}

	/* check for wrap through zero on extent logical start block */
	if (b1 + len1 < b1) {
		len1 = EXT_MAX_BLOCK - b1;
		newext->ee_len = cpu_to_le16(len1);
		ret = 1;
	}

	/* check for overlap */
	if (b1 + len1 > b2) {
		newext->ee_len = cpu_to_le16(b2 - b1);
		ret = 1;
	}
out:
	return ret;
}
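/*
 * Example (illustrative): if newext covers logical blocks [10, 29]
 * and an existing extent already starts at block 25, the check above
 * trims newext->ee_len to 15 so that the new extent becomes [10, 24],
 * and 1 is returned.
 */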
/*
 * ext4_ext_insert_extent:
 * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_extent *newext, int flag)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *ex, *fex;
	struct ext4_extent *nearex; /* nearest extent */
	struct ext4_ext_path *npath = NULL;
	int depth, len, err;
	ext4_lblk_t next;
	unsigned uninitialized = 0;

	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
		return -EIO;
	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EIO;
	}

	/* try to insert block into found extent and return */
	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
		&& ext4_can_extents_be_merged(inode, ex, newext)) {
		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext),
			  le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex),
			  ext4_ext_pblock(ex));
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			return err;

		/*
		 * ext4_can_extents_be_merged should have checked that either
		 * both extents are uninitialized, or both aren't. Thus we
		 * need to check only one of them here.
		 */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);
		eh = path[depth].p_hdr;
		nearex = ex;
		goto merge;
	}

repeat:
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
		goto has_space;

	/* probably next leaf has space for us? */
	fex = EXT_LAST_EXTENT(eh);
	next = ext4_ext_next_leaf_block(inode, path);
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
	    && next != EXT_MAX_BLOCK) {
		ext_debug("next leaf block - %d\n", next);
		BUG_ON(npath != NULL);
		npath = ext4_ext_find_extent(inode, next, NULL);
		if (IS_ERR(npath))
			return PTR_ERR(npath);
		BUG_ON(npath->p_depth != path->p_depth);
		eh = npath[depth].p_hdr;
		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
			ext_debug("next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			path = npath;
			goto repeat;
		}
		ext_debug("next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're gonna add a new leaf in the tree.
	 */
	err = ext4_ext_create_new_leaf(handle, inode, path, newext);
	if (err)
		goto cleanup;
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

has_space:
	nearex = path[depth].p_ext;

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;
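	/*
	 * Three cases from here (illustrative summary): the leaf is
	 * empty and newext becomes its first extent; newext starts
	 * after nearex and is inserted right behind it; or newext
	 * starts before nearex and is inserted in its place, with the
	 * tail of the leaf shifted right by one entry in either case.
	 */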
1718 */ 1719 err = ext4_ext_create_new_leaf(handle, inode, path, newext); 1720 if (err) 1721 goto cleanup; 1722 depth = ext_depth(inode); 1723 eh = path[depth].p_hdr; 1724 1725 has_space: 1726 nearex = path[depth].p_ext; 1727 1728 err = ext4_ext_get_access(handle, inode, path + depth); 1729 if (err) 1730 goto cleanup; 1731 1732 if (!nearex) { 1733 /* there is no extent in this leaf, create first one */ 1734 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1735 le32_to_cpu(newext->ee_block), 1736 ext4_ext_pblock(newext), 1737 ext4_ext_is_uninitialized(newext), 1738 ext4_ext_get_actual_len(newext)); 1739 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1740 } else if (le32_to_cpu(newext->ee_block) 1741 > le32_to_cpu(nearex->ee_block)) { 1742 /* BUG_ON(newext->ee_block == nearex->ee_block); */ 1743 if (nearex != EXT_LAST_EXTENT(eh)) { 1744 len = EXT_MAX_EXTENT(eh) - nearex; 1745 len = (len - 1) * sizeof(struct ext4_extent); 1746 len = len < 0 ? 0 : len; 1747 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1748 "move %d from 0x%p to 0x%p\n", 1749 le32_to_cpu(newext->ee_block), 1750 ext4_ext_pblock(newext), 1751 ext4_ext_is_uninitialized(newext), 1752 ext4_ext_get_actual_len(newext), 1753 nearex, len, nearex + 1, nearex + 2); 1754 memmove(nearex + 2, nearex + 1, len); 1755 } 1756 path[depth].p_ext = nearex + 1; 1757 } else { 1758 BUG_ON(newext->ee_block == nearex->ee_block); 1759 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); 1760 len = len < 0 ? 0 : len; 1761 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " 1762 "move %d from 0x%p to 0x%p\n", 1763 le32_to_cpu(newext->ee_block), 1764 ext4_ext_pblock(newext), 1765 ext4_ext_is_uninitialized(newext), 1766 ext4_ext_get_actual_len(newext), 1767 nearex, len, nearex + 1, nearex + 2); 1768 memmove(nearex + 1, nearex, len); 1769 path[depth].p_ext = nearex; 1770 } 1771 1772 le16_add_cpu(&eh->eh_entries, 1); 1773 nearex = path[depth].p_ext; 1774 nearex->ee_block = newext->ee_block; 1775 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1776 nearex->ee_len = newext->ee_len; 1777 1778 merge: 1779 /* try to merge extents to the right */ 1780 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 1781 ext4_ext_try_to_merge(inode, path, nearex); 1782 1783 /* try to merge extents to the left */ 1784 1785 /* time to correct all indexes above */ 1786 err = ext4_ext_correct_indexes(handle, inode, path); 1787 if (err) 1788 goto cleanup; 1789 1790 err = ext4_ext_dirty(handle, inode, path + depth); 1791 1792 cleanup: 1793 if (npath) { 1794 ext4_ext_drop_refs(npath); 1795 kfree(npath); 1796 } 1797 ext4_ext_invalidate_cache(inode); 1798 return err; 1799 } 1800 1801 static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1802 ext4_lblk_t num, ext_prepare_callback func, 1803 void *cbdata) 1804 { 1805 struct ext4_ext_path *path = NULL; 1806 struct ext4_ext_cache cbex; 1807 struct ext4_extent *ex; 1808 ext4_lblk_t next, start = 0, end = 0; 1809 ext4_lblk_t last = block + num; 1810 int depth, exists, err = 0; 1811 1812 BUG_ON(func == NULL); 1813 BUG_ON(inode == NULL); 1814 1815 while (block < last && block != EXT_MAX_BLOCK) { 1816 num = last - block; 1817 /* find extent for this block */ 1818 down_read(&EXT4_I(inode)->i_data_sem); 1819 path = ext4_ext_find_extent(inode, block, path); 1820 up_read(&EXT4_I(inode)->i_data_sem); 1821 if (IS_ERR(path)) { 1822 err = PTR_ERR(path); 1823 path = NULL; 1824 break; 1825 } 1826 1827 depth = ext_depth(inode); 1828 if (unlikely(path[depth].p_hdr == NULL)) { 1829 EXT4_ERROR_INODE(inode, 
"path[%d].p_hdr == NULL", depth); 1830 err = -EIO; 1831 break; 1832 } 1833 ex = path[depth].p_ext; 1834 next = ext4_ext_next_allocated_block(path); 1835 1836 exists = 0; 1837 if (!ex) { 1838 /* there is no extent yet, so try to allocate 1839 * all requested space */ 1840 start = block; 1841 end = block + num; 1842 } else if (le32_to_cpu(ex->ee_block) > block) { 1843 /* need to allocate space before found extent */ 1844 start = block; 1845 end = le32_to_cpu(ex->ee_block); 1846 if (block + num < end) 1847 end = block + num; 1848 } else if (block >= le32_to_cpu(ex->ee_block) 1849 + ext4_ext_get_actual_len(ex)) { 1850 /* need to allocate space after found extent */ 1851 start = block; 1852 end = block + num; 1853 if (end >= next) 1854 end = next; 1855 } else if (block >= le32_to_cpu(ex->ee_block)) { 1856 /* 1857 * some part of requested space is covered 1858 * by found extent 1859 */ 1860 start = block; 1861 end = le32_to_cpu(ex->ee_block) 1862 + ext4_ext_get_actual_len(ex); 1863 if (block + num < end) 1864 end = block + num; 1865 exists = 1; 1866 } else { 1867 BUG(); 1868 } 1869 BUG_ON(end <= start); 1870 1871 if (!exists) { 1872 cbex.ec_block = start; 1873 cbex.ec_len = end - start; 1874 cbex.ec_start = 0; 1875 cbex.ec_type = EXT4_EXT_CACHE_GAP; 1876 } else { 1877 cbex.ec_block = le32_to_cpu(ex->ee_block); 1878 cbex.ec_len = ext4_ext_get_actual_len(ex); 1879 cbex.ec_start = ext4_ext_pblock(ex); 1880 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1881 } 1882 1883 if (unlikely(cbex.ec_len == 0)) { 1884 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); 1885 err = -EIO; 1886 break; 1887 } 1888 err = func(inode, path, &cbex, ex, cbdata); 1889 ext4_ext_drop_refs(path); 1890 1891 if (err < 0) 1892 break; 1893 1894 if (err == EXT_REPEAT) 1895 continue; 1896 else if (err == EXT_BREAK) { 1897 err = 0; 1898 break; 1899 } 1900 1901 if (ext_depth(inode) != depth) { 1902 /* depth was changed. 
static void
ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
			__u32 len, ext4_fsblk_t start, int type)
{
	struct ext4_ext_cache *cex;
	BUG_ON(len == 0);
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;
	cex->ec_type = type;
	cex->ec_block = block;
	cex->ec_len = len;
	cex->ec_start = start;
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}

/*
 * ext4_ext_put_gap_in_cache:
 * calculate boundaries of the gap that the requested block fits into
 * and cache this gap
 */
static void
ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
				ext4_lblk_t block)
{
	int depth = ext_depth(inode);
	unsigned long len;
	ext4_lblk_t lblock;
	struct ext4_extent *ex;

	ex = path[depth].p_ext;
	if (ex == NULL) {
		/* there is no extent yet, so gap is [0;-] */
		lblock = 0;
		len = EXT_MAX_BLOCK;
		ext_debug("cache gap(whole file):");
	} else if (block < le32_to_cpu(ex->ee_block)) {
		lblock = block;
		len = le32_to_cpu(ex->ee_block) - block;
		ext_debug("cache gap(before): %u [%u:%u]",
				block,
				le32_to_cpu(ex->ee_block),
				ext4_ext_get_actual_len(ex));
	} else if (block >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		ext4_lblk_t next;
		lblock = le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex);

		next = ext4_ext_next_allocated_block(path);
		ext_debug("cache gap(after): [%u:%u] %u",
				le32_to_cpu(ex->ee_block),
				ext4_ext_get_actual_len(ex),
				block);
		BUG_ON(next == lblock);
		len = next - lblock;
	} else {
		lblock = len = 0;
		BUG();
	}

	ext_debug(" -> %u:%lu\n", lblock, len);
	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
}

static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
			struct ext4_extent *ex)
{
	struct ext4_ext_cache *cex;
	int ret = EXT4_EXT_CACHE_NO;

	/*
	 * We borrow i_block_reservation_lock to protect i_cached_extent
	 */
	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
	cex = &EXT4_I(inode)->i_cached_extent;

	/* does the cache have valid data? */
	if (cex->ec_type == EXT4_EXT_CACHE_NO)
		goto errout;

	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
	if (in_range(block, cex->ec_block, cex->ec_len)) {
		ex->ee_block = cpu_to_le32(cex->ec_block);
		ext4_ext_store_pblock(ex, cex->ec_start);
		ex->ee_len = cpu_to_le16(cex->ec_len);
		ext_debug("%u cached by %u:%u:%llu\n",
				block,
				cex->ec_block, cex->ec_len, cex->ec_start);
		ret = cex->ec_type;
	}
errout:
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
	return ret;
}
/*
 * ext4_ext_rm_idx:
 * removes index from the index block.
 * It's used in truncate case only, thus all requests are for
 * last index in the block only.
 */
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path)
{
	int err;
	ext4_fsblk_t leaf;

	/* free index block */
	path--;
	leaf = ext4_idx_pblock(path->p_idx);
	if (unlikely(path->p_hdr->eh_entries == 0)) {
		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
		return -EIO;
	}
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;
	le16_add_cpu(&path->p_hdr->eh_entries, -1);
	err = ext4_ext_dirty(handle, inode, path);
	if (err)
		return err;
	ext_debug("index is empty, remove it, free block %llu\n", leaf);
	ext4_free_blocks(handle, inode, 0, leaf, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
	return err;
}

/*
 * ext4_ext_calc_credits_for_single_extent:
 * This routine returns the maximum number of credits needed to insert
 * an extent into the extent tree.
 * When passing in the actual path, the caller should calculate credits
 * under i_data_sem.
 */
int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
						struct ext4_ext_path *path)
{
	if (path) {
		int depth = ext_depth(inode);
		int ret = 0;

		/* probably there is space in leaf? */
		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
				< le16_to_cpu(path[depth].p_hdr->eh_max)) {

			/*
			 * There is some space in the leaf, so no need
			 * to account for the leaf block credit.
			 *
			 * bitmaps and block group descriptor blocks
			 * and other metadata blocks still need to be
			 * accounted.
			 */
			/* 1 bitmap, 1 block group descriptor */
			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
			return ret;
		}
	}

	return ext4_chunk_trans_blocks(inode, nrblocks);
}
2092 */ 2093 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 2094 { 2095 int index; 2096 int depth = ext_depth(inode); 2097 2098 if (chunk) 2099 index = depth * 2; 2100 else 2101 index = depth * 3; 2102 2103 return index; 2104 } 2105 2106 static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2107 struct ext4_extent *ex, 2108 ext4_lblk_t from, ext4_lblk_t to) 2109 { 2110 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2111 int flags = EXT4_FREE_BLOCKS_FORGET; 2112 2113 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2114 flags |= EXT4_FREE_BLOCKS_METADATA; 2115 #ifdef EXTENTS_STATS 2116 { 2117 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2118 spin_lock(&sbi->s_ext_stats_lock); 2119 sbi->s_ext_blocks += ee_len; 2120 sbi->s_ext_extents++; 2121 if (ee_len < sbi->s_ext_min) 2122 sbi->s_ext_min = ee_len; 2123 if (ee_len > sbi->s_ext_max) 2124 sbi->s_ext_max = ee_len; 2125 if (ext_depth(inode) > sbi->s_depth_max) 2126 sbi->s_depth_max = ext_depth(inode); 2127 spin_unlock(&sbi->s_ext_stats_lock); 2128 } 2129 #endif 2130 if (from >= le32_to_cpu(ex->ee_block) 2131 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2132 /* tail removal */ 2133 ext4_lblk_t num; 2134 ext4_fsblk_t start; 2135 2136 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2137 start = ext4_ext_pblock(ex) + ee_len - num; 2138 ext_debug("free last %u blocks starting %llu\n", num, start); 2139 ext4_free_blocks(handle, inode, 0, start, num, flags); 2140 } else if (from == le32_to_cpu(ex->ee_block) 2141 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2142 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2143 from, to, le32_to_cpu(ex->ee_block), ee_len); 2144 } else { 2145 printk(KERN_INFO "strange request: removal(2) " 2146 "%u-%u from %u:%u\n", 2147 from, to, le32_to_cpu(ex->ee_block), ee_len); 2148 } 2149 return 0; 2150 } 2151 2152 static int 2153 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2154 struct ext4_ext_path *path, ext4_lblk_t start) 2155 { 2156 int err = 0, correct_index = 0; 2157 int depth = ext_depth(inode), credits; 2158 struct ext4_extent_header *eh; 2159 ext4_lblk_t a, b, block; 2160 unsigned num; 2161 ext4_lblk_t ex_ee_block; 2162 unsigned short ex_ee_len; 2163 unsigned uninitialized = 0; 2164 struct ext4_extent *ex; 2165 2166 /* the header must be checked already in ext4_ext_remove_space() */ 2167 ext_debug("truncate since %u in leaf\n", start); 2168 if (!path[depth].p_hdr) 2169 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2170 eh = path[depth].p_hdr; 2171 if (unlikely(path[depth].p_hdr == NULL)) { 2172 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 2173 return -EIO; 2174 } 2175 /* find where to start removing */ 2176 ex = EXT_LAST_EXTENT(eh); 2177 2178 ex_ee_block = le32_to_cpu(ex->ee_block); 2179 ex_ee_len = ext4_ext_get_actual_len(ex); 2180 2181 while (ex >= EXT_FIRST_EXTENT(eh) && 2182 ex_ee_block + ex_ee_len > start) { 2183 2184 if (ext4_ext_is_uninitialized(ex)) 2185 uninitialized = 1; 2186 else 2187 uninitialized = 0; 2188 2189 ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, 2190 uninitialized, ex_ee_len); 2191 path[depth].p_ext = ex; 2192 2193 a = ex_ee_block > start ? ex_ee_block : start; 2194 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? 
2195 ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; 2196 2197 ext_debug(" border %u:%u\n", a, b); 2198 2199 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { 2200 block = 0; 2201 num = 0; 2202 BUG(); 2203 } else if (a != ex_ee_block) { 2204 /* remove tail of the extent */ 2205 block = ex_ee_block; 2206 num = a - block; 2207 } else if (b != ex_ee_block + ex_ee_len - 1) { 2208 /* remove head of the extent */ 2209 block = a; 2210 num = b - a; 2211 /* there is no "make a hole" API yet */ 2212 BUG(); 2213 } else { 2214 /* remove whole extent: excellent! */ 2215 block = ex_ee_block; 2216 num = 0; 2217 BUG_ON(a != ex_ee_block); 2218 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 2219 } 2220 2221 /* 2222 * 3 for leaf, sb, and inode plus 2 (bmap and group 2223 * descriptor) for each block group; assume two block 2224 * groups plus ex_ee_len/blocks_per_block_group for 2225 * the worst case 2226 */ 2227 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); 2228 if (ex == EXT_FIRST_EXTENT(eh)) { 2229 correct_index = 1; 2230 credits += (ext_depth(inode)) + 1; 2231 } 2232 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); 2233 2234 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2235 if (err) 2236 goto out; 2237 2238 err = ext4_ext_get_access(handle, inode, path + depth); 2239 if (err) 2240 goto out; 2241 2242 err = ext4_remove_blocks(handle, inode, ex, a, b); 2243 if (err) 2244 goto out; 2245 2246 if (num == 0) { 2247 /* this extent is removed; mark slot entirely unused */ 2248 ext4_ext_store_pblock(ex, 0); 2249 le16_add_cpu(&eh->eh_entries, -1); 2250 } 2251 2252 ex->ee_block = cpu_to_le32(block); 2253 ex->ee_len = cpu_to_le16(num); 2254 /* 2255 * Do not mark uninitialized if all the blocks in the 2256 * extent have been removed. 2257 */ 2258 if (uninitialized && num) 2259 ext4_ext_mark_uninitialized(ex); 2260 2261 err = ext4_ext_dirty(handle, inode, path + depth); 2262 if (err) 2263 goto out; 2264 2265 ext_debug("new extent: %u:%u:%llu\n", block, num, 2266 ext4_ext_pblock(ex)); 2267 ex--; 2268 ex_ee_block = le32_to_cpu(ex->ee_block); 2269 ex_ee_len = ext4_ext_get_actual_len(ex); 2270 } 2271 2272 if (correct_index && eh->eh_entries) 2273 err = ext4_ext_correct_indexes(handle, inode, path); 2274 2275 /* if this leaf is free, then we should 2276 * remove it from index block above */ 2277 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2278 err = ext4_ext_rm_idx(handle, inode, path + depth); 2279 2280 out: 2281 return err; 2282 } 2283 2284 /* 2285 * ext4_ext_more_to_rm: 2286 * returns 1 if current index has to be freed (even partial) 2287 */ 2288 static int 2289 ext4_ext_more_to_rm(struct ext4_ext_path *path) 2290 { 2291 BUG_ON(path->p_idx == NULL); 2292 2293 if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) 2294 return 0; 2295 2296 /* 2297 * if truncate on deeper level happened, it wasn't partial, 2298 * so we have to consider current index for truncation 2299 */ 2300 if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) 2301 return 0; 2302 return 1; 2303 } 2304 2305 static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2306 { 2307 struct super_block *sb = inode->i_sb; 2308 int depth = ext_depth(inode); 2309 struct ext4_ext_path *path; 2310 handle_t *handle; 2311 int i, err; 2312 2313 ext_debug("truncate since %u\n", start); 2314 2315 /* probably first extent we're gonna free will be last in block */ 2316 handle = ext4_journal_start(inode, depth + 1); 2317 if (IS_ERR(handle)) 2318 return PTR_ERR(handle); 2319 2320 again: 2321 
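/* restart point: we come back here with a freshly allocated path whenever the journal transaction had to be restarted and ext4_ext_rm_leaf() propagated -EAGAIN */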
ext4_ext_invalidate_cache(inode); 2322 2323 /* 2324 * We start scanning from right side, freeing all the blocks 2325 * after i_size and walking into the tree depth-wise. 2326 */ 2327 depth = ext_depth(inode); 2328 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); 2329 if (path == NULL) { 2330 ext4_journal_stop(handle); 2331 return -ENOMEM; 2332 } 2333 path[0].p_depth = depth; 2334 path[0].p_hdr = ext_inode_hdr(inode); 2335 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2336 err = -EIO; 2337 goto out; 2338 } 2339 i = err = 0; 2340 2341 while (i >= 0 && err == 0) { 2342 if (i == depth) { 2343 /* this is leaf block */ 2344 err = ext4_ext_rm_leaf(handle, inode, path, start); 2345 /* root level has p_bh == NULL, brelse() eats this */ 2346 brelse(path[i].p_bh); 2347 path[i].p_bh = NULL; 2348 i--; 2349 continue; 2350 } 2351 2352 /* this is index block */ 2353 if (!path[i].p_hdr) { 2354 ext_debug("initialize header\n"); 2355 path[i].p_hdr = ext_block_hdr(path[i].p_bh); 2356 } 2357 2358 if (!path[i].p_idx) { 2359 /* this level hasn't been touched yet */ 2360 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); 2361 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; 2362 ext_debug("init index ptr: hdr 0x%p, num %d\n", 2363 path[i].p_hdr, 2364 le16_to_cpu(path[i].p_hdr->eh_entries)); 2365 } else { 2366 /* we were already here, see at next index */ 2367 path[i].p_idx--; 2368 } 2369 2370 ext_debug("level %d - index, first 0x%p, cur 0x%p\n", 2371 i, EXT_FIRST_INDEX(path[i].p_hdr), 2372 path[i].p_idx); 2373 if (ext4_ext_more_to_rm(path + i)) { 2374 struct buffer_head *bh; 2375 /* go to the next level */ 2376 ext_debug("move to level %d (block %llu)\n", 2377 i + 1, ext4_idx_pblock(path[i].p_idx)); 2378 memset(path + i + 1, 0, sizeof(*path)); 2379 bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx)); 2380 if (!bh) { 2381 /* should we reset i_size? 
*/ 2382 err = -EIO; 2383 break; 2384 } 2385 if (WARN_ON(i + 1 > depth)) { 2386 err = -EIO; 2387 break; 2388 } 2389 if (ext4_ext_check(inode, ext_block_hdr(bh), 2390 depth - i - 1)) { 2391 err = -EIO; 2392 break; 2393 } 2394 path[i + 1].p_bh = bh; 2395 2396 /* save the actual number of indexes since this 2397 * number changes at the next iteration */ 2398 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); 2399 i++; 2400 } else { 2401 /* we finished processing this index, go up */ 2402 if (path[i].p_hdr->eh_entries == 0 && i > 0) { 2403 /* index is empty, remove it; 2404 * the handle must already be prepared by 2405 * ext4_ext_rm_leaf() */ 2406 err = ext4_ext_rm_idx(handle, inode, path + i); 2407 } 2408 /* root level has p_bh == NULL, brelse() eats this */ 2409 brelse(path[i].p_bh); 2410 path[i].p_bh = NULL; 2411 i--; 2412 ext_debug("return to level %d\n", i); 2413 } 2414 } 2415 2416 /* TODO: flexible tree reduction should be here */ 2417 if (path->p_hdr->eh_entries == 0) { 2418 /* 2419 * truncate to zero freed the whole tree, 2420 * so we need to correct eh_depth 2421 */ 2422 err = ext4_ext_get_access(handle, inode, path); 2423 if (err == 0) { 2424 ext_inode_hdr(inode)->eh_depth = 0; 2425 ext_inode_hdr(inode)->eh_max = 2426 cpu_to_le16(ext4_ext_space_root(inode, 0)); 2427 err = ext4_ext_dirty(handle, inode, path); 2428 } 2429 } 2430 out: 2431 ext4_ext_drop_refs(path); 2432 kfree(path); 2433 if (err == -EAGAIN) 2434 goto again; 2435 ext4_journal_stop(handle); 2436 2437 return err; 2438 } 2439 2440 /* 2441 * called at mount time 2442 */ 2443 void ext4_ext_init(struct super_block *sb) 2444 { 2445 /* 2446 * possible initialization would be here 2447 */ 2448 2449 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { 2450 #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS) 2451 printk(KERN_INFO "EXT4-fs: file extents enabled"); 2452 #ifdef AGGRESSIVE_TEST 2453 printk(", aggressive tests"); 2454 #endif 2455 #ifdef CHECK_BINSEARCH 2456 printk(", check binsearch"); 2457 #endif 2458 #ifdef EXTENTS_STATS 2459 printk(", stats"); 2460 #endif 2461 printk("\n"); 2462 #endif 2463 #ifdef EXTENTS_STATS 2464 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); 2465 EXT4_SB(sb)->s_ext_min = 1 << 30; 2466 EXT4_SB(sb)->s_ext_max = 0; 2467 #endif 2468 } 2469 } 2470 2471 /* 2472 * called at umount time 2473 */ 2474 void ext4_ext_release(struct super_block *sb) 2475 { 2476 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) 2477 return; 2478 2479 #ifdef EXTENTS_STATS 2480 if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { 2481 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", 2483 sbi->s_ext_blocks, sbi->s_ext_extents, 2484 sbi->s_ext_blocks / sbi->s_ext_extents); 2485 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", 2486 sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); 2487 } 2488 #endif 2489 } 2490 2491 /* FIXME!!
we need to try to merge to left or right after zero-out */ 2492 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) 2493 { 2494 ext4_fsblk_t ee_pblock; 2495 unsigned int ee_len; 2496 int ret; 2497 2498 ee_len = ext4_ext_get_actual_len(ex); 2499 ee_pblock = ext4_ext_pblock(ex); 2500 2501 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); 2502 if (ret > 0) 2503 ret = 0; 2504 2505 return ret; 2506 } 2507 2508 #define EXT4_EXT_ZERO_LEN 7 2509 /* 2510 * This function is called by ext4_ext_map_blocks() if someone tries to write 2511 * to an uninitialized extent. It may result in splitting the uninitialized 2512 * extent into multiple extents (up to three: one initialized and two 2513 * uninitialized). 2514 * There are three possibilities: 2515 * a> There is no split required: Entire extent should be initialized 2516 * b> Splits in two extents: Write is happening at either end of the extent 2517 * c> Splits in three extents: Someone is writing in the middle of the extent 2518 */ 2519 static int ext4_ext_convert_to_initialized(handle_t *handle, 2520 struct inode *inode, 2521 struct ext4_map_blocks *map, 2522 struct ext4_ext_path *path) 2523 { 2524 struct ext4_extent *ex, newex, orig_ex; 2525 struct ext4_extent *ex1 = NULL; 2526 struct ext4_extent *ex2 = NULL; 2527 struct ext4_extent *ex3 = NULL; 2528 struct ext4_extent_header *eh; 2529 ext4_lblk_t ee_block, eof_block; 2530 unsigned int allocated, ee_len, depth; 2531 ext4_fsblk_t newblock; 2532 int err = 0; 2533 int ret = 0; 2534 int may_zeroout; 2535 2536 ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" 2537 " block %llu, max_blocks %u\n", inode->i_ino, 2538 (unsigned long long)map->m_lblk, map->m_len); 2539 2540 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2541 inode->i_sb->s_blocksize_bits; 2542 if (eof_block < map->m_lblk + map->m_len) 2543 eof_block = map->m_lblk + map->m_len; 2544 2545 depth = ext_depth(inode); 2546 eh = path[depth].p_hdr; 2547 ex = path[depth].p_ext; 2548 ee_block = le32_to_cpu(ex->ee_block); 2549 ee_len = ext4_ext_get_actual_len(ex); 2550 allocated = ee_len - (map->m_lblk - ee_block); 2551 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); 2552 2553 ex2 = ex; 2554 orig_ex.ee_block = ex->ee_block; 2555 orig_ex.ee_len = cpu_to_le16(ee_len); 2556 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); 2557 2558 /* 2559 * It is safe to convert an extent to initialized via explicit 2560 * zeroout only if the extent is fully inside i_size or new_size. 2561 */ 2562 may_zeroout = ee_block + ee_len <= eof_block; 2563 2564 err = ext4_ext_get_access(handle, inode, path + depth); 2565 if (err) 2566 goto out; 2567 /* If the extent has fewer than 2*EXT4_EXT_ZERO_LEN blocks, zero it out directly */ 2568 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { 2569 err = ext4_ext_zeroout(inode, &orig_ex); 2570 if (err) 2571 goto fix_extent_len; 2572 /* update the extent length and mark as initialized */ 2573 ex->ee_block = orig_ex.ee_block; 2574 ex->ee_len = orig_ex.ee_len; 2575 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2576 ext4_ext_dirty(handle, inode, path + depth); 2577 /* zeroed the full extent */ 2578 return allocated; 2579 } 2580 2581 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2582 if (map->m_lblk > ee_block) { 2583 ex1 = ex; 2584 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2585 ext4_ext_mark_uninitialized(ex1); 2586 ex2 = &newex; 2587 } 2588 /* 2589 * for sanity, update the length of the ex2 extent before 2590 * we insert ex3, if ex1 is NULL.
This is to avoid temporary 2591 * overlap of blocks. 2592 */ 2593 if (!ex1 && allocated > map->m_len) 2594 ex2->ee_len = cpu_to_le16(map->m_len); 2595 /* ex3: to ee_block + ee_len : uninitialised */ 2596 if (allocated > map->m_len) { 2597 unsigned int newdepth; 2598 /* If the extent has fewer than EXT4_EXT_ZERO_LEN blocks, zero it out directly */ 2599 if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { 2600 /* 2601 * map->m_lblk == ee_block is handled by the zeroout 2602 * at the beginning. 2603 * Mark the first half uninitialized. 2604 * Mark the second half initialized and zero out the 2605 * initialized extent 2606 */ 2607 ex->ee_block = orig_ex.ee_block; 2608 ex->ee_len = cpu_to_le16(ee_len - allocated); 2609 ext4_ext_mark_uninitialized(ex); 2610 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2611 ext4_ext_dirty(handle, inode, path + depth); 2612 2613 ex3 = &newex; 2614 ex3->ee_block = cpu_to_le32(map->m_lblk); 2615 ext4_ext_store_pblock(ex3, newblock); 2616 ex3->ee_len = cpu_to_le16(allocated); 2617 err = ext4_ext_insert_extent(handle, inode, path, 2618 ex3, 0); 2619 if (err == -ENOSPC) { 2620 err = ext4_ext_zeroout(inode, &orig_ex); 2621 if (err) 2622 goto fix_extent_len; 2623 ex->ee_block = orig_ex.ee_block; 2624 ex->ee_len = orig_ex.ee_len; 2625 ext4_ext_store_pblock(ex, 2626 ext4_ext_pblock(&orig_ex)); 2627 ext4_ext_dirty(handle, inode, path + depth); 2628 /* blocks available from map->m_lblk */ 2629 return allocated; 2630 2631 } else if (err) 2632 goto fix_extent_len; 2633 2634 /* 2635 * We need to zero out the second half because 2636 * a fallocate request can update the file size and 2637 * converting the second half to an initialized extent 2638 * implies that we could leak some junk data to user 2639 * space. 2640 */ 2641 err = ext4_ext_zeroout(inode, ex3); 2642 if (err) { 2643 /* 2644 * We should actually mark the 2645 * second half as uninitialized and return the error; 2646 * the insert may have changed the extent 2647 */ 2648 depth = ext_depth(inode); 2649 ext4_ext_drop_refs(path); 2650 path = ext4_ext_find_extent(inode, map->m_lblk, 2651 path); 2652 if (IS_ERR(path)) { 2653 err = PTR_ERR(path); 2654 return err; 2655 } 2656 /* get the second half extent details */ 2657 ex = path[depth].p_ext; 2658 err = ext4_ext_get_access(handle, inode, 2659 path + depth); 2660 if (err) 2661 return err; 2662 ext4_ext_mark_uninitialized(ex); 2663 ext4_ext_dirty(handle, inode, path + depth); 2664 return err; 2665 } 2666 2667 /* zeroed the second half */ 2668 return allocated; 2669 } 2670 ex3 = &newex; 2671 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2672 ext4_ext_store_pblock(ex3, newblock + map->m_len); 2673 ex3->ee_len = cpu_to_le16(allocated - map->m_len); 2674 ext4_ext_mark_uninitialized(ex3); 2675 err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); 2676 if (err == -ENOSPC && may_zeroout) { 2677 err = ext4_ext_zeroout(inode, &orig_ex); 2678 if (err) 2679 goto fix_extent_len; 2680 /* update the extent length and mark as initialized */ 2681 ex->ee_block = orig_ex.ee_block; 2682 ex->ee_len = orig_ex.ee_len; 2683 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2684 ext4_ext_dirty(handle, inode, path + depth); 2685 /* zeroed the full extent */ 2686 /* blocks available from map->m_lblk */ 2687 return allocated; 2688 2689 } else if (err) 2690 goto fix_extent_len; 2691 /* 2692 * The depth, and hence eh & ex, might change 2693 * as part of the insert above.
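* (A split can even grow a new root level, so the cached path, eh and ex may now reference stale buffers; this is why ext4_ext_drop_refs() and ext4_ext_find_extent() are called below before touching the extent again.)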
2694 */ 2695 newdepth = ext_depth(inode); 2696 /* 2697 * update the extent length after the successful insert of the 2698 * split extent 2699 */ 2700 ee_len -= ext4_ext_get_actual_len(ex3); 2701 orig_ex.ee_len = cpu_to_le16(ee_len); 2702 may_zeroout = ee_block + ee_len <= eof_block; 2703 2704 depth = newdepth; 2705 ext4_ext_drop_refs(path); 2706 path = ext4_ext_find_extent(inode, map->m_lblk, path); 2707 if (IS_ERR(path)) { 2708 err = PTR_ERR(path); 2709 goto out; 2710 } 2711 eh = path[depth].p_hdr; 2712 ex = path[depth].p_ext; 2713 if (ex2 != &newex) 2714 ex2 = ex; 2715 2716 err = ext4_ext_get_access(handle, inode, path + depth); 2717 if (err) 2718 goto out; 2719 2720 allocated = map->m_len; 2721 2722 /* If the extent has fewer than EXT4_EXT_ZERO_LEN blocks and we are trying 2723 * to insert an extent in the middle, zero it out directly; 2724 * otherwise give the extent a chance to merge to the left 2725 */ 2726 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && 2727 map->m_lblk != ee_block && may_zeroout) { 2728 err = ext4_ext_zeroout(inode, &orig_ex); 2729 if (err) 2730 goto fix_extent_len; 2731 /* update the extent length and mark as initialized */ 2732 ex->ee_block = orig_ex.ee_block; 2733 ex->ee_len = orig_ex.ee_len; 2734 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2735 ext4_ext_dirty(handle, inode, path + depth); 2736 /* zero out the first half */ 2737 /* blocks available from map->m_lblk */ 2738 return allocated; 2739 } 2740 } 2741 /* 2742 * If there was a change of depth as part of the 2743 * insertion of ex3 above, we need to update the length 2744 * of the ex1 extent again here 2745 */ 2746 if (ex1 && ex1 != ex) { 2747 ex1 = ex; 2748 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2749 ext4_ext_mark_uninitialized(ex1); 2750 ex2 = &newex; 2751 } 2752 /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ 2753 ex2->ee_block = cpu_to_le32(map->m_lblk); 2754 ext4_ext_store_pblock(ex2, newblock); 2755 ex2->ee_len = cpu_to_le16(allocated); 2756 if (ex2 != ex) 2757 goto insert; 2758 /* 2759 * New (initialized) extent starts from the first block 2760 * in the current extent. i.e., ex2 == ex 2761 * We have to see if it can be merged with the extent 2762 * on the left. 2763 */ 2764 if (ex2 > EXT_FIRST_EXTENT(eh)) { 2765 /* 2766 * To merge left, pass "ex2 - 1" to try_to_merge(), 2767 * since it merges towards right _only_. 2768 */ 2769 ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); 2770 if (ret) { 2771 err = ext4_ext_correct_indexes(handle, inode, path); 2772 if (err) 2773 goto out; 2774 depth = ext_depth(inode); 2775 ex2--; 2776 } 2777 } 2778 /* 2779 * Try to merge towards the right. This might be required 2780 * only when the whole extent is being written to. 2781 * i.e. ex2 == ex and ex3 == NULL.
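* For example (illustrative): if the whole uninitialized extent covering blocks [100..149] was written, ex2 == ex now describes [100..149] as initialized and may coalesce with an initialized neighbour that starts at block 150.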
2782 */ 2783 if (!ex3) { 2784 ret = ext4_ext_try_to_merge(inode, path, ex2); 2785 if (ret) { 2786 err = ext4_ext_correct_indexes(handle, inode, path); 2787 if (err) 2788 goto out; 2789 } 2790 } 2791 /* Mark modified extent as dirty */ 2792 err = ext4_ext_dirty(handle, inode, path + depth); 2793 goto out; 2794 insert: 2795 err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); 2796 if (err == -ENOSPC && may_zeroout) { 2797 err = ext4_ext_zeroout(inode, &orig_ex); 2798 if (err) 2799 goto fix_extent_len; 2800 /* update the extent length and mark as initialized */ 2801 ex->ee_block = orig_ex.ee_block; 2802 ex->ee_len = orig_ex.ee_len; 2803 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2804 ext4_ext_dirty(handle, inode, path + depth); 2805 /* zero out the first half */ 2806 return allocated; 2807 } else if (err) 2808 goto fix_extent_len; 2809 out: 2810 ext4_ext_show_leaf(inode, path); 2811 return err ? err : allocated; 2812 2813 fix_extent_len: 2814 ex->ee_block = orig_ex.ee_block; 2815 ex->ee_len = orig_ex.ee_len; 2816 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2817 ext4_ext_mark_uninitialized(ex); 2818 ext4_ext_dirty(handle, inode, path + depth); 2819 return err; 2820 } 2821 2822 /* 2823 * This function is called by ext4_ext_map_blocks() from 2824 * ext4_get_blocks_dio_write() when DIO is used to write 2825 * to an uninitialized extent. 2826 * 2827 * Writing to an uninitialized extent may result in splitting the 2828 * extent into multiple initialized/uninitialized extents (up to three). 2829 * There are three possibilities: 2830 * a> There is no split required: Entire extent should be uninitialized 2831 * b> Splits in two extents: Write is happening at either end of the extent 2832 * c> Splits in three extents: Someone is writing in the middle of the extent 2833 * 2834 * One or more index blocks may be needed if the extent tree grows after 2835 * the uninitialized extent is split. To prevent ENOSPC from occurring at IO 2836 * completion, we need to split the uninitialized extent before the DIO 2837 * submits the IO. The uninitialized extent handled at this time will be split 2838 * into (at most) three uninitialized extents. After the IO completes, the part 2839 * being filled will be converted to initialized by the end_io callback 2840 * via ext4_convert_unwritten_extents(). 2841 * 2842 * Returns the size of the uninitialized extent to be written on success.
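* For example (illustrative): a DIO write covering blocks [120..149] of an uninitialized extent [100..199] splits it into uninitialized [100..119], [120..149] and [150..199]; only the middle piece is converted to initialized once the IO completes.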
2843 */ 2844 static int ext4_split_unwritten_extents(handle_t *handle, 2845 struct inode *inode, 2846 struct ext4_map_blocks *map, 2847 struct ext4_ext_path *path, 2848 int flags) 2849 { 2850 struct ext4_extent *ex, newex, orig_ex; 2851 struct ext4_extent *ex1 = NULL; 2852 struct ext4_extent *ex2 = NULL; 2853 struct ext4_extent *ex3 = NULL; 2854 ext4_lblk_t ee_block, eof_block; 2855 unsigned int allocated, ee_len, depth; 2856 ext4_fsblk_t newblock; 2857 int err = 0; 2858 int may_zeroout; 2859 2860 ext_debug("ext4_split_unwritten_extents: inode %lu, logical" 2861 "block %llu, max_blocks %u\n", inode->i_ino, 2862 (unsigned long long)map->m_lblk, map->m_len); 2863 2864 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 2865 inode->i_sb->s_blocksize_bits; 2866 if (eof_block < map->m_lblk + map->m_len) 2867 eof_block = map->m_lblk + map->m_len; 2868 2869 depth = ext_depth(inode); 2870 ex = path[depth].p_ext; 2871 ee_block = le32_to_cpu(ex->ee_block); 2872 ee_len = ext4_ext_get_actual_len(ex); 2873 allocated = ee_len - (map->m_lblk - ee_block); 2874 newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); 2875 2876 ex2 = ex; 2877 orig_ex.ee_block = ex->ee_block; 2878 orig_ex.ee_len = cpu_to_le16(ee_len); 2879 ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); 2880 2881 /* 2882 * It is safe to convert extent to initialized via explicit 2883 * zeroout only if extent is fully insde i_size or new_size. 2884 */ 2885 may_zeroout = ee_block + ee_len <= eof_block; 2886 2887 /* 2888 * If the uninitialized extent begins at the same logical 2889 * block where the write begins, and the write completely 2890 * covers the extent, then we don't need to split it. 2891 */ 2892 if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) 2893 return allocated; 2894 2895 err = ext4_ext_get_access(handle, inode, path + depth); 2896 if (err) 2897 goto out; 2898 /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ 2899 if (map->m_lblk > ee_block) { 2900 ex1 = ex; 2901 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2902 ext4_ext_mark_uninitialized(ex1); 2903 ex2 = &newex; 2904 } 2905 /* 2906 * for sanity, update the length of the ex2 extent before 2907 * we insert ex3, if ex1 is NULL. This is to avoid temporary 2908 * overlap of blocks. 2909 */ 2910 if (!ex1 && allocated > map->m_len) 2911 ex2->ee_len = cpu_to_le16(map->m_len); 2912 /* ex3: to ee_block + ee_len : uninitialised */ 2913 if (allocated > map->m_len) { 2914 unsigned int newdepth; 2915 ex3 = &newex; 2916 ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); 2917 ext4_ext_store_pblock(ex3, newblock + map->m_len); 2918 ex3->ee_len = cpu_to_le16(allocated - map->m_len); 2919 ext4_ext_mark_uninitialized(ex3); 2920 err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); 2921 if (err == -ENOSPC && may_zeroout) { 2922 err = ext4_ext_zeroout(inode, &orig_ex); 2923 if (err) 2924 goto fix_extent_len; 2925 /* update the extent length and mark as initialized */ 2926 ex->ee_block = orig_ex.ee_block; 2927 ex->ee_len = orig_ex.ee_len; 2928 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 2929 ext4_ext_dirty(handle, inode, path + depth); 2930 /* zeroed the full extent */ 2931 /* blocks available from map->m_lblk */ 2932 return allocated; 2933 2934 } else if (err) 2935 goto fix_extent_len; 2936 /* 2937 * The depth, and hence eh & ex might change 2938 * as part of the insert above. 
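* (Same caveat as in ext4_ext_convert_to_initialized(): the tree may have been reshaped by the insert, so the path is looked up again below.)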
2939 */ 2940 newdepth = ext_depth(inode); 2941 /* 2942 * update the extent length after successful insert of the 2943 * split extent 2944 */ 2945 ee_len -= ext4_ext_get_actual_len(ex3); 2946 orig_ex.ee_len = cpu_to_le16(ee_len); 2947 may_zeroout = ee_block + ee_len <= eof_block; 2948 2949 depth = newdepth; 2950 ext4_ext_drop_refs(path); 2951 path = ext4_ext_find_extent(inode, map->m_lblk, path); 2952 if (IS_ERR(path)) { 2953 err = PTR_ERR(path); 2954 goto out; 2955 } 2956 ex = path[depth].p_ext; 2957 if (ex2 != &newex) 2958 ex2 = ex; 2959 2960 err = ext4_ext_get_access(handle, inode, path + depth); 2961 if (err) 2962 goto out; 2963 2964 allocated = map->m_len; 2965 } 2966 /* 2967 * If there was a change of depth as part of the 2968 * insertion of ex3 above, we need to update the length 2969 * of the ex1 extent again here 2970 */ 2971 if (ex1 && ex1 != ex) { 2972 ex1 = ex; 2973 ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); 2974 ext4_ext_mark_uninitialized(ex1); 2975 ex2 = &newex; 2976 } 2977 /* 2978 * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written 2979 * using direct I/O, uninitialised still. 2980 */ 2981 ex2->ee_block = cpu_to_le32(map->m_lblk); 2982 ext4_ext_store_pblock(ex2, newblock); 2983 ex2->ee_len = cpu_to_le16(allocated); 2984 ext4_ext_mark_uninitialized(ex2); 2985 if (ex2 != ex) 2986 goto insert; 2987 /* Mark modified extent as dirty */ 2988 err = ext4_ext_dirty(handle, inode, path + depth); 2989 ext_debug("out here\n"); 2990 goto out; 2991 insert: 2992 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2993 if (err == -ENOSPC && may_zeroout) { 2994 err = ext4_ext_zeroout(inode, &orig_ex); 2995 if (err) 2996 goto fix_extent_len; 2997 /* update the extent length and mark as initialized */ 2998 ex->ee_block = orig_ex.ee_block; 2999 ex->ee_len = orig_ex.ee_len; 3000 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 3001 ext4_ext_dirty(handle, inode, path + depth); 3002 /* zero out the first half */ 3003 return allocated; 3004 } else if (err) 3005 goto fix_extent_len; 3006 out: 3007 ext4_ext_show_leaf(inode, path); 3008 return err ? err : allocated; 3009 3010 fix_extent_len: 3011 ex->ee_block = orig_ex.ee_block; 3012 ex->ee_len = orig_ex.ee_len; 3013 ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); 3014 ext4_ext_mark_uninitialized(ex); 3015 ext4_ext_dirty(handle, inode, path + depth); 3016 return err; 3017 } 3018 static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3019 struct inode *inode, 3020 struct ext4_ext_path *path) 3021 { 3022 struct ext4_extent *ex; 3023 struct ext4_extent_header *eh; 3024 int depth; 3025 int err = 0; 3026 int ret = 0; 3027 3028 depth = ext_depth(inode); 3029 eh = path[depth].p_hdr; 3030 ex = path[depth].p_ext; 3031 3032 err = ext4_ext_get_access(handle, inode, path + depth); 3033 if (err) 3034 goto out; 3035 /* first mark the extent as initialized */ 3036 ext4_ext_mark_initialized(ex); 3037 3038 /* 3039 * We have to see if it can be merged with the extent 3040 * on the left. 3041 */ 3042 if (ex > EXT_FIRST_EXTENT(eh)) { 3043 /* 3044 * To merge left, pass "ex - 1" to try_to_merge(), 3045 * since it merges towards right _only_. 3046 */ 3047 ret = ext4_ext_try_to_merge(inode, path, ex - 1); 3048 if (ret) { 3049 err = ext4_ext_correct_indexes(handle, inode, path); 3050 if (err) 3051 goto out; 3052 depth = ext_depth(inode); 3053 ex--; 3054 } 3055 } 3056 /* 3057 * Try to Merge towards right. 
3058 */ 3059 ret = ext4_ext_try_to_merge(inode, path, ex); 3060 if (ret) { 3061 err = ext4_ext_correct_indexes(handle, inode, path); 3062 if (err) 3063 goto out; 3064 depth = ext_depth(inode); 3065 } 3066 /* Mark modified extent as dirty */ 3067 err = ext4_ext_dirty(handle, inode, path + depth); 3068 out: 3069 ext4_ext_show_leaf(inode, path); 3070 return err; 3071 } 3072 3073 static void unmap_underlying_metadata_blocks(struct block_device *bdev, 3074 sector_t block, int count) 3075 { 3076 int i; 3077 for (i = 0; i < count; i++) 3078 unmap_underlying_metadata(bdev, block + i); 3079 } 3080 3081 /* 3082 * Handle EOFBLOCKS_FL flag, clearing it if necessary 3083 */ 3084 static int check_eofblocks_fl(handle_t *handle, struct inode *inode, 3085 struct ext4_map_blocks *map, 3086 struct ext4_ext_path *path, 3087 unsigned int len) 3088 { 3089 int i, depth; 3090 struct ext4_extent_header *eh; 3091 struct ext4_extent *ex, *last_ex; 3092 3093 if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) 3094 return 0; 3095 3096 depth = ext_depth(inode); 3097 eh = path[depth].p_hdr; 3098 ex = path[depth].p_ext; 3099 3100 if (unlikely(!eh->eh_entries)) { 3101 EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " 3102 "EOFBLOCKS_FL set"); 3103 return -EIO; 3104 } 3105 last_ex = EXT_LAST_EXTENT(eh); 3106 /* 3107 * We should clear the EOFBLOCKS_FL flag if we are writing the 3108 * last block in the last extent in the file. We test this by 3109 * first checking to see if the caller to 3110 * ext4_ext_get_blocks() was interested in the last block (or 3111 * a block beyond the last block) in the current extent. If 3112 * this turns out to be false, we can bail out from this 3113 * function immediately. 3114 */ 3115 if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + 3116 ext4_ext_get_actual_len(last_ex)) 3117 return 0; 3118 /* 3119 * If the caller does appear to be planning to write at or 3120 * beyond the end of the current extent, we then test to see 3121 * if the current extent is the last extent in the file, by 3122 * checking to make sure it was reached via the rightmost node 3123 * at each level of the tree. 
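* For example (illustrative), at depth 2 the written extent is the last one in the file only if both path[0].p_idx and path[1].p_idx equal EXT_LAST_INDEX() of their respective headers, which is exactly what the loop below checks.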
3124 */ 3125 for (i = depth-1; i >= 0; i--) 3126 if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) 3127 return 0; 3128 ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3129 return ext4_mark_inode_dirty(handle, inode); 3130 } 3131 3132 static int 3133 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3134 struct ext4_map_blocks *map, 3135 struct ext4_ext_path *path, int flags, 3136 unsigned int allocated, ext4_fsblk_t newblock) 3137 { 3138 int ret = 0; 3139 int err = 0; 3140 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3141 3142 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3143 " block %llu, max_blocks %u, flags %d, allocated %u", 3144 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3145 flags, allocated); 3146 ext4_ext_show_leaf(inode, path); 3147 3148 /* get_block() called before submitting the IO: split the extent */ 3149 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3150 ret = ext4_split_unwritten_extents(handle, inode, map, 3151 path, flags); 3152 /* 3153 * Flag the inode (non-AIO case) or the end_io struct (AIO case) 3154 * that this IO needs conversion to written when the IO is 3155 * completed 3156 */ 3157 if (io) 3158 io->flag = EXT4_IO_END_UNWRITTEN; 3159 else 3160 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3161 if (ext4_should_dioread_nolock(inode)) 3162 map->m_flags |= EXT4_MAP_UNINIT; 3163 goto out; 3164 } 3165 /* IO end_io complete, convert the filled extent to written */ 3166 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3167 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3168 path); 3169 if (ret >= 0) { 3170 ext4_update_inode_fsync_trans(handle, inode, 1); 3171 err = check_eofblocks_fl(handle, inode, map, path, 3172 map->m_len); 3173 } else 3174 err = ret; 3175 goto out2; 3176 } 3177 /* buffered IO case */ 3178 /* 3179 * repeated fallocate creation request: 3180 * we already have an unwritten extent 3181 */ 3182 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3183 goto map_out; 3184 3185 /* buffered READ or buffered write_begin() lookup */ 3186 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3187 /* 3188 * We have blocks reserved already. We 3189 * return allocated blocks so that delalloc 3190 * won't do block reservation for us. But 3191 * the buffer head will be unmapped so that 3192 * a read from the block returns 0s. 3193 */ 3194 map->m_flags |= EXT4_MAP_UNWRITTEN; 3195 goto out1; 3196 } 3197 3198 /* buffered write, writepage time, convert */ 3199 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3200 if (ret >= 0) { 3201 ext4_update_inode_fsync_trans(handle, inode, 1); 3202 err = check_eofblocks_fl(handle, inode, map, path, map->m_len); 3203 if (err < 0) 3204 goto out2; 3205 } 3206 3207 out: 3208 if (ret <= 0) { 3209 err = ret; 3210 goto out2; 3211 } else 3212 allocated = ret; 3213 map->m_flags |= EXT4_MAP_NEW; 3214 /* 3215 * if we allocated more blocks than requested, we need to 3216 * make sure we unmap the extra blocks 3217 * allocated. The actually needed blocks will get 3218 * unmapped later when we find the buffer_head marked 3219 * new. 3220 */ 3221 if (allocated > map->m_len) { 3222 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, 3223 newblock + map->m_len, 3224 allocated - map->m_len); 3225 allocated = map->m_len; 3226 } 3227 3228 /* 3229 * If we have done fallocate with an offset that is already 3230 * delayed allocated, we would have block reservation 3231 * and quota reservation done in the delayed write path.
* But fallocate would have already updated quota and block 3233 * count for this offset. So cancel these reservations. 3234 */ 3235 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3236 ext4_da_update_reserve_space(inode, allocated, 0); 3237 3238 map_out: 3239 map->m_flags |= EXT4_MAP_MAPPED; 3240 out1: 3241 if (allocated > map->m_len) 3242 allocated = map->m_len; 3243 ext4_ext_show_leaf(inode, path); 3244 map->m_pblk = newblock; 3245 map->m_len = allocated; 3246 out2: 3247 if (path) { 3248 ext4_ext_drop_refs(path); 3249 kfree(path); 3250 } 3251 return err ? err : allocated; 3252 } 3253 3254 /* 3255 * Block allocation/map/preallocation routine for extent-based files 3256 * 3257 * 3258 * Needs to be called with 3259 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block 3260 * (i.e., create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) 3261 * 3262 * return > 0, number of blocks already mapped/allocated 3263 * if create == 0 and these are pre-allocated blocks 3264 * buffer head is unmapped 3265 * otherwise blocks are mapped 3266 * 3267 * return = 0, if plain lookup failed (blocks have not been allocated) 3268 * buffer head is unmapped 3269 * 3270 * return < 0, error case. 3271 */ 3272 int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, 3273 struct ext4_map_blocks *map, int flags) 3274 { 3275 struct ext4_ext_path *path = NULL; 3276 struct ext4_extent_header *eh; 3277 struct ext4_extent newex, *ex; 3278 ext4_fsblk_t newblock; 3279 int err = 0, depth, ret, cache_type; 3280 unsigned int allocated = 0; 3281 struct ext4_allocation_request ar; 3282 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3283 3284 ext_debug("blocks %u/%u requested for inode %lu\n", 3285 map->m_lblk, map->m_len, inode->i_ino); 3286 3287 /* check in cache */ 3288 cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); 3289 if (cache_type) { 3290 if (cache_type == EXT4_EXT_CACHE_GAP) { 3291 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3292 /* 3293 * block isn't allocated yet and 3294 * user doesn't want to allocate it 3295 */ 3296 goto out2; 3297 } 3298 /* we should allocate the requested block */ 3299 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { 3300 /* block is already allocated */ 3301 newblock = map->m_lblk 3302 - le32_to_cpu(newex.ee_block) 3303 + ext4_ext_pblock(&newex); 3304 /* number of remaining blocks in the extent */ 3305 allocated = ext4_ext_get_actual_len(&newex) - 3306 (map->m_lblk - le32_to_cpu(newex.ee_block)); 3307 goto out; 3308 } else { 3309 BUG(); 3310 } 3311 } 3312 3313 /* find extent for this block */ 3314 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3315 if (IS_ERR(path)) { 3316 err = PTR_ERR(path); 3317 path = NULL; 3318 goto out2; 3319 } 3320 3321 depth = ext_depth(inode); 3322 3323 /* 3324 * consistent leaf must not be empty; 3325 * this situation is possible, though, _during_ tree modification; 3326 * this is why the assert can't be put in ext4_ext_find_extent() 3327 */ 3328 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 3329 EXT4_ERROR_INODE(inode, "bad extent address " 3330 "lblock: %lu, depth: %d pblock %lld", 3331 (unsigned long) map->m_lblk, depth, 3332 path[depth].p_block); 3333 err = -EIO; 3334 goto out2; 3335 } 3336 eh = path[depth].p_hdr; 3337 3338 ex = path[depth].p_ext; 3339 if (ex) { 3340 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3341 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3342 unsigned short ee_len; 3343 3344 /* 3345 * Uninitialized extents are treated as holes, except that 3346 * we split out initialized
portions during a write. 3347 */ 3348 ee_len = ext4_ext_get_actual_len(ex); 3349 /* if found extent covers block, simply return it */ 3350 if (in_range(map->m_lblk, ee_block, ee_len)) { 3351 newblock = map->m_lblk - ee_block + ee_start; 3352 /* number of remaining blocks in the extent */ 3353 allocated = ee_len - (map->m_lblk - ee_block); 3354 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3355 ee_block, ee_len, newblock); 3356 3357 /* Do not put uninitialized extent in the cache */ 3358 if (!ext4_ext_is_uninitialized(ex)) { 3359 ext4_ext_put_in_cache(inode, ee_block, 3360 ee_len, ee_start, 3361 EXT4_EXT_CACHE_EXTENT); 3362 goto out; 3363 } 3364 ret = ext4_ext_handle_uninitialized_extents(handle, 3365 inode, map, path, flags, allocated, 3366 newblock); 3367 return ret; 3368 } 3369 } 3370 3371 /* 3372 * the requested block isn't allocated yet; 3373 * we don't create blocks if the create flag is zero 3374 */ 3375 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3376 /* 3377 * put the just-found gap into the cache to speed up 3378 * subsequent requests 3379 */ 3380 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3381 goto out2; 3382 } 3383 /* 3384 * Okay, we need to do block allocation. 3385 */ 3386 3387 /* find neighbour allocated blocks */ 3388 ar.lleft = map->m_lblk; 3389 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); 3390 if (err) 3391 goto out2; 3392 ar.lright = map->m_lblk; 3393 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3394 if (err) 3395 goto out2; 3396 3397 /* 3398 * See if request is beyond maximum number of blocks we can have in 3399 * a single extent. For an initialized extent this limit is 3400 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is 3401 * EXT_UNINIT_MAX_LEN. 3402 */ 3403 if (map->m_len > EXT_INIT_MAX_LEN && 3404 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3405 map->m_len = EXT_INIT_MAX_LEN; 3406 else if (map->m_len > EXT_UNINIT_MAX_LEN && 3407 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 3408 map->m_len = EXT_UNINIT_MAX_LEN; 3409 3410 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3411 newex.ee_block = cpu_to_le32(map->m_lblk); 3412 newex.ee_len = cpu_to_le16(map->m_len); 3413 err = ext4_ext_check_overlap(inode, &newex, path); 3414 if (err) 3415 allocated = ext4_ext_get_actual_len(&newex); 3416 else 3417 allocated = map->m_len; 3418 3419 /* allocate new block */ 3420 ar.inode = inode; 3421 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3422 ar.logical = map->m_lblk; 3423 ar.len = allocated; 3424 if (S_ISREG(inode->i_mode)) 3425 ar.flags = EXT4_MB_HINT_DATA; 3426 else 3427 /* disable in-core preallocation for non-regular files */ 3428 ar.flags = 0; 3429 newblock = ext4_mb_new_blocks(handle, &ar, &err); 3430 if (!newblock) 3431 goto out2; 3432 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 3433 ar.goal, newblock, allocated); 3434 3435 /* try to insert new extent into found leaf and return */ 3436 ext4_ext_store_pblock(&newex, newblock); 3437 newex.ee_len = cpu_to_le16(ar.len); 3438 /* Mark uninitialized */ 3439 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) { 3440 ext4_ext_mark_uninitialized(&newex); 3441 /* 3442 * An io_end structure is created for every IO write to an 3443 * uninitialized extent. To avoid unnecessary conversion, 3444 * here we flag the IO that really needs the conversion. 3445 * For the non-async direct IO case, flag the inode state 3446 * that we need to perform conversion when the IO is done.
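* (The conversion itself is performed later by ext4_convert_unwritten_extents(), driven from the end_io callback once the data is on disk.)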
3447 */ 3448 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3449 if (io) 3450 io->flag = EXT4_IO_END_UNWRITTEN; 3451 else 3452 ext4_set_inode_state(inode, 3453 EXT4_STATE_DIO_UNWRITTEN); 3454 } 3455 if (ext4_should_dioread_nolock(inode)) 3456 map->m_flags |= EXT4_MAP_UNINIT; 3457 } 3458 3459 err = check_eofblocks_fl(handle, inode, map, path, ar.len); 3460 if (err) 3461 goto out2; 3462 3463 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3464 if (err) { 3465 /* free data blocks we just allocated */ 3466 /* not a good idea to call discard here directly, 3467 * but otherwise we'd need to call it every free() */ 3468 ext4_discard_preallocations(inode); 3469 ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), 3470 ext4_ext_get_actual_len(&newex), 0); 3471 goto out2; 3472 } 3473 3474 /* previous routine could use block we allocated */ 3475 newblock = ext4_ext_pblock(&newex); 3476 allocated = ext4_ext_get_actual_len(&newex); 3477 if (allocated > map->m_len) 3478 allocated = map->m_len; 3479 map->m_flags |= EXT4_MAP_NEW; 3480 3481 /* 3482 * Update reserved blocks/metadata blocks after successful 3483 * block allocation which had been deferred till now. 3484 */ 3485 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3486 ext4_da_update_reserve_space(inode, allocated, 1); 3487 3488 /* 3489 * Cache the extent and update transaction to commit on fdatasync only 3490 * when it is _not_ an uninitialized extent. 3491 */ 3492 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 3493 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, 3494 EXT4_EXT_CACHE_EXTENT); 3495 ext4_update_inode_fsync_trans(handle, inode, 1); 3496 } else 3497 ext4_update_inode_fsync_trans(handle, inode, 0); 3498 out: 3499 if (allocated > map->m_len) 3500 allocated = map->m_len; 3501 ext4_ext_show_leaf(inode, path); 3502 map->m_flags |= EXT4_MAP_MAPPED; 3503 map->m_pblk = newblock; 3504 map->m_len = allocated; 3505 out2: 3506 if (path) { 3507 ext4_ext_drop_refs(path); 3508 kfree(path); 3509 } 3510 return err ? err : allocated; 3511 } 3512 3513 void ext4_ext_truncate(struct inode *inode) 3514 { 3515 struct address_space *mapping = inode->i_mapping; 3516 struct super_block *sb = inode->i_sb; 3517 ext4_lblk_t last_block; 3518 handle_t *handle; 3519 int err = 0; 3520 3521 /* 3522 * probably first extent we're gonna free will be last in block 3523 */ 3524 err = ext4_writepage_trans_blocks(inode); 3525 handle = ext4_journal_start(inode, err); 3526 if (IS_ERR(handle)) 3527 return; 3528 3529 if (inode->i_size & (sb->s_blocksize - 1)) 3530 ext4_block_truncate_page(handle, mapping, inode->i_size); 3531 3532 if (ext4_orphan_add(handle, inode)) 3533 goto out_stop; 3534 3535 down_write(&EXT4_I(inode)->i_data_sem); 3536 ext4_ext_invalidate_cache(inode); 3537 3538 ext4_discard_preallocations(inode); 3539 3540 /* 3541 * TODO: optimization is possible here. 3542 * Probably we need not scan at all, 3543 * because page truncation is enough. 3544 */ 3545 3546 /* we have to know where to truncate from in crash case */ 3547 EXT4_I(inode)->i_disksize = inode->i_size; 3548 ext4_mark_inode_dirty(handle, inode); 3549 3550 last_block = (inode->i_size + sb->s_blocksize - 1) 3551 >> EXT4_BLOCK_SIZE_BITS(sb); 3552 err = ext4_ext_remove_space(inode, last_block); 3553 3554 /* In a multi-transaction truncate, we only make the final 3555 * transaction synchronous. 
3556 */ 3557 if (IS_SYNC(inode)) 3558 ext4_handle_sync(handle); 3559 3560 out_stop: 3561 up_write(&EXT4_I(inode)->i_data_sem); 3562 /* 3563 * If this was a simple ftruncate() and the file will remain alive, 3564 * then we need to clear up the orphan record which we created above. 3565 * However, if this was a real unlink then we were called by 3566 * ext4_delete_inode(), and we allow that function to clean up the 3567 * orphan info for us. 3568 */ 3569 if (inode->i_nlink) 3570 ext4_orphan_del(handle, inode); 3571 3572 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3573 ext4_mark_inode_dirty(handle, inode); 3574 ext4_journal_stop(handle); 3575 } 3576 3577 static void ext4_falloc_update_inode(struct inode *inode, 3578 int mode, loff_t new_size, int update_ctime) 3579 { 3580 struct timespec now; 3581 3582 if (update_ctime) { 3583 now = current_fs_time(inode->i_sb); 3584 if (!timespec_equal(&inode->i_ctime, &now)) 3585 inode->i_ctime = now; 3586 } 3587 /* 3588 * Update only when preallocation was requested beyond 3589 * the file size. 3590 */ 3591 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3592 if (new_size > i_size_read(inode)) 3593 i_size_write(inode, new_size); 3594 if (new_size > EXT4_I(inode)->i_disksize) 3595 ext4_update_i_disksize(inode, new_size); 3596 } else { 3597 /* 3598 * Mark that we allocate beyond EOF so the subsequent truncate 3599 * can proceed even if the new size is the same as i_size. 3600 */ 3601 if (new_size > i_size_read(inode)) 3602 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 3603 } 3604 3605 } 3606 3607 /* 3608 * preallocate space for a file. This implements ext4's fallocate inode 3609 * operation, which gets called from sys_fallocate system call. 3610 * For block-mapped files, posix_fallocate should fall back to the method 3611 * of writing zeroes to the required new blocks (the same behavior which is 3612 * expected for file systems which do not support fallocate() system call). 
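* Example (illustrative): fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20) preallocates 16 MiB as uninitialized extents without changing i_size; without FALLOC_FL_KEEP_SIZE, ext4_falloc_update_inode() above also pushes i_size and i_disksize out to the new end of the preallocated range.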
3613 */ 3614 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) 3615 { 3616 handle_t *handle; 3617 loff_t new_size; 3618 unsigned int max_blocks; 3619 int ret = 0; 3620 int ret2 = 0; 3621 int retries = 0; 3622 struct ext4_map_blocks map; 3623 unsigned int credits, blkbits = inode->i_blkbits; 3624 3625 /* 3626 * currently supporting (pre)allocate mode for extent-based 3627 * files _only_ 3628 */ 3629 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3630 return -EOPNOTSUPP; 3631 3632 /* preallocation to directories is currently not supported */ 3633 if (S_ISDIR(inode->i_mode)) 3634 return -ENODEV; 3635 3636 map.m_lblk = offset >> blkbits; 3637 /* 3638 * We can't just convert len to max_blocks because, e.g., if 3639 * blocksize = 4096, offset = 3072 and len = 2048, the range 3640 * covers two blocks even though len >> blkbits == 0 3641 */ 3641 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 3642 - map.m_lblk; 3643 /* 3644 * credits to insert 1 extent into extent tree 3645 */ 3646 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3647 mutex_lock(&inode->i_mutex); 3648 ret = inode_newsize_ok(inode, (len + offset)); 3649 if (ret) { 3650 mutex_unlock(&inode->i_mutex); 3651 return ret; 3652 } 3653 retry: 3654 while (ret >= 0 && ret < max_blocks) { 3655 map.m_lblk = map.m_lblk + ret; 3656 map.m_len = max_blocks = max_blocks - ret; 3657 handle = ext4_journal_start(inode, credits); 3658 if (IS_ERR(handle)) { 3659 ret = PTR_ERR(handle); 3660 break; 3661 } 3662 ret = ext4_map_blocks(handle, inode, &map, 3663 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); 3664 if (ret <= 0) { 3665 #ifdef EXT4FS_DEBUG 3666 WARN_ON(ret <= 0); 3667 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3668 "returned error inode#%lu, block=%u, " 3669 "max_blocks=%u", __func__, 3670 inode->i_ino, map.m_lblk, max_blocks); 3671 #endif 3672 ext4_mark_inode_dirty(handle, inode); 3673 ret2 = ext4_journal_stop(handle); 3674 break; 3675 } 3676 if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, 3677 blkbits) >> blkbits)) 3678 new_size = offset + len; 3679 else 3680 new_size = (map.m_lblk + ret) << blkbits; 3681 3682 ext4_falloc_update_inode(inode, mode, new_size, 3683 (map.m_flags & EXT4_MAP_NEW)); 3684 ext4_mark_inode_dirty(handle, inode); 3685 ret2 = ext4_journal_stop(handle); 3686 if (ret2) 3687 break; 3688 } 3689 if (ret == -ENOSPC && 3690 ext4_should_retry_alloc(inode->i_sb, &retries)) { 3691 ret = 0; 3692 goto retry; 3693 } 3694 mutex_unlock(&inode->i_mutex); 3695 return ret > 0 ? ret2 : ret; 3696 } 3697 3698 /* 3699 * This function converts a range of blocks to written extents. 3700 * The caller passes the start offset and the size; 3701 * all unwritten extents within this range will be converted to 3702 * written extents. 3703 * 3704 * This function is called from the direct IO end_io callback 3705 * function, to convert the fallocated extents after IO is completed. 3706 * Returns 0 on success.
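* For example (illustrative): with blocksize 4096, offset = 3072 and len = 2048, the byte range [3072..5119] touches blocks 0 and 1, so max_blocks = 2 even though len >> blkbits == 0; the EXT4_BLOCK_ALIGN() arithmetic below accounts for this.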
3707 */ 3708 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3709 ssize_t len) 3710 { 3711 handle_t *handle; 3712 unsigned int max_blocks; 3713 int ret = 0; 3714 int ret2 = 0; 3715 struct ext4_map_blocks map; 3716 unsigned int credits, blkbits = inode->i_blkbits; 3717 3718 map.m_lblk = offset >> blkbits; 3719 /* 3720 * We can't just convert len to max_blocks because 3721 * If blocksize = 4096 offset = 3072 and len = 2048 3722 */ 3723 max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - 3724 map.m_lblk); 3725 /* 3726 * credits to insert 1 extent into extent tree 3727 */ 3728 credits = ext4_chunk_trans_blocks(inode, max_blocks); 3729 while (ret >= 0 && ret < max_blocks) { 3730 map.m_lblk += ret; 3731 map.m_len = (max_blocks -= ret); 3732 handle = ext4_journal_start(inode, credits); 3733 if (IS_ERR(handle)) { 3734 ret = PTR_ERR(handle); 3735 break; 3736 } 3737 ret = ext4_map_blocks(handle, inode, &map, 3738 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 3739 if (ret <= 0) { 3740 WARN_ON(ret <= 0); 3741 printk(KERN_ERR "%s: ext4_ext_map_blocks " 3742 "returned error inode#%lu, block=%u, " 3743 "max_blocks=%u", __func__, 3744 inode->i_ino, map.m_lblk, map.m_len); 3745 } 3746 ext4_mark_inode_dirty(handle, inode); 3747 ret2 = ext4_journal_stop(handle); 3748 if (ret <= 0 || ret2 ) 3749 break; 3750 } 3751 return ret > 0 ? ret2 : ret; 3752 } 3753 /* 3754 * Callback function called for each extent to gather FIEMAP information. 3755 */ 3756 static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, 3757 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3758 void *data) 3759 { 3760 struct fiemap_extent_info *fieinfo = data; 3761 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; 3762 __u64 logical; 3763 __u64 physical; 3764 __u64 length; 3765 __u32 flags = 0; 3766 int error; 3767 3768 logical = (__u64)newex->ec_block << blksize_bits; 3769 3770 if (newex->ec_type == EXT4_EXT_CACHE_GAP) { 3771 pgoff_t offset; 3772 struct page *page; 3773 struct buffer_head *bh = NULL; 3774 3775 offset = logical >> PAGE_SHIFT; 3776 page = find_get_page(inode->i_mapping, offset); 3777 if (!page || !page_has_buffers(page)) 3778 return EXT_CONTINUE; 3779 3780 bh = page_buffers(page); 3781 3782 if (!bh) 3783 return EXT_CONTINUE; 3784 3785 if (buffer_delay(bh)) { 3786 flags |= FIEMAP_EXTENT_DELALLOC; 3787 page_cache_release(page); 3788 } else { 3789 page_cache_release(page); 3790 return EXT_CONTINUE; 3791 } 3792 } 3793 3794 physical = (__u64)newex->ec_start << blksize_bits; 3795 length = (__u64)newex->ec_len << blksize_bits; 3796 3797 if (ex && ext4_ext_is_uninitialized(ex)) 3798 flags |= FIEMAP_EXTENT_UNWRITTEN; 3799 3800 /* 3801 * If this extent reaches EXT_MAX_BLOCK, it must be last. 3802 * 3803 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, 3804 * this also indicates no more allocated blocks. 
3805 * 3806 * XXX this might miss a single-block extent at EXT_MAX_BLOCK 3807 */ 3808 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || 3809 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { 3810 loff_t size = i_size_read(inode); 3811 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); 3812 3813 flags |= FIEMAP_EXTENT_LAST; 3814 if ((flags & FIEMAP_EXTENT_DELALLOC) && 3815 logical+length > size) 3816 length = (size - logical + bs - 1) & ~(bs-1); 3817 } 3818 3819 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3820 length, flags); 3821 if (error < 0) 3822 return error; 3823 if (error == 1) 3824 return EXT_BREAK; 3825 3826 return EXT_CONTINUE; 3827 } 3828 3829 /* fiemap flags we can handle specified here */ 3830 #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 3831 3832 static int ext4_xattr_fiemap(struct inode *inode, 3833 struct fiemap_extent_info *fieinfo) 3834 { 3835 __u64 physical = 0; 3836 __u64 length; 3837 __u32 flags = FIEMAP_EXTENT_LAST; 3838 int blockbits = inode->i_sb->s_blocksize_bits; 3839 int error = 0; 3840 3841 /* in-inode? */ 3842 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 3843 struct ext4_iloc iloc; 3844 int offset; /* offset of xattr in inode */ 3845 3846 error = ext4_get_inode_loc(inode, &iloc); 3847 if (error) 3848 return error; 3849 physical = iloc.bh->b_blocknr << blockbits; 3850 offset = EXT4_GOOD_OLD_INODE_SIZE + 3851 EXT4_I(inode)->i_extra_isize; 3852 physical += offset; 3853 length = EXT4_SB(inode->i_sb)->s_inode_size - offset; 3854 flags |= FIEMAP_EXTENT_DATA_INLINE; 3855 brelse(iloc.bh); 3856 } else { /* external block */ 3857 physical = EXT4_I(inode)->i_file_acl << blockbits; 3858 length = inode->i_sb->s_blocksize; 3859 } 3860 3861 if (physical) 3862 error = fiemap_fill_next_extent(fieinfo, 0, physical, 3863 length, flags); 3864 return (error < 0 ? error : 0); 3865 } 3866 3867 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 3868 __u64 start, __u64 len) 3869 { 3870 ext4_lblk_t start_blk; 3871 int error = 0; 3872 3873 /* fallback to generic here if not in extents fmt */ 3874 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 3875 return generic_block_fiemap(inode, fieinfo, start, len, 3876 ext4_get_block); 3877 3878 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) 3879 return -EBADR; 3880 3881 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 3882 error = ext4_xattr_fiemap(inode, fieinfo); 3883 } else { 3884 ext4_lblk_t len_blks; 3885 __u64 last_blk; 3886 3887 start_blk = start >> inode->i_sb->s_blocksize_bits; 3888 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; 3889 if (last_blk >= EXT_MAX_BLOCK) 3890 last_blk = EXT_MAX_BLOCK-1; 3891 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 3892 3893 /* 3894 * Walk the extent tree gathering extent information. 3895 * ext4_ext_fiemap_cb will push extents back to user. 3896 */ 3897 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3898 ext4_ext_fiemap_cb, fieinfo); 3899 } 3900 3901 return error; 3902 } 3903 3904