1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * extent_map.c 4 * 5 * Block/Cluster mapping functions 6 * 7 * Copyright (C) 2004 Oracle. All rights reserved. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/init.h> 12 #include <linux/slab.h> 13 #include <linux/types.h> 14 #include <linux/fiemap.h> 15 16 #include <cluster/masklog.h> 17 18 #include "ocfs2.h" 19 20 #include "alloc.h" 21 #include "dlmglue.h" 22 #include "extent_map.h" 23 #include "inode.h" 24 #include "super.h" 25 #include "symlink.h" 26 #include "aops.h" 27 #include "ocfs2_trace.h" 28 29 #include "buffer_head_io.h" 30 31 /* 32 * The extent caching implementation is intentionally trivial. 33 * 34 * We only cache a small number of extents stored directly on the 35 * inode, so linear order operations are acceptable. If we ever want 36 * to increase the size of the extent map, then these algorithms must 37 * get smarter. 38 */ 39 40 void ocfs2_extent_map_init(struct inode *inode) 41 { 42 struct ocfs2_inode_info *oi = OCFS2_I(inode); 43 44 oi->ip_extent_map.em_num_items = 0; 45 INIT_LIST_HEAD(&oi->ip_extent_map.em_list); 46 } 47 48 static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, 49 unsigned int cpos, 50 struct ocfs2_extent_map_item **ret_emi) 51 { 52 unsigned int range; 53 struct ocfs2_extent_map_item *emi; 54 55 *ret_emi = NULL; 56 57 list_for_each_entry(emi, &em->em_list, ei_list) { 58 range = emi->ei_cpos + emi->ei_clusters; 59 60 if (cpos >= emi->ei_cpos && cpos < range) { 61 list_move(&emi->ei_list, &em->em_list); 62 63 *ret_emi = emi; 64 break; 65 } 66 } 67 } 68 69 static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, 70 unsigned int *phys, unsigned int *len, 71 unsigned int *flags) 72 { 73 unsigned int coff; 74 struct ocfs2_inode_info *oi = OCFS2_I(inode); 75 struct ocfs2_extent_map_item *emi; 76 77 spin_lock(&oi->ip_lock); 78 79 __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi); 80 if (emi) { 81 coff = cpos - emi->ei_cpos; 82 *phys = emi->ei_phys + coff; 83 if (len) 84 *len = emi->ei_clusters - coff; 85 if (flags) 86 *flags = emi->ei_flags; 87 } 88 89 spin_unlock(&oi->ip_lock); 90 91 if (emi == NULL) 92 return -ENOENT; 93 94 return 0; 95 } 96 97 /* 98 * Forget about all clusters equal to or greater than cpos. 99 */ 100 void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) 101 { 102 struct ocfs2_extent_map_item *emi, *n; 103 struct ocfs2_inode_info *oi = OCFS2_I(inode); 104 struct ocfs2_extent_map *em = &oi->ip_extent_map; 105 LIST_HEAD(tmp_list); 106 unsigned int range; 107 108 spin_lock(&oi->ip_lock); 109 list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { 110 if (emi->ei_cpos >= cpos) { 111 /* Full truncate of this record. */ 112 list_move(&emi->ei_list, &tmp_list); 113 BUG_ON(em->em_num_items == 0); 114 em->em_num_items--; 115 continue; 116 } 117 118 range = emi->ei_cpos + emi->ei_clusters; 119 if (range > cpos) { 120 /* Partial truncate */ 121 emi->ei_clusters = cpos - emi->ei_cpos; 122 } 123 } 124 spin_unlock(&oi->ip_lock); 125 126 list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { 127 list_del(&emi->ei_list); 128 kfree(emi); 129 } 130 } 131 132 /* 133 * Is any part of emi2 contained within emi1 134 */ 135 static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1, 136 struct ocfs2_extent_map_item *emi2) 137 { 138 unsigned int range1, range2; 139 140 /* 141 * Check if logical start of emi2 is inside emi1 142 */ 143 range1 = emi1->ei_cpos + emi1->ei_clusters; 144 if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1) 145 return 1; 146 147 /* 148 * Check if logical end of emi2 is inside emi1 149 */ 150 range2 = emi2->ei_cpos + emi2->ei_clusters; 151 if (range2 > emi1->ei_cpos && range2 <= range1) 152 return 1; 153 154 return 0; 155 } 156 157 static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest, 158 struct ocfs2_extent_map_item *src) 159 { 160 dest->ei_cpos = src->ei_cpos; 161 dest->ei_phys = src->ei_phys; 162 dest->ei_clusters = src->ei_clusters; 163 dest->ei_flags = src->ei_flags; 164 } 165 166 /* 167 * Try to merge emi with ins. Returns 1 if merge succeeds, zero 168 * otherwise. 169 */ 170 static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, 171 struct ocfs2_extent_map_item *ins) 172 { 173 /* 174 * Handle contiguousness 175 */ 176 if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) && 177 ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) && 178 ins->ei_flags == emi->ei_flags) { 179 emi->ei_clusters += ins->ei_clusters; 180 return 1; 181 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && 182 (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && 183 ins->ei_flags == emi->ei_flags) { 184 emi->ei_phys = ins->ei_phys; 185 emi->ei_cpos = ins->ei_cpos; 186 emi->ei_clusters += ins->ei_clusters; 187 return 1; 188 } 189 190 /* 191 * Overlapping extents - this shouldn't happen unless we've 192 * split an extent to change it's flags. That is exceedingly 193 * rare, so there's no sense in trying to optimize it yet. 194 */ 195 if (ocfs2_ei_is_contained(emi, ins) || 196 ocfs2_ei_is_contained(ins, emi)) { 197 ocfs2_copy_emi_fields(emi, ins); 198 return 1; 199 } 200 201 /* No merge was possible. */ 202 return 0; 203 } 204 205 /* 206 * In order to reduce complexity on the caller, this insert function 207 * is intentionally liberal in what it will accept. 208 * 209 * The only rule is that the truncate call *must* be used whenever 210 * records have been deleted. This avoids inserting overlapping 211 * records with different physical mappings. 212 */ 213 void ocfs2_extent_map_insert_rec(struct inode *inode, 214 struct ocfs2_extent_rec *rec) 215 { 216 struct ocfs2_inode_info *oi = OCFS2_I(inode); 217 struct ocfs2_extent_map *em = &oi->ip_extent_map; 218 struct ocfs2_extent_map_item *emi, *new_emi = NULL; 219 struct ocfs2_extent_map_item ins; 220 221 ins.ei_cpos = le32_to_cpu(rec->e_cpos); 222 ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb, 223 le64_to_cpu(rec->e_blkno)); 224 ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters); 225 ins.ei_flags = rec->e_flags; 226 227 search: 228 spin_lock(&oi->ip_lock); 229 230 list_for_each_entry(emi, &em->em_list, ei_list) { 231 if (ocfs2_try_to_merge_extent_map(emi, &ins)) { 232 list_move(&emi->ei_list, &em->em_list); 233 spin_unlock(&oi->ip_lock); 234 goto out; 235 } 236 } 237 238 /* 239 * No item could be merged. 240 * 241 * Either allocate and add a new item, or overwrite the last recently 242 * inserted. 243 */ 244 245 if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) { 246 if (new_emi == NULL) { 247 spin_unlock(&oi->ip_lock); 248 249 new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS); 250 if (new_emi == NULL) 251 goto out; 252 253 goto search; 254 } 255 256 ocfs2_copy_emi_fields(new_emi, &ins); 257 list_add(&new_emi->ei_list, &em->em_list); 258 em->em_num_items++; 259 new_emi = NULL; 260 } else { 261 BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0); 262 emi = list_entry(em->em_list.prev, 263 struct ocfs2_extent_map_item, ei_list); 264 list_move(&emi->ei_list, &em->em_list); 265 ocfs2_copy_emi_fields(emi, &ins); 266 } 267 268 spin_unlock(&oi->ip_lock); 269 270 out: 271 kfree(new_emi); 272 } 273 274 static int ocfs2_last_eb_is_empty(struct inode *inode, 275 struct ocfs2_dinode *di) 276 { 277 int ret, next_free; 278 u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); 279 struct buffer_head *eb_bh = NULL; 280 struct ocfs2_extent_block *eb; 281 struct ocfs2_extent_list *el; 282 283 ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh); 284 if (ret) { 285 mlog_errno(ret); 286 goto out; 287 } 288 289 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 290 el = &eb->h_list; 291 292 if (el->l_tree_depth) { 293 ocfs2_error(inode->i_sb, 294 "Inode %lu has non zero tree depth in leaf block %llu\n", 295 inode->i_ino, 296 (unsigned long long)eb_bh->b_blocknr); 297 ret = -EROFS; 298 goto out; 299 } 300 301 next_free = le16_to_cpu(el->l_next_free_rec); 302 303 if (next_free == 0 || 304 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) 305 ret = 1; 306 307 out: 308 brelse(eb_bh); 309 return ret; 310 } 311 312 /* 313 * Return the 1st index within el which contains an extent start 314 * larger than v_cluster. 315 */ 316 static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, 317 u32 v_cluster) 318 { 319 int i; 320 struct ocfs2_extent_rec *rec; 321 322 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 323 rec = &el->l_recs[i]; 324 325 if (v_cluster < le32_to_cpu(rec->e_cpos)) 326 break; 327 } 328 329 return i; 330 } 331 332 /* 333 * Figure out the size of a hole which starts at v_cluster within the given 334 * extent list. 335 * 336 * If there is no more allocation past v_cluster, we return the maximum 337 * cluster size minus v_cluster. 338 * 339 * If we have in-inode extents, then el points to the dinode list and 340 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block 341 * containing el. 342 */ 343 int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, 344 struct ocfs2_extent_list *el, 345 struct buffer_head *eb_bh, 346 u32 v_cluster, 347 u32 *num_clusters) 348 { 349 int ret, i; 350 struct buffer_head *next_eb_bh = NULL; 351 struct ocfs2_extent_block *eb, *next_eb; 352 353 i = ocfs2_search_for_hole_index(el, v_cluster); 354 355 if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { 356 eb = (struct ocfs2_extent_block *)eb_bh->b_data; 357 358 /* 359 * Check the next leaf for any extents. 360 */ 361 362 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 363 goto no_more_extents; 364 365 ret = ocfs2_read_extent_block(ci, 366 le64_to_cpu(eb->h_next_leaf_blk), 367 &next_eb_bh); 368 if (ret) { 369 mlog_errno(ret); 370 goto out; 371 } 372 373 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; 374 el = &next_eb->h_list; 375 i = ocfs2_search_for_hole_index(el, v_cluster); 376 } 377 378 no_more_extents: 379 if (i == le16_to_cpu(el->l_next_free_rec)) { 380 /* 381 * We're at the end of our existing allocation. Just 382 * return the maximum number of clusters we could 383 * possibly allocate. 384 */ 385 *num_clusters = UINT_MAX - v_cluster; 386 } else { 387 *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; 388 } 389 390 ret = 0; 391 out: 392 brelse(next_eb_bh); 393 return ret; 394 } 395 396 static int ocfs2_get_clusters_nocache(struct inode *inode, 397 struct buffer_head *di_bh, 398 u32 v_cluster, unsigned int *hole_len, 399 struct ocfs2_extent_rec *ret_rec, 400 unsigned int *is_last) 401 { 402 int i, ret, tree_height, len; 403 struct ocfs2_dinode *di; 404 struct ocfs2_extent_block *eb; 405 struct ocfs2_extent_list *el; 406 struct ocfs2_extent_rec *rec; 407 struct buffer_head *eb_bh = NULL; 408 409 memset(ret_rec, 0, sizeof(*ret_rec)); 410 if (is_last) 411 *is_last = 0; 412 413 di = (struct ocfs2_dinode *) di_bh->b_data; 414 el = &di->id2.i_list; 415 tree_height = le16_to_cpu(el->l_tree_depth); 416 417 if (tree_height > 0) { 418 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 419 &eb_bh); 420 if (ret) { 421 mlog_errno(ret); 422 goto out; 423 } 424 425 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 426 el = &eb->h_list; 427 428 if (el->l_tree_depth) { 429 ocfs2_error(inode->i_sb, 430 "Inode %lu has non zero tree depth in leaf block %llu\n", 431 inode->i_ino, 432 (unsigned long long)eb_bh->b_blocknr); 433 ret = -EROFS; 434 goto out; 435 } 436 } 437 438 if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { 439 ocfs2_error(inode->i_sb, 440 "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n", 441 inode->i_ino, 442 le16_to_cpu(el->l_next_free_rec), 443 le16_to_cpu(el->l_count)); 444 ret = -EROFS; 445 goto out; 446 } 447 448 i = ocfs2_search_extent_list(el, v_cluster); 449 if (i == -1) { 450 /* 451 * Holes can be larger than the maximum size of an 452 * extent, so we return their lengths in a separate 453 * field. 454 */ 455 if (hole_len) { 456 ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode), 457 el, eb_bh, 458 v_cluster, &len); 459 if (ret) { 460 mlog_errno(ret); 461 goto out; 462 } 463 464 *hole_len = len; 465 } 466 goto out_hole; 467 } 468 469 rec = &el->l_recs[i]; 470 471 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 472 473 if (!rec->e_blkno) { 474 ocfs2_error(inode->i_sb, 475 "Inode %lu has bad extent record (%u, %u, 0)\n", 476 inode->i_ino, 477 le32_to_cpu(rec->e_cpos), 478 ocfs2_rec_clusters(el, rec)); 479 ret = -EROFS; 480 goto out; 481 } 482 483 *ret_rec = *rec; 484 485 /* 486 * Checking for last extent is potentially expensive - we 487 * might have to look at the next leaf over to see if it's 488 * empty. 489 * 490 * The first two checks are to see whether the caller even 491 * cares for this information, and if the extent is at least 492 * the last in it's list. 493 * 494 * If those hold true, then the extent is last if any of the 495 * additional conditions hold true: 496 * - Extent list is in-inode 497 * - Extent list is right-most 498 * - Extent list is 2nd to rightmost, with empty right-most 499 */ 500 if (is_last) { 501 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { 502 if (tree_height == 0) 503 *is_last = 1; 504 else if (eb->h_blkno == di->i_last_eb_blk) 505 *is_last = 1; 506 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { 507 ret = ocfs2_last_eb_is_empty(inode, di); 508 if (ret < 0) { 509 mlog_errno(ret); 510 goto out; 511 } 512 if (ret == 1) 513 *is_last = 1; 514 } 515 } 516 } 517 518 out_hole: 519 ret = 0; 520 out: 521 brelse(eb_bh); 522 return ret; 523 } 524 525 static void ocfs2_relative_extent_offsets(struct super_block *sb, 526 u32 v_cluster, 527 struct ocfs2_extent_rec *rec, 528 u32 *p_cluster, u32 *num_clusters) 529 530 { 531 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); 532 533 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); 534 *p_cluster = *p_cluster + coff; 535 536 if (num_clusters) 537 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 538 } 539 540 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 541 u32 *p_cluster, u32 *num_clusters, 542 struct ocfs2_extent_list *el, 543 unsigned int *extent_flags) 544 { 545 int ret = 0, i; 546 struct buffer_head *eb_bh = NULL; 547 struct ocfs2_extent_block *eb; 548 struct ocfs2_extent_rec *rec; 549 u32 coff; 550 551 if (el->l_tree_depth) { 552 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 553 &eb_bh); 554 if (ret) { 555 mlog_errno(ret); 556 goto out; 557 } 558 559 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 560 el = &eb->h_list; 561 562 if (el->l_tree_depth) { 563 ocfs2_error(inode->i_sb, 564 "Inode %lu has non zero tree depth in xattr leaf block %llu\n", 565 inode->i_ino, 566 (unsigned long long)eb_bh->b_blocknr); 567 ret = -EROFS; 568 goto out; 569 } 570 } 571 572 i = ocfs2_search_extent_list(el, v_cluster); 573 if (i == -1) { 574 ret = -EROFS; 575 mlog_errno(ret); 576 goto out; 577 } else { 578 rec = &el->l_recs[i]; 579 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 580 581 if (!rec->e_blkno) { 582 ocfs2_error(inode->i_sb, 583 "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 584 inode->i_ino, 585 le32_to_cpu(rec->e_cpos), 586 ocfs2_rec_clusters(el, rec)); 587 ret = -EROFS; 588 goto out; 589 } 590 coff = v_cluster - le32_to_cpu(rec->e_cpos); 591 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 592 le64_to_cpu(rec->e_blkno)); 593 *p_cluster = *p_cluster + coff; 594 if (num_clusters) 595 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 596 597 if (extent_flags) 598 *extent_flags = rec->e_flags; 599 } 600 out: 601 brelse(eb_bh); 602 return ret; 603 } 604 605 int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 606 u32 *p_cluster, u32 *num_clusters, 607 unsigned int *extent_flags) 608 { 609 int ret; 610 unsigned int hole_len, flags = 0; 611 struct buffer_head *di_bh = NULL; 612 struct ocfs2_extent_rec rec; 613 614 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 615 ret = -ERANGE; 616 mlog_errno(ret); 617 goto out; 618 } 619 620 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 621 num_clusters, extent_flags); 622 if (ret == 0) 623 goto out; 624 625 ret = ocfs2_read_inode_block(inode, &di_bh); 626 if (ret) { 627 mlog_errno(ret); 628 goto out; 629 } 630 631 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, 632 &rec, NULL); 633 if (ret) { 634 mlog_errno(ret); 635 goto out; 636 } 637 638 if (rec.e_blkno == 0ULL) { 639 /* 640 * A hole was found. Return some canned values that 641 * callers can key on. If asked for, num_clusters will 642 * be populated with the size of the hole. 643 */ 644 *p_cluster = 0; 645 if (num_clusters) { 646 *num_clusters = hole_len; 647 } 648 } else { 649 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, 650 p_cluster, num_clusters); 651 flags = rec.e_flags; 652 653 ocfs2_extent_map_insert_rec(inode, &rec); 654 } 655 656 if (extent_flags) 657 *extent_flags = flags; 658 659 out: 660 brelse(di_bh); 661 return ret; 662 } 663 664 /* 665 * This expects alloc_sem to be held. The allocation cannot change at 666 * all while the map is in the process of being updated. 667 */ 668 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 669 u64 *ret_count, unsigned int *extent_flags) 670 { 671 int ret; 672 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 673 u32 cpos, num_clusters, p_cluster; 674 u64 boff = 0; 675 676 cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); 677 678 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, 679 extent_flags); 680 if (ret) { 681 mlog_errno(ret); 682 goto out; 683 } 684 685 /* 686 * p_cluster == 0 indicates a hole. 687 */ 688 if (p_cluster) { 689 boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 690 boff += (v_blkno & (u64)(bpc - 1)); 691 } 692 693 *p_blkno = boff; 694 695 if (ret_count) { 696 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 697 *ret_count -= v_blkno & (u64)(bpc - 1); 698 } 699 700 out: 701 return ret; 702 } 703 704 /* 705 * The ocfs2_fiemap_inline() may be a little bit misleading, since 706 * it not only handles the fiemap for inlined files, but also deals 707 * with the fast symlink, cause they have no difference for extent 708 * mapping per se. 709 */ 710 static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 711 struct fiemap_extent_info *fieinfo, 712 u64 map_start) 713 { 714 int ret; 715 unsigned int id_count; 716 struct ocfs2_dinode *di; 717 u64 phys; 718 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; 719 struct ocfs2_inode_info *oi = OCFS2_I(inode); 720 721 di = (struct ocfs2_dinode *)di_bh->b_data; 722 if (ocfs2_inode_is_fast_symlink(inode)) 723 id_count = ocfs2_fast_symlink_chars(inode->i_sb); 724 else 725 id_count = le16_to_cpu(di->id2.i_data.id_count); 726 727 if (map_start < id_count) { 728 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 729 if (ocfs2_inode_is_fast_symlink(inode)) 730 phys += offsetof(struct ocfs2_dinode, id2.i_symlink); 731 else 732 phys += offsetof(struct ocfs2_dinode, 733 id2.i_data.id_data); 734 735 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 736 flags); 737 if (ret < 0) 738 return ret; 739 } 740 741 return 0; 742 } 743 744 int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 745 u64 map_start, u64 map_len) 746 { 747 int ret, is_last; 748 u32 mapping_end, cpos; 749 unsigned int hole_size; 750 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 751 u64 len_bytes, phys_bytes, virt_bytes; 752 struct buffer_head *di_bh = NULL; 753 struct ocfs2_extent_rec rec; 754 755 ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0); 756 if (ret) 757 return ret; 758 759 ret = ocfs2_inode_lock(inode, &di_bh, 0); 760 if (ret) { 761 mlog_errno(ret); 762 goto out; 763 } 764 765 down_read(&OCFS2_I(inode)->ip_alloc_sem); 766 767 /* 768 * Handle inline-data and fast symlink separately. 769 */ 770 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || 771 ocfs2_inode_is_fast_symlink(inode)) { 772 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 773 goto out_unlock; 774 } 775 776 cpos = map_start >> osb->s_clustersize_bits; 777 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 778 map_start + map_len); 779 is_last = 0; 780 while (cpos < mapping_end && !is_last) { 781 u32 fe_flags; 782 783 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 784 &hole_size, &rec, &is_last); 785 if (ret) { 786 mlog_errno(ret); 787 goto out_unlock; 788 } 789 790 if (rec.e_blkno == 0ULL) { 791 cpos += hole_size; 792 continue; 793 } 794 795 fe_flags = 0; 796 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 797 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 798 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 799 fe_flags |= FIEMAP_EXTENT_SHARED; 800 if (is_last) 801 fe_flags |= FIEMAP_EXTENT_LAST; 802 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 803 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; 804 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; 805 806 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, 807 len_bytes, fe_flags); 808 if (ret) 809 break; 810 811 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); 812 } 813 814 if (ret > 0) 815 ret = 0; 816 817 out_unlock: 818 brelse(di_bh); 819 820 up_read(&OCFS2_I(inode)->ip_alloc_sem); 821 822 ocfs2_inode_unlock(inode, 0); 823 out: 824 825 return ret; 826 } 827 828 /* Is IO overwriting allocated blocks? */ 829 int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh, 830 u64 map_start, u64 map_len) 831 { 832 int ret = 0, is_last; 833 u32 mapping_end, cpos; 834 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 835 struct ocfs2_extent_rec rec; 836 837 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 838 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len)) 839 return ret; 840 else 841 return -EAGAIN; 842 } 843 844 cpos = map_start >> osb->s_clustersize_bits; 845 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 846 map_start + map_len); 847 is_last = 0; 848 while (cpos < mapping_end && !is_last) { 849 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 850 NULL, &rec, &is_last); 851 if (ret) { 852 mlog_errno(ret); 853 goto out; 854 } 855 856 if (rec.e_blkno == 0ULL) 857 break; 858 859 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 860 break; 861 862 cpos = le32_to_cpu(rec.e_cpos) + 863 le16_to_cpu(rec.e_leaf_clusters); 864 } 865 866 if (cpos < mapping_end) 867 ret = -EAGAIN; 868 out: 869 return ret; 870 } 871 872 int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) 873 { 874 struct inode *inode = file->f_mapping->host; 875 int ret; 876 unsigned int is_last = 0, is_data = 0; 877 u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; 878 u32 cpos, cend, clen, hole_size; 879 u64 extoff, extlen; 880 struct buffer_head *di_bh = NULL; 881 struct ocfs2_extent_rec rec; 882 883 BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE); 884 885 ret = ocfs2_inode_lock(inode, &di_bh, 0); 886 if (ret) { 887 mlog_errno(ret); 888 goto out; 889 } 890 891 down_read(&OCFS2_I(inode)->ip_alloc_sem); 892 893 if (*offset >= i_size_read(inode)) { 894 ret = -ENXIO; 895 goto out_unlock; 896 } 897 898 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 899 if (whence == SEEK_HOLE) 900 *offset = i_size_read(inode); 901 goto out_unlock; 902 } 903 904 clen = 0; 905 cpos = *offset >> cs_bits; 906 cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); 907 908 while (cpos < cend && !is_last) { 909 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, 910 &rec, &is_last); 911 if (ret) { 912 mlog_errno(ret); 913 goto out_unlock; 914 } 915 916 extoff = cpos; 917 extoff <<= cs_bits; 918 919 if (rec.e_blkno == 0ULL) { 920 clen = hole_size; 921 is_data = 0; 922 } else { 923 clen = le16_to_cpu(rec.e_leaf_clusters) - 924 (cpos - le32_to_cpu(rec.e_cpos)); 925 is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; 926 } 927 928 if ((!is_data && whence == SEEK_HOLE) || 929 (is_data && whence == SEEK_DATA)) { 930 if (extoff > *offset) 931 *offset = extoff; 932 goto out_unlock; 933 } 934 935 if (!is_last) 936 cpos += clen; 937 } 938 939 if (whence == SEEK_HOLE) { 940 extoff = cpos; 941 extoff <<= cs_bits; 942 extlen = clen; 943 extlen <<= cs_bits; 944 945 if ((extoff + extlen) > i_size_read(inode)) 946 extlen = i_size_read(inode) - extoff; 947 extoff += extlen; 948 if (extoff > *offset) 949 *offset = extoff; 950 goto out_unlock; 951 } 952 953 ret = -ENXIO; 954 955 out_unlock: 956 957 brelse(di_bh); 958 959 up_read(&OCFS2_I(inode)->ip_alloc_sem); 960 961 ocfs2_inode_unlock(inode, 0); 962 out: 963 return ret; 964 } 965 966 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, 967 struct buffer_head *bhs[], int flags, 968 int (*validate)(struct super_block *sb, 969 struct buffer_head *bh)) 970 { 971 int rc = 0; 972 u64 p_block, p_count; 973 int i, count, done = 0; 974 975 trace_ocfs2_read_virt_blocks( 976 inode, (unsigned long long)v_block, nr, bhs, flags, 977 validate); 978 979 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= 980 i_size_read(inode)) { 981 BUG_ON(!(flags & OCFS2_BH_READAHEAD)); 982 goto out; 983 } 984 985 while (done < nr) { 986 if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { 987 rc = -EAGAIN; 988 mlog(ML_ERROR, 989 "Inode #%llu ip_alloc_sem is temporarily unavailable\n", 990 (unsigned long long)OCFS2_I(inode)->ip_blkno); 991 break; 992 } 993 rc = ocfs2_extent_map_get_blocks(inode, v_block + done, 994 &p_block, &p_count, NULL); 995 up_read(&OCFS2_I(inode)->ip_alloc_sem); 996 if (rc) { 997 mlog_errno(rc); 998 break; 999 } 1000 1001 if (!p_block) { 1002 rc = -EIO; 1003 mlog(ML_ERROR, 1004 "Inode #%llu contains a hole at offset %llu\n", 1005 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1006 (unsigned long long)(v_block + done) << 1007 inode->i_sb->s_blocksize_bits); 1008 break; 1009 } 1010 1011 count = nr - done; 1012 if (p_count < count) 1013 count = p_count; 1014 1015 /* 1016 * If the caller passed us bhs, they should have come 1017 * from a previous readahead call to this function. Thus, 1018 * they should have the right b_blocknr. 1019 */ 1020 for (i = 0; i < count; i++) { 1021 if (!bhs[done + i]) 1022 continue; 1023 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); 1024 } 1025 1026 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count, 1027 bhs + done, flags, validate); 1028 if (rc) { 1029 mlog_errno(rc); 1030 break; 1031 } 1032 done += count; 1033 } 1034 1035 out: 1036 return rc; 1037 } 1038 1039 1040