// SPDX-License-Identifier: GPL-2.0-only
/*
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle. All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}
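
/*
 * Illustrative note: a cache hit in __ocfs2_extent_map_lookup()
 * list_move()s the item to the head of em_list, keeping the list in
 * most-recently-used order, so eviction can simply recycle the tail.
 * A miss returns -ENOENT and the caller (ocfs2_get_clusters() below)
 * falls back to the on-disk extent list, then re-populates the cache
 * via ocfs2_extent_map_insert_rec().
 */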
/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}
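
/*
 * Worked example for the merge above (illustrative numbers only):
 * with emi = (cpos 100, phys 500, 8 clusters) and
 * ins = (cpos 108, phys 508, 8 clusters) carrying the same flags, the
 * first branch fires because ins begins exactly where emi ends both
 * logically and physically, so emi simply grows to (cpos 100,
 * phys 500, 16 clusters). The second branch is the mirror image,
 * with ins ending exactly where emi begins.
 */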
/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently used one.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}
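
/*
 * A note on the locking pattern above: kmalloc(GFP_NOFS) may sleep,
 * so the allocation cannot happen under ip_lock. The function instead
 * drops the spinlock, allocates, and jumps back to the search label,
 * since the list may have changed while the lock was dropped. A
 * minimal sketch of the idiom, with hypothetical names:
 *
 *	spin_lock(&lock);
 *	...
 *	if (need_new_item && !new_item) {
 *		spin_unlock(&lock);
 *		new_item = kmalloc(sizeof(*new_item), GFP_NOFS);
 *		if (!new_item)
 *			goto out;
 *		goto search;	(re-scan state under the lock)
 *	}
 */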
static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the first index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no allocation past v_cluster, we return the maximum
 * possible cluster offset (UINT_MAX) minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}
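
/*
 * Worked example (illustrative numbers): if el holds extents covering
 * clusters 0-9 and 20-29 and the caller asks about v_cluster 12,
 * ocfs2_search_for_hole_index() returns the index of the extent at
 * cpos 20 and *num_clusters becomes 20 - 12 = 8. Past the last
 * extent, the hole is reported as UINT_MAX - v_cluster.
 */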
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n",
			    inode->i_ino,
			    le16_to_cpu(el->l_next_free_rec),
			    le16_to_cpu(el->l_count));
		ret = -EROFS;
		goto out;
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}
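
/*
 * Contract of ocfs2_get_clusters_nocache() as consumed below: a hole
 * is signalled by ret_rec->e_blkno == 0 with the hole's length in
 * *hole_len; an allocated region comes back as the raw on-disk
 * record, which callers translate to (p_cluster, num_clusters) via
 * ocfs2_relative_extent_offsets().
 */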
static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}

int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	} else {
		rec = &el->l_recs[i];
		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				    inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			ret = -EROFS;
			goto out;
		}
		coff = v_cluster - le32_to_cpu(rec->e_cpos);
		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						      le64_to_cpu(rec->e_blkno));
		*p_cluster = *p_cluster + coff;
		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		if (extent_flags)
			*extent_flags = rec->e_flags;
	}
out:
	brelse(eb_bh);
	return ret;
}

int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int hole_len, flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters)
			*num_clusters = hole_len;
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}
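
/*
 * Worked example of the arithmetic above (illustrative, assuming
 * 4K blocks and 32K clusters, so bpc == 8): for v_blkno == 21,
 * cpos == 21 / 8 == 2. If cluster 2 maps to p_cluster 100, then
 * boff == 100 * 8 + (21 & 7) == 805, and with num_clusters == 1 the
 * returned count is 8 - 5 == 3 blocks.
 */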
/*
 * The name ocfs2_fiemap_inline() may be a little misleading: it
 * handles fiemap not only for inline-data files but also for fast
 * symlinks, since the two are identical as far as extent mapping is
 * concerned.
 *
 * Must be called with ip_alloc_sem semaphore held.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	lockdep_assert_held_read(&oi->ip_alloc_sem);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		/* Release the ip_alloc_sem to prevent deadlock on page fault */
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (ret < 0)
			return ret;
	}

	return 0;
}
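
/*
 * Why the drop/re-take dance above: fiemap_fill_next_extent() writes
 * to user memory and can therefore take a page fault, and the fault
 * path can itself acquire ip_alloc_sem, so calling it with the
 * semaphore held could deadlock. Both ocfs2_fiemap_inline() above and
 * ocfs2_fiemap() below release ip_alloc_sem around the call for this
 * reason.
 */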
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		/* Release the ip_alloc_sem to prevent deadlock on page fault */
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}
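
/*
 * Reading the scan above: the walk stops early on a hole or on a
 * refcounted (would-be copy-on-write) extent, leaving cpos short of
 * mapping_end, and the function then returns -EAGAIN to tell the
 * caller this is not a plain overwrite of already-allocated space.
 */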
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
	     inode, (unsigned long long)v_block, nr, bhs, flags, validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
			rc = -EAGAIN;
			mlog(ML_ERROR,
			     "Inode #%llu ip_alloc_sem is temporarily unavailable\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			break;
		}
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function. Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}
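
/*
 * Usage sketch for ocfs2_read_virt_blocks() (illustrative only): a
 * hypothetical caller reads one logical block of an inode without
 * caring where it sits on disk:
 *
 *	struct buffer_head *bh = NULL;
 *	int rc;
 *
 *	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &bh, 0, NULL);
 *
 * Passing OCFS2_BH_READAHEAD in flags turns a read past i_size into a
 * quiet no-op instead of a BUG, matching the check at the top of the
 * function.
 */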