1 // SPDX-License-Identifier: GPL-2.0-only 2 /* -*- mode: c; c-basic-offset: 8; -*- 3 * vim: noexpandtab sw=8 ts=8 sts=0: 4 * 5 * extent_map.c 6 * 7 * Block/Cluster mapping functions 8 * 9 * Copyright (C) 2004 Oracle. All rights reserved. 10 */ 11 12 #include <linux/fs.h> 13 #include <linux/init.h> 14 #include <linux/slab.h> 15 #include <linux/types.h> 16 #include <linux/fiemap.h> 17 18 #include <cluster/masklog.h> 19 20 #include "ocfs2.h" 21 22 #include "alloc.h" 23 #include "dlmglue.h" 24 #include "extent_map.h" 25 #include "inode.h" 26 #include "super.h" 27 #include "symlink.h" 28 #include "aops.h" 29 #include "ocfs2_trace.h" 30 31 #include "buffer_head_io.h" 32 33 /* 34 * The extent caching implementation is intentionally trivial. 35 * 36 * We only cache a small number of extents stored directly on the 37 * inode, so linear order operations are acceptable. If we ever want 38 * to increase the size of the extent map, then these algorithms must 39 * get smarter. 40 */ 41 42 void ocfs2_extent_map_init(struct inode *inode) 43 { 44 struct ocfs2_inode_info *oi = OCFS2_I(inode); 45 46 oi->ip_extent_map.em_num_items = 0; 47 INIT_LIST_HEAD(&oi->ip_extent_map.em_list); 48 } 49 50 static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, 51 unsigned int cpos, 52 struct ocfs2_extent_map_item **ret_emi) 53 { 54 unsigned int range; 55 struct ocfs2_extent_map_item *emi; 56 57 *ret_emi = NULL; 58 59 list_for_each_entry(emi, &em->em_list, ei_list) { 60 range = emi->ei_cpos + emi->ei_clusters; 61 62 if (cpos >= emi->ei_cpos && cpos < range) { 63 list_move(&emi->ei_list, &em->em_list); 64 65 *ret_emi = emi; 66 break; 67 } 68 } 69 } 70 71 static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, 72 unsigned int *phys, unsigned int *len, 73 unsigned int *flags) 74 { 75 unsigned int coff; 76 struct ocfs2_inode_info *oi = OCFS2_I(inode); 77 struct ocfs2_extent_map_item *emi; 78 79 spin_lock(&oi->ip_lock); 80 81 __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi); 82 if (emi) { 83 coff = cpos - emi->ei_cpos; 84 *phys = emi->ei_phys + coff; 85 if (len) 86 *len = emi->ei_clusters - coff; 87 if (flags) 88 *flags = emi->ei_flags; 89 } 90 91 spin_unlock(&oi->ip_lock); 92 93 if (emi == NULL) 94 return -ENOENT; 95 96 return 0; 97 } 98 99 /* 100 * Forget about all clusters equal to or greater than cpos. 101 */ 102 void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) 103 { 104 struct ocfs2_extent_map_item *emi, *n; 105 struct ocfs2_inode_info *oi = OCFS2_I(inode); 106 struct ocfs2_extent_map *em = &oi->ip_extent_map; 107 LIST_HEAD(tmp_list); 108 unsigned int range; 109 110 spin_lock(&oi->ip_lock); 111 list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { 112 if (emi->ei_cpos >= cpos) { 113 /* Full truncate of this record. */ 114 list_move(&emi->ei_list, &tmp_list); 115 BUG_ON(em->em_num_items == 0); 116 em->em_num_items--; 117 continue; 118 } 119 120 range = emi->ei_cpos + emi->ei_clusters; 121 if (range > cpos) { 122 /* Partial truncate */ 123 emi->ei_clusters = cpos - emi->ei_cpos; 124 } 125 } 126 spin_unlock(&oi->ip_lock); 127 128 list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { 129 list_del(&emi->ei_list); 130 kfree(emi); 131 } 132 } 133 134 /* 135 * Is any part of emi2 contained within emi1 136 */ 137 static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1, 138 struct ocfs2_extent_map_item *emi2) 139 { 140 unsigned int range1, range2; 141 142 /* 143 * Check if logical start of emi2 is inside emi1 144 */ 145 range1 = emi1->ei_cpos + emi1->ei_clusters; 146 if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1) 147 return 1; 148 149 /* 150 * Check if logical end of emi2 is inside emi1 151 */ 152 range2 = emi2->ei_cpos + emi2->ei_clusters; 153 if (range2 > emi1->ei_cpos && range2 <= range1) 154 return 1; 155 156 return 0; 157 } 158 159 static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest, 160 struct ocfs2_extent_map_item *src) 161 { 162 dest->ei_cpos = src->ei_cpos; 163 dest->ei_phys = src->ei_phys; 164 dest->ei_clusters = src->ei_clusters; 165 dest->ei_flags = src->ei_flags; 166 } 167 168 /* 169 * Try to merge emi with ins. Returns 1 if merge succeeds, zero 170 * otherwise. 171 */ 172 static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, 173 struct ocfs2_extent_map_item *ins) 174 { 175 /* 176 * Handle contiguousness 177 */ 178 if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) && 179 ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) && 180 ins->ei_flags == emi->ei_flags) { 181 emi->ei_clusters += ins->ei_clusters; 182 return 1; 183 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && 184 (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && 185 ins->ei_flags == emi->ei_flags) { 186 emi->ei_phys = ins->ei_phys; 187 emi->ei_cpos = ins->ei_cpos; 188 emi->ei_clusters += ins->ei_clusters; 189 return 1; 190 } 191 192 /* 193 * Overlapping extents - this shouldn't happen unless we've 194 * split an extent to change it's flags. That is exceedingly 195 * rare, so there's no sense in trying to optimize it yet. 196 */ 197 if (ocfs2_ei_is_contained(emi, ins) || 198 ocfs2_ei_is_contained(ins, emi)) { 199 ocfs2_copy_emi_fields(emi, ins); 200 return 1; 201 } 202 203 /* No merge was possible. */ 204 return 0; 205 } 206 207 /* 208 * In order to reduce complexity on the caller, this insert function 209 * is intentionally liberal in what it will accept. 210 * 211 * The only rule is that the truncate call *must* be used whenever 212 * records have been deleted. This avoids inserting overlapping 213 * records with different physical mappings. 214 */ 215 void ocfs2_extent_map_insert_rec(struct inode *inode, 216 struct ocfs2_extent_rec *rec) 217 { 218 struct ocfs2_inode_info *oi = OCFS2_I(inode); 219 struct ocfs2_extent_map *em = &oi->ip_extent_map; 220 struct ocfs2_extent_map_item *emi, *new_emi = NULL; 221 struct ocfs2_extent_map_item ins; 222 223 ins.ei_cpos = le32_to_cpu(rec->e_cpos); 224 ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb, 225 le64_to_cpu(rec->e_blkno)); 226 ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters); 227 ins.ei_flags = rec->e_flags; 228 229 search: 230 spin_lock(&oi->ip_lock); 231 232 list_for_each_entry(emi, &em->em_list, ei_list) { 233 if (ocfs2_try_to_merge_extent_map(emi, &ins)) { 234 list_move(&emi->ei_list, &em->em_list); 235 spin_unlock(&oi->ip_lock); 236 goto out; 237 } 238 } 239 240 /* 241 * No item could be merged. 242 * 243 * Either allocate and add a new item, or overwrite the last recently 244 * inserted. 245 */ 246 247 if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) { 248 if (new_emi == NULL) { 249 spin_unlock(&oi->ip_lock); 250 251 new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS); 252 if (new_emi == NULL) 253 goto out; 254 255 goto search; 256 } 257 258 ocfs2_copy_emi_fields(new_emi, &ins); 259 list_add(&new_emi->ei_list, &em->em_list); 260 em->em_num_items++; 261 new_emi = NULL; 262 } else { 263 BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0); 264 emi = list_entry(em->em_list.prev, 265 struct ocfs2_extent_map_item, ei_list); 266 list_move(&emi->ei_list, &em->em_list); 267 ocfs2_copy_emi_fields(emi, &ins); 268 } 269 270 spin_unlock(&oi->ip_lock); 271 272 out: 273 kfree(new_emi); 274 } 275 276 static int ocfs2_last_eb_is_empty(struct inode *inode, 277 struct ocfs2_dinode *di) 278 { 279 int ret, next_free; 280 u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); 281 struct buffer_head *eb_bh = NULL; 282 struct ocfs2_extent_block *eb; 283 struct ocfs2_extent_list *el; 284 285 ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh); 286 if (ret) { 287 mlog_errno(ret); 288 goto out; 289 } 290 291 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 292 el = &eb->h_list; 293 294 if (el->l_tree_depth) { 295 ocfs2_error(inode->i_sb, 296 "Inode %lu has non zero tree depth in leaf block %llu\n", 297 inode->i_ino, 298 (unsigned long long)eb_bh->b_blocknr); 299 ret = -EROFS; 300 goto out; 301 } 302 303 next_free = le16_to_cpu(el->l_next_free_rec); 304 305 if (next_free == 0 || 306 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) 307 ret = 1; 308 309 out: 310 brelse(eb_bh); 311 return ret; 312 } 313 314 /* 315 * Return the 1st index within el which contains an extent start 316 * larger than v_cluster. 317 */ 318 static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, 319 u32 v_cluster) 320 { 321 int i; 322 struct ocfs2_extent_rec *rec; 323 324 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 325 rec = &el->l_recs[i]; 326 327 if (v_cluster < le32_to_cpu(rec->e_cpos)) 328 break; 329 } 330 331 return i; 332 } 333 334 /* 335 * Figure out the size of a hole which starts at v_cluster within the given 336 * extent list. 337 * 338 * If there is no more allocation past v_cluster, we return the maximum 339 * cluster size minus v_cluster. 340 * 341 * If we have in-inode extents, then el points to the dinode list and 342 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block 343 * containing el. 344 */ 345 int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, 346 struct ocfs2_extent_list *el, 347 struct buffer_head *eb_bh, 348 u32 v_cluster, 349 u32 *num_clusters) 350 { 351 int ret, i; 352 struct buffer_head *next_eb_bh = NULL; 353 struct ocfs2_extent_block *eb, *next_eb; 354 355 i = ocfs2_search_for_hole_index(el, v_cluster); 356 357 if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { 358 eb = (struct ocfs2_extent_block *)eb_bh->b_data; 359 360 /* 361 * Check the next leaf for any extents. 362 */ 363 364 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 365 goto no_more_extents; 366 367 ret = ocfs2_read_extent_block(ci, 368 le64_to_cpu(eb->h_next_leaf_blk), 369 &next_eb_bh); 370 if (ret) { 371 mlog_errno(ret); 372 goto out; 373 } 374 375 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; 376 el = &next_eb->h_list; 377 i = ocfs2_search_for_hole_index(el, v_cluster); 378 } 379 380 no_more_extents: 381 if (i == le16_to_cpu(el->l_next_free_rec)) { 382 /* 383 * We're at the end of our existing allocation. Just 384 * return the maximum number of clusters we could 385 * possibly allocate. 386 */ 387 *num_clusters = UINT_MAX - v_cluster; 388 } else { 389 *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; 390 } 391 392 ret = 0; 393 out: 394 brelse(next_eb_bh); 395 return ret; 396 } 397 398 static int ocfs2_get_clusters_nocache(struct inode *inode, 399 struct buffer_head *di_bh, 400 u32 v_cluster, unsigned int *hole_len, 401 struct ocfs2_extent_rec *ret_rec, 402 unsigned int *is_last) 403 { 404 int i, ret, tree_height, len; 405 struct ocfs2_dinode *di; 406 struct ocfs2_extent_block *uninitialized_var(eb); 407 struct ocfs2_extent_list *el; 408 struct ocfs2_extent_rec *rec; 409 struct buffer_head *eb_bh = NULL; 410 411 memset(ret_rec, 0, sizeof(*ret_rec)); 412 if (is_last) 413 *is_last = 0; 414 415 di = (struct ocfs2_dinode *) di_bh->b_data; 416 el = &di->id2.i_list; 417 tree_height = le16_to_cpu(el->l_tree_depth); 418 419 if (tree_height > 0) { 420 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 421 &eb_bh); 422 if (ret) { 423 mlog_errno(ret); 424 goto out; 425 } 426 427 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 428 el = &eb->h_list; 429 430 if (el->l_tree_depth) { 431 ocfs2_error(inode->i_sb, 432 "Inode %lu has non zero tree depth in leaf block %llu\n", 433 inode->i_ino, 434 (unsigned long long)eb_bh->b_blocknr); 435 ret = -EROFS; 436 goto out; 437 } 438 } 439 440 i = ocfs2_search_extent_list(el, v_cluster); 441 if (i == -1) { 442 /* 443 * Holes can be larger than the maximum size of an 444 * extent, so we return their lengths in a separate 445 * field. 446 */ 447 if (hole_len) { 448 ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode), 449 el, eb_bh, 450 v_cluster, &len); 451 if (ret) { 452 mlog_errno(ret); 453 goto out; 454 } 455 456 *hole_len = len; 457 } 458 goto out_hole; 459 } 460 461 rec = &el->l_recs[i]; 462 463 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 464 465 if (!rec->e_blkno) { 466 ocfs2_error(inode->i_sb, 467 "Inode %lu has bad extent record (%u, %u, 0)\n", 468 inode->i_ino, 469 le32_to_cpu(rec->e_cpos), 470 ocfs2_rec_clusters(el, rec)); 471 ret = -EROFS; 472 goto out; 473 } 474 475 *ret_rec = *rec; 476 477 /* 478 * Checking for last extent is potentially expensive - we 479 * might have to look at the next leaf over to see if it's 480 * empty. 481 * 482 * The first two checks are to see whether the caller even 483 * cares for this information, and if the extent is at least 484 * the last in it's list. 485 * 486 * If those hold true, then the extent is last if any of the 487 * additional conditions hold true: 488 * - Extent list is in-inode 489 * - Extent list is right-most 490 * - Extent list is 2nd to rightmost, with empty right-most 491 */ 492 if (is_last) { 493 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { 494 if (tree_height == 0) 495 *is_last = 1; 496 else if (eb->h_blkno == di->i_last_eb_blk) 497 *is_last = 1; 498 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { 499 ret = ocfs2_last_eb_is_empty(inode, di); 500 if (ret < 0) { 501 mlog_errno(ret); 502 goto out; 503 } 504 if (ret == 1) 505 *is_last = 1; 506 } 507 } 508 } 509 510 out_hole: 511 ret = 0; 512 out: 513 brelse(eb_bh); 514 return ret; 515 } 516 517 static void ocfs2_relative_extent_offsets(struct super_block *sb, 518 u32 v_cluster, 519 struct ocfs2_extent_rec *rec, 520 u32 *p_cluster, u32 *num_clusters) 521 522 { 523 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); 524 525 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); 526 *p_cluster = *p_cluster + coff; 527 528 if (num_clusters) 529 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 530 } 531 532 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 533 u32 *p_cluster, u32 *num_clusters, 534 struct ocfs2_extent_list *el, 535 unsigned int *extent_flags) 536 { 537 int ret = 0, i; 538 struct buffer_head *eb_bh = NULL; 539 struct ocfs2_extent_block *eb; 540 struct ocfs2_extent_rec *rec; 541 u32 coff; 542 543 if (el->l_tree_depth) { 544 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 545 &eb_bh); 546 if (ret) { 547 mlog_errno(ret); 548 goto out; 549 } 550 551 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 552 el = &eb->h_list; 553 554 if (el->l_tree_depth) { 555 ocfs2_error(inode->i_sb, 556 "Inode %lu has non zero tree depth in xattr leaf block %llu\n", 557 inode->i_ino, 558 (unsigned long long)eb_bh->b_blocknr); 559 ret = -EROFS; 560 goto out; 561 } 562 } 563 564 i = ocfs2_search_extent_list(el, v_cluster); 565 if (i == -1) { 566 ret = -EROFS; 567 mlog_errno(ret); 568 goto out; 569 } else { 570 rec = &el->l_recs[i]; 571 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 572 573 if (!rec->e_blkno) { 574 ocfs2_error(inode->i_sb, 575 "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 576 inode->i_ino, 577 le32_to_cpu(rec->e_cpos), 578 ocfs2_rec_clusters(el, rec)); 579 ret = -EROFS; 580 goto out; 581 } 582 coff = v_cluster - le32_to_cpu(rec->e_cpos); 583 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 584 le64_to_cpu(rec->e_blkno)); 585 *p_cluster = *p_cluster + coff; 586 if (num_clusters) 587 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 588 589 if (extent_flags) 590 *extent_flags = rec->e_flags; 591 } 592 out: 593 if (eb_bh) 594 brelse(eb_bh); 595 return ret; 596 } 597 598 int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 599 u32 *p_cluster, u32 *num_clusters, 600 unsigned int *extent_flags) 601 { 602 int ret; 603 unsigned int uninitialized_var(hole_len), flags = 0; 604 struct buffer_head *di_bh = NULL; 605 struct ocfs2_extent_rec rec; 606 607 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 608 ret = -ERANGE; 609 mlog_errno(ret); 610 goto out; 611 } 612 613 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 614 num_clusters, extent_flags); 615 if (ret == 0) 616 goto out; 617 618 ret = ocfs2_read_inode_block(inode, &di_bh); 619 if (ret) { 620 mlog_errno(ret); 621 goto out; 622 } 623 624 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, 625 &rec, NULL); 626 if (ret) { 627 mlog_errno(ret); 628 goto out; 629 } 630 631 if (rec.e_blkno == 0ULL) { 632 /* 633 * A hole was found. Return some canned values that 634 * callers can key on. If asked for, num_clusters will 635 * be populated with the size of the hole. 636 */ 637 *p_cluster = 0; 638 if (num_clusters) { 639 *num_clusters = hole_len; 640 } 641 } else { 642 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, 643 p_cluster, num_clusters); 644 flags = rec.e_flags; 645 646 ocfs2_extent_map_insert_rec(inode, &rec); 647 } 648 649 if (extent_flags) 650 *extent_flags = flags; 651 652 out: 653 brelse(di_bh); 654 return ret; 655 } 656 657 /* 658 * This expects alloc_sem to be held. The allocation cannot change at 659 * all while the map is in the process of being updated. 660 */ 661 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 662 u64 *ret_count, unsigned int *extent_flags) 663 { 664 int ret; 665 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 666 u32 cpos, num_clusters, p_cluster; 667 u64 boff = 0; 668 669 cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); 670 671 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, 672 extent_flags); 673 if (ret) { 674 mlog_errno(ret); 675 goto out; 676 } 677 678 /* 679 * p_cluster == 0 indicates a hole. 680 */ 681 if (p_cluster) { 682 boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 683 boff += (v_blkno & (u64)(bpc - 1)); 684 } 685 686 *p_blkno = boff; 687 688 if (ret_count) { 689 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 690 *ret_count -= v_blkno & (u64)(bpc - 1); 691 } 692 693 out: 694 return ret; 695 } 696 697 /* 698 * The ocfs2_fiemap_inline() may be a little bit misleading, since 699 * it not only handles the fiemap for inlined files, but also deals 700 * with the fast symlink, cause they have no difference for extent 701 * mapping per se. 702 */ 703 static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 704 struct fiemap_extent_info *fieinfo, 705 u64 map_start) 706 { 707 int ret; 708 unsigned int id_count; 709 struct ocfs2_dinode *di; 710 u64 phys; 711 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; 712 struct ocfs2_inode_info *oi = OCFS2_I(inode); 713 714 di = (struct ocfs2_dinode *)di_bh->b_data; 715 if (ocfs2_inode_is_fast_symlink(inode)) 716 id_count = ocfs2_fast_symlink_chars(inode->i_sb); 717 else 718 id_count = le16_to_cpu(di->id2.i_data.id_count); 719 720 if (map_start < id_count) { 721 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 722 if (ocfs2_inode_is_fast_symlink(inode)) 723 phys += offsetof(struct ocfs2_dinode, id2.i_symlink); 724 else 725 phys += offsetof(struct ocfs2_dinode, 726 id2.i_data.id_data); 727 728 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 729 flags); 730 if (ret < 0) 731 return ret; 732 } 733 734 return 0; 735 } 736 737 #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) 738 739 int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 740 u64 map_start, u64 map_len) 741 { 742 int ret, is_last; 743 u32 mapping_end, cpos; 744 unsigned int hole_size; 745 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 746 u64 len_bytes, phys_bytes, virt_bytes; 747 struct buffer_head *di_bh = NULL; 748 struct ocfs2_extent_rec rec; 749 750 ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); 751 if (ret) 752 return ret; 753 754 ret = ocfs2_inode_lock(inode, &di_bh, 0); 755 if (ret) { 756 mlog_errno(ret); 757 goto out; 758 } 759 760 down_read(&OCFS2_I(inode)->ip_alloc_sem); 761 762 /* 763 * Handle inline-data and fast symlink separately. 764 */ 765 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || 766 ocfs2_inode_is_fast_symlink(inode)) { 767 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 768 goto out_unlock; 769 } 770 771 cpos = map_start >> osb->s_clustersize_bits; 772 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 773 map_start + map_len); 774 is_last = 0; 775 while (cpos < mapping_end && !is_last) { 776 u32 fe_flags; 777 778 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 779 &hole_size, &rec, &is_last); 780 if (ret) { 781 mlog_errno(ret); 782 goto out_unlock; 783 } 784 785 if (rec.e_blkno == 0ULL) { 786 cpos += hole_size; 787 continue; 788 } 789 790 fe_flags = 0; 791 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 792 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 793 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 794 fe_flags |= FIEMAP_EXTENT_SHARED; 795 if (is_last) 796 fe_flags |= FIEMAP_EXTENT_LAST; 797 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 798 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; 799 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; 800 801 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, 802 len_bytes, fe_flags); 803 if (ret) 804 break; 805 806 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); 807 } 808 809 if (ret > 0) 810 ret = 0; 811 812 out_unlock: 813 brelse(di_bh); 814 815 up_read(&OCFS2_I(inode)->ip_alloc_sem); 816 817 ocfs2_inode_unlock(inode, 0); 818 out: 819 820 return ret; 821 } 822 823 /* Is IO overwriting allocated blocks? */ 824 int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh, 825 u64 map_start, u64 map_len) 826 { 827 int ret = 0, is_last; 828 u32 mapping_end, cpos; 829 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 830 struct ocfs2_extent_rec rec; 831 832 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 833 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len)) 834 return ret; 835 else 836 return -EAGAIN; 837 } 838 839 cpos = map_start >> osb->s_clustersize_bits; 840 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 841 map_start + map_len); 842 is_last = 0; 843 while (cpos < mapping_end && !is_last) { 844 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 845 NULL, &rec, &is_last); 846 if (ret) { 847 mlog_errno(ret); 848 goto out; 849 } 850 851 if (rec.e_blkno == 0ULL) 852 break; 853 854 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 855 break; 856 857 cpos = le32_to_cpu(rec.e_cpos) + 858 le16_to_cpu(rec.e_leaf_clusters); 859 } 860 861 if (cpos < mapping_end) 862 ret = -EAGAIN; 863 out: 864 return ret; 865 } 866 867 int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) 868 { 869 struct inode *inode = file->f_mapping->host; 870 int ret; 871 unsigned int is_last = 0, is_data = 0; 872 u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; 873 u32 cpos, cend, clen, hole_size; 874 u64 extoff, extlen; 875 struct buffer_head *di_bh = NULL; 876 struct ocfs2_extent_rec rec; 877 878 BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE); 879 880 ret = ocfs2_inode_lock(inode, &di_bh, 0); 881 if (ret) { 882 mlog_errno(ret); 883 goto out; 884 } 885 886 down_read(&OCFS2_I(inode)->ip_alloc_sem); 887 888 if (*offset >= i_size_read(inode)) { 889 ret = -ENXIO; 890 goto out_unlock; 891 } 892 893 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 894 if (whence == SEEK_HOLE) 895 *offset = i_size_read(inode); 896 goto out_unlock; 897 } 898 899 clen = 0; 900 cpos = *offset >> cs_bits; 901 cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); 902 903 while (cpos < cend && !is_last) { 904 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, 905 &rec, &is_last); 906 if (ret) { 907 mlog_errno(ret); 908 goto out_unlock; 909 } 910 911 extoff = cpos; 912 extoff <<= cs_bits; 913 914 if (rec.e_blkno == 0ULL) { 915 clen = hole_size; 916 is_data = 0; 917 } else { 918 clen = le16_to_cpu(rec.e_leaf_clusters) - 919 (cpos - le32_to_cpu(rec.e_cpos)); 920 is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; 921 } 922 923 if ((!is_data && whence == SEEK_HOLE) || 924 (is_data && whence == SEEK_DATA)) { 925 if (extoff > *offset) 926 *offset = extoff; 927 goto out_unlock; 928 } 929 930 if (!is_last) 931 cpos += clen; 932 } 933 934 if (whence == SEEK_HOLE) { 935 extoff = cpos; 936 extoff <<= cs_bits; 937 extlen = clen; 938 extlen <<= cs_bits; 939 940 if ((extoff + extlen) > i_size_read(inode)) 941 extlen = i_size_read(inode) - extoff; 942 extoff += extlen; 943 if (extoff > *offset) 944 *offset = extoff; 945 goto out_unlock; 946 } 947 948 ret = -ENXIO; 949 950 out_unlock: 951 952 brelse(di_bh); 953 954 up_read(&OCFS2_I(inode)->ip_alloc_sem); 955 956 ocfs2_inode_unlock(inode, 0); 957 out: 958 return ret; 959 } 960 961 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, 962 struct buffer_head *bhs[], int flags, 963 int (*validate)(struct super_block *sb, 964 struct buffer_head *bh)) 965 { 966 int rc = 0; 967 u64 p_block, p_count; 968 int i, count, done = 0; 969 970 trace_ocfs2_read_virt_blocks( 971 inode, (unsigned long long)v_block, nr, bhs, flags, 972 validate); 973 974 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= 975 i_size_read(inode)) { 976 BUG_ON(!(flags & OCFS2_BH_READAHEAD)); 977 goto out; 978 } 979 980 while (done < nr) { 981 down_read(&OCFS2_I(inode)->ip_alloc_sem); 982 rc = ocfs2_extent_map_get_blocks(inode, v_block + done, 983 &p_block, &p_count, NULL); 984 up_read(&OCFS2_I(inode)->ip_alloc_sem); 985 if (rc) { 986 mlog_errno(rc); 987 break; 988 } 989 990 if (!p_block) { 991 rc = -EIO; 992 mlog(ML_ERROR, 993 "Inode #%llu contains a hole at offset %llu\n", 994 (unsigned long long)OCFS2_I(inode)->ip_blkno, 995 (unsigned long long)(v_block + done) << 996 inode->i_sb->s_blocksize_bits); 997 break; 998 } 999 1000 count = nr - done; 1001 if (p_count < count) 1002 count = p_count; 1003 1004 /* 1005 * If the caller passed us bhs, they should have come 1006 * from a previous readahead call to this function. Thus, 1007 * they should have the right b_blocknr. 1008 */ 1009 for (i = 0; i < count; i++) { 1010 if (!bhs[done + i]) 1011 continue; 1012 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); 1013 } 1014 1015 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count, 1016 bhs + done, flags, validate); 1017 if (rc) { 1018 mlog_errno(rc); 1019 break; 1020 } 1021 done += count; 1022 } 1023 1024 out: 1025 return rc; 1026 } 1027 1028 1029