/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 *	jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001

/*
 *	xtree key/entry comparison: extent offset
 *
 * return:
 *	-1: k < start of extent
 *	 0: start_of_extent <= k <= end_of_extent
 *	 1: k > end_of_extent
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}

/* write a xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address;
 * validates the page header (nextindex/maxentry sanity) and fails
 * with -EIO on a corrupt xtree page after unpinning it.
 */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
		    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;	/* page to be split */
	s16 index;		/* index at which the new entry is inserted */
	u8 flag;		/* xad flag of the new entry */
	s64 off;		/* extent offset of the new entry */
	s64 addr;		/* extent address of the new entry */
	int len;		/* extent length of the new entry */
	struct pxdlist *pxdlist;	/* preallocated pxds for split page(s) */
};


/*
 *	statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;
	uint fastSearch;
	uint split;
} xtStat;
#endif


/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);

#ifdef _STILL_TO_PORT
static
int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
	       xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif				/* _STILL_TO_PORT */

/* External references */

/*
 *	debug control
 */
/*	#define _JFS_DEBUG_XTREE	1 */


/*
 *	xtLookup()
 *
 * function: map a single page into a physical extent;
 *
 * on success, *pflag/*paddr/*plen describe the backing extent.
 * a lookup beyond eof (when no_check is clear) returns 0 with *paddr
 * left 0; on a search miss (hole in a sparse file) *plen is clamped
 * to the distance to the next allocated extent, if any.
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size) {
			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
				(ulong) lstart, (ulong) size);
			return 0;
		}
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	/* unpin the leaf page pinned by xtSearch() */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtLookupList()
 *
 * function: map a single logical extent into a list of physical extent;
 *
 * parameter:
 *	struct inode	*ip,
 *	struct lxdlist	*lxdlist,	lxd list (in)
 *	struct xadlist	*xadlist,	xad list (in/out)
 *	int		flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
 *	0:	success
 *
 * note: a page being written (even a single byte) is backed fully,
 *	except the last page which is only backed with blocks
 *	required to cover the last byte;
 *	the extent backing a page is fully contained within an xad;
 */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	/* file size in filesystem blocks, rounded up */
	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
	/* restart point: the tree is re-searched from the root after
	 * the linear scan runs off the end of a non-root leaf page
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
	 */
      compare:
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad */
	if (lstart < xend) {
		/* initialize new pxd */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend)  */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	xadlist->nxad = npxd;

//out:
XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack - traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 *
 *	n.b. on a leaf-page hit found via binary search (not the
 *	sequential-access fast path), the function returns without
 *	writing *nextp.
 */
static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 *
			 * n.b. this label is reached only from the
			 * sequential-access hits/misses above;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}

/*
 *	xtInsert()
 *
 * function:	insert the xad entry for the extent [xoff, xoff + xlen)
 *	at the position returned by xtSearch(), allocating the data
 *	extent when *xaddrp is zero, and splitting the leaf page when
 *	it is full;
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base
B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		/* offset already mapped, or the new extent would
		 * overlap the next existing extent
		 */
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *	insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		/* extend the logged range down to the insertion index */
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack - traverse stack from xtSearch()
 *
 * return:
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ?
	 * if so, grow the root in place instead of splitting;
	 */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries.
 */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		sp->header.nextindex =
		    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	/* n.b. any split failure is reported to the caller as -EIO */
	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index.
			 */
			sp->header.nextindex =
			    cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
					1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}


/*
 *	xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	u64		*rbnp,
 *
 * return:
 *	Pointer to page in which to insert or NULL on error.
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	/* blocks charged to quota so far; assumed to be released on the
	 * clean_up error path - label not visible in this excerpt, verify
	 */
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	/* take the next preallocated pxd for the new right page */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		rc = -EDQUOT;
		goto clean_up;
	}

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 *	sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * a entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 *	non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1],
&sp->xad[skip], 1397 (middle - skip) << L2XTSLOTSIZE); 1398 1399 /* insert new entry */ 1400 xad = &sp->xad[skip]; 1401 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1402 split->addr); 1403 1404 /* update page header */ 1405 sp->header.nextindex = cpu_to_le16(middle + 1); 1406 if (!test_cflag(COMMIT_Nolink, ip)) { 1407 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1408 min(skip, (int)sxtlck->lwm.offset) : skip; 1409 } 1410 1411 rp->header.nextindex = 1412 cpu_to_le16(XTENTRYSTART + righthalf); 1413 } 1414 /* 1415 * skip index in new right page - insert into right page: 1416 */ 1417 else { 1418 /* move left head of right half to right page */ 1419 n = skip - middle; 1420 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1421 n << L2XTSLOTSIZE); 1422 1423 /* insert new entry */ 1424 n += XTENTRYSTART; 1425 xad = &rp->xad[n]; 1426 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1427 split->addr); 1428 1429 /* move right tail of right half to right page */ 1430 if (skip < maxentry) 1431 memmove(&rp->xad[n + 1], &sp->xad[skip], 1432 (maxentry - skip) << L2XTSLOTSIZE); 1433 1434 /* update page header */ 1435 sp->header.nextindex = cpu_to_le16(middle); 1436 if (!test_cflag(COMMIT_Nolink, ip)) { 1437 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1438 min(middle, (int)sxtlck->lwm.offset) : middle; 1439 } 1440 1441 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1442 righthalf + 1); 1443 } 1444 1445 if (!test_cflag(COMMIT_Nolink, ip)) { 1446 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1447 sxtlck->lwm.offset; 1448 1449 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1450 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1451 XTENTRYSTART; 1452 } 1453 1454 *rmpp = rmp; 1455 *rbnp = rbn; 1456 1457 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1458 return rc; 1459 1460 clean_up: 1461 1462 /* Rollback quota allocation. 
*/ 1463 if (quota_allocation) 1464 DQUOT_FREE_BLOCK(ip, quota_allocation); 1465 1466 return (rc); 1467 } 1468 1469 1470 /* 1471 * xtSplitRoot() 1472 * 1473 * function: 1474 * split the full root page into 1475 * original/root/split page and new right page 1476 * i.e., root remains fixed in tree anchor (inode) and 1477 * the root is copied to a single new right child page 1478 * since root page << non-root page, and 1479 * the split root page contains a single entry for the 1480 * new right child page. 1481 * 1482 * parameter: 1483 * int tid, 1484 * struct inode *ip, 1485 * struct xtsplit *split, 1486 * struct metapage **rmpp) 1487 * 1488 * return: 1489 * Pointer to page in which to insert or NULL on error. 1490 */ 1491 static int 1492 xtSplitRoot(tid_t tid, 1493 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1494 { 1495 xtpage_t *sp; 1496 struct metapage *rmp; 1497 xtpage_t *rp; 1498 s64 rbn; 1499 int skip, nextindex; 1500 xad_t *xad; 1501 pxd_t *pxd; 1502 struct pxdlist *pxdlist; 1503 struct tlock *tlck; 1504 struct xtlock *xtlck; 1505 1506 sp = &JFS_IP(ip)->i_xtroot; 1507 1508 INCREMENT(xtStat.split); 1509 1510 /* 1511 * allocate a single (right) child page 1512 */ 1513 pxdlist = split->pxdlist; 1514 pxd = &pxdlist->pxd[pxdlist->npxd]; 1515 pxdlist->npxd++; 1516 rbn = addressPXD(pxd); 1517 rmp = get_metapage(ip, rbn, PSIZE, 1); 1518 if (rmp == NULL) 1519 return -EIO; 1520 1521 /* Allocate blocks to quota. */ 1522 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1523 release_metapage(rmp); 1524 return -EDQUOT; 1525 } 1526 1527 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1528 1529 /* 1530 * acquire a transaction lock on the new right page; 1531 * 1532 * action: new page; 1533 */ 1534 BT_MARK_DIRTY(rmp, ip); 1535 1536 rp = (xtpage_t *) rmp->data; 1537 rp->header.flag = 1538 (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL; 1539 rp->header.self = *pxd; 1540 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1541 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1542 1543 /* initialize sibling pointers */ 1544 rp->header.next = 0; 1545 rp->header.prev = 0; 1546 1547 /* 1548 * copy the in-line root page into new right page extent 1549 */ 1550 nextindex = le16_to_cpu(sp->header.maxentry); 1551 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1552 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1553 1554 /* 1555 * insert the new entry into the new right/child page 1556 * (skip index in the new right page will not change) 1557 */ 1558 skip = split->index; 1559 /* if insert into middle, shift right remaining entries */ 1560 if (skip != nextindex) 1561 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1562 (nextindex - skip) * sizeof(xad_t)); 1563 1564 xad = &rp->xad[skip]; 1565 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1566 1567 /* update page header */ 1568 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1569 1570 if (!test_cflag(COMMIT_Nolink, ip)) { 1571 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1572 xtlck = (struct xtlock *) & tlck->lock; 1573 xtlck->lwm.offset = XTENTRYSTART; 1574 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1575 XTENTRYSTART; 1576 } 1577 1578 /* 1579 * reset the root 1580 * 1581 * init root with the single entry for the new right page 1582 * set the 1st entry offset to 0, which force the left-most key 1583 * at any level of the tree to be less than any search key. 
1584 */ 1585 /* 1586 * acquire a transaction lock on the root page (in-memory inode); 1587 * 1588 * action: root split; 1589 */ 1590 BT_MARK_DIRTY(split->mp, ip); 1591 1592 xad = &sp->xad[XTENTRYSTART]; 1593 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1594 1595 /* update page header of root */ 1596 sp->header.flag &= ~BT_LEAF; 1597 sp->header.flag |= BT_INTERNAL; 1598 1599 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1600 1601 if (!test_cflag(COMMIT_Nolink, ip)) { 1602 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1603 xtlck = (struct xtlock *) & tlck->lock; 1604 xtlck->lwm.offset = XTENTRYSTART; 1605 xtlck->lwm.length = 1; 1606 } 1607 1608 *rmpp = rmp; 1609 1610 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1611 return 0; 1612 } 1613 1614 1615 /* 1616 * xtExtend() 1617 * 1618 * function: extend in-place; 1619 * 1620 * note: existing extent may or may not have been committed. 1621 * caller is responsible for pager buffer cache update, and 1622 * working block allocation map update; 1623 * update pmap: alloc whole extended extent; 1624 */ 1625 int xtExtend(tid_t tid, /* transaction id */ 1626 struct inode *ip, s64 xoff, /* delta extent offset */ 1627 s32 xlen, /* delta extent length */ 1628 int flag) 1629 { 1630 int rc = 0; 1631 int cmp; 1632 struct metapage *mp; /* meta-page buffer */ 1633 xtpage_t *p; /* base B+-tree index page */ 1634 s64 bn; 1635 int index, nextindex, len; 1636 struct btstack btstack; /* traverse stack */ 1637 struct xtsplit split; /* split information */ 1638 xad_t *xad; 1639 s64 xaddr; 1640 struct tlock *tlck; 1641 struct xtlock *xtlck = NULL; 1642 1643 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1644 1645 /* there must exist extent to be extended */ 1646 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1647 return rc; 1648 1649 /* retrieve search result */ 1650 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1651 1652 if (cmp != 0) { 1653 XT_PUTPAGE(mp); 
1654 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1655 return -EIO; 1656 } 1657 1658 /* extension must be contiguous */ 1659 xad = &p->xad[index]; 1660 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1661 XT_PUTPAGE(mp); 1662 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1663 return -EIO; 1664 } 1665 1666 /* 1667 * acquire a transaction lock on the leaf page; 1668 * 1669 * action: xad insertion/extension; 1670 */ 1671 BT_MARK_DIRTY(mp, ip); 1672 if (!test_cflag(COMMIT_Nolink, ip)) { 1673 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1674 xtlck = (struct xtlock *) & tlck->lock; 1675 } 1676 1677 /* extend will overflow extent ? */ 1678 xlen = lengthXAD(xad) + xlen; 1679 if ((len = xlen - MAXXLEN) <= 0) 1680 goto extendOld; 1681 1682 /* 1683 * extent overflow: insert entry for new extent 1684 */ 1685 //insertNew: 1686 xoff = offsetXAD(xad) + MAXXLEN; 1687 xaddr = addressXAD(xad) + MAXXLEN; 1688 nextindex = le16_to_cpu(p->header.nextindex); 1689 1690 /* 1691 * if the leaf page is full, insert the new entry and 1692 * propagate up the router entry for the new page from split 1693 * 1694 * The xtSplitUp() will insert the entry and unpin the leaf page. 
1695 */ 1696 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1697 /* xtSpliUp() unpins leaf pages */ 1698 split.mp = mp; 1699 split.index = index + 1; 1700 split.flag = XAD_NEW; 1701 split.off = xoff; /* split offset */ 1702 split.len = len; 1703 split.addr = xaddr; 1704 split.pxdlist = NULL; 1705 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1706 return rc; 1707 1708 /* get back old page */ 1709 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1710 if (rc) 1711 return rc; 1712 /* 1713 * if leaf root has been split, original root has been 1714 * copied to new child page, i.e., original entry now 1715 * resides on the new child page; 1716 */ 1717 if (p->header.flag & BT_INTERNAL) { 1718 ASSERT(p->header.nextindex == 1719 cpu_to_le16(XTENTRYSTART + 1)); 1720 xad = &p->xad[XTENTRYSTART]; 1721 bn = addressXAD(xad); 1722 XT_PUTPAGE(mp); 1723 1724 /* get new child page */ 1725 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1726 if (rc) 1727 return rc; 1728 1729 BT_MARK_DIRTY(mp, ip); 1730 if (!test_cflag(COMMIT_Nolink, ip)) { 1731 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1732 xtlck = (struct xtlock *) & tlck->lock; 1733 } 1734 } 1735 } 1736 /* 1737 * insert the new entry into the leaf page 1738 */ 1739 else { 1740 /* insert the new entry: mark the entry NEW */ 1741 xad = &p->xad[index + 1]; 1742 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1743 1744 /* advance next available entry index */ 1745 p->header.nextindex = 1746 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1747 } 1748 1749 /* get back old entry */ 1750 xad = &p->xad[index]; 1751 xlen = MAXXLEN; 1752 1753 /* 1754 * extend old extent 1755 */ 1756 extendOld: 1757 XADlength(xad, xlen); 1758 if (!(xad->flag & XAD_NEW)) 1759 xad->flag |= XAD_EXTENDED; 1760 1761 if (!test_cflag(COMMIT_Nolink, ip)) { 1762 xtlck->lwm.offset = 1763 (xtlck->lwm.offset) ? 
min(index, 1764 (int)xtlck->lwm.offset) : index; 1765 xtlck->lwm.length = 1766 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1767 } 1768 1769 /* unpin the leaf page */ 1770 XT_PUTPAGE(mp); 1771 1772 return rc; 1773 } 1774 1775 #ifdef _NOTYET 1776 /* 1777 * xtTailgate() 1778 * 1779 * function: split existing 'tail' extent 1780 * (split offset >= start offset of tail extent), and 1781 * relocate and extend the split tail half; 1782 * 1783 * note: existing extent may or may not have been committed. 1784 * caller is responsible for pager buffer cache update, and 1785 * working block allocation map update; 1786 * update pmap: free old split tail extent, alloc new extent; 1787 */ 1788 int xtTailgate(tid_t tid, /* transaction id */ 1789 struct inode *ip, s64 xoff, /* split/new extent offset */ 1790 s32 xlen, /* new extent length */ 1791 s64 xaddr, /* new extent address */ 1792 int flag) 1793 { 1794 int rc = 0; 1795 int cmp; 1796 struct metapage *mp; /* meta-page buffer */ 1797 xtpage_t *p; /* base B+-tree index page */ 1798 s64 bn; 1799 int index, nextindex, llen, rlen; 1800 struct btstack btstack; /* traverse stack */ 1801 struct xtsplit split; /* split information */ 1802 xad_t *xad; 1803 struct tlock *tlck; 1804 struct xtlock *xtlck = 0; 1805 struct tlock *mtlck; 1806 struct maplock *pxdlock; 1807 1808 /* 1809 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1810 (ulong)xoff, xlen, (ulong)xaddr); 1811 */ 1812 1813 /* there must exist extent to be tailgated */ 1814 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) 1815 return rc; 1816 1817 /* retrieve search result */ 1818 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1819 1820 if (cmp != 0) { 1821 XT_PUTPAGE(mp); 1822 jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); 1823 return -EIO; 1824 } 1825 1826 /* entry found must be last entry */ 1827 nextindex = le16_to_cpu(p->header.nextindex); 1828 if (index != nextindex - 1) { 1829 XT_PUTPAGE(mp); 1830 jfs_error(ip->i_sb, 1831 
"xtTailgate: the entry found is not the last entry"); 1832 return -EIO; 1833 } 1834 1835 BT_MARK_DIRTY(mp, ip); 1836 /* 1837 * acquire tlock of the leaf page containing original entry 1838 */ 1839 if (!test_cflag(COMMIT_Nolink, ip)) { 1840 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1841 xtlck = (struct xtlock *) & tlck->lock; 1842 } 1843 1844 /* completely replace extent ? */ 1845 xad = &p->xad[index]; 1846 /* 1847 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1848 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1849 */ 1850 if ((llen = xoff - offsetXAD(xad)) == 0) 1851 goto updateOld; 1852 1853 /* 1854 * partially replace extent: insert entry for new extent 1855 */ 1856 //insertNew: 1857 /* 1858 * if the leaf page is full, insert the new entry and 1859 * propagate up the router entry for the new page from split 1860 * 1861 * The xtSplitUp() will insert the entry and unpin the leaf page. 1862 */ 1863 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1864 /* xtSpliUp() unpins leaf pages */ 1865 split.mp = mp; 1866 split.index = index + 1; 1867 split.flag = XAD_NEW; 1868 split.off = xoff; /* split offset */ 1869 split.len = xlen; 1870 split.addr = xaddr; 1871 split.pxdlist = NULL; 1872 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1873 return rc; 1874 1875 /* get back old page */ 1876 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1877 if (rc) 1878 return rc; 1879 /* 1880 * if leaf root has been split, original root has been 1881 * copied to new child page, i.e., original entry now 1882 * resides on the new child page; 1883 */ 1884 if (p->header.flag & BT_INTERNAL) { 1885 ASSERT(p->header.nextindex == 1886 cpu_to_le16(XTENTRYSTART + 1)); 1887 xad = &p->xad[XTENTRYSTART]; 1888 bn = addressXAD(xad); 1889 XT_PUTPAGE(mp); 1890 1891 /* get new child page */ 1892 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1893 if (rc) 1894 return rc; 1895 1896 BT_MARK_DIRTY(mp, ip); 1897 if (!test_cflag(COMMIT_Nolink, ip)) { 1898 tlck = txLock(tid, ip, mp, 
tlckXTREE|tlckGROW); 1899 xtlck = (struct xtlock *) & tlck->lock; 1900 } 1901 } 1902 } 1903 /* 1904 * insert the new entry into the leaf page 1905 */ 1906 else { 1907 /* insert the new entry: mark the entry NEW */ 1908 xad = &p->xad[index + 1]; 1909 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1910 1911 /* advance next available entry index */ 1912 p->header.nextindex = 1913 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1914 } 1915 1916 /* get back old XAD */ 1917 xad = &p->xad[index]; 1918 1919 /* 1920 * truncate/relocate old extent at split offset 1921 */ 1922 updateOld: 1923 /* update dmap for old/committed/truncated extent */ 1924 rlen = lengthXAD(xad) - llen; 1925 if (!(xad->flag & XAD_NEW)) { 1926 /* free from PWMAP at commit */ 1927 if (!test_cflag(COMMIT_Nolink, ip)) { 1928 mtlck = txMaplock(tid, ip, tlckMAP); 1929 pxdlock = (struct maplock *) & mtlck->lock; 1930 pxdlock->flag = mlckFREEPXD; 1931 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1932 PXDlength(&pxdlock->pxd, rlen); 1933 pxdlock->index = 1; 1934 } 1935 } else 1936 /* free from WMAP */ 1937 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1938 1939 if (llen) 1940 /* truncate */ 1941 XADlength(xad, llen); 1942 else 1943 /* replace */ 1944 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1945 1946 if (!test_cflag(COMMIT_Nolink, ip)) { 1947 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
1948 min(index, (int)xtlck->lwm.offset) : index; 1949 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1950 xtlck->lwm.offset; 1951 } 1952 1953 /* unpin the leaf page */ 1954 XT_PUTPAGE(mp); 1955 1956 return rc; 1957 } 1958 #endif /* _NOTYET */ 1959 1960 /* 1961 * xtUpdate() 1962 * 1963 * function: update XAD; 1964 * 1965 * update extent for allocated_but_not_recorded or 1966 * compressed extent; 1967 * 1968 * parameter: 1969 * nxad - new XAD; 1970 * logical extent of the specified XAD must be completely 1971 * contained by an existing XAD; 1972 */ 1973 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1974 { /* new XAD */ 1975 int rc = 0; 1976 int cmp; 1977 struct metapage *mp; /* meta-page buffer */ 1978 xtpage_t *p; /* base B+-tree index page */ 1979 s64 bn; 1980 int index0, index, newindex, nextindex; 1981 struct btstack btstack; /* traverse stack */ 1982 struct xtsplit split; /* split information */ 1983 xad_t *xad, *lxad, *rxad; 1984 int xflag; 1985 s64 nxoff, xoff; 1986 int nxlen, xlen, lxlen, rxlen; 1987 s64 nxaddr, xaddr; 1988 struct tlock *tlck; 1989 struct xtlock *xtlck = NULL; 1990 int newpage = 0; 1991 1992 /* there must exist extent to be tailgated */ 1993 nxoff = offsetXAD(nxad); 1994 nxlen = lengthXAD(nxad); 1995 nxaddr = addressXAD(nxad); 1996 1997 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1998 return rc; 1999 2000 /* retrieve search result */ 2001 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2002 2003 if (cmp != 0) { 2004 XT_PUTPAGE(mp); 2005 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 2006 return -EIO; 2007 } 2008 2009 BT_MARK_DIRTY(mp, ip); 2010 /* 2011 * acquire tlock of the leaf page containing original entry 2012 */ 2013 if (!test_cflag(COMMIT_Nolink, ip)) { 2014 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2015 xtlck = (struct xtlock *) & tlck->lock; 2016 } 2017 2018 xad = &p->xad[index0]; 2019 xflag = xad->flag; 2020 xoff = offsetXAD(xad); 2021 xlen = lengthXAD(xad); 2022 xaddr = 
addressXAD(xad); 2023 2024 /* nXAD must be completely contained within XAD */ 2025 if ((xoff > nxoff) || 2026 (nxoff + nxlen > xoff + xlen)) { 2027 XT_PUTPAGE(mp); 2028 jfs_error(ip->i_sb, 2029 "xtUpdate: nXAD in not completely contained within XAD"); 2030 return -EIO; 2031 } 2032 2033 index = index0; 2034 newindex = index + 1; 2035 nextindex = le16_to_cpu(p->header.nextindex); 2036 2037 #ifdef _JFS_WIP_NOCOALESCE 2038 if (xoff < nxoff) 2039 goto updateRight; 2040 2041 /* 2042 * replace XAD with nXAD 2043 */ 2044 replace: /* (nxoff == xoff) */ 2045 if (nxlen == xlen) { 2046 /* replace XAD with nXAD:recorded */ 2047 *xad = *nxad; 2048 xad->flag = xflag & ~XAD_NOTRECORDED; 2049 2050 goto out; 2051 } else /* (nxlen < xlen) */ 2052 goto updateLeft; 2053 #endif /* _JFS_WIP_NOCOALESCE */ 2054 2055 /* #ifdef _JFS_WIP_COALESCE */ 2056 if (xoff < nxoff) 2057 goto coalesceRight; 2058 2059 /* 2060 * coalesce with left XAD 2061 */ 2062 //coalesceLeft: /* (xoff == nxoff) */ 2063 /* is XAD first entry of page ? */ 2064 if (index == XTENTRYSTART) 2065 goto replace; 2066 2067 /* is nXAD logically and physically contiguous with lXAD ? */ 2068 lxad = &p->xad[index - 1]; 2069 lxlen = lengthXAD(lxad); 2070 if (!(lxad->flag & XAD_NOTRECORDED) && 2071 (nxoff == offsetXAD(lxad) + lxlen) && 2072 (nxaddr == addressXAD(lxad) + lxlen) && 2073 (lxlen + nxlen < MAXXLEN)) { 2074 /* extend right lXAD */ 2075 index0 = index - 1; 2076 XADlength(lxad, lxlen + nxlen); 2077 2078 /* If we just merged two extents together, need to make sure the 2079 * right extent gets logged. If the left one is marked XAD_NEW, 2080 * then we know it will be logged. 
Otherwise, mark as 2081 * XAD_EXTENDED 2082 */ 2083 if (!(lxad->flag & XAD_NEW)) 2084 lxad->flag |= XAD_EXTENDED; 2085 2086 if (xlen > nxlen) { 2087 /* truncate XAD */ 2088 XADoffset(xad, xoff + nxlen); 2089 XADlength(xad, xlen - nxlen); 2090 XADaddress(xad, xaddr + nxlen); 2091 goto out; 2092 } else { /* (xlen == nxlen) */ 2093 2094 /* remove XAD */ 2095 if (index < nextindex - 1) 2096 memmove(&p->xad[index], &p->xad[index + 1], 2097 (nextindex - index - 2098 1) << L2XTSLOTSIZE); 2099 2100 p->header.nextindex = 2101 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2102 1); 2103 2104 index = index0; 2105 newindex = index + 1; 2106 nextindex = le16_to_cpu(p->header.nextindex); 2107 xoff = nxoff = offsetXAD(lxad); 2108 xlen = nxlen = lxlen + nxlen; 2109 xaddr = nxaddr = addressXAD(lxad); 2110 goto coalesceRight; 2111 } 2112 } 2113 2114 /* 2115 * replace XAD with nXAD 2116 */ 2117 replace: /* (nxoff == xoff) */ 2118 if (nxlen == xlen) { 2119 /* replace XAD with nXAD:recorded */ 2120 *xad = *nxad; 2121 xad->flag = xflag & ~XAD_NOTRECORDED; 2122 2123 goto coalesceRight; 2124 } else /* (nxlen < xlen) */ 2125 goto updateLeft; 2126 2127 /* 2128 * coalesce with right XAD 2129 */ 2130 coalesceRight: /* (xoff <= nxoff) */ 2131 /* is XAD last entry of page ? */ 2132 if (newindex == nextindex) { 2133 if (xoff == nxoff) 2134 goto out; 2135 goto updateRight; 2136 } 2137 2138 /* is nXAD logically and physically contiguous with rXAD ? */ 2139 rxad = &p->xad[index + 1]; 2140 rxlen = lengthXAD(rxad); 2141 if (!(rxad->flag & XAD_NOTRECORDED) && 2142 (nxoff + nxlen == offsetXAD(rxad)) && 2143 (nxaddr + nxlen == addressXAD(rxad)) && 2144 (rxlen + nxlen < MAXXLEN)) { 2145 /* extend left rXAD */ 2146 XADoffset(rxad, nxoff); 2147 XADlength(rxad, rxlen + nxlen); 2148 XADaddress(rxad, nxaddr); 2149 2150 /* If we just merged two extents together, need to make sure 2151 * the left extent gets logged. If the right one is marked 2152 * XAD_NEW, then we know it will be logged. 
Otherwise, mark as 2153 * XAD_EXTENDED 2154 */ 2155 if (!(rxad->flag & XAD_NEW)) 2156 rxad->flag |= XAD_EXTENDED; 2157 2158 if (xlen > nxlen) 2159 /* truncate XAD */ 2160 XADlength(xad, xlen - nxlen); 2161 else { /* (xlen == nxlen) */ 2162 2163 /* remove XAD */ 2164 memmove(&p->xad[index], &p->xad[index + 1], 2165 (nextindex - index - 1) << L2XTSLOTSIZE); 2166 2167 p->header.nextindex = 2168 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2169 1); 2170 } 2171 2172 goto out; 2173 } else if (xoff == nxoff) 2174 goto out; 2175 2176 if (xoff >= nxoff) { 2177 XT_PUTPAGE(mp); 2178 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 2179 return -EIO; 2180 } 2181 /* #endif _JFS_WIP_COALESCE */ 2182 2183 /* 2184 * split XAD into (lXAD, nXAD): 2185 * 2186 * |---nXAD---> 2187 * --|----------XAD----------|-- 2188 * |-lXAD-| 2189 */ 2190 updateRight: /* (xoff < nxoff) */ 2191 /* truncate old XAD as lXAD:not_recorded */ 2192 xad = &p->xad[index]; 2193 XADlength(xad, nxoff - xoff); 2194 2195 /* insert nXAD:recorded */ 2196 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2197 2198 /* xtSpliUp() unpins leaf pages */ 2199 split.mp = mp; 2200 split.index = newindex; 2201 split.flag = xflag & ~XAD_NOTRECORDED; 2202 split.off = nxoff; 2203 split.len = nxlen; 2204 split.addr = nxaddr; 2205 split.pxdlist = NULL; 2206 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2207 return rc; 2208 2209 /* get back old page */ 2210 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2211 if (rc) 2212 return rc; 2213 /* 2214 * if leaf root has been split, original root has been 2215 * copied to new child page, i.e., original entry now 2216 * resides on the new child page; 2217 */ 2218 if (p->header.flag & BT_INTERNAL) { 2219 ASSERT(p->header.nextindex == 2220 cpu_to_le16(XTENTRYSTART + 1)); 2221 xad = &p->xad[XTENTRYSTART]; 2222 bn = addressXAD(xad); 2223 XT_PUTPAGE(mp); 2224 2225 /* get new child page */ 2226 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2227 if (rc) 2228 return rc; 2229 2230 BT_MARK_DIRTY(mp, ip); 2231 if 
(!test_cflag(COMMIT_Nolink, ip)) { 2232 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2233 xtlck = (struct xtlock *) & tlck->lock; 2234 } 2235 } else { 2236 /* is nXAD on new page ? */ 2237 if (newindex > 2238 (le16_to_cpu(p->header.maxentry) >> 1)) { 2239 newindex = 2240 newindex - 2241 le16_to_cpu(p->header.nextindex) + 2242 XTENTRYSTART; 2243 newpage = 1; 2244 } 2245 } 2246 } else { 2247 /* if insert into middle, shift right remaining entries */ 2248 if (newindex < nextindex) 2249 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2250 (nextindex - newindex) << L2XTSLOTSIZE); 2251 2252 /* insert the entry */ 2253 xad = &p->xad[newindex]; 2254 *xad = *nxad; 2255 xad->flag = xflag & ~XAD_NOTRECORDED; 2256 2257 /* advance next available entry index. */ 2258 p->header.nextindex = 2259 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2260 } 2261 2262 /* 2263 * does nXAD force 3-way split ? 2264 * 2265 * |---nXAD--->| 2266 * --|----------XAD-------------|-- 2267 * |-lXAD-| |-rXAD -| 2268 */ 2269 if (nxoff + nxlen == xoff + xlen) 2270 goto out; 2271 2272 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2273 if (newpage) { 2274 /* close out old page */ 2275 if (!test_cflag(COMMIT_Nolink, ip)) { 2276 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
2277 min(index0, (int)xtlck->lwm.offset) : index0; 2278 xtlck->lwm.length = 2279 le16_to_cpu(p->header.nextindex) - 2280 xtlck->lwm.offset; 2281 } 2282 2283 bn = le64_to_cpu(p->header.next); 2284 XT_PUTPAGE(mp); 2285 2286 /* get new right page */ 2287 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2288 if (rc) 2289 return rc; 2290 2291 BT_MARK_DIRTY(mp, ip); 2292 if (!test_cflag(COMMIT_Nolink, ip)) { 2293 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2294 xtlck = (struct xtlock *) & tlck->lock; 2295 } 2296 2297 index0 = index = newindex; 2298 } else 2299 index++; 2300 2301 newindex = index + 1; 2302 nextindex = le16_to_cpu(p->header.nextindex); 2303 xlen = xlen - (nxoff - xoff); 2304 xoff = nxoff; 2305 xaddr = nxaddr; 2306 2307 /* recompute split pages */ 2308 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2309 XT_PUTPAGE(mp); 2310 2311 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2312 return rc; 2313 2314 /* retrieve search result */ 2315 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2316 2317 if (cmp != 0) { 2318 XT_PUTPAGE(mp); 2319 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2320 return -EIO; 2321 } 2322 2323 if (index0 != index) { 2324 XT_PUTPAGE(mp); 2325 jfs_error(ip->i_sb, 2326 "xtUpdate: unexpected value of index"); 2327 return -EIO; 2328 } 2329 } 2330 2331 /* 2332 * split XAD into (nXAD, rXAD) 2333 * 2334 * ---nXAD---| 2335 * --|----------XAD----------|-- 2336 * |-rXAD-| 2337 */ 2338 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2339 /* update old XAD with nXAD:recorded */ 2340 xad = &p->xad[index]; 2341 *xad = *nxad; 2342 xad->flag = xflag & ~XAD_NOTRECORDED; 2343 2344 /* insert rXAD:not_recorded */ 2345 xoff = xoff + nxlen; 2346 xlen = xlen - nxlen; 2347 xaddr = xaddr + nxlen; 2348 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2349 /* 2350 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2351 */ 2352 /* xtSpliUp() unpins leaf pages */ 2353 split.mp = mp; 2354 split.index = newindex; 2355 split.flag = xflag; 2356 
split.off = xoff; 2357 split.len = xlen; 2358 split.addr = xaddr; 2359 split.pxdlist = NULL; 2360 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2361 return rc; 2362 2363 /* get back old page */ 2364 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2365 if (rc) 2366 return rc; 2367 2368 /* 2369 * if leaf root has been split, original root has been 2370 * copied to new child page, i.e., original entry now 2371 * resides on the new child page; 2372 */ 2373 if (p->header.flag & BT_INTERNAL) { 2374 ASSERT(p->header.nextindex == 2375 cpu_to_le16(XTENTRYSTART + 1)); 2376 xad = &p->xad[XTENTRYSTART]; 2377 bn = addressXAD(xad); 2378 XT_PUTPAGE(mp); 2379 2380 /* get new child page */ 2381 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2382 if (rc) 2383 return rc; 2384 2385 BT_MARK_DIRTY(mp, ip); 2386 if (!test_cflag(COMMIT_Nolink, ip)) { 2387 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2388 xtlck = (struct xtlock *) & tlck->lock; 2389 } 2390 } 2391 } else { 2392 /* if insert into middle, shift right remaining entries */ 2393 if (newindex < nextindex) 2394 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2395 (nextindex - newindex) << L2XTSLOTSIZE); 2396 2397 /* insert the entry */ 2398 xad = &p->xad[newindex]; 2399 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2400 2401 /* advance next available entry index. */ 2402 p->header.nextindex = 2403 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2404 } 2405 2406 out: 2407 if (!test_cflag(COMMIT_Nolink, ip)) { 2408 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(index0, (int)xtlck->lwm.offset) : index0;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtAppend()
 *
 * function: grow in append mode from contiguous region specified ;
 *
 * parameter:
 *	tid		- transaction id;
 *	ip		- file object;
 *	xflag		- extent flag:
 *	xoff		- extent offset;
 *	maxblocks	- max extent length;
 *	xlen		- extent length (in/out);
 *	xaddrp		- extent address pointer (in/out):
 *	flag		-
 *
 * return: 0 on success; negative errno (e.g. -EEXIST if an entry
 *	already covers xoff) on failure.
 */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both returns (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* exact hit: an extent already starts at xoff */
	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	/* clamp the request so it does not run into the next extent */
	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * one pxd per anticipated split level; blocks are carved off
	 * the front of the caller's contiguous region (xaddr advances,
	 * maxblocks shrinks by nbperpage per split page).
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation */
		/* NOTE(review): blocks allocated by earlier successful
		 * dbAllocBottomUp() calls in this loop are not freed on
		 * this error path — TODO confirm whether that is intended.
		 */

		goto out;
	}

	/* whatever is left of the region backs the data extent itself */
	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	/* xtSplitUp() unpinned the leaf page; report what was allocated */
	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *	insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* widen the linelock's low-water range to cover the new entry */
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#ifdef _STILL_TO_PORT

/* - TBD for defragmentation/reorganization -
 *
 *	xtDelete()
 *
 * function:
 *	delete the entry with the specified key.
 *
 *	N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *
 * return:
 *	ENOENT: if the entry is not found.
 *
 * exception:
 */
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * find the matching entry; xtSearch() pins the page
	 */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
		return rc;

	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
	if (cmp) {
		/* unpin the leaf page */
		XT_PUTPAGE(mp);
		return -ENOENT;
	}

	/*
	 * delete the entry from the leaf page
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);

	/*
	 * if the leaf page become empty, free the page
	 * (xtDeleteUp() consumes the pinned page mp)
	 */
	if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
		return (xtDeleteUp(tid, ip, mp, p, &btstack));

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action:xad deletion;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	xtlck = (struct xtlock *) & tlck->lock;
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;

	/* if delete from middle, shift left/compact the remaining entries */
	if (index < nextindex - 1)
		memmove(&p->xad[index], &p->xad[index + 1],
			(nextindex - index - 1) * sizeof(xad_t));

	XT_PUTPAGE(mp);

	return 0;
}


/* - TBD for defragmentation/reorganization -
 *
 *	xtDeleteUp()
 *
 * function:
 *	free empty pages as propagating deletion up the tree
 *
 * parameter:
 *	fmp/fp - pinned, now-empty child page (consumed by this call)
 *
 * return: 0 on success; negative errno on failure.
 */
static int
xtDeleteUp(tid_t tid, struct inode *ip,
	   struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	s64 xaddr;
	int xlen;
	struct btframe *parent;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * keep root leaf page which has become empty
	 * (the root lives in the inode and is never freed)
	 */
	if (fp->header.flag & BT_ROOT) {
		/* keep the root page */
		fp->header.flag &= ~BT_INTERNAL;
		fp->header.flag |= BT_LEAF;
		fp->header.nextindex = cpu_to_le16(XTENTRYSTART);

		/* XT_PUTPAGE(fmp); */

		return 0;
	}

	/*
	 * free non-root leaf page
	 */
	if ((rc = xtRelink(tid, ip, fp))) {
		XT_PUTPAGE(fmp);
		return rc;
	}

	xaddr = addressPXD(&fp->header.self);
	xlen = lengthPXD(&fp->header.self);
	/* free the page extent */
	dbFree(ip, xaddr, (s64) xlen);

	/* free the buffer page */
	discard_metapage(fmp);

	/*
	 * propagate page deletion up the index tree
	 *
	 * If the delete from the parent page makes it empty,
	 * continue all the way up the tree.
	 * stop if the root page is reached (which is never deleted) or
	 * if the entry deletion does not empty the page.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = parent->index;

		/* delete the entry for the freed child page from parent.
		 */
		nextindex = le16_to_cpu(p->header.nextindex);

		/*
		 * the parent has the single entry being deleted:
		 * free the parent page which has become empty.
		 */
		if (nextindex == 1) {
			if (p->header.flag & BT_ROOT) {
				/* keep the root page */
				p->header.flag &= ~BT_INTERNAL;
				p->header.flag |= BT_LEAF;
				p->header.nextindex =
				    cpu_to_le16(XTENTRYSTART);

				/* XT_PUTPAGE(mp); */

				break;
			} else {
				/* free the parent page */
				if ((rc = xtRelink(tid, ip, p)))
					return rc;

				xaddr = addressPXD(&p->header.self);
				/* free the page extent */
				dbFree(ip, xaddr,
				       (s64) JFS_SBI(ip->i_sb)->nbperpage);

				/* unpin/free the buffer page */
				discard_metapage(mp);

				/* propagate up */
				continue;
			}
		}
		/*
		 * the parent has other entries remaining:
		 * delete the router entry from the parent page.
		 */
		else {
			BT_MARK_DIRTY(mp, ip);
			/*
			 * acquire a transaction lock on the leaf page;
			 *
			 * action:xad deletion;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset =
			    (xtlck->lwm.offset) ? min(index,
						      xtlck->lwm.
						      offset) : index;

			/* if delete from middle,
			 * shift left/compact the remaining entries in the page
			 */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
			jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
				 (ulong) parent->bn, index);
		}

		/* unpin the parent page */
		XT_PUTPAGE(mp);

		/* exit propagation up */
		break;
	}

	return 0;
}


/*
 * NAME:        xtRelocate()
 *
 * FUNCTION:    relocate xtpage or data extent of regular file;
 *              This function is mainly used by defragfs utility.
 *
 * NOTE:        This routine does not have the logic to handle
 *              uncommitted allocated extent. The caller should call
 *              txCommit() to commit all the allocation before call
 *              this routine.
 */
int
xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
	   s64 nxaddr,		/* new xaddr */
	   int xtype)
{				/* extent type: XTPAGE or DATAEXT */
	int rc = 0;
	struct tblock *tblk;
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct metapage *mp, *pmp, *lmp, *rmp;	/* meta-page buffer */
	xtpage_t *p, *pp, *rp, *lp;	/* base B+-tree index page */
	xad_t *xad;
	pxd_t *pxd;
	s64 xoff, xsize;
	int xlen;
	s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
	cbuf_t *cp;
	s64 offset, nbytes, nbrd, pno;
	int nb, npages, nblks;
	s64 bn;
	int cmp;
	int index;
	struct pxd_lock *pxdlock;
	struct btstack btstack;	/* traverse stack */

	xtype = xtype & EXTENT_TYPE;

	xoff = offsetXAD(oxad);
	oxaddr = addressXAD(oxad);
	xlen = lengthXAD(oxad);

	/* validate extent offset */
	offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
	if (offset >= ip->i_size)
		return -ESTALE;	/* stale extent */

	jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
		 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);

	/*
	 *	1. get and validate the parent xtpage/xad entry
	 *	covering the source extent to be relocated;
	 */
	if (xtype == DATAEXT) {
		/* search in leaf entry */
		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
		if (rc)
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);

		if (cmp) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}

		/* validate for exact match with a single entry */
		xad = &pp->xad[index];
		if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}
	} else {		/* (xtype == XTPAGE) */

		/* search in internal entry */
		rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
		if (rc)
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);

		if (cmp) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}

		/* xtSearchNode() validated for exact match with a single entry
		 */
		xad = &pp->xad[index];
	}
	jfs_info("xtRelocate: parent xad entry validated.");

	/*
	 *	2. relocate the extent
	 */
	if (xtype == DATAEXT) {
		/* if the extent is allocated-but-not-recorded
		 * there is no real data to be moved in this extent,
		 */
		if (xad->flag & XAD_NOTRECORDED)
			goto out;
		else
			/* release xtpage for cmRead()/xtLookup() */
			XT_PUTPAGE(pmp);

		/*
		 *	cmRelocate()
		 *
		 * copy target data pages to be relocated;
		 *
		 * data extent must start at page boundary and
		 * multiple of page size (except the last data extent);
		 * read in each page of the source data extent into cbuf,
		 * update the cbuf extent descriptor of the page to be
		 * homeward bound to new dst data extent
		 * copy the data from the old extent to new extent.
		 * copy is essential for compressed files to avoid problems
		 * that can arise if there was a change in compression
		 * algorithms.
		 * it is a good strategy because it may disrupt cache
		 * policy to keep the pages in memory afterwards.
		 */
		offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
		assert((offset & CM_OFFSET) == 0);
		nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
		pno = offset >> CM_L2BSIZE;
		npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
		/*
		 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
			  (offset >> CM_L2BSIZE) + 1;
		 */
		sxaddr = oxaddr;
		dxaddr = nxaddr;

		/* process the request one cache buffer at a time */
		for (nbrd = 0; nbrd < nbytes; nbrd += nb,
		     offset += nb, pno++, npages--) {
			/* compute page size */
			nb = min(nbytes - nbrd, CM_BSIZE);

			/* get the cache buffer of the page */
			if (rc = cmRead(ip, offset, npages, &cp))
				break;

			assert(addressPXD(&cp->cm_pxd) == sxaddr);
			assert(!cp->cm_modified);

			/* bind buffer with the new extent address */
			nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
			cmSetXD(ip, cp, pno, dxaddr, nblks);

			/* release the cbuf, mark it as modified */
			cmPut(cp, TRUE);

			dxaddr += nblks;
			sxaddr += nblks;
		}

		/* get back parent page */
		if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
			return rc;

		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
		jfs_info("xtRelocate: target data extent relocated.");
	} else {		/* (xtype == XTPAGE) */

		/*
		 * read in the target xtpage from the source extent;
		 */
		XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(pmp);
			return rc;
		}

		/*
		 * read in sibling pages if any to update sibling pointers;
		 */
		rmp = NULL;
		if (p->header.next) {
			nextbn = le64_to_cpu(p->header.next);
			XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
			if (rc) {
				XT_PUTPAGE(pmp);
				XT_PUTPAGE(mp);
				return (rc);
			}
		}

		lmp = NULL;
		if (p->header.prev) {
			prevbn = le64_to_cpu(p->header.prev);
			XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
			if (rc) {
				XT_PUTPAGE(pmp);
				XT_PUTPAGE(mp);
				if (rmp)
					XT_PUTPAGE(rmp);
				return (rc);
			}
		}

		/* at this point, all xtpages to be updated are in memory */

		/*
		 * update sibling pointers of sibling xtpages if any;
		 */
		if (lmp) {
			BT_MARK_DIRTY(lmp, ip);
			tlck =
			    txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
			lp->header.next = cpu_to_le64(nxaddr);
			XT_PUTPAGE(lmp);
		}

		if (rmp) {
			BT_MARK_DIRTY(rmp, ip);
			tlck =
			    txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
			rp->header.prev = cpu_to_le64(nxaddr);
			XT_PUTPAGE(rmp);
		}

		/*
		 * update the target xtpage to be relocated
		 *
		 * update the self address of the target page
		 * and write to destination extent;
		 * redo image covers the whole xtpage since it is new page
		 * to the destination extent;
		 * update of bmap for the free of source extent
		 * of the target xtpage itself:
		 * update of bmap for the allocation of destination extent
		 * of the target xtpage itself:
		 * update of bmap for the extents covered by xad entries in
		 * the target xtpage is not necessary since they are not
		 * updated;
		 * if not committed before this relocation,
		 * target page may contain XAD_NEW entries which must
		 * be scanned for bmap update (logredo() always
		 * scan xtpage REDOPAGE image for bmap update);
		 * if committed before this relocation (tlckRELOCATE),
		 * scan may be skipped by commit() and logredo();
		 */
		BT_MARK_DIRTY(mp, ip);
		/* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;

		/* update the self address in the xtpage header */
		pxd = &p->header.self;
		PXDaddress(pxd, nxaddr);

		/* linelock for the after image of the whole page */
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;

		/* update the buffer extent descriptor of target xtpage */
		xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
		bmSetXD(mp, nxaddr, xsize);

		/* unpin the target page to new homeward bound */
		XT_PUTPAGE(mp);
		jfs_info("xtRelocate: target xtpage relocated.");
	}

	/*
	 *	3. acquire maplock for the source extent to be freed;
	 *
	 * acquire a maplock saving the src relocated extent address;
	 * to free of the extent at commit time;
	 */
      out:
	/* if DATAEXT relocation, write a LOG_UPDATEMAP record for
	 * free PXD of the source data extent (logredo() will update
	 * bmap for free of source data extent), and update bmap for
	 * free of the source data extent;
	 */
	if (xtype == DATAEXT)
		tlck = txMaplock(tid, ip, tlckMAP);
	/* if XTPAGE relocation, write a LOG_NOREDOPAGE record
	 * for the source xtpage (logredo() will init NoRedoPage
	 * filter and will also update bmap for free of the source
	 * xtpage), and update bmap for free of the source xtpage;
	 * N.B. We use tlckMAP instead of tlkcXTREE because there
	 * is no buffer associated with this lock since the buffer
	 * has been redirected to the target location.
	 */
	else			/* (xtype == XTPAGE) */
		tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);

	pxdlock = (struct pxd_lock *) & tlck->lock;
	pxdlock->flag = mlckFREEPXD;
	PXDaddress(&pxdlock->pxd, oxaddr);
	PXDlength(&pxdlock->pxd, xlen);
	pxdlock->index = 1;

	/*
	 *	4. update the parent xad entry for relocation;
	 *
	 * acquire tlck for the parent entry with XAD_NEW as entry
	 * update which will write LOG_REDOPAGE and update bmap for
	 * allocation of XAD_NEW destination extent;
	 */
	jfs_info("xtRelocate: update parent xad entry.");
	BT_MARK_DIRTY(pmp, ip);
	tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) & tlck->lock;

	/* update the XAD with the new destination extent; */
	xad = &pp->xad[index];
	xad->flag |= XAD_NEW;
	XADaddress(xad, nxaddr);

	xtlck->lwm.offset = min(index, xtlck->lwm.offset);
	xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
	    xtlck->lwm.offset;

	/* unpin the parent xtpage */
	XT_PUTPAGE(pmp);

	return rc;
}


/*
 *	xtSearchNode()
 *
 * function:	search for the internal xad entry covering specified extent.
 *	This function is mainly used by defragfs utility.
 *
 * parameters:
 *	ip	- file object;
 *	xad	- extent to find;
 *	cmpp	- comparison result:
 *	btstack - traverse stack;
 *	flag	- search process flag;
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
			int *cmpp, struct btstack * btstack, int flag)
{
	int rc = 0;
	s64 xoff, xaddr;
	int xlen;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* page */
	int base, index, lim;
	struct btframe *btsp;
	s64 t64;

	BT_CLR(btstack);

	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search find the entry with largest key Ki
	 * less than K which point to the child page to search;
	 * leaf page search find the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/* reaching a leaf means the internal entry does not exist */
		if (p->header.flag & BT_LEAF) {
			XT_PUTPAGE(mp);
			return -ESTALE;
		}

		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 *
				 * verify for exact match;
				 */
				if (xaddr == addressXAD(&p->xad[index]) &&
				    xoff == offsetXAD(&p->xad[index])) {
					*cmpp = cmp;

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					return 0;
				}

				/* descend/search its child page */
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss - non-leaf page:
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}


/*
 *	xtRelink()
 *
 * function:
 *	link around a freed page.
3278 * 3279 * Parameter: 3280 * int tid, 3281 * struct inode *ip, 3282 * xtpage_t *p) 3283 * 3284 * returns: 3285 */ 3286 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3287 { 3288 int rc = 0; 3289 struct metapage *mp; 3290 s64 nextbn, prevbn; 3291 struct tlock *tlck; 3292 3293 nextbn = le64_to_cpu(p->header.next); 3294 prevbn = le64_to_cpu(p->header.prev); 3295 3296 /* update prev pointer of the next page */ 3297 if (nextbn != 0) { 3298 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3299 if (rc) 3300 return rc; 3301 3302 /* 3303 * acquire a transaction lock on the page; 3304 * 3305 * action: update prev pointer; 3306 */ 3307 BT_MARK_DIRTY(mp, ip); 3308 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3309 3310 /* the page may already have been tlock'd */ 3311 3312 p->header.prev = cpu_to_le64(prevbn); 3313 3314 XT_PUTPAGE(mp); 3315 } 3316 3317 /* update next pointer of the previous page */ 3318 if (prevbn != 0) { 3319 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3320 if (rc) 3321 return rc; 3322 3323 /* 3324 * acquire a transaction lock on the page; 3325 * 3326 * action: update next pointer; 3327 */ 3328 BT_MARK_DIRTY(mp, ip); 3329 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3330 3331 /* the page may already have been tlock'd */ 3332 3333 p->header.next = le64_to_cpu(nextbn); 3334 3335 XT_PUTPAGE(mp); 3336 } 3337 3338 return 0; 3339 } 3340 #endif /* _STILL_TO_PORT */ 3341 3342 3343 /* 3344 * xtInitRoot() 3345 * 3346 * initialize file root (inline in inode) 3347 */ 3348 void xtInitRoot(tid_t tid, struct inode *ip) 3349 { 3350 xtpage_t *p; 3351 3352 /* 3353 * acquire a transaction lock on the root 3354 * 3355 * action: 3356 */ 3357 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3358 tlckXTREE | tlckNEW); 3359 p = &JFS_IP(ip)->i_xtroot; 3360 3361 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3362 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3363 3364 if (S_ISDIR(ip->i_mode)) 3365 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 
3366 else { 3367 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3368 ip->i_size = 0; 3369 } 3370 3371 3372 return; 3373 } 3374 3375 3376 /* 3377 * We can run into a deadlock truncating a file with a large number of 3378 * xtree pages (large fragmented file). A robust fix would entail a 3379 * reservation system where we would reserve a number of metadata pages 3380 * and tlocks which we would be guaranteed without a deadlock. Without 3381 * this, a partial fix is to limit number of metadata pages we will lock 3382 * in a single transaction. Currently we will truncate the file so that 3383 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3384 * will be responsible for ensuring that the current transaction gets 3385 * committed, and that subsequent transactions are created to truncate 3386 * the file further if needed. 3387 */ 3388 #define MAX_TRUNCATE_LEAVES 50 3389 3390 /* 3391 * xtTruncate() 3392 * 3393 * function: 3394 * traverse for truncation logging backward bottom up; 3395 * terminate at the last extent entry at the current subtree 3396 * root page covering new down size. 3397 * truncation may occur within the last extent entry. 3398 * 3399 * parameter: 3400 * int tid, 3401 * struct inode *ip, 3402 * s64 newsize, 3403 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3404 * 3405 * return: 3406 * 3407 * note: 3408 * PWMAP: 3409 * 1. truncate (non-COMMIT_NOLINK file) 3410 * by jfs_truncate() or jfs_open(O_TRUNC): 3411 * xtree is updated; 3412 * 2. truncate index table of directory when last entry removed 3413 * map update via tlock at commit time; 3414 * PMAP: 3415 * Call xtTruncate_pmap instead 3416 * WMAP: 3417 * 1. remove (free zero link count) on last reference release 3418 * (pmap has been freed at commit zero link count); 3419 * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): 3420 * xtree is updated; 3421 * map update directly at truncation time; 3422 * 3423 * if (DELETE) 3424 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3425 * else if (TRUNCATE) 3426 * must write LOG_NOREDOPAGE for deleted index page; 3427 * 3428 * pages may already have been tlocked by anonymous transactions 3429 * during file growth (i.e., write) before truncation; 3430 * 3431 * except last truncated entry, deleted entries remains as is 3432 * in the page (nextindex is updated) for other use 3433 * (e.g., log/update allocation map): this avoid copying the page 3434 * info but delay free of pages; 3435 * 3436 */ 3437 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3438 { 3439 int rc = 0; 3440 s64 teof; 3441 struct metapage *mp; 3442 xtpage_t *p; 3443 s64 bn; 3444 int index, nextindex; 3445 xad_t *xad; 3446 s64 xoff, xaddr; 3447 int xlen, len, freexlen; 3448 struct btstack btstack; 3449 struct btframe *parent; 3450 struct tblock *tblk = NULL; 3451 struct tlock *tlck = NULL; 3452 struct xtlock *xtlck = NULL; 3453 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3454 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3455 s64 nfreed; 3456 int freed, log; 3457 int locked_leaves = 0; 3458 3459 /* save object truncation type */ 3460 if (tid) { 3461 tblk = tid_to_tblock(tid); 3462 tblk->xflag |= flag; 3463 } 3464 3465 nfreed = 0; 3466 3467 flag &= COMMIT_MAP; 3468 assert(flag != COMMIT_PMAP); 3469 3470 if (flag == COMMIT_PWMAP) 3471 log = 1; 3472 else { 3473 log = 0; 3474 xadlock.flag = mlckFREEXADLIST; 3475 xadlock.index = 1; 3476 } 3477 3478 /* 3479 * if the newsize is not an integral number of pages, 3480 * the file between newsize and next page boundary will 3481 * be cleared. 3482 * if truncating into a file hole, it will cause 3483 * a full block to be allocated for the logical block. 
3484 */ 3485 3486 /* 3487 * release page blocks of truncated region <teof, eof> 3488 * 3489 * free the data blocks from the leaf index blocks. 3490 * delete the parent index entries corresponding to 3491 * the freed child data/index blocks. 3492 * free the index blocks themselves which aren't needed 3493 * in new sized file. 3494 * 3495 * index blocks are updated only if the blocks are to be 3496 * retained in the new sized file. 3497 * if type is PMAP, the data and index pages are NOT 3498 * freed, and the data and index blocks are NOT freed 3499 * from working map. 3500 * (this will allow continued access of data/index of 3501 * temporary file (zerolink count file truncated to zero-length)). 3502 */ 3503 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3504 JFS_SBI(ip->i_sb)->l2bsize; 3505 3506 /* clear stack */ 3507 BT_CLR(&btstack); 3508 3509 /* 3510 * start with root 3511 * 3512 * root resides in the inode 3513 */ 3514 bn = 0; 3515 3516 /* 3517 * first access of each page: 3518 */ 3519 getPage: 3520 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3521 if (rc) 3522 return rc; 3523 3524 /* process entries backward from last index */ 3525 index = le16_to_cpu(p->header.nextindex) - 1; 3526 3527 if (p->header.flag & BT_INTERNAL) 3528 goto getChild; 3529 3530 /* 3531 * leaf page 3532 */ 3533 3534 /* Since this is the rightmost leaf, and we may have already freed 3535 * a page that was formerly to the right, let's make sure that the 3536 * next pointer is zero. 3537 */ 3538 if (p->header.next) { 3539 if (log) 3540 /* 3541 * Make sure this change to the header is logged. 3542 * If we really truncate this leaf, the flag 3543 * will be changed to tlckTRUNCATE 3544 */ 3545 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3546 BT_MARK_DIRTY(mp, ip); 3547 p->header.next = 0; 3548 } 3549 3550 freed = 0; 3551 3552 /* does region covered by leaf page precede Teof ? 
*/ 3553 xad = &p->xad[index]; 3554 xoff = offsetXAD(xad); 3555 xlen = lengthXAD(xad); 3556 if (teof >= xoff + xlen) { 3557 XT_PUTPAGE(mp); 3558 goto getParent; 3559 } 3560 3561 /* (re)acquire tlock of the leaf page */ 3562 if (log) { 3563 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3564 /* 3565 * We need to limit the size of the transaction 3566 * to avoid exhausting pagecache & tlocks 3567 */ 3568 XT_PUTPAGE(mp); 3569 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3570 goto getParent; 3571 } 3572 tlck = txLock(tid, ip, mp, tlckXTREE); 3573 tlck->type = tlckXTREE | tlckTRUNCATE; 3574 xtlck = (struct xtlock *) & tlck->lock; 3575 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3576 } 3577 BT_MARK_DIRTY(mp, ip); 3578 3579 /* 3580 * scan backward leaf page entries 3581 */ 3582 for (; index >= XTENTRYSTART; index--) { 3583 xad = &p->xad[index]; 3584 xoff = offsetXAD(xad); 3585 xlen = lengthXAD(xad); 3586 xaddr = addressXAD(xad); 3587 3588 /* 3589 * The "data" for a directory is indexed by the block 3590 * device's address space. This metadata must be invalidated 3591 * here 3592 */ 3593 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3594 invalidate_xad_metapages(ip, *xad); 3595 /* 3596 * entry beyond eof: continue scan of current page 3597 * xad 3598 * ---|---=======-------> 3599 * eof 3600 */ 3601 if (teof < xoff) { 3602 nfreed += xlen; 3603 continue; 3604 } 3605 3606 /* 3607 * (xoff <= teof): last entry to be deleted from page; 3608 * If other entries remain in page: keep and update the page. 3609 */ 3610 3611 /* 3612 * eof == entry_start: delete the entry 3613 * xad 3614 * -------|=======-------> 3615 * eof 3616 * 3617 */ 3618 if (teof == xoff) { 3619 nfreed += xlen; 3620 3621 if (index == XTENTRYSTART) 3622 break; 3623 3624 nextindex = index; 3625 } 3626 /* 3627 * eof within the entry: truncate the entry. 
3628 * xad 3629 * -------===|===-------> 3630 * eof 3631 */ 3632 else if (teof < xoff + xlen) { 3633 /* update truncated entry */ 3634 len = teof - xoff; 3635 freexlen = xlen - len; 3636 XADlength(xad, len); 3637 3638 /* save pxd of truncated extent in tlck */ 3639 xaddr += len; 3640 if (log) { /* COMMIT_PWMAP */ 3641 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3642 min(index, (int)xtlck->lwm.offset) : index; 3643 xtlck->lwm.length = index + 1 - 3644 xtlck->lwm.offset; 3645 xtlck->twm.offset = index; 3646 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3647 pxdlock->flag = mlckFREEPXD; 3648 PXDaddress(&pxdlock->pxd, xaddr); 3649 PXDlength(&pxdlock->pxd, freexlen); 3650 } 3651 /* free truncated extent */ 3652 else { /* COMMIT_WMAP */ 3653 3654 pxdlock = (struct pxd_lock *) & xadlock; 3655 pxdlock->flag = mlckFREEPXD; 3656 PXDaddress(&pxdlock->pxd, xaddr); 3657 PXDlength(&pxdlock->pxd, freexlen); 3658 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3659 3660 /* reset map lock */ 3661 xadlock.flag = mlckFREEXADLIST; 3662 } 3663 3664 /* current entry is new last entry; */ 3665 nextindex = index + 1; 3666 3667 nfreed += freexlen; 3668 } 3669 /* 3670 * eof beyond the entry: 3671 * xad 3672 * -------=======---|---> 3673 * eof 3674 */ 3675 else { /* (xoff + xlen < teof) */ 3676 3677 nextindex = index + 1; 3678 } 3679 3680 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3681 if (!log) { /* COMMIT_WAMP */ 3682 xadlock.xdlist = &p->xad[nextindex]; 3683 xadlock.count = 3684 le16_to_cpu(p->header.nextindex) - 3685 nextindex; 3686 txFreeMap(ip, (struct maplock *) & xadlock, 3687 NULL, COMMIT_WMAP); 3688 } 3689 p->header.nextindex = cpu_to_le16(nextindex); 3690 } 3691 3692 XT_PUTPAGE(mp); 3693 3694 /* assert(freed == 0); */ 3695 goto getParent; 3696 } /* end scan of leaf page entries */ 3697 3698 freed = 1; 3699 3700 /* 3701 * leaf page become empty: free the page if type != PMAP 3702 */ 3703 if (log) { /* COMMIT_PWMAP */ 3704 /* txCommit() with tlckFREE: 3705 * free data extents 
covered by leaf [XTENTRYSTART:hwm); 3706 * invalidate leaf if COMMIT_PWMAP; 3707 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3708 */ 3709 tlck->type = tlckXTREE | tlckFREE; 3710 } else { /* COMMIT_WAMP */ 3711 3712 /* free data extents covered by leaf */ 3713 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3714 xadlock.count = 3715 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3716 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3717 } 3718 3719 if (p->header.flag & BT_ROOT) { 3720 p->header.flag &= ~BT_INTERNAL; 3721 p->header.flag |= BT_LEAF; 3722 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3723 3724 XT_PUTPAGE(mp); /* debug */ 3725 goto out; 3726 } else { 3727 if (log) { /* COMMIT_PWMAP */ 3728 /* page will be invalidated at tx completion 3729 */ 3730 XT_PUTPAGE(mp); 3731 } else { /* COMMIT_WMAP */ 3732 3733 if (mp->lid) 3734 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3735 3736 /* invalidate empty leaf page */ 3737 discard_metapage(mp); 3738 } 3739 } 3740 3741 /* 3742 * the leaf page become empty: delete the parent entry 3743 * for the leaf page if the parent page is to be kept 3744 * in the new sized file. 3745 */ 3746 3747 /* 3748 * go back up to the parent page 3749 */ 3750 getParent: 3751 /* pop/restore parent entry for the current child page */ 3752 if ((parent = BT_POP(&btstack)) == NULL) 3753 /* current page must have been root */ 3754 goto out; 3755 3756 /* get back the parent page */ 3757 bn = parent->bn; 3758 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3759 if (rc) 3760 return rc; 3761 3762 index = parent->index; 3763 3764 /* 3765 * child page was not empty: 3766 */ 3767 if (freed == 0) { 3768 /* has any entry deleted from parent ? 
*/ 3769 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3770 /* (re)acquire tlock on the parent page */ 3771 if (log) { /* COMMIT_PWMAP */ 3772 /* txCommit() with tlckTRUNCATE: 3773 * free child extents covered by parent [); 3774 */ 3775 tlck = txLock(tid, ip, mp, tlckXTREE); 3776 xtlck = (struct xtlock *) & tlck->lock; 3777 if (!(tlck->type & tlckTRUNCATE)) { 3778 xtlck->hwm.offset = 3779 le16_to_cpu(p->header. 3780 nextindex) - 1; 3781 tlck->type = 3782 tlckXTREE | tlckTRUNCATE; 3783 } 3784 } else { /* COMMIT_WMAP */ 3785 3786 /* free child extents covered by parent */ 3787 xadlock.xdlist = &p->xad[index + 1]; 3788 xadlock.count = 3789 le16_to_cpu(p->header.nextindex) - 3790 index - 1; 3791 txFreeMap(ip, (struct maplock *) & xadlock, 3792 NULL, COMMIT_WMAP); 3793 } 3794 BT_MARK_DIRTY(mp, ip); 3795 3796 p->header.nextindex = cpu_to_le16(index + 1); 3797 } 3798 XT_PUTPAGE(mp); 3799 goto getParent; 3800 } 3801 3802 /* 3803 * child page was empty: 3804 */ 3805 nfreed += lengthXAD(&p->xad[index]); 3806 3807 /* 3808 * During working map update, child page's tlock must be handled 3809 * before parent's. This is because the parent's tlock will cause 3810 * the child's disk space to be marked available in the wmap, so 3811 * it's important that the child page be released by that time. 3812 * 3813 * ToDo: tlocks should be on doubly-linked list, so we can 3814 * quickly remove it and add it to the end. 
3815 */ 3816 3817 /* 3818 * Move parent page's tlock to the end of the tid's tlock list 3819 */ 3820 if (log && mp->lid && (tblk->last != mp->lid) && 3821 lid_to_tlock(mp->lid)->tid) { 3822 lid_t lid = mp->lid; 3823 struct tlock *prev; 3824 3825 tlck = lid_to_tlock(lid); 3826 3827 if (tblk->next == lid) 3828 tblk->next = tlck->next; 3829 else { 3830 for (prev = lid_to_tlock(tblk->next); 3831 prev->next != lid; 3832 prev = lid_to_tlock(prev->next)) { 3833 assert(prev->next); 3834 } 3835 prev->next = tlck->next; 3836 } 3837 lid_to_tlock(tblk->last)->next = lid; 3838 tlck->next = 0; 3839 tblk->last = lid; 3840 } 3841 3842 /* 3843 * parent page become empty: free the page 3844 */ 3845 if (index == XTENTRYSTART) { 3846 if (log) { /* COMMIT_PWMAP */ 3847 /* txCommit() with tlckFREE: 3848 * free child extents covered by parent; 3849 * invalidate parent if COMMIT_PWMAP; 3850 */ 3851 tlck = txLock(tid, ip, mp, tlckXTREE); 3852 xtlck = (struct xtlock *) & tlck->lock; 3853 xtlck->hwm.offset = 3854 le16_to_cpu(p->header.nextindex) - 1; 3855 tlck->type = tlckXTREE | tlckFREE; 3856 } else { /* COMMIT_WMAP */ 3857 3858 /* free child extents covered by parent */ 3859 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3860 xadlock.count = 3861 le16_to_cpu(p->header.nextindex) - 3862 XTENTRYSTART; 3863 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3864 COMMIT_WMAP); 3865 } 3866 BT_MARK_DIRTY(mp, ip); 3867 3868 if (p->header.flag & BT_ROOT) { 3869 p->header.flag &= ~BT_INTERNAL; 3870 p->header.flag |= BT_LEAF; 3871 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3872 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3873 /* 3874 * Shrink root down to allow inline 3875 * EA (otherwise fsck complains) 3876 */ 3877 p->header.maxentry = 3878 cpu_to_le16(XTROOTINITSLOT); 3879 JFS_IP(ip)->mode2 |= INLINEEA; 3880 } 3881 3882 XT_PUTPAGE(mp); /* debug */ 3883 goto out; 3884 } else { 3885 if (log) { /* COMMIT_PWMAP */ 3886 /* page will be invalidated at tx completion 3887 */ 3888 
XT_PUTPAGE(mp); 3889 } else { /* COMMIT_WMAP */ 3890 3891 if (mp->lid) 3892 lid_to_tlock(mp->lid)->flag |= 3893 tlckFREELOCK; 3894 3895 /* invalidate parent page */ 3896 discard_metapage(mp); 3897 } 3898 3899 /* parent has become empty and freed: 3900 * go back up to its parent page 3901 */ 3902 /* freed = 1; */ 3903 goto getParent; 3904 } 3905 } 3906 /* 3907 * parent page still has entries for front region; 3908 */ 3909 else { 3910 /* try truncate region covered by preceding entry 3911 * (process backward) 3912 */ 3913 index--; 3914 3915 /* go back down to the child page corresponding 3916 * to the entry 3917 */ 3918 goto getChild; 3919 } 3920 3921 /* 3922 * internal page: go down to child page of current entry 3923 */ 3924 getChild: 3925 /* save current parent entry for the child page */ 3926 BT_PUSH(&btstack, bn, index); 3927 3928 /* get child page */ 3929 xad = &p->xad[index]; 3930 bn = addressXAD(xad); 3931 3932 /* 3933 * first access of each internal entry: 3934 */ 3935 /* release parent page */ 3936 XT_PUTPAGE(mp); 3937 3938 /* process the child page */ 3939 goto getPage; 3940 3941 out: 3942 /* 3943 * update file resource stat 3944 */ 3945 /* set size 3946 */ 3947 if (S_ISDIR(ip->i_mode) && !newsize) 3948 ip->i_size = 1; /* fsck hates zero-length directories */ 3949 else 3950 ip->i_size = newsize; 3951 3952 /* update quota allocation to reflect freed blocks */ 3953 DQUOT_FREE_BLOCK(ip, nfreed); 3954 3955 /* 3956 * free tlock of invalidated pages 3957 */ 3958 if (flag == COMMIT_WMAP) 3959 txFreelock(ip); 3960 3961 return newsize; 3962 } 3963 3964 3965 /* 3966 * xtTruncate_pmap() 3967 * 3968 * function: 3969 * Perform truncate to zero lenghth for deleted file, leaving the 3970 * the xtree and working map untouched. This allows the file to 3971 * be accessed via open file handles, while the delete of the file 3972 * is committed to disk. 
3973 * 3974 * parameter: 3975 * tid_t tid, 3976 * struct inode *ip, 3977 * s64 committed_size) 3978 * 3979 * return: new committed size 3980 * 3981 * note: 3982 * 3983 * To avoid deadlock by holding too many transaction locks, the 3984 * truncation may be broken up into multiple transactions. 3985 * The committed_size keeps track of part of the file has been 3986 * freed from the pmaps. 3987 */ 3988 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3989 { 3990 s64 bn; 3991 struct btstack btstack; 3992 int cmp; 3993 int index; 3994 int locked_leaves = 0; 3995 struct metapage *mp; 3996 xtpage_t *p; 3997 struct btframe *parent; 3998 int rc; 3999 struct tblock *tblk; 4000 struct tlock *tlck = NULL; 4001 xad_t *xad; 4002 int xlen; 4003 s64 xoff; 4004 struct xtlock *xtlck = NULL; 4005 4006 /* save object truncation type */ 4007 tblk = tid_to_tblock(tid); 4008 tblk->xflag |= COMMIT_PMAP; 4009 4010 /* clear stack */ 4011 BT_CLR(&btstack); 4012 4013 if (committed_size) { 4014 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4015 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4016 if (rc) 4017 return rc; 4018 4019 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4020 4021 if (cmp != 0) { 4022 XT_PUTPAGE(mp); 4023 jfs_error(ip->i_sb, 4024 "xtTruncate_pmap: did not find extent"); 4025 return -EIO; 4026 } 4027 } else { 4028 /* 4029 * start with root 4030 * 4031 * root resides in the inode 4032 */ 4033 bn = 0; 4034 4035 /* 4036 * first access of each page: 4037 */ 4038 getPage: 4039 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4040 if (rc) 4041 return rc; 4042 4043 /* process entries backward from last index */ 4044 index = le16_to_cpu(p->header.nextindex) - 1; 4045 4046 if (p->header.flag & BT_INTERNAL) 4047 goto getChild; 4048 } 4049 4050 /* 4051 * leaf page 4052 */ 4053 4054 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4055 /* 4056 * We need to limit the size of the transaction 4057 * to avoid exhausting pagecache & tlocks 4058 */ 4059 xad = 
&p->xad[index]; 4060 xoff = offsetXAD(xad); 4061 xlen = lengthXAD(xad); 4062 XT_PUTPAGE(mp); 4063 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4064 } 4065 tlck = txLock(tid, ip, mp, tlckXTREE); 4066 tlck->type = tlckXTREE | tlckFREE; 4067 xtlck = (struct xtlock *) & tlck->lock; 4068 xtlck->hwm.offset = index; 4069 4070 4071 XT_PUTPAGE(mp); 4072 4073 /* 4074 * go back up to the parent page 4075 */ 4076 getParent: 4077 /* pop/restore parent entry for the current child page */ 4078 if ((parent = BT_POP(&btstack)) == NULL) 4079 /* current page must have been root */ 4080 goto out; 4081 4082 /* get back the parent page */ 4083 bn = parent->bn; 4084 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4085 if (rc) 4086 return rc; 4087 4088 index = parent->index; 4089 4090 /* 4091 * parent page become empty: free the page 4092 */ 4093 if (index == XTENTRYSTART) { 4094 /* txCommit() with tlckFREE: 4095 * free child extents covered by parent; 4096 * invalidate parent if COMMIT_PWMAP; 4097 */ 4098 tlck = txLock(tid, ip, mp, tlckXTREE); 4099 xtlck = (struct xtlock *) & tlck->lock; 4100 xtlck->hwm.offset = 4101 le16_to_cpu(p->header.nextindex) - 1; 4102 tlck->type = tlckXTREE | tlckFREE; 4103 4104 XT_PUTPAGE(mp); 4105 4106 if (p->header.flag & BT_ROOT) { 4107 4108 goto out; 4109 } else { 4110 goto getParent; 4111 } 4112 } 4113 /* 4114 * parent page still has entries for front region; 4115 */ 4116 else 4117 index--; 4118 /* 4119 * internal page: go down to child page of current entry 4120 */ 4121 getChild: 4122 /* save current parent entry for the child page */ 4123 BT_PUSH(&btstack, bn, index); 4124 4125 /* get child page */ 4126 xad = &p->xad[index]; 4127 bn = addressXAD(xad); 4128 4129 /* 4130 * first access of each internal entry: 4131 */ 4132 /* release parent page */ 4133 XT_PUTPAGE(mp); 4134 4135 /* process the child page */ 4136 goto getPage; 4137 4138 out: 4139 4140 return 0; 4141 } 4142 4143 4144 #ifdef _JFS_DEBUG_XTREE 4145 /* 4146 * xtDisplayTree() 4147 * 4148 * function: 
traverse forward 4149 */ 4150 int xtDisplayTree(struct inode *ip) 4151 { 4152 int rc = 0; 4153 struct metapage *mp; 4154 xtpage_t *p; 4155 s64 bn, pbn; 4156 int index, lastindex, v, h; 4157 xad_t *xad; 4158 struct btstack btstack; 4159 struct btframe *btsp; 4160 struct btframe *parent; 4161 4162 printk("display B+-tree.\n"); 4163 4164 /* clear stack */ 4165 btsp = btstack.stack; 4166 4167 /* 4168 * start with root 4169 * 4170 * root resides in the inode 4171 */ 4172 bn = 0; 4173 v = h = 0; 4174 4175 /* 4176 * first access of each page: 4177 */ 4178 getPage: 4179 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4180 if (rc) 4181 return rc; 4182 4183 /* process entries forward from first index */ 4184 index = XTENTRYSTART; 4185 lastindex = le16_to_cpu(p->header.nextindex) - 1; 4186 4187 if (p->header.flag & BT_INTERNAL) { 4188 /* 4189 * first access of each internal page 4190 */ 4191 goto getChild; 4192 } else { /* (p->header.flag & BT_LEAF) */ 4193 4194 /* 4195 * first access of each leaf page 4196 */ 4197 printf("leaf page "); 4198 xtDisplayPage(ip, bn, p); 4199 4200 /* unpin the leaf page */ 4201 XT_PUTPAGE(mp); 4202 } 4203 4204 /* 4205 * go back up to the parent page 4206 */ 4207 getParent: 4208 /* pop/restore parent entry for the current child page */ 4209 if ((parent = (btsp == btstack.stack ? 
NULL : --btsp)) == NULL) 4210 /* current page must have been root */ 4211 return; 4212 4213 /* 4214 * parent page scan completed 4215 */ 4216 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4217 /* go back up to the parent page */ 4218 goto getParent; 4219 } 4220 4221 /* 4222 * parent page has entries remaining 4223 */ 4224 /* get back the parent page */ 4225 bn = parent->bn; 4226 /* v = parent->level; */ 4227 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4228 if (rc) 4229 return rc; 4230 4231 /* get next parent entry */ 4232 index++; 4233 4234 /* 4235 * internal page: go down to child page of current entry 4236 */ 4237 getChild: 4238 /* push/save current parent entry for the child page */ 4239 btsp->bn = pbn = bn; 4240 btsp->index = index; 4241 btsp->lastindex = lastindex; 4242 /* btsp->level = v; */ 4243 /* btsp->node = h; */ 4244 ++btsp; 4245 4246 /* get child page */ 4247 xad = &p->xad[index]; 4248 bn = addressXAD(xad); 4249 4250 /* 4251 * first access of each internal entry: 4252 */ 4253 /* release parent page */ 4254 XT_PUTPAGE(mp); 4255 4256 printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index, 4257 (ulong) bn); 4258 v++; 4259 h = index; 4260 4261 /* process the child page */ 4262 goto getPage; 4263 } 4264 4265 4266 /* 4267 * xtDisplayPage() 4268 * 4269 * function: display page 4270 */ 4271 int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p) 4272 { 4273 int rc = 0; 4274 xad_t *xad; 4275 s64 xaddr, xoff; 4276 int xlen, i, j; 4277 4278 /* display page control */ 4279 printf("bn:0x%lx flag:0x%x nextindex:%d\n", 4280 (ulong) bn, p->header.flag, 4281 le16_to_cpu(p->header.nextindex)); 4282 4283 /* display entries */ 4284 xad = &p->xad[XTENTRYSTART]; 4285 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex); 4286 i++, xad++, j++) { 4287 xoff = offsetXAD(xad); 4288 xaddr = addressXAD(xad); 4289 xlen = lengthXAD(xad); 4290 printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff, 4291 (ulong) xaddr, xlen); 4292 4293 if (j == 4) { 
4294 printf("\n"); 4295 j = 0; 4296 } 4297 } 4298 4299 printf("\n"); 4300 } 4301 #endif /* _JFS_DEBUG_XTREE */ 4302 4303 4304 #ifdef _JFS_WIP 4305 /* 4306 * xtGather() 4307 * 4308 * function: 4309 * traverse for allocation acquiring tlock at commit time 4310 * (vs at the time of update) logging backward top down 4311 * 4312 * note: 4313 * problem - establishing that all new allocation have been 4314 * processed both for append and random write in sparse file 4315 * at the current entry at the current subtree root page 4316 * 4317 */ 4318 int xtGather(btree_t *t) 4319 { 4320 int rc = 0; 4321 xtpage_t *p; 4322 u64 bn; 4323 int index; 4324 btentry_t *e; 4325 struct btstack btstack; 4326 struct btsf *parent; 4327 4328 /* clear stack */ 4329 BT_CLR(&btstack); 4330 4331 /* 4332 * start with root 4333 * 4334 * root resides in the inode 4335 */ 4336 bn = 0; 4337 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4338 if (rc) 4339 return rc; 4340 4341 /* new root is NOT pointed by a new entry 4342 if (p->header.flag & NEW) 4343 allocate new page lock; 4344 write a NEWPAGE log; 4345 */ 4346 4347 dopage: 4348 /* 4349 * first access of each page: 4350 */ 4351 /* process entries backward from last index */ 4352 index = le16_to_cpu(p->header.nextindex) - 1; 4353 4354 if (p->header.flag & BT_LEAF) { 4355 /* 4356 * first access of each leaf page 4357 */ 4358 /* process leaf page entries backward */ 4359 for (; index >= XTENTRYSTART; index--) { 4360 e = &p->xad[index]; 4361 /* 4362 * if newpage, log NEWPAGE. 4363 * 4364 if (e->flag & XAD_NEW) { 4365 nfound =+ entry->length; 4366 update current page lock for the entry; 4367 newpage(entry); 4368 * 4369 * if moved, log move. 
4370 * 4371 } else if (e->flag & XAD_MOVED) { 4372 reset flag; 4373 update current page lock for the entry; 4374 } 4375 */ 4376 } 4377 4378 /* unpin the leaf page */ 4379 XT_PUTPAGE(mp); 4380 4381 /* 4382 * go back up to the parent page 4383 */ 4384 getParent: 4385 /* restore parent entry for the current child page */ 4386 if ((parent = BT_POP(&btstack)) == NULL) 4387 /* current page must have been root */ 4388 return 0; 4389 4390 if ((index = parent->index) == XTENTRYSTART) { 4391 /* 4392 * parent page scan completed 4393 */ 4394 /* go back up to the parent page */ 4395 goto getParent; 4396 } else { 4397 /* 4398 * parent page has entries remaining 4399 */ 4400 /* get back the parent page */ 4401 bn = parent->bn; 4402 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4403 if (rc) 4404 return -EIO; 4405 4406 /* first subroot page which 4407 * covers all new allocated blocks 4408 * itself not new/modified. 4409 * (if modified from split of descendent, 4410 * go down path of split page) 4411 4412 if (nfound == nnew && 4413 !(p->header.flag & (NEW | MOD))) 4414 exit scan; 4415 */ 4416 4417 /* process parent page entries backward */ 4418 index--; 4419 } 4420 } else { 4421 /* 4422 * first access of each internal page 4423 */ 4424 } 4425 4426 /* 4427 * internal page: go down to child page of current entry 4428 */ 4429 4430 /* save current parent entry for the child page */ 4431 BT_PUSH(&btstack, bn, index); 4432 4433 /* get current entry for the child page */ 4434 e = &p->xad[index]; 4435 4436 /* 4437 * first access of each internal entry: 4438 */ 4439 /* 4440 * if new entry, log btree_tnewentry. 4441 * 4442 if (e->flag & XAD_NEW) 4443 update parent page lock for the entry; 4444 */ 4445 4446 /* release parent page */ 4447 XT_PUTPAGE(mp); 4448 4449 /* get child page */ 4450 bn = e->bn; 4451 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4452 if (rc) 4453 return rc; 4454 4455 /* 4456 * first access of each non-root page: 4457 */ 4458 /* 4459 * if new, log btree_newpage. 
4460 * 4461 if (p->header.flag & NEW) 4462 allocate new page lock; 4463 write a NEWPAGE log (next, prev); 4464 */ 4465 4466 /* process the child page */ 4467 goto dopage; 4468 4469 out: 4470 return 0; 4471 } 4472 #endif /* _JFS_WIP */ 4473 4474 4475 #ifdef CONFIG_JFS_STATISTICS 4476 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4477 int *eof, void *data) 4478 { 4479 int len = 0; 4480 off_t begin; 4481 4482 len += sprintf(buffer, 4483 "JFS Xtree statistics\n" 4484 "====================\n" 4485 "searches = %d\n" 4486 "fast searches = %d\n" 4487 "splits = %d\n", 4488 xtStat.search, 4489 xtStat.fastSearch, 4490 xtStat.split); 4491 4492 begin = offset; 4493 *start = buffer + begin; 4494 len -= begin; 4495 4496 if (len > length) 4497 len = length; 4498 else 4499 *eof = 1; 4500 4501 if (len < 0) 4502 len = 0; 4503 4504 return len; 4505 } 4506 #endif 4507