1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2005 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 /* 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 20 */ 21 22 #include <linux/fs.h> 23 #include <linux/quotaops.h> 24 #include "jfs_incore.h" 25 #include "jfs_filsys.h" 26 #include "jfs_metapage.h" 27 #include "jfs_dmap.h" 28 #include "jfs_dinode.h" 29 #include "jfs_superblock.h" 30 #include "jfs_debug.h" 31 32 /* 33 * xtree local flag 34 */ 35 #define XT_INSERT 0x00000001 36 37 /* 38 * xtree key/entry comparison: extent offset 39 * 40 * return: 41 * -1: k < start of extent 42 * 0: start_of_extent <= k <= end_of_extent 43 * 1: k > end_of_extent 44 */ 45 #define XT_CMP(CMP, K, X, OFFSET64)\ 46 {\ 47 OFFSET64 = offsetXAD(X);\ 48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 49 ((K) < OFFSET64) ? -1 : 0;\ 50 } 51 52 /* write a xad entry */ 53 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 54 {\ 55 (XAD)->flag = (FLAG);\ 56 XADoffset((XAD), (OFF));\ 57 XADlength((XAD), (LEN));\ 58 XADaddress((XAD), (ADDR));\ 59 } 60 61 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 62 63 /* get page buffer for specified block address */ 64 /* ToDo: Replace this ugly macro with a function */ 65 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 66 {\ 67 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 68 if (!(RC))\ 69 {\ 70 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 71 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 72 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 73 {\ 74 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ 75 BT_PUTPAGE(MP);\ 76 MP = NULL;\ 77 RC = -EIO;\ 78 }\ 79 }\ 80 } 81 82 /* for consistency */ 83 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 84 85 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 87 /* xtree entry parameter descriptor */ 88 struct xtsplit { 89 struct metapage *mp; 90 s16 index; 91 u8 flag; 92 s64 off; 93 s64 addr; 94 int len; 95 struct pxdlist *pxdlist; 96 }; 97 98 99 /* 100 * statistics 101 */ 102 #ifdef CONFIG_JFS_STATISTICS 103 static struct { 104 uint search; 105 uint fastSearch; 106 uint split; 107 } xtStat; 108 #endif 109 110 111 /* 112 * forward references 113 */ 114 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, 115 struct btstack * btstack, int flag); 116 117 static int xtSplitUp(tid_t tid, 118 struct inode *ip, 119 struct xtsplit * split, struct btstack * btstack); 120 121 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 122 struct metapage ** rmpp, s64 * rbnp); 123 124 static int xtSplitRoot(tid_t tid, struct inode *ip, 125 struct xtsplit * split, struct metapage ** rmpp); 126 127 #ifdef _STILL_TO_PORT 128 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 129 xtpage_t * fp, struct btstack * btstack); 130 131 static int xtSearchNode(struct inode *ip, 132 xad_t * xad, 133 int *cmpp, struct btstack * btstack, int flag); 134 135 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 136 #endif /* _STILL_TO_PORT */ 137 138 /* 139 * xtLookup() 140 * 141 * function: map a single page into a physical extent; 142 */ 143 int xtLookup(struct inode *ip, s64 lstart, 144 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 145 { 146 int rc = 0; 147 struct btstack btstack; 148 int cmp; 149 s64 bn; 150 struct metapage *mp; 151 xtpage_t *p; 152 int index; 153 xad_t *xad; 154 s64 next, size, xoff, xend; 155 int xlen; 156 s64 xaddr; 157 158 *paddr = 0; 159 *plen = llen; 160 161 if (!no_check) { 162 /* is lookup offset beyond eof ? */ 163 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 164 JFS_SBI(ip->i_sb)->l2bsize; 165 if (lstart >= size) { 166 jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)", 167 (ulong) lstart, (ulong) size); 168 return 0; 169 } 170 } 171 172 /* 173 * search for the xad entry covering the logical extent 174 */ 175 //search: 176 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { 177 jfs_err("xtLookup: xtSearch returned %d", rc); 178 return rc; 179 } 180 181 /* 182 * compute the physical extent covering logical extent 183 * 184 * N.B. search may have failed (e.g., hole in sparse file), 185 * and returned the index of the next entry. 186 */ 187 /* retrieve search result */ 188 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 189 190 /* is xad found covering start of logical extent ? 191 * lstart is a page start address, 192 * i.e., lstart cannot start in a hole; 193 */ 194 if (cmp) { 195 if (next) 196 *plen = min(next - lstart, llen); 197 goto out; 198 } 199 200 /* 201 * lxd covered by xad 202 */ 203 xad = &p->xad[index]; 204 xoff = offsetXAD(xad); 205 xlen = lengthXAD(xad); 206 xend = xoff + xlen; 207 xaddr = addressXAD(xad); 208 209 /* initialize new pxd */ 210 *pflag = xad->flag; 211 *paddr = xaddr + (lstart - xoff); 212 /* a page must be fully covered by an xad */ 213 *plen = min(xend - lstart, llen); 214 215 out: 216 XT_PUTPAGE(mp); 217 218 return rc; 219 } 220 221 222 /* 223 * xtLookupList() 224 * 225 * function: map a single logical extent into a list of physical extent; 226 * 227 * parameter: 228 * struct inode *ip, 229 * struct lxdlist *lxdlist, lxd list (in) 230 * struct xadlist *xadlist, xad list (in/out) 231 * int flag) 232 * 233 * coverage of lxd by xad under assumption of 234 * . lxd's are ordered and disjoint. 235 * . xad's are ordered and disjoint. 236 * 237 * return: 238 * 0: success 239 * 240 * note: a page being written (even a single byte) is backed fully, 241 * except the last page which is only backed with blocks 242 * required to cover the last byte; 243 * the extent backing a page is fully contained within an xad; 244 */ 245 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, 246 struct xadlist * xadlist, int flag) 247 { 248 int rc = 0; 249 struct btstack btstack; 250 int cmp; 251 s64 bn; 252 struct metapage *mp; 253 xtpage_t *p; 254 int index; 255 lxd_t *lxd; 256 xad_t *xad, *pxd; 257 s64 size, lstart, lend, xstart, xend, pstart; 258 s64 llen, xlen, plen; 259 s64 xaddr, paddr; 260 int nlxd, npxd, maxnpxd; 261 262 npxd = xadlist->nxad = 0; 263 maxnpxd = xadlist->maxnxad; 264 pxd = xadlist->xad; 265 266 nlxd = lxdlist->nlxd; 267 lxd = lxdlist->lxd; 268 269 lstart = offsetLXD(lxd); 270 llen = lengthLXD(lxd); 271 lend = lstart + llen; 272 273 size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 274 JFS_SBI(ip->i_sb)->l2bsize; 275 276 /* 277 * search for the xad entry covering the logical extent 278 */ 279 search: 280 if (lstart >= size) 281 return 0; 282 283 if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) 284 return rc; 285 286 /* 287 * compute the physical extent covering logical extent 288 * 289 * N.B. search may have failed (e.g., hole in sparse file), 290 * and returned the index of the next entry. 291 */ 292 //map: 293 /* retrieve search result */ 294 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 295 296 /* is xad on the next sibling page ? */ 297 if (index == le16_to_cpu(p->header.nextindex)) { 298 if (p->header.flag & BT_ROOT) 299 goto mapend; 300 301 if ((bn = le64_to_cpu(p->header.next)) == 0) 302 goto mapend; 303 304 XT_PUTPAGE(mp); 305 306 /* get next sibling page */ 307 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 308 if (rc) 309 return rc; 310 311 index = XTENTRYSTART; 312 } 313 314 xad = &p->xad[index]; 315 316 /* 317 * is lxd covered by xad ? 318 */ 319 compare: 320 xstart = offsetXAD(xad); 321 xlen = lengthXAD(xad); 322 xend = xstart + xlen; 323 xaddr = addressXAD(xad); 324 325 compare1: 326 if (xstart < lstart) 327 goto compare2; 328 329 /* (lstart <= xstart) */ 330 331 /* lxd is NOT covered by xad */ 332 if (lend <= xstart) { 333 /* 334 * get next lxd 335 */ 336 if (--nlxd == 0) 337 goto mapend; 338 lxd++; 339 340 lstart = offsetLXD(lxd); 341 llen = lengthLXD(lxd); 342 lend = lstart + llen; 343 if (lstart >= size) 344 goto mapend; 345 346 /* compare with the current xad */ 347 goto compare1; 348 } 349 /* lxd is covered by xad */ 350 else { /* (xstart < lend) */ 351 352 /* initialize new pxd */ 353 pstart = xstart; 354 plen = min(lend - xstart, xlen); 355 paddr = xaddr; 356 357 goto cover; 358 } 359 360 /* (xstart < lstart) */ 361 compare2: 362 /* lxd is covered by xad */ 363 if (lstart < xend) { 364 /* initialize new pxd */ 365 pstart = lstart; 366 plen = min(xend - lstart, llen); 367 paddr = xaddr + (lstart - xstart); 368 369 goto cover; 370 } 371 /* lxd is NOT covered by xad */ 372 else { /* (xend <= lstart) */ 373 374 /* 375 * get next xad 376 * 377 * linear search next xad covering lxd on 378 * the current xad page, and then tree search 379 */ 380 if (index == le16_to_cpu(p->header.nextindex) - 1) { 381 if (p->header.flag & BT_ROOT) 382 goto mapend; 383 384 XT_PUTPAGE(mp); 385 goto search; 386 } else { 387 index++; 388 xad++; 389 390 /* compare with new xad */ 391 goto compare; 392 } 393 } 394 395 /* 396 * lxd is covered by xad and a new pxd has been initialized 397 * (lstart <= xstart < lend) or (xstart < lstart < xend) 398 */ 399 cover: 400 /* finalize pxd corresponding to current xad */ 401 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); 402 403 if (++npxd >= maxnpxd) 404 goto mapend; 405 pxd++; 406 407 /* 408 * lxd is fully covered by xad 409 */ 410 if (lend <= xend) { 411 /* 412 * get next lxd 413 */ 414 if (--nlxd == 0) 415 goto mapend; 416 lxd++; 417 418 lstart = offsetLXD(lxd); 419 llen = lengthLXD(lxd); 420 lend = lstart + llen; 421 if (lstart >= size) 422 goto mapend; 423 424 /* 425 * test for old xad covering new lxd 426 * (old xstart < new lstart) 427 */ 428 goto compare2; 429 } 430 /* 431 * lxd is partially covered by xad 432 */ 433 else { /* (xend < lend) */ 434 435 /* 436 * get next xad 437 * 438 * linear search next xad covering lxd on 439 * the current xad page, and then next xad page search 440 */ 441 if (index == le16_to_cpu(p->header.nextindex) - 1) { 442 if (p->header.flag & BT_ROOT) 443 goto mapend; 444 445 if ((bn = le64_to_cpu(p->header.next)) == 0) 446 goto mapend; 447 448 XT_PUTPAGE(mp); 449 450 /* get next sibling page */ 451 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 452 if (rc) 453 return rc; 454 455 index = XTENTRYSTART; 456 xad = &p->xad[index]; 457 } else { 458 index++; 459 xad++; 460 } 461 462 /* 463 * test for new xad covering old lxd 464 * (old lstart < new xstart) 465 */ 466 goto compare; 467 } 468 469 mapend: 470 xadlist->nxad = npxd; 471 472 //out: 473 XT_PUTPAGE(mp); 474 475 return rc; 476 } 477 478 479 /* 480 * xtSearch() 481 * 482 * function: search for the xad entry covering specified offset. 483 * 484 * parameters: 485 * ip - file object; 486 * xoff - extent offset; 487 * nextp - address of next extent (if any) for search miss 488 * cmpp - comparison result: 489 * btstack - traverse stack; 490 * flag - search process flag (XT_INSERT); 491 * 492 * returns: 493 * btstack contains (bn, index) of search path traversed to the entry. 494 * *cmpp is set to result of comparison with the entry returned. 495 * the page containing the entry is pinned at exit. 496 */ 497 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 498 int *cmpp, struct btstack * btstack, int flag) 499 { 500 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 501 int rc = 0; 502 int cmp = 1; /* init for empty page */ 503 s64 bn; /* block number */ 504 struct metapage *mp; /* page buffer */ 505 xtpage_t *p; /* page */ 506 xad_t *xad; 507 int base, index, lim, btindex; 508 struct btframe *btsp; 509 int nsplit = 0; /* number of pages to split */ 510 s64 t64; 511 s64 next = 0; 512 513 INCREMENT(xtStat.search); 514 515 BT_CLR(btstack); 516 517 btstack->nsplit = 0; 518 519 /* 520 * search down tree from root: 521 * 522 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 523 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 524 * 525 * if entry with search key K is not found 526 * internal page search find the entry with largest key Ki 527 * less than K which point to the child page to search; 528 * leaf page search find the entry with smallest key Kj 529 * greater than K so that the returned index is the position of 530 * the entry to be shifted right for insertion of new entry. 531 * for empty tree, search key is greater than any key of the tree. 532 * 533 * by convention, root bn = 0. 534 */ 535 for (bn = 0;;) { 536 /* get/pin the page to search */ 537 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 538 if (rc) 539 return rc; 540 541 /* try sequential access heuristics with the previous 542 * access entry in target leaf page: 543 * once search narrowed down into the target leaf, 544 * key must either match an entry in the leaf or 545 * key entry does not exist in the tree; 546 */ 547 //fastSearch: 548 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 549 (p->header.flag & BT_LEAF) && 550 (index = jfs_ip->btindex) < 551 le16_to_cpu(p->header.nextindex)) { 552 xad = &p->xad[index]; 553 t64 = offsetXAD(xad); 554 if (xoff < t64 + lengthXAD(xad)) { 555 if (xoff >= t64) { 556 *cmpp = 0; 557 goto out; 558 } 559 560 /* stop sequential access heuristics */ 561 goto binarySearch; 562 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 563 564 /* try next sequential entry */ 565 index++; 566 if (index < 567 le16_to_cpu(p->header.nextindex)) { 568 xad++; 569 t64 = offsetXAD(xad); 570 if (xoff < t64 + lengthXAD(xad)) { 571 if (xoff >= t64) { 572 *cmpp = 0; 573 goto out; 574 } 575 576 /* miss: key falls between 577 * previous and this entry 578 */ 579 *cmpp = 1; 580 next = t64; 581 goto out; 582 } 583 584 /* (xoff >= t64 + lengthXAD(xad)); 585 * matching entry may be further out: 586 * stop heuristic search 587 */ 588 /* stop sequential access heuristics */ 589 goto binarySearch; 590 } 591 592 /* (index == p->header.nextindex); 593 * miss: key entry does not exist in 594 * the target leaf/tree 595 */ 596 *cmpp = 1; 597 goto out; 598 } 599 600 /* 601 * if hit, return index of the entry found, and 602 * if miss, where new entry with search key is 603 * to be inserted; 604 */ 605 out: 606 /* compute number of pages to split */ 607 if (flag & XT_INSERT) { 608 if (p->header.nextindex == /* little-endian */ 609 p->header.maxentry) 610 nsplit++; 611 else 612 nsplit = 0; 613 btstack->nsplit = nsplit; 614 } 615 616 /* save search result */ 617 btsp = btstack->top; 618 btsp->bn = bn; 619 btsp->index = index; 620 btsp->mp = mp; 621 622 /* update sequential access heuristics */ 623 jfs_ip->btindex = index; 624 625 if (nextp) 626 *nextp = next; 627 628 INCREMENT(xtStat.fastSearch); 629 return 0; 630 } 631 632 /* well, ... full search now */ 633 binarySearch: 634 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 635 636 /* 637 * binary search with search key K on the current page 638 */ 639 for (base = XTENTRYSTART; lim; lim >>= 1) { 640 index = base + (lim >> 1); 641 642 XT_CMP(cmp, xoff, &p->xad[index], t64); 643 if (cmp == 0) { 644 /* 645 * search hit 646 */ 647 /* search hit - leaf page: 648 * return the entry found 649 */ 650 if (p->header.flag & BT_LEAF) { 651 *cmpp = cmp; 652 653 /* compute number of pages to split */ 654 if (flag & XT_INSERT) { 655 if (p->header.nextindex == 656 p->header.maxentry) 657 nsplit++; 658 else 659 nsplit = 0; 660 btstack->nsplit = nsplit; 661 } 662 663 /* save search result */ 664 btsp = btstack->top; 665 btsp->bn = bn; 666 btsp->index = index; 667 btsp->mp = mp; 668 669 /* init sequential access heuristics */ 670 btindex = jfs_ip->btindex; 671 if (index == btindex || 672 index == btindex + 1) 673 jfs_ip->btorder = BT_SEQUENTIAL; 674 else 675 jfs_ip->btorder = BT_RANDOM; 676 jfs_ip->btindex = index; 677 678 return 0; 679 } 680 /* search hit - internal page: 681 * descend/search its child page 682 */ 683 if (index < le16_to_cpu(p->header.nextindex)-1) 684 next = offsetXAD(&p->xad[index + 1]); 685 goto next; 686 } 687 688 if (cmp > 0) { 689 base = index + 1; 690 --lim; 691 } 692 } 693 694 /* 695 * search miss 696 * 697 * base is the smallest index with key (Kj) greater than 698 * search key (K) and may be zero or maxentry index. 699 */ 700 if (base < le16_to_cpu(p->header.nextindex)) 701 next = offsetXAD(&p->xad[base]); 702 /* 703 * search miss - leaf page: 704 * 705 * return location of entry (base) where new entry with 706 * search key K is to be inserted. 707 */ 708 if (p->header.flag & BT_LEAF) { 709 *cmpp = cmp; 710 711 /* compute number of pages to split */ 712 if (flag & XT_INSERT) { 713 if (p->header.nextindex == 714 p->header.maxentry) 715 nsplit++; 716 else 717 nsplit = 0; 718 btstack->nsplit = nsplit; 719 } 720 721 /* save search result */ 722 btsp = btstack->top; 723 btsp->bn = bn; 724 btsp->index = base; 725 btsp->mp = mp; 726 727 /* init sequential access heuristics */ 728 btindex = jfs_ip->btindex; 729 if (base == btindex || base == btindex + 1) 730 jfs_ip->btorder = BT_SEQUENTIAL; 731 else 732 jfs_ip->btorder = BT_RANDOM; 733 jfs_ip->btindex = base; 734 735 if (nextp) 736 *nextp = next; 737 738 return 0; 739 } 740 741 /* 742 * search miss - non-leaf page: 743 * 744 * if base is non-zero, decrement base by one to get the parent 745 * entry of the child page to search. 746 */ 747 index = base ? base - 1 : base; 748 749 /* 750 * go down to child page 751 */ 752 next: 753 /* update number of pages to split */ 754 if (p->header.nextindex == p->header.maxentry) 755 nsplit++; 756 else 757 nsplit = 0; 758 759 /* push (bn, index) of the parent page/entry */ 760 if (BT_STACK_FULL(btstack)) { 761 jfs_error(ip->i_sb, "stack overrun in xtSearch!"); 762 XT_PUTPAGE(mp); 763 return -EIO; 764 } 765 BT_PUSH(btstack, bn, index); 766 767 /* get the child page block number */ 768 bn = addressXAD(&p->xad[index]); 769 770 /* unpin the parent page */ 771 XT_PUTPAGE(mp); 772 } 773 } 774 775 /* 776 * xtInsert() 777 * 778 * function: 779 * 780 * parameter: 781 * tid - transaction id; 782 * ip - file object; 783 * xflag - extent flag (XAD_NOTRECORDED): 784 * xoff - extent offset; 785 * xlen - extent length; 786 * xaddrp - extent address pointer (in/out): 787 * if (*xaddrp) 788 * caller allocated data extent at *xaddrp; 789 * else 790 * allocate data extent and return its xaddr; 791 * flag - 792 * 793 * return: 794 */ 795 int xtInsert(tid_t tid, /* transaction id */ 796 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 797 int flag) 798 { 799 int rc = 0; 800 s64 xaddr, hint; 801 struct metapage *mp; /* meta-page buffer */ 802 xtpage_t *p; /* base B+-tree index page */ 803 s64 bn; 804 int index, nextindex; 805 struct btstack btstack; /* traverse stack */ 806 struct xtsplit split; /* split information */ 807 xad_t *xad; 808 int cmp; 809 s64 next; 810 struct tlock *tlck; 811 struct xtlock *xtlck; 812 813 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 814 815 /* 816 * search for the entry location at which to insert: 817 * 818 * xtFastSearch() and xtSearch() both returns (leaf page 819 * pinned, index at which to insert). 820 * n.b. xtSearch() may return index of maxentry of 821 * the full page. 822 */ 823 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 824 return rc; 825 826 /* retrieve search result */ 827 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 828 829 /* This test must follow XT_GETSEARCH since mp must be valid if 830 * we branch to out: */ 831 if ((cmp == 0) || (next && (xlen > next - xoff))) { 832 rc = -EEXIST; 833 goto out; 834 } 835 836 /* 837 * allocate data extent requested 838 * 839 * allocation hint: last xad 840 */ 841 if ((xaddr = *xaddrp) == 0) { 842 if (index > XTENTRYSTART) { 843 xad = &p->xad[index - 1]; 844 hint = addressXAD(xad) + lengthXAD(xad) - 1; 845 } else 846 hint = 0; 847 if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) 848 goto out; 849 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 850 DQUOT_FREE_BLOCK(ip, xlen); 851 goto out; 852 } 853 } 854 855 /* 856 * insert entry for new extent 857 */ 858 xflag |= XAD_NEW; 859 860 /* 861 * if the leaf page is full, split the page and 862 * propagate up the router entry for the new page from split 863 * 864 * The xtSplitUp() will insert the entry and unpin the leaf page. 865 */ 866 nextindex = le16_to_cpu(p->header.nextindex); 867 if (nextindex == le16_to_cpu(p->header.maxentry)) { 868 split.mp = mp; 869 split.index = index; 870 split.flag = xflag; 871 split.off = xoff; 872 split.len = xlen; 873 split.addr = xaddr; 874 split.pxdlist = NULL; 875 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 876 /* undo data extent allocation */ 877 if (*xaddrp == 0) { 878 dbFree(ip, xaddr, (s64) xlen); 879 DQUOT_FREE_BLOCK(ip, xlen); 880 } 881 return rc; 882 } 883 884 *xaddrp = xaddr; 885 return 0; 886 } 887 888 /* 889 * insert the new entry into the leaf page 890 */ 891 /* 892 * acquire a transaction lock on the leaf page; 893 * 894 * action: xad insertion/extension; 895 */ 896 BT_MARK_DIRTY(mp, ip); 897 898 /* if insert into middle, shift right remaining entries. */ 899 if (index < nextindex) 900 memmove(&p->xad[index + 1], &p->xad[index], 901 (nextindex - index) * sizeof(xad_t)); 902 903 /* insert the new entry: mark the entry NEW */ 904 xad = &p->xad[index]; 905 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 906 907 /* advance next available entry index */ 908 p->header.nextindex = 909 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 910 911 /* Don't log it if there are no links to the file */ 912 if (!test_cflag(COMMIT_Nolink, ip)) { 913 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 914 xtlck = (struct xtlock *) & tlck->lock; 915 xtlck->lwm.offset = 916 (xtlck->lwm.offset) ? min(index, 917 (int)xtlck->lwm.offset) : index; 918 xtlck->lwm.length = 919 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 920 } 921 922 *xaddrp = xaddr; 923 924 out: 925 /* unpin the leaf page */ 926 XT_PUTPAGE(mp); 927 928 return rc; 929 } 930 931 932 /* 933 * xtSplitUp() 934 * 935 * function: 936 * split full pages as propagating insertion up the tree 937 * 938 * parameter: 939 * tid - transaction id; 940 * ip - file object; 941 * split - entry parameter descriptor; 942 * btstack - traverse stack from xtSearch() 943 * 944 * return: 945 */ 946 static int 947 xtSplitUp(tid_t tid, 948 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 949 { 950 int rc = 0; 951 struct metapage *smp; 952 xtpage_t *sp; /* split page */ 953 struct metapage *rmp; 954 s64 rbn; /* new right page block number */ 955 struct metapage *rcmp; 956 xtpage_t *rcp; /* right child page */ 957 s64 rcbn; /* right child page block number */ 958 int skip; /* index of entry of insertion */ 959 int nextindex; /* next available entry index of p */ 960 struct btframe *parent; /* parent page entry on traverse stack */ 961 xad_t *xad; 962 s64 xaddr; 963 int xlen; 964 int nsplit; /* number of pages split */ 965 struct pxdlist pxdlist; 966 pxd_t *pxd; 967 struct tlock *tlck; 968 struct xtlock *xtlck; 969 970 smp = split->mp; 971 sp = XT_PAGE(ip, smp); 972 973 /* is inode xtree root extension/inline EA area free ? */ 974 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 975 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && 976 (JFS_IP(ip)->mode2 & INLINEEA)) { 977 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 978 JFS_IP(ip)->mode2 &= ~INLINEEA; 979 980 BT_MARK_DIRTY(smp, ip); 981 /* 982 * acquire a transaction lock on the leaf page; 983 * 984 * action: xad insertion/extension; 985 */ 986 987 /* if insert into middle, shift right remaining entries. */ 988 skip = split->index; 989 nextindex = le16_to_cpu(sp->header.nextindex); 990 if (skip < nextindex) 991 memmove(&sp->xad[skip + 1], &sp->xad[skip], 992 (nextindex - skip) * sizeof(xad_t)); 993 994 /* insert the new entry: mark the entry NEW */ 995 xad = &sp->xad[skip]; 996 XT_PUTENTRY(xad, split->flag, split->off, split->len, 997 split->addr); 998 999 /* advance next available entry index */ 1000 sp->header.nextindex = 1001 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); 1002 1003 /* Don't log it if there are no links to the file */ 1004 if (!test_cflag(COMMIT_Nolink, ip)) { 1005 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1006 xtlck = (struct xtlock *) & tlck->lock; 1007 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1008 min(skip, (int)xtlck->lwm.offset) : skip; 1009 xtlck->lwm.length = 1010 le16_to_cpu(sp->header.nextindex) - 1011 xtlck->lwm.offset; 1012 } 1013 1014 return 0; 1015 } 1016 1017 /* 1018 * allocate new index blocks to cover index page split(s) 1019 * 1020 * allocation hint: ? 1021 */ 1022 if (split->pxdlist == NULL) { 1023 nsplit = btstack->nsplit; 1024 split->pxdlist = &pxdlist; 1025 pxdlist.maxnpxd = pxdlist.npxd = 0; 1026 pxd = &pxdlist.pxd[0]; 1027 xlen = JFS_SBI(ip->i_sb)->nbperpage; 1028 for (; nsplit > 0; nsplit--, pxd++) { 1029 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 1030 == 0) { 1031 PXDaddress(pxd, xaddr); 1032 PXDlength(pxd, xlen); 1033 1034 pxdlist.maxnpxd++; 1035 1036 continue; 1037 } 1038 1039 /* undo allocation */ 1040 1041 XT_PUTPAGE(smp); 1042 return rc; 1043 } 1044 } 1045 1046 /* 1047 * Split leaf page <sp> into <sp> and a new right page <rp>. 1048 * 1049 * The split routines insert the new entry into the leaf page, 1050 * and acquire txLock as appropriate. 1051 * return <rp> pinned and its block number <rpbn>. 1052 */ 1053 rc = (sp->header.flag & BT_ROOT) ? 1054 xtSplitRoot(tid, ip, split, &rmp) : 1055 xtSplitPage(tid, ip, split, &rmp, &rbn); 1056 1057 XT_PUTPAGE(smp); 1058 1059 if (rc) 1060 return -EIO; 1061 /* 1062 * propagate up the router entry for the leaf page just split 1063 * 1064 * insert a router entry for the new page into the parent page, 1065 * propagate the insert/split up the tree by walking back the stack 1066 * of (bn of parent page, index of child page entry in parent page) 1067 * that were traversed during the search for the page that split. 1068 * 1069 * the propagation of insert/split up the tree stops if the root 1070 * splits or the page inserted into doesn't have to split to hold 1071 * the new entry. 1072 * 1073 * the parent entry for the split page remains the same, and 1074 * a new entry is inserted at its right with the first key and 1075 * block number of the new right page. 1076 * 1077 * There are a maximum of 3 pages pinned at any time: 1078 * right child, left parent and right parent (when the parent splits) 1079 * to keep the child page pinned while working on the parent. 1080 * make sure that all pins are released at exit. 1081 */ 1082 while ((parent = BT_POP(btstack)) != NULL) { 1083 /* parent page specified by stack frame <parent> */ 1084 1085 /* keep current child pages <rcp> pinned */ 1086 rcmp = rmp; 1087 rcbn = rbn; 1088 rcp = XT_PAGE(ip, rcmp); 1089 1090 /* 1091 * insert router entry in parent for new right child page <rp> 1092 */ 1093 /* get/pin the parent page <sp> */ 1094 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1095 if (rc) { 1096 XT_PUTPAGE(rcmp); 1097 return rc; 1098 } 1099 1100 /* 1101 * The new key entry goes ONE AFTER the index of parent entry, 1102 * because the split was to the right. 1103 */ 1104 skip = parent->index + 1; 1105 1106 /* 1107 * split or shift right remaining entries of the parent page 1108 */ 1109 nextindex = le16_to_cpu(sp->header.nextindex); 1110 /* 1111 * parent page is full - split the parent page 1112 */ 1113 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 1114 /* init for parent page split */ 1115 split->mp = smp; 1116 split->index = skip; /* index at insert */ 1117 split->flag = XAD_NEW; 1118 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 1119 split->len = JFS_SBI(ip->i_sb)->nbperpage; 1120 split->addr = rcbn; 1121 1122 /* unpin previous right child page */ 1123 XT_PUTPAGE(rcmp); 1124 1125 /* The split routines insert the new entry, 1126 * and acquire txLock as appropriate. 1127 * return <rp> pinned and its block number <rpbn>. 1128 */ 1129 rc = (sp->header.flag & BT_ROOT) ? 1130 xtSplitRoot(tid, ip, split, &rmp) : 1131 xtSplitPage(tid, ip, split, &rmp, &rbn); 1132 if (rc) { 1133 XT_PUTPAGE(smp); 1134 return rc; 1135 } 1136 1137 XT_PUTPAGE(smp); 1138 /* keep new child page <rp> pinned */ 1139 } 1140 /* 1141 * parent page is not full - insert in parent page 1142 */ 1143 else { 1144 /* 1145 * insert router entry in parent for the right child 1146 * page from the first entry of the right child page: 1147 */ 1148 /* 1149 * acquire a transaction lock on the parent page; 1150 * 1151 * action: router xad insertion; 1152 */ 1153 BT_MARK_DIRTY(smp, ip); 1154 1155 /* 1156 * if insert into middle, shift right remaining entries 1157 */ 1158 if (skip < nextindex) 1159 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1160 (nextindex - 1161 skip) << L2XTSLOTSIZE); 1162 1163 /* insert the router entry */ 1164 xad = &sp->xad[skip]; 1165 XT_PUTENTRY(xad, XAD_NEW, 1166 offsetXAD(&rcp->xad[XTENTRYSTART]), 1167 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 1168 1169 /* advance next available entry index. */ 1170 sp->header.nextindex = 1171 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1172 1); 1173 1174 /* Don't log it if there are no links to the file */ 1175 if (!test_cflag(COMMIT_Nolink, ip)) { 1176 tlck = txLock(tid, ip, smp, 1177 tlckXTREE | tlckGROW); 1178 xtlck = (struct xtlock *) & tlck->lock; 1179 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1180 min(skip, (int)xtlck->lwm.offset) : skip; 1181 xtlck->lwm.length = 1182 le16_to_cpu(sp->header.nextindex) - 1183 xtlck->lwm.offset; 1184 } 1185 1186 /* unpin parent page */ 1187 XT_PUTPAGE(smp); 1188 1189 /* exit propagate up */ 1190 break; 1191 } 1192 } 1193 1194 /* unpin current right page */ 1195 XT_PUTPAGE(rmp); 1196 1197 return 0; 1198 } 1199 1200 1201 /* 1202 * xtSplitPage() 1203 * 1204 * function: 1205 * split a full non-root page into 1206 * original/split/left page and new right page 1207 * i.e., the original/split page remains as left page. 1208 * 1209 * parameter: 1210 * int tid, 1211 * struct inode *ip, 1212 * struct xtsplit *split, 1213 * struct metapage **rmpp, 1214 * u64 *rbnp, 1215 * 1216 * return: 1217 * Pointer to page in which to insert or NULL on error. 1218 */ 1219 static int 1220 xtSplitPage(tid_t tid, struct inode *ip, 1221 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 1222 { 1223 int rc = 0; 1224 struct metapage *smp; 1225 xtpage_t *sp; 1226 struct metapage *rmp; 1227 xtpage_t *rp; /* new right page allocated */ 1228 s64 rbn; /* new right page block number */ 1229 struct metapage *mp; 1230 xtpage_t *p; 1231 s64 nextbn; 1232 int skip, maxentry, middle, righthalf, n; 1233 xad_t *xad; 1234 struct pxdlist *pxdlist; 1235 pxd_t *pxd; 1236 struct tlock *tlck; 1237 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 1238 int quota_allocation = 0; 1239 1240 smp = split->mp; 1241 sp = XT_PAGE(ip, smp); 1242 1243 INCREMENT(xtStat.split); 1244 1245 pxdlist = split->pxdlist; 1246 pxd = &pxdlist->pxd[pxdlist->npxd]; 1247 pxdlist->npxd++; 1248 rbn = addressPXD(pxd); 1249 1250 /* Allocate blocks to quota. */ 1251 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1252 rc = -EDQUOT; 1253 goto clean_up; 1254 } 1255 1256 quota_allocation += lengthPXD(pxd); 1257 1258 /* 1259 * allocate the new right page for the split 1260 */ 1261 rmp = get_metapage(ip, rbn, PSIZE, 1); 1262 if (rmp == NULL) { 1263 rc = -EIO; 1264 goto clean_up; 1265 } 1266 1267 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1268 1269 BT_MARK_DIRTY(rmp, ip); 1270 /* 1271 * action: new page; 1272 */ 1273 1274 rp = (xtpage_t *) rmp->data; 1275 rp->header.self = *pxd; 1276 rp->header.flag = sp->header.flag & BT_TYPE; 1277 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1278 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1279 1280 BT_MARK_DIRTY(smp, ip); 1281 /* Don't log it if there are no links to the file */ 1282 if (!test_cflag(COMMIT_Nolink, ip)) { 1283 /* 1284 * acquire a transaction lock on the new right page; 1285 */ 1286 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1287 rxtlck = (struct xtlock *) & tlck->lock; 1288 rxtlck->lwm.offset = XTENTRYSTART; 1289 /* 1290 * acquire a transaction lock on the split page 1291 */ 1292 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1293 sxtlck = (struct xtlock *) & tlck->lock; 1294 } 1295 1296 /* 1297 * initialize/update sibling pointers of <sp> and <rp> 1298 */ 1299 nextbn = le64_to_cpu(sp->header.next); 1300 rp->header.next = cpu_to_le64(nextbn); 1301 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1302 sp->header.next = cpu_to_le64(rbn); 1303 1304 skip = split->index; 1305 1306 /* 1307 * sequential append at tail (after last entry of last page) 1308 * 1309 * if splitting the last page on a level because of appending 1310 * a entry to it (skip is maxentry), it's likely that the access is 1311 * sequential. adding an empty page on the side of the level is less 1312 * work and can push the fill factor much higher than normal. 1313 * if we're wrong it's no big deal - we will do the split the right 1314 * way next time. 1315 * (it may look like it's equally easy to do a similar hack for 1316 * reverse sorted data, that is, split the tree left, but it's not. 1317 * Be my guest.) 1318 */ 1319 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1320 /* 1321 * acquire a transaction lock on the new/right page; 1322 * 1323 * action: xad insertion; 1324 */ 1325 /* insert entry at the first entry of the new right page */ 1326 xad = &rp->xad[XTENTRYSTART]; 1327 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1328 split->addr); 1329 1330 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1331 1332 if (!test_cflag(COMMIT_Nolink, ip)) { 1333 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1334 rxtlck->lwm.length = 1; 1335 } 1336 1337 *rmpp = rmp; 1338 *rbnp = rbn; 1339 1340 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1341 return 0; 1342 } 1343 1344 /* 1345 * non-sequential insert (at possibly middle page) 1346 */ 1347 1348 /* 1349 * update previous pointer of old next/right page of <sp> 1350 */ 1351 if (nextbn != 0) { 1352 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1353 if (rc) { 1354 XT_PUTPAGE(rmp); 1355 goto clean_up; 1356 } 1357 1358 BT_MARK_DIRTY(mp, ip); 1359 /* 1360 * acquire a transaction lock on the next page; 1361 * 1362 * action:sibling pointer update; 1363 */ 1364 if (!test_cflag(COMMIT_Nolink, ip)) 1365 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1366 1367 p->header.prev = cpu_to_le64(rbn); 1368 1369 /* sibling page may have been updated previously, or 1370 * it may be updated later; 1371 */ 1372 1373 XT_PUTPAGE(mp); 1374 } 1375 1376 /* 1377 * split the data between the split and new/right pages 1378 */ 1379 maxentry = le16_to_cpu(sp->header.maxentry); 1380 middle = maxentry >> 1; 1381 righthalf = maxentry - middle; 1382 1383 /* 1384 * skip index in old split/left page - insert into left page: 1385 */ 1386 if (skip <= middle) { 1387 /* move right half of split page to the new right page */ 1388 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1389 righthalf << L2XTSLOTSIZE); 1390 1391 /* shift right tail of left half to make room for new entry */ 1392 if (skip < middle) 1393 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1394 (middle - skip) << L2XTSLOTSIZE); 1395 1396 /* insert new entry */ 1397 xad = &sp->xad[skip]; 1398 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1399 split->addr); 1400 1401 /* update page header */ 1402 sp->header.nextindex = cpu_to_le16(middle + 1); 1403 if (!test_cflag(COMMIT_Nolink, ip)) { 1404 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1405 min(skip, (int)sxtlck->lwm.offset) : skip; 1406 } 1407 1408 rp->header.nextindex = 1409 cpu_to_le16(XTENTRYSTART + righthalf); 1410 } 1411 /* 1412 * skip index in new right page - insert into right page: 1413 */ 1414 else { 1415 /* move left head of right half to right page */ 1416 n = skip - middle; 1417 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1418 n << L2XTSLOTSIZE); 1419 1420 /* insert new entry */ 1421 n += XTENTRYSTART; 1422 xad = &rp->xad[n]; 1423 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1424 split->addr); 1425 1426 /* move right tail of right half to right page */ 1427 if (skip < maxentry) 1428 memmove(&rp->xad[n + 1], &sp->xad[skip], 1429 (maxentry - skip) << L2XTSLOTSIZE); 1430 1431 /* update page header */ 1432 sp->header.nextindex = cpu_to_le16(middle); 1433 if (!test_cflag(COMMIT_Nolink, ip)) { 1434 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1435 min(middle, (int)sxtlck->lwm.offset) : middle; 1436 } 1437 1438 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1439 righthalf + 1); 1440 } 1441 1442 if (!test_cflag(COMMIT_Nolink, ip)) { 1443 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1444 sxtlck->lwm.offset; 1445 1446 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1447 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1448 XTENTRYSTART; 1449 } 1450 1451 *rmpp = rmp; 1452 *rbnp = rbn; 1453 1454 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1455 return rc; 1456 1457 clean_up: 1458 1459 /* Rollback quota allocation. */ 1460 if (quota_allocation) 1461 DQUOT_FREE_BLOCK(ip, quota_allocation); 1462 1463 return (rc); 1464 } 1465 1466 1467 /* 1468 * xtSplitRoot() 1469 * 1470 * function: 1471 * split the full root page into 1472 * original/root/split page and new right page 1473 * i.e., root remains fixed in tree anchor (inode) and 1474 * the root is copied to a single new right child page 1475 * since root page << non-root page, and 1476 * the split root page contains a single entry for the 1477 * new right child page. 1478 * 1479 * parameter: 1480 * int tid, 1481 * struct inode *ip, 1482 * struct xtsplit *split, 1483 * struct metapage **rmpp) 1484 * 1485 * return: 1486 * Pointer to page in which to insert or NULL on error. 1487 */ 1488 static int 1489 xtSplitRoot(tid_t tid, 1490 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1491 { 1492 xtpage_t *sp; 1493 struct metapage *rmp; 1494 xtpage_t *rp; 1495 s64 rbn; 1496 int skip, nextindex; 1497 xad_t *xad; 1498 pxd_t *pxd; 1499 struct pxdlist *pxdlist; 1500 struct tlock *tlck; 1501 struct xtlock *xtlck; 1502 1503 sp = &JFS_IP(ip)->i_xtroot; 1504 1505 INCREMENT(xtStat.split); 1506 1507 /* 1508 * allocate a single (right) child page 1509 */ 1510 pxdlist = split->pxdlist; 1511 pxd = &pxdlist->pxd[pxdlist->npxd]; 1512 pxdlist->npxd++; 1513 rbn = addressPXD(pxd); 1514 rmp = get_metapage(ip, rbn, PSIZE, 1); 1515 if (rmp == NULL) 1516 return -EIO; 1517 1518 /* Allocate blocks to quota. */ 1519 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1520 release_metapage(rmp); 1521 return -EDQUOT; 1522 } 1523 1524 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1525 1526 /* 1527 * acquire a transaction lock on the new right page; 1528 * 1529 * action: new page; 1530 */ 1531 BT_MARK_DIRTY(rmp, ip); 1532 1533 rp = (xtpage_t *) rmp->data; 1534 rp->header.flag = 1535 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1536 rp->header.self = *pxd; 1537 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1538 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1539 1540 /* initialize sibling pointers */ 1541 rp->header.next = 0; 1542 rp->header.prev = 0; 1543 1544 /* 1545 * copy the in-line root page into new right page extent 1546 */ 1547 nextindex = le16_to_cpu(sp->header.maxentry); 1548 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1549 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1550 1551 /* 1552 * insert the new entry into the new right/child page 1553 * (skip index in the new right page will not change) 1554 */ 1555 skip = split->index; 1556 /* if insert into middle, shift right remaining entries */ 1557 if (skip != nextindex) 1558 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1559 (nextindex - skip) * sizeof(xad_t)); 1560 1561 xad = &rp->xad[skip]; 1562 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1563 1564 /* update page header */ 1565 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1566 1567 if (!test_cflag(COMMIT_Nolink, ip)) { 1568 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1569 xtlck = (struct xtlock *) & tlck->lock; 1570 xtlck->lwm.offset = XTENTRYSTART; 1571 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1572 XTENTRYSTART; 1573 } 1574 1575 /* 1576 * reset the root 1577 * 1578 * init root with the single entry for the new right page 1579 * set the 1st entry offset to 0, which force the left-most key 1580 * at any level of the tree to be less than any search key. 1581 */ 1582 /* 1583 * acquire a transaction lock on the root page (in-memory inode); 1584 * 1585 * action: root split; 1586 */ 1587 BT_MARK_DIRTY(split->mp, ip); 1588 1589 xad = &sp->xad[XTENTRYSTART]; 1590 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1591 1592 /* update page header of root */ 1593 sp->header.flag &= ~BT_LEAF; 1594 sp->header.flag |= BT_INTERNAL; 1595 1596 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1597 1598 if (!test_cflag(COMMIT_Nolink, ip)) { 1599 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1600 xtlck = (struct xtlock *) & tlck->lock; 1601 xtlck->lwm.offset = XTENTRYSTART; 1602 xtlck->lwm.length = 1; 1603 } 1604 1605 *rmpp = rmp; 1606 1607 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1608 return 0; 1609 } 1610 1611 1612 /* 1613 * xtExtend() 1614 * 1615 * function: extend in-place; 1616 * 1617 * note: existing extent may or may not have been committed. 1618 * caller is responsible for pager buffer cache update, and 1619 * working block allocation map update; 1620 * update pmap: alloc whole extended extent; 1621 */ 1622 int xtExtend(tid_t tid, /* transaction id */ 1623 struct inode *ip, s64 xoff, /* delta extent offset */ 1624 s32 xlen, /* delta extent length */ 1625 int flag) 1626 { 1627 int rc = 0; 1628 int cmp; 1629 struct metapage *mp; /* meta-page buffer */ 1630 xtpage_t *p; /* base B+-tree index page */ 1631 s64 bn; 1632 int index, nextindex, len; 1633 struct btstack btstack; /* traverse stack */ 1634 struct xtsplit split; /* split information */ 1635 xad_t *xad; 1636 s64 xaddr; 1637 struct tlock *tlck; 1638 struct xtlock *xtlck = NULL; 1639 1640 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1641 1642 /* there must exist extent to be extended */ 1643 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1644 return rc; 1645 1646 /* retrieve search result */ 1647 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1648 1649 if (cmp != 0) { 1650 XT_PUTPAGE(mp); 1651 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1652 return -EIO; 1653 } 1654 1655 /* extension must be contiguous */ 1656 xad = &p->xad[index]; 1657 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1658 XT_PUTPAGE(mp); 1659 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1660 return -EIO; 1661 } 1662 1663 /* 1664 * acquire a transaction lock on the leaf page; 1665 * 1666 * action: xad insertion/extension; 1667 */ 1668 BT_MARK_DIRTY(mp, ip); 1669 if (!test_cflag(COMMIT_Nolink, ip)) { 1670 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1671 xtlck = (struct xtlock *) & tlck->lock; 1672 } 1673 1674 /* extend will overflow extent ? */ 1675 xlen = lengthXAD(xad) + xlen; 1676 if ((len = xlen - MAXXLEN) <= 0) 1677 goto extendOld; 1678 1679 /* 1680 * extent overflow: insert entry for new extent 1681 */ 1682 //insertNew: 1683 xoff = offsetXAD(xad) + MAXXLEN; 1684 xaddr = addressXAD(xad) + MAXXLEN; 1685 nextindex = le16_to_cpu(p->header.nextindex); 1686 1687 /* 1688 * if the leaf page is full, insert the new entry and 1689 * propagate up the router entry for the new page from split 1690 * 1691 * The xtSplitUp() will insert the entry and unpin the leaf page. 1692 */ 1693 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1694 /* xtSpliUp() unpins leaf pages */ 1695 split.mp = mp; 1696 split.index = index + 1; 1697 split.flag = XAD_NEW; 1698 split.off = xoff; /* split offset */ 1699 split.len = len; 1700 split.addr = xaddr; 1701 split.pxdlist = NULL; 1702 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1703 return rc; 1704 1705 /* get back old page */ 1706 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1707 if (rc) 1708 return rc; 1709 /* 1710 * if leaf root has been split, original root has been 1711 * copied to new child page, i.e., original entry now 1712 * resides on the new child page; 1713 */ 1714 if (p->header.flag & BT_INTERNAL) { 1715 ASSERT(p->header.nextindex == 1716 cpu_to_le16(XTENTRYSTART + 1)); 1717 xad = &p->xad[XTENTRYSTART]; 1718 bn = addressXAD(xad); 1719 XT_PUTPAGE(mp); 1720 1721 /* get new child page */ 1722 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1723 if (rc) 1724 return rc; 1725 1726 BT_MARK_DIRTY(mp, ip); 1727 if (!test_cflag(COMMIT_Nolink, ip)) { 1728 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1729 xtlck = (struct xtlock *) & tlck->lock; 1730 } 1731 } 1732 } 1733 /* 1734 * insert the new entry into the leaf page 1735 */ 1736 else { 1737 /* insert the new entry: mark the entry NEW */ 1738 xad = &p->xad[index + 1]; 1739 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1740 1741 /* advance next available entry index */ 1742 p->header.nextindex = 1743 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1744 } 1745 1746 /* get back old entry */ 1747 xad = &p->xad[index]; 1748 xlen = MAXXLEN; 1749 1750 /* 1751 * extend old extent 1752 */ 1753 extendOld: 1754 XADlength(xad, xlen); 1755 if (!(xad->flag & XAD_NEW)) 1756 xad->flag |= XAD_EXTENDED; 1757 1758 if (!test_cflag(COMMIT_Nolink, ip)) { 1759 xtlck->lwm.offset = 1760 (xtlck->lwm.offset) ? min(index, 1761 (int)xtlck->lwm.offset) : index; 1762 xtlck->lwm.length = 1763 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1764 } 1765 1766 /* unpin the leaf page */ 1767 XT_PUTPAGE(mp); 1768 1769 return rc; 1770 } 1771 1772 #ifdef _NOTYET 1773 /* 1774 * xtTailgate() 1775 * 1776 * function: split existing 'tail' extent 1777 * (split offset >= start offset of tail extent), and 1778 * relocate and extend the split tail half; 1779 * 1780 * note: existing extent may or may not have been committed. 1781 * caller is responsible for pager buffer cache update, and 1782 * working block allocation map update; 1783 * update pmap: free old split tail extent, alloc new extent; 1784 */ 1785 int xtTailgate(tid_t tid, /* transaction id */ 1786 struct inode *ip, s64 xoff, /* split/new extent offset */ 1787 s32 xlen, /* new extent length */ 1788 s64 xaddr, /* new extent address */ 1789 int flag) 1790 { 1791 int rc = 0; 1792 int cmp; 1793 struct metapage *mp; /* meta-page buffer */ 1794 xtpage_t *p; /* base B+-tree index page */ 1795 s64 bn; 1796 int index, nextindex, llen, rlen; 1797 struct btstack btstack; /* traverse stack */ 1798 struct xtsplit split; /* split information */ 1799 xad_t *xad; 1800 struct tlock *tlck; 1801 struct xtlock *xtlck = 0; 1802 struct tlock *mtlck; 1803 struct maplock *pxdlock; 1804 1805 /* 1806 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1807 (ulong)xoff, xlen, (ulong)xaddr); 1808 */ 1809 1810 /* there must exist extent to be tailgated */ 1811 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) 1812 return rc; 1813 1814 /* retrieve search result */ 1815 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1816 1817 if (cmp != 0) { 1818 XT_PUTPAGE(mp); 1819 jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); 1820 return -EIO; 1821 } 1822 1823 /* entry found must be last entry */ 1824 nextindex = le16_to_cpu(p->header.nextindex); 1825 if (index != nextindex - 1) { 1826 XT_PUTPAGE(mp); 1827 jfs_error(ip->i_sb, 1828 "xtTailgate: the entry found is not the last entry"); 1829 return -EIO; 1830 } 1831 1832 BT_MARK_DIRTY(mp, ip); 1833 /* 1834 * acquire tlock of the leaf page containing original entry 1835 */ 1836 if (!test_cflag(COMMIT_Nolink, ip)) { 1837 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1838 xtlck = (struct xtlock *) & tlck->lock; 1839 } 1840 1841 /* completely replace extent ? */ 1842 xad = &p->xad[index]; 1843 /* 1844 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1845 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1846 */ 1847 if ((llen = xoff - offsetXAD(xad)) == 0) 1848 goto updateOld; 1849 1850 /* 1851 * partially replace extent: insert entry for new extent 1852 */ 1853 //insertNew: 1854 /* 1855 * if the leaf page is full, insert the new entry and 1856 * propagate up the router entry for the new page from split 1857 * 1858 * The xtSplitUp() will insert the entry and unpin the leaf page. 1859 */ 1860 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1861 /* xtSpliUp() unpins leaf pages */ 1862 split.mp = mp; 1863 split.index = index + 1; 1864 split.flag = XAD_NEW; 1865 split.off = xoff; /* split offset */ 1866 split.len = xlen; 1867 split.addr = xaddr; 1868 split.pxdlist = NULL; 1869 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1870 return rc; 1871 1872 /* get back old page */ 1873 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1874 if (rc) 1875 return rc; 1876 /* 1877 * if leaf root has been split, original root has been 1878 * copied to new child page, i.e., original entry now 1879 * resides on the new child page; 1880 */ 1881 if (p->header.flag & BT_INTERNAL) { 1882 ASSERT(p->header.nextindex == 1883 cpu_to_le16(XTENTRYSTART + 1)); 1884 xad = &p->xad[XTENTRYSTART]; 1885 bn = addressXAD(xad); 1886 XT_PUTPAGE(mp); 1887 1888 /* get new child page */ 1889 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1890 if (rc) 1891 return rc; 1892 1893 BT_MARK_DIRTY(mp, ip); 1894 if (!test_cflag(COMMIT_Nolink, ip)) { 1895 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1896 xtlck = (struct xtlock *) & tlck->lock; 1897 } 1898 } 1899 } 1900 /* 1901 * insert the new entry into the leaf page 1902 */ 1903 else { 1904 /* insert the new entry: mark the entry NEW */ 1905 xad = &p->xad[index + 1]; 1906 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1907 1908 /* advance next available entry index */ 1909 p->header.nextindex = 1910 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1911 } 1912 1913 /* get back old XAD */ 1914 xad = &p->xad[index]; 1915 1916 /* 1917 * truncate/relocate old extent at split offset 1918 */ 1919 updateOld: 1920 /* update dmap for old/committed/truncated extent */ 1921 rlen = lengthXAD(xad) - llen; 1922 if (!(xad->flag & XAD_NEW)) { 1923 /* free from PWMAP at commit */ 1924 if (!test_cflag(COMMIT_Nolink, ip)) { 1925 mtlck = txMaplock(tid, ip, tlckMAP); 1926 pxdlock = (struct maplock *) & mtlck->lock; 1927 pxdlock->flag = mlckFREEPXD; 1928 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1929 PXDlength(&pxdlock->pxd, rlen); 1930 pxdlock->index = 1; 1931 } 1932 } else 1933 /* free from WMAP */ 1934 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1935 1936 if (llen) 1937 /* truncate */ 1938 XADlength(xad, llen); 1939 else 1940 /* replace */ 1941 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1942 1943 if (!test_cflag(COMMIT_Nolink, ip)) { 1944 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1945 min(index, (int)xtlck->lwm.offset) : index; 1946 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1947 xtlck->lwm.offset; 1948 } 1949 1950 /* unpin the leaf page */ 1951 XT_PUTPAGE(mp); 1952 1953 return rc; 1954 } 1955 #endif /* _NOTYET */ 1956 1957 /* 1958 * xtUpdate() 1959 * 1960 * function: update XAD; 1961 * 1962 * update extent for allocated_but_not_recorded or 1963 * compressed extent; 1964 * 1965 * parameter: 1966 * nxad - new XAD; 1967 * logical extent of the specified XAD must be completely 1968 * contained by an existing XAD; 1969 */ 1970 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1971 { /* new XAD */ 1972 int rc = 0; 1973 int cmp; 1974 struct metapage *mp; /* meta-page buffer */ 1975 xtpage_t *p; /* base B+-tree index page */ 1976 s64 bn; 1977 int index0, index, newindex, nextindex; 1978 struct btstack btstack; /* traverse stack */ 1979 struct xtsplit split; /* split information */ 1980 xad_t *xad, *lxad, *rxad; 1981 int xflag; 1982 s64 nxoff, xoff; 1983 int nxlen, xlen, lxlen, rxlen; 1984 s64 nxaddr, xaddr; 1985 struct tlock *tlck; 1986 struct xtlock *xtlck = NULL; 1987 int newpage = 0; 1988 1989 /* there must exist extent to be tailgated */ 1990 nxoff = offsetXAD(nxad); 1991 nxlen = lengthXAD(nxad); 1992 nxaddr = addressXAD(nxad); 1993 1994 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1995 return rc; 1996 1997 /* retrieve search result */ 1998 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1999 2000 if (cmp != 0) { 2001 XT_PUTPAGE(mp); 2002 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 2003 return -EIO; 2004 } 2005 2006 BT_MARK_DIRTY(mp, ip); 2007 /* 2008 * acquire tlock of the leaf page containing original entry 2009 */ 2010 if (!test_cflag(COMMIT_Nolink, ip)) { 2011 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2012 xtlck = (struct xtlock *) & tlck->lock; 2013 } 2014 2015 xad = &p->xad[index0]; 2016 xflag = xad->flag; 2017 xoff = offsetXAD(xad); 2018 xlen = lengthXAD(xad); 2019 xaddr = addressXAD(xad); 2020 2021 /* nXAD must be completely contained within XAD */ 2022 if ((xoff > nxoff) || 2023 (nxoff + nxlen > xoff + xlen)) { 2024 XT_PUTPAGE(mp); 2025 jfs_error(ip->i_sb, 2026 "xtUpdate: nXAD in not completely contained within XAD"); 2027 return -EIO; 2028 } 2029 2030 index = index0; 2031 newindex = index + 1; 2032 nextindex = le16_to_cpu(p->header.nextindex); 2033 2034 #ifdef _JFS_WIP_NOCOALESCE 2035 if (xoff < nxoff) 2036 goto updateRight; 2037 2038 /* 2039 * replace XAD with nXAD 2040 */ 2041 replace: /* (nxoff == xoff) */ 2042 if (nxlen == xlen) { 2043 /* replace XAD with nXAD:recorded */ 2044 *xad = *nxad; 2045 xad->flag = xflag & ~XAD_NOTRECORDED; 2046 2047 goto out; 2048 } else /* (nxlen < xlen) */ 2049 goto updateLeft; 2050 #endif /* _JFS_WIP_NOCOALESCE */ 2051 2052 /* #ifdef _JFS_WIP_COALESCE */ 2053 if (xoff < nxoff) 2054 goto coalesceRight; 2055 2056 /* 2057 * coalesce with left XAD 2058 */ 2059 //coalesceLeft: /* (xoff == nxoff) */ 2060 /* is XAD first entry of page ? */ 2061 if (index == XTENTRYSTART) 2062 goto replace; 2063 2064 /* is nXAD logically and physically contiguous with lXAD ? */ 2065 lxad = &p->xad[index - 1]; 2066 lxlen = lengthXAD(lxad); 2067 if (!(lxad->flag & XAD_NOTRECORDED) && 2068 (nxoff == offsetXAD(lxad) + lxlen) && 2069 (nxaddr == addressXAD(lxad) + lxlen) && 2070 (lxlen + nxlen < MAXXLEN)) { 2071 /* extend right lXAD */ 2072 index0 = index - 1; 2073 XADlength(lxad, lxlen + nxlen); 2074 2075 /* If we just merged two extents together, need to make sure the 2076 * right extent gets logged. If the left one is marked XAD_NEW, 2077 * then we know it will be logged. Otherwise, mark as 2078 * XAD_EXTENDED 2079 */ 2080 if (!(lxad->flag & XAD_NEW)) 2081 lxad->flag |= XAD_EXTENDED; 2082 2083 if (xlen > nxlen) { 2084 /* truncate XAD */ 2085 XADoffset(xad, xoff + nxlen); 2086 XADlength(xad, xlen - nxlen); 2087 XADaddress(xad, xaddr + nxlen); 2088 goto out; 2089 } else { /* (xlen == nxlen) */ 2090 2091 /* remove XAD */ 2092 if (index < nextindex - 1) 2093 memmove(&p->xad[index], &p->xad[index + 1], 2094 (nextindex - index - 2095 1) << L2XTSLOTSIZE); 2096 2097 p->header.nextindex = 2098 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2099 1); 2100 2101 index = index0; 2102 newindex = index + 1; 2103 nextindex = le16_to_cpu(p->header.nextindex); 2104 xoff = nxoff = offsetXAD(lxad); 2105 xlen = nxlen = lxlen + nxlen; 2106 xaddr = nxaddr = addressXAD(lxad); 2107 goto coalesceRight; 2108 } 2109 } 2110 2111 /* 2112 * replace XAD with nXAD 2113 */ 2114 replace: /* (nxoff == xoff) */ 2115 if (nxlen == xlen) { 2116 /* replace XAD with nXAD:recorded */ 2117 *xad = *nxad; 2118 xad->flag = xflag & ~XAD_NOTRECORDED; 2119 2120 goto coalesceRight; 2121 } else /* (nxlen < xlen) */ 2122 goto updateLeft; 2123 2124 /* 2125 * coalesce with right XAD 2126 */ 2127 coalesceRight: /* (xoff <= nxoff) */ 2128 /* is XAD last entry of page ? */ 2129 if (newindex == nextindex) { 2130 if (xoff == nxoff) 2131 goto out; 2132 goto updateRight; 2133 } 2134 2135 /* is nXAD logically and physically contiguous with rXAD ? */ 2136 rxad = &p->xad[index + 1]; 2137 rxlen = lengthXAD(rxad); 2138 if (!(rxad->flag & XAD_NOTRECORDED) && 2139 (nxoff + nxlen == offsetXAD(rxad)) && 2140 (nxaddr + nxlen == addressXAD(rxad)) && 2141 (rxlen + nxlen < MAXXLEN)) { 2142 /* extend left rXAD */ 2143 XADoffset(rxad, nxoff); 2144 XADlength(rxad, rxlen + nxlen); 2145 XADaddress(rxad, nxaddr); 2146 2147 /* If we just merged two extents together, need to make sure 2148 * the left extent gets logged. If the right one is marked 2149 * XAD_NEW, then we know it will be logged. Otherwise, mark as 2150 * XAD_EXTENDED 2151 */ 2152 if (!(rxad->flag & XAD_NEW)) 2153 rxad->flag |= XAD_EXTENDED; 2154 2155 if (xlen > nxlen) 2156 /* truncate XAD */ 2157 XADlength(xad, xlen - nxlen); 2158 else { /* (xlen == nxlen) */ 2159 2160 /* remove XAD */ 2161 memmove(&p->xad[index], &p->xad[index + 1], 2162 (nextindex - index - 1) << L2XTSLOTSIZE); 2163 2164 p->header.nextindex = 2165 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2166 1); 2167 } 2168 2169 goto out; 2170 } else if (xoff == nxoff) 2171 goto out; 2172 2173 if (xoff >= nxoff) { 2174 XT_PUTPAGE(mp); 2175 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 2176 return -EIO; 2177 } 2178 /* #endif _JFS_WIP_COALESCE */ 2179 2180 /* 2181 * split XAD into (lXAD, nXAD): 2182 * 2183 * |---nXAD---> 2184 * --|----------XAD----------|-- 2185 * |-lXAD-| 2186 */ 2187 updateRight: /* (xoff < nxoff) */ 2188 /* truncate old XAD as lXAD:not_recorded */ 2189 xad = &p->xad[index]; 2190 XADlength(xad, nxoff - xoff); 2191 2192 /* insert nXAD:recorded */ 2193 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2194 2195 /* xtSpliUp() unpins leaf pages */ 2196 split.mp = mp; 2197 split.index = newindex; 2198 split.flag = xflag & ~XAD_NOTRECORDED; 2199 split.off = nxoff; 2200 split.len = nxlen; 2201 split.addr = nxaddr; 2202 split.pxdlist = NULL; 2203 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2204 return rc; 2205 2206 /* get back old page */ 2207 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2208 if (rc) 2209 return rc; 2210 /* 2211 * if leaf root has been split, original root has been 2212 * copied to new child page, i.e., original entry now 2213 * resides on the new child page; 2214 */ 2215 if (p->header.flag & BT_INTERNAL) { 2216 ASSERT(p->header.nextindex == 2217 cpu_to_le16(XTENTRYSTART + 1)); 2218 xad = &p->xad[XTENTRYSTART]; 2219 bn = addressXAD(xad); 2220 XT_PUTPAGE(mp); 2221 2222 /* get new child page */ 2223 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2224 if (rc) 2225 return rc; 2226 2227 BT_MARK_DIRTY(mp, ip); 2228 if (!test_cflag(COMMIT_Nolink, ip)) { 2229 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2230 xtlck = (struct xtlock *) & tlck->lock; 2231 } 2232 } else { 2233 /* is nXAD on new page ? */ 2234 if (newindex > 2235 (le16_to_cpu(p->header.maxentry) >> 1)) { 2236 newindex = 2237 newindex - 2238 le16_to_cpu(p->header.nextindex) + 2239 XTENTRYSTART; 2240 newpage = 1; 2241 } 2242 } 2243 } else { 2244 /* if insert into middle, shift right remaining entries */ 2245 if (newindex < nextindex) 2246 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2247 (nextindex - newindex) << L2XTSLOTSIZE); 2248 2249 /* insert the entry */ 2250 xad = &p->xad[newindex]; 2251 *xad = *nxad; 2252 xad->flag = xflag & ~XAD_NOTRECORDED; 2253 2254 /* advance next available entry index. */ 2255 p->header.nextindex = 2256 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2257 } 2258 2259 /* 2260 * does nXAD force 3-way split ? 2261 * 2262 * |---nXAD--->| 2263 * --|----------XAD-------------|-- 2264 * |-lXAD-| |-rXAD -| 2265 */ 2266 if (nxoff + nxlen == xoff + xlen) 2267 goto out; 2268 2269 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2270 if (newpage) { 2271 /* close out old page */ 2272 if (!test_cflag(COMMIT_Nolink, ip)) { 2273 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2274 min(index0, (int)xtlck->lwm.offset) : index0; 2275 xtlck->lwm.length = 2276 le16_to_cpu(p->header.nextindex) - 2277 xtlck->lwm.offset; 2278 } 2279 2280 bn = le64_to_cpu(p->header.next); 2281 XT_PUTPAGE(mp); 2282 2283 /* get new right page */ 2284 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2285 if (rc) 2286 return rc; 2287 2288 BT_MARK_DIRTY(mp, ip); 2289 if (!test_cflag(COMMIT_Nolink, ip)) { 2290 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2291 xtlck = (struct xtlock *) & tlck->lock; 2292 } 2293 2294 index0 = index = newindex; 2295 } else 2296 index++; 2297 2298 newindex = index + 1; 2299 nextindex = le16_to_cpu(p->header.nextindex); 2300 xlen = xlen - (nxoff - xoff); 2301 xoff = nxoff; 2302 xaddr = nxaddr; 2303 2304 /* recompute split pages */ 2305 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2306 XT_PUTPAGE(mp); 2307 2308 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2309 return rc; 2310 2311 /* retrieve search result */ 2312 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2313 2314 if (cmp != 0) { 2315 XT_PUTPAGE(mp); 2316 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2317 return -EIO; 2318 } 2319 2320 if (index0 != index) { 2321 XT_PUTPAGE(mp); 2322 jfs_error(ip->i_sb, 2323 "xtUpdate: unexpected value of index"); 2324 return -EIO; 2325 } 2326 } 2327 2328 /* 2329 * split XAD into (nXAD, rXAD) 2330 * 2331 * ---nXAD---| 2332 * --|----------XAD----------|-- 2333 * |-rXAD-| 2334 */ 2335 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2336 /* update old XAD with nXAD:recorded */ 2337 xad = &p->xad[index]; 2338 *xad = *nxad; 2339 xad->flag = xflag & ~XAD_NOTRECORDED; 2340 2341 /* insert rXAD:not_recorded */ 2342 xoff = xoff + nxlen; 2343 xlen = xlen - nxlen; 2344 xaddr = xaddr + nxlen; 2345 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2346 /* 2347 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2348 */ 2349 /* xtSpliUp() unpins leaf pages */ 2350 split.mp = mp; 2351 split.index = newindex; 2352 split.flag = xflag; 2353 split.off = xoff; 2354 split.len = xlen; 2355 split.addr = xaddr; 2356 split.pxdlist = NULL; 2357 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2358 return rc; 2359 2360 /* get back old page */ 2361 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2362 if (rc) 2363 return rc; 2364 2365 /* 2366 * if leaf root has been split, original root has been 2367 * copied to new child page, i.e., original entry now 2368 * resides on the new child page; 2369 */ 2370 if (p->header.flag & BT_INTERNAL) { 2371 ASSERT(p->header.nextindex == 2372 cpu_to_le16(XTENTRYSTART + 1)); 2373 xad = &p->xad[XTENTRYSTART]; 2374 bn = addressXAD(xad); 2375 XT_PUTPAGE(mp); 2376 2377 /* get new child page */ 2378 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2379 if (rc) 2380 return rc; 2381 2382 BT_MARK_DIRTY(mp, ip); 2383 if (!test_cflag(COMMIT_Nolink, ip)) { 2384 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2385 xtlck = (struct xtlock *) & tlck->lock; 2386 } 2387 } 2388 } else { 2389 /* if insert into middle, shift right remaining entries */ 2390 if (newindex < nextindex) 2391 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2392 (nextindex - newindex) << L2XTSLOTSIZE); 2393 2394 /* insert the entry */ 2395 xad = &p->xad[newindex]; 2396 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2397 2398 /* advance next available entry index. */ 2399 p->header.nextindex = 2400 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2401 } 2402 2403 out: 2404 if (!test_cflag(COMMIT_Nolink, ip)) { 2405 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2406 min(index0, (int)xtlck->lwm.offset) : index0; 2407 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2408 xtlck->lwm.offset; 2409 } 2410 2411 /* unpin the leaf page */ 2412 XT_PUTPAGE(mp); 2413 2414 return rc; 2415 } 2416 2417 2418 /* 2419 * xtAppend() 2420 * 2421 * function: grow in append mode from contiguous region specified ; 2422 * 2423 * parameter: 2424 * tid - transaction id; 2425 * ip - file object; 2426 * xflag - extent flag: 2427 * xoff - extent offset; 2428 * maxblocks - max extent length; 2429 * xlen - extent length (in/out); 2430 * xaddrp - extent address pointer (in/out): 2431 * flag - 2432 * 2433 * return: 2434 */ 2435 int xtAppend(tid_t tid, /* transaction id */ 2436 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2437 s32 * xlenp, /* (in/out) */ 2438 s64 * xaddrp, /* (in/out) */ 2439 int flag) 2440 { 2441 int rc = 0; 2442 struct metapage *mp; /* meta-page buffer */ 2443 xtpage_t *p; /* base B+-tree index page */ 2444 s64 bn, xaddr; 2445 int index, nextindex; 2446 struct btstack btstack; /* traverse stack */ 2447 struct xtsplit split; /* split information */ 2448 xad_t *xad; 2449 int cmp; 2450 struct tlock *tlck; 2451 struct xtlock *xtlck; 2452 int nsplit, nblocks, xlen; 2453 struct pxdlist pxdlist; 2454 pxd_t *pxd; 2455 s64 next; 2456 2457 xaddr = *xaddrp; 2458 xlen = *xlenp; 2459 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2460 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2461 2462 /* 2463 * search for the entry location at which to insert: 2464 * 2465 * xtFastSearch() and xtSearch() both returns (leaf page 2466 * pinned, index at which to insert). 2467 * n.b. xtSearch() may return index of maxentry of 2468 * the full page. 2469 */ 2470 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2471 return rc; 2472 2473 /* retrieve search result */ 2474 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2475 2476 if (cmp == 0) { 2477 rc = -EEXIST; 2478 goto out; 2479 } 2480 2481 if (next) 2482 xlen = min(xlen, (int)(next - xoff)); 2483 //insert: 2484 /* 2485 * insert entry for new extent 2486 */ 2487 xflag |= XAD_NEW; 2488 2489 /* 2490 * if the leaf page is full, split the page and 2491 * propagate up the router entry for the new page from split 2492 * 2493 * The xtSplitUp() will insert the entry and unpin the leaf page. 2494 */ 2495 nextindex = le16_to_cpu(p->header.nextindex); 2496 if (nextindex < le16_to_cpu(p->header.maxentry)) 2497 goto insertLeaf; 2498 2499 /* 2500 * allocate new index blocks to cover index page split(s) 2501 */ 2502 nsplit = btstack.nsplit; 2503 split.pxdlist = &pxdlist; 2504 pxdlist.maxnpxd = pxdlist.npxd = 0; 2505 pxd = &pxdlist.pxd[0]; 2506 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2507 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2508 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2509 PXDaddress(pxd, xaddr); 2510 PXDlength(pxd, nblocks); 2511 2512 pxdlist.maxnpxd++; 2513 2514 continue; 2515 } 2516 2517 /* undo allocation */ 2518 2519 goto out; 2520 } 2521 2522 xlen = min(xlen, maxblocks); 2523 2524 /* 2525 * allocate data extent requested 2526 */ 2527 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2528 goto out; 2529 2530 split.mp = mp; 2531 split.index = index; 2532 split.flag = xflag; 2533 split.off = xoff; 2534 split.len = xlen; 2535 split.addr = xaddr; 2536 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2537 /* undo data extent allocation */ 2538 dbFree(ip, *xaddrp, (s64) * xlenp); 2539 2540 return rc; 2541 } 2542 2543 *xaddrp = xaddr; 2544 *xlenp = xlen; 2545 return 0; 2546 2547 /* 2548 * insert the new entry into the leaf page 2549 */ 2550 insertLeaf: 2551 /* 2552 * allocate data extent requested 2553 */ 2554 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2555 goto out; 2556 2557 BT_MARK_DIRTY(mp, ip); 2558 /* 2559 * acquire a transaction lock on the leaf page; 2560 * 2561 * action: xad insertion/extension; 2562 */ 2563 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2564 xtlck = (struct xtlock *) & tlck->lock; 2565 2566 /* insert the new entry: mark the entry NEW */ 2567 xad = &p->xad[index]; 2568 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2569 2570 /* advance next available entry index */ 2571 p->header.nextindex = 2572 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2573 2574 xtlck->lwm.offset = 2575 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2576 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2577 xtlck->lwm.offset; 2578 2579 *xaddrp = xaddr; 2580 *xlenp = xlen; 2581 2582 out: 2583 /* unpin the leaf page */ 2584 XT_PUTPAGE(mp); 2585 2586 return rc; 2587 } 2588 #ifdef _STILL_TO_PORT 2589 2590 /* - TBD for defragmentaion/reorganization - 2591 * 2592 * xtDelete() 2593 * 2594 * function: 2595 * delete the entry with the specified key. 2596 * 2597 * N.B.: whole extent of the entry is assumed to be deleted. 2598 * 2599 * parameter: 2600 * 2601 * return: 2602 * ENOENT: if the entry is not found. 2603 * 2604 * exception: 2605 */ 2606 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2607 { 2608 int rc = 0; 2609 struct btstack btstack; 2610 int cmp; 2611 s64 bn; 2612 struct metapage *mp; 2613 xtpage_t *p; 2614 int index, nextindex; 2615 struct tlock *tlck; 2616 struct xtlock *xtlck; 2617 2618 /* 2619 * find the matching entry; xtSearch() pins the page 2620 */ 2621 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2622 return rc; 2623 2624 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2625 if (cmp) { 2626 /* unpin the leaf page */ 2627 XT_PUTPAGE(mp); 2628 return -ENOENT; 2629 } 2630 2631 /* 2632 * delete the entry from the leaf page 2633 */ 2634 nextindex = le16_to_cpu(p->header.nextindex); 2635 p->header.nextindex = 2636 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); 2637 2638 /* 2639 * if the leaf page bocome empty, free the page 2640 */ 2641 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2642 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2643 2644 BT_MARK_DIRTY(mp, ip); 2645 /* 2646 * acquire a transaction lock on the leaf page; 2647 * 2648 * action:xad deletion; 2649 */ 2650 tlck = txLock(tid, ip, mp, tlckXTREE); 2651 xtlck = (struct xtlock *) & tlck->lock; 2652 xtlck->lwm.offset = 2653 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2654 2655 /* if delete from middle, shift left/compact the remaining entries */ 2656 if (index < nextindex - 1) 2657 memmove(&p->xad[index], &p->xad[index + 1], 2658 (nextindex - index - 1) * sizeof(xad_t)); 2659 2660 XT_PUTPAGE(mp); 2661 2662 return 0; 2663 } 2664 2665 2666 /* - TBD for defragmentaion/reorganization - 2667 * 2668 * xtDeleteUp() 2669 * 2670 * function: 2671 * free empty pages as propagating deletion up the tree 2672 * 2673 * parameter: 2674 * 2675 * return: 2676 */ 2677 static int 2678 xtDeleteUp(tid_t tid, struct inode *ip, 2679 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2680 { 2681 int rc = 0; 2682 struct metapage *mp; 2683 xtpage_t *p; 2684 int index, nextindex; 2685 s64 xaddr; 2686 int xlen; 2687 struct btframe *parent; 2688 struct tlock *tlck; 2689 struct xtlock *xtlck; 2690 2691 /* 2692 * keep root leaf page which has become empty 2693 */ 2694 if (fp->header.flag & BT_ROOT) { 2695 /* keep the root page */ 2696 fp->header.flag &= ~BT_INTERNAL; 2697 fp->header.flag |= BT_LEAF; 2698 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2699 2700 /* XT_PUTPAGE(fmp); */ 2701 2702 return 0; 2703 } 2704 2705 /* 2706 * free non-root leaf page 2707 */ 2708 if ((rc = xtRelink(tid, ip, fp))) { 2709 XT_PUTPAGE(fmp); 2710 return rc; 2711 } 2712 2713 xaddr = addressPXD(&fp->header.self); 2714 xlen = lengthPXD(&fp->header.self); 2715 /* free the page extent */ 2716 dbFree(ip, xaddr, (s64) xlen); 2717 2718 /* free the buffer page */ 2719 discard_metapage(fmp); 2720 2721 /* 2722 * propagate page deletion up the index tree 2723 * 2724 * If the delete from the parent page makes it empty, 2725 * continue all the way up the tree. 2726 * stop if the root page is reached (which is never deleted) or 2727 * if the entry deletion does not empty the page. 2728 */ 2729 while ((parent = BT_POP(btstack)) != NULL) { 2730 /* get/pin the parent page <sp> */ 2731 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2732 if (rc) 2733 return rc; 2734 2735 index = parent->index; 2736 2737 /* delete the entry for the freed child page from parent. 2738 */ 2739 nextindex = le16_to_cpu(p->header.nextindex); 2740 2741 /* 2742 * the parent has the single entry being deleted: 2743 * free the parent page which has become empty. 2744 */ 2745 if (nextindex == 1) { 2746 if (p->header.flag & BT_ROOT) { 2747 /* keep the root page */ 2748 p->header.flag &= ~BT_INTERNAL; 2749 p->header.flag |= BT_LEAF; 2750 p->header.nextindex = 2751 cpu_to_le16(XTENTRYSTART); 2752 2753 /* XT_PUTPAGE(mp); */ 2754 2755 break; 2756 } else { 2757 /* free the parent page */ 2758 if ((rc = xtRelink(tid, ip, p))) 2759 return rc; 2760 2761 xaddr = addressPXD(&p->header.self); 2762 /* free the page extent */ 2763 dbFree(ip, xaddr, 2764 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2765 2766 /* unpin/free the buffer page */ 2767 discard_metapage(mp); 2768 2769 /* propagate up */ 2770 continue; 2771 } 2772 } 2773 /* 2774 * the parent has other entries remaining: 2775 * delete the router entry from the parent page. 2776 */ 2777 else { 2778 BT_MARK_DIRTY(mp, ip); 2779 /* 2780 * acquire a transaction lock on the leaf page; 2781 * 2782 * action:xad deletion; 2783 */ 2784 tlck = txLock(tid, ip, mp, tlckXTREE); 2785 xtlck = (struct xtlock *) & tlck->lock; 2786 xtlck->lwm.offset = 2787 (xtlck->lwm.offset) ? min(index, 2788 xtlck->lwm. 2789 offset) : index; 2790 2791 /* if delete from middle, 2792 * shift left/compact the remaining entries in the page 2793 */ 2794 if (index < nextindex - 1) 2795 memmove(&p->xad[index], &p->xad[index + 1], 2796 (nextindex - index - 2797 1) << L2XTSLOTSIZE); 2798 2799 p->header.nextindex = 2800 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2801 1); 2802 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2803 (ulong) parent->bn, index); 2804 } 2805 2806 /* unpin the parent page */ 2807 XT_PUTPAGE(mp); 2808 2809 /* exit propagation up */ 2810 break; 2811 } 2812 2813 return 0; 2814 } 2815 2816 2817 /* 2818 * NAME: xtRelocate() 2819 * 2820 * FUNCTION: relocate xtpage or data extent of regular file; 2821 * This function is mainly used by defragfs utility. 2822 * 2823 * NOTE: This routine does not have the logic to handle 2824 * uncommitted allocated extent. The caller should call 2825 * txCommit() to commit all the allocation before call 2826 * this routine. 2827 */ 2828 int 2829 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2830 s64 nxaddr, /* new xaddr */ 2831 int xtype) 2832 { /* extent type: XTPAGE or DATAEXT */ 2833 int rc = 0; 2834 struct tblock *tblk; 2835 struct tlock *tlck; 2836 struct xtlock *xtlck; 2837 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2838 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2839 xad_t *xad; 2840 pxd_t *pxd; 2841 s64 xoff, xsize; 2842 int xlen; 2843 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2844 cbuf_t *cp; 2845 s64 offset, nbytes, nbrd, pno; 2846 int nb, npages, nblks; 2847 s64 bn; 2848 int cmp; 2849 int index; 2850 struct pxd_lock *pxdlock; 2851 struct btstack btstack; /* traverse stack */ 2852 2853 xtype = xtype & EXTENT_TYPE; 2854 2855 xoff = offsetXAD(oxad); 2856 oxaddr = addressXAD(oxad); 2857 xlen = lengthXAD(oxad); 2858 2859 /* validate extent offset */ 2860 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2861 if (offset >= ip->i_size) 2862 return -ESTALE; /* stale extent */ 2863 2864 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2865 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2866 2867 /* 2868 * 1. get and validate the parent xtpage/xad entry 2869 * covering the source extent to be relocated; 2870 */ 2871 if (xtype == DATAEXT) { 2872 /* search in leaf entry */ 2873 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2874 if (rc) 2875 return rc; 2876 2877 /* retrieve search result */ 2878 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2879 2880 if (cmp) { 2881 XT_PUTPAGE(pmp); 2882 return -ESTALE; 2883 } 2884 2885 /* validate for exact match with a single entry */ 2886 xad = &pp->xad[index]; 2887 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2888 XT_PUTPAGE(pmp); 2889 return -ESTALE; 2890 } 2891 } else { /* (xtype == XTPAGE) */ 2892 2893 /* search in internal entry */ 2894 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2895 if (rc) 2896 return rc; 2897 2898 /* retrieve search result */ 2899 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2900 2901 if (cmp) { 2902 XT_PUTPAGE(pmp); 2903 return -ESTALE; 2904 } 2905 2906 /* xtSearchNode() validated for exact match with a single entry 2907 */ 2908 xad = &pp->xad[index]; 2909 } 2910 jfs_info("xtRelocate: parent xad entry validated."); 2911 2912 /* 2913 * 2. relocate the extent 2914 */ 2915 if (xtype == DATAEXT) { 2916 /* if the extent is allocated-but-not-recorded 2917 * there is no real data to be moved in this extent, 2918 */ 2919 if (xad->flag & XAD_NOTRECORDED) 2920 goto out; 2921 else 2922 /* release xtpage for cmRead()/xtLookup() */ 2923 XT_PUTPAGE(pmp); 2924 2925 /* 2926 * cmRelocate() 2927 * 2928 * copy target data pages to be relocated; 2929 * 2930 * data extent must start at page boundary and 2931 * multiple of page size (except the last data extent); 2932 * read in each page of the source data extent into cbuf, 2933 * update the cbuf extent descriptor of the page to be 2934 * homeward bound to new dst data extent 2935 * copy the data from the old extent to new extent. 2936 * copy is essential for compressed files to avoid problems 2937 * that can arise if there was a change in compression 2938 * algorithms. 2939 * it is a good strategy because it may disrupt cache 2940 * policy to keep the pages in memory afterwards. 2941 */ 2942 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2943 assert((offset & CM_OFFSET) == 0); 2944 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2945 pno = offset >> CM_L2BSIZE; 2946 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2947 /* 2948 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2949 (offset >> CM_L2BSIZE) + 1; 2950 */ 2951 sxaddr = oxaddr; 2952 dxaddr = nxaddr; 2953 2954 /* process the request one cache buffer at a time */ 2955 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2956 offset += nb, pno++, npages--) { 2957 /* compute page size */ 2958 nb = min(nbytes - nbrd, CM_BSIZE); 2959 2960 /* get the cache buffer of the page */ 2961 if (rc = cmRead(ip, offset, npages, &cp)) 2962 break; 2963 2964 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2965 assert(!cp->cm_modified); 2966 2967 /* bind buffer with the new extent address */ 2968 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2969 cmSetXD(ip, cp, pno, dxaddr, nblks); 2970 2971 /* release the cbuf, mark it as modified */ 2972 cmPut(cp, true); 2973 2974 dxaddr += nblks; 2975 sxaddr += nblks; 2976 } 2977 2978 /* get back parent page */ 2979 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2980 return rc; 2981 2982 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2983 jfs_info("xtRelocate: target data extent relocated."); 2984 } else { /* (xtype == XTPAGE) */ 2985 2986 /* 2987 * read in the target xtpage from the source extent; 2988 */ 2989 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2990 if (rc) { 2991 XT_PUTPAGE(pmp); 2992 return rc; 2993 } 2994 2995 /* 2996 * read in sibling pages if any to update sibling pointers; 2997 */ 2998 rmp = NULL; 2999 if (p->header.next) { 3000 nextbn = le64_to_cpu(p->header.next); 3001 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 3002 if (rc) { 3003 XT_PUTPAGE(pmp); 3004 XT_PUTPAGE(mp); 3005 return (rc); 3006 } 3007 } 3008 3009 lmp = NULL; 3010 if (p->header.prev) { 3011 prevbn = le64_to_cpu(p->header.prev); 3012 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 3013 if (rc) { 3014 XT_PUTPAGE(pmp); 3015 XT_PUTPAGE(mp); 3016 if (rmp) 3017 XT_PUTPAGE(rmp); 3018 return (rc); 3019 } 3020 } 3021 3022 /* at this point, all xtpages to be updated are in memory */ 3023 3024 /* 3025 * update sibling pointers of sibling xtpages if any; 3026 */ 3027 if (lmp) { 3028 BT_MARK_DIRTY(lmp, ip); 3029 tlck = 3030 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3031 lp->header.next = cpu_to_le64(nxaddr); 3032 XT_PUTPAGE(lmp); 3033 } 3034 3035 if (rmp) { 3036 BT_MARK_DIRTY(rmp, ip); 3037 tlck = 3038 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3039 rp->header.prev = cpu_to_le64(nxaddr); 3040 XT_PUTPAGE(rmp); 3041 } 3042 3043 /* 3044 * update the target xtpage to be relocated 3045 * 3046 * update the self address of the target page 3047 * and write to destination extent; 3048 * redo image covers the whole xtpage since it is new page 3049 * to the destination extent; 3050 * update of bmap for the free of source extent 3051 * of the target xtpage itself: 3052 * update of bmap for the allocation of destination extent 3053 * of the target xtpage itself: 3054 * update of bmap for the extents covered by xad entries in 3055 * the target xtpage is not necessary since they are not 3056 * updated; 3057 * if not committed before this relocation, 3058 * target page may contain XAD_NEW entries which must 3059 * be scanned for bmap update (logredo() always 3060 * scan xtpage REDOPAGE image for bmap update); 3061 * if committed before this relocation (tlckRELOCATE), 3062 * scan may be skipped by commit() and logredo(); 3063 */ 3064 BT_MARK_DIRTY(mp, ip); 3065 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3066 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3067 xtlck = (struct xtlock *) & tlck->lock; 3068 3069 /* update the self address in the xtpage header */ 3070 pxd = &p->header.self; 3071 PXDaddress(pxd, nxaddr); 3072 3073 /* linelock for the after image of the whole page */ 3074 xtlck->lwm.length = 3075 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3076 3077 /* update the buffer extent descriptor of target xtpage */ 3078 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3079 bmSetXD(mp, nxaddr, xsize); 3080 3081 /* unpin the target page to new homeward bound */ 3082 XT_PUTPAGE(mp); 3083 jfs_info("xtRelocate: target xtpage relocated."); 3084 } 3085 3086 /* 3087 * 3. acquire maplock for the source extent to be freed; 3088 * 3089 * acquire a maplock saving the src relocated extent address; 3090 * to free of the extent at commit time; 3091 */ 3092 out: 3093 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3094 * free PXD of the source data extent (logredo() will update 3095 * bmap for free of source data extent), and update bmap for 3096 * free of the source data extent; 3097 */ 3098 if (xtype == DATAEXT) 3099 tlck = txMaplock(tid, ip, tlckMAP); 3100 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3101 * for the source xtpage (logredo() will init NoRedoPage 3102 * filter and will also update bmap for free of the source 3103 * xtpage), and update bmap for free of the source xtpage; 3104 * N.B. We use tlckMAP instead of tlkcXTREE because there 3105 * is no buffer associated with this lock since the buffer 3106 * has been redirected to the target location. 3107 */ 3108 else /* (xtype == XTPAGE) */ 3109 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3110 3111 pxdlock = (struct pxd_lock *) & tlck->lock; 3112 pxdlock->flag = mlckFREEPXD; 3113 PXDaddress(&pxdlock->pxd, oxaddr); 3114 PXDlength(&pxdlock->pxd, xlen); 3115 pxdlock->index = 1; 3116 3117 /* 3118 * 4. update the parent xad entry for relocation; 3119 * 3120 * acquire tlck for the parent entry with XAD_NEW as entry 3121 * update which will write LOG_REDOPAGE and update bmap for 3122 * allocation of XAD_NEW destination extent; 3123 */ 3124 jfs_info("xtRelocate: update parent xad entry."); 3125 BT_MARK_DIRTY(pmp, ip); 3126 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3127 xtlck = (struct xtlock *) & tlck->lock; 3128 3129 /* update the XAD with the new destination extent; */ 3130 xad = &pp->xad[index]; 3131 xad->flag |= XAD_NEW; 3132 XADaddress(xad, nxaddr); 3133 3134 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3135 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3136 xtlck->lwm.offset; 3137 3138 /* unpin the parent xtpage */ 3139 XT_PUTPAGE(pmp); 3140 3141 return rc; 3142 } 3143 3144 3145 /* 3146 * xtSearchNode() 3147 * 3148 * function: search for the internal xad entry covering specified extent. 3149 * This function is mainly used by defragfs utility. 3150 * 3151 * parameters: 3152 * ip - file object; 3153 * xad - extent to find; 3154 * cmpp - comparison result: 3155 * btstack - traverse stack; 3156 * flag - search process flag; 3157 * 3158 * returns: 3159 * btstack contains (bn, index) of search path traversed to the entry. 3160 * *cmpp is set to result of comparison with the entry returned. 3161 * the page containing the entry is pinned at exit. 3162 */ 3163 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3164 int *cmpp, struct btstack * btstack, int flag) 3165 { 3166 int rc = 0; 3167 s64 xoff, xaddr; 3168 int xlen; 3169 int cmp = 1; /* init for empty page */ 3170 s64 bn; /* block number */ 3171 struct metapage *mp; /* meta-page buffer */ 3172 xtpage_t *p; /* page */ 3173 int base, index, lim; 3174 struct btframe *btsp; 3175 s64 t64; 3176 3177 BT_CLR(btstack); 3178 3179 xoff = offsetXAD(xad); 3180 xlen = lengthXAD(xad); 3181 xaddr = addressXAD(xad); 3182 3183 /* 3184 * search down tree from root: 3185 * 3186 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3187 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3188 * 3189 * if entry with search key K is not found 3190 * internal page search find the entry with largest key Ki 3191 * less than K which point to the child page to search; 3192 * leaf page search find the entry with smallest key Kj 3193 * greater than K so that the returned index is the position of 3194 * the entry to be shifted right for insertion of new entry. 3195 * for empty tree, search key is greater than any key of the tree. 3196 * 3197 * by convention, root bn = 0. 3198 */ 3199 for (bn = 0;;) { 3200 /* get/pin the page to search */ 3201 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3202 if (rc) 3203 return rc; 3204 if (p->header.flag & BT_LEAF) { 3205 XT_PUTPAGE(mp); 3206 return -ESTALE; 3207 } 3208 3209 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3210 3211 /* 3212 * binary search with search key K on the current page 3213 */ 3214 for (base = XTENTRYSTART; lim; lim >>= 1) { 3215 index = base + (lim >> 1); 3216 3217 XT_CMP(cmp, xoff, &p->xad[index], t64); 3218 if (cmp == 0) { 3219 /* 3220 * search hit 3221 * 3222 * verify for exact match; 3223 */ 3224 if (xaddr == addressXAD(&p->xad[index]) && 3225 xoff == offsetXAD(&p->xad[index])) { 3226 *cmpp = cmp; 3227 3228 /* save search result */ 3229 btsp = btstack->top; 3230 btsp->bn = bn; 3231 btsp->index = index; 3232 btsp->mp = mp; 3233 3234 return 0; 3235 } 3236 3237 /* descend/search its child page */ 3238 goto next; 3239 } 3240 3241 if (cmp > 0) { 3242 base = index + 1; 3243 --lim; 3244 } 3245 } 3246 3247 /* 3248 * search miss - non-leaf page: 3249 * 3250 * base is the smallest index with key (Kj) greater than 3251 * search key (K) and may be zero or maxentry index. 3252 * if base is non-zero, decrement base by one to get the parent 3253 * entry of the child page to search. 3254 */ 3255 index = base ? base - 1 : base; 3256 3257 /* 3258 * go down to child page 3259 */ 3260 next: 3261 /* get the child page block number */ 3262 bn = addressXAD(&p->xad[index]); 3263 3264 /* unpin the parent page */ 3265 XT_PUTPAGE(mp); 3266 } 3267 } 3268 3269 3270 /* 3271 * xtRelink() 3272 * 3273 * function: 3274 * link around a freed page. 3275 * 3276 * Parameter: 3277 * int tid, 3278 * struct inode *ip, 3279 * xtpage_t *p) 3280 * 3281 * returns: 3282 */ 3283 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3284 { 3285 int rc = 0; 3286 struct metapage *mp; 3287 s64 nextbn, prevbn; 3288 struct tlock *tlck; 3289 3290 nextbn = le64_to_cpu(p->header.next); 3291 prevbn = le64_to_cpu(p->header.prev); 3292 3293 /* update prev pointer of the next page */ 3294 if (nextbn != 0) { 3295 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3296 if (rc) 3297 return rc; 3298 3299 /* 3300 * acquire a transaction lock on the page; 3301 * 3302 * action: update prev pointer; 3303 */ 3304 BT_MARK_DIRTY(mp, ip); 3305 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3306 3307 /* the page may already have been tlock'd */ 3308 3309 p->header.prev = cpu_to_le64(prevbn); 3310 3311 XT_PUTPAGE(mp); 3312 } 3313 3314 /* update next pointer of the previous page */ 3315 if (prevbn != 0) { 3316 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3317 if (rc) 3318 return rc; 3319 3320 /* 3321 * acquire a transaction lock on the page; 3322 * 3323 * action: update next pointer; 3324 */ 3325 BT_MARK_DIRTY(mp, ip); 3326 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3327 3328 /* the page may already have been tlock'd */ 3329 3330 p->header.next = le64_to_cpu(nextbn); 3331 3332 XT_PUTPAGE(mp); 3333 } 3334 3335 return 0; 3336 } 3337 #endif /* _STILL_TO_PORT */ 3338 3339 3340 /* 3341 * xtInitRoot() 3342 * 3343 * initialize file root (inline in inode) 3344 */ 3345 void xtInitRoot(tid_t tid, struct inode *ip) 3346 { 3347 xtpage_t *p; 3348 3349 /* 3350 * acquire a transaction lock on the root 3351 * 3352 * action: 3353 */ 3354 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3355 tlckXTREE | tlckNEW); 3356 p = &JFS_IP(ip)->i_xtroot; 3357 3358 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3359 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3360 3361 if (S_ISDIR(ip->i_mode)) 3362 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3363 else { 3364 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3365 ip->i_size = 0; 3366 } 3367 3368 3369 return; 3370 } 3371 3372 3373 /* 3374 * We can run into a deadlock truncating a file with a large number of 3375 * xtree pages (large fragmented file). A robust fix would entail a 3376 * reservation system where we would reserve a number of metadata pages 3377 * and tlocks which we would be guaranteed without a deadlock. Without 3378 * this, a partial fix is to limit number of metadata pages we will lock 3379 * in a single transaction. Currently we will truncate the file so that 3380 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3381 * will be responsible for ensuring that the current transaction gets 3382 * committed, and that subsequent transactions are created to truncate 3383 * the file further if needed. 3384 */ 3385 #define MAX_TRUNCATE_LEAVES 50 3386 3387 /* 3388 * xtTruncate() 3389 * 3390 * function: 3391 * traverse for truncation logging backward bottom up; 3392 * terminate at the last extent entry at the current subtree 3393 * root page covering new down size. 3394 * truncation may occur within the last extent entry. 3395 * 3396 * parameter: 3397 * int tid, 3398 * struct inode *ip, 3399 * s64 newsize, 3400 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3401 * 3402 * return: 3403 * 3404 * note: 3405 * PWMAP: 3406 * 1. truncate (non-COMMIT_NOLINK file) 3407 * by jfs_truncate() or jfs_open(O_TRUNC): 3408 * xtree is updated; 3409 * 2. truncate index table of directory when last entry removed 3410 * map update via tlock at commit time; 3411 * PMAP: 3412 * Call xtTruncate_pmap instead 3413 * WMAP: 3414 * 1. remove (free zero link count) on last reference release 3415 * (pmap has been freed at commit zero link count); 3416 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3417 * xtree is updated; 3418 * map update directly at truncation time; 3419 * 3420 * if (DELETE) 3421 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3422 * else if (TRUNCATE) 3423 * must write LOG_NOREDOPAGE for deleted index page; 3424 * 3425 * pages may already have been tlocked by anonymous transactions 3426 * during file growth (i.e., write) before truncation; 3427 * 3428 * except last truncated entry, deleted entries remains as is 3429 * in the page (nextindex is updated) for other use 3430 * (e.g., log/update allocation map): this avoid copying the page 3431 * info but delay free of pages; 3432 * 3433 */ 3434 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3435 { 3436 int rc = 0; 3437 s64 teof; 3438 struct metapage *mp; 3439 xtpage_t *p; 3440 s64 bn; 3441 int index, nextindex; 3442 xad_t *xad; 3443 s64 xoff, xaddr; 3444 int xlen, len, freexlen; 3445 struct btstack btstack; 3446 struct btframe *parent; 3447 struct tblock *tblk = NULL; 3448 struct tlock *tlck = NULL; 3449 struct xtlock *xtlck = NULL; 3450 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3451 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3452 s64 nfreed; 3453 int freed, log; 3454 int locked_leaves = 0; 3455 3456 /* save object truncation type */ 3457 if (tid) { 3458 tblk = tid_to_tblock(tid); 3459 tblk->xflag |= flag; 3460 } 3461 3462 nfreed = 0; 3463 3464 flag &= COMMIT_MAP; 3465 assert(flag != COMMIT_PMAP); 3466 3467 if (flag == COMMIT_PWMAP) 3468 log = 1; 3469 else { 3470 log = 0; 3471 xadlock.flag = mlckFREEXADLIST; 3472 xadlock.index = 1; 3473 } 3474 3475 /* 3476 * if the newsize is not an integral number of pages, 3477 * the file between newsize and next page boundary will 3478 * be cleared. 3479 * if truncating into a file hole, it will cause 3480 * a full block to be allocated for the logical block. 3481 */ 3482 3483 /* 3484 * release page blocks of truncated region <teof, eof> 3485 * 3486 * free the data blocks from the leaf index blocks. 3487 * delete the parent index entries corresponding to 3488 * the freed child data/index blocks. 3489 * free the index blocks themselves which aren't needed 3490 * in new sized file. 3491 * 3492 * index blocks are updated only if the blocks are to be 3493 * retained in the new sized file. 3494 * if type is PMAP, the data and index pages are NOT 3495 * freed, and the data and index blocks are NOT freed 3496 * from working map. 3497 * (this will allow continued access of data/index of 3498 * temporary file (zerolink count file truncated to zero-length)). 3499 */ 3500 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3501 JFS_SBI(ip->i_sb)->l2bsize; 3502 3503 /* clear stack */ 3504 BT_CLR(&btstack); 3505 3506 /* 3507 * start with root 3508 * 3509 * root resides in the inode 3510 */ 3511 bn = 0; 3512 3513 /* 3514 * first access of each page: 3515 */ 3516 getPage: 3517 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3518 if (rc) 3519 return rc; 3520 3521 /* process entries backward from last index */ 3522 index = le16_to_cpu(p->header.nextindex) - 1; 3523 3524 3525 /* Since this is the rightmost page at this level, and we may have 3526 * already freed a page that was formerly to the right, let's make 3527 * sure that the next pointer is zero. 3528 */ 3529 if (p->header.next) { 3530 if (log) 3531 /* 3532 * Make sure this change to the header is logged. 3533 * If we really truncate this leaf, the flag 3534 * will be changed to tlckTRUNCATE 3535 */ 3536 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3537 BT_MARK_DIRTY(mp, ip); 3538 p->header.next = 0; 3539 } 3540 3541 if (p->header.flag & BT_INTERNAL) 3542 goto getChild; 3543 3544 /* 3545 * leaf page 3546 */ 3547 freed = 0; 3548 3549 /* does region covered by leaf page precede Teof ? */ 3550 xad = &p->xad[index]; 3551 xoff = offsetXAD(xad); 3552 xlen = lengthXAD(xad); 3553 if (teof >= xoff + xlen) { 3554 XT_PUTPAGE(mp); 3555 goto getParent; 3556 } 3557 3558 /* (re)acquire tlock of the leaf page */ 3559 if (log) { 3560 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3561 /* 3562 * We need to limit the size of the transaction 3563 * to avoid exhausting pagecache & tlocks 3564 */ 3565 XT_PUTPAGE(mp); 3566 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3567 goto getParent; 3568 } 3569 tlck = txLock(tid, ip, mp, tlckXTREE); 3570 tlck->type = tlckXTREE | tlckTRUNCATE; 3571 xtlck = (struct xtlock *) & tlck->lock; 3572 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3573 } 3574 BT_MARK_DIRTY(mp, ip); 3575 3576 /* 3577 * scan backward leaf page entries 3578 */ 3579 for (; index >= XTENTRYSTART; index--) { 3580 xad = &p->xad[index]; 3581 xoff = offsetXAD(xad); 3582 xlen = lengthXAD(xad); 3583 xaddr = addressXAD(xad); 3584 3585 /* 3586 * The "data" for a directory is indexed by the block 3587 * device's address space. This metadata must be invalidated 3588 * here 3589 */ 3590 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3591 invalidate_xad_metapages(ip, *xad); 3592 /* 3593 * entry beyond eof: continue scan of current page 3594 * xad 3595 * ---|---=======-------> 3596 * eof 3597 */ 3598 if (teof < xoff) { 3599 nfreed += xlen; 3600 continue; 3601 } 3602 3603 /* 3604 * (xoff <= teof): last entry to be deleted from page; 3605 * If other entries remain in page: keep and update the page. 3606 */ 3607 3608 /* 3609 * eof == entry_start: delete the entry 3610 * xad 3611 * -------|=======-------> 3612 * eof 3613 * 3614 */ 3615 if (teof == xoff) { 3616 nfreed += xlen; 3617 3618 if (index == XTENTRYSTART) 3619 break; 3620 3621 nextindex = index; 3622 } 3623 /* 3624 * eof within the entry: truncate the entry. 3625 * xad 3626 * -------===|===-------> 3627 * eof 3628 */ 3629 else if (teof < xoff + xlen) { 3630 /* update truncated entry */ 3631 len = teof - xoff; 3632 freexlen = xlen - len; 3633 XADlength(xad, len); 3634 3635 /* save pxd of truncated extent in tlck */ 3636 xaddr += len; 3637 if (log) { /* COMMIT_PWMAP */ 3638 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3639 min(index, (int)xtlck->lwm.offset) : index; 3640 xtlck->lwm.length = index + 1 - 3641 xtlck->lwm.offset; 3642 xtlck->twm.offset = index; 3643 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3644 pxdlock->flag = mlckFREEPXD; 3645 PXDaddress(&pxdlock->pxd, xaddr); 3646 PXDlength(&pxdlock->pxd, freexlen); 3647 } 3648 /* free truncated extent */ 3649 else { /* COMMIT_WMAP */ 3650 3651 pxdlock = (struct pxd_lock *) & xadlock; 3652 pxdlock->flag = mlckFREEPXD; 3653 PXDaddress(&pxdlock->pxd, xaddr); 3654 PXDlength(&pxdlock->pxd, freexlen); 3655 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3656 3657 /* reset map lock */ 3658 xadlock.flag = mlckFREEXADLIST; 3659 } 3660 3661 /* current entry is new last entry; */ 3662 nextindex = index + 1; 3663 3664 nfreed += freexlen; 3665 } 3666 /* 3667 * eof beyond the entry: 3668 * xad 3669 * -------=======---|---> 3670 * eof 3671 */ 3672 else { /* (xoff + xlen < teof) */ 3673 3674 nextindex = index + 1; 3675 } 3676 3677 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3678 if (!log) { /* COMMIT_WAMP */ 3679 xadlock.xdlist = &p->xad[nextindex]; 3680 xadlock.count = 3681 le16_to_cpu(p->header.nextindex) - 3682 nextindex; 3683 txFreeMap(ip, (struct maplock *) & xadlock, 3684 NULL, COMMIT_WMAP); 3685 } 3686 p->header.nextindex = cpu_to_le16(nextindex); 3687 } 3688 3689 XT_PUTPAGE(mp); 3690 3691 /* assert(freed == 0); */ 3692 goto getParent; 3693 } /* end scan of leaf page entries */ 3694 3695 freed = 1; 3696 3697 /* 3698 * leaf page become empty: free the page if type != PMAP 3699 */ 3700 if (log) { /* COMMIT_PWMAP */ 3701 /* txCommit() with tlckFREE: 3702 * free data extents covered by leaf [XTENTRYSTART:hwm); 3703 * invalidate leaf if COMMIT_PWMAP; 3704 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3705 */ 3706 tlck->type = tlckXTREE | tlckFREE; 3707 } else { /* COMMIT_WAMP */ 3708 3709 /* free data extents covered by leaf */ 3710 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3711 xadlock.count = 3712 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3713 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3714 } 3715 3716 if (p->header.flag & BT_ROOT) { 3717 p->header.flag &= ~BT_INTERNAL; 3718 p->header.flag |= BT_LEAF; 3719 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3720 3721 XT_PUTPAGE(mp); /* debug */ 3722 goto out; 3723 } else { 3724 if (log) { /* COMMIT_PWMAP */ 3725 /* page will be invalidated at tx completion 3726 */ 3727 XT_PUTPAGE(mp); 3728 } else { /* COMMIT_WMAP */ 3729 3730 if (mp->lid) 3731 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3732 3733 /* invalidate empty leaf page */ 3734 discard_metapage(mp); 3735 } 3736 } 3737 3738 /* 3739 * the leaf page become empty: delete the parent entry 3740 * for the leaf page if the parent page is to be kept 3741 * in the new sized file. 3742 */ 3743 3744 /* 3745 * go back up to the parent page 3746 */ 3747 getParent: 3748 /* pop/restore parent entry for the current child page */ 3749 if ((parent = BT_POP(&btstack)) == NULL) 3750 /* current page must have been root */ 3751 goto out; 3752 3753 /* get back the parent page */ 3754 bn = parent->bn; 3755 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3756 if (rc) 3757 return rc; 3758 3759 index = parent->index; 3760 3761 /* 3762 * child page was not empty: 3763 */ 3764 if (freed == 0) { 3765 /* has any entry deleted from parent ? */ 3766 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3767 /* (re)acquire tlock on the parent page */ 3768 if (log) { /* COMMIT_PWMAP */ 3769 /* txCommit() with tlckTRUNCATE: 3770 * free child extents covered by parent [); 3771 */ 3772 tlck = txLock(tid, ip, mp, tlckXTREE); 3773 xtlck = (struct xtlock *) & tlck->lock; 3774 if (!(tlck->type & tlckTRUNCATE)) { 3775 xtlck->hwm.offset = 3776 le16_to_cpu(p->header. 3777 nextindex) - 1; 3778 tlck->type = 3779 tlckXTREE | tlckTRUNCATE; 3780 } 3781 } else { /* COMMIT_WMAP */ 3782 3783 /* free child extents covered by parent */ 3784 xadlock.xdlist = &p->xad[index + 1]; 3785 xadlock.count = 3786 le16_to_cpu(p->header.nextindex) - 3787 index - 1; 3788 txFreeMap(ip, (struct maplock *) & xadlock, 3789 NULL, COMMIT_WMAP); 3790 } 3791 BT_MARK_DIRTY(mp, ip); 3792 3793 p->header.nextindex = cpu_to_le16(index + 1); 3794 } 3795 XT_PUTPAGE(mp); 3796 goto getParent; 3797 } 3798 3799 /* 3800 * child page was empty: 3801 */ 3802 nfreed += lengthXAD(&p->xad[index]); 3803 3804 /* 3805 * During working map update, child page's tlock must be handled 3806 * before parent's. This is because the parent's tlock will cause 3807 * the child's disk space to be marked available in the wmap, so 3808 * it's important that the child page be released by that time. 3809 * 3810 * ToDo: tlocks should be on doubly-linked list, so we can 3811 * quickly remove it and add it to the end. 3812 */ 3813 3814 /* 3815 * Move parent page's tlock to the end of the tid's tlock list 3816 */ 3817 if (log && mp->lid && (tblk->last != mp->lid) && 3818 lid_to_tlock(mp->lid)->tid) { 3819 lid_t lid = mp->lid; 3820 struct tlock *prev; 3821 3822 tlck = lid_to_tlock(lid); 3823 3824 if (tblk->next == lid) 3825 tblk->next = tlck->next; 3826 else { 3827 for (prev = lid_to_tlock(tblk->next); 3828 prev->next != lid; 3829 prev = lid_to_tlock(prev->next)) { 3830 assert(prev->next); 3831 } 3832 prev->next = tlck->next; 3833 } 3834 lid_to_tlock(tblk->last)->next = lid; 3835 tlck->next = 0; 3836 tblk->last = lid; 3837 } 3838 3839 /* 3840 * parent page become empty: free the page 3841 */ 3842 if (index == XTENTRYSTART) { 3843 if (log) { /* COMMIT_PWMAP */ 3844 /* txCommit() with tlckFREE: 3845 * free child extents covered by parent; 3846 * invalidate parent if COMMIT_PWMAP; 3847 */ 3848 tlck = txLock(tid, ip, mp, tlckXTREE); 3849 xtlck = (struct xtlock *) & tlck->lock; 3850 xtlck->hwm.offset = 3851 le16_to_cpu(p->header.nextindex) - 1; 3852 tlck->type = tlckXTREE | tlckFREE; 3853 } else { /* COMMIT_WMAP */ 3854 3855 /* free child extents covered by parent */ 3856 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3857 xadlock.count = 3858 le16_to_cpu(p->header.nextindex) - 3859 XTENTRYSTART; 3860 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3861 COMMIT_WMAP); 3862 } 3863 BT_MARK_DIRTY(mp, ip); 3864 3865 if (p->header.flag & BT_ROOT) { 3866 p->header.flag &= ~BT_INTERNAL; 3867 p->header.flag |= BT_LEAF; 3868 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3869 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3870 /* 3871 * Shrink root down to allow inline 3872 * EA (otherwise fsck complains) 3873 */ 3874 p->header.maxentry = 3875 cpu_to_le16(XTROOTINITSLOT); 3876 JFS_IP(ip)->mode2 |= INLINEEA; 3877 } 3878 3879 XT_PUTPAGE(mp); /* debug */ 3880 goto out; 3881 } else { 3882 if (log) { /* COMMIT_PWMAP */ 3883 /* page will be invalidated at tx completion 3884 */ 3885 XT_PUTPAGE(mp); 3886 } else { /* COMMIT_WMAP */ 3887 3888 if (mp->lid) 3889 lid_to_tlock(mp->lid)->flag |= 3890 tlckFREELOCK; 3891 3892 /* invalidate parent page */ 3893 discard_metapage(mp); 3894 } 3895 3896 /* parent has become empty and freed: 3897 * go back up to its parent page 3898 */ 3899 /* freed = 1; */ 3900 goto getParent; 3901 } 3902 } 3903 /* 3904 * parent page still has entries for front region; 3905 */ 3906 else { 3907 /* try truncate region covered by preceding entry 3908 * (process backward) 3909 */ 3910 index--; 3911 3912 /* go back down to the child page corresponding 3913 * to the entry 3914 */ 3915 goto getChild; 3916 } 3917 3918 /* 3919 * internal page: go down to child page of current entry 3920 */ 3921 getChild: 3922 /* save current parent entry for the child page */ 3923 if (BT_STACK_FULL(&btstack)) { 3924 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3925 XT_PUTPAGE(mp); 3926 return -EIO; 3927 } 3928 BT_PUSH(&btstack, bn, index); 3929 3930 /* get child page */ 3931 xad = &p->xad[index]; 3932 bn = addressXAD(xad); 3933 3934 /* 3935 * first access of each internal entry: 3936 */ 3937 /* release parent page */ 3938 XT_PUTPAGE(mp); 3939 3940 /* process the child page */ 3941 goto getPage; 3942 3943 out: 3944 /* 3945 * update file resource stat 3946 */ 3947 /* set size 3948 */ 3949 if (S_ISDIR(ip->i_mode) && !newsize) 3950 ip->i_size = 1; /* fsck hates zero-length directories */ 3951 else 3952 ip->i_size = newsize; 3953 3954 /* update quota allocation to reflect freed blocks */ 3955 DQUOT_FREE_BLOCK(ip, nfreed); 3956 3957 /* 3958 * free tlock of invalidated pages 3959 */ 3960 if (flag == COMMIT_WMAP) 3961 txFreelock(ip); 3962 3963 return newsize; 3964 } 3965 3966 3967 /* 3968 * xtTruncate_pmap() 3969 * 3970 * function: 3971 * Perform truncate to zero lenghth for deleted file, leaving the 3972 * the xtree and working map untouched. This allows the file to 3973 * be accessed via open file handles, while the delete of the file 3974 * is committed to disk. 3975 * 3976 * parameter: 3977 * tid_t tid, 3978 * struct inode *ip, 3979 * s64 committed_size) 3980 * 3981 * return: new committed size 3982 * 3983 * note: 3984 * 3985 * To avoid deadlock by holding too many transaction locks, the 3986 * truncation may be broken up into multiple transactions. 3987 * The committed_size keeps track of part of the file has been 3988 * freed from the pmaps. 3989 */ 3990 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3991 { 3992 s64 bn; 3993 struct btstack btstack; 3994 int cmp; 3995 int index; 3996 int locked_leaves = 0; 3997 struct metapage *mp; 3998 xtpage_t *p; 3999 struct btframe *parent; 4000 int rc; 4001 struct tblock *tblk; 4002 struct tlock *tlck = NULL; 4003 xad_t *xad; 4004 int xlen; 4005 s64 xoff; 4006 struct xtlock *xtlck = NULL; 4007 4008 /* save object truncation type */ 4009 tblk = tid_to_tblock(tid); 4010 tblk->xflag |= COMMIT_PMAP; 4011 4012 /* clear stack */ 4013 BT_CLR(&btstack); 4014 4015 if (committed_size) { 4016 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4017 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4018 if (rc) 4019 return rc; 4020 4021 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4022 4023 if (cmp != 0) { 4024 XT_PUTPAGE(mp); 4025 jfs_error(ip->i_sb, 4026 "xtTruncate_pmap: did not find extent"); 4027 return -EIO; 4028 } 4029 } else { 4030 /* 4031 * start with root 4032 * 4033 * root resides in the inode 4034 */ 4035 bn = 0; 4036 4037 /* 4038 * first access of each page: 4039 */ 4040 getPage: 4041 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4042 if (rc) 4043 return rc; 4044 4045 /* process entries backward from last index */ 4046 index = le16_to_cpu(p->header.nextindex) - 1; 4047 4048 if (p->header.flag & BT_INTERNAL) 4049 goto getChild; 4050 } 4051 4052 /* 4053 * leaf page 4054 */ 4055 4056 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4057 /* 4058 * We need to limit the size of the transaction 4059 * to avoid exhausting pagecache & tlocks 4060 */ 4061 xad = &p->xad[index]; 4062 xoff = offsetXAD(xad); 4063 xlen = lengthXAD(xad); 4064 XT_PUTPAGE(mp); 4065 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4066 } 4067 tlck = txLock(tid, ip, mp, tlckXTREE); 4068 tlck->type = tlckXTREE | tlckFREE; 4069 xtlck = (struct xtlock *) & tlck->lock; 4070 xtlck->hwm.offset = index; 4071 4072 4073 XT_PUTPAGE(mp); 4074 4075 /* 4076 * go back up to the parent page 4077 */ 4078 getParent: 4079 /* pop/restore parent entry for the current child page */ 4080 if ((parent = BT_POP(&btstack)) == NULL) 4081 /* current page must have been root */ 4082 goto out; 4083 4084 /* get back the parent page */ 4085 bn = parent->bn; 4086 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4087 if (rc) 4088 return rc; 4089 4090 index = parent->index; 4091 4092 /* 4093 * parent page become empty: free the page 4094 */ 4095 if (index == XTENTRYSTART) { 4096 /* txCommit() with tlckFREE: 4097 * free child extents covered by parent; 4098 * invalidate parent if COMMIT_PWMAP; 4099 */ 4100 tlck = txLock(tid, ip, mp, tlckXTREE); 4101 xtlck = (struct xtlock *) & tlck->lock; 4102 xtlck->hwm.offset = 4103 le16_to_cpu(p->header.nextindex) - 1; 4104 tlck->type = tlckXTREE | tlckFREE; 4105 4106 XT_PUTPAGE(mp); 4107 4108 if (p->header.flag & BT_ROOT) { 4109 4110 goto out; 4111 } else { 4112 goto getParent; 4113 } 4114 } 4115 /* 4116 * parent page still has entries for front region; 4117 */ 4118 else 4119 index--; 4120 /* 4121 * internal page: go down to child page of current entry 4122 */ 4123 getChild: 4124 /* save current parent entry for the child page */ 4125 if (BT_STACK_FULL(&btstack)) { 4126 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 4127 XT_PUTPAGE(mp); 4128 return -EIO; 4129 } 4130 BT_PUSH(&btstack, bn, index); 4131 4132 /* get child page */ 4133 xad = &p->xad[index]; 4134 bn = addressXAD(xad); 4135 4136 /* 4137 * first access of each internal entry: 4138 */ 4139 /* release parent page */ 4140 XT_PUTPAGE(mp); 4141 4142 /* process the child page */ 4143 goto getPage; 4144 4145 out: 4146 4147 return 0; 4148 } 4149 4150 #ifdef CONFIG_JFS_STATISTICS 4151 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4152 int *eof, void *data) 4153 { 4154 int len = 0; 4155 off_t begin; 4156 4157 len += sprintf(buffer, 4158 "JFS Xtree statistics\n" 4159 "====================\n" 4160 "searches = %d\n" 4161 "fast searches = %d\n" 4162 "splits = %d\n", 4163 xtStat.search, 4164 xtStat.fastSearch, 4165 xtStat.split); 4166 4167 begin = offset; 4168 *start = buffer + begin; 4169 len -= begin; 4170 4171 if (len > length) 4172 len = length; 4173 else 4174 *eof = 1; 4175 4176 if (len < 0) 4177 len = 0; 4178 4179 return len; 4180 } 4181 #endif 4182