/*
 * Copyright (C) International Business Machines Corp., 2000-2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001

/*
 * xtree key/entry comparison: extent offset
 *
 * return:
 *	-1: k < start of extent
 *	 0: start_of_extent <= k <= end_of_extent
 *	 1: k > end_of_extent
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}

/* write a xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		    (le16_to_cpu((P)->header.nextindex) >\
		     le16_to_cpu((P)->header.maxentry)) ||\
		    (le16_to_cpu((P)->header.maxentry) >\
		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)

/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;
	s16 index;
	u8 flag;
	s64 off;
	s64 addr;
	int len;
	struct pxdlist *pxdlist;
};


/*
 *	statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;
	uint fastSearch;
	uint split;
} xtStat;
#endif


/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);
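
/*
 * Worked example of the XT_CMP() convention above (a sketch for
 * illustration only): for an xad X with offsetXAD(X) == 100 and
 * lengthXAD(X) == 8, i.e. covering offsets [100, 108):
 *
 *	XT_CMP(cmp, 99, X, t64);	=> cmp == -1 (before the extent)
 *	XT_CMP(cmp, 100, X, t64);	=> cmp ==  0 (first covered offset)
 *	XT_CMP(cmp, 107, X, t64);	=> cmp ==  0 (last covered offset)
 *	XT_CMP(cmp, 108, X, t64);	=> cmp ==  1 (past the extent)
 *
 * where t64 is an s64 scratch variable that receives offsetXAD(X).
 */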

#ifdef _STILL_TO_PORT
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
		      xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif /* _STILL_TO_PORT */

/*
 *	xtLookup()
 *
 * function: map a single page into a physical extent;
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size) {
			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
				(ulong) lstart, (ulong) size);
			return 0;
		}
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}
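
/*
 * Usage sketch for xtLookup() (a hypothetical caller; variable names
 * are illustrative only): map one logical block of ip to its disk
 * address.
 *
 *	int xflag;
 *	s64 paddr;
 *	s32 plen;
 *
 *	if (xtLookup(ip, lblk, 1, &xflag, &paddr, &plen, 0) == 0 && plen)
 *		(lblk is backed by filesystem block paddr)
 *
 * On a search miss (e.g., a hole in a sparse file) *paddr remains 0 and
 * *plen is clipped to the distance to the next allocated extent, if any.
 */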

/*
 *	xtLookupList()
 *
 * function: map a single logical extent into a list of physical extents;
 *
 * parameter:
 *	struct inode	*ip,
 *	struct lxdlist	*lxdlist,	lxd list (in)
 *	struct xadlist	*xadlist,	xad list (in/out)
 *	int		flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
 *	0:	success
 *
 * note: a page being written (even a single byte) is backed fully,
 *	except the last page which is only backed with blocks
 *	required to cover the last byte;
 *	the extent backing a page is fully contained within an xad;
 */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 *	compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
320 */ 321 compare: 322 xstart = offsetXAD(xad); 323 xlen = lengthXAD(xad); 324 xend = xstart + xlen; 325 xaddr = addressXAD(xad); 326 327 compare1: 328 if (xstart < lstart) 329 goto compare2; 330 331 /* (lstart <= xstart) */ 332 333 /* lxd is NOT covered by xad */ 334 if (lend <= xstart) { 335 /* 336 * get next lxd 337 */ 338 if (--nlxd == 0) 339 goto mapend; 340 lxd++; 341 342 lstart = offsetLXD(lxd); 343 llen = lengthLXD(lxd); 344 lend = lstart + llen; 345 if (lstart >= size) 346 goto mapend; 347 348 /* compare with the current xad */ 349 goto compare1; 350 } 351 /* lxd is covered by xad */ 352 else { /* (xstart < lend) */ 353 354 /* initialize new pxd */ 355 pstart = xstart; 356 plen = min(lend - xstart, xlen); 357 paddr = xaddr; 358 359 goto cover; 360 } 361 362 /* (xstart < lstart) */ 363 compare2: 364 /* lxd is covered by xad */ 365 if (lstart < xend) { 366 /* initialize new pxd */ 367 pstart = lstart; 368 plen = min(xend - lstart, llen); 369 paddr = xaddr + (lstart - xstart); 370 371 goto cover; 372 } 373 /* lxd is NOT covered by xad */ 374 else { /* (xend <= lstart) */ 375 376 /* 377 * get next xad 378 * 379 * linear search next xad covering lxd on 380 * the current xad page, and then tree search 381 */ 382 if (index == le16_to_cpu(p->header.nextindex) - 1) { 383 if (p->header.flag & BT_ROOT) 384 goto mapend; 385 386 XT_PUTPAGE(mp); 387 goto search; 388 } else { 389 index++; 390 xad++; 391 392 /* compare with new xad */ 393 goto compare; 394 } 395 } 396 397 /* 398 * lxd is covered by xad and a new pxd has been initialized 399 * (lstart <= xstart < lend) or (xstart < lstart < xend) 400 */ 401 cover: 402 /* finalize pxd corresponding to current xad */ 403 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); 404 405 if (++npxd >= maxnpxd) 406 goto mapend; 407 pxd++; 408 409 /* 410 * lxd is fully covered by xad 411 */ 412 if (lend <= xend) { 413 /* 414 * get next lxd 415 */ 416 if (--nlxd == 0) 417 goto mapend; 418 lxd++; 419 420 lstart = offsetLXD(lxd); 421 llen = lengthLXD(lxd); 422 lend = lstart + llen; 423 if (lstart >= size) 424 goto mapend; 425 426 /* 427 * test for old xad covering new lxd 428 * (old xstart < new lstart) 429 */ 430 goto compare2; 431 } 432 /* 433 * lxd is partially covered by xad 434 */ 435 else { /* (xend < lend) */ 436 437 /* 438 * get next xad 439 * 440 * linear search next xad covering lxd on 441 * the current xad page, and then next xad page search 442 */ 443 if (index == le16_to_cpu(p->header.nextindex) - 1) { 444 if (p->header.flag & BT_ROOT) 445 goto mapend; 446 447 if ((bn = le64_to_cpu(p->header.next)) == 0) 448 goto mapend; 449 450 XT_PUTPAGE(mp); 451 452 /* get next sibling page */ 453 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 454 if (rc) 455 return rc; 456 457 index = XTENTRYSTART; 458 xad = &p->xad[index]; 459 } else { 460 index++; 461 xad++; 462 } 463 464 /* 465 * test for new xad covering old lxd 466 * (old lstart < new xstart) 467 */ 468 goto compare; 469 } 470 471 mapend: 472 xadlist->nxad = npxd; 473 474 //out: 475 XT_PUTPAGE(mp); 476 477 return rc; 478 } 479 480 481 /* 482 * xtSearch() 483 * 484 * function: search for the xad entry covering specified offset. 485 * 486 * parameters: 487 * ip - file object; 488 * xoff - extent offset; 489 * nextp - address of next extent (if any) for search miss 490 * cmpp - comparison result: 491 * btstack - traverse stack; 492 * flag - search process flag (XT_INSERT); 493 * 494 * returns: 495 * btstack contains (bn, index) of search path traversed to the entry. 

/*
 *	xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 *	search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * an internal page, child page Pi contains entries with keys K,
	 * Ki <= K < Kj.
	 *
	 * if an entry with search key K is not found,
	 * internal page search finds the entry with the largest key Ki
	 * less than K, which points to the child page to search;
	 * leaf page search finds the entry with the smallest key Kj
	 * greater than K, so that the returned index is the position of
	 * the entry to be shifted right for insertion of the new entry.
	 * for an empty tree, the search key is greater than any key of
	 * the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
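
/*
 * Caller's contract with xtSearch() (a sketch, mirroring the use in
 * xtLookup() above): on return 0 the target leaf is left pinned; the
 * caller retrieves the hit/insertion point with XT_GETSEARCH() and
 * must XT_PUTPAGE() the page when done:
 *
 *	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, 0)))
 *		return rc;
 *	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 *	(examine p->xad[index] on hit, i.e. cmp == 0)
 *	XT_PUTPAGE(mp);
 */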

/*
 *	xtInsert()
 *
 * function: insert xad entry for a new extent;
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 *	search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 *	insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 *	if the leaf page is full, split the page and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 *	insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ? */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
1103 */ 1104 skip = parent->index + 1; 1105 1106 /* 1107 * split or shift right remaining entries of the parent page 1108 */ 1109 nextindex = le16_to_cpu(sp->header.nextindex); 1110 /* 1111 * parent page is full - split the parent page 1112 */ 1113 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 1114 /* init for parent page split */ 1115 split->mp = smp; 1116 split->index = skip; /* index at insert */ 1117 split->flag = XAD_NEW; 1118 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 1119 split->len = JFS_SBI(ip->i_sb)->nbperpage; 1120 split->addr = rcbn; 1121 1122 /* unpin previous right child page */ 1123 XT_PUTPAGE(rcmp); 1124 1125 /* The split routines insert the new entry, 1126 * and acquire txLock as appropriate. 1127 * return <rp> pinned and its block number <rpbn>. 1128 */ 1129 rc = (sp->header.flag & BT_ROOT) ? 1130 xtSplitRoot(tid, ip, split, &rmp) : 1131 xtSplitPage(tid, ip, split, &rmp, &rbn); 1132 if (rc) { 1133 XT_PUTPAGE(smp); 1134 return rc; 1135 } 1136 1137 XT_PUTPAGE(smp); 1138 /* keep new child page <rp> pinned */ 1139 } 1140 /* 1141 * parent page is not full - insert in parent page 1142 */ 1143 else { 1144 /* 1145 * insert router entry in parent for the right child 1146 * page from the first entry of the right child page: 1147 */ 1148 /* 1149 * acquire a transaction lock on the parent page; 1150 * 1151 * action: router xad insertion; 1152 */ 1153 BT_MARK_DIRTY(smp, ip); 1154 1155 /* 1156 * if insert into middle, shift right remaining entries 1157 */ 1158 if (skip < nextindex) 1159 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1160 (nextindex - 1161 skip) << L2XTSLOTSIZE); 1162 1163 /* insert the router entry */ 1164 xad = &sp->xad[skip]; 1165 XT_PUTENTRY(xad, XAD_NEW, 1166 offsetXAD(&rcp->xad[XTENTRYSTART]), 1167 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 1168 1169 /* advance next available entry index. */ 1170 le16_add_cpu(&sp->header.nextindex, 1); 1171 1172 /* Don't log it if there are no links to the file */ 1173 if (!test_cflag(COMMIT_Nolink, ip)) { 1174 tlck = txLock(tid, ip, smp, 1175 tlckXTREE | tlckGROW); 1176 xtlck = (struct xtlock *) & tlck->lock; 1177 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1178 min(skip, (int)xtlck->lwm.offset) : skip; 1179 xtlck->lwm.length = 1180 le16_to_cpu(sp->header.nextindex) - 1181 xtlck->lwm.offset; 1182 } 1183 1184 /* unpin parent page */ 1185 XT_PUTPAGE(smp); 1186 1187 /* exit propagate up */ 1188 break; 1189 } 1190 } 1191 1192 /* unpin current right page */ 1193 XT_PUTPAGE(rmp); 1194 1195 return 0; 1196 } 1197 1198 1199 /* 1200 * xtSplitPage() 1201 * 1202 * function: 1203 * split a full non-root page into 1204 * original/split/left page and new right page 1205 * i.e., the original/split page remains as left page. 1206 * 1207 * parameter: 1208 * int tid, 1209 * struct inode *ip, 1210 * struct xtsplit *split, 1211 * struct metapage **rmpp, 1212 * u64 *rbnp, 1213 * 1214 * return: 1215 * Pointer to page in which to insert or NULL on error. 
1216 */ 1217 static int 1218 xtSplitPage(tid_t tid, struct inode *ip, 1219 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 1220 { 1221 int rc = 0; 1222 struct metapage *smp; 1223 xtpage_t *sp; 1224 struct metapage *rmp; 1225 xtpage_t *rp; /* new right page allocated */ 1226 s64 rbn; /* new right page block number */ 1227 struct metapage *mp; 1228 xtpage_t *p; 1229 s64 nextbn; 1230 int skip, maxentry, middle, righthalf, n; 1231 xad_t *xad; 1232 struct pxdlist *pxdlist; 1233 pxd_t *pxd; 1234 struct tlock *tlck; 1235 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 1236 int quota_allocation = 0; 1237 1238 smp = split->mp; 1239 sp = XT_PAGE(ip, smp); 1240 1241 INCREMENT(xtStat.split); 1242 1243 pxdlist = split->pxdlist; 1244 pxd = &pxdlist->pxd[pxdlist->npxd]; 1245 pxdlist->npxd++; 1246 rbn = addressPXD(pxd); 1247 1248 /* Allocate blocks to quota. */ 1249 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1250 rc = -EDQUOT; 1251 goto clean_up; 1252 } 1253 1254 quota_allocation += lengthPXD(pxd); 1255 1256 /* 1257 * allocate the new right page for the split 1258 */ 1259 rmp = get_metapage(ip, rbn, PSIZE, 1); 1260 if (rmp == NULL) { 1261 rc = -EIO; 1262 goto clean_up; 1263 } 1264 1265 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1266 1267 BT_MARK_DIRTY(rmp, ip); 1268 /* 1269 * action: new page; 1270 */ 1271 1272 rp = (xtpage_t *) rmp->data; 1273 rp->header.self = *pxd; 1274 rp->header.flag = sp->header.flag & BT_TYPE; 1275 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1276 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1277 1278 BT_MARK_DIRTY(smp, ip); 1279 /* Don't log it if there are no links to the file */ 1280 if (!test_cflag(COMMIT_Nolink, ip)) { 1281 /* 1282 * acquire a transaction lock on the new right page; 1283 */ 1284 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1285 rxtlck = (struct xtlock *) & tlck->lock; 1286 rxtlck->lwm.offset = XTENTRYSTART; 1287 /* 1288 * acquire a transaction lock on the split page 1289 */ 1290 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1291 sxtlck = (struct xtlock *) & tlck->lock; 1292 } 1293 1294 /* 1295 * initialize/update sibling pointers of <sp> and <rp> 1296 */ 1297 nextbn = le64_to_cpu(sp->header.next); 1298 rp->header.next = cpu_to_le64(nextbn); 1299 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1300 sp->header.next = cpu_to_le64(rbn); 1301 1302 skip = split->index; 1303 1304 /* 1305 * sequential append at tail (after last entry of last page) 1306 * 1307 * if splitting the last page on a level because of appending 1308 * a entry to it (skip is maxentry), it's likely that the access is 1309 * sequential. adding an empty page on the side of the level is less 1310 * work and can push the fill factor much higher than normal. 1311 * if we're wrong it's no big deal - we will do the split the right 1312 * way next time. 1313 * (it may look like it's equally easy to do a similar hack for 1314 * reverse sorted data, that is, split the tree left, but it's not. 1315 * Be my guest.) 
1316 */ 1317 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1318 /* 1319 * acquire a transaction lock on the new/right page; 1320 * 1321 * action: xad insertion; 1322 */ 1323 /* insert entry at the first entry of the new right page */ 1324 xad = &rp->xad[XTENTRYSTART]; 1325 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1326 split->addr); 1327 1328 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1329 1330 if (!test_cflag(COMMIT_Nolink, ip)) { 1331 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1332 rxtlck->lwm.length = 1; 1333 } 1334 1335 *rmpp = rmp; 1336 *rbnp = rbn; 1337 1338 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1339 return 0; 1340 } 1341 1342 /* 1343 * non-sequential insert (at possibly middle page) 1344 */ 1345 1346 /* 1347 * update previous pointer of old next/right page of <sp> 1348 */ 1349 if (nextbn != 0) { 1350 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1351 if (rc) { 1352 XT_PUTPAGE(rmp); 1353 goto clean_up; 1354 } 1355 1356 BT_MARK_DIRTY(mp, ip); 1357 /* 1358 * acquire a transaction lock on the next page; 1359 * 1360 * action:sibling pointer update; 1361 */ 1362 if (!test_cflag(COMMIT_Nolink, ip)) 1363 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1364 1365 p->header.prev = cpu_to_le64(rbn); 1366 1367 /* sibling page may have been updated previously, or 1368 * it may be updated later; 1369 */ 1370 1371 XT_PUTPAGE(mp); 1372 } 1373 1374 /* 1375 * split the data between the split and new/right pages 1376 */ 1377 maxentry = le16_to_cpu(sp->header.maxentry); 1378 middle = maxentry >> 1; 1379 righthalf = maxentry - middle; 1380 1381 /* 1382 * skip index in old split/left page - insert into left page: 1383 */ 1384 if (skip <= middle) { 1385 /* move right half of split page to the new right page */ 1386 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1387 righthalf << L2XTSLOTSIZE); 1388 1389 /* shift right tail of left half to make room for new entry */ 1390 if (skip < middle) 1391 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1392 (middle - skip) << L2XTSLOTSIZE); 1393 1394 /* insert new entry */ 1395 xad = &sp->xad[skip]; 1396 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1397 split->addr); 1398 1399 /* update page header */ 1400 sp->header.nextindex = cpu_to_le16(middle + 1); 1401 if (!test_cflag(COMMIT_Nolink, ip)) { 1402 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1403 min(skip, (int)sxtlck->lwm.offset) : skip; 1404 } 1405 1406 rp->header.nextindex = 1407 cpu_to_le16(XTENTRYSTART + righthalf); 1408 } 1409 /* 1410 * skip index in new right page - insert into right page: 1411 */ 1412 else { 1413 /* move left head of right half to right page */ 1414 n = skip - middle; 1415 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1416 n << L2XTSLOTSIZE); 1417 1418 /* insert new entry */ 1419 n += XTENTRYSTART; 1420 xad = &rp->xad[n]; 1421 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1422 split->addr); 1423 1424 /* move right tail of right half to right page */ 1425 if (skip < maxentry) 1426 memmove(&rp->xad[n + 1], &sp->xad[skip], 1427 (maxentry - skip) << L2XTSLOTSIZE); 1428 1429 /* update page header */ 1430 sp->header.nextindex = cpu_to_le16(middle); 1431 if (!test_cflag(COMMIT_Nolink, ip)) { 1432 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

	return (rc);
}


/*
 *	xtSplitRoot()
 *
 * function:
 *	split the full root page into original/root/split page and new
 *	right page
 *	i.e., root remains fixed in tree anchor (inode) and the root is
 *	copied to a single new right child page since root page <<
 *	non-root page, and the split root page contains a single entry
 *	for the new right child page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp)
 *
 * return:
 *	Pointer to page in which to insert or NULL on error.
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 *	allocate a single (right) child page
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 *	reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which forces the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
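
/*
 * Root split illustration (a sketch): the root lives in the inode and
 * cannot move, so xtSplitRoot() copies its entries into a new child
 * page and leaves the root with a single router entry of key 0:
 *
 *	before:	root: [e0 e1 ... en] (full)
 *	after:	root: [k=0] -> child: [e0 e1 ... en + new entry]
 *
 * The zero key guarantees that the left-most key at every level is
 * smaller than any search key.
 */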

/*
 *	xtExtend()
 *
 * function: extend in-place;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: alloc whole extended extent;
 */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 *	extent overflow: insert entry for new extent
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
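
/*
 * xtExtend() overflow example (illustrative numbers): extending an
 * existing xad of length MAXXLEN - 2 by a delta xlen of 5 leaves the
 * first MAXXLEN blocks in the old xad (now at maximum length) and
 * inserts a second xad of length 3 at offsetXAD(xad) + MAXXLEN,
 * contiguous on disk at addressXAD(xad) + MAXXLEN.
 */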

#ifdef _NOTYET
/*
 *	xtTailgate()
 *
 * function:	split existing 'tail' extent
 *		(split offset >= start offset of tail extent), and
 *		relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: free old split tail extent, alloc new extent;
 */
int xtTailgate(tid_t tid,	/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 *	partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 *	if the leaf page is full, insert the new entry and
	 *	propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 *	insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */

/*
 *	xtUpdate()
 *
 * function: update XAD;
 *
 *	update extent for allocated_but_not_recorded or
 *	compressed extent;
 *
 * parameter:
 *	nxad	- new XAD;
 *		logical extent of the specified XAD must be completely
 *		contained by an existing XAD;
 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* there must exist extent to be updated */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtUpdate: nXAD is not completely contained within XAD");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

#ifdef _JFS_WIP_NOCOALESCE
	if (xoff < nxoff)
		goto updateRight;

	/*
	 *	replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto out;
	} else			/* (nxlen < xlen) */
		goto updateLeft;
#endif /* _JFS_WIP_NOCOALESCE */

/* #ifdef _JFS_WIP_COALESCE */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 *	coalesce with left XAD
	 */
//coalesceLeft: /* (xoff == nxoff) */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ? */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure the
		 * right extent gets logged. If the left one is marked XAD_NEW,
		 * then we know it will be logged. Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 *	replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 *	coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged. If the right one is marked
		 * XAD_NEW, then we know it will be logged. Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(rxad->flag & XAD_NEW))
			rxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen)
			/* truncate XAD */
			XADlength(xad, xlen - nxlen);
		else {		/* (xlen == nxlen) */

			/* remove XAD */
			memmove(&p->xad[index], &p->xad[index + 1],
				(nextindex - index - 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
		}

		goto out;
	} else if (xoff == nxoff)
		goto out;

	if (xoff >= nxoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
		return -EIO;
	}
/* #endif _JFS_WIP_COALESCE */

	/*
	 * split XAD into (lXAD, nXAD):
	 *
	 *          |---nXAD--->
	 * --|----------XAD----------|--
	 *   |-lXAD-|
	 */
      updateRight:		/* (xoff < nxoff) */
	/* truncate old XAD as lXAD:not_recorded */
	xad = &p->xad[index];
	XADlength(xad, nxoff - xoff);

	/* insert nXAD:recorded */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {

		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag & ~XAD_NOTRECORDED;
		split.off = nxoff;
		split.len = nxlen;
		split.addr = nxaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		} else {
			/* is nXAD on new page ? */
			if (newindex >
			    (le16_to_cpu(p->header.maxentry) >> 1)) {
				newindex =
				    newindex -
				    le16_to_cpu(p->header.nextindex) +
				    XTENTRYSTART;
				newpage = 1;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/*
	 * does nXAD force 3-way split ?
	 *
	 *          |---nXAD--->|
	 * --|----------XAD-------------|--
	 *   |-lXAD-|           |-rXAD -|
	 */
	if (nxoff + nxlen == xoff + xlen)
		goto out;

	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
	if (newpage) {
		/* close out old page */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
2269 min(index0, (int)xtlck->lwm.offset) : index0; 2270 xtlck->lwm.length = 2271 le16_to_cpu(p->header.nextindex) - 2272 xtlck->lwm.offset; 2273 } 2274 2275 bn = le64_to_cpu(p->header.next); 2276 XT_PUTPAGE(mp); 2277 2278 /* get new right page */ 2279 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2280 if (rc) 2281 return rc; 2282 2283 BT_MARK_DIRTY(mp, ip); 2284 if (!test_cflag(COMMIT_Nolink, ip)) { 2285 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2286 xtlck = (struct xtlock *) & tlck->lock; 2287 } 2288 2289 index0 = index = newindex; 2290 } else 2291 index++; 2292 2293 newindex = index + 1; 2294 nextindex = le16_to_cpu(p->header.nextindex); 2295 xlen = xlen - (nxoff - xoff); 2296 xoff = nxoff; 2297 xaddr = nxaddr; 2298 2299 /* recompute split pages */ 2300 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2301 XT_PUTPAGE(mp); 2302 2303 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2304 return rc; 2305 2306 /* retrieve search result */ 2307 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2308 2309 if (cmp != 0) { 2310 XT_PUTPAGE(mp); 2311 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2312 return -EIO; 2313 } 2314 2315 if (index0 != index) { 2316 XT_PUTPAGE(mp); 2317 jfs_error(ip->i_sb, 2318 "xtUpdate: unexpected value of index"); 2319 return -EIO; 2320 } 2321 } 2322 2323 /* 2324 * split XAD into (nXAD, rXAD) 2325 * 2326 * ---nXAD---| 2327 * --|----------XAD----------|-- 2328 * |-rXAD-| 2329 */ 2330 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2331 /* update old XAD with nXAD:recorded */ 2332 xad = &p->xad[index]; 2333 *xad = *nxad; 2334 xad->flag = xflag & ~XAD_NOTRECORDED; 2335 2336 /* insert rXAD:not_recorded */ 2337 xoff = xoff + nxlen; 2338 xlen = xlen - nxlen; 2339 xaddr = xaddr + nxlen; 2340 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2341 /* 2342 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2343 */ 2344 /* xtSplitUp() unpins leaf pages */ 2345 split.mp = mp; 2346 split.index = newindex; 2347 split.flag = xflag; 2348 split.off = xoff; 2349 split.len = xlen; 2350 split.addr = xaddr; 2351 split.pxdlist = NULL; 2352 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2353 return rc; 2354 2355 /* get back old page */ 2356 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2357 if (rc) 2358 return rc; 2359 2360 /* 2361 * if leaf root has been split, original root has been 2362 * copied to new child page, i.e., original entry now 2363 * resides on the new child page; 2364 */ 2365 if (p->header.flag & BT_INTERNAL) { 2366 ASSERT(p->header.nextindex == 2367 cpu_to_le16(XTENTRYSTART + 1)); 2368 xad = &p->xad[XTENTRYSTART]; 2369 bn = addressXAD(xad); 2370 XT_PUTPAGE(mp); 2371 2372 /* get new child page */ 2373 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2374 if (rc) 2375 return rc; 2376 2377 BT_MARK_DIRTY(mp, ip); 2378 if (!test_cflag(COMMIT_Nolink, ip)) { 2379 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2380 xtlck = (struct xtlock *) & tlck->lock; 2381 } 2382 } 2383 } else { 2384 /* if insert into middle, shift right remaining entries */ 2385 if (newindex < nextindex) 2386 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2387 (nextindex - newindex) << L2XTSLOTSIZE); 2388 2389 /* insert the entry */ 2390 xad = &p->xad[newindex]; 2391 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2392 2393 /* advance next available entry index. */ 2394 p->header.nextindex = 2395 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2396 } 2397 2398 out: 2399 if (!test_cflag(COMMIT_Nolink, ip)) { 2400 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2401 min(index0, (int)xtlck->lwm.offset) : index0; 2402 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2403 xtlck->lwm.offset; 2404 } 2405 2406 /* unpin the leaf page */ 2407 XT_PUTPAGE(mp); 2408 2409 return rc; 2410 } 2411 2412 2413 /* 2414 * xtAppend() 2415 * 2416 * function: grow in append mode from contiguous region specified; 2417 * 2418 * parameter: 2419 * tid - transaction id; 2420 * ip - file object; 2421 * xflag - extent flag: 2422 * xoff - extent offset; 2423 * maxblocks - max extent length; 2424 * xlen - extent length (in/out); 2425 * xaddrp - extent address pointer (in/out): 2426 * flag - 2427 * 2428 * return: 2429 */ 2430 int xtAppend(tid_t tid, /* transaction id */ 2431 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2432 s32 * xlenp, /* (in/out) */ 2433 s64 * xaddrp, /* (in/out) */ 2434 int flag) 2435 { 2436 int rc = 0; 2437 struct metapage *mp; /* meta-page buffer */ 2438 xtpage_t *p; /* base B+-tree index page */ 2439 s64 bn, xaddr; 2440 int index, nextindex; 2441 struct btstack btstack; /* traverse stack */ 2442 struct xtsplit split; /* split information */ 2443 xad_t *xad; 2444 int cmp; 2445 struct tlock *tlck; 2446 struct xtlock *xtlck; 2447 int nsplit, nblocks, xlen; 2448 struct pxdlist pxdlist; 2449 pxd_t *pxd; 2450 s64 next; 2451 2452 xaddr = *xaddrp; 2453 xlen = *xlenp; 2454 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2455 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2456 2457 /* 2458 * search for the entry location at which to insert: 2459 * 2460 * xtFastSearch() and xtSearch() both return (leaf page 2461 * pinned, index at which to insert). 2462 * n.b. xtSearch() may return index of maxentry of 2463 * the full page. 2464 */ 2465 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2466 return rc; 2467 2468 /* retrieve search result */ 2469 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2470 2471 if (cmp == 0) { 2472 rc = -EEXIST; 2473 goto out; 2474 } 2475 2476 if (next) 2477 xlen = min(xlen, (int)(next - xoff)); 2478 //insert: 2479 /* 2480 * insert entry for new extent 2481 */ 2482 xflag |= XAD_NEW; 2483 2484 /* 2485 * if the leaf page is full, split the page and 2486 * propagate up the router entry for the new page from split 2487 * 2488 * The xtSplitUp() will insert the entry and unpin the leaf page.
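 *
 * For illustration (hypothetical numbers, not taken from the code):
 * with nbperpage == 8 and btstack.nsplit == 2, the loop below reserves
 * two 8-block pxds off the front of the caller's contiguous region,
 * and only then is the data extent allocated from the blocks that
 * remain.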
2489 */ 2490 nextindex = le16_to_cpu(p->header.nextindex); 2491 if (nextindex < le16_to_cpu(p->header.maxentry)) 2492 goto insertLeaf; 2493 2494 /* 2495 * allocate new index blocks to cover index page split(s) 2496 */ 2497 nsplit = btstack.nsplit; 2498 split.pxdlist = &pxdlist; 2499 pxdlist.maxnpxd = pxdlist.npxd = 0; 2500 pxd = &pxdlist.pxd[0]; 2501 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2502 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2503 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2504 PXDaddress(pxd, xaddr); 2505 PXDlength(pxd, nblocks); 2506 2507 pxdlist.maxnpxd++; 2508 2509 continue; 2510 } 2511 2512 /* undo allocation */ 2513 2514 goto out; 2515 } 2516 2517 xlen = min(xlen, maxblocks); 2518 2519 /* 2520 * allocate data extent requested 2521 */ 2522 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2523 goto out; 2524 2525 split.mp = mp; 2526 split.index = index; 2527 split.flag = xflag; 2528 split.off = xoff; 2529 split.len = xlen; 2530 split.addr = xaddr; 2531 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2532 /* undo data extent allocation */ 2533 dbFree(ip, *xaddrp, (s64) * xlenp); 2534 2535 return rc; 2536 } 2537 2538 *xaddrp = xaddr; 2539 *xlenp = xlen; 2540 return 0; 2541 2542 /* 2543 * insert the new entry into the leaf page 2544 */ 2545 insertLeaf: 2546 /* 2547 * allocate data extent requested 2548 */ 2549 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2550 goto out; 2551 2552 BT_MARK_DIRTY(mp, ip); 2553 /* 2554 * acquire a transaction lock on the leaf page; 2555 * 2556 * action: xad insertion/extension; 2557 */ 2558 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2559 xtlck = (struct xtlock *) & tlck->lock; 2560 2561 /* insert the new entry: mark the entry NEW */ 2562 xad = &p->xad[index]; 2563 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2564 2565 /* advance next available entry index */ 2566 le16_add_cpu(&p->header.nextindex, 1); 2567 2568 xtlck->lwm.offset = 2569 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2570 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2571 xtlck->lwm.offset; 2572 2573 *xaddrp = xaddr; 2574 *xlenp = xlen; 2575 2576 out: 2577 /* unpin the leaf page */ 2578 XT_PUTPAGE(mp); 2579 2580 return rc; 2581 } 2582 #ifdef _STILL_TO_PORT 2583 2584 /* - TBD for defragmentaion/reorganization - 2585 * 2586 * xtDelete() 2587 * 2588 * function: 2589 * delete the entry with the specified key. 2590 * 2591 * N.B.: whole extent of the entry is assumed to be deleted. 2592 * 2593 * parameter: 2594 * 2595 * return: 2596 * ENOENT: if the entry is not found. 
2597 * 2598 * exception: 2599 */ 2600 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2601 { 2602 int rc = 0; 2603 struct btstack btstack; 2604 int cmp; 2605 s64 bn; 2606 struct metapage *mp; 2607 xtpage_t *p; 2608 int index, nextindex; 2609 struct tlock *tlck; 2610 struct xtlock *xtlck; 2611 2612 /* 2613 * find the matching entry; xtSearch() pins the page 2614 */ 2615 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2616 return rc; 2617 2618 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2619 if (cmp) { 2620 /* unpin the leaf page */ 2621 XT_PUTPAGE(mp); 2622 return -ENOENT; 2623 } 2624 2625 /* 2626 * delete the entry from the leaf page 2627 */ 2628 nextindex = le16_to_cpu(p->header.nextindex); 2629 le16_add_cpu(&p->header.nextindex, -1); 2630 2631 /* 2632 * if the leaf page bocome empty, free the page 2633 */ 2634 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2635 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2636 2637 BT_MARK_DIRTY(mp, ip); 2638 /* 2639 * acquire a transaction lock on the leaf page; 2640 * 2641 * action:xad deletion; 2642 */ 2643 tlck = txLock(tid, ip, mp, tlckXTREE); 2644 xtlck = (struct xtlock *) & tlck->lock; 2645 xtlck->lwm.offset = 2646 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2647 2648 /* if delete from middle, shift left/compact the remaining entries */ 2649 if (index < nextindex - 1) 2650 memmove(&p->xad[index], &p->xad[index + 1], 2651 (nextindex - index - 1) * sizeof(xad_t)); 2652 2653 XT_PUTPAGE(mp); 2654 2655 return 0; 2656 } 2657 2658 2659 /* - TBD for defragmentaion/reorganization - 2660 * 2661 * xtDeleteUp() 2662 * 2663 * function: 2664 * free empty pages as propagating deletion up the tree 2665 * 2666 * parameter: 2667 * 2668 * return: 2669 */ 2670 static int 2671 xtDeleteUp(tid_t tid, struct inode *ip, 2672 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2673 { 2674 int rc = 0; 2675 struct metapage *mp; 2676 xtpage_t *p; 2677 int index, nextindex; 2678 s64 xaddr; 2679 int xlen; 2680 struct btframe *parent; 2681 struct tlock *tlck; 2682 struct xtlock *xtlck; 2683 2684 /* 2685 * keep root leaf page which has become empty 2686 */ 2687 if (fp->header.flag & BT_ROOT) { 2688 /* keep the root page */ 2689 fp->header.flag &= ~BT_INTERNAL; 2690 fp->header.flag |= BT_LEAF; 2691 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2692 2693 /* XT_PUTPAGE(fmp); */ 2694 2695 return 0; 2696 } 2697 2698 /* 2699 * free non-root leaf page 2700 */ 2701 if ((rc = xtRelink(tid, ip, fp))) { 2702 XT_PUTPAGE(fmp); 2703 return rc; 2704 } 2705 2706 xaddr = addressPXD(&fp->header.self); 2707 xlen = lengthPXD(&fp->header.self); 2708 /* free the page extent */ 2709 dbFree(ip, xaddr, (s64) xlen); 2710 2711 /* free the buffer page */ 2712 discard_metapage(fmp); 2713 2714 /* 2715 * propagate page deletion up the index tree 2716 * 2717 * If the delete from the parent page makes it empty, 2718 * continue all the way up the tree. 2719 * stop if the root page is reached (which is never deleted) or 2720 * if the entry deletion does not empty the page. 2721 */ 2722 while ((parent = BT_POP(btstack)) != NULL) { 2723 /* get/pin the parent page <sp> */ 2724 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2725 if (rc) 2726 return rc; 2727 2728 index = parent->index; 2729 2730 /* delete the entry for the freed child page from parent. 2731 */ 2732 nextindex = le16_to_cpu(p->header.nextindex); 2733 2734 /* 2735 * the parent has the single entry being deleted: 2736 * free the parent page which has become empty. 
2737 */ 2738 if (nextindex == 1) { 2739 if (p->header.flag & BT_ROOT) { 2740 /* keep the root page */ 2741 p->header.flag &= ~BT_INTERNAL; 2742 p->header.flag |= BT_LEAF; 2743 p->header.nextindex = 2744 cpu_to_le16(XTENTRYSTART); 2745 2746 /* XT_PUTPAGE(mp); */ 2747 2748 break; 2749 } else { 2750 /* free the parent page */ 2751 if ((rc = xtRelink(tid, ip, p))) 2752 return rc; 2753 2754 xaddr = addressPXD(&p->header.self); 2755 /* free the page extent */ 2756 dbFree(ip, xaddr, 2757 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2758 2759 /* unpin/free the buffer page */ 2760 discard_metapage(mp); 2761 2762 /* propagate up */ 2763 continue; 2764 } 2765 } 2766 /* 2767 * the parent has other entries remaining: 2768 * delete the router entry from the parent page. 2769 */ 2770 else { 2771 BT_MARK_DIRTY(mp, ip); 2772 /* 2773 * acquire a transaction lock on the leaf page; 2774 * 2775 * action:xad deletion; 2776 */ 2777 tlck = txLock(tid, ip, mp, tlckXTREE); 2778 xtlck = (struct xtlock *) & tlck->lock; 2779 xtlck->lwm.offset = 2780 (xtlck->lwm.offset) ? min(index, 2781 xtlck->lwm. 2782 offset) : index; 2783 2784 /* if delete from middle, 2785 * shift left/compact the remaining entries in the page 2786 */ 2787 if (index < nextindex - 1) 2788 memmove(&p->xad[index], &p->xad[index + 1], 2789 (nextindex - index - 2790 1) << L2XTSLOTSIZE); 2791 2792 le16_add_cpu(&p->header.nextindex, -1); 2793 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2794 (ulong) parent->bn, index); 2795 } 2796 2797 /* unpin the parent page */ 2798 XT_PUTPAGE(mp); 2799 2800 /* exit propagation up */ 2801 break; 2802 } 2803 2804 return 0; 2805 } 2806 2807 2808 /* 2809 * NAME: xtRelocate() 2810 * 2811 * FUNCTION: relocate xtpage or data extent of regular file; 2812 * This function is mainly used by defragfs utility. 2813 * 2814 * NOTE: This routine does not have the logic to handle 2815 * uncommitted allocated extent. The caller should call 2816 * txCommit() to commit all the allocation before call 2817 * this routine. 2818 */ 2819 int 2820 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2821 s64 nxaddr, /* new xaddr */ 2822 int xtype) 2823 { /* extent type: XTPAGE or DATAEXT */ 2824 int rc = 0; 2825 struct tblock *tblk; 2826 struct tlock *tlck; 2827 struct xtlock *xtlck; 2828 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2829 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2830 xad_t *xad; 2831 pxd_t *pxd; 2832 s64 xoff, xsize; 2833 int xlen; 2834 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2835 cbuf_t *cp; 2836 s64 offset, nbytes, nbrd, pno; 2837 int nb, npages, nblks; 2838 s64 bn; 2839 int cmp; 2840 int index; 2841 struct pxd_lock *pxdlock; 2842 struct btstack btstack; /* traverse stack */ 2843 2844 xtype = xtype & EXTENT_TYPE; 2845 2846 xoff = offsetXAD(oxad); 2847 oxaddr = addressXAD(oxad); 2848 xlen = lengthXAD(oxad); 2849 2850 /* validate extent offset */ 2851 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2852 if (offset >= ip->i_size) 2853 return -ESTALE; /* stale extent */ 2854 2855 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2856 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2857 2858 /* 2859 * 1. 
get and validate the parent xtpage/xad entry 2860 * covering the source extent to be relocated; 2861 */ 2862 if (xtype == DATAEXT) { 2863 /* search in leaf entry */ 2864 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2865 if (rc) 2866 return rc; 2867 2868 /* retrieve search result */ 2869 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2870 2871 if (cmp) { 2872 XT_PUTPAGE(pmp); 2873 return -ESTALE; 2874 } 2875 2876 /* validate for exact match with a single entry */ 2877 xad = &pp->xad[index]; 2878 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2879 XT_PUTPAGE(pmp); 2880 return -ESTALE; 2881 } 2882 } else { /* (xtype == XTPAGE) */ 2883 2884 /* search in internal entry */ 2885 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2886 if (rc) 2887 return rc; 2888 2889 /* retrieve search result */ 2890 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2891 2892 if (cmp) { 2893 XT_PUTPAGE(pmp); 2894 return -ESTALE; 2895 } 2896 2897 /* xtSearchNode() validated for exact match with a single entry 2898 */ 2899 xad = &pp->xad[index]; 2900 } 2901 jfs_info("xtRelocate: parent xad entry validated."); 2902 2903 /* 2904 * 2. relocate the extent 2905 */ 2906 if (xtype == DATAEXT) { 2907 /* if the extent is allocated-but-not-recorded 2908 * there is no real data to be moved in this extent, 2909 */ 2910 if (xad->flag & XAD_NOTRECORDED) 2911 goto out; 2912 else 2913 /* release xtpage for cmRead()/xtLookup() */ 2914 XT_PUTPAGE(pmp); 2915 2916 /* 2917 * cmRelocate() 2918 * 2919 * copy target data pages to be relocated; 2920 * 2921 * data extent must start at page boundary and 2922 * multiple of page size (except the last data extent); 2923 * read in each page of the source data extent into cbuf, 2924 * update the cbuf extent descriptor of the page to be 2925 * homeward bound to new dst data extent 2926 * copy the data from the old extent to new extent. 2927 * copy is essential for compressed files to avoid problems 2928 * that can arise if there was a change in compression 2929 * algorithms. 2930 * it is a good strategy because it may disrupt cache 2931 * policy to keep the pages in memory afterwards. 
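 *
 * (sketch of the loop below: each pass reads one cache buffer of at
 * most CM_BSIZE bytes with cmRead(), rebinds it to the destination
 * address with cmSetXD(), and releases it marked modified with
 * cmPut(), so the data lands in the new extent at write-back.)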
2932 */ 2933 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2934 assert((offset & CM_OFFSET) == 0); 2935 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2936 pno = offset >> CM_L2BSIZE; 2937 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2938 /* 2939 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2940 (offset >> CM_L2BSIZE) + 1; 2941 */ 2942 sxaddr = oxaddr; 2943 dxaddr = nxaddr; 2944 2945 /* process the request one cache buffer at a time */ 2946 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2947 offset += nb, pno++, npages--) { 2948 /* compute page size */ 2949 nb = min(nbytes - nbrd, CM_BSIZE); 2950 2951 /* get the cache buffer of the page */ 2952 if (rc = cmRead(ip, offset, npages, &cp)) 2953 break; 2954 2955 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2956 assert(!cp->cm_modified); 2957 2958 /* bind buffer with the new extent address */ 2959 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2960 cmSetXD(ip, cp, pno, dxaddr, nblks); 2961 2962 /* release the cbuf, mark it as modified */ 2963 cmPut(cp, true); 2964 2965 dxaddr += nblks; 2966 sxaddr += nblks; 2967 } 2968 2969 /* get back parent page */ 2970 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2971 return rc; 2972 2973 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2974 jfs_info("xtRelocate: target data extent relocated."); 2975 } else { /* (xtype == XTPAGE) */ 2976 2977 /* 2978 * read in the target xtpage from the source extent; 2979 */ 2980 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2981 if (rc) { 2982 XT_PUTPAGE(pmp); 2983 return rc; 2984 } 2985 2986 /* 2987 * read in sibling pages if any to update sibling pointers; 2988 */ 2989 rmp = NULL; 2990 if (p->header.next) { 2991 nextbn = le64_to_cpu(p->header.next); 2992 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2993 if (rc) { 2994 XT_PUTPAGE(pmp); 2995 XT_PUTPAGE(mp); 2996 return (rc); 2997 } 2998 } 2999 3000 lmp = NULL; 3001 if (p->header.prev) { 3002 prevbn = le64_to_cpu(p->header.prev); 3003 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 3004 if (rc) { 3005 XT_PUTPAGE(pmp); 3006 XT_PUTPAGE(mp); 3007 if (rmp) 3008 XT_PUTPAGE(rmp); 3009 return (rc); 3010 } 3011 } 3012 3013 /* at this point, all xtpages to be updated are in memory */ 3014 3015 /* 3016 * update sibling pointers of sibling xtpages if any; 3017 */ 3018 if (lmp) { 3019 BT_MARK_DIRTY(lmp, ip); 3020 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3021 lp->header.next = cpu_to_le64(nxaddr); 3022 XT_PUTPAGE(lmp); 3023 } 3024 3025 if (rmp) { 3026 BT_MARK_DIRTY(rmp, ip); 3027 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3028 rp->header.prev = cpu_to_le64(nxaddr); 3029 XT_PUTPAGE(rmp); 3030 } 3031 3032 /* 3033 * update the target xtpage to be relocated 3034 * 3035 * update the self address of the target page 3036 * and write to destination extent; 3037 * redo image covers the whole xtpage since it is new page 3038 * to the destination extent; 3039 * update of bmap for the free of source extent 3040 * of the target xtpage itself: 3041 * update of bmap for the allocation of destination extent 3042 * of the target xtpage itself: 3043 * update of bmap for the extents covered by xad entries in 3044 * the target xtpage is not necessary since they are not 3045 * updated; 3046 * if not committed before this relocation, 3047 * target page may contain XAD_NEW entries which must 3048 * be scanned for bmap update (logredo() always 3049 * scan xtpage REDOPAGE image for bmap update); 3050 * if committed before this relocation (tlckRELOCATE), 3051 * scan may be skipped by commit() and logredo(); 3052 */ 3053 BT_MARK_DIRTY(mp, ip); 
3054 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3055 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3056 xtlck = (struct xtlock *) & tlck->lock; 3057 3058 /* update the self address in the xtpage header */ 3059 pxd = &p->header.self; 3060 PXDaddress(pxd, nxaddr); 3061 3062 /* linelock for the after image of the whole page */ 3063 xtlck->lwm.length = 3064 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3065 3066 /* update the buffer extent descriptor of target xtpage */ 3067 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3068 bmSetXD(mp, nxaddr, xsize); 3069 3070 /* unpin the target page to new homeward bound */ 3071 XT_PUTPAGE(mp); 3072 jfs_info("xtRelocate: target xtpage relocated."); 3073 } 3074 3075 /* 3076 * 3. acquire maplock for the source extent to be freed; 3077 * 3078 * acquire a maplock saving the src relocated extent address; 3079 * to free the extent at commit time; 3080 */ 3081 out: 3082 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3083 * free PXD of the source data extent (logredo() will update 3084 * bmap for free of source data extent), and update bmap for 3085 * free of the source data extent; 3086 */ 3087 if (xtype == DATAEXT) 3088 tlck = txMaplock(tid, ip, tlckMAP); 3089 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3090 * for the source xtpage (logredo() will init NoRedoPage 3091 * filter and will also update bmap for free of the source 3092 * xtpage), and update bmap for free of the source xtpage; 3093 * N.B. We use tlckMAP instead of tlckXTREE because there 3094 * is no buffer associated with this lock since the buffer 3095 * has been redirected to the target location. 3096 */ 3097 else /* (xtype == XTPAGE) */ 3098 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3099 3100 pxdlock = (struct pxd_lock *) & tlck->lock; 3101 pxdlock->flag = mlckFREEPXD; 3102 PXDaddress(&pxdlock->pxd, oxaddr); 3103 PXDlength(&pxdlock->pxd, xlen); 3104 pxdlock->index = 1; 3105 3106 /* 3107 * 4. update the parent xad entry for relocation; 3108 * 3109 * acquire tlck for the parent entry with XAD_NEW as entry 3110 * update which will write LOG_REDOPAGE and update bmap for 3111 * allocation of XAD_NEW destination extent; 3112 */ 3113 jfs_info("xtRelocate: update parent xad entry."); 3114 BT_MARK_DIRTY(pmp, ip); 3115 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3116 xtlck = (struct xtlock *) & tlck->lock; 3117 3118 /* update the XAD with the new destination extent; */ 3119 xad = &pp->xad[index]; 3120 xad->flag |= XAD_NEW; 3121 XADaddress(xad, nxaddr); 3122 3123 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3124 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3125 xtlck->lwm.offset; 3126 3127 /* unpin the parent xtpage */ 3128 XT_PUTPAGE(pmp); 3129 3130 return rc; 3131 } 3132 3133 3134 /* 3135 * xtSearchNode() 3136 * 3137 * function: search for the internal xad entry covering the specified extent. 3138 * This function is mainly used by defragfs utility. 3139 * 3140 * parameters: 3141 * ip - file object; 3142 * xad - extent to find; 3143 * cmpp - comparison result: 3144 * btstack - traverse stack; 3145 * flag - search process flag; 3146 * 3147 * returns: 3148 * btstack contains (bn, index) of search path traversed to the entry. 3149 * *cmpp is set to result of comparison with the entry returned. 3150 * the page containing the entry is pinned at exit.
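 *
 * usage sketch (hypothetical caller; the pinned page must be released
 * with XT_PUTPAGE() when the caller is done with it):
 *	rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
 *	if (rc == 0 && cmp == 0)
 *		... exact internal entry found, page pinned ...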
3151 */ 3152 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3153 int *cmpp, struct btstack * btstack, int flag) 3154 { 3155 int rc = 0; 3156 s64 xoff, xaddr; 3157 int xlen; 3158 int cmp = 1; /* init for empty page */ 3159 s64 bn; /* block number */ 3160 struct metapage *mp; /* meta-page buffer */ 3161 xtpage_t *p; /* page */ 3162 int base, index, lim; 3163 struct btframe *btsp; 3164 s64 t64; 3165 3166 BT_CLR(btstack); 3167 3168 xoff = offsetXAD(xad); 3169 xlen = lengthXAD(xad); 3170 xaddr = addressXAD(xad); 3171 3172 /* 3173 * search down tree from root: 3174 * 3175 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3176 * internal page, child page Pi contains entry with key K, Ki <= K < Kj. 3177 * 3178 * if entry with search key K is not found 3179 * internal page search finds the entry with largest key Ki 3180 * less than K which points to the child page to search; 3181 * leaf page search finds the entry with smallest key Kj 3182 * greater than K so that the returned index is the position of 3183 * the entry to be shifted right for insertion of new entry. 3184 * for empty tree, search key is greater than any key of the tree. 3185 * 3186 * by convention, root bn = 0. 3187 */ 3188 for (bn = 0;;) { 3189 /* get/pin the page to search */ 3190 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3191 if (rc) 3192 return rc; 3193 if (p->header.flag & BT_LEAF) { 3194 XT_PUTPAGE(mp); 3195 return -ESTALE; 3196 } 3197 3198 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3199 3200 /* 3201 * binary search with search key K on the current page 3202 */ 3203 for (base = XTENTRYSTART; lim; lim >>= 1) { 3204 index = base + (lim >> 1); 3205 3206 XT_CMP(cmp, xoff, &p->xad[index], t64); 3207 if (cmp == 0) { 3208 /* 3209 * search hit 3210 * 3211 * verify for exact match; 3212 */ 3213 if (xaddr == addressXAD(&p->xad[index]) && 3214 xoff == offsetXAD(&p->xad[index])) { 3215 *cmpp = cmp; 3216 3217 /* save search result */ 3218 btsp = btstack->top; 3219 btsp->bn = bn; 3220 btsp->index = index; 3221 btsp->mp = mp; 3222 3223 return 0; 3224 } 3225 3226 /* descend/search its child page */ 3227 goto next; 3228 } 3229 3230 if (cmp > 0) { 3231 base = index + 1; 3232 --lim; 3233 } 3234 } 3235 3236 /* 3237 * search miss - non-leaf page: 3238 * 3239 * base is the smallest index with key (Kj) greater than 3240 * search key (K) and may be zero or maxentry index. 3241 * if base is non-zero, decrement base by one to get the parent 3242 * entry of the child page to search. 3243 */ 3244 index = base ? base - 1 : base; 3245 3246 /* 3247 * go down to child page 3248 */ 3249 next: 3250 /* get the child page block number */ 3251 bn = addressXAD(&p->xad[index]); 3252 3253 /* unpin the parent page */ 3254 XT_PUTPAGE(mp); 3255 } 3256 } 3257 3258 3259 /* 3260 * xtRelink() 3261 * 3262 * function: 3263 * link around a freed page.
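 *
 * before: ... <-> prev <-> p <-> next <-> ...
 * after: ... <-> prev <---------> next <-> ...
 * (only the sibling pages are updated; freeing p itself is left to
 * the caller.)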
3264 * 3265 * Parameter: 3266 * int tid, 3267 * struct inode *ip, 3268 * xtpage_t *p) 3269 * 3270 * returns: 3271 */ 3272 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3273 { 3274 int rc = 0; 3275 struct metapage *mp; 3276 s64 nextbn, prevbn; 3277 struct tlock *tlck; 3278 3279 nextbn = le64_to_cpu(p->header.next); 3280 prevbn = le64_to_cpu(p->header.prev); 3281 3282 /* update prev pointer of the next page */ 3283 if (nextbn != 0) { 3284 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3285 if (rc) 3286 return rc; 3287 3288 /* 3289 * acquire a transaction lock on the page; 3290 * 3291 * action: update prev pointer; 3292 */ 3293 BT_MARK_DIRTY(mp, ip); 3294 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3295 3296 /* the page may already have been tlock'd */ 3297 3298 p->header.prev = cpu_to_le64(prevbn); 3299 3300 XT_PUTPAGE(mp); 3301 } 3302 3303 /* update next pointer of the previous page */ 3304 if (prevbn != 0) { 3305 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3306 if (rc) 3307 return rc; 3308 3309 /* 3310 * acquire a transaction lock on the page; 3311 * 3312 * action: update next pointer; 3313 */ 3314 BT_MARK_DIRTY(mp, ip); 3315 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3316 3317 /* the page may already have been tlock'd */ 3318 3319 p->header.next = le64_to_cpu(nextbn); 3320 3321 XT_PUTPAGE(mp); 3322 } 3323 3324 return 0; 3325 } 3326 #endif /* _STILL_TO_PORT */ 3327 3328 3329 /* 3330 * xtInitRoot() 3331 * 3332 * initialize file root (inline in inode) 3333 */ 3334 void xtInitRoot(tid_t tid, struct inode *ip) 3335 { 3336 xtpage_t *p; 3337 3338 /* 3339 * acquire a transaction lock on the root 3340 * 3341 * action: 3342 */ 3343 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3344 tlckXTREE | tlckNEW); 3345 p = &JFS_IP(ip)->i_xtroot; 3346 3347 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3348 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3349 3350 if (S_ISDIR(ip->i_mode)) 3351 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3352 else { 3353 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3354 ip->i_size = 0; 3355 } 3356 3357 3358 return; 3359 } 3360 3361 3362 /* 3363 * We can run into a deadlock truncating a file with a large number of 3364 * xtree pages (large fragmented file). A robust fix would entail a 3365 * reservation system where we would reserve a number of metadata pages 3366 * and tlocks which we would be guaranteed without a deadlock. Without 3367 * this, a partial fix is to limit number of metadata pages we will lock 3368 * in a single transaction. Currently we will truncate the file so that 3369 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3370 * will be responsible for ensuring that the current transaction gets 3371 * committed, and that subsequent transactions are created to truncate 3372 * the file further if needed. 3373 */ 3374 #define MAX_TRUNCATE_LEAVES 50 3375 3376 /* 3377 * xtTruncate() 3378 * 3379 * function: 3380 * traverse for truncation logging backward bottom up; 3381 * terminate at the last extent entry at the current subtree 3382 * root page covering new down size. 3383 * truncation may occur within the last extent entry. 3384 * 3385 * parameter: 3386 * int tid, 3387 * struct inode *ip, 3388 * s64 newsize, 3389 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3390 * 3391 * return: 3392 * 3393 * note: 3394 * PWMAP: 3395 * 1. truncate (non-COMMIT_NOLINK file) 3396 * by jfs_truncate() or jfs_open(O_TRUNC): 3397 * xtree is updated; 3398 * 2. 
truncate index table of directory when last entry removed 3399 * map update via tlock at commit time; 3400 * PMAP: 3401 * Call xtTruncate_pmap instead 3402 * WMAP: 3403 * 1. remove (free zero link count) on last reference release 3404 * (pmap has been freed at commit zero link count); 3405 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3406 * xtree is updated; 3407 * map update directly at truncation time; 3408 * 3409 * if (DELETE) 3410 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3411 * else if (TRUNCATE) 3412 * must write LOG_NOREDOPAGE for deleted index page; 3413 * 3414 * pages may already have been tlocked by anonymous transactions 3415 * during file growth (i.e., write) before truncation; 3416 * 3417 * except last truncated entry, deleted entries remain as is 3418 * in the page (nextindex is updated) for other use 3419 * (e.g., log/update allocation map): this avoids copying the page 3420 * info but delays freeing of pages; 3421 * 3422 */ 3423 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3424 { 3425 int rc = 0; 3426 s64 teof; 3427 struct metapage *mp; 3428 xtpage_t *p; 3429 s64 bn; 3430 int index, nextindex; 3431 xad_t *xad; 3432 s64 xoff, xaddr; 3433 int xlen, len, freexlen; 3434 struct btstack btstack; 3435 struct btframe *parent; 3436 struct tblock *tblk = NULL; 3437 struct tlock *tlck = NULL; 3438 struct xtlock *xtlck = NULL; 3439 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3440 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3441 s64 nfreed; 3442 int freed, log; 3443 int locked_leaves = 0; 3444 3445 /* save object truncation type */ 3446 if (tid) { 3447 tblk = tid_to_tblock(tid); 3448 tblk->xflag |= flag; 3449 } 3450 3451 nfreed = 0; 3452 3453 flag &= COMMIT_MAP; 3454 assert(flag != COMMIT_PMAP); 3455 3456 if (flag == COMMIT_PWMAP) 3457 log = 1; 3458 else { 3459 log = 0; 3460 xadlock.flag = mlckFREEXADLIST; 3461 xadlock.index = 1; 3462 } 3463 3464 /* 3465 * if the newsize is not an integral number of pages, 3466 * the file between newsize and next page boundary will 3467 * be cleared. 3468 * if truncating into a file hole, it will cause 3469 * a full block to be allocated for the logical block. 3470 */ 3471 3472 /* 3473 * release page blocks of truncated region <teof, eof> 3474 * 3475 * free the data blocks from the leaf index blocks. 3476 * delete the parent index entries corresponding to 3477 * the freed child data/index blocks. 3478 * free the index blocks themselves which aren't needed 3479 * in new sized file. 3480 * 3481 * index blocks are updated only if the blocks are to be 3482 * retained in the new sized file. 3483 * if type is PMAP, the data and index pages are NOT 3484 * freed, and the data and index blocks are NOT freed 3485 * from working map. 3486 * (this will allow continued access of data/index of 3487 * temporary file (zerolink count file truncated to zero-length)).
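 *
 * (for example, with a 4096-byte block size, newsize == 10000 gives
 * teof == (10000 + 4095) >> 12 == 3: blocks 0..2 are kept and the
 * scan below trims everything from block 3 upward.)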
3488 */ 3489 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3490 JFS_SBI(ip->i_sb)->l2bsize; 3491 3492 /* clear stack */ 3493 BT_CLR(&btstack); 3494 3495 /* 3496 * start with root 3497 * 3498 * root resides in the inode 3499 */ 3500 bn = 0; 3501 3502 /* 3503 * first access of each page: 3504 */ 3505 getPage: 3506 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3507 if (rc) 3508 return rc; 3509 3510 /* process entries backward from last index */ 3511 index = le16_to_cpu(p->header.nextindex) - 1; 3512 3513 3514 /* Since this is the rightmost page at this level, and we may have 3515 * already freed a page that was formerly to the right, let's make 3516 * sure that the next pointer is zero. 3517 */ 3518 if (p->header.next) { 3519 if (log) 3520 /* 3521 * Make sure this change to the header is logged. 3522 * If we really truncate this leaf, the flag 3523 * will be changed to tlckTRUNCATE 3524 */ 3525 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3526 BT_MARK_DIRTY(mp, ip); 3527 p->header.next = 0; 3528 } 3529 3530 if (p->header.flag & BT_INTERNAL) 3531 goto getChild; 3532 3533 /* 3534 * leaf page 3535 */ 3536 freed = 0; 3537 3538 /* does region covered by leaf page precede Teof ? */ 3539 xad = &p->xad[index]; 3540 xoff = offsetXAD(xad); 3541 xlen = lengthXAD(xad); 3542 if (teof >= xoff + xlen) { 3543 XT_PUTPAGE(mp); 3544 goto getParent; 3545 } 3546 3547 /* (re)acquire tlock of the leaf page */ 3548 if (log) { 3549 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3550 /* 3551 * We need to limit the size of the transaction 3552 * to avoid exhausting pagecache & tlocks 3553 */ 3554 XT_PUTPAGE(mp); 3555 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3556 goto getParent; 3557 } 3558 tlck = txLock(tid, ip, mp, tlckXTREE); 3559 tlck->type = tlckXTREE | tlckTRUNCATE; 3560 xtlck = (struct xtlock *) & tlck->lock; 3561 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3562 } 3563 BT_MARK_DIRTY(mp, ip); 3564 3565 /* 3566 * scan backward leaf page entries 3567 */ 3568 for (; index >= XTENTRYSTART; index--) { 3569 xad = &p->xad[index]; 3570 xoff = offsetXAD(xad); 3571 xlen = lengthXAD(xad); 3572 xaddr = addressXAD(xad); 3573 3574 /* 3575 * The "data" for a directory is indexed by the block 3576 * device's address space. This metadata must be invalidated 3577 * here 3578 */ 3579 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3580 invalidate_xad_metapages(ip, *xad); 3581 /* 3582 * entry beyond eof: continue scan of current page 3583 * xad 3584 * ---|---=======-------> 3585 * eof 3586 */ 3587 if (teof < xoff) { 3588 nfreed += xlen; 3589 continue; 3590 } 3591 3592 /* 3593 * (xoff <= teof): last entry to be deleted from page; 3594 * If other entries remain in page: keep and update the page. 3595 */ 3596 3597 /* 3598 * eof == entry_start: delete the entry 3599 * xad 3600 * -------|=======-------> 3601 * eof 3602 * 3603 */ 3604 if (teof == xoff) { 3605 nfreed += xlen; 3606 3607 if (index == XTENTRYSTART) 3608 break; 3609 3610 nextindex = index; 3611 } 3612 /* 3613 * eof within the entry: truncate the entry. 3614 * xad 3615 * -------===|===-------> 3616 * eof 3617 */ 3618 else if (teof < xoff + xlen) { 3619 /* update truncated entry */ 3620 len = teof - xoff; 3621 freexlen = xlen - len; 3622 XADlength(xad, len); 3623 3624 /* save pxd of truncated extent in tlck */ 3625 xaddr += len; 3626 if (log) { /* COMMIT_PWMAP */ 3627 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
min(index, (int)xtlck->lwm.offset) : index; 3629 xtlck->lwm.length = index + 1 - 3630 xtlck->lwm.offset; 3631 xtlck->twm.offset = index; 3632 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3633 pxdlock->flag = mlckFREEPXD; 3634 PXDaddress(&pxdlock->pxd, xaddr); 3635 PXDlength(&pxdlock->pxd, freexlen); 3636 } 3637 /* free truncated extent */ 3638 else { /* COMMIT_WMAP */ 3639 3640 pxdlock = (struct pxd_lock *) & xadlock; 3641 pxdlock->flag = mlckFREEPXD; 3642 PXDaddress(&pxdlock->pxd, xaddr); 3643 PXDlength(&pxdlock->pxd, freexlen); 3644 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3645 3646 /* reset map lock */ 3647 xadlock.flag = mlckFREEXADLIST; 3648 } 3649 3650 /* current entry is new last entry; */ 3651 nextindex = index + 1; 3652 3653 nfreed += freexlen; 3654 } 3655 /* 3656 * eof beyond the entry: 3657 * xad 3658 * -------=======---|---> 3659 * eof 3660 */ 3661 else { /* (xoff + xlen < teof) */ 3662 3663 nextindex = index + 1; 3664 } 3665 3666 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3667 if (!log) { /* COMMIT_WMAP */ 3668 xadlock.xdlist = &p->xad[nextindex]; 3669 xadlock.count = 3670 le16_to_cpu(p->header.nextindex) - 3671 nextindex; 3672 txFreeMap(ip, (struct maplock *) & xadlock, 3673 NULL, COMMIT_WMAP); 3674 } 3675 p->header.nextindex = cpu_to_le16(nextindex); 3676 } 3677 3678 XT_PUTPAGE(mp); 3679 3680 /* assert(freed == 0); */ 3681 goto getParent; 3682 } /* end scan of leaf page entries */ 3683 3684 freed = 1; 3685 3686 /* 3687 * leaf page become empty: free the page if type != PMAP 3688 */ 3689 if (log) { /* COMMIT_PWMAP */ 3690 /* txCommit() with tlckFREE: 3691 * free data extents covered by leaf [XTENTRYSTART:hwm); 3692 * invalidate leaf if COMMIT_PWMAP; 3693 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3694 */ 3695 tlck->type = tlckXTREE | tlckFREE; 3696 } else { /* COMMIT_WMAP */ 3697 3698 /* free data extents covered by leaf */ 3699 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3700 xadlock.count = 3701 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3702 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3703 } 3704 3705 if (p->header.flag & BT_ROOT) { 3706 p->header.flag &= ~BT_INTERNAL; 3707 p->header.flag |= BT_LEAF; 3708 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3709 3710 XT_PUTPAGE(mp); /* debug */ 3711 goto out; 3712 } else { 3713 if (log) { /* COMMIT_PWMAP */ 3714 /* page will be invalidated at tx completion 3715 */ 3716 XT_PUTPAGE(mp); 3717 } else { /* COMMIT_WMAP */ 3718 3719 if (mp->lid) 3720 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3721 3722 /* invalidate empty leaf page */ 3723 discard_metapage(mp); 3724 } 3725 } 3726 3727 /* 3728 * the leaf page become empty: delete the parent entry 3729 * for the leaf page if the parent page is to be kept 3730 * in the new sized file. 3731 */ 3732 3733 /* 3734 * go back up to the parent page 3735 */ 3736 getParent: 3737 /* pop/restore parent entry for the current child page */ 3738 if ((parent = BT_POP(&btstack)) == NULL) 3739 /* current page must have been root */ 3740 goto out; 3741 3742 /* get back the parent page */ 3743 bn = parent->bn; 3744 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3745 if (rc) 3746 return rc; 3747 3748 index = parent->index; 3749 3750 /* 3751 * child page was not empty: 3752 */ 3753 if (freed == 0) { 3754 /* has any entry been deleted from the parent ?
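 * (entries to the right of parent->index cover children beyond the
 * new eof that the backward scan has already freed, so the parent may
 * drop them by lowering nextindex.)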
*/ 3755 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3756 /* (re)acquire tlock on the parent page */ 3757 if (log) { /* COMMIT_PWMAP */ 3758 /* txCommit() with tlckTRUNCATE: 3759 * free child extents covered by parent [); 3760 */ 3761 tlck = txLock(tid, ip, mp, tlckXTREE); 3762 xtlck = (struct xtlock *) & tlck->lock; 3763 if (!(tlck->type & tlckTRUNCATE)) { 3764 xtlck->hwm.offset = 3765 le16_to_cpu(p->header. 3766 nextindex) - 1; 3767 tlck->type = 3768 tlckXTREE | tlckTRUNCATE; 3769 } 3770 } else { /* COMMIT_WMAP */ 3771 3772 /* free child extents covered by parent */ 3773 xadlock.xdlist = &p->xad[index + 1]; 3774 xadlock.count = 3775 le16_to_cpu(p->header.nextindex) - 3776 index - 1; 3777 txFreeMap(ip, (struct maplock *) & xadlock, 3778 NULL, COMMIT_WMAP); 3779 } 3780 BT_MARK_DIRTY(mp, ip); 3781 3782 p->header.nextindex = cpu_to_le16(index + 1); 3783 } 3784 XT_PUTPAGE(mp); 3785 goto getParent; 3786 } 3787 3788 /* 3789 * child page was empty: 3790 */ 3791 nfreed += lengthXAD(&p->xad[index]); 3792 3793 /* 3794 * During working map update, child page's tlock must be handled 3795 * before parent's. This is because the parent's tlock will cause 3796 * the child's disk space to be marked available in the wmap, so 3797 * it's important that the child page be released by that time. 3798 * 3799 * ToDo: tlocks should be on doubly-linked list, so we can 3800 * quickly remove it and add it to the end. 3801 */ 3802 3803 /* 3804 * Move parent page's tlock to the end of the tid's tlock list 3805 */ 3806 if (log && mp->lid && (tblk->last != mp->lid) && 3807 lid_to_tlock(mp->lid)->tid) { 3808 lid_t lid = mp->lid; 3809 struct tlock *prev; 3810 3811 tlck = lid_to_tlock(lid); 3812 3813 if (tblk->next == lid) 3814 tblk->next = tlck->next; 3815 else { 3816 for (prev = lid_to_tlock(tblk->next); 3817 prev->next != lid; 3818 prev = lid_to_tlock(prev->next)) { 3819 assert(prev->next); 3820 } 3821 prev->next = tlck->next; 3822 } 3823 lid_to_tlock(tblk->last)->next = lid; 3824 tlck->next = 0; 3825 tblk->last = lid; 3826 } 3827 3828 /* 3829 * parent page become empty: free the page 3830 */ 3831 if (index == XTENTRYSTART) { 3832 if (log) { /* COMMIT_PWMAP */ 3833 /* txCommit() with tlckFREE: 3834 * free child extents covered by parent; 3835 * invalidate parent if COMMIT_PWMAP; 3836 */ 3837 tlck = txLock(tid, ip, mp, tlckXTREE); 3838 xtlck = (struct xtlock *) & tlck->lock; 3839 xtlck->hwm.offset = 3840 le16_to_cpu(p->header.nextindex) - 1; 3841 tlck->type = tlckXTREE | tlckFREE; 3842 } else { /* COMMIT_WMAP */ 3843 3844 /* free child extents covered by parent */ 3845 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3846 xadlock.count = 3847 le16_to_cpu(p->header.nextindex) - 3848 XTENTRYSTART; 3849 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3850 COMMIT_WMAP); 3851 } 3852 BT_MARK_DIRTY(mp, ip); 3853 3854 if (p->header.flag & BT_ROOT) { 3855 p->header.flag &= ~BT_INTERNAL; 3856 p->header.flag |= BT_LEAF; 3857 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3858 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3859 /* 3860 * Shrink root down to allow inline 3861 * EA (otherwise fsck complains) 3862 */ 3863 p->header.maxentry = 3864 cpu_to_le16(XTROOTINITSLOT); 3865 JFS_IP(ip)->mode2 |= INLINEEA; 3866 } 3867 3868 XT_PUTPAGE(mp); /* debug */ 3869 goto out; 3870 } else { 3871 if (log) { /* COMMIT_PWMAP */ 3872 /* page will be invalidated at tx completion 3873 */ 3874 XT_PUTPAGE(mp); 3875 } else { /* COMMIT_WMAP */ 3876 3877 if (mp->lid) 3878 lid_to_tlock(mp->lid)->flag |= 3879 tlckFREELOCK; 3880 3881 /* 
invalidate parent page */ 3882 discard_metapage(mp); 3883 } 3884 3885 /* parent has become empty and freed: 3886 * go back up to its parent page 3887 */ 3888 /* freed = 1; */ 3889 goto getParent; 3890 } 3891 } 3892 /* 3893 * parent page still has entries for front region; 3894 */ 3895 else { 3896 /* try truncate region covered by preceding entry 3897 * (process backward) 3898 */ 3899 index--; 3900 3901 /* go back down to the child page corresponding 3902 * to the entry 3903 */ 3904 goto getChild; 3905 } 3906 3907 /* 3908 * internal page: go down to child page of current entry 3909 */ 3910 getChild: 3911 /* save current parent entry for the child page */ 3912 if (BT_STACK_FULL(&btstack)) { 3913 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3914 XT_PUTPAGE(mp); 3915 return -EIO; 3916 } 3917 BT_PUSH(&btstack, bn, index); 3918 3919 /* get child page */ 3920 xad = &p->xad[index]; 3921 bn = addressXAD(xad); 3922 3923 /* 3924 * first access of each internal entry: 3925 */ 3926 /* release parent page */ 3927 XT_PUTPAGE(mp); 3928 3929 /* process the child page */ 3930 goto getPage; 3931 3932 out: 3933 /* 3934 * update file resource stat 3935 */ 3936 /* set size 3937 */ 3938 if (S_ISDIR(ip->i_mode) && !newsize) 3939 ip->i_size = 1; /* fsck hates zero-length directories */ 3940 else 3941 ip->i_size = newsize; 3942 3943 /* update quota allocation to reflect freed blocks */ 3944 DQUOT_FREE_BLOCK(ip, nfreed); 3945 3946 /* 3947 * free tlock of invalidated pages 3948 */ 3949 if (flag == COMMIT_WMAP) 3950 txFreelock(ip); 3951 3952 return newsize; 3953 } 3954 3955 3956 /* 3957 * xtTruncate_pmap() 3958 * 3959 * function: 3960 * Perform truncate to zero length for deleted file, leaving 3961 * the xtree and working map untouched. This allows the file to 3962 * be accessed via open file handles, while the delete of the file 3963 * is committed to disk. 3964 * 3965 * parameter: 3966 * tid_t tid, 3967 * struct inode *ip, 3968 * s64 committed_size) 3969 * 3970 * return: new committed size 3971 * 3972 * note: 3973 * 3974 * To avoid deadlock by holding too many transaction locks, the 3975 * truncation may be broken up into multiple transactions. 3976 * The committed_size keeps track of how much of the file has been 3977 * freed from the pmaps.
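 *
 * illustrative driver loop (hypothetical caller that owns tid and
 * commits between passes; error handling elided):
 *	s64 new_size = ip->i_size;
 *	do {
 *		new_size = xtTruncate_pmap(tid, ip, new_size);
 *		... txCommit() and begin a fresh tid ...
 *	} while (new_size > 0);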
3978 */ 3979 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3980 { 3981 s64 bn; 3982 struct btstack btstack; 3983 int cmp; 3984 int index; 3985 int locked_leaves = 0; 3986 struct metapage *mp; 3987 xtpage_t *p; 3988 struct btframe *parent; 3989 int rc; 3990 struct tblock *tblk; 3991 struct tlock *tlck = NULL; 3992 xad_t *xad; 3993 int xlen; 3994 s64 xoff; 3995 struct xtlock *xtlck = NULL; 3996 3997 /* save object truncation type */ 3998 tblk = tid_to_tblock(tid); 3999 tblk->xflag |= COMMIT_PMAP; 4000 4001 /* clear stack */ 4002 BT_CLR(&btstack); 4003 4004 if (committed_size) { 4005 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4006 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4007 if (rc) 4008 return rc; 4009 4010 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4011 4012 if (cmp != 0) { 4013 XT_PUTPAGE(mp); 4014 jfs_error(ip->i_sb, 4015 "xtTruncate_pmap: did not find extent"); 4016 return -EIO; 4017 } 4018 } else { 4019 /* 4020 * start with root 4021 * 4022 * root resides in the inode 4023 */ 4024 bn = 0; 4025 4026 /* 4027 * first access of each page: 4028 */ 4029 getPage: 4030 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4031 if (rc) 4032 return rc; 4033 4034 /* process entries backward from last index */ 4035 index = le16_to_cpu(p->header.nextindex) - 1; 4036 4037 if (p->header.flag & BT_INTERNAL) 4038 goto getChild; 4039 } 4040 4041 /* 4042 * leaf page 4043 */ 4044 4045 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4046 /* 4047 * We need to limit the size of the transaction 4048 * to avoid exhausting pagecache & tlocks 4049 */ 4050 xad = &p->xad[index]; 4051 xoff = offsetXAD(xad); 4052 xlen = lengthXAD(xad); 4053 XT_PUTPAGE(mp); 4054 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4055 } 4056 tlck = txLock(tid, ip, mp, tlckXTREE); 4057 tlck->type = tlckXTREE | tlckFREE; 4058 xtlck = (struct xtlock *) & tlck->lock; 4059 xtlck->hwm.offset = index; 4060 4061 4062 XT_PUTPAGE(mp); 4063 4064 /* 4065 * go back up to the parent page 4066 */ 4067 getParent: 4068 /* pop/restore parent entry for the current child page */ 4069 if ((parent = BT_POP(&btstack)) == NULL) 4070 /* current page must have been root */ 4071 goto out; 4072 4073 /* get back the parent page */ 4074 bn = parent->bn; 4075 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4076 if (rc) 4077 return rc; 4078 4079 index = parent->index; 4080 4081 /* 4082 * parent page become empty: free the page 4083 */ 4084 if (index == XTENTRYSTART) { 4085 /* txCommit() with tlckFREE: 4086 * free child extents covered by parent; 4087 * invalidate parent if COMMIT_PWMAP; 4088 */ 4089 tlck = txLock(tid, ip, mp, tlckXTREE); 4090 xtlck = (struct xtlock *) & tlck->lock; 4091 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 4092 tlck->type = tlckXTREE | tlckFREE; 4093 4094 XT_PUTPAGE(mp); 4095 4096 if (p->header.flag & BT_ROOT) { 4097 4098 goto out; 4099 } else { 4100 goto getParent; 4101 } 4102 } 4103 /* 4104 * parent page still has entries for front region; 4105 */ 4106 else 4107 index--; 4108 /* 4109 * internal page: go down to child page of current entry 4110 */ 4111 getChild: 4112 /* save current parent entry for the child page */ 4113 if (BT_STACK_FULL(&btstack)) { 4114 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 4115 XT_PUTPAGE(mp); 4116 return -EIO; 4117 } 4118 BT_PUSH(&btstack, bn, index); 4119 4120 /* get child page */ 4121 xad = &p->xad[index]; 4122 bn = addressXAD(xad); 4123 4124 /* 4125 * first access of each internal entry: 4126 */ 4127 /* release parent page */ 4128 XT_PUTPAGE(mp); 4129 4130 
/* process the child page */ 4131 goto getPage; 4132 4133 out: 4134 4135 return 0; 4136 } 4137 4138 #ifdef CONFIG_JFS_STATISTICS 4139 static int jfs_xtstat_proc_show(struct seq_file *m, void *v) 4140 { 4141 seq_printf(m, 4142 "JFS Xtree statistics\n" 4143 "====================\n" 4144 "searches = %d\n" 4145 "fast searches = %d\n" 4146 "splits = %d\n", 4147 xtStat.search, 4148 xtStat.fastSearch, 4149 xtStat.split); 4150 return 0; 4151 } 4152 4153 static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) 4154 { 4155 return single_open(file, jfs_xtstat_proc_show, NULL); 4156 } 4157 4158 const struct file_operations jfs_xtstat_proc_fops = { 4159 .owner = THIS_MODULE, 4160 .open = jfs_xtstat_proc_open, 4161 .read = seq_read, 4162 .llseek = seq_lseek, 4163 .release = single_release, 4164 }; 4165 #endif 4166