/*
 * Copyright (C) International Business Machines Corp., 2000-2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001

/*
 * xtree key/entry comparison: extent offset
 *
 * return:
 *	-1: k < start of extent
 *	 0: start_of_extent <= k <= end_of_extent
 *	 1: k > end_of_extent
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}

/* write a xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
do {									\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);	\
	if (!(RC)) {							\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
		    (le16_to_cpu((P)->header.nextindex) >		\
		     le16_to_cpu((P)->header.maxentry)) ||		\
		    (le16_to_cpu((P)->header.maxentry) >		\
		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
			jfs_error((IP)->i_sb,				\
				  "XT_GETPAGE: xtree page corrupt\n");	\
			BT_PUTPAGE(MP);					\
			MP = NULL;					\
			RC = -EIO;					\
		}							\
	}								\
} while (0)

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;
	s16 index;
	u8 flag;
	s64 off;
	s64 addr;
	int len;
	struct pxdlist *pxdlist;
};


/*
 * statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;
	uint fastSearch;
	uint split;
} xtStat;
#endif


/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);

#ifdef _STILL_TO_PORT
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
		      xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif /* _STILL_TO_PORT */

/*
 * xtLookup()
 *
 * function: map a single page into a physical extent;
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size)
			return 0;
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}

/*
 * xtSearch()
 *
 * function: search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entries with key K,
	 * Ki <= K < Kj.
	 *
	 * if the entry with search key K is not found:
	 *   the internal page search finds the entry with the largest
	 *   key Ki less than K, which points to the child page to search;
	 *   the leaf page search finds the entry with the smallest key Kj
	 *   greater than K, so that the returned index is the position of
	 *   the entry to be shifted right for insertion of the new entry.
	 * for an empty tree, the search key is greater than any key of
	 * the tree.
	 *
	 * by convention, root bn = 0.
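	 *
	 * e.g. (illustrative, not from the code): if an internal page
	 * holds router entries with keys {0, 100, 200}, a search for
	 * K = 150 descends through the entry with key 100, since
	 * 100 <= 150 < 200; a search for K = 250 descends through the
	 * last entry (key 200).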
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun!\n");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}

/*
 * xtInsert()
 *
 * function: insert an entry for a new extent into the xtree;
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return: 0 - success; -errno - failure;
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = dquot_alloc_block(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			dquot_free_block(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
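	 *
	 * a sketch of the contract assumed here (taken from the code, not
	 * a separately documented API): xtSplitUp() unpins smp on both
	 * success and failure, so on error this caller only undoes its
	 * own data extent allocation and must not touch mp again.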
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				dquot_free_block(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 * xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ? */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		le16_add_cpu(&sp->header.nextindex, 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
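		 *
		 * e.g. (illustrative): if the child pointer followed during
		 * the search sat at parent index 5, the router entry for
		 * the new right sibling is inserted at index 6, keeping
		 * the parent's keys in sorted order.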
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			le16_add_cpu(&sp->header.nextindex, 1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}


/*
 * xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
 *
 * parameter:
 *	tid_t		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	s64		*rbnp,
 *
 * return:
 *	0 - success; -errno - failure; on success, *rmpp and *rbnp
 *	return the new right page pinned and its block number.
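 *
 * note (assumed from the code below, not separately documented): the
 * new right page is taken from split->pxdlist, which the caller
 * (xtSplitUp) pre-allocated, so the only allocation done here is the
 * quota charge for those blocks.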
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc)
		goto clean_up;

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * an entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
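	 *
	 * illustrative numbers (assuming 4K pages and 16-byte xad slots,
	 * i.e. roughly 256 entries per page): a middle split leaves both
	 * pages about half full, while this append hack leaves the old
	 * page completely full and opens an empty right page, so steadily
	 * appended files pack leaf pages close to 100% instead of ~50%.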
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		dquot_free_block(ip, quota_allocation);

	return (rc);
}


/*
 * xtSplitRoot()
 *
 * function:
 *	split the full root page into original/root/split page and new
 *	right page
 *	i.e., root remains fixed in tree anchor (inode) and the root is
 *	copied to a single new right child page since root page <<
 *	non-root page, and the split root page contains a single entry
 *	for the new right child page.
 *
 * parameter:
 *	tid_t		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp)
 *
 * return:
 *	0 - success; -errno - failure; on success, *rmpp returns the
 *	new right child page pinned.
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int rc;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 * allocate a single (right) child page
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	rc = dquot_alloc_block(ip, lengthPXD(pxd));
	if (rc) {
		release_metapage(rmp);
		return rc;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 * reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which forces the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}


/*
 * xtExtend()
 *
 * function: extend in-place;
 *
 * note: existing extent may or may not have been committed.
 *	caller is responsible for pager buffer cache update, and
 *	working block allocation map update;
 *	update pmap: alloc whole extended extent;
 */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist an extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "extension is not contiguous\n");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 * extent overflow: insert entry for new extent
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
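	 *
	 * worked example (illustrative, assuming MAXXLEN is the 24-bit
	 * xad length limit): extending an extent of length MAXXLEN - 16
	 * by 48 blocks gives a total of MAXXLEN + 32; the old xad is
	 * capped at MAXXLEN below, and a new XAD_NEW entry of length 32
	 * is inserted at offset/address MAXXLEN past the old start.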
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}

#ifdef _NOTYET
/*
 * xtTailgate()
 *
 * function: split existing 'tail' extent
 *	(split offset >= start offset of tail extent), and
 *	relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 *	caller is responsible for pager buffer cache update, and
 *	working block allocation map update;
 *	update pmap: free old split tail extent, alloc new extent;
 */
int xtTailgate(tid_t tid,	/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = 0;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist an extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "couldn't find extent\n");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "the entry found is not the last entry\n");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 * partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
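	 *
	 * e.g. (illustrative): tailgating an extent <xoff 100, xlen 50>
	 * at split offset 120 truncates the old xad to llen = 20 blocks
	 * and inserts a new xad for the relocated/extended tail at
	 * offset 120, while the old tail blocks (rlen = 30) are freed
	 * from the block map in updateOld: below.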
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		le16_add_cpu(&p->header.nextindex, 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */

/*
 * xtUpdate()
 *
 * function: update XAD;
 *
 *	update extent for allocated_but_not_recorded or
 *	compressed extent;
 *
 * parameter:
 *	nxad	- new XAD;
 *		logical extent of the specified XAD must be completely
 *		contained by an existing XAD;
 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* there must exist an extent containing the new XAD */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "Could not find extent\n");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "nXAD is not completely contained within XAD\n");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

#ifdef _JFS_WIP_NOCOALESCE
	if (xoff < nxoff)
		goto updateRight;

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto out;
	} else			/* (nxlen < xlen) */
		goto updateLeft;
#endif				/* _JFS_WIP_NOCOALESCE */

/* #ifdef _JFS_WIP_COALESCE */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 * coalesce with left XAD
	 */
//coalesceLeft: /* (xoff == nxoff) */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ? */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure
		 * the right extent gets logged. If the left one is marked
		 * XAD_NEW, then we know it will be logged. Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 * coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged. If the right one is marked
		 * XAD_NEW, then we know it will be logged. Otherwise, mark as
		 * XAD_EXTENDED
		 */
		if (!(rxad->flag & XAD_NEW))
			rxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen)
			/* truncate XAD */
			XADlength(xad, xlen - nxlen);
		else {		/* (xlen == nxlen) */

			/* remove XAD */
			memmove(&p->xad[index], &p->xad[index + 1],
				(nextindex - index - 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
		}

		goto out;
	} else if (xoff == nxoff)
		goto out;

	if (xoff >= nxoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xoff >= nxoff\n");
		return -EIO;
	}
/* #endif _JFS_WIP_COALESCE */

	/*
	 * split XAD into (lXAD, nXAD):
	 *
	 *          |---nXAD--->
	 * --|----------XAD----------|--
	 *   |-lXAD-|
	 */
      updateRight:		/* (xoff < nxoff) */
	/* truncate old XAD as lXAD:not_recorded */
	xad = &p->xad[index];
	XADlength(xad, nxoff - xoff);

	/* insert nXAD:recorded */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {

		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag & ~XAD_NOTRECORDED;
		split.off = nxoff;
		split.len = nxlen;
		split.addr = nxaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		} else {
			/* is nXAD on new page ? */
			if (newindex >
			    (le16_to_cpu(p->header.maxentry) >> 1)) {
				newindex =
				    newindex -
				    le16_to_cpu(p->header.nextindex) +
				    XTENTRYSTART;
				newpage = 1;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/*
	 * does nXAD force 3-way split ?
	 *
	 *          |---nXAD--->|
	 * --|----------XAD-------------|--
	 *   |-lXAD-|           |-rXAD -|
	 */
	if (nxoff + nxlen == xoff + xlen)
		goto out;

	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
	if (newpage) {
		/* close out old page */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(index0, (int)xtlck->lwm.offset) : index0;
			xtlck->lwm.length =
			    le16_to_cpu(p->header.nextindex) -
			    xtlck->lwm.offset;
		}

		bn = le64_to_cpu(p->header.next);
		XT_PUTPAGE(mp);

		/* get new right page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		BT_MARK_DIRTY(mp, ip);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
		}

		index0 = index = newindex;
	} else
		index++;

	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);
	xlen = xlen - (nxoff - xoff);
	xoff = nxoff;
	xaddr = nxaddr;

	/* recompute split pages */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		XT_PUTPAGE(mp);

		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "xtSearch failed\n");
			return -EIO;
		}

		if (index0 != index) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "unexpected value of index\n");
			return -EIO;
		}
	}

	/*
	 * split XAD into (nXAD, rXAD)
	 *
	 *  ---nXAD---|
	 * --|----------XAD----------|--
	 *                    |-rXAD-|
	 */
      updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
	/* update old XAD with nXAD:recorded */
	xad = &p->xad[index];
	*xad = *nxad;
	xad->flag = xflag & ~XAD_NOTRECORDED;

	/* insert rXAD:not_recorded */
	xoff = xoff + nxlen;
	xlen = xlen - nxlen;
	xaddr = xaddr + nxlen;
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
/*
printf("xtUpdate.updateLeft.split p:0x%p\n", p);
*/
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

      out:
	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index0, (int)xtlck->lwm.offset) : index0;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 *	xtAppend()
 *
 * function: grow in append mode from the contiguous region specified;
 *
 * parameter:
 *	tid		- transaction id;
 *	ip		- file object;
 *	xflag		- extent flag;
 *	xoff		- extent offset;
 *	maxblocks	- max extent length;
 *	xlen		- extent length (in/out);
 *	xaddrp		- extent address pointer (in/out);
 *	flag		-
 *
 * return:
 */
int xtAppend(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
	     s32 * xlenp,	/* (in/out) */
	     s64 * xaddrp,	/* (in/out) */
	     int flag)
{
	int rc = 0;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn, xaddr;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	struct tlock *tlck;
	struct xtlock *xtlck;
	int nsplit, nblocks, xlen;
	struct pxdlist pxdlist;
	pxd_t *pxd;
	s64 next;

	xaddr = *xaddrp;
	xlen = *xlenp;
	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp == 0) {
		rc = -EEXIST;
		goto out;
	}

	if (next)
		xlen = min(xlen, (int)(next - xoff));
//insert:
	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split.
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
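	 *
	 * As an illustration (a sketch, not an exact trace of the split
	 * code): appending into a full leaf L under internal page I
	 * allocates a new right leaf R and inserts a router entry
	 * <K(R), R> into I; for a pure append the split may simply
	 * start R with the new entry alone:
	 *
	 *	I: [.. <K(L),L> ..]     ->  I: [.. <K(L),L> <K(R),R> ..]
	 *	L: [e0 .. e7] (full)        L: [e0 .. e7]   R: [new]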
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex < le16_to_cpu(p->header.maxentry))
		goto insertLeaf;

	/*
	 * allocate new index blocks to cover index page split(s)
	 */
	nsplit = btstack.nsplit;
	split.pxdlist = &pxdlist;
	pxdlist.maxnpxd = pxdlist.npxd = 0;
	pxd = &pxdlist.pxd[0];
	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
			PXDaddress(pxd, xaddr);
			PXDlength(pxd, nblocks);

			pxdlist.maxnpxd++;

			continue;
		}

		/* undo allocation */

		goto out;
	}

	xlen = min(xlen, maxblocks);

	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	split.mp = mp;
	split.index = index;
	split.flag = xflag;
	split.off = xoff;
	split.len = xlen;
	split.addr = xaddr;
	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
		/* undo data extent allocation */
		dbFree(ip, *xaddrp, (s64) * xlenp);

		return rc;
	}

	*xaddrp = xaddr;
	*xlenp = xlen;
	return 0;

	/*
	 *	insert the new entry into the leaf page
	 */
      insertLeaf:
	/*
	 * allocate data extent requested
	 */
	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
		goto out;

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) &tlck->lock;

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	le16_add_cpu(&p->header.nextindex, 1);

	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, (int) xtlck->lwm.offset) : index;
	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
	    xtlck->lwm.offset;

	*xaddrp = xaddr;
	*xlenp = xlen;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#ifdef _STILL_TO_PORT

/* - TBD for defragmentation/reorganization -
 *
 *	xtDelete()
 *
 * function:
 *	delete the entry with the specified key.
 *
 *	N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *
 * return:
 *	ENOENT: if the entry is not found.
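 *	(returned as -ENOENT by the code below; a hedged usage sketch:
 *	 rc = xtDelete(tid, ip, xoff, xlen, 0); note that the xlen and
 *	 flag arguments are not examined by this routine.)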
 *
 * exception:
 */
int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * find the matching entry; xtSearch() pins the page
	 */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
		return rc;

	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
	if (cmp) {
		/* unpin the leaf page */
		XT_PUTPAGE(mp);
		return -ENOENT;
	}

	/*
	 * delete the entry from the leaf page
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	le16_add_cpu(&p->header.nextindex, -1);

	/*
	 * if the leaf page becomes empty, free the page
	 */
	if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
		return (xtDeleteUp(tid, ip, mp, p, &btstack));

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad deletion;
	 */
	tlck = txLock(tid, ip, mp, tlckXTREE);
	xtlck = (struct xtlock *) &tlck->lock;
	xtlck->lwm.offset =
	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;

	/* if delete from middle, shift left/compact the remaining entries */
	if (index < nextindex - 1)
		memmove(&p->xad[index], &p->xad[index + 1],
			(nextindex - index - 1) * sizeof(xad_t));

	XT_PUTPAGE(mp);

	return 0;
}


/* - TBD for defragmentation/reorganization -
 *
 *	xtDeleteUp()
 *
 * function:
 *	free empty pages as propagating deletion up the tree
 *
 * parameter:
 *
 * return:
 */
static int
xtDeleteUp(tid_t tid, struct inode *ip,
	   struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *mp;
	xtpage_t *p;
	int index, nextindex;
	s64 xaddr;
	int xlen;
	struct btframe *parent;
	struct tlock *tlck;
	struct xtlock *xtlck;

	/*
	 * keep root leaf page which has become empty
	 */
	if (fp->header.flag & BT_ROOT) {
		/* keep the root page */
		fp->header.flag &= ~BT_INTERNAL;
		fp->header.flag |= BT_LEAF;
		fp->header.nextindex = cpu_to_le16(XTENTRYSTART);

		/* XT_PUTPAGE(fmp); */

		return 0;
	}

	/*
	 * free non-root leaf page
	 */
	if ((rc = xtRelink(tid, ip, fp))) {
		XT_PUTPAGE(fmp);
		return rc;
	}

	xaddr = addressPXD(&fp->header.self);
	xlen = lengthPXD(&fp->header.self);
	/* free the page extent */
	dbFree(ip, xaddr, (s64) xlen);

	/* free the buffer page */
	discard_metapage(fmp);

	/*
	 * propagate page deletion up the index tree
	 *
	 * If the delete from the parent page makes it empty,
	 * continue all the way up the tree.
	 * stop if the root page is reached (which is never deleted) or
	 * if the entry deletion does not empty the page.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = parent->index;

		/* delete the entry for the freed child page from parent.
		 */
		nextindex = le16_to_cpu(p->header.nextindex);

		/*
		 * the parent has the single entry being deleted:
		 * free the parent page which has become empty.
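		 * (e.g., when leaf L's parent holds only the router
		 *  entry for L, the parent becomes empty in turn, is
		 *  freed, and the scan continues at the grandparent.)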
		 */
		if (nextindex == 1) {
			if (p->header.flag & BT_ROOT) {
				/* keep the root page */
				p->header.flag &= ~BT_INTERNAL;
				p->header.flag |= BT_LEAF;
				p->header.nextindex =
				    cpu_to_le16(XTENTRYSTART);

				/* XT_PUTPAGE(mp); */

				break;
			} else {
				/* free the parent page */
				if ((rc = xtRelink(tid, ip, p)))
					return rc;

				xaddr = addressPXD(&p->header.self);
				/* free the page extent */
				dbFree(ip, xaddr,
				       (s64) JFS_SBI(ip->i_sb)->nbperpage);

				/* unpin/free the buffer page */
				discard_metapage(mp);

				/* propagate up */
				continue;
			}
		}
		/*
		 * the parent has other entries remaining:
		 * delete the router entry from the parent page.
		 */
		else {
			BT_MARK_DIRTY(mp, ip);
			/*
			 * acquire a transaction lock on the leaf page;
			 *
			 * action: xad deletion;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) &tlck->lock;
			xtlck->lwm.offset =
			    (xtlck->lwm.offset) ? min(index,
						      xtlck->lwm.offset) : index;

			/* if delete from middle,
			 * shift left/compact the remaining entries in the page
			 */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			le16_add_cpu(&p->header.nextindex, -1);
			jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
				 (ulong) parent->bn, index);
		}

		/* unpin the parent page */
		XT_PUTPAGE(mp);

		/* exit propagation up */
		break;
	}

	return 0;
}


/*
 * NAME:	xtRelocate()
 *
 * FUNCTION:	relocate xtpage or data extent of regular file;
 *		This function is mainly used by defragfs utility.
 *
 * NOTE:	This routine does not have the logic to handle
 *		uncommitted allocated extent. The caller should call
 *		txCommit() to commit all the allocations before calling
 *		this routine.
 */
int
xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
	   s64 nxaddr,		/* new xaddr */
	   int xtype)
{				/* extent type: XTPAGE or DATAEXT */
	int rc = 0;
	struct tblock *tblk;
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct metapage *mp, *pmp, *lmp, *rmp;	/* meta-page buffer */
	xtpage_t *p, *pp, *rp, *lp;	/* base B+-tree index page */
	xad_t *xad;
	pxd_t *pxd;
	s64 xoff, xsize;
	int xlen;
	s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
	cbuf_t *cp;
	s64 offset, nbytes, nbrd, pno;
	int nb, npages, nblks;
	s64 bn;
	int cmp;
	int index;
	struct pxd_lock *pxdlock;
	struct btstack btstack;	/* traverse stack */

	xtype = xtype & EXTENT_TYPE;

	xoff = offsetXAD(oxad);
	oxaddr = addressXAD(oxad);
	xlen = lengthXAD(oxad);

	/* validate extent offset */
	offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
	if (offset >= ip->i_size)
		return -ESTALE;	/* stale extent */

	jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
		 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);

	/*
	 * 1. get and validate the parent xtpage/xad entry
	 *    covering the source extent to be relocated;
	 */
	if (xtype == DATAEXT) {
		/* search in leaf entry */
		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
		if (rc)
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);

		if (cmp) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}

		/* validate for exact match with a single entry */
		xad = &pp->xad[index];
		if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}
	} else {		/* (xtype == XTPAGE) */

		/* search in internal entry */
		rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
		if (rc)
			return rc;

		/* retrieve search result */
		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);

		if (cmp) {
			XT_PUTPAGE(pmp);
			return -ESTALE;
		}

		/* xtSearchNode() validated for exact match with a single entry
		 */
		xad = &pp->xad[index];
	}
	jfs_info("xtRelocate: parent xad entry validated.");

	/*
	 * 2. relocate the extent
	 */
	if (xtype == DATAEXT) {
		/* if the extent is allocated-but-not-recorded,
		 * there is no real data to be moved in this extent;
		 */
		if (xad->flag & XAD_NOTRECORDED)
			goto out;
		else
			/* release xtpage for cmRead()/xtLookup() */
			XT_PUTPAGE(pmp);

		/*
		 * cmRelocate()
		 *
		 * copy target data pages to be relocated;
		 *
		 * data extent must start at page boundary and
		 * be a multiple of page size (except the last data extent);
		 * read in each page of the source data extent into cbuf,
		 * update the cbuf extent descriptor of the page to be
		 * homeward bound to new dst data extent,
		 * copy the data from the old extent to new extent.
		 * copy is essential for compressed files to avoid problems
		 * that can arise if there was a change in compression
		 * algorithms.
		 * it is a good strategy because it may disrupt cache
		 * policy to keep the pages in memory afterwards.
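		 *
		 * worked example of the paging arithmetic below
		 * (CM_L2BSIZE = 12, i.e. 4K cache buffers, is an
		 * assumption made only for this illustration):
		 *	xoff = 8, l2bsize = 12	=> offset = 32768
		 *	xlen = 3		=> nbytes = 12288
		 *	pno = 32768 >> 12 = 8
		 *	npages = (12288 + 4095) >> 12 = 3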
		 */
		offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
		assert((offset & CM_OFFSET) == 0);
		nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
		pno = offset >> CM_L2BSIZE;
		npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
		/*
		   npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
		   (offset >> CM_L2BSIZE) + 1;
		 */
		sxaddr = oxaddr;
		dxaddr = nxaddr;

		/* process the request one cache buffer at a time */
		for (nbrd = 0; nbrd < nbytes; nbrd += nb,
		     offset += nb, pno++, npages--) {
			/* compute page size */
			nb = min(nbytes - nbrd, CM_BSIZE);

			/* get the cache buffer of the page */
			if ((rc = cmRead(ip, offset, npages, &cp)))
				break;

			assert(addressPXD(&cp->cm_pxd) == sxaddr);
			assert(!cp->cm_modified);

			/* bind buffer with the new extent address */
			nblks = nb >> JFS_SBI(ip->i_sb)->l2bsize;
			cmSetXD(ip, cp, pno, dxaddr, nblks);

			/* release the cbuf, mark it as modified */
			cmPut(cp, true);

			dxaddr += nblks;
			sxaddr += nblks;
		}

		/* get back parent page */
		if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
			return rc;

		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
		jfs_info("xtRelocate: target data extent relocated.");
	} else {		/* (xtype == XTPAGE) */

		/*
		 * read in the target xtpage from the source extent;
		 */
		XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(pmp);
			return rc;
		}

		/*
		 * read in sibling pages if any to update sibling pointers;
		 */
		rmp = NULL;
		if (p->header.next) {
			nextbn = le64_to_cpu(p->header.next);
			XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
			if (rc) {
				XT_PUTPAGE(pmp);
				XT_PUTPAGE(mp);
				return (rc);
			}
		}

		lmp = NULL;
		if (p->header.prev) {
			prevbn = le64_to_cpu(p->header.prev);
			XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
			if (rc) {
				XT_PUTPAGE(pmp);
				XT_PUTPAGE(mp);
				if (rmp)
					XT_PUTPAGE(rmp);
				return (rc);
			}
		}

		/* at this point, all xtpages to be updated are in memory */

		/*
		 * update sibling pointers of sibling xtpages if any;
		 */
		if (lmp) {
			BT_MARK_DIRTY(lmp, ip);
			tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
			lp->header.next = cpu_to_le64(nxaddr);
			XT_PUTPAGE(lmp);
		}

		if (rmp) {
			BT_MARK_DIRTY(rmp, ip);
			tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
			rp->header.prev = cpu_to_le64(nxaddr);
			XT_PUTPAGE(rmp);
		}

		/*
		 * update the target xtpage to be relocated
		 *
		 * update the self address of the target page
		 * and write to destination extent;
		 * redo image covers the whole xtpage since it is new page
		 * to the destination extent;
		 * update of bmap for the free of source extent
		 * of the target xtpage itself:
		 * update of bmap for the allocation of destination extent
		 * of the target xtpage itself:
		 * update of bmap for the extents covered by xad entries in
		 * the target xtpage is not necessary since they are not
		 * updated;
		 * if not committed before this relocation,
		 * target page may contain XAD_NEW entries which must
		 * be scanned for bmap update (logredo() always
		 * scans xtpage REDOPAGE image for bmap update);
		 * if committed before this relocation (tlckRELOCATE),
		 * scan may be skipped by commit() and logredo();
		 */
		BT_MARK_DIRTY(mp, ip);
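		/*
		 * Redirect the page to its new home: take a tlckNEW
		 * lock (so the whole after-image is logged), point the
		 * page's self-PXD at nxaddr, and rebind the buffer so
		 * the page is written out to the destination extent.
		 */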
		/* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) &tlck->lock;

		/* update the self address in the xtpage header */
		pxd = &p->header.self;
		PXDaddress(pxd, nxaddr);

		/* linelock for the after image of the whole page */
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;

		/* update the buffer extent descriptor of target xtpage */
		xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
		bmSetXD(mp, nxaddr, xsize);

		/* unpin the target page to new homeward bound */
		XT_PUTPAGE(mp);
		jfs_info("xtRelocate: target xtpage relocated.");
	}

	/*
	 * 3. acquire maplock for the source extent to be freed;
	 *
	 * acquire a maplock saving the src relocated extent address
	 * to free the extent at commit time;
	 */
      out:
	/* if DATAEXT relocation, write a LOG_UPDATEMAP record for
	 * free PXD of the source data extent (logredo() will update
	 * bmap for free of source data extent), and update bmap for
	 * free of the source data extent;
	 */
	if (xtype == DATAEXT)
		tlck = txMaplock(tid, ip, tlckMAP);
	/* if XTPAGE relocation, write a LOG_NOREDOPAGE record
	 * for the source xtpage (logredo() will init NoRedoPage
	 * filter and will also update bmap for free of the source
	 * xtpage), and update bmap for free of the source xtpage;
	 * N.B. We use tlckMAP instead of tlckXTREE because there
	 * is no buffer associated with this lock since the buffer
	 * has been redirected to the target location.
	 */
	else			/* (xtype == XTPAGE) */
		tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);

	pxdlock = (struct pxd_lock *) &tlck->lock;
	pxdlock->flag = mlckFREEPXD;
	PXDaddress(&pxdlock->pxd, oxaddr);
	PXDlength(&pxdlock->pxd, xlen);
	pxdlock->index = 1;

	/*
	 * 4. update the parent xad entry for relocation;
	 *
	 * acquire tlck for the parent entry with XAD_NEW as entry
	 * update which will write LOG_REDOPAGE and update bmap for
	 * allocation of XAD_NEW destination extent;
	 */
	jfs_info("xtRelocate: update parent xad entry.");
	BT_MARK_DIRTY(pmp, ip);
	tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
	xtlck = (struct xtlock *) &tlck->lock;

	/* update the XAD with the new destination extent; */
	xad = &pp->xad[index];
	xad->flag |= XAD_NEW;
	XADaddress(xad, nxaddr);

	xtlck->lwm.offset = min(index, xtlck->lwm.offset);
	xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
	    xtlck->lwm.offset;

	/* unpin the parent xtpage */
	XT_PUTPAGE(pmp);

	return rc;
}


/*
 *	xtSearchNode()
 *
 * function:	search for the internal xad entry covering specified extent.
 *	This function is mainly used by defragfs utility.
 *
 * parameters:
 *	ip	- file object;
 *	xad	- extent to find;
 *	cmpp	- comparison result;
 *	btstack	- traverse stack;
 *	flag	- search process flag;
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
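 *	(the caller is responsible for unpinning it with XT_PUTPAGE.)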
 */
static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
			int *cmpp, struct btstack * btstack, int flag)
{
	int rc = 0;
	s64 xoff, xaddr;
	int xlen;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* page */
	int base, index, lim;
	struct btframe *btsp;
	s64 t64;

	BT_CLR(btstack);

	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries <Ki, Pi> and <Kj, Pj> of
	 * an internal page, child page Pi contains the entries with
	 * key K, Ki <= K < Kj.
	 *
	 * if an entry with search key K is not found,
	 * internal page search finds the entry with the largest key Ki
	 * less than K, which points to the child page to search;
	 * leaf page search finds the entry with the smallest key Kj
	 * greater than K, so that the returned index is the position of
	 * the entry to be shifted right for insertion of the new entry.
	 * for an empty tree, the search key is greater than any key of
	 * the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		if (p->header.flag & BT_LEAF) {
			XT_PUTPAGE(mp);
			return -ESTALE;
		}

		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 *
				 * verify for exact match;
				 */
				if (xaddr == addressXAD(&p->xad[index]) &&
				    xoff == offsetXAD(&p->xad[index])) {
					*cmpp = cmp;

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					return 0;
				}

				/* descend/search its child page */
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}


/*
 *	xtRelink()
 *
 * function:
 *	link around a freed page.
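 *
 *	before:	... <-> prev <-> p <-> next <-> ...
 *	after:	... <-> prev <--------> next <-> ...
 *	(p's own sibling pointers are left alone; the page itself is
 *	 about to be freed by the caller.)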
 *
 * Parameter:
 *	tid_t		tid,
 *	struct inode	*ip,
 *	xtpage_t	*p)
 *
 * returns:
 */
static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
{
	int rc = 0;
	struct metapage *mp;
	s64 nextbn, prevbn;
	struct tlock *tlck;

	nextbn = le64_to_cpu(p->header.next);
	prevbn = le64_to_cpu(p->header.prev);

	/* update prev pointer of the next page */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * acquire a transaction lock on the page;
		 *
		 * action: update prev pointer;
		 */
		BT_MARK_DIRTY(mp, ip);
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		/* the page may already have been tlock'd */

		p->header.prev = cpu_to_le64(prevbn);

		XT_PUTPAGE(mp);
	}

	/* update next pointer of the previous page */
	if (prevbn != 0) {
		XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/*
		 * acquire a transaction lock on the page;
		 *
		 * action: update next pointer;
		 */
		BT_MARK_DIRTY(mp, ip);
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		/* the page may already have been tlock'd */

		p->header.next = cpu_to_le64(nextbn);

		XT_PUTPAGE(mp);
	}

	return 0;
}
#endif /* _STILL_TO_PORT */


/*
 *	xtInitRoot()
 *
 * initialize file root (inline in inode)
 */
void xtInitRoot(tid_t tid, struct inode *ip)
{
	xtpage_t *p;

	/*
	 * acquire a transaction lock on the root
	 *
	 * action:
	 */
	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
	       tlckXTREE | tlckNEW);
	p = &JFS_IP(ip)->i_xtroot;

	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
	p->header.nextindex = cpu_to_le16(XTENTRYSTART);

	if (S_ISDIR(ip->i_mode))
		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
	else {
		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
		ip->i_size = 0;
	}

	return;
}


/*
 * We can run into a deadlock truncating a file with a large number of
 * xtree pages (large fragmented file). A robust fix would entail a
 * reservation system where we would reserve a number of metadata pages
 * and tlocks which we would be guaranteed without a deadlock. Without
 * this, a partial fix is to limit the number of metadata pages we will
 * lock in a single transaction. Currently we will truncate the file so
 * that no more than 50 leaf pages will be locked. The caller of
 * xtTruncate will be responsible for ensuring that the current
 * transaction gets committed, and that subsequent transactions are
 * created to truncate the file further if needed.
 */
#define MAX_TRUNCATE_LEAVES 50

/*
 *	xtTruncate()
 *
 * function:
 *	traverse for truncation logging backward bottom up;
 *	terminate at the last extent entry at the current subtree
 *	root page covering new down size.
 *	truncation may occur within the last extent entry.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	s64		newsize,
 *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
 *
 * return:
 *
 * note:
 *	PWMAP:
 *	 1. truncate (non-COMMIT_NOLINK file)
 *	    by jfs_truncate() or jfs_open(O_TRUNC):
 *	    xtree is updated;
 *	 2. truncate index table of directory when last entry removed
 *	    map update via tlock at commit time;
 *	PMAP:
 *	 Call xtTruncate_pmap instead
 *	WMAP:
 *	 1. remove (free zero link count) on last reference release
 *	    (pmap has been freed at commit zero link count);
 *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
 *	    xtree is updated;
 *	    map update directly at truncation time;
 *
 *	if (DELETE)
 *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
 *	else if (TRUNCATE)
 *		must write LOG_NOREDOPAGE for deleted index page;
 *
 *	pages may already have been tlocked by anonymous transactions
 *	during file growth (i.e., write) before truncation;
 *
 *	except for the last truncated entry, deleted entries remain as is
 *	in the page (nextindex is updated) for other use
 *	(e.g., log/update allocation map): this avoids copying the page
 *	info but delays the freeing of pages;
 */
s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
{
	int rc = 0;
	s64 teof;
	struct metapage *mp;
	xtpage_t *p;
	s64 bn;
	int index, nextindex;
	xad_t *xad;
	s64 xoff, xaddr;
	int xlen, len, freexlen;
	struct btstack btstack;
	struct btframe *parent;
	struct tblock *tblk = NULL;
	struct tlock *tlck = NULL;
	struct xtlock *xtlck = NULL;
	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
	struct pxd_lock *pxdlock;	/* maplock for COMMIT_WMAP */
	s64 nfreed;
	int freed, log;
	int locked_leaves = 0;

	/* save object truncation type */
	if (tid) {
		tblk = tid_to_tblock(tid);
		tblk->xflag |= flag;
	}

	nfreed = 0;

	flag &= COMMIT_MAP;
	assert(flag != COMMIT_PMAP);

	if (flag == COMMIT_PWMAP)
		log = 1;
	else {
		log = 0;
		xadlock.flag = mlckFREEXADLIST;
		xadlock.index = 1;
	}

	/*
	 * if the newsize is not an integral number of pages,
	 * the file between newsize and next page boundary will
	 * be cleared.
	 * if truncating into a file hole, it will cause
	 * a full block to be allocated for the logical block.
	 */

	/*
	 * release page blocks of truncated region <teof, eof>
	 *
	 * free the data blocks from the leaf index blocks.
	 * delete the parent index entries corresponding to
	 * the freed child data/index blocks.
	 * free the index blocks themselves which aren't needed
	 * in new sized file.
	 *
	 * index blocks are updated only if the blocks are to be
	 * retained in the new sized file.
	 * if type is PMAP, the data and index pages are NOT
	 * freed, and the data and index blocks are NOT freed
	 * from the working map.
	 * (this will allow continued access of data/index of
	 * temporary file (zerolink count file truncated to zero-length)).
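	 *
	 * e.g., assuming a 4K block size (bsize = 4096, l2bsize = 12;
	 * an illustrative assumption, the real values come from the
	 * superblock), newsize = 10000 gives
	 * teof = (10000 + 4095) >> 12 = 3, so everything at or beyond
	 * logical block 3 is freed.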
	 */
	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/* clear stack */
	BT_CLR(&btstack);

	/*
	 * start with root
	 *
	 * root resides in the inode
	 */
	bn = 0;

	/*
	 * first access of each page:
	 */
      getPage:
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	/* process entries backward from last index */
	index = le16_to_cpu(p->header.nextindex) - 1;

	/* Since this is the rightmost page at this level, and we may have
	 * already freed a page that was formerly to the right, let's make
	 * sure that the next pointer is zero.
	 */
	if (p->header.next) {
		if (log)
			/*
			 * Make sure this change to the header is logged.
			 * If we really truncate this leaf, the flag
			 * will be changed to tlckTRUNCATE
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		BT_MARK_DIRTY(mp, ip);
		p->header.next = 0;
	}

	if (p->header.flag & BT_INTERNAL)
		goto getChild;

	/*
	 *	leaf page
	 */
	freed = 0;

	/* does region covered by leaf page precede Teof ? */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	if (teof >= xoff + xlen) {
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/* (re)acquire tlock of the leaf page */
	if (log) {
		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
			/*
			 * We need to limit the size of the transaction
			 * to avoid exhausting pagecache & tlocks
			 */
			XT_PUTPAGE(mp);
			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
			goto getParent;
		}
		tlck = txLock(tid, ip, mp, tlckXTREE);
		tlck->type = tlckXTREE | tlckTRUNCATE;
		xtlck = (struct xtlock *) &tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
	}
	BT_MARK_DIRTY(mp, ip);

	/*
	 *	scan backward leaf page entries
	 */
	for (; index >= XTENTRYSTART; index--) {
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		xaddr = addressXAD(xad);

		/*
		 * The "data" for a directory is indexed by the block
		 * device's address space. This metadata must be invalidated
		 * here
		 */
		if (S_ISDIR(ip->i_mode) && (teof == 0))
			invalidate_xad_metapages(ip, *xad);
		/*
		 * entry beyond eof: continue scan of current page
		 *          xad
		 * ---|---=======------->
		 *   eof
		 */
		if (teof < xoff) {
			nfreed += xlen;
			continue;
		}

		/*
		 * (xoff <= teof): last entry to be deleted from page;
		 * If other entries remain in page: keep and update the page.
		 */

		/*
		 * eof == entry_start: delete the entry
		 *           xad
		 * -------|=======------->
		 *       eof
		 *
		 */
		if (teof == xoff) {
			nfreed += xlen;

			if (index == XTENTRYSTART)
				break;

			nextindex = index;
		}
		/*
		 * eof within the entry: truncate the entry.
		 *          xad
		 * -------===|===------->
		 *          eof
		 */
		else if (teof < xoff + xlen) {
			/* update truncated entry */
			len = teof - xoff;
			freexlen = xlen - len;
			XADlength(xad, len);

			/* save pxd of truncated extent in tlck */
			xaddr += len;
			if (log) {	/* COMMIT_PWMAP */
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(index, (int)xtlck->lwm.offset) : index;
				xtlck->lwm.length = index + 1 -
				    xtlck->lwm.offset;
				xtlck->twm.offset = index;
				pxdlock = (struct pxd_lock *) &xtlck->pxdlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
			}
			/* free truncated extent */
			else {	/* COMMIT_WMAP */

				pxdlock = (struct pxd_lock *) &xadlock;
				pxdlock->flag = mlckFREEPXD;
				PXDaddress(&pxdlock->pxd, xaddr);
				PXDlength(&pxdlock->pxd, freexlen);
				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);

				/* reset map lock */
				xadlock.flag = mlckFREEXADLIST;
			}

			/* current entry is new last entry; */
			nextindex = index + 1;

			nfreed += freexlen;
		}
		/*
		 * eof beyond the entry:
		 *          xad
		 * -------=======---|--->
		 *                 eof
		 */
		else {		/* (xoff + xlen < teof) */

			nextindex = index + 1;
		}

		if (nextindex < le16_to_cpu(p->header.nextindex)) {
			if (!log) {	/* COMMIT_WMAP */
				xadlock.xdlist = &p->xad[nextindex];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    nextindex;
				txFreeMap(ip, (struct maplock *) &xadlock,
					  NULL, COMMIT_WMAP);
			}
			p->header.nextindex = cpu_to_le16(nextindex);
		}

		XT_PUTPAGE(mp);

		/* assert(freed == 0); */
		goto getParent;
	}			/* end scan of leaf page entries */

	freed = 1;

	/*
	 * leaf page becomes empty: free the page if type != PMAP
	 */
	if (log) {		/* COMMIT_PWMAP */
		/* txCommit() with tlckFREE:
		 * free data extents covered by leaf [XTENTRYSTART:hwm);
		 * invalidate leaf if COMMIT_PWMAP;
		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
		 */
		tlck->type = tlckXTREE | tlckFREE;
	} else {		/* COMMIT_WMAP */

		/* free data extents covered by leaf */
		xadlock.xdlist = &p->xad[XTENTRYSTART];
		xadlock.count =
		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
		txFreeMap(ip, (struct maplock *) &xadlock, NULL, COMMIT_WMAP);
	}

	if (p->header.flag & BT_ROOT) {
		p->header.flag &= ~BT_INTERNAL;
		p->header.flag |= BT_LEAF;
		p->header.nextindex = cpu_to_le16(XTENTRYSTART);

		XT_PUTPAGE(mp);	/* debug */
		goto out;
	} else {
		if (log) {	/* COMMIT_PWMAP */
			/* page will be invalidated at tx completion
			 */
			XT_PUTPAGE(mp);
		} else {	/* COMMIT_WMAP */

			if (mp->lid)
				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;

			/* invalidate empty leaf page */
			discard_metapage(mp);
		}
	}

	/*
	 * the leaf page has become empty: delete the parent entry
	 * for the leaf page if the parent page is to be kept
	 * in the new sized file.
	 */

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * child page was not empty:
	 */
	if (freed == 0) {
		/* have any entries been deleted from the parent ?
		 */
		if (index < le16_to_cpu(p->header.nextindex) - 1) {
			/* (re)acquire tlock on the parent page */
			if (log) {	/* COMMIT_PWMAP */
				/* txCommit() with tlckTRUNCATE:
				 * free child extents covered by parent [);
				 */
				tlck = txLock(tid, ip, mp, tlckXTREE);
				xtlck = (struct xtlock *) &tlck->lock;
				if (!(tlck->type & tlckTRUNCATE)) {
					xtlck->hwm.offset =
					    le16_to_cpu(p->header.
							nextindex) - 1;
					tlck->type =
					    tlckXTREE | tlckTRUNCATE;
				}
			} else {	/* COMMIT_WMAP */

				/* free child extents covered by parent */
				xadlock.xdlist = &p->xad[index + 1];
				xadlock.count =
				    le16_to_cpu(p->header.nextindex) -
				    index - 1;
				txFreeMap(ip, (struct maplock *) &xadlock,
					  NULL, COMMIT_WMAP);
			}
			BT_MARK_DIRTY(mp, ip);

			p->header.nextindex = cpu_to_le16(index + 1);
		}
		XT_PUTPAGE(mp);
		goto getParent;
	}

	/*
	 * child page was empty:
	 */
	nfreed += lengthXAD(&p->xad[index]);

	/*
	 * During working map update, child page's tlock must be handled
	 * before parent's. This is because the parent's tlock will cause
	 * the child's disk space to be marked available in the wmap, so
	 * it's important that the child page be released by that time.
	 *
	 * ToDo: tlocks should be on doubly-linked list, so we can
	 * quickly remove it and add it to the end.
	 */

	/*
	 * Move parent page's tlock to the end of the tid's tlock list
	 */
	if (log && mp->lid && (tblk->last != mp->lid) &&
	    lid_to_tlock(mp->lid)->tid) {
		lid_t lid = mp->lid;
		struct tlock *prev;

		tlck = lid_to_tlock(lid);

		if (tblk->next == lid)
			tblk->next = tlck->next;
		else {
			for (prev = lid_to_tlock(tblk->next);
			     prev->next != lid;
			     prev = lid_to_tlock(prev->next)) {
				assert(prev->next);
			}
			prev->next = tlck->next;
		}
		lid_to_tlock(tblk->last)->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}

	/*
	 * parent page becomes empty: free the page
	 */
	if (index == XTENTRYSTART) {
		if (log) {	/* COMMIT_PWMAP */
			/* txCommit() with tlckFREE:
			 * free child extents covered by parent;
			 * invalidate parent if COMMIT_PWMAP;
			 */
			tlck = txLock(tid, ip, mp, tlckXTREE);
			xtlck = (struct xtlock *) &tlck->lock;
			xtlck->hwm.offset =
			    le16_to_cpu(p->header.nextindex) - 1;
			tlck->type = tlckXTREE | tlckFREE;
		} else {	/* COMMIT_WMAP */

			/* free child extents covered by parent */
			xadlock.xdlist = &p->xad[XTENTRYSTART];
			xadlock.count =
			    le16_to_cpu(p->header.nextindex) -
			    XTENTRYSTART;
			txFreeMap(ip, (struct maplock *) &xadlock, NULL,
				  COMMIT_WMAP);
		}
		BT_MARK_DIRTY(mp, ip);

		if (p->header.flag & BT_ROOT) {
			p->header.flag &= ~BT_INTERNAL;
			p->header.flag |= BT_LEAF;
			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
				/*
				 * Shrink root down to allow inline
				 * EA (otherwise fsck complains)
				 */
				p->header.maxentry =
				    cpu_to_le16(XTROOTINITSLOT);
				JFS_IP(ip)->mode2 |= INLINEEA;
			}

			XT_PUTPAGE(mp);	/* debug */
			goto out;
		} else {
			if (log) {	/* COMMIT_PWMAP */
				/* page will be invalidated at tx completion
				 */
				XT_PUTPAGE(mp);
			} else {	/* COMMIT_WMAP */

				if (mp->lid)
					lid_to_tlock(mp->lid)->flag |=
						tlckFREELOCK;

				/* invalidate parent page */
				discard_metapage(mp);
			}

			/* parent has become empty and freed:
			 * go back up to its parent page
			 */
			/* freed = 1; */
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else {
		/* try truncate region covered by preceding entry
		 * (process backward)
		 */
		index--;

		/* go back down to the child page corresponding
		 * to the entry
		 */
		goto getChild;
	}

	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:
	/*
	 * update file resource stat
	 */
	/* set size
	 */
	if (S_ISDIR(ip->i_mode) && !newsize)
		ip->i_size = 1;	/* fsck hates zero-length directories */
	else
		ip->i_size = newsize;

	/* update quota allocation to reflect freed blocks */
	dquot_free_block(ip, nfreed);

	/*
	 * free tlock of invalidated pages
	 */
	if (flag == COMMIT_WMAP)
		txFreelock(ip);

	return newsize;
}


/*
 *	xtTruncate_pmap()
 *
 * function:
 *	Perform truncate to zero length for deleted file, leaving the
 *	xtree and working map untouched. This allows the file to
 *	be accessed via open file handles, while the delete of the file
 *	is committed to disk.
 *
 * parameter:
 *	tid_t		tid,
 *	struct inode	*ip,
 *	s64		committed_size)
 *
 * return: new committed size
 *
 * note:
 *
 *	To avoid deadlock by holding too many transaction locks, the
 *	truncation may be broken up into multiple transactions.
 *	The committed_size keeps track of how much of the file has been
 *	freed from the pmaps.
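 *
 *	a minimal sketch of the expected caller loop (an assumed shape,
 *	using the txBegin()/txCommit()/txEnd() interfaces of the
 *	transaction manager; the real callers live elsewhere and may
 *	differ in detail):
 *
 *		s64 new_size = ip->i_size;
 *		do {
 *			tid = txBegin(ip->i_sb, 0);
 *			new_size = xtTruncate_pmap(tid, ip, new_size);
 *			if (new_size >= 0)
 *				rc = txCommit(tid, 1, &ip, COMMIT_SYNC);
 *			txEnd(tid);
 *		} while (new_size > 0);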
 */
s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
{
	s64 bn;
	struct btstack btstack;
	int cmp;
	int index;
	int locked_leaves = 0;
	struct metapage *mp;
	xtpage_t *p;
	struct btframe *parent;
	int rc;
	struct tblock *tblk;
	struct tlock *tlck = NULL;
	xad_t *xad;
	int xlen;
	s64 xoff;
	struct xtlock *xtlck = NULL;

	/* save object truncation type */
	tblk = tid_to_tblock(tid);
	tblk->xflag |= COMMIT_PMAP;

	/* clear stack */
	BT_CLR(&btstack);

	if (committed_size) {
		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
		if (rc)
			return rc;

		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

		if (cmp != 0) {
			XT_PUTPAGE(mp);
			jfs_error(ip->i_sb, "did not find extent\n");
			return -EIO;
		}
	} else {
		/*
		 * start with root
		 *
		 * root resides in the inode
		 */
		bn = 0;

		/*
		 * first access of each page:
		 */
	      getPage:
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* process entries backward from last index */
		index = le16_to_cpu(p->header.nextindex) - 1;

		if (p->header.flag & BT_INTERNAL)
			goto getChild;
	}

	/*
	 *	leaf page
	 */

	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
		/*
		 * We need to limit the size of the transaction
		 * to avoid exhausting pagecache & tlocks
		 */
		xad = &p->xad[index];
		xoff = offsetXAD(xad);
		xlen = lengthXAD(xad);
		XT_PUTPAGE(mp);
		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
	}
	tlck = txLock(tid, ip, mp, tlckXTREE);
	tlck->type = tlckXTREE | tlckFREE;
	xtlck = (struct xtlock *) &tlck->lock;
	xtlck->hwm.offset = index;

	XT_PUTPAGE(mp);

	/*
	 * go back up to the parent page
	 */
      getParent:
	/* pop/restore parent entry for the current child page */
	if ((parent = BT_POP(&btstack)) == NULL)
		/* current page must have been root */
		goto out;

	/* get back the parent page */
	bn = parent->bn;
	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
	if (rc)
		return rc;

	index = parent->index;

	/*
	 * parent page becomes empty: free the page
	 */
	if (index == XTENTRYSTART) {
		/* txCommit() with tlckFREE:
		 * free child extents covered by parent;
		 * invalidate parent if COMMIT_PWMAP;
		 */
		tlck = txLock(tid, ip, mp, tlckXTREE);
		xtlck = (struct xtlock *) &tlck->lock;
		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
		tlck->type = tlckXTREE | tlckFREE;

		/* check the header before unpinning the page */
		if (p->header.flag & BT_ROOT) {
			XT_PUTPAGE(mp);
			goto out;
		} else {
			XT_PUTPAGE(mp);
			goto getParent;
		}
	}
	/*
	 * parent page still has entries for front region;
	 */
	else
		index--;
	/*
	 *	internal page: go down to child page of current entry
	 */
      getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);

	/* get child page */
	xad = &p->xad[index];
	bn = addressXAD(xad);

	/*
	 * first access of each internal entry:
	 */
	/* release parent page */
	XT_PUTPAGE(mp);

	/* process the child page */
	goto getPage;

      out:

	return 0;
}

#ifdef CONFIG_JFS_STATISTICS
static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		   "JFS Xtree statistics\n"
		   "====================\n"
		   "searches = %d\n"
		   "fast searches = %d\n"
		   "splits = %d\n",
		   xtStat.search,
		   xtStat.fastSearch,
		   xtStat.split);
	return 0;
}

static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_xtstat_proc_show, NULL);
}

const struct file_operations jfs_xtstat_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_xtstat_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
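/*
 * The statistics above are exported read-only through procfs (the
 * proc entry itself is registered elsewhere, in jfs_debug.c);
 * assuming the usual layout, they can be inspected with e.g.:
 *
 *	cat /proc/fs/jfs/xtstat
 */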