1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * jfs_dtree.c: directory B+-tree manager 21 * 22 * B+-tree with variable length key directory: 23 * 24 * each directory page is structured as an array of 32-byte 25 * directory entry slots initialized as a freelist 26 * to avoid search/compaction of free space at insertion. 27 * when an entry is inserted, a number of slots are allocated 28 * from the freelist as required to store variable length data 29 * of the entry; when the entry is deleted, slots of the entry 30 * are returned to freelist. 31 * 32 * leaf entry stores full name as key and file serial number 33 * (aka inode number) as data. 34 * internal/router entry stores sufffix compressed name 35 * as key and simple extent descriptor as data. 36 * 37 * each directory page maintains a sorted entry index table 38 * which stores the start slot index of sorted entries 39 * to allow binary search on the table. 40 * 41 * directory starts as a root/leaf page in on-disk inode 42 * inline data area. 43 * when it becomes full, it starts a leaf of a external extent 44 * of length of 1 block. each time the first leaf becomes full, 45 * it is extended rather than split (its size is doubled), 46 * until its length becoms 4 KBytes, from then the extent is split 47 * with new 4 Kbyte extent when it becomes full 48 * to reduce external fragmentation of small directories. 49 * 50 * blah, blah, blah, for linear scan of directory in pieces by 51 * readdir(). 52 * 53 * 54 * case-insensitive directory file system 55 * 56 * names are stored in case-sensitive way in leaf entry. 57 * but stored, searched and compared in case-insensitive (uppercase) order 58 * (i.e., both search key and entry key are folded for search/compare): 59 * (note that case-sensitive order is BROKEN in storage, e.g., 60 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 61 * 62 * entries which folds to the same key makes up a equivalent class 63 * whose members are stored as contiguous cluster (may cross page boundary) 64 * but whose order is arbitrary and acts as duplicate, e.g., 65 * abc, Abc, aBc, abC) 66 * 67 * once match is found at leaf, requires scan forward/backward 68 * either for, in case-insensitive search, duplicate 69 * or for, in case-sensitive search, for exact match 70 * 71 * router entry must be created/stored in case-insensitive way 72 * in internal entry: 73 * (right most key of left page and left most key of right page 74 * are folded, and its suffix compression is propagated as router 75 * key in parent) 76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 77 * should be made the router key for the split) 78 * 79 * case-insensitive search: 80 * 81 * fold search key; 82 * 83 * case-insensitive search of B-tree: 84 * for internal entry, router key is already folded; 85 * for leaf entry, fold the entry key before comparison. 86 * 87 * if (leaf entry case-insensitive match found) 88 * if (next entry satisfies case-insensitive match) 89 * return EDUPLICATE; 90 * if (prev entry satisfies case-insensitive match) 91 * return EDUPLICATE; 92 * return match; 93 * else 94 * return no match; 95 * 96 * serialization: 97 * target directory inode lock is being held on entry/exit 98 * of all main directory service routines. 99 * 100 * log based recovery: 101 */ 102 103 #include <linux/fs.h> 104 #include <linux/quotaops.h> 105 #include "jfs_incore.h" 106 #include "jfs_superblock.h" 107 #include "jfs_filsys.h" 108 #include "jfs_metapage.h" 109 #include "jfs_dmap.h" 110 #include "jfs_unicode.h" 111 #include "jfs_debug.h" 112 113 /* dtree split parameter */ 114 struct dtsplit { 115 struct metapage *mp; 116 s16 index; 117 s16 nslot; 118 struct component_name *key; 119 ddata_t *data; 120 struct pxdlist *pxdlist; 121 }; 122 123 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 124 125 /* get page buffer for specified block address */ 126 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 127 {\ 128 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ 129 if (!(RC))\ 130 {\ 131 if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ 132 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ 133 {\ 134 BT_PUTPAGE(MP);\ 135 jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ 136 MP = NULL;\ 137 RC = -EIO;\ 138 }\ 139 }\ 140 } 141 142 /* for consistency */ 143 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 144 145 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 146 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 147 148 /* 149 * forward references 150 */ 151 static int dtSplitUp(tid_t tid, struct inode *ip, 152 struct dtsplit * split, struct btstack * btstack); 153 154 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 155 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 156 157 static int dtExtendPage(tid_t tid, struct inode *ip, 158 struct dtsplit * split, struct btstack * btstack); 159 160 static int dtSplitRoot(tid_t tid, struct inode *ip, 161 struct dtsplit * split, struct metapage ** rmpp); 162 163 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 164 dtpage_t * fp, struct btstack * btstack); 165 166 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 167 168 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 169 170 static int dtReadNext(struct inode *ip, 171 loff_t * offset, struct btstack * btstack); 172 173 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 174 175 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 176 int flag); 177 178 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 179 int flag); 180 181 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 182 int ri, struct component_name * key, int flag); 183 184 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 185 ddata_t * data, struct dt_lock **); 186 187 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 188 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 189 int do_index); 190 191 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 192 193 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 194 195 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 196 197 #define ciToUpper(c) UniStrupr((c)->name) 198 199 /* 200 * read_index_page() 201 * 202 * Reads a page of a directory's index table. 203 * Having metadata mapped into the directory inode's address space 204 * presents a multitude of problems. We avoid this by mapping to 205 * the absolute address space outside of the *_metapage routines 206 */ 207 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 208 { 209 int rc; 210 s64 xaddr; 211 int xflag; 212 s32 xlen; 213 214 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 215 if (rc || (xaddr == 0)) 216 return NULL; 217 218 return read_metapage(inode, xaddr, PSIZE, 1); 219 } 220 221 /* 222 * get_index_page() 223 * 224 * Same as get_index_page(), but get's a new page without reading 225 */ 226 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 227 { 228 int rc; 229 s64 xaddr; 230 int xflag; 231 s32 xlen; 232 233 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 234 if (rc || (xaddr == 0)) 235 return NULL; 236 237 return get_metapage(inode, xaddr, PSIZE, 1); 238 } 239 240 /* 241 * find_index() 242 * 243 * Returns dtree page containing directory table entry for specified 244 * index and pointer to its entry. 245 * 246 * mp must be released by caller. 247 */ 248 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 249 struct metapage ** mp, s64 *lblock) 250 { 251 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 252 s64 blkno; 253 s64 offset; 254 int page_offset; 255 struct dir_table_slot *slot; 256 static int maxWarnings = 10; 257 258 if (index < 2) { 259 if (maxWarnings) { 260 jfs_warn("find_entry called with index = %d", index); 261 maxWarnings--; 262 } 263 return NULL; 264 } 265 266 if (index >= jfs_ip->next_index) { 267 jfs_warn("find_entry called with index >= next_index"); 268 return NULL; 269 } 270 271 if (jfs_dirtable_inline(ip)) { 272 /* 273 * Inline directory table 274 */ 275 *mp = NULL; 276 slot = &jfs_ip->i_dirtable[index - 2]; 277 } else { 278 offset = (index - 2) * sizeof(struct dir_table_slot); 279 page_offset = offset & (PSIZE - 1); 280 blkno = ((offset + 1) >> L2PSIZE) << 281 JFS_SBI(ip->i_sb)->l2nbperpage; 282 283 if (*mp && (*lblock != blkno)) { 284 release_metapage(*mp); 285 *mp = NULL; 286 } 287 if (*mp == 0) { 288 *lblock = blkno; 289 *mp = read_index_page(ip, blkno); 290 } 291 if (*mp == 0) { 292 jfs_err("free_index: error reading directory table"); 293 return NULL; 294 } 295 296 slot = 297 (struct dir_table_slot *) ((char *) (*mp)->data + 298 page_offset); 299 } 300 return slot; 301 } 302 303 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 304 u32 index) 305 { 306 struct tlock *tlck; 307 struct linelock *llck; 308 struct lv *lv; 309 310 tlck = txLock(tid, ip, mp, tlckDATA); 311 llck = (struct linelock *) tlck->lock; 312 313 if (llck->index >= llck->maxcnt) 314 llck = txLinelock(llck); 315 lv = &llck->lv[llck->index]; 316 317 /* 318 * Linelock slot size is twice the size of directory table 319 * slot size. 512 entries per page. 320 */ 321 lv->offset = ((index - 2) & 511) >> 1; 322 lv->length = 1; 323 llck->index++; 324 } 325 326 /* 327 * add_index() 328 * 329 * Adds an entry to the directory index table. This is used to provide 330 * each directory entry with a persistent index in which to resume 331 * directory traversals 332 */ 333 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 334 { 335 struct super_block *sb = ip->i_sb; 336 struct jfs_sb_info *sbi = JFS_SBI(sb); 337 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 338 u64 blkno; 339 struct dir_table_slot *dirtab_slot; 340 u32 index; 341 struct linelock *llck; 342 struct lv *lv; 343 struct metapage *mp; 344 s64 offset; 345 uint page_offset; 346 struct tlock *tlck; 347 s64 xaddr; 348 349 ASSERT(DO_INDEX(ip)); 350 351 if (jfs_ip->next_index < 2) { 352 jfs_warn("add_index: next_index = %d. Resetting!", 353 jfs_ip->next_index); 354 jfs_ip->next_index = 2; 355 } 356 357 index = jfs_ip->next_index++; 358 359 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 360 /* 361 * i_size reflects size of index table, or 8 bytes per entry. 362 */ 363 ip->i_size = (loff_t) (index - 1) << 3; 364 365 /* 366 * dir table fits inline within inode 367 */ 368 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 369 dirtab_slot->flag = DIR_INDEX_VALID; 370 dirtab_slot->slot = slot; 371 DTSaddress(dirtab_slot, bn); 372 373 set_cflag(COMMIT_Dirtable, ip); 374 375 return index; 376 } 377 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 378 struct dir_table_slot temp_table[12]; 379 380 /* 381 * It's time to move the inline table to an external 382 * page and begin to build the xtree 383 */ 384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage) || 385 dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) 386 goto clean_up; /* No space */ 387 388 /* 389 * Save the table, we're going to overwrite it with the 390 * xtree root 391 */ 392 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 393 394 /* 395 * Initialize empty x-tree 396 */ 397 xtInitRoot(tid, ip); 398 399 /* 400 * Allocate the first block & add it to the xtree 401 */ 402 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 403 /* This really shouldn't fail */ 404 jfs_warn("add_index: xtInsert failed!"); 405 memcpy(&jfs_ip->i_dirtable, temp_table, 406 sizeof (temp_table)); 407 goto clean_up; 408 } 409 ip->i_size = PSIZE; 410 411 if ((mp = get_index_page(ip, 0)) == 0) { 412 jfs_err("add_index: get_metapage failed!"); 413 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 414 memcpy(&jfs_ip->i_dirtable, temp_table, 415 sizeof (temp_table)); 416 goto clean_up; 417 } 418 tlck = txLock(tid, ip, mp, tlckDATA); 419 llck = (struct linelock *) & tlck->lock; 420 ASSERT(llck->index == 0); 421 lv = &llck->lv[0]; 422 423 lv->offset = 0; 424 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 425 llck->index++; 426 427 memcpy(mp->data, temp_table, sizeof(temp_table)); 428 429 mark_metapage_dirty(mp); 430 release_metapage(mp); 431 432 /* 433 * Logging is now directed by xtree tlocks 434 */ 435 clear_cflag(COMMIT_Dirtable, ip); 436 } 437 438 offset = (index - 2) * sizeof(struct dir_table_slot); 439 page_offset = offset & (PSIZE - 1); 440 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 441 if (page_offset == 0) { 442 /* 443 * This will be the beginning of a new page 444 */ 445 xaddr = 0; 446 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 447 jfs_warn("add_index: xtInsert failed!"); 448 goto clean_up; 449 } 450 ip->i_size += PSIZE; 451 452 if ((mp = get_index_page(ip, blkno))) 453 memset(mp->data, 0, PSIZE); /* Just looks better */ 454 else 455 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 456 } else 457 mp = read_index_page(ip, blkno); 458 459 if (mp == 0) { 460 jfs_err("add_index: get/read_metapage failed!"); 461 goto clean_up; 462 } 463 464 lock_index(tid, ip, mp, index); 465 466 dirtab_slot = 467 (struct dir_table_slot *) ((char *) mp->data + page_offset); 468 dirtab_slot->flag = DIR_INDEX_VALID; 469 dirtab_slot->slot = slot; 470 DTSaddress(dirtab_slot, bn); 471 472 mark_metapage_dirty(mp); 473 release_metapage(mp); 474 475 return index; 476 477 clean_up: 478 479 jfs_ip->next_index--; 480 481 return 0; 482 } 483 484 /* 485 * free_index() 486 * 487 * Marks an entry to the directory index table as free. 488 */ 489 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 490 { 491 struct dir_table_slot *dirtab_slot; 492 s64 lblock; 493 struct metapage *mp = NULL; 494 495 dirtab_slot = find_index(ip, index, &mp, &lblock); 496 497 if (dirtab_slot == 0) 498 return; 499 500 dirtab_slot->flag = DIR_INDEX_FREE; 501 dirtab_slot->slot = dirtab_slot->addr1 = 0; 502 dirtab_slot->addr2 = cpu_to_le32(next); 503 504 if (mp) { 505 lock_index(tid, ip, mp, index); 506 mark_metapage_dirty(mp); 507 release_metapage(mp); 508 } else 509 set_cflag(COMMIT_Dirtable, ip); 510 } 511 512 /* 513 * modify_index() 514 * 515 * Changes an entry in the directory index table 516 */ 517 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 518 int slot, struct metapage ** mp, u64 *lblock) 519 { 520 struct dir_table_slot *dirtab_slot; 521 522 dirtab_slot = find_index(ip, index, mp, lblock); 523 524 if (dirtab_slot == 0) 525 return; 526 527 DTSaddress(dirtab_slot, bn); 528 dirtab_slot->slot = slot; 529 530 if (*mp) { 531 lock_index(tid, ip, *mp, index); 532 mark_metapage_dirty(*mp); 533 } else 534 set_cflag(COMMIT_Dirtable, ip); 535 } 536 537 /* 538 * read_index() 539 * 540 * reads a directory table slot 541 */ 542 static int read_index(struct inode *ip, u32 index, 543 struct dir_table_slot * dirtab_slot) 544 { 545 s64 lblock; 546 struct metapage *mp = NULL; 547 struct dir_table_slot *slot; 548 549 slot = find_index(ip, index, &mp, &lblock); 550 if (slot == 0) { 551 return -EIO; 552 } 553 554 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 555 556 if (mp) 557 release_metapage(mp); 558 559 return 0; 560 } 561 562 /* 563 * dtSearch() 564 * 565 * function: 566 * Search for the entry with specified key 567 * 568 * parameter: 569 * 570 * return: 0 - search result on stack, leaf page pinned; 571 * errno - I/O error 572 */ 573 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 574 struct btstack * btstack, int flag) 575 { 576 int rc = 0; 577 int cmp = 1; /* init for empty page */ 578 s64 bn; 579 struct metapage *mp; 580 dtpage_t *p; 581 s8 *stbl; 582 int base, index, lim; 583 struct btframe *btsp; 584 pxd_t *pxd; 585 int psize = 288; /* initial in-line directory */ 586 ino_t inumber; 587 struct component_name ciKey; 588 struct super_block *sb = ip->i_sb; 589 590 ciKey.name = 591 (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 592 GFP_NOFS); 593 if (ciKey.name == 0) { 594 rc = -ENOMEM; 595 goto dtSearch_Exit2; 596 } 597 598 599 /* uppercase search key for c-i directory */ 600 UniStrcpy(ciKey.name, key->name); 601 ciKey.namlen = key->namlen; 602 603 /* only uppercase if case-insensitive support is on */ 604 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 605 ciToUpper(&ciKey); 606 } 607 BT_CLR(btstack); /* reset stack */ 608 609 /* init level count for max pages to split */ 610 btstack->nsplit = 1; 611 612 /* 613 * search down tree from root: 614 * 615 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 616 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 617 * 618 * if entry with search key K is not found 619 * internal page search find the entry with largest key Ki 620 * less than K which point to the child page to search; 621 * leaf page search find the entry with smallest key Kj 622 * greater than K so that the returned index is the position of 623 * the entry to be shifted right for insertion of new entry. 624 * for empty tree, search key is greater than any key of the tree. 625 * 626 * by convention, root bn = 0. 627 */ 628 for (bn = 0;;) { 629 /* get/pin the page to search */ 630 DT_GETPAGE(ip, bn, mp, psize, p, rc); 631 if (rc) 632 goto dtSearch_Exit1; 633 634 /* get sorted entry table of the page */ 635 stbl = DT_GETSTBL(p); 636 637 /* 638 * binary search with search key K on the current page. 639 */ 640 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 641 index = base + (lim >> 1); 642 643 if (p->header.flag & BT_LEAF) { 644 /* uppercase leaf name to compare */ 645 cmp = 646 ciCompare(&ciKey, p, stbl[index], 647 JFS_SBI(sb)->mntflag); 648 } else { 649 /* router key is in uppercase */ 650 651 cmp = dtCompare(&ciKey, p, stbl[index]); 652 653 654 } 655 if (cmp == 0) { 656 /* 657 * search hit 658 */ 659 /* search hit - leaf page: 660 * return the entry found 661 */ 662 if (p->header.flag & BT_LEAF) { 663 inumber = le32_to_cpu( 664 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 665 666 /* 667 * search for JFS_LOOKUP 668 */ 669 if (flag == JFS_LOOKUP) { 670 *data = inumber; 671 rc = 0; 672 goto out; 673 } 674 675 /* 676 * search for JFS_CREATE 677 */ 678 if (flag == JFS_CREATE) { 679 *data = inumber; 680 rc = -EEXIST; 681 goto out; 682 } 683 684 /* 685 * search for JFS_REMOVE or JFS_RENAME 686 */ 687 if ((flag == JFS_REMOVE || 688 flag == JFS_RENAME) && 689 *data != inumber) { 690 rc = -ESTALE; 691 goto out; 692 } 693 694 /* 695 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 696 */ 697 /* save search result */ 698 *data = inumber; 699 btsp = btstack->top; 700 btsp->bn = bn; 701 btsp->index = index; 702 btsp->mp = mp; 703 704 rc = 0; 705 goto dtSearch_Exit1; 706 } 707 708 /* search hit - internal page: 709 * descend/search its child page 710 */ 711 goto getChild; 712 } 713 714 if (cmp > 0) { 715 base = index + 1; 716 --lim; 717 } 718 } 719 720 /* 721 * search miss 722 * 723 * base is the smallest index with key (Kj) greater than 724 * search key (K) and may be zero or (maxindex + 1) index. 725 */ 726 /* 727 * search miss - leaf page 728 * 729 * return location of entry (base) where new entry with 730 * search key K is to be inserted. 731 */ 732 if (p->header.flag & BT_LEAF) { 733 /* 734 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 735 */ 736 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 737 flag == JFS_RENAME) { 738 rc = -ENOENT; 739 goto out; 740 } 741 742 /* 743 * search for JFS_CREATE|JFS_FINDDIR: 744 * 745 * save search result 746 */ 747 *data = 0; 748 btsp = btstack->top; 749 btsp->bn = bn; 750 btsp->index = base; 751 btsp->mp = mp; 752 753 rc = 0; 754 goto dtSearch_Exit1; 755 } 756 757 /* 758 * search miss - internal page 759 * 760 * if base is non-zero, decrement base by one to get the parent 761 * entry of the child page to search. 762 */ 763 index = base ? base - 1 : base; 764 765 /* 766 * go down to child page 767 */ 768 getChild: 769 /* update max. number of pages to split */ 770 if (BT_STACK_FULL(btstack)) { 771 /* Something's corrupted, mark filesytem dirty so 772 * chkdsk will fix it. 773 */ 774 jfs_error(sb, "stack overrun in dtSearch!"); 775 BT_STACK_DUMP(btstack); 776 rc = -EIO; 777 goto out; 778 } 779 btstack->nsplit++; 780 781 /* push (bn, index) of the parent page/entry */ 782 BT_PUSH(btstack, bn, index); 783 784 /* get the child page block number */ 785 pxd = (pxd_t *) & p->slot[stbl[index]]; 786 bn = addressPXD(pxd); 787 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 788 789 /* unpin the parent page */ 790 DT_PUTPAGE(mp); 791 } 792 793 out: 794 DT_PUTPAGE(mp); 795 796 dtSearch_Exit1: 797 798 kfree(ciKey.name); 799 800 dtSearch_Exit2: 801 802 return rc; 803 } 804 805 806 /* 807 * dtInsert() 808 * 809 * function: insert an entry to directory tree 810 * 811 * parameter: 812 * 813 * return: 0 - success; 814 * errno - failure; 815 */ 816 int dtInsert(tid_t tid, struct inode *ip, 817 struct component_name * name, ino_t * fsn, struct btstack * btstack) 818 { 819 int rc = 0; 820 struct metapage *mp; /* meta-page buffer */ 821 dtpage_t *p; /* base B+-tree index page */ 822 s64 bn; 823 int index; 824 struct dtsplit split; /* split information */ 825 ddata_t data; 826 struct dt_lock *dtlck; 827 int n; 828 struct tlock *tlck; 829 struct lv *lv; 830 831 /* 832 * retrieve search result 833 * 834 * dtSearch() returns (leaf page pinned, index at which to insert). 835 * n.b. dtSearch() may return index of (maxindex + 1) of 836 * the full page. 837 */ 838 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 839 840 /* 841 * insert entry for new key 842 */ 843 if (DO_INDEX(ip)) { 844 if (JFS_IP(ip)->next_index == DIREND) { 845 DT_PUTPAGE(mp); 846 return -EMLINK; 847 } 848 n = NDTLEAF(name->namlen); 849 data.leaf.tid = tid; 850 data.leaf.ip = ip; 851 } else { 852 n = NDTLEAF_LEGACY(name->namlen); 853 data.leaf.ip = NULL; /* signifies legacy directory format */ 854 } 855 data.leaf.ino = *fsn; 856 857 /* 858 * leaf page does not have enough room for new entry: 859 * 860 * extend/split the leaf page; 861 * 862 * dtSplitUp() will insert the entry and unpin the leaf page. 863 */ 864 if (n > p->header.freecnt) { 865 split.mp = mp; 866 split.index = index; 867 split.nslot = n; 868 split.key = name; 869 split.data = &data; 870 rc = dtSplitUp(tid, ip, &split, btstack); 871 return rc; 872 } 873 874 /* 875 * leaf page does have enough room for new entry: 876 * 877 * insert the new data entry into the leaf page; 878 */ 879 BT_MARK_DIRTY(mp, ip); 880 /* 881 * acquire a transaction lock on the leaf page 882 */ 883 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 884 dtlck = (struct dt_lock *) & tlck->lock; 885 ASSERT(dtlck->index == 0); 886 lv = & dtlck->lv[0]; 887 888 /* linelock header */ 889 lv->offset = 0; 890 lv->length = 1; 891 dtlck->index++; 892 893 dtInsertEntry(p, index, name, &data, &dtlck); 894 895 /* linelock stbl of non-root leaf page */ 896 if (!(p->header.flag & BT_ROOT)) { 897 if (dtlck->index >= dtlck->maxcnt) 898 dtlck = (struct dt_lock *) txLinelock(dtlck); 899 lv = & dtlck->lv[dtlck->index]; 900 n = index >> L2DTSLOTSIZE; 901 lv->offset = p->header.stblindex + n; 902 lv->length = 903 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 904 dtlck->index++; 905 } 906 907 /* unpin the leaf page */ 908 DT_PUTPAGE(mp); 909 910 return 0; 911 } 912 913 914 /* 915 * dtSplitUp() 916 * 917 * function: propagate insertion bottom up; 918 * 919 * parameter: 920 * 921 * return: 0 - success; 922 * errno - failure; 923 * leaf page unpinned; 924 */ 925 static int dtSplitUp(tid_t tid, 926 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 927 { 928 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 929 int rc = 0; 930 struct metapage *smp; 931 dtpage_t *sp; /* split page */ 932 struct metapage *rmp; 933 dtpage_t *rp; /* new right page split from sp */ 934 pxd_t rpxd; /* new right page extent descriptor */ 935 struct metapage *lmp; 936 dtpage_t *lp; /* left child page */ 937 int skip; /* index of entry of insertion */ 938 struct btframe *parent; /* parent page entry on traverse stack */ 939 s64 xaddr, nxaddr; 940 int xlen, xsize; 941 struct pxdlist pxdlist; 942 pxd_t *pxd; 943 struct component_name key = { 0, NULL }; 944 ddata_t *data = split->data; 945 int n; 946 struct dt_lock *dtlck; 947 struct tlock *tlck; 948 struct lv *lv; 949 int quota_allocation = 0; 950 951 /* get split page */ 952 smp = split->mp; 953 sp = DT_PAGE(ip, smp); 954 955 key.name = 956 (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), 957 GFP_NOFS); 958 if (key.name == 0) { 959 DT_PUTPAGE(smp); 960 rc = -ENOMEM; 961 goto dtSplitUp_Exit; 962 } 963 964 /* 965 * split leaf page 966 * 967 * The split routines insert the new entry, and 968 * acquire txLock as appropriate. 969 */ 970 /* 971 * split root leaf page: 972 */ 973 if (sp->header.flag & BT_ROOT) { 974 /* 975 * allocate a single extent child page 976 */ 977 xlen = 1; 978 n = sbi->bsize >> L2DTSLOTSIZE; 979 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 980 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 981 if (n <= split->nslot) 982 xlen++; 983 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 984 DT_PUTPAGE(smp); 985 goto freeKeyName; 986 } 987 988 pxdlist.maxnpxd = 1; 989 pxdlist.npxd = 0; 990 pxd = &pxdlist.pxd[0]; 991 PXDaddress(pxd, xaddr); 992 PXDlength(pxd, xlen); 993 split->pxdlist = &pxdlist; 994 rc = dtSplitRoot(tid, ip, split, &rmp); 995 996 if (rc) 997 dbFree(ip, xaddr, xlen); 998 else 999 DT_PUTPAGE(rmp); 1000 1001 DT_PUTPAGE(smp); 1002 1003 goto freeKeyName; 1004 } 1005 1006 /* 1007 * extend first leaf page 1008 * 1009 * extend the 1st extent if less than buffer page size 1010 * (dtExtendPage() reurns leaf page unpinned) 1011 */ 1012 pxd = &sp->header.self; 1013 xlen = lengthPXD(pxd); 1014 xsize = xlen << sbi->l2bsize; 1015 if (xsize < PSIZE) { 1016 xaddr = addressPXD(pxd); 1017 n = xsize >> L2DTSLOTSIZE; 1018 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1019 if ((n + sp->header.freecnt) <= split->nslot) 1020 n = xlen + (xlen << 1); 1021 else 1022 n = xlen; 1023 1024 /* Allocate blocks to quota. */ 1025 if (DQUOT_ALLOC_BLOCK(ip, n)) { 1026 rc = -EDQUOT; 1027 goto extendOut; 1028 } 1029 quota_allocation += n; 1030 1031 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1032 (s64) n, &nxaddr))) 1033 goto extendOut; 1034 1035 pxdlist.maxnpxd = 1; 1036 pxdlist.npxd = 0; 1037 pxd = &pxdlist.pxd[0]; 1038 PXDaddress(pxd, nxaddr) 1039 PXDlength(pxd, xlen + n); 1040 split->pxdlist = &pxdlist; 1041 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1042 nxaddr = addressPXD(pxd); 1043 if (xaddr != nxaddr) { 1044 /* free relocated extent */ 1045 xlen = lengthPXD(pxd); 1046 dbFree(ip, nxaddr, (s64) xlen); 1047 } else { 1048 /* free extended delta */ 1049 xlen = lengthPXD(pxd) - n; 1050 xaddr = addressPXD(pxd) + xlen; 1051 dbFree(ip, xaddr, (s64) n); 1052 } 1053 } 1054 1055 extendOut: 1056 DT_PUTPAGE(smp); 1057 goto freeKeyName; 1058 } 1059 1060 /* 1061 * split leaf page <sp> into <sp> and a new right page <rp>. 1062 * 1063 * return <rp> pinned and its extent descriptor <rpxd> 1064 */ 1065 /* 1066 * allocate new directory page extent and 1067 * new index page(s) to cover page split(s) 1068 * 1069 * allocation hint: ? 1070 */ 1071 n = btstack->nsplit; 1072 pxdlist.maxnpxd = pxdlist.npxd = 0; 1073 xlen = sbi->nbperpage; 1074 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1075 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1076 PXDaddress(pxd, xaddr); 1077 PXDlength(pxd, xlen); 1078 pxdlist.maxnpxd++; 1079 continue; 1080 } 1081 1082 DT_PUTPAGE(smp); 1083 1084 /* undo allocation */ 1085 goto splitOut; 1086 } 1087 1088 split->pxdlist = &pxdlist; 1089 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1090 DT_PUTPAGE(smp); 1091 1092 /* undo allocation */ 1093 goto splitOut; 1094 } 1095 1096 /* 1097 * propagate up the router entry for the leaf page just split 1098 * 1099 * insert a router entry for the new page into the parent page, 1100 * propagate the insert/split up the tree by walking back the stack 1101 * of (bn of parent page, index of child page entry in parent page) 1102 * that were traversed during the search for the page that split. 1103 * 1104 * the propagation of insert/split up the tree stops if the root 1105 * splits or the page inserted into doesn't have to split to hold 1106 * the new entry. 1107 * 1108 * the parent entry for the split page remains the same, and 1109 * a new entry is inserted at its right with the first key and 1110 * block number of the new right page. 1111 * 1112 * There are a maximum of 4 pages pinned at any time: 1113 * two children, left parent and right parent (when the parent splits). 1114 * keep the child pages pinned while working on the parent. 1115 * make sure that all pins are released at exit. 1116 */ 1117 while ((parent = BT_POP(btstack)) != NULL) { 1118 /* parent page specified by stack frame <parent> */ 1119 1120 /* keep current child pages (<lp>, <rp>) pinned */ 1121 lmp = smp; 1122 lp = sp; 1123 1124 /* 1125 * insert router entry in parent for new right child page <rp> 1126 */ 1127 /* get the parent page <sp> */ 1128 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1129 if (rc) { 1130 DT_PUTPAGE(lmp); 1131 DT_PUTPAGE(rmp); 1132 goto splitOut; 1133 } 1134 1135 /* 1136 * The new key entry goes ONE AFTER the index of parent entry, 1137 * because the split was to the right. 1138 */ 1139 skip = parent->index + 1; 1140 1141 /* 1142 * compute the key for the router entry 1143 * 1144 * key suffix compression: 1145 * for internal pages that have leaf pages as children, 1146 * retain only what's needed to distinguish between 1147 * the new entry and the entry on the page to its left. 1148 * If the keys compare equal, retain the entire key. 1149 * 1150 * note that compression is performed only at computing 1151 * router key at the lowest internal level. 1152 * further compression of the key between pairs of higher 1153 * level internal pages loses too much information and 1154 * the search may fail. 1155 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1156 * results in two adjacent parent entries (a)(xx). 1157 * if split occurs between these two entries, and 1158 * if compression is applied, the router key of parent entry 1159 * of right page (x) will divert search for x into right 1160 * subtree and miss x in the left subtree.) 1161 * 1162 * the entire key must be retained for the next-to-leftmost 1163 * internal key at any level of the tree, or search may fail 1164 * (e.g., ?) 1165 */ 1166 switch (rp->header.flag & BT_TYPE) { 1167 case BT_LEAF: 1168 /* 1169 * compute the length of prefix for suffix compression 1170 * between last entry of left page and first entry 1171 * of right page 1172 */ 1173 if ((sp->header.flag & BT_ROOT && skip > 1) || 1174 sp->header.prev != 0 || skip > 1) { 1175 /* compute uppercase router prefix key */ 1176 rc = ciGetLeafPrefixKey(lp, 1177 lp->header.nextindex-1, 1178 rp, 0, &key, 1179 sbi->mntflag); 1180 if (rc) { 1181 DT_PUTPAGE(lmp); 1182 DT_PUTPAGE(rmp); 1183 DT_PUTPAGE(smp); 1184 goto splitOut; 1185 } 1186 } else { 1187 /* next to leftmost entry of 1188 lowest internal level */ 1189 1190 /* compute uppercase router key */ 1191 dtGetKey(rp, 0, &key, sbi->mntflag); 1192 key.name[key.namlen] = 0; 1193 1194 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1195 ciToUpper(&key); 1196 } 1197 1198 n = NDTINTERNAL(key.namlen); 1199 break; 1200 1201 case BT_INTERNAL: 1202 dtGetKey(rp, 0, &key, sbi->mntflag); 1203 n = NDTINTERNAL(key.namlen); 1204 break; 1205 1206 default: 1207 jfs_err("dtSplitUp(): UFO!"); 1208 break; 1209 } 1210 1211 /* unpin left child page */ 1212 DT_PUTPAGE(lmp); 1213 1214 /* 1215 * compute the data for the router entry 1216 */ 1217 data->xd = rpxd; /* child page xd */ 1218 1219 /* 1220 * parent page is full - split the parent page 1221 */ 1222 if (n > sp->header.freecnt) { 1223 /* init for parent page split */ 1224 split->mp = smp; 1225 split->index = skip; /* index at insert */ 1226 split->nslot = n; 1227 split->key = &key; 1228 /* split->data = data; */ 1229 1230 /* unpin right child page */ 1231 DT_PUTPAGE(rmp); 1232 1233 /* The split routines insert the new entry, 1234 * acquire txLock as appropriate. 1235 * return <rp> pinned and its block number <rbn>. 1236 */ 1237 rc = (sp->header.flag & BT_ROOT) ? 1238 dtSplitRoot(tid, ip, split, &rmp) : 1239 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1240 if (rc) { 1241 DT_PUTPAGE(smp); 1242 goto splitOut; 1243 } 1244 1245 /* smp and rmp are pinned */ 1246 } 1247 /* 1248 * parent page is not full - insert router entry in parent page 1249 */ 1250 else { 1251 BT_MARK_DIRTY(smp, ip); 1252 /* 1253 * acquire a transaction lock on the parent page 1254 */ 1255 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1256 dtlck = (struct dt_lock *) & tlck->lock; 1257 ASSERT(dtlck->index == 0); 1258 lv = & dtlck->lv[0]; 1259 1260 /* linelock header */ 1261 lv->offset = 0; 1262 lv->length = 1; 1263 dtlck->index++; 1264 1265 /* linelock stbl of non-root parent page */ 1266 if (!(sp->header.flag & BT_ROOT)) { 1267 lv++; 1268 n = skip >> L2DTSLOTSIZE; 1269 lv->offset = sp->header.stblindex + n; 1270 lv->length = 1271 ((sp->header.nextindex - 1272 1) >> L2DTSLOTSIZE) - n + 1; 1273 dtlck->index++; 1274 } 1275 1276 dtInsertEntry(sp, skip, &key, data, &dtlck); 1277 1278 /* exit propagate up */ 1279 break; 1280 } 1281 } 1282 1283 /* unpin current split and its right page */ 1284 DT_PUTPAGE(smp); 1285 DT_PUTPAGE(rmp); 1286 1287 /* 1288 * free remaining extents allocated for split 1289 */ 1290 splitOut: 1291 n = pxdlist.npxd; 1292 pxd = &pxdlist.pxd[n]; 1293 for (; n < pxdlist.maxnpxd; n++, pxd++) 1294 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1295 1296 freeKeyName: 1297 kfree(key.name); 1298 1299 /* Rollback quota allocation */ 1300 if (rc && quota_allocation) 1301 DQUOT_FREE_BLOCK(ip, quota_allocation); 1302 1303 dtSplitUp_Exit: 1304 1305 return rc; 1306 } 1307 1308 1309 /* 1310 * dtSplitPage() 1311 * 1312 * function: Split a non-root page of a btree. 1313 * 1314 * parameter: 1315 * 1316 * return: 0 - success; 1317 * errno - failure; 1318 * return split and new page pinned; 1319 */ 1320 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1321 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1322 { 1323 int rc = 0; 1324 struct metapage *smp; 1325 dtpage_t *sp; 1326 struct metapage *rmp; 1327 dtpage_t *rp; /* new right page allocated */ 1328 s64 rbn; /* new right page block number */ 1329 struct metapage *mp; 1330 dtpage_t *p; 1331 s64 nextbn; 1332 struct pxdlist *pxdlist; 1333 pxd_t *pxd; 1334 int skip, nextindex, half, left, nxt, off, si; 1335 struct ldtentry *ldtentry; 1336 struct idtentry *idtentry; 1337 u8 *stbl; 1338 struct dtslot *f; 1339 int fsi, stblsize; 1340 int n; 1341 struct dt_lock *sdtlck, *rdtlck; 1342 struct tlock *tlck; 1343 struct dt_lock *dtlck; 1344 struct lv *slv, *rlv, *lv; 1345 1346 /* get split page */ 1347 smp = split->mp; 1348 sp = DT_PAGE(ip, smp); 1349 1350 /* 1351 * allocate the new right page for the split 1352 */ 1353 pxdlist = split->pxdlist; 1354 pxd = &pxdlist->pxd[pxdlist->npxd]; 1355 pxdlist->npxd++; 1356 rbn = addressPXD(pxd); 1357 rmp = get_metapage(ip, rbn, PSIZE, 1); 1358 if (rmp == NULL) 1359 return -EIO; 1360 1361 /* Allocate blocks to quota. */ 1362 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1363 release_metapage(rmp); 1364 return -EDQUOT; 1365 } 1366 1367 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1368 1369 BT_MARK_DIRTY(rmp, ip); 1370 /* 1371 * acquire a transaction lock on the new right page 1372 */ 1373 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1374 rdtlck = (struct dt_lock *) & tlck->lock; 1375 1376 rp = (dtpage_t *) rmp->data; 1377 *rpp = rp; 1378 rp->header.self = *pxd; 1379 1380 BT_MARK_DIRTY(smp, ip); 1381 /* 1382 * acquire a transaction lock on the split page 1383 * 1384 * action: 1385 */ 1386 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1387 sdtlck = (struct dt_lock *) & tlck->lock; 1388 1389 /* linelock header of split page */ 1390 ASSERT(sdtlck->index == 0); 1391 slv = & sdtlck->lv[0]; 1392 slv->offset = 0; 1393 slv->length = 1; 1394 sdtlck->index++; 1395 1396 /* 1397 * initialize/update sibling pointers between sp and rp 1398 */ 1399 nextbn = le64_to_cpu(sp->header.next); 1400 rp->header.next = cpu_to_le64(nextbn); 1401 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1402 sp->header.next = cpu_to_le64(rbn); 1403 1404 /* 1405 * initialize new right page 1406 */ 1407 rp->header.flag = sp->header.flag; 1408 1409 /* compute sorted entry table at start of extent data area */ 1410 rp->header.nextindex = 0; 1411 rp->header.stblindex = 1; 1412 1413 n = PSIZE >> L2DTSLOTSIZE; 1414 rp->header.maxslot = n; 1415 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1416 1417 /* init freelist */ 1418 fsi = rp->header.stblindex + stblsize; 1419 rp->header.freelist = fsi; 1420 rp->header.freecnt = rp->header.maxslot - fsi; 1421 1422 /* 1423 * sequential append at tail: append without split 1424 * 1425 * If splitting the last page on a level because of appending 1426 * a entry to it (skip is maxentry), it's likely that the access is 1427 * sequential. Adding an empty page on the side of the level is less 1428 * work and can push the fill factor much higher than normal. 1429 * If we're wrong it's no big deal, we'll just do the split the right 1430 * way next time. 1431 * (It may look like it's equally easy to do a similar hack for 1432 * reverse sorted data, that is, split the tree left, 1433 * but it's not. Be my guest.) 1434 */ 1435 if (nextbn == 0 && split->index == sp->header.nextindex) { 1436 /* linelock header + stbl (first slot) of new page */ 1437 rlv = & rdtlck->lv[rdtlck->index]; 1438 rlv->offset = 0; 1439 rlv->length = 2; 1440 rdtlck->index++; 1441 1442 /* 1443 * initialize freelist of new right page 1444 */ 1445 f = &rp->slot[fsi]; 1446 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1447 f->next = fsi; 1448 f->next = -1; 1449 1450 /* insert entry at the first entry of the new right page */ 1451 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1452 1453 goto out; 1454 } 1455 1456 /* 1457 * non-sequential insert (at possibly middle page) 1458 */ 1459 1460 /* 1461 * update prev pointer of previous right sibling page; 1462 */ 1463 if (nextbn != 0) { 1464 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1465 if (rc) { 1466 discard_metapage(rmp); 1467 return rc; 1468 } 1469 1470 BT_MARK_DIRTY(mp, ip); 1471 /* 1472 * acquire a transaction lock on the next page 1473 */ 1474 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1475 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1476 tlck, ip, mp); 1477 dtlck = (struct dt_lock *) & tlck->lock; 1478 1479 /* linelock header of previous right sibling page */ 1480 lv = & dtlck->lv[dtlck->index]; 1481 lv->offset = 0; 1482 lv->length = 1; 1483 dtlck->index++; 1484 1485 p->header.prev = cpu_to_le64(rbn); 1486 1487 DT_PUTPAGE(mp); 1488 } 1489 1490 /* 1491 * split the data between the split and right pages. 1492 */ 1493 skip = split->index; 1494 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1495 left = 0; 1496 1497 /* 1498 * compute fill factor for split pages 1499 * 1500 * <nxt> traces the next entry to move to rp 1501 * <off> traces the next entry to stay in sp 1502 */ 1503 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1504 nextindex = sp->header.nextindex; 1505 for (nxt = off = 0; nxt < nextindex; ++off) { 1506 if (off == skip) 1507 /* check for fill factor with new entry size */ 1508 n = split->nslot; 1509 else { 1510 si = stbl[nxt]; 1511 switch (sp->header.flag & BT_TYPE) { 1512 case BT_LEAF: 1513 ldtentry = (struct ldtentry *) & sp->slot[si]; 1514 if (DO_INDEX(ip)) 1515 n = NDTLEAF(ldtentry->namlen); 1516 else 1517 n = NDTLEAF_LEGACY(ldtentry-> 1518 namlen); 1519 break; 1520 1521 case BT_INTERNAL: 1522 idtentry = (struct idtentry *) & sp->slot[si]; 1523 n = NDTINTERNAL(idtentry->namlen); 1524 break; 1525 1526 default: 1527 break; 1528 } 1529 1530 ++nxt; /* advance to next entry to move in sp */ 1531 } 1532 1533 left += n; 1534 if (left >= half) 1535 break; 1536 } 1537 1538 /* <nxt> poins to the 1st entry to move */ 1539 1540 /* 1541 * move entries to right page 1542 * 1543 * dtMoveEntry() initializes rp and reserves entry for insertion 1544 * 1545 * split page moved out entries are linelocked; 1546 * new/right page moved in entries are linelocked; 1547 */ 1548 /* linelock header + stbl of new right page */ 1549 rlv = & rdtlck->lv[rdtlck->index]; 1550 rlv->offset = 0; 1551 rlv->length = 5; 1552 rdtlck->index++; 1553 1554 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1555 1556 sp->header.nextindex = nxt; 1557 1558 /* 1559 * finalize freelist of new right page 1560 */ 1561 fsi = rp->header.freelist; 1562 f = &rp->slot[fsi]; 1563 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1564 f->next = fsi; 1565 f->next = -1; 1566 1567 /* 1568 * Update directory index table for entries now in right page 1569 */ 1570 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1571 s64 lblock; 1572 1573 mp = NULL; 1574 stbl = DT_GETSTBL(rp); 1575 for (n = 0; n < rp->header.nextindex; n++) { 1576 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1577 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1578 rbn, n, &mp, &lblock); 1579 } 1580 if (mp) 1581 release_metapage(mp); 1582 } 1583 1584 /* 1585 * the skipped index was on the left page, 1586 */ 1587 if (skip <= off) { 1588 /* insert the new entry in the split page */ 1589 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1590 1591 /* linelock stbl of split page */ 1592 if (sdtlck->index >= sdtlck->maxcnt) 1593 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1594 slv = & sdtlck->lv[sdtlck->index]; 1595 n = skip >> L2DTSLOTSIZE; 1596 slv->offset = sp->header.stblindex + n; 1597 slv->length = 1598 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1599 sdtlck->index++; 1600 } 1601 /* 1602 * the skipped index was on the right page, 1603 */ 1604 else { 1605 /* adjust the skip index to reflect the new position */ 1606 skip -= nxt; 1607 1608 /* insert the new entry in the right page */ 1609 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1610 } 1611 1612 out: 1613 *rmpp = rmp; 1614 *rpxdp = *pxd; 1615 1616 return rc; 1617 } 1618 1619 1620 /* 1621 * dtExtendPage() 1622 * 1623 * function: extend 1st/only directory leaf page 1624 * 1625 * parameter: 1626 * 1627 * return: 0 - success; 1628 * errno - failure; 1629 * return extended page pinned; 1630 */ 1631 static int dtExtendPage(tid_t tid, 1632 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1633 { 1634 struct super_block *sb = ip->i_sb; 1635 int rc; 1636 struct metapage *smp, *pmp, *mp; 1637 dtpage_t *sp, *pp; 1638 struct pxdlist *pxdlist; 1639 pxd_t *pxd, *tpxd; 1640 int xlen, xsize; 1641 int newstblindex, newstblsize; 1642 int oldstblindex, oldstblsize; 1643 int fsi, last; 1644 struct dtslot *f; 1645 struct btframe *parent; 1646 int n; 1647 struct dt_lock *dtlck; 1648 s64 xaddr, txaddr; 1649 struct tlock *tlck; 1650 struct pxd_lock *pxdlock; 1651 struct lv *lv; 1652 uint type; 1653 struct ldtentry *ldtentry; 1654 u8 *stbl; 1655 1656 /* get page to extend */ 1657 smp = split->mp; 1658 sp = DT_PAGE(ip, smp); 1659 1660 /* get parent/root page */ 1661 parent = BT_POP(btstack); 1662 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1663 if (rc) 1664 return (rc); 1665 1666 /* 1667 * extend the extent 1668 */ 1669 pxdlist = split->pxdlist; 1670 pxd = &pxdlist->pxd[pxdlist->npxd]; 1671 pxdlist->npxd++; 1672 1673 xaddr = addressPXD(pxd); 1674 tpxd = &sp->header.self; 1675 txaddr = addressPXD(tpxd); 1676 /* in-place extension */ 1677 if (xaddr == txaddr) { 1678 type = tlckEXTEND; 1679 } 1680 /* relocation */ 1681 else { 1682 type = tlckNEW; 1683 1684 /* save moved extent descriptor for later free */ 1685 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1686 pxdlock = (struct pxd_lock *) & tlck->lock; 1687 pxdlock->flag = mlckFREEPXD; 1688 pxdlock->pxd = sp->header.self; 1689 pxdlock->index = 1; 1690 1691 /* 1692 * Update directory index table to reflect new page address 1693 */ 1694 if (DO_INDEX(ip)) { 1695 s64 lblock; 1696 1697 mp = NULL; 1698 stbl = DT_GETSTBL(sp); 1699 for (n = 0; n < sp->header.nextindex; n++) { 1700 ldtentry = 1701 (struct ldtentry *) & sp->slot[stbl[n]]; 1702 modify_index(tid, ip, 1703 le32_to_cpu(ldtentry->index), 1704 xaddr, n, &mp, &lblock); 1705 } 1706 if (mp) 1707 release_metapage(mp); 1708 } 1709 } 1710 1711 /* 1712 * extend the page 1713 */ 1714 sp->header.self = *pxd; 1715 1716 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1717 1718 BT_MARK_DIRTY(smp, ip); 1719 /* 1720 * acquire a transaction lock on the extended/leaf page 1721 */ 1722 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1723 dtlck = (struct dt_lock *) & tlck->lock; 1724 lv = & dtlck->lv[0]; 1725 1726 /* update buffer extent descriptor of extended page */ 1727 xlen = lengthPXD(pxd); 1728 xsize = xlen << JFS_SBI(sb)->l2bsize; 1729 #ifdef _STILL_TO_PORT 1730 bmSetXD(smp, xaddr, xsize); 1731 #endif /* _STILL_TO_PORT */ 1732 1733 /* 1734 * copy old stbl to new stbl at start of extended area 1735 */ 1736 oldstblindex = sp->header.stblindex; 1737 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1738 newstblindex = sp->header.maxslot; 1739 n = xsize >> L2DTSLOTSIZE; 1740 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1741 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1742 sp->header.nextindex); 1743 1744 /* 1745 * in-line extension: linelock old area of extended page 1746 */ 1747 if (type == tlckEXTEND) { 1748 /* linelock header */ 1749 lv->offset = 0; 1750 lv->length = 1; 1751 dtlck->index++; 1752 lv++; 1753 1754 /* linelock new stbl of extended page */ 1755 lv->offset = newstblindex; 1756 lv->length = newstblsize; 1757 } 1758 /* 1759 * relocation: linelock whole relocated area 1760 */ 1761 else { 1762 lv->offset = 0; 1763 lv->length = sp->header.maxslot + newstblsize; 1764 } 1765 1766 dtlck->index++; 1767 1768 sp->header.maxslot = n; 1769 sp->header.stblindex = newstblindex; 1770 /* sp->header.nextindex remains the same */ 1771 1772 /* 1773 * add old stbl region at head of freelist 1774 */ 1775 fsi = oldstblindex; 1776 f = &sp->slot[fsi]; 1777 last = sp->header.freelist; 1778 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1779 f->next = last; 1780 last = fsi; 1781 } 1782 sp->header.freelist = last; 1783 sp->header.freecnt += oldstblsize; 1784 1785 /* 1786 * append free region of newly extended area at tail of freelist 1787 */ 1788 /* init free region of newly extended area */ 1789 fsi = n = newstblindex + newstblsize; 1790 f = &sp->slot[fsi]; 1791 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1792 f->next = fsi; 1793 f->next = -1; 1794 1795 /* append new free region at tail of old freelist */ 1796 fsi = sp->header.freelist; 1797 if (fsi == -1) 1798 sp->header.freelist = n; 1799 else { 1800 do { 1801 f = &sp->slot[fsi]; 1802 fsi = f->next; 1803 } while (fsi != -1); 1804 1805 f->next = n; 1806 } 1807 1808 sp->header.freecnt += sp->header.maxslot - n; 1809 1810 /* 1811 * insert the new entry 1812 */ 1813 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1814 1815 BT_MARK_DIRTY(pmp, ip); 1816 /* 1817 * linelock any freeslots residing in old extent 1818 */ 1819 if (type == tlckEXTEND) { 1820 n = sp->header.maxslot >> 2; 1821 if (sp->header.freelist < n) 1822 dtLinelockFreelist(sp, n, &dtlck); 1823 } 1824 1825 /* 1826 * update parent entry on the parent/root page 1827 */ 1828 /* 1829 * acquire a transaction lock on the parent/root page 1830 */ 1831 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1832 dtlck = (struct dt_lock *) & tlck->lock; 1833 lv = & dtlck->lv[dtlck->index]; 1834 1835 /* linelock parent entry - 1st slot */ 1836 lv->offset = 1; 1837 lv->length = 1; 1838 dtlck->index++; 1839 1840 /* update the parent pxd for page extension */ 1841 tpxd = (pxd_t *) & pp->slot[1]; 1842 *tpxd = *pxd; 1843 1844 DT_PUTPAGE(pmp); 1845 return 0; 1846 } 1847 1848 1849 /* 1850 * dtSplitRoot() 1851 * 1852 * function: 1853 * split the full root page into 1854 * original/root/split page and new right page 1855 * i.e., root remains fixed in tree anchor (inode) and 1856 * the root is copied to a single new right child page 1857 * since root page << non-root page, and 1858 * the split root page contains a single entry for the 1859 * new right child page. 1860 * 1861 * parameter: 1862 * 1863 * return: 0 - success; 1864 * errno - failure; 1865 * return new page pinned; 1866 */ 1867 static int dtSplitRoot(tid_t tid, 1868 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1869 { 1870 struct super_block *sb = ip->i_sb; 1871 struct metapage *smp; 1872 dtroot_t *sp; 1873 struct metapage *rmp; 1874 dtpage_t *rp; 1875 s64 rbn; 1876 int xlen; 1877 int xsize; 1878 struct dtslot *f; 1879 s8 *stbl; 1880 int fsi, stblsize, n; 1881 struct idtentry *s; 1882 pxd_t *ppxd; 1883 struct pxdlist *pxdlist; 1884 pxd_t *pxd; 1885 struct dt_lock *dtlck; 1886 struct tlock *tlck; 1887 struct lv *lv; 1888 1889 /* get split root page */ 1890 smp = split->mp; 1891 sp = &JFS_IP(ip)->i_dtroot; 1892 1893 /* 1894 * allocate/initialize a single (right) child page 1895 * 1896 * N.B. at first split, a one (or two) block to fit new entry 1897 * is allocated; at subsequent split, a full page is allocated; 1898 */ 1899 pxdlist = split->pxdlist; 1900 pxd = &pxdlist->pxd[pxdlist->npxd]; 1901 pxdlist->npxd++; 1902 rbn = addressPXD(pxd); 1903 xlen = lengthPXD(pxd); 1904 xsize = xlen << JFS_SBI(sb)->l2bsize; 1905 rmp = get_metapage(ip, rbn, xsize, 1); 1906 if (!rmp) 1907 return -EIO; 1908 1909 rp = rmp->data; 1910 1911 /* Allocate blocks to quota. */ 1912 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1913 release_metapage(rmp); 1914 return -EDQUOT; 1915 } 1916 1917 BT_MARK_DIRTY(rmp, ip); 1918 /* 1919 * acquire a transaction lock on the new right page 1920 */ 1921 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1922 dtlck = (struct dt_lock *) & tlck->lock; 1923 1924 rp->header.flag = 1925 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1926 rp->header.self = *pxd; 1927 1928 /* initialize sibling pointers */ 1929 rp->header.next = 0; 1930 rp->header.prev = 0; 1931 1932 /* 1933 * move in-line root page into new right page extent 1934 */ 1935 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1936 ASSERT(dtlck->index == 0); 1937 lv = & dtlck->lv[0]; 1938 lv->offset = 0; 1939 lv->length = 10; /* 1 + 8 + 1 */ 1940 dtlck->index++; 1941 1942 n = xsize >> L2DTSLOTSIZE; 1943 rp->header.maxslot = n; 1944 stblsize = (n + 31) >> L2DTSLOTSIZE; 1945 1946 /* copy old stbl to new stbl at start of extended area */ 1947 rp->header.stblindex = DTROOTMAXSLOT; 1948 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1949 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1950 rp->header.nextindex = sp->header.nextindex; 1951 1952 /* copy old data area to start of new data area */ 1953 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1954 1955 /* 1956 * append free region of newly extended area at tail of freelist 1957 */ 1958 /* init free region of newly extended area */ 1959 fsi = n = DTROOTMAXSLOT + stblsize; 1960 f = &rp->slot[fsi]; 1961 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1962 f->next = fsi; 1963 f->next = -1; 1964 1965 /* append new free region at tail of old freelist */ 1966 fsi = sp->header.freelist; 1967 if (fsi == -1) 1968 rp->header.freelist = n; 1969 else { 1970 rp->header.freelist = fsi; 1971 1972 do { 1973 f = &rp->slot[fsi]; 1974 fsi = f->next; 1975 } while (fsi != -1); 1976 1977 f->next = n; 1978 } 1979 1980 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1981 1982 /* 1983 * Update directory index table for entries now in right page 1984 */ 1985 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1986 s64 lblock; 1987 struct metapage *mp = NULL; 1988 struct ldtentry *ldtentry; 1989 1990 stbl = DT_GETSTBL(rp); 1991 for (n = 0; n < rp->header.nextindex; n++) { 1992 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1993 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1994 rbn, n, &mp, &lblock); 1995 } 1996 if (mp) 1997 release_metapage(mp); 1998 } 1999 /* 2000 * insert the new entry into the new right/child page 2001 * (skip index in the new right page will not change) 2002 */ 2003 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2004 2005 /* 2006 * reset parent/root page 2007 * 2008 * set the 1st entry offset to 0, which force the left-most key 2009 * at any level of the tree to be less than any search key. 2010 * 2011 * The btree comparison code guarantees that the left-most key on any 2012 * level of the tree is never used, so it doesn't need to be filled in. 2013 */ 2014 BT_MARK_DIRTY(smp, ip); 2015 /* 2016 * acquire a transaction lock on the root page (in-memory inode) 2017 */ 2018 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2019 dtlck = (struct dt_lock *) & tlck->lock; 2020 2021 /* linelock root */ 2022 ASSERT(dtlck->index == 0); 2023 lv = & dtlck->lv[0]; 2024 lv->offset = 0; 2025 lv->length = DTROOTMAXSLOT; 2026 dtlck->index++; 2027 2028 /* update page header of root */ 2029 if (sp->header.flag & BT_LEAF) { 2030 sp->header.flag &= ~BT_LEAF; 2031 sp->header.flag |= BT_INTERNAL; 2032 } 2033 2034 /* init the first entry */ 2035 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2036 ppxd = (pxd_t *) s; 2037 *ppxd = *pxd; 2038 s->next = -1; 2039 s->namlen = 0; 2040 2041 stbl = sp->header.stbl; 2042 stbl[0] = DTENTRYSTART; 2043 sp->header.nextindex = 1; 2044 2045 /* init freelist */ 2046 fsi = DTENTRYSTART + 1; 2047 f = &sp->slot[fsi]; 2048 2049 /* init free region of remaining area */ 2050 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2051 f->next = fsi; 2052 f->next = -1; 2053 2054 sp->header.freelist = DTENTRYSTART + 1; 2055 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2056 2057 *rmpp = rmp; 2058 2059 return 0; 2060 } 2061 2062 2063 /* 2064 * dtDelete() 2065 * 2066 * function: delete the entry(s) referenced by a key. 2067 * 2068 * parameter: 2069 * 2070 * return: 2071 */ 2072 int dtDelete(tid_t tid, 2073 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2074 { 2075 int rc = 0; 2076 s64 bn; 2077 struct metapage *mp, *imp; 2078 dtpage_t *p; 2079 int index; 2080 struct btstack btstack; 2081 struct dt_lock *dtlck; 2082 struct tlock *tlck; 2083 struct lv *lv; 2084 int i; 2085 struct ldtentry *ldtentry; 2086 u8 *stbl; 2087 u32 table_index, next_index; 2088 struct metapage *nmp; 2089 dtpage_t *np; 2090 2091 /* 2092 * search for the entry to delete: 2093 * 2094 * dtSearch() returns (leaf page pinned, index at which to delete). 2095 */ 2096 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2097 return rc; 2098 2099 /* retrieve search result */ 2100 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2101 2102 /* 2103 * We need to find put the index of the next entry into the 2104 * directory index table in order to resume a readdir from this 2105 * entry. 2106 */ 2107 if (DO_INDEX(ip)) { 2108 stbl = DT_GETSTBL(p); 2109 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2110 table_index = le32_to_cpu(ldtentry->index); 2111 if (index == (p->header.nextindex - 1)) { 2112 /* 2113 * Last entry in this leaf page 2114 */ 2115 if ((p->header.flag & BT_ROOT) 2116 || (p->header.next == 0)) 2117 next_index = -1; 2118 else { 2119 /* Read next leaf page */ 2120 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2121 nmp, PSIZE, np, rc); 2122 if (rc) 2123 next_index = -1; 2124 else { 2125 stbl = DT_GETSTBL(np); 2126 ldtentry = 2127 (struct ldtentry *) & np-> 2128 slot[stbl[0]]; 2129 next_index = 2130 le32_to_cpu(ldtentry->index); 2131 DT_PUTPAGE(nmp); 2132 } 2133 } 2134 } else { 2135 ldtentry = 2136 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2137 next_index = le32_to_cpu(ldtentry->index); 2138 } 2139 free_index(tid, ip, table_index, next_index); 2140 } 2141 /* 2142 * the leaf page becomes empty, delete the page 2143 */ 2144 if (p->header.nextindex == 1) { 2145 /* delete empty page */ 2146 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2147 } 2148 /* 2149 * the leaf page has other entries remaining: 2150 * 2151 * delete the entry from the leaf page. 2152 */ 2153 else { 2154 BT_MARK_DIRTY(mp, ip); 2155 /* 2156 * acquire a transaction lock on the leaf page 2157 */ 2158 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2159 dtlck = (struct dt_lock *) & tlck->lock; 2160 2161 /* 2162 * Do not assume that dtlck->index will be zero. During a 2163 * rename within a directory, this transaction may have 2164 * modified this page already when adding the new entry. 2165 */ 2166 2167 /* linelock header */ 2168 if (dtlck->index >= dtlck->maxcnt) 2169 dtlck = (struct dt_lock *) txLinelock(dtlck); 2170 lv = & dtlck->lv[dtlck->index]; 2171 lv->offset = 0; 2172 lv->length = 1; 2173 dtlck->index++; 2174 2175 /* linelock stbl of non-root leaf page */ 2176 if (!(p->header.flag & BT_ROOT)) { 2177 if (dtlck->index >= dtlck->maxcnt) 2178 dtlck = (struct dt_lock *) txLinelock(dtlck); 2179 lv = & dtlck->lv[dtlck->index]; 2180 i = index >> L2DTSLOTSIZE; 2181 lv->offset = p->header.stblindex + i; 2182 lv->length = 2183 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2184 i + 1; 2185 dtlck->index++; 2186 } 2187 2188 /* free the leaf entry */ 2189 dtDeleteEntry(p, index, &dtlck); 2190 2191 /* 2192 * Update directory index table for entries moved in stbl 2193 */ 2194 if (DO_INDEX(ip) && index < p->header.nextindex) { 2195 s64 lblock; 2196 2197 imp = NULL; 2198 stbl = DT_GETSTBL(p); 2199 for (i = index; i < p->header.nextindex; i++) { 2200 ldtentry = 2201 (struct ldtentry *) & p->slot[stbl[i]]; 2202 modify_index(tid, ip, 2203 le32_to_cpu(ldtentry->index), 2204 bn, i, &imp, &lblock); 2205 } 2206 if (imp) 2207 release_metapage(imp); 2208 } 2209 2210 DT_PUTPAGE(mp); 2211 } 2212 2213 return rc; 2214 } 2215 2216 2217 /* 2218 * dtDeleteUp() 2219 * 2220 * function: 2221 * free empty pages as propagating deletion up the tree 2222 * 2223 * parameter: 2224 * 2225 * return: 2226 */ 2227 static int dtDeleteUp(tid_t tid, struct inode *ip, 2228 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2229 { 2230 int rc = 0; 2231 struct metapage *mp; 2232 dtpage_t *p; 2233 int index, nextindex; 2234 int xlen; 2235 struct btframe *parent; 2236 struct dt_lock *dtlck; 2237 struct tlock *tlck; 2238 struct lv *lv; 2239 struct pxd_lock *pxdlock; 2240 int i; 2241 2242 /* 2243 * keep the root leaf page which has become empty 2244 */ 2245 if (BT_IS_ROOT(fmp)) { 2246 /* 2247 * reset the root 2248 * 2249 * dtInitRoot() acquires txlock on the root 2250 */ 2251 dtInitRoot(tid, ip, PARENT(ip)); 2252 2253 DT_PUTPAGE(fmp); 2254 2255 return 0; 2256 } 2257 2258 /* 2259 * free the non-root leaf page 2260 */ 2261 /* 2262 * acquire a transaction lock on the page 2263 * 2264 * write FREEXTENT|NOREDOPAGE log record 2265 * N.B. linelock is overlaid as freed extent descriptor, and 2266 * the buffer page is freed; 2267 */ 2268 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2269 pxdlock = (struct pxd_lock *) & tlck->lock; 2270 pxdlock->flag = mlckFREEPXD; 2271 pxdlock->pxd = fp->header.self; 2272 pxdlock->index = 1; 2273 2274 /* update sibling pointers */ 2275 if ((rc = dtRelink(tid, ip, fp))) { 2276 BT_PUTPAGE(fmp); 2277 return rc; 2278 } 2279 2280 xlen = lengthPXD(&fp->header.self); 2281 2282 /* Free quota allocation. */ 2283 DQUOT_FREE_BLOCK(ip, xlen); 2284 2285 /* free/invalidate its buffer page */ 2286 discard_metapage(fmp); 2287 2288 /* 2289 * propagate page deletion up the directory tree 2290 * 2291 * If the delete from the parent page makes it empty, 2292 * continue all the way up the tree. 2293 * stop if the root page is reached (which is never deleted) or 2294 * if the entry deletion does not empty the page. 2295 */ 2296 while ((parent = BT_POP(btstack)) != NULL) { 2297 /* pin the parent page <sp> */ 2298 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2299 if (rc) 2300 return rc; 2301 2302 /* 2303 * free the extent of the child page deleted 2304 */ 2305 index = parent->index; 2306 2307 /* 2308 * delete the entry for the child page from parent 2309 */ 2310 nextindex = p->header.nextindex; 2311 2312 /* 2313 * the parent has the single entry being deleted: 2314 * 2315 * free the parent page which has become empty. 2316 */ 2317 if (nextindex == 1) { 2318 /* 2319 * keep the root internal page which has become empty 2320 */ 2321 if (p->header.flag & BT_ROOT) { 2322 /* 2323 * reset the root 2324 * 2325 * dtInitRoot() acquires txlock on the root 2326 */ 2327 dtInitRoot(tid, ip, PARENT(ip)); 2328 2329 DT_PUTPAGE(mp); 2330 2331 return 0; 2332 } 2333 /* 2334 * free the parent page 2335 */ 2336 else { 2337 /* 2338 * acquire a transaction lock on the page 2339 * 2340 * write FREEXTENT|NOREDOPAGE log record 2341 */ 2342 tlck = 2343 txMaplock(tid, ip, 2344 tlckDTREE | tlckFREE); 2345 pxdlock = (struct pxd_lock *) & tlck->lock; 2346 pxdlock->flag = mlckFREEPXD; 2347 pxdlock->pxd = p->header.self; 2348 pxdlock->index = 1; 2349 2350 /* update sibling pointers */ 2351 if ((rc = dtRelink(tid, ip, p))) { 2352 DT_PUTPAGE(mp); 2353 return rc; 2354 } 2355 2356 xlen = lengthPXD(&p->header.self); 2357 2358 /* Free quota allocation */ 2359 DQUOT_FREE_BLOCK(ip, xlen); 2360 2361 /* free/invalidate its buffer page */ 2362 discard_metapage(mp); 2363 2364 /* propagate up */ 2365 continue; 2366 } 2367 } 2368 2369 /* 2370 * the parent has other entries remaining: 2371 * 2372 * delete the router entry from the parent page. 2373 */ 2374 BT_MARK_DIRTY(mp, ip); 2375 /* 2376 * acquire a transaction lock on the page 2377 * 2378 * action: router entry deletion 2379 */ 2380 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2381 dtlck = (struct dt_lock *) & tlck->lock; 2382 2383 /* linelock header */ 2384 if (dtlck->index >= dtlck->maxcnt) 2385 dtlck = (struct dt_lock *) txLinelock(dtlck); 2386 lv = & dtlck->lv[dtlck->index]; 2387 lv->offset = 0; 2388 lv->length = 1; 2389 dtlck->index++; 2390 2391 /* linelock stbl of non-root leaf page */ 2392 if (!(p->header.flag & BT_ROOT)) { 2393 if (dtlck->index < dtlck->maxcnt) 2394 lv++; 2395 else { 2396 dtlck = (struct dt_lock *) txLinelock(dtlck); 2397 lv = & dtlck->lv[0]; 2398 } 2399 i = index >> L2DTSLOTSIZE; 2400 lv->offset = p->header.stblindex + i; 2401 lv->length = 2402 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2403 i + 1; 2404 dtlck->index++; 2405 } 2406 2407 /* free the router entry */ 2408 dtDeleteEntry(p, index, &dtlck); 2409 2410 /* reset key of new leftmost entry of level (for consistency) */ 2411 if (index == 0 && 2412 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2413 dtTruncateEntry(p, 0, &dtlck); 2414 2415 /* unpin the parent page */ 2416 DT_PUTPAGE(mp); 2417 2418 /* exit propagation up */ 2419 break; 2420 } 2421 2422 return 0; 2423 } 2424 2425 #ifdef _NOTYET 2426 /* 2427 * NAME: dtRelocate() 2428 * 2429 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2430 * This function is mainly used by defragfs utility. 2431 */ 2432 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2433 s64 nxaddr) 2434 { 2435 int rc = 0; 2436 struct metapage *mp, *pmp, *lmp, *rmp; 2437 dtpage_t *p, *pp, *rp = 0, *lp= 0; 2438 s64 bn; 2439 int index; 2440 struct btstack btstack; 2441 pxd_t *pxd; 2442 s64 oxaddr, nextbn, prevbn; 2443 int xlen, xsize; 2444 struct tlock *tlck; 2445 struct dt_lock *dtlck; 2446 struct pxd_lock *pxdlock; 2447 s8 *stbl; 2448 struct lv *lv; 2449 2450 oxaddr = addressPXD(opxd); 2451 xlen = lengthPXD(opxd); 2452 2453 jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d", 2454 (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, 2455 xlen); 2456 2457 /* 2458 * 1. get the internal parent dtpage covering 2459 * router entry for the tartget page to be relocated; 2460 */ 2461 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2462 if (rc) 2463 return rc; 2464 2465 /* retrieve search result */ 2466 DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2467 jfs_info("dtRelocate: parent router entry validated."); 2468 2469 /* 2470 * 2. relocate the target dtpage 2471 */ 2472 /* read in the target page from src extent */ 2473 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2474 if (rc) { 2475 /* release the pinned parent page */ 2476 DT_PUTPAGE(pmp); 2477 return rc; 2478 } 2479 2480 /* 2481 * read in sibling pages if any to update sibling pointers; 2482 */ 2483 rmp = NULL; 2484 if (p->header.next) { 2485 nextbn = le64_to_cpu(p->header.next); 2486 DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2487 if (rc) { 2488 DT_PUTPAGE(mp); 2489 DT_PUTPAGE(pmp); 2490 return (rc); 2491 } 2492 } 2493 2494 lmp = NULL; 2495 if (p->header.prev) { 2496 prevbn = le64_to_cpu(p->header.prev); 2497 DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2498 if (rc) { 2499 DT_PUTPAGE(mp); 2500 DT_PUTPAGE(pmp); 2501 if (rmp) 2502 DT_PUTPAGE(rmp); 2503 return (rc); 2504 } 2505 } 2506 2507 /* at this point, all xtpages to be updated are in memory */ 2508 2509 /* 2510 * update sibling pointers of sibling dtpages if any; 2511 */ 2512 if (lmp) { 2513 tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); 2514 dtlck = (struct dt_lock *) & tlck->lock; 2515 /* linelock header */ 2516 ASSERT(dtlck->index == 0); 2517 lv = & dtlck->lv[0]; 2518 lv->offset = 0; 2519 lv->length = 1; 2520 dtlck->index++; 2521 2522 lp->header.next = cpu_to_le64(nxaddr); 2523 DT_PUTPAGE(lmp); 2524 } 2525 2526 if (rmp) { 2527 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); 2528 dtlck = (struct dt_lock *) & tlck->lock; 2529 /* linelock header */ 2530 ASSERT(dtlck->index == 0); 2531 lv = & dtlck->lv[0]; 2532 lv->offset = 0; 2533 lv->length = 1; 2534 dtlck->index++; 2535 2536 rp->header.prev = cpu_to_le64(nxaddr); 2537 DT_PUTPAGE(rmp); 2538 } 2539 2540 /* 2541 * update the target dtpage to be relocated 2542 * 2543 * write LOG_REDOPAGE of LOG_NEW type for dst page 2544 * for the whole target page (logredo() will apply 2545 * after image and update bmap for allocation of the 2546 * dst extent), and update bmap for allocation of 2547 * the dst extent; 2548 */ 2549 tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); 2550 dtlck = (struct dt_lock *) & tlck->lock; 2551 /* linelock header */ 2552 ASSERT(dtlck->index == 0); 2553 lv = & dtlck->lv[0]; 2554 2555 /* update the self address in the dtpage header */ 2556 pxd = &p->header.self; 2557 PXDaddress(pxd, nxaddr); 2558 2559 /* the dst page is the same as the src page, i.e., 2560 * linelock for afterimage of the whole page; 2561 */ 2562 lv->offset = 0; 2563 lv->length = p->header.maxslot; 2564 dtlck->index++; 2565 2566 /* update the buffer extent descriptor of the dtpage */ 2567 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2568 #ifdef _STILL_TO_PORT 2569 bmSetXD(mp, nxaddr, xsize); 2570 #endif /* _STILL_TO_PORT */ 2571 /* unpin the relocated page */ 2572 DT_PUTPAGE(mp); 2573 jfs_info("dtRelocate: target dtpage relocated."); 2574 2575 /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec 2576 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec 2577 * will also force a bmap update ). 2578 */ 2579 2580 /* 2581 * 3. acquire maplock for the source extent to be freed; 2582 */ 2583 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2584 * for the source dtpage (logredo() will init NoRedoPage 2585 * filter and will also update bmap for free of the source 2586 * dtpage), and upadte bmap for free of the source dtpage; 2587 */ 2588 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2589 pxdlock = (struct pxd_lock *) & tlck->lock; 2590 pxdlock->flag = mlckFREEPXD; 2591 PXDaddress(&pxdlock->pxd, oxaddr); 2592 PXDlength(&pxdlock->pxd, xlen); 2593 pxdlock->index = 1; 2594 2595 /* 2596 * 4. update the parent router entry for relocation; 2597 * 2598 * acquire tlck for the parent entry covering the target dtpage; 2599 * write LOG_REDOPAGE to apply after image only; 2600 */ 2601 jfs_info("dtRelocate: update parent router entry."); 2602 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 2603 dtlck = (struct dt_lock *) & tlck->lock; 2604 lv = & dtlck->lv[dtlck->index]; 2605 2606 /* update the PXD with the new address */ 2607 stbl = DT_GETSTBL(pp); 2608 pxd = (pxd_t *) & pp->slot[stbl[index]]; 2609 PXDaddress(pxd, nxaddr); 2610 lv->offset = stbl[index]; 2611 lv->length = 1; 2612 dtlck->index++; 2613 2614 /* unpin the parent dtpage */ 2615 DT_PUTPAGE(pmp); 2616 2617 return rc; 2618 } 2619 2620 /* 2621 * NAME: dtSearchNode() 2622 * 2623 * FUNCTION: Search for an dtpage containing a specified address 2624 * This function is mainly used by defragfs utility. 2625 * 2626 * NOTE: Search result on stack, the found page is pinned at exit. 2627 * The result page must be an internal dtpage. 2628 * lmxaddr give the address of the left most page of the 2629 * dtree level, in which the required dtpage resides. 2630 */ 2631 static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, 2632 struct btstack * btstack) 2633 { 2634 int rc = 0; 2635 s64 bn; 2636 struct metapage *mp; 2637 dtpage_t *p; 2638 int psize = 288; /* initial in-line directory */ 2639 s8 *stbl; 2640 int i; 2641 pxd_t *pxd; 2642 struct btframe *btsp; 2643 2644 BT_CLR(btstack); /* reset stack */ 2645 2646 /* 2647 * descend tree to the level with specified leftmost page 2648 * 2649 * by convention, root bn = 0. 2650 */ 2651 for (bn = 0;;) { 2652 /* get/pin the page to search */ 2653 DT_GETPAGE(ip, bn, mp, psize, p, rc); 2654 if (rc) 2655 return rc; 2656 2657 /* does the xaddr of leftmost page of the levevl 2658 * matches levevl search key ? 2659 */ 2660 if (p->header.flag & BT_ROOT) { 2661 if (lmxaddr == 0) 2662 break; 2663 } else if (addressPXD(&p->header.self) == lmxaddr) 2664 break; 2665 2666 /* 2667 * descend down to leftmost child page 2668 */ 2669 if (p->header.flag & BT_LEAF) { 2670 DT_PUTPAGE(mp); 2671 return -ESTALE; 2672 } 2673 2674 /* get the leftmost entry */ 2675 stbl = DT_GETSTBL(p); 2676 pxd = (pxd_t *) & p->slot[stbl[0]]; 2677 2678 /* get the child page block address */ 2679 bn = addressPXD(pxd); 2680 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 2681 /* unpin the parent page */ 2682 DT_PUTPAGE(mp); 2683 } 2684 2685 /* 2686 * search each page at the current levevl 2687 */ 2688 loop: 2689 stbl = DT_GETSTBL(p); 2690 for (i = 0; i < p->header.nextindex; i++) { 2691 pxd = (pxd_t *) & p->slot[stbl[i]]; 2692 2693 /* found the specified router entry */ 2694 if (addressPXD(pxd) == addressPXD(kpxd) && 2695 lengthPXD(pxd) == lengthPXD(kpxd)) { 2696 btsp = btstack->top; 2697 btsp->bn = bn; 2698 btsp->index = i; 2699 btsp->mp = mp; 2700 2701 return 0; 2702 } 2703 } 2704 2705 /* get the right sibling page if any */ 2706 if (p->header.next) 2707 bn = le64_to_cpu(p->header.next); 2708 else { 2709 DT_PUTPAGE(mp); 2710 return -ESTALE; 2711 } 2712 2713 /* unpin current page */ 2714 DT_PUTPAGE(mp); 2715 2716 /* get the right sibling page */ 2717 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2718 if (rc) 2719 return rc; 2720 2721 goto loop; 2722 } 2723 #endif /* _NOTYET */ 2724 2725 /* 2726 * dtRelink() 2727 * 2728 * function: 2729 * link around a freed page. 2730 * 2731 * parameter: 2732 * fp: page to be freed 2733 * 2734 * return: 2735 */ 2736 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2737 { 2738 int rc; 2739 struct metapage *mp; 2740 s64 nextbn, prevbn; 2741 struct tlock *tlck; 2742 struct dt_lock *dtlck; 2743 struct lv *lv; 2744 2745 nextbn = le64_to_cpu(p->header.next); 2746 prevbn = le64_to_cpu(p->header.prev); 2747 2748 /* update prev pointer of the next page */ 2749 if (nextbn != 0) { 2750 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2751 if (rc) 2752 return rc; 2753 2754 BT_MARK_DIRTY(mp, ip); 2755 /* 2756 * acquire a transaction lock on the next page 2757 * 2758 * action: update prev pointer; 2759 */ 2760 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2761 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2762 tlck, ip, mp); 2763 dtlck = (struct dt_lock *) & tlck->lock; 2764 2765 /* linelock header */ 2766 if (dtlck->index >= dtlck->maxcnt) 2767 dtlck = (struct dt_lock *) txLinelock(dtlck); 2768 lv = & dtlck->lv[dtlck->index]; 2769 lv->offset = 0; 2770 lv->length = 1; 2771 dtlck->index++; 2772 2773 p->header.prev = cpu_to_le64(prevbn); 2774 DT_PUTPAGE(mp); 2775 } 2776 2777 /* update next pointer of the previous page */ 2778 if (prevbn != 0) { 2779 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2780 if (rc) 2781 return rc; 2782 2783 BT_MARK_DIRTY(mp, ip); 2784 /* 2785 * acquire a transaction lock on the prev page 2786 * 2787 * action: update next pointer; 2788 */ 2789 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2790 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2791 tlck, ip, mp); 2792 dtlck = (struct dt_lock *) & tlck->lock; 2793 2794 /* linelock header */ 2795 if (dtlck->index >= dtlck->maxcnt) 2796 dtlck = (struct dt_lock *) txLinelock(dtlck); 2797 lv = & dtlck->lv[dtlck->index]; 2798 lv->offset = 0; 2799 lv->length = 1; 2800 dtlck->index++; 2801 2802 p->header.next = cpu_to_le64(nextbn); 2803 DT_PUTPAGE(mp); 2804 } 2805 2806 return 0; 2807 } 2808 2809 2810 /* 2811 * dtInitRoot() 2812 * 2813 * initialize directory root (inline in inode) 2814 */ 2815 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2816 { 2817 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2818 dtroot_t *p; 2819 int fsi; 2820 struct dtslot *f; 2821 struct tlock *tlck; 2822 struct dt_lock *dtlck; 2823 struct lv *lv; 2824 u16 xflag_save; 2825 2826 /* 2827 * If this was previously an non-empty directory, we need to remove 2828 * the old directory table. 2829 */ 2830 if (DO_INDEX(ip)) { 2831 if (!jfs_dirtable_inline(ip)) { 2832 struct tblock *tblk = tid_to_tblock(tid); 2833 /* 2834 * We're playing games with the tid's xflag. If 2835 * we're removing a regular file, the file's xtree 2836 * is committed with COMMIT_PMAP, but we always 2837 * commit the directories xtree with COMMIT_PWMAP. 2838 */ 2839 xflag_save = tblk->xflag; 2840 tblk->xflag = 0; 2841 /* 2842 * xtTruncate isn't guaranteed to fully truncate 2843 * the xtree. The caller needs to check i_size 2844 * after committing the transaction to see if 2845 * additional truncation is needed. The 2846 * COMMIT_Stale flag tells caller that we 2847 * initiated the truncation. 2848 */ 2849 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2850 set_cflag(COMMIT_Stale, ip); 2851 2852 tblk->xflag = xflag_save; 2853 } else 2854 ip->i_size = 1; 2855 2856 jfs_ip->next_index = 2; 2857 } else 2858 ip->i_size = IDATASIZE; 2859 2860 /* 2861 * acquire a transaction lock on the root 2862 * 2863 * action: directory initialization; 2864 */ 2865 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2866 tlckDTREE | tlckENTRY | tlckBTROOT); 2867 dtlck = (struct dt_lock *) & tlck->lock; 2868 2869 /* linelock root */ 2870 ASSERT(dtlck->index == 0); 2871 lv = & dtlck->lv[0]; 2872 lv->offset = 0; 2873 lv->length = DTROOTMAXSLOT; 2874 dtlck->index++; 2875 2876 p = &jfs_ip->i_dtroot; 2877 2878 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2879 2880 p->header.nextindex = 0; 2881 2882 /* init freelist */ 2883 fsi = 1; 2884 f = &p->slot[fsi]; 2885 2886 /* init data area of root */ 2887 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2888 f->next = fsi; 2889 f->next = -1; 2890 2891 p->header.freelist = 1; 2892 p->header.freecnt = 8; 2893 2894 /* init '..' entry */ 2895 p->header.idotdot = cpu_to_le32(idotdot); 2896 2897 return; 2898 } 2899 2900 /* 2901 * add_missing_indices() 2902 * 2903 * function: Fix dtree page in which one or more entries has an invalid index. 2904 * fsck.jfs should really fix this, but it currently does not. 2905 * Called from jfs_readdir when bad index is detected. 2906 */ 2907 static void add_missing_indices(struct inode *inode, s64 bn) 2908 { 2909 struct ldtentry *d; 2910 struct dt_lock *dtlck; 2911 int i; 2912 uint index; 2913 struct lv *lv; 2914 struct metapage *mp; 2915 dtpage_t *p; 2916 int rc; 2917 s8 *stbl; 2918 tid_t tid; 2919 struct tlock *tlck; 2920 2921 tid = txBegin(inode->i_sb, 0); 2922 2923 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2924 2925 if (rc) { 2926 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2927 goto end; 2928 } 2929 BT_MARK_DIRTY(mp, inode); 2930 2931 ASSERT(p->header.flag & BT_LEAF); 2932 2933 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2934 if (BT_IS_ROOT(mp)) 2935 tlck->type |= tlckBTROOT; 2936 2937 dtlck = (struct dt_lock *) &tlck->lock; 2938 2939 stbl = DT_GETSTBL(p); 2940 for (i = 0; i < p->header.nextindex; i++) { 2941 d = (struct ldtentry *) &p->slot[stbl[i]]; 2942 index = le32_to_cpu(d->index); 2943 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2944 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2945 if (dtlck->index >= dtlck->maxcnt) 2946 dtlck = (struct dt_lock *) txLinelock(dtlck); 2947 lv = &dtlck->lv[dtlck->index]; 2948 lv->offset = stbl[i]; 2949 lv->length = 1; 2950 dtlck->index++; 2951 } 2952 } 2953 2954 DT_PUTPAGE(mp); 2955 (void) txCommit(tid, 1, &inode, 0); 2956 end: 2957 txEnd(tid); 2958 } 2959 2960 /* 2961 * Buffer to hold directory entry info while traversing a dtree page 2962 * before being fed to the filldir function 2963 */ 2964 struct jfs_dirent { 2965 loff_t position; 2966 int ino; 2967 u16 name_len; 2968 char name[0]; 2969 }; 2970 2971 /* 2972 * function to determine next variable-sized jfs_dirent in buffer 2973 */ 2974 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2975 { 2976 return (struct jfs_dirent *) 2977 ((char *)dirent + 2978 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2979 sizeof (loff_t) - 1) & 2980 ~(sizeof (loff_t) - 1))); 2981 } 2982 2983 /* 2984 * jfs_readdir() 2985 * 2986 * function: read directory entries sequentially 2987 * from the specified entry offset 2988 * 2989 * parameter: 2990 * 2991 * return: offset = (pn, index) of start entry 2992 * of next jfs_readdir()/dtRead() 2993 */ 2994 int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 2995 { 2996 struct inode *ip = filp->f_dentry->d_inode; 2997 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2998 int rc = 0; 2999 loff_t dtpos; /* legacy OS/2 style position */ 3000 struct dtoffset { 3001 s16 pn; 3002 s16 index; 3003 s32 unused; 3004 } *dtoffset = (struct dtoffset *) &dtpos; 3005 s64 bn; 3006 struct metapage *mp; 3007 dtpage_t *p; 3008 int index; 3009 s8 *stbl; 3010 struct btstack btstack; 3011 int i, next; 3012 struct ldtentry *d; 3013 struct dtslot *t; 3014 int d_namleft, len, outlen; 3015 unsigned long dirent_buf; 3016 char *name_ptr; 3017 u32 dir_index; 3018 int do_index = 0; 3019 uint loop_count = 0; 3020 struct jfs_dirent *jfs_dirent; 3021 int jfs_dirents; 3022 int overflow, fix_page, page_fixed = 0; 3023 static int unique_pos = 2; /* If we can't fix broken index */ 3024 3025 if (filp->f_pos == DIREND) 3026 return 0; 3027 3028 if (DO_INDEX(ip)) { 3029 /* 3030 * persistent index is stored in directory entries. 3031 * Special cases: 0 = . 3032 * 1 = .. 3033 * -1 = End of directory 3034 */ 3035 do_index = 1; 3036 3037 dir_index = (u32) filp->f_pos; 3038 3039 if (dir_index > 1) { 3040 struct dir_table_slot dirtab_slot; 3041 3042 if (dtEmpty(ip) || 3043 (dir_index >= JFS_IP(ip)->next_index)) { 3044 /* Stale position. Directory has shrunk */ 3045 filp->f_pos = DIREND; 3046 return 0; 3047 } 3048 repeat: 3049 rc = read_index(ip, dir_index, &dirtab_slot); 3050 if (rc) { 3051 filp->f_pos = DIREND; 3052 return rc; 3053 } 3054 if (dirtab_slot.flag == DIR_INDEX_FREE) { 3055 if (loop_count++ > JFS_IP(ip)->next_index) { 3056 jfs_err("jfs_readdir detected " 3057 "infinite loop!"); 3058 filp->f_pos = DIREND; 3059 return 0; 3060 } 3061 dir_index = le32_to_cpu(dirtab_slot.addr2); 3062 if (dir_index == -1) { 3063 filp->f_pos = DIREND; 3064 return 0; 3065 } 3066 goto repeat; 3067 } 3068 bn = addressDTS(&dirtab_slot); 3069 index = dirtab_slot.slot; 3070 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3071 if (rc) { 3072 filp->f_pos = DIREND; 3073 return 0; 3074 } 3075 if (p->header.flag & BT_INTERNAL) { 3076 jfs_err("jfs_readdir: bad index table"); 3077 DT_PUTPAGE(mp); 3078 filp->f_pos = -1; 3079 return 0; 3080 } 3081 } else { 3082 if (dir_index == 0) { 3083 /* 3084 * self "." 3085 */ 3086 filp->f_pos = 0; 3087 if (filldir(dirent, ".", 1, 0, ip->i_ino, 3088 DT_DIR)) 3089 return 0; 3090 } 3091 /* 3092 * parent ".." 3093 */ 3094 filp->f_pos = 1; 3095 if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) 3096 return 0; 3097 3098 /* 3099 * Find first entry of left-most leaf 3100 */ 3101 if (dtEmpty(ip)) { 3102 filp->f_pos = DIREND; 3103 return 0; 3104 } 3105 3106 if ((rc = dtReadFirst(ip, &btstack))) 3107 return rc; 3108 3109 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3110 } 3111 } else { 3112 /* 3113 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3114 * 3115 * pn = index = 0: First entry "." 3116 * pn = 0; index = 1: Second entry ".." 3117 * pn > 0: Real entries, pn=1 -> leftmost page 3118 * pn = index = -1: No more entries 3119 */ 3120 dtpos = filp->f_pos; 3121 if (dtpos == 0) { 3122 /* build "." entry */ 3123 3124 if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, 3125 DT_DIR)) 3126 return 0; 3127 dtoffset->index = 1; 3128 filp->f_pos = dtpos; 3129 } 3130 3131 if (dtoffset->pn == 0) { 3132 if (dtoffset->index == 1) { 3133 /* build ".." entry */ 3134 3135 if (filldir(dirent, "..", 2, filp->f_pos, 3136 PARENT(ip), DT_DIR)) 3137 return 0; 3138 } else { 3139 jfs_err("jfs_readdir called with " 3140 "invalid offset!"); 3141 } 3142 dtoffset->pn = 1; 3143 dtoffset->index = 0; 3144 filp->f_pos = dtpos; 3145 } 3146 3147 if (dtEmpty(ip)) { 3148 filp->f_pos = DIREND; 3149 return 0; 3150 } 3151 3152 if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { 3153 jfs_err("jfs_readdir: unexpected rc = %d " 3154 "from dtReadNext", rc); 3155 filp->f_pos = DIREND; 3156 return 0; 3157 } 3158 /* get start leaf page and index */ 3159 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3160 3161 /* offset beyond directory eof ? */ 3162 if (bn < 0) { 3163 filp->f_pos = DIREND; 3164 return 0; 3165 } 3166 } 3167 3168 dirent_buf = __get_free_page(GFP_KERNEL); 3169 if (dirent_buf == 0) { 3170 DT_PUTPAGE(mp); 3171 jfs_warn("jfs_readdir: __get_free_page failed!"); 3172 filp->f_pos = DIREND; 3173 return -ENOMEM; 3174 } 3175 3176 while (1) { 3177 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3178 jfs_dirents = 0; 3179 overflow = fix_page = 0; 3180 3181 stbl = DT_GETSTBL(p); 3182 3183 for (i = index; i < p->header.nextindex; i++) { 3184 d = (struct ldtentry *) & p->slot[stbl[i]]; 3185 3186 if (((long) jfs_dirent + d->namlen + 1) > 3187 (dirent_buf + PAGE_SIZE)) { 3188 /* DBCS codepages could overrun dirent_buf */ 3189 index = i; 3190 overflow = 1; 3191 break; 3192 } 3193 3194 d_namleft = d->namlen; 3195 name_ptr = jfs_dirent->name; 3196 jfs_dirent->ino = le32_to_cpu(d->inumber); 3197 3198 if (do_index) { 3199 len = min(d_namleft, DTLHDRDATALEN); 3200 jfs_dirent->position = le32_to_cpu(d->index); 3201 /* 3202 * d->index should always be valid, but it 3203 * isn't. fsck.jfs doesn't create the 3204 * directory index for the lost+found 3205 * directory. Rather than let it go, 3206 * we can try to fix it. 3207 */ 3208 if ((jfs_dirent->position < 2) || 3209 (jfs_dirent->position >= 3210 JFS_IP(ip)->next_index)) { 3211 if (!page_fixed && !isReadOnly(ip)) { 3212 fix_page = 1; 3213 /* 3214 * setting overflow and setting 3215 * index to i will cause the 3216 * same page to be processed 3217 * again starting here 3218 */ 3219 overflow = 1; 3220 index = i; 3221 break; 3222 } 3223 jfs_dirent->position = unique_pos++; 3224 } 3225 } else { 3226 jfs_dirent->position = dtpos; 3227 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 3228 } 3229 3230 /* copy the name of head/only segment */ 3231 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 3232 codepage); 3233 jfs_dirent->name_len = outlen; 3234 3235 /* copy name in the additional segment(s) */ 3236 next = d->next; 3237 while (next >= 0) { 3238 t = (struct dtslot *) & p->slot[next]; 3239 name_ptr += outlen; 3240 d_namleft -= len; 3241 /* Sanity Check */ 3242 if (d_namleft == 0) { 3243 jfs_error(ip->i_sb, 3244 "JFS:Dtree error: ino = " 3245 "%ld, bn=%Ld, index = %d", 3246 (long)ip->i_ino, 3247 (long long)bn, 3248 i); 3249 goto skip_one; 3250 } 3251 len = min(d_namleft, DTSLOTDATALEN); 3252 outlen = jfs_strfromUCS_le(name_ptr, t->name, 3253 len, codepage); 3254 jfs_dirent->name_len += outlen; 3255 3256 next = t->next; 3257 } 3258 3259 jfs_dirents++; 3260 jfs_dirent = next_jfs_dirent(jfs_dirent); 3261 skip_one: 3262 if (!do_index) 3263 dtoffset->index++; 3264 } 3265 3266 if (!overflow) { 3267 /* Point to next leaf page */ 3268 if (p->header.flag & BT_ROOT) 3269 bn = 0; 3270 else { 3271 bn = le64_to_cpu(p->header.next); 3272 index = 0; 3273 /* update offset (pn:index) for new page */ 3274 if (!do_index) { 3275 dtoffset->pn++; 3276 dtoffset->index = 0; 3277 } 3278 } 3279 page_fixed = 0; 3280 } 3281 3282 /* unpin previous leaf page */ 3283 DT_PUTPAGE(mp); 3284 3285 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3286 while (jfs_dirents--) { 3287 filp->f_pos = jfs_dirent->position; 3288 if (filldir(dirent, jfs_dirent->name, 3289 jfs_dirent->name_len, filp->f_pos, 3290 jfs_dirent->ino, DT_UNKNOWN)) 3291 goto out; 3292 jfs_dirent = next_jfs_dirent(jfs_dirent); 3293 } 3294 3295 if (fix_page) { 3296 add_missing_indices(ip, bn); 3297 page_fixed = 1; 3298 } 3299 3300 if (!overflow && (bn == 0)) { 3301 filp->f_pos = DIREND; 3302 break; 3303 } 3304 3305 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3306 if (rc) { 3307 free_page(dirent_buf); 3308 return rc; 3309 } 3310 } 3311 3312 out: 3313 free_page(dirent_buf); 3314 3315 return rc; 3316 } 3317 3318 3319 /* 3320 * dtReadFirst() 3321 * 3322 * function: get the leftmost page of the directory 3323 */ 3324 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3325 { 3326 int rc = 0; 3327 s64 bn; 3328 int psize = 288; /* initial in-line directory */ 3329 struct metapage *mp; 3330 dtpage_t *p; 3331 s8 *stbl; 3332 struct btframe *btsp; 3333 pxd_t *xd; 3334 3335 BT_CLR(btstack); /* reset stack */ 3336 3337 /* 3338 * descend leftmost path of the tree 3339 * 3340 * by convention, root bn = 0. 3341 */ 3342 for (bn = 0;;) { 3343 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3344 if (rc) 3345 return rc; 3346 3347 /* 3348 * leftmost leaf page 3349 */ 3350 if (p->header.flag & BT_LEAF) { 3351 /* return leftmost entry */ 3352 btsp = btstack->top; 3353 btsp->bn = bn; 3354 btsp->index = 0; 3355 btsp->mp = mp; 3356 3357 return 0; 3358 } 3359 3360 /* 3361 * descend down to leftmost child page 3362 */ 3363 if (BT_STACK_FULL(btstack)) { 3364 DT_PUTPAGE(mp); 3365 jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); 3366 BT_STACK_DUMP(btstack); 3367 return -EIO; 3368 } 3369 /* push (bn, index) of the parent page/entry */ 3370 BT_PUSH(btstack, bn, 0); 3371 3372 /* get the leftmost entry */ 3373 stbl = DT_GETSTBL(p); 3374 xd = (pxd_t *) & p->slot[stbl[0]]; 3375 3376 /* get the child page block address */ 3377 bn = addressPXD(xd); 3378 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3379 3380 /* unpin the parent page */ 3381 DT_PUTPAGE(mp); 3382 } 3383 } 3384 3385 3386 /* 3387 * dtReadNext() 3388 * 3389 * function: get the page of the specified offset (pn:index) 3390 * 3391 * return: if (offset > eof), bn = -1; 3392 * 3393 * note: if index > nextindex of the target leaf page, 3394 * start with 1st entry of next leaf page; 3395 */ 3396 static int dtReadNext(struct inode *ip, loff_t * offset, 3397 struct btstack * btstack) 3398 { 3399 int rc = 0; 3400 struct dtoffset { 3401 s16 pn; 3402 s16 index; 3403 s32 unused; 3404 } *dtoffset = (struct dtoffset *) offset; 3405 s64 bn; 3406 struct metapage *mp; 3407 dtpage_t *p; 3408 int index; 3409 int pn; 3410 s8 *stbl; 3411 struct btframe *btsp, *parent; 3412 pxd_t *xd; 3413 3414 /* 3415 * get leftmost leaf page pinned 3416 */ 3417 if ((rc = dtReadFirst(ip, btstack))) 3418 return rc; 3419 3420 /* get leaf page */ 3421 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3422 3423 /* get the start offset (pn:index) */ 3424 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3425 index = dtoffset->index; 3426 3427 /* start at leftmost page ? */ 3428 if (pn == 0) { 3429 /* offset beyond eof ? */ 3430 if (index < p->header.nextindex) 3431 goto out; 3432 3433 if (p->header.flag & BT_ROOT) { 3434 bn = -1; 3435 goto out; 3436 } 3437 3438 /* start with 1st entry of next leaf page */ 3439 dtoffset->pn++; 3440 dtoffset->index = index = 0; 3441 goto a; 3442 } 3443 3444 /* start at non-leftmost page: scan parent pages for large pn */ 3445 if (p->header.flag & BT_ROOT) { 3446 bn = -1; 3447 goto out; 3448 } 3449 3450 /* start after next leaf page ? */ 3451 if (pn > 1) 3452 goto b; 3453 3454 /* get leaf page pn = 1 */ 3455 a: 3456 bn = le64_to_cpu(p->header.next); 3457 3458 /* unpin leaf page */ 3459 DT_PUTPAGE(mp); 3460 3461 /* offset beyond eof ? */ 3462 if (bn == 0) { 3463 bn = -1; 3464 goto out; 3465 } 3466 3467 goto c; 3468 3469 /* 3470 * scan last internal page level to get target leaf page 3471 */ 3472 b: 3473 /* unpin leftmost leaf page */ 3474 DT_PUTPAGE(mp); 3475 3476 /* get left most parent page */ 3477 btsp = btstack->top; 3478 parent = btsp - 1; 3479 bn = parent->bn; 3480 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3481 if (rc) 3482 return rc; 3483 3484 /* scan parent pages at last internal page level */ 3485 while (pn >= p->header.nextindex) { 3486 pn -= p->header.nextindex; 3487 3488 /* get next parent page address */ 3489 bn = le64_to_cpu(p->header.next); 3490 3491 /* unpin current parent page */ 3492 DT_PUTPAGE(mp); 3493 3494 /* offset beyond eof ? */ 3495 if (bn == 0) { 3496 bn = -1; 3497 goto out; 3498 } 3499 3500 /* get next parent page */ 3501 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3502 if (rc) 3503 return rc; 3504 3505 /* update parent page stack frame */ 3506 parent->bn = bn; 3507 } 3508 3509 /* get leaf page address */ 3510 stbl = DT_GETSTBL(p); 3511 xd = (pxd_t *) & p->slot[stbl[pn]]; 3512 bn = addressPXD(xd); 3513 3514 /* unpin parent page */ 3515 DT_PUTPAGE(mp); 3516 3517 /* 3518 * get target leaf page 3519 */ 3520 c: 3521 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3522 if (rc) 3523 return rc; 3524 3525 /* 3526 * leaf page has been completed: 3527 * start with 1st entry of next leaf page 3528 */ 3529 if (index >= p->header.nextindex) { 3530 bn = le64_to_cpu(p->header.next); 3531 3532 /* unpin leaf page */ 3533 DT_PUTPAGE(mp); 3534 3535 /* offset beyond eof ? */ 3536 if (bn == 0) { 3537 bn = -1; 3538 goto out; 3539 } 3540 3541 /* get next leaf page */ 3542 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3543 if (rc) 3544 return rc; 3545 3546 /* start with 1st entry of next leaf page */ 3547 dtoffset->pn++; 3548 dtoffset->index = 0; 3549 } 3550 3551 out: 3552 /* return target leaf page pinned */ 3553 btsp = btstack->top; 3554 btsp->bn = bn; 3555 btsp->index = dtoffset->index; 3556 btsp->mp = mp; 3557 3558 return 0; 3559 } 3560 3561 3562 /* 3563 * dtCompare() 3564 * 3565 * function: compare search key with an internal entry 3566 * 3567 * return: 3568 * < 0 if k is < record 3569 * = 0 if k is = record 3570 * > 0 if k is > record 3571 */ 3572 static int dtCompare(struct component_name * key, /* search key */ 3573 dtpage_t * p, /* directory page */ 3574 int si) 3575 { /* entry slot index */ 3576 wchar_t *kname; 3577 __le16 *name; 3578 int klen, namlen, len, rc; 3579 struct idtentry *ih; 3580 struct dtslot *t; 3581 3582 /* 3583 * force the left-most key on internal pages, at any level of 3584 * the tree, to be less than any search key. 3585 * this obviates having to update the leftmost key on an internal 3586 * page when the user inserts a new key in the tree smaller than 3587 * anything that has been stored. 3588 * 3589 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3590 * at any internal page at any level of the tree, 3591 * it descends to child of the entry anyway - 3592 * ? make the entry as min size dummy entry) 3593 * 3594 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3595 * return (1); 3596 */ 3597 3598 kname = key->name; 3599 klen = key->namlen; 3600 3601 ih = (struct idtentry *) & p->slot[si]; 3602 si = ih->next; 3603 name = ih->name; 3604 namlen = ih->namlen; 3605 len = min(namlen, DTIHDRDATALEN); 3606 3607 /* compare with head/only segment */ 3608 len = min(klen, len); 3609 if ((rc = UniStrncmp_le(kname, name, len))) 3610 return rc; 3611 3612 klen -= len; 3613 namlen -= len; 3614 3615 /* compare with additional segment(s) */ 3616 kname += len; 3617 while (klen > 0 && namlen > 0) { 3618 /* compare with next name segment */ 3619 t = (struct dtslot *) & p->slot[si]; 3620 len = min(namlen, DTSLOTDATALEN); 3621 len = min(klen, len); 3622 name = t->name; 3623 if ((rc = UniStrncmp_le(kname, name, len))) 3624 return rc; 3625 3626 klen -= len; 3627 namlen -= len; 3628 kname += len; 3629 si = t->next; 3630 } 3631 3632 return (klen - namlen); 3633 } 3634 3635 3636 3637 3638 /* 3639 * ciCompare() 3640 * 3641 * function: compare search key with an (leaf/internal) entry 3642 * 3643 * return: 3644 * < 0 if k is < record 3645 * = 0 if k is = record 3646 * > 0 if k is > record 3647 */ 3648 static int ciCompare(struct component_name * key, /* search key */ 3649 dtpage_t * p, /* directory page */ 3650 int si, /* entry slot index */ 3651 int flag) 3652 { 3653 wchar_t *kname, x; 3654 __le16 *name; 3655 int klen, namlen, len, rc; 3656 struct ldtentry *lh; 3657 struct idtentry *ih; 3658 struct dtslot *t; 3659 int i; 3660 3661 /* 3662 * force the left-most key on internal pages, at any level of 3663 * the tree, to be less than any search key. 3664 * this obviates having to update the leftmost key on an internal 3665 * page when the user inserts a new key in the tree smaller than 3666 * anything that has been stored. 3667 * 3668 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3669 * at any internal page at any level of the tree, 3670 * it descends to child of the entry anyway - 3671 * ? make the entry as min size dummy entry) 3672 * 3673 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3674 * return (1); 3675 */ 3676 3677 kname = key->name; 3678 klen = key->namlen; 3679 3680 /* 3681 * leaf page entry 3682 */ 3683 if (p->header.flag & BT_LEAF) { 3684 lh = (struct ldtentry *) & p->slot[si]; 3685 si = lh->next; 3686 name = lh->name; 3687 namlen = lh->namlen; 3688 if (flag & JFS_DIR_INDEX) 3689 len = min(namlen, DTLHDRDATALEN); 3690 else 3691 len = min(namlen, DTLHDRDATALEN_LEGACY); 3692 } 3693 /* 3694 * internal page entry 3695 */ 3696 else { 3697 ih = (struct idtentry *) & p->slot[si]; 3698 si = ih->next; 3699 name = ih->name; 3700 namlen = ih->namlen; 3701 len = min(namlen, DTIHDRDATALEN); 3702 } 3703 3704 /* compare with head/only segment */ 3705 len = min(klen, len); 3706 for (i = 0; i < len; i++, kname++, name++) { 3707 /* only uppercase if case-insensitive support is on */ 3708 if ((flag & JFS_OS2) == JFS_OS2) 3709 x = UniToupper(le16_to_cpu(*name)); 3710 else 3711 x = le16_to_cpu(*name); 3712 if ((rc = *kname - x)) 3713 return rc; 3714 } 3715 3716 klen -= len; 3717 namlen -= len; 3718 3719 /* compare with additional segment(s) */ 3720 while (klen > 0 && namlen > 0) { 3721 /* compare with next name segment */ 3722 t = (struct dtslot *) & p->slot[si]; 3723 len = min(namlen, DTSLOTDATALEN); 3724 len = min(klen, len); 3725 name = t->name; 3726 for (i = 0; i < len; i++, kname++, name++) { 3727 /* only uppercase if case-insensitive support is on */ 3728 if ((flag & JFS_OS2) == JFS_OS2) 3729 x = UniToupper(le16_to_cpu(*name)); 3730 else 3731 x = le16_to_cpu(*name); 3732 3733 if ((rc = *kname - x)) 3734 return rc; 3735 } 3736 3737 klen -= len; 3738 namlen -= len; 3739 si = t->next; 3740 } 3741 3742 return (klen - namlen); 3743 } 3744 3745 3746 /* 3747 * ciGetLeafPrefixKey() 3748 * 3749 * function: compute prefix of suffix compression 3750 * from two adjacent leaf entries 3751 * across page boundary 3752 * 3753 * return: non-zero on error 3754 * 3755 */ 3756 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3757 int ri, struct component_name * key, int flag) 3758 { 3759 int klen, namlen; 3760 wchar_t *pl, *pr, *kname; 3761 struct component_name lkey; 3762 struct component_name rkey; 3763 3764 lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3765 GFP_KERNEL); 3766 if (lkey.name == NULL) 3767 return -ENOSPC; 3768 3769 rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3770 GFP_KERNEL); 3771 if (rkey.name == NULL) { 3772 kfree(lkey.name); 3773 return -ENOSPC; 3774 } 3775 3776 /* get left and right key */ 3777 dtGetKey(lp, li, &lkey, flag); 3778 lkey.name[lkey.namlen] = 0; 3779 3780 if ((flag & JFS_OS2) == JFS_OS2) 3781 ciToUpper(&lkey); 3782 3783 dtGetKey(rp, ri, &rkey, flag); 3784 rkey.name[rkey.namlen] = 0; 3785 3786 3787 if ((flag & JFS_OS2) == JFS_OS2) 3788 ciToUpper(&rkey); 3789 3790 /* compute prefix */ 3791 klen = 0; 3792 kname = key->name; 3793 namlen = min(lkey.namlen, rkey.namlen); 3794 for (pl = lkey.name, pr = rkey.name; 3795 namlen; pl++, pr++, namlen--, klen++, kname++) { 3796 *kname = *pr; 3797 if (*pl != *pr) { 3798 key->namlen = klen + 1; 3799 goto free_names; 3800 } 3801 } 3802 3803 /* l->namlen <= r->namlen since l <= r */ 3804 if (lkey.namlen < rkey.namlen) { 3805 *kname = *pr; 3806 key->namlen = klen + 1; 3807 } else /* l->namelen == r->namelen */ 3808 key->namlen = klen; 3809 3810 free_names: 3811 kfree(lkey.name); 3812 kfree(rkey.name); 3813 return 0; 3814 } 3815 3816 3817 3818 /* 3819 * dtGetKey() 3820 * 3821 * function: get key of the entry 3822 */ 3823 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3824 struct component_name * key, int flag) 3825 { 3826 int si; 3827 s8 *stbl; 3828 struct ldtentry *lh; 3829 struct idtentry *ih; 3830 struct dtslot *t; 3831 int namlen, len; 3832 wchar_t *kname; 3833 __le16 *name; 3834 3835 /* get entry */ 3836 stbl = DT_GETSTBL(p); 3837 si = stbl[i]; 3838 if (p->header.flag & BT_LEAF) { 3839 lh = (struct ldtentry *) & p->slot[si]; 3840 si = lh->next; 3841 namlen = lh->namlen; 3842 name = lh->name; 3843 if (flag & JFS_DIR_INDEX) 3844 len = min(namlen, DTLHDRDATALEN); 3845 else 3846 len = min(namlen, DTLHDRDATALEN_LEGACY); 3847 } else { 3848 ih = (struct idtentry *) & p->slot[si]; 3849 si = ih->next; 3850 namlen = ih->namlen; 3851 name = ih->name; 3852 len = min(namlen, DTIHDRDATALEN); 3853 } 3854 3855 key->namlen = namlen; 3856 kname = key->name; 3857 3858 /* 3859 * move head/only segment 3860 */ 3861 UniStrncpy_from_le(kname, name, len); 3862 3863 /* 3864 * move additional segment(s) 3865 */ 3866 while (si >= 0) { 3867 /* get next segment */ 3868 t = &p->slot[si]; 3869 kname += len; 3870 namlen -= len; 3871 len = min(namlen, DTSLOTDATALEN); 3872 UniStrncpy_from_le(kname, t->name, len); 3873 3874 si = t->next; 3875 } 3876 } 3877 3878 3879 /* 3880 * dtInsertEntry() 3881 * 3882 * function: allocate free slot(s) and 3883 * write a leaf/internal entry 3884 * 3885 * return: entry slot index 3886 */ 3887 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3888 ddata_t * data, struct dt_lock ** dtlock) 3889 { 3890 struct dtslot *h, *t; 3891 struct ldtentry *lh = NULL; 3892 struct idtentry *ih = NULL; 3893 int hsi, fsi, klen, len, nextindex; 3894 wchar_t *kname; 3895 __le16 *name; 3896 s8 *stbl; 3897 pxd_t *xd; 3898 struct dt_lock *dtlck = *dtlock; 3899 struct lv *lv; 3900 int xsi, n; 3901 s64 bn = 0; 3902 struct metapage *mp = NULL; 3903 3904 klen = key->namlen; 3905 kname = key->name; 3906 3907 /* allocate a free slot */ 3908 hsi = fsi = p->header.freelist; 3909 h = &p->slot[fsi]; 3910 p->header.freelist = h->next; 3911 --p->header.freecnt; 3912 3913 /* open new linelock */ 3914 if (dtlck->index >= dtlck->maxcnt) 3915 dtlck = (struct dt_lock *) txLinelock(dtlck); 3916 3917 lv = & dtlck->lv[dtlck->index]; 3918 lv->offset = hsi; 3919 3920 /* write head/only segment */ 3921 if (p->header.flag & BT_LEAF) { 3922 lh = (struct ldtentry *) h; 3923 lh->next = h->next; 3924 lh->inumber = cpu_to_le32(data->leaf.ino); 3925 lh->namlen = klen; 3926 name = lh->name; 3927 if (data->leaf.ip) { 3928 len = min(klen, DTLHDRDATALEN); 3929 if (!(p->header.flag & BT_ROOT)) 3930 bn = addressPXD(&p->header.self); 3931 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3932 data->leaf.ip, 3933 bn, index)); 3934 } else 3935 len = min(klen, DTLHDRDATALEN_LEGACY); 3936 } else { 3937 ih = (struct idtentry *) h; 3938 ih->next = h->next; 3939 xd = (pxd_t *) ih; 3940 *xd = data->xd; 3941 ih->namlen = klen; 3942 name = ih->name; 3943 len = min(klen, DTIHDRDATALEN); 3944 } 3945 3946 UniStrncpy_to_le(name, kname, len); 3947 3948 n = 1; 3949 xsi = hsi; 3950 3951 /* write additional segment(s) */ 3952 t = h; 3953 klen -= len; 3954 while (klen) { 3955 /* get free slot */ 3956 fsi = p->header.freelist; 3957 t = &p->slot[fsi]; 3958 p->header.freelist = t->next; 3959 --p->header.freecnt; 3960 3961 /* is next slot contiguous ? */ 3962 if (fsi != xsi + 1) { 3963 /* close current linelock */ 3964 lv->length = n; 3965 dtlck->index++; 3966 3967 /* open new linelock */ 3968 if (dtlck->index < dtlck->maxcnt) 3969 lv++; 3970 else { 3971 dtlck = (struct dt_lock *) txLinelock(dtlck); 3972 lv = & dtlck->lv[0]; 3973 } 3974 3975 lv->offset = fsi; 3976 n = 0; 3977 } 3978 3979 kname += len; 3980 len = min(klen, DTSLOTDATALEN); 3981 UniStrncpy_to_le(t->name, kname, len); 3982 3983 n++; 3984 xsi = fsi; 3985 klen -= len; 3986 } 3987 3988 /* close current linelock */ 3989 lv->length = n; 3990 dtlck->index++; 3991 3992 *dtlock = dtlck; 3993 3994 /* terminate last/only segment */ 3995 if (h == t) { 3996 /* single segment entry */ 3997 if (p->header.flag & BT_LEAF) 3998 lh->next = -1; 3999 else 4000 ih->next = -1; 4001 } else 4002 /* multi-segment entry */ 4003 t->next = -1; 4004 4005 /* if insert into middle, shift right succeeding entries in stbl */ 4006 stbl = DT_GETSTBL(p); 4007 nextindex = p->header.nextindex; 4008 if (index < nextindex) { 4009 memmove(stbl + index + 1, stbl + index, nextindex - index); 4010 4011 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 4012 s64 lblock; 4013 4014 /* 4015 * Need to update slot number for entries that moved 4016 * in the stbl 4017 */ 4018 mp = NULL; 4019 for (n = index + 1; n <= nextindex; n++) { 4020 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 4021 modify_index(data->leaf.tid, data->leaf.ip, 4022 le32_to_cpu(lh->index), bn, n, 4023 &mp, &lblock); 4024 } 4025 if (mp) 4026 release_metapage(mp); 4027 } 4028 } 4029 4030 stbl[index] = hsi; 4031 4032 /* advance next available entry index of stbl */ 4033 ++p->header.nextindex; 4034 } 4035 4036 4037 /* 4038 * dtMoveEntry() 4039 * 4040 * function: move entries from split/left page to new/right page 4041 * 4042 * nextindex of dst page and freelist/freecnt of both pages 4043 * are updated. 4044 */ 4045 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 4046 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 4047 int do_index) 4048 { 4049 int ssi, next; /* src slot index */ 4050 int di; /* dst entry index */ 4051 int dsi; /* dst slot index */ 4052 s8 *sstbl, *dstbl; /* sorted entry table */ 4053 int snamlen, len; 4054 struct ldtentry *slh, *dlh = NULL; 4055 struct idtentry *sih, *dih = NULL; 4056 struct dtslot *h, *s, *d; 4057 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 4058 struct lv *slv, *dlv; 4059 int xssi, ns, nd; 4060 int sfsi; 4061 4062 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 4063 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 4064 4065 dsi = dp->header.freelist; /* first (whole page) free slot */ 4066 sfsi = sp->header.freelist; 4067 4068 /* linelock destination entry slot */ 4069 dlv = & ddtlck->lv[ddtlck->index]; 4070 dlv->offset = dsi; 4071 4072 /* linelock source entry slot */ 4073 slv = & sdtlck->lv[sdtlck->index]; 4074 slv->offset = sstbl[si]; 4075 xssi = slv->offset - 1; 4076 4077 /* 4078 * move entries 4079 */ 4080 ns = nd = 0; 4081 for (di = 0; si < sp->header.nextindex; si++, di++) { 4082 ssi = sstbl[si]; 4083 dstbl[di] = dsi; 4084 4085 /* is next slot contiguous ? */ 4086 if (ssi != xssi + 1) { 4087 /* close current linelock */ 4088 slv->length = ns; 4089 sdtlck->index++; 4090 4091 /* open new linelock */ 4092 if (sdtlck->index < sdtlck->maxcnt) 4093 slv++; 4094 else { 4095 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 4096 slv = & sdtlck->lv[0]; 4097 } 4098 4099 slv->offset = ssi; 4100 ns = 0; 4101 } 4102 4103 /* 4104 * move head/only segment of an entry 4105 */ 4106 /* get dst slot */ 4107 h = d = &dp->slot[dsi]; 4108 4109 /* get src slot and move */ 4110 s = &sp->slot[ssi]; 4111 if (sp->header.flag & BT_LEAF) { 4112 /* get source entry */ 4113 slh = (struct ldtentry *) s; 4114 dlh = (struct ldtentry *) h; 4115 snamlen = slh->namlen; 4116 4117 if (do_index) { 4118 len = min(snamlen, DTLHDRDATALEN); 4119 dlh->index = slh->index; /* little-endian */ 4120 } else 4121 len = min(snamlen, DTLHDRDATALEN_LEGACY); 4122 4123 memcpy(dlh, slh, 6 + len * 2); 4124 4125 next = slh->next; 4126 4127 /* update dst head/only segment next field */ 4128 dsi++; 4129 dlh->next = dsi; 4130 } else { 4131 sih = (struct idtentry *) s; 4132 snamlen = sih->namlen; 4133 4134 len = min(snamlen, DTIHDRDATALEN); 4135 dih = (struct idtentry *) h; 4136 memcpy(dih, sih, 10 + len * 2); 4137 next = sih->next; 4138 4139 dsi++; 4140 dih->next = dsi; 4141 } 4142 4143 /* free src head/only segment */ 4144 s->next = sfsi; 4145 s->cnt = 1; 4146 sfsi = ssi; 4147 4148 ns++; 4149 nd++; 4150 xssi = ssi; 4151 4152 /* 4153 * move additional segment(s) of the entry 4154 */ 4155 snamlen -= len; 4156 while ((ssi = next) >= 0) { 4157 /* is next slot contiguous ? */ 4158 if (ssi != xssi + 1) { 4159 /* close current linelock */ 4160 slv->length = ns; 4161 sdtlck->index++; 4162 4163 /* open new linelock */ 4164 if (sdtlck->index < sdtlck->maxcnt) 4165 slv++; 4166 else { 4167 sdtlck = 4168 (struct dt_lock *) 4169 txLinelock(sdtlck); 4170 slv = & sdtlck->lv[0]; 4171 } 4172 4173 slv->offset = ssi; 4174 ns = 0; 4175 } 4176 4177 /* get next source segment */ 4178 s = &sp->slot[ssi]; 4179 4180 /* get next destination free slot */ 4181 d++; 4182 4183 len = min(snamlen, DTSLOTDATALEN); 4184 UniStrncpy_le(d->name, s->name, len); 4185 4186 ns++; 4187 nd++; 4188 xssi = ssi; 4189 4190 dsi++; 4191 d->next = dsi; 4192 4193 /* free source segment */ 4194 next = s->next; 4195 s->next = sfsi; 4196 s->cnt = 1; 4197 sfsi = ssi; 4198 4199 snamlen -= len; 4200 } /* end while */ 4201 4202 /* terminate dst last/only segment */ 4203 if (h == d) { 4204 /* single segment entry */ 4205 if (dp->header.flag & BT_LEAF) 4206 dlh->next = -1; 4207 else 4208 dih->next = -1; 4209 } else 4210 /* multi-segment entry */ 4211 d->next = -1; 4212 } /* end for */ 4213 4214 /* close current linelock */ 4215 slv->length = ns; 4216 sdtlck->index++; 4217 *sdtlock = sdtlck; 4218 4219 dlv->length = nd; 4220 ddtlck->index++; 4221 *ddtlock = ddtlck; 4222 4223 /* update source header */ 4224 sp->header.freelist = sfsi; 4225 sp->header.freecnt += nd; 4226 4227 /* update destination header */ 4228 dp->header.nextindex = di; 4229 4230 dp->header.freelist = dsi; 4231 dp->header.freecnt -= nd; 4232 } 4233 4234 4235 /* 4236 * dtDeleteEntry() 4237 * 4238 * function: free a (leaf/internal) entry 4239 * 4240 * log freelist header, stbl, and each segment slot of entry 4241 * (even though last/only segment next field is modified, 4242 * physical image logging requires all segment slots of 4243 * the entry logged to avoid applying previous updates 4244 * to the same slots) 4245 */ 4246 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 4247 { 4248 int fsi; /* free entry slot index */ 4249 s8 *stbl; 4250 struct dtslot *t; 4251 int si, freecnt; 4252 struct dt_lock *dtlck = *dtlock; 4253 struct lv *lv; 4254 int xsi, n; 4255 4256 /* get free entry slot index */ 4257 stbl = DT_GETSTBL(p); 4258 fsi = stbl[fi]; 4259 4260 /* open new linelock */ 4261 if (dtlck->index >= dtlck->maxcnt) 4262 dtlck = (struct dt_lock *) txLinelock(dtlck); 4263 lv = & dtlck->lv[dtlck->index]; 4264 4265 lv->offset = fsi; 4266 4267 /* get the head/only segment */ 4268 t = &p->slot[fsi]; 4269 if (p->header.flag & BT_LEAF) 4270 si = ((struct ldtentry *) t)->next; 4271 else 4272 si = ((struct idtentry *) t)->next; 4273 t->next = si; 4274 t->cnt = 1; 4275 4276 n = freecnt = 1; 4277 xsi = fsi; 4278 4279 /* find the last/only segment */ 4280 while (si >= 0) { 4281 /* is next slot contiguous ? */ 4282 if (si != xsi + 1) { 4283 /* close current linelock */ 4284 lv->length = n; 4285 dtlck->index++; 4286 4287 /* open new linelock */ 4288 if (dtlck->index < dtlck->maxcnt) 4289 lv++; 4290 else { 4291 dtlck = (struct dt_lock *) txLinelock(dtlck); 4292 lv = & dtlck->lv[0]; 4293 } 4294 4295 lv->offset = si; 4296 n = 0; 4297 } 4298 4299 n++; 4300 xsi = si; 4301 freecnt++; 4302 4303 t = &p->slot[si]; 4304 t->cnt = 1; 4305 si = t->next; 4306 } 4307 4308 /* close current linelock */ 4309 lv->length = n; 4310 dtlck->index++; 4311 4312 *dtlock = dtlck; 4313 4314 /* update freelist */ 4315 t->next = p->header.freelist; 4316 p->header.freelist = fsi; 4317 p->header.freecnt += freecnt; 4318 4319 /* if delete from middle, 4320 * shift left the succedding entries in the stbl 4321 */ 4322 si = p->header.nextindex; 4323 if (fi < si - 1) 4324 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4325 4326 p->header.nextindex--; 4327 } 4328 4329 4330 /* 4331 * dtTruncateEntry() 4332 * 4333 * function: truncate a (leaf/internal) entry 4334 * 4335 * log freelist header, stbl, and each segment slot of entry 4336 * (even though last/only segment next field is modified, 4337 * physical image logging requires all segment slots of 4338 * the entry logged to avoid applying previous updates 4339 * to the same slots) 4340 */ 4341 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4342 { 4343 int tsi; /* truncate entry slot index */ 4344 s8 *stbl; 4345 struct dtslot *t; 4346 int si, freecnt; 4347 struct dt_lock *dtlck = *dtlock; 4348 struct lv *lv; 4349 int fsi, xsi, n; 4350 4351 /* get free entry slot index */ 4352 stbl = DT_GETSTBL(p); 4353 tsi = stbl[ti]; 4354 4355 /* open new linelock */ 4356 if (dtlck->index >= dtlck->maxcnt) 4357 dtlck = (struct dt_lock *) txLinelock(dtlck); 4358 lv = & dtlck->lv[dtlck->index]; 4359 4360 lv->offset = tsi; 4361 4362 /* get the head/only segment */ 4363 t = &p->slot[tsi]; 4364 ASSERT(p->header.flag & BT_INTERNAL); 4365 ((struct idtentry *) t)->namlen = 0; 4366 si = ((struct idtentry *) t)->next; 4367 ((struct idtentry *) t)->next = -1; 4368 4369 n = 1; 4370 freecnt = 0; 4371 fsi = si; 4372 xsi = tsi; 4373 4374 /* find the last/only segment */ 4375 while (si >= 0) { 4376 /* is next slot contiguous ? */ 4377 if (si != xsi + 1) { 4378 /* close current linelock */ 4379 lv->length = n; 4380 dtlck->index++; 4381 4382 /* open new linelock */ 4383 if (dtlck->index < dtlck->maxcnt) 4384 lv++; 4385 else { 4386 dtlck = (struct dt_lock *) txLinelock(dtlck); 4387 lv = & dtlck->lv[0]; 4388 } 4389 4390 lv->offset = si; 4391 n = 0; 4392 } 4393 4394 n++; 4395 xsi = si; 4396 freecnt++; 4397 4398 t = &p->slot[si]; 4399 t->cnt = 1; 4400 si = t->next; 4401 } 4402 4403 /* close current linelock */ 4404 lv->length = n; 4405 dtlck->index++; 4406 4407 *dtlock = dtlck; 4408 4409 /* update freelist */ 4410 if (freecnt == 0) 4411 return; 4412 t->next = p->header.freelist; 4413 p->header.freelist = fsi; 4414 p->header.freecnt += freecnt; 4415 } 4416 4417 4418 /* 4419 * dtLinelockFreelist() 4420 */ 4421 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4422 int m, /* max slot index */ 4423 struct dt_lock ** dtlock) 4424 { 4425 int fsi; /* free entry slot index */ 4426 struct dtslot *t; 4427 int si; 4428 struct dt_lock *dtlck = *dtlock; 4429 struct lv *lv; 4430 int xsi, n; 4431 4432 /* get free entry slot index */ 4433 fsi = p->header.freelist; 4434 4435 /* open new linelock */ 4436 if (dtlck->index >= dtlck->maxcnt) 4437 dtlck = (struct dt_lock *) txLinelock(dtlck); 4438 lv = & dtlck->lv[dtlck->index]; 4439 4440 lv->offset = fsi; 4441 4442 n = 1; 4443 xsi = fsi; 4444 4445 t = &p->slot[fsi]; 4446 si = t->next; 4447 4448 /* find the last/only segment */ 4449 while (si < m && si >= 0) { 4450 /* is next slot contiguous ? */ 4451 if (si != xsi + 1) { 4452 /* close current linelock */ 4453 lv->length = n; 4454 dtlck->index++; 4455 4456 /* open new linelock */ 4457 if (dtlck->index < dtlck->maxcnt) 4458 lv++; 4459 else { 4460 dtlck = (struct dt_lock *) txLinelock(dtlck); 4461 lv = & dtlck->lv[0]; 4462 } 4463 4464 lv->offset = si; 4465 n = 0; 4466 } 4467 4468 n++; 4469 xsi = si; 4470 4471 t = &p->slot[si]; 4472 si = t->next; 4473 } 4474 4475 /* close current linelock */ 4476 lv->length = n; 4477 dtlck->index++; 4478 4479 *dtlock = dtlck; 4480 } 4481 4482 4483 /* 4484 * NAME: dtModify 4485 * 4486 * FUNCTION: Modify the inode number part of a directory entry 4487 * 4488 * PARAMETERS: 4489 * tid - Transaction id 4490 * ip - Inode of parent directory 4491 * key - Name of entry to be modified 4492 * orig_ino - Original inode number expected in entry 4493 * new_ino - New inode number to put into entry 4494 * flag - JFS_RENAME 4495 * 4496 * RETURNS: 4497 * -ESTALE - If entry found does not match orig_ino passed in 4498 * -ENOENT - If no entry can be found to match key 4499 * 0 - If successfully modified entry 4500 */ 4501 int dtModify(tid_t tid, struct inode *ip, 4502 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4503 { 4504 int rc; 4505 s64 bn; 4506 struct metapage *mp; 4507 dtpage_t *p; 4508 int index; 4509 struct btstack btstack; 4510 struct tlock *tlck; 4511 struct dt_lock *dtlck; 4512 struct lv *lv; 4513 s8 *stbl; 4514 int entry_si; /* entry slot index */ 4515 struct ldtentry *entry; 4516 4517 /* 4518 * search for the entry to modify: 4519 * 4520 * dtSearch() returns (leaf page pinned, index at which to modify). 4521 */ 4522 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4523 return rc; 4524 4525 /* retrieve search result */ 4526 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4527 4528 BT_MARK_DIRTY(mp, ip); 4529 /* 4530 * acquire a transaction lock on the leaf page of named entry 4531 */ 4532 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4533 dtlck = (struct dt_lock *) & tlck->lock; 4534 4535 /* get slot index of the entry */ 4536 stbl = DT_GETSTBL(p); 4537 entry_si = stbl[index]; 4538 4539 /* linelock entry */ 4540 ASSERT(dtlck->index == 0); 4541 lv = & dtlck->lv[0]; 4542 lv->offset = entry_si; 4543 lv->length = 1; 4544 dtlck->index++; 4545 4546 /* get the head/only segment */ 4547 entry = (struct ldtentry *) & p->slot[entry_si]; 4548 4549 /* substitute the inode number of the entry */ 4550 entry->inumber = cpu_to_le32(new_ino); 4551 4552 /* unpin the leaf page */ 4553 DT_PUTPAGE(mp); 4554 4555 return 0; 4556 } 4557 4558 #ifdef _JFS_DEBUG_DTREE 4559 /* 4560 * dtDisplayTree() 4561 * 4562 * function: traverse forward 4563 */ 4564 int dtDisplayTree(struct inode *ip) 4565 { 4566 int rc; 4567 struct metapage *mp; 4568 dtpage_t *p; 4569 s64 bn, pbn; 4570 int index, lastindex, v, h; 4571 pxd_t *xd; 4572 struct btstack btstack; 4573 struct btframe *btsp; 4574 struct btframe *parent; 4575 u8 *stbl; 4576 int psize = 256; 4577 4578 printk("display B+-tree.\n"); 4579 4580 /* clear stack */ 4581 btsp = btstack.stack; 4582 4583 /* 4584 * start with root 4585 * 4586 * root resides in the inode 4587 */ 4588 bn = 0; 4589 v = h = 0; 4590 4591 /* 4592 * first access of each page: 4593 */ 4594 newPage: 4595 DT_GETPAGE(ip, bn, mp, psize, p, rc); 4596 if (rc) 4597 return rc; 4598 4599 /* process entries forward from first index */ 4600 index = 0; 4601 lastindex = p->header.nextindex - 1; 4602 4603 if (p->header.flag & BT_INTERNAL) { 4604 /* 4605 * first access of each internal page 4606 */ 4607 printf("internal page "); 4608 dtDisplayPage(ip, bn, p); 4609 4610 goto getChild; 4611 } else { /* (p->header.flag & BT_LEAF) */ 4612 4613 /* 4614 * first access of each leaf page 4615 */ 4616 printf("leaf page "); 4617 dtDisplayPage(ip, bn, p); 4618 4619 /* 4620 * process leaf page entries 4621 * 4622 for ( ; index <= lastindex; index++) 4623 { 4624 } 4625 */ 4626 4627 /* unpin the leaf page */ 4628 DT_PUTPAGE(mp); 4629 } 4630 4631 /* 4632 * go back up to the parent page 4633 */ 4634 getParent: 4635 /* pop/restore parent entry for the current child page */ 4636 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) 4637 /* current page must have been root */ 4638 return; 4639 4640 /* 4641 * parent page scan completed 4642 */ 4643 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4644 /* go back up to the parent page */ 4645 goto getParent; 4646 } 4647 4648 /* 4649 * parent page has entries remaining 4650 */ 4651 /* get back the parent page */ 4652 bn = parent->bn; 4653 /* v = parent->level; */ 4654 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4655 if (rc) 4656 return rc; 4657 4658 /* get next parent entry */ 4659 index++; 4660 4661 /* 4662 * internal page: go down to child page of current entry 4663 */ 4664 getChild: 4665 /* push/save current parent entry for the child page */ 4666 btsp->bn = pbn = bn; 4667 btsp->index = index; 4668 btsp->lastindex = lastindex; 4669 /* btsp->level = v; */ 4670 /* btsp->node = h; */ 4671 ++btsp; 4672 4673 /* get current entry for the child page */ 4674 stbl = DT_GETSTBL(p); 4675 xd = (pxd_t *) & p->slot[stbl[index]]; 4676 4677 /* 4678 * first access of each internal entry: 4679 */ 4680 4681 /* get child page */ 4682 bn = addressPXD(xd); 4683 psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize; 4684 4685 printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn); 4686 v++; 4687 h = index; 4688 4689 /* release parent page */ 4690 DT_PUTPAGE(mp); 4691 4692 /* process the child page */ 4693 goto newPage; 4694 } 4695 4696 4697 /* 4698 * dtDisplayPage() 4699 * 4700 * function: display page 4701 */ 4702 int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p) 4703 { 4704 int rc; 4705 struct metapage *mp; 4706 struct ldtentry *lh; 4707 struct idtentry *ih; 4708 pxd_t *xd; 4709 int i, j; 4710 u8 *stbl; 4711 wchar_t name[JFS_NAME_MAX + 1]; 4712 struct component_name key = { 0, name }; 4713 int freepage = 0; 4714 4715 if (p == NULL) { 4716 freepage = 1; 4717 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4718 if (rc) 4719 return rc; 4720 } 4721 4722 /* display page control */ 4723 printk("bn:0x%Lx flag:0x%08x nextindex:%d\n", 4724 bn, p->header.flag, p->header.nextindex); 4725 4726 /* display entries */ 4727 stbl = DT_GETSTBL(p); 4728 for (i = 0, j = 1; i < p->header.nextindex; i++, j++) { 4729 dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag); 4730 key.name[key.namlen] = '\0'; 4731 if (p->header.flag & BT_LEAF) { 4732 lh = (struct ldtentry *) & p->slot[stbl[i]]; 4733 printf("\t[%d] %s:%d", i, key.name, 4734 le32_to_cpu(lh->inumber)); 4735 } else { 4736 ih = (struct idtentry *) & p->slot[stbl[i]]; 4737 xd = (pxd_t *) ih; 4738 bn = addressPXD(xd); 4739 printf("\t[%d] %s:0x%Lx", i, key.name, bn); 4740 } 4741 4742 if (j == 4) { 4743 printf("\n"); 4744 j = 0; 4745 } 4746 } 4747 4748 printf("\n"); 4749 4750 if (freepage) 4751 DT_PUTPAGE(mp); 4752 4753 return 0; 4754 } 4755 #endif /* _JFS_DEBUG_DTREE */ 4756