1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) International Business Machines Corp., 2000-2004 4 */ 5 6 /* 7 * jfs_dtree.c: directory B+-tree manager 8 * 9 * B+-tree with variable length key directory: 10 * 11 * each directory page is structured as an array of 32-byte 12 * directory entry slots initialized as a freelist 13 * to avoid search/compaction of free space at insertion. 14 * when an entry is inserted, a number of slots are allocated 15 * from the freelist as required to store variable length data 16 * of the entry; when the entry is deleted, slots of the entry 17 * are returned to freelist. 18 * 19 * leaf entry stores full name as key and file serial number 20 * (aka inode number) as data. 21 * internal/router entry stores sufffix compressed name 22 * as key and simple extent descriptor as data. 23 * 24 * each directory page maintains a sorted entry index table 25 * which stores the start slot index of sorted entries 26 * to allow binary search on the table. 27 * 28 * directory starts as a root/leaf page in on-disk inode 29 * inline data area. 30 * when it becomes full, it starts a leaf of a external extent 31 * of length of 1 block. each time the first leaf becomes full, 32 * it is extended rather than split (its size is doubled), 33 * until its length becoms 4 KBytes, from then the extent is split 34 * with new 4 Kbyte extent when it becomes full 35 * to reduce external fragmentation of small directories. 36 * 37 * blah, blah, blah, for linear scan of directory in pieces by 38 * readdir(). 39 * 40 * 41 * case-insensitive directory file system 42 * 43 * names are stored in case-sensitive way in leaf entry. 44 * but stored, searched and compared in case-insensitive (uppercase) order 45 * (i.e., both search key and entry key are folded for search/compare): 46 * (note that case-sensitive order is BROKEN in storage, e.g., 47 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 48 * 49 * entries which folds to the same key makes up a equivalent class 50 * whose members are stored as contiguous cluster (may cross page boundary) 51 * but whose order is arbitrary and acts as duplicate, e.g., 52 * abc, Abc, aBc, abC) 53 * 54 * once match is found at leaf, requires scan forward/backward 55 * either for, in case-insensitive search, duplicate 56 * or for, in case-sensitive search, for exact match 57 * 58 * router entry must be created/stored in case-insensitive way 59 * in internal entry: 60 * (right most key of left page and left most key of right page 61 * are folded, and its suffix compression is propagated as router 62 * key in parent) 63 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 64 * should be made the router key for the split) 65 * 66 * case-insensitive search: 67 * 68 * fold search key; 69 * 70 * case-insensitive search of B-tree: 71 * for internal entry, router key is already folded; 72 * for leaf entry, fold the entry key before comparison. 73 * 74 * if (leaf entry case-insensitive match found) 75 * if (next entry satisfies case-insensitive match) 76 * return EDUPLICATE; 77 * if (prev entry satisfies case-insensitive match) 78 * return EDUPLICATE; 79 * return match; 80 * else 81 * return no match; 82 * 83 * serialization: 84 * target directory inode lock is being held on entry/exit 85 * of all main directory service routines. 86 * 87 * log based recovery: 88 */ 89 90 #include <linux/fs.h> 91 #include <linux/quotaops.h> 92 #include <linux/slab.h> 93 #include "jfs_incore.h" 94 #include "jfs_superblock.h" 95 #include "jfs_filsys.h" 96 #include "jfs_metapage.h" 97 #include "jfs_dmap.h" 98 #include "jfs_unicode.h" 99 #include "jfs_debug.h" 100 101 /* dtree split parameter */ 102 struct dtsplit { 103 struct metapage *mp; 104 s16 index; 105 s16 nslot; 106 struct component_name *key; 107 ddata_t *data; 108 struct pxdlist *pxdlist; 109 }; 110 111 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 112 113 /* get page buffer for specified block address */ 114 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ 115 do { \ 116 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ 117 if (!(RC)) { \ 118 if ((BN) && !check_dtpage(P)) { \ 119 BT_PUTPAGE(MP); \ 120 jfs_error((IP)->i_sb, \ 121 "DT_GETPAGE: dtree page corrupt\n"); \ 122 MP = NULL; \ 123 RC = -EIO; \ 124 } \ 125 } \ 126 } while (0) 127 128 /* for consistency */ 129 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 130 131 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 132 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 133 134 /* 135 * forward references 136 */ 137 static int dtSplitUp(tid_t tid, struct inode *ip, 138 struct dtsplit * split, struct btstack * btstack); 139 140 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 141 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 142 143 static int dtExtendPage(tid_t tid, struct inode *ip, 144 struct dtsplit * split, struct btstack * btstack); 145 146 static int dtSplitRoot(tid_t tid, struct inode *ip, 147 struct dtsplit * split, struct metapage ** rmpp); 148 149 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 150 dtpage_t * fp, struct btstack * btstack); 151 152 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 153 154 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 155 156 static int dtReadNext(struct inode *ip, 157 loff_t * offset, struct btstack * btstack); 158 159 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 160 161 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 162 int flag); 163 164 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 165 int flag); 166 167 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 168 int ri, struct component_name * key, int flag); 169 170 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 171 ddata_t * data, struct dt_lock **); 172 173 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 174 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 175 int do_index); 176 177 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 178 179 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 180 181 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 182 183 #define ciToUpper(c) UniStrupr((c)->name) 184 185 /* 186 * read_index_page() 187 * 188 * Reads a page of a directory's index table. 189 * Having metadata mapped into the directory inode's address space 190 * presents a multitude of problems. We avoid this by mapping to 191 * the absolute address space outside of the *_metapage routines 192 */ 193 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 194 { 195 int rc; 196 s64 xaddr; 197 int xflag; 198 s32 xlen; 199 200 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 201 if (rc || (xaddr == 0)) 202 return NULL; 203 204 return read_metapage(inode, xaddr, PSIZE, 1); 205 } 206 207 /* 208 * get_index_page() 209 * 210 * Same as get_index_page(), but get's a new page without reading 211 */ 212 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 213 { 214 int rc; 215 s64 xaddr; 216 int xflag; 217 s32 xlen; 218 219 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 220 if (rc || (xaddr == 0)) 221 return NULL; 222 223 return get_metapage(inode, xaddr, PSIZE, 1); 224 } 225 226 /* 227 * find_index() 228 * 229 * Returns dtree page containing directory table entry for specified 230 * index and pointer to its entry. 231 * 232 * mp must be released by caller. 233 */ 234 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 235 struct metapage ** mp, s64 *lblock) 236 { 237 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 238 s64 blkno; 239 s64 offset; 240 int page_offset; 241 struct dir_table_slot *slot; 242 static int maxWarnings = 10; 243 244 if (index < 2) { 245 if (maxWarnings) { 246 jfs_warn("find_entry called with index = %d", index); 247 maxWarnings--; 248 } 249 return NULL; 250 } 251 252 if (index >= jfs_ip->next_index) { 253 jfs_warn("find_entry called with index >= next_index"); 254 return NULL; 255 } 256 257 if (jfs_dirtable_inline(ip)) { 258 /* 259 * Inline directory table 260 */ 261 *mp = NULL; 262 slot = &jfs_ip->i_dirtable[index - 2]; 263 } else { 264 offset = (index - 2) * sizeof(struct dir_table_slot); 265 page_offset = offset & (PSIZE - 1); 266 blkno = ((offset + 1) >> L2PSIZE) << 267 JFS_SBI(ip->i_sb)->l2nbperpage; 268 269 if (*mp && (*lblock != blkno)) { 270 release_metapage(*mp); 271 *mp = NULL; 272 } 273 if (!(*mp)) { 274 *lblock = blkno; 275 *mp = read_index_page(ip, blkno); 276 } 277 if (!(*mp)) { 278 jfs_err("free_index: error reading directory table"); 279 return NULL; 280 } 281 282 slot = 283 (struct dir_table_slot *) ((char *) (*mp)->data + 284 page_offset); 285 } 286 return slot; 287 } 288 289 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 290 u32 index) 291 { 292 struct tlock *tlck; 293 struct linelock *llck; 294 struct lv *lv; 295 296 tlck = txLock(tid, ip, mp, tlckDATA); 297 llck = (struct linelock *) tlck->lock; 298 299 if (llck->index >= llck->maxcnt) 300 llck = txLinelock(llck); 301 lv = &llck->lv[llck->index]; 302 303 /* 304 * Linelock slot size is twice the size of directory table 305 * slot size. 512 entries per page. 306 */ 307 lv->offset = ((index - 2) & 511) >> 1; 308 lv->length = 1; 309 llck->index++; 310 } 311 312 /* 313 * add_index() 314 * 315 * Adds an entry to the directory index table. This is used to provide 316 * each directory entry with a persistent index in which to resume 317 * directory traversals 318 */ 319 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 320 { 321 struct super_block *sb = ip->i_sb; 322 struct jfs_sb_info *sbi = JFS_SBI(sb); 323 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 324 u64 blkno; 325 struct dir_table_slot *dirtab_slot; 326 u32 index; 327 struct linelock *llck; 328 struct lv *lv; 329 struct metapage *mp; 330 s64 offset; 331 uint page_offset; 332 struct tlock *tlck; 333 s64 xaddr; 334 335 ASSERT(DO_INDEX(ip)); 336 337 if (jfs_ip->next_index < 2) { 338 jfs_warn("add_index: next_index = %d. Resetting!", 339 jfs_ip->next_index); 340 jfs_ip->next_index = 2; 341 } 342 343 index = jfs_ip->next_index++; 344 345 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 346 /* 347 * i_size reflects size of index table, or 8 bytes per entry. 348 */ 349 ip->i_size = (loff_t) (index - 1) << 3; 350 351 /* 352 * dir table fits inline within inode 353 */ 354 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 355 dirtab_slot->flag = DIR_INDEX_VALID; 356 dirtab_slot->slot = slot; 357 DTSaddress(dirtab_slot, bn); 358 359 set_cflag(COMMIT_Dirtable, ip); 360 361 return index; 362 } 363 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 364 struct dir_table_slot temp_table[12]; 365 366 /* 367 * It's time to move the inline table to an external 368 * page and begin to build the xtree 369 */ 370 if (dquot_alloc_block(ip, sbi->nbperpage)) 371 goto clean_up; 372 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 373 dquot_free_block(ip, sbi->nbperpage); 374 goto clean_up; 375 } 376 377 /* 378 * Save the table, we're going to overwrite it with the 379 * xtree root 380 */ 381 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 382 383 /* 384 * Initialize empty x-tree 385 */ 386 xtInitRoot(tid, ip); 387 388 /* 389 * Add the first block to the xtree 390 */ 391 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 392 /* This really shouldn't fail */ 393 jfs_warn("add_index: xtInsert failed!"); 394 memcpy(&jfs_ip->i_dirtable, temp_table, 395 sizeof (temp_table)); 396 dbFree(ip, xaddr, sbi->nbperpage); 397 dquot_free_block(ip, sbi->nbperpage); 398 goto clean_up; 399 } 400 ip->i_size = PSIZE; 401 402 mp = get_index_page(ip, 0); 403 if (!mp) { 404 jfs_err("add_index: get_metapage failed!"); 405 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 406 memcpy(&jfs_ip->i_dirtable, temp_table, 407 sizeof (temp_table)); 408 goto clean_up; 409 } 410 tlck = txLock(tid, ip, mp, tlckDATA); 411 llck = (struct linelock *) & tlck->lock; 412 ASSERT(llck->index == 0); 413 lv = &llck->lv[0]; 414 415 lv->offset = 0; 416 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 417 llck->index++; 418 419 memcpy(mp->data, temp_table, sizeof(temp_table)); 420 421 mark_metapage_dirty(mp); 422 release_metapage(mp); 423 424 /* 425 * Logging is now directed by xtree tlocks 426 */ 427 clear_cflag(COMMIT_Dirtable, ip); 428 } 429 430 offset = (index - 2) * sizeof(struct dir_table_slot); 431 page_offset = offset & (PSIZE - 1); 432 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 433 if (page_offset == 0) { 434 /* 435 * This will be the beginning of a new page 436 */ 437 xaddr = 0; 438 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 439 jfs_warn("add_index: xtInsert failed!"); 440 goto clean_up; 441 } 442 ip->i_size += PSIZE; 443 444 if ((mp = get_index_page(ip, blkno))) 445 memset(mp->data, 0, PSIZE); /* Just looks better */ 446 else 447 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 448 } else 449 mp = read_index_page(ip, blkno); 450 451 if (!mp) { 452 jfs_err("add_index: get/read_metapage failed!"); 453 goto clean_up; 454 } 455 456 lock_index(tid, ip, mp, index); 457 458 dirtab_slot = 459 (struct dir_table_slot *) ((char *) mp->data + page_offset); 460 dirtab_slot->flag = DIR_INDEX_VALID; 461 dirtab_slot->slot = slot; 462 DTSaddress(dirtab_slot, bn); 463 464 mark_metapage_dirty(mp); 465 release_metapage(mp); 466 467 return index; 468 469 clean_up: 470 471 jfs_ip->next_index--; 472 473 return 0; 474 } 475 476 /* 477 * free_index() 478 * 479 * Marks an entry to the directory index table as free. 480 */ 481 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 482 { 483 struct dir_table_slot *dirtab_slot; 484 s64 lblock; 485 struct metapage *mp = NULL; 486 487 dirtab_slot = find_index(ip, index, &mp, &lblock); 488 489 if (!dirtab_slot) 490 return; 491 492 dirtab_slot->flag = DIR_INDEX_FREE; 493 dirtab_slot->slot = dirtab_slot->addr1 = 0; 494 dirtab_slot->addr2 = cpu_to_le32(next); 495 496 if (mp) { 497 lock_index(tid, ip, mp, index); 498 mark_metapage_dirty(mp); 499 release_metapage(mp); 500 } else 501 set_cflag(COMMIT_Dirtable, ip); 502 } 503 504 /* 505 * modify_index() 506 * 507 * Changes an entry in the directory index table 508 */ 509 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 510 int slot, struct metapage ** mp, s64 *lblock) 511 { 512 struct dir_table_slot *dirtab_slot; 513 514 dirtab_slot = find_index(ip, index, mp, lblock); 515 516 if (!dirtab_slot) 517 return; 518 519 DTSaddress(dirtab_slot, bn); 520 dirtab_slot->slot = slot; 521 522 if (*mp) { 523 lock_index(tid, ip, *mp, index); 524 mark_metapage_dirty(*mp); 525 } else 526 set_cflag(COMMIT_Dirtable, ip); 527 } 528 529 /* 530 * read_index() 531 * 532 * reads a directory table slot 533 */ 534 static int read_index(struct inode *ip, u32 index, 535 struct dir_table_slot * dirtab_slot) 536 { 537 s64 lblock; 538 struct metapage *mp = NULL; 539 struct dir_table_slot *slot; 540 541 slot = find_index(ip, index, &mp, &lblock); 542 if (!slot) { 543 return -EIO; 544 } 545 546 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 547 548 if (mp) 549 release_metapage(mp); 550 551 return 0; 552 } 553 554 /* 555 * dtSearch() 556 * 557 * function: 558 * Search for the entry with specified key 559 * 560 * parameter: 561 * 562 * return: 0 - search result on stack, leaf page pinned; 563 * errno - I/O error 564 */ 565 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 566 struct btstack * btstack, int flag) 567 { 568 int rc = 0; 569 int cmp = 1; /* init for empty page */ 570 s64 bn; 571 struct metapage *mp; 572 dtpage_t *p; 573 s8 *stbl; 574 int base, index, lim; 575 struct btframe *btsp; 576 pxd_t *pxd; 577 int psize = 288; /* initial in-line directory */ 578 ino_t inumber; 579 struct component_name ciKey; 580 struct super_block *sb = ip->i_sb; 581 582 ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 583 GFP_NOFS); 584 if (!ciKey.name) { 585 rc = -ENOMEM; 586 goto dtSearch_Exit2; 587 } 588 589 590 /* uppercase search key for c-i directory */ 591 UniStrcpy(ciKey.name, key->name); 592 ciKey.namlen = key->namlen; 593 594 /* only uppercase if case-insensitive support is on */ 595 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 596 ciToUpper(&ciKey); 597 } 598 BT_CLR(btstack); /* reset stack */ 599 600 /* init level count for max pages to split */ 601 btstack->nsplit = 1; 602 603 /* 604 * search down tree from root: 605 * 606 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 607 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 608 * 609 * if entry with search key K is not found 610 * internal page search find the entry with largest key Ki 611 * less than K which point to the child page to search; 612 * leaf page search find the entry with smallest key Kj 613 * greater than K so that the returned index is the position of 614 * the entry to be shifted right for insertion of new entry. 615 * for empty tree, search key is greater than any key of the tree. 616 * 617 * by convention, root bn = 0. 618 */ 619 for (bn = 0;;) { 620 /* get/pin the page to search */ 621 DT_GETPAGE(ip, bn, mp, psize, p, rc); 622 if (rc) 623 goto dtSearch_Exit1; 624 625 /* get sorted entry table of the page */ 626 stbl = DT_GETSTBL(p); 627 628 /* 629 * binary search with search key K on the current page. 630 */ 631 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 632 index = base + (lim >> 1); 633 634 if (stbl[index] < 0) { 635 rc = -EIO; 636 goto out; 637 } 638 639 if (p->header.flag & BT_LEAF) { 640 /* uppercase leaf name to compare */ 641 cmp = 642 ciCompare(&ciKey, p, stbl[index], 643 JFS_SBI(sb)->mntflag); 644 } else { 645 /* router key is in uppercase */ 646 647 cmp = dtCompare(&ciKey, p, stbl[index]); 648 649 650 } 651 if (cmp == 0) { 652 /* 653 * search hit 654 */ 655 /* search hit - leaf page: 656 * return the entry found 657 */ 658 if (p->header.flag & BT_LEAF) { 659 inumber = le32_to_cpu( 660 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 661 662 /* 663 * search for JFS_LOOKUP 664 */ 665 if (flag == JFS_LOOKUP) { 666 *data = inumber; 667 rc = 0; 668 goto out; 669 } 670 671 /* 672 * search for JFS_CREATE 673 */ 674 if (flag == JFS_CREATE) { 675 *data = inumber; 676 rc = -EEXIST; 677 goto out; 678 } 679 680 /* 681 * search for JFS_REMOVE or JFS_RENAME 682 */ 683 if ((flag == JFS_REMOVE || 684 flag == JFS_RENAME) && 685 *data != inumber) { 686 rc = -ESTALE; 687 goto out; 688 } 689 690 /* 691 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 692 */ 693 /* save search result */ 694 *data = inumber; 695 btsp = btstack->top; 696 btsp->bn = bn; 697 btsp->index = index; 698 btsp->mp = mp; 699 700 rc = 0; 701 goto dtSearch_Exit1; 702 } 703 704 /* search hit - internal page: 705 * descend/search its child page 706 */ 707 goto getChild; 708 } 709 710 if (cmp > 0) { 711 base = index + 1; 712 --lim; 713 } 714 } 715 716 /* 717 * search miss 718 * 719 * base is the smallest index with key (Kj) greater than 720 * search key (K) and may be zero or (maxindex + 1) index. 721 */ 722 /* 723 * search miss - leaf page 724 * 725 * return location of entry (base) where new entry with 726 * search key K is to be inserted. 727 */ 728 if (p->header.flag & BT_LEAF) { 729 /* 730 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 731 */ 732 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 733 flag == JFS_RENAME) { 734 rc = -ENOENT; 735 goto out; 736 } 737 738 /* 739 * search for JFS_CREATE|JFS_FINDDIR: 740 * 741 * save search result 742 */ 743 *data = 0; 744 btsp = btstack->top; 745 btsp->bn = bn; 746 btsp->index = base; 747 btsp->mp = mp; 748 749 rc = 0; 750 goto dtSearch_Exit1; 751 } 752 753 /* 754 * search miss - internal page 755 * 756 * if base is non-zero, decrement base by one to get the parent 757 * entry of the child page to search. 758 */ 759 index = base ? base - 1 : base; 760 761 /* 762 * go down to child page 763 */ 764 getChild: 765 /* update max. number of pages to split */ 766 if (BT_STACK_FULL(btstack)) { 767 /* Something's corrupted, mark filesystem dirty so 768 * chkdsk will fix it. 769 */ 770 jfs_error(sb, "stack overrun!\n"); 771 BT_STACK_DUMP(btstack); 772 rc = -EIO; 773 goto out; 774 } 775 btstack->nsplit++; 776 777 /* push (bn, index) of the parent page/entry */ 778 BT_PUSH(btstack, bn, index); 779 780 /* get the child page block number */ 781 pxd = (pxd_t *) & p->slot[stbl[index]]; 782 bn = addressPXD(pxd); 783 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 784 785 /* unpin the parent page */ 786 DT_PUTPAGE(mp); 787 } 788 789 out: 790 DT_PUTPAGE(mp); 791 792 dtSearch_Exit1: 793 794 kfree(ciKey.name); 795 796 dtSearch_Exit2: 797 798 return rc; 799 } 800 801 802 /* 803 * dtInsert() 804 * 805 * function: insert an entry to directory tree 806 * 807 * parameter: 808 * 809 * return: 0 - success; 810 * errno - failure; 811 */ 812 int dtInsert(tid_t tid, struct inode *ip, 813 struct component_name * name, ino_t * fsn, struct btstack * btstack) 814 { 815 int rc = 0; 816 struct metapage *mp; /* meta-page buffer */ 817 dtpage_t *p; /* base B+-tree index page */ 818 s64 bn; 819 int index; 820 struct dtsplit split; /* split information */ 821 ddata_t data; 822 struct dt_lock *dtlck; 823 int n; 824 struct tlock *tlck; 825 struct lv *lv; 826 827 /* 828 * retrieve search result 829 * 830 * dtSearch() returns (leaf page pinned, index at which to insert). 831 * n.b. dtSearch() may return index of (maxindex + 1) of 832 * the full page. 833 */ 834 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 835 if (p->header.freelist == 0) 836 return -EINVAL; 837 838 /* 839 * insert entry for new key 840 */ 841 if (DO_INDEX(ip)) { 842 if (JFS_IP(ip)->next_index == DIREND) { 843 DT_PUTPAGE(mp); 844 return -EMLINK; 845 } 846 n = NDTLEAF(name->namlen); 847 data.leaf.tid = tid; 848 data.leaf.ip = ip; 849 } else { 850 n = NDTLEAF_LEGACY(name->namlen); 851 data.leaf.ip = NULL; /* signifies legacy directory format */ 852 } 853 data.leaf.ino = *fsn; 854 855 /* 856 * leaf page does not have enough room for new entry: 857 * 858 * extend/split the leaf page; 859 * 860 * dtSplitUp() will insert the entry and unpin the leaf page. 861 */ 862 if (n > p->header.freecnt) { 863 split.mp = mp; 864 split.index = index; 865 split.nslot = n; 866 split.key = name; 867 split.data = &data; 868 rc = dtSplitUp(tid, ip, &split, btstack); 869 return rc; 870 } 871 872 /* 873 * leaf page does have enough room for new entry: 874 * 875 * insert the new data entry into the leaf page; 876 */ 877 BT_MARK_DIRTY(mp, ip); 878 /* 879 * acquire a transaction lock on the leaf page 880 */ 881 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 882 dtlck = (struct dt_lock *) & tlck->lock; 883 ASSERT(dtlck->index == 0); 884 lv = & dtlck->lv[0]; 885 886 /* linelock header */ 887 lv->offset = 0; 888 lv->length = 1; 889 dtlck->index++; 890 891 dtInsertEntry(p, index, name, &data, &dtlck); 892 893 /* linelock stbl of non-root leaf page */ 894 if (!(p->header.flag & BT_ROOT)) { 895 if (dtlck->index >= dtlck->maxcnt) 896 dtlck = (struct dt_lock *) txLinelock(dtlck); 897 lv = & dtlck->lv[dtlck->index]; 898 n = index >> L2DTSLOTSIZE; 899 lv->offset = p->header.stblindex + n; 900 lv->length = 901 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 902 dtlck->index++; 903 } 904 905 /* unpin the leaf page */ 906 DT_PUTPAGE(mp); 907 908 return 0; 909 } 910 911 912 /* 913 * dtSplitUp() 914 * 915 * function: propagate insertion bottom up; 916 * 917 * parameter: 918 * 919 * return: 0 - success; 920 * errno - failure; 921 * leaf page unpinned; 922 */ 923 static int dtSplitUp(tid_t tid, 924 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 925 { 926 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 927 int rc = 0; 928 struct metapage *smp; 929 dtpage_t *sp; /* split page */ 930 struct metapage *rmp; 931 dtpage_t *rp; /* new right page split from sp */ 932 pxd_t rpxd; /* new right page extent descriptor */ 933 struct metapage *lmp; 934 dtpage_t *lp; /* left child page */ 935 int skip; /* index of entry of insertion */ 936 struct btframe *parent; /* parent page entry on traverse stack */ 937 s64 xaddr, nxaddr; 938 int xlen, xsize; 939 struct pxdlist pxdlist; 940 pxd_t *pxd; 941 struct component_name key = { 0, NULL }; 942 ddata_t *data = split->data; 943 int n; 944 struct dt_lock *dtlck; 945 struct tlock *tlck; 946 struct lv *lv; 947 int quota_allocation = 0; 948 949 /* get split page */ 950 smp = split->mp; 951 sp = DT_PAGE(ip, smp); 952 953 key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS); 954 if (!key.name) { 955 DT_PUTPAGE(smp); 956 rc = -ENOMEM; 957 goto dtSplitUp_Exit; 958 } 959 960 /* 961 * split leaf page 962 * 963 * The split routines insert the new entry, and 964 * acquire txLock as appropriate. 965 */ 966 /* 967 * split root leaf page: 968 */ 969 if (sp->header.flag & BT_ROOT) { 970 /* 971 * allocate a single extent child page 972 */ 973 xlen = 1; 974 n = sbi->bsize >> L2DTSLOTSIZE; 975 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 976 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 977 if (n <= split->nslot) 978 xlen++; 979 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 980 DT_PUTPAGE(smp); 981 goto freeKeyName; 982 } 983 984 pxdlist.maxnpxd = 1; 985 pxdlist.npxd = 0; 986 pxd = &pxdlist.pxd[0]; 987 PXDaddress(pxd, xaddr); 988 PXDlength(pxd, xlen); 989 split->pxdlist = &pxdlist; 990 rc = dtSplitRoot(tid, ip, split, &rmp); 991 992 if (rc) 993 dbFree(ip, xaddr, xlen); 994 else 995 DT_PUTPAGE(rmp); 996 997 DT_PUTPAGE(smp); 998 999 if (!DO_INDEX(ip)) 1000 ip->i_size = xlen << sbi->l2bsize; 1001 1002 goto freeKeyName; 1003 } 1004 1005 /* 1006 * extend first leaf page 1007 * 1008 * extend the 1st extent if less than buffer page size 1009 * (dtExtendPage() reurns leaf page unpinned) 1010 */ 1011 pxd = &sp->header.self; 1012 xlen = lengthPXD(pxd); 1013 xsize = xlen << sbi->l2bsize; 1014 if (xsize < PSIZE) { 1015 xaddr = addressPXD(pxd); 1016 n = xsize >> L2DTSLOTSIZE; 1017 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1018 if ((n + sp->header.freecnt) <= split->nslot) 1019 n = xlen + (xlen << 1); 1020 else 1021 n = xlen; 1022 1023 /* Allocate blocks to quota. */ 1024 rc = dquot_alloc_block(ip, n); 1025 if (rc) 1026 goto extendOut; 1027 quota_allocation += n; 1028 1029 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1030 (s64) n, &nxaddr))) 1031 goto extendOut; 1032 1033 pxdlist.maxnpxd = 1; 1034 pxdlist.npxd = 0; 1035 pxd = &pxdlist.pxd[0]; 1036 PXDaddress(pxd, nxaddr); 1037 PXDlength(pxd, xlen + n); 1038 split->pxdlist = &pxdlist; 1039 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1040 nxaddr = addressPXD(pxd); 1041 if (xaddr != nxaddr) { 1042 /* free relocated extent */ 1043 xlen = lengthPXD(pxd); 1044 dbFree(ip, nxaddr, (s64) xlen); 1045 } else { 1046 /* free extended delta */ 1047 xlen = lengthPXD(pxd) - n; 1048 xaddr = addressPXD(pxd) + xlen; 1049 dbFree(ip, xaddr, (s64) n); 1050 } 1051 } else if (!DO_INDEX(ip)) 1052 ip->i_size = lengthPXD(pxd) << sbi->l2bsize; 1053 1054 1055 extendOut: 1056 DT_PUTPAGE(smp); 1057 goto freeKeyName; 1058 } 1059 1060 /* 1061 * split leaf page <sp> into <sp> and a new right page <rp>. 1062 * 1063 * return <rp> pinned and its extent descriptor <rpxd> 1064 */ 1065 /* 1066 * allocate new directory page extent and 1067 * new index page(s) to cover page split(s) 1068 * 1069 * allocation hint: ? 1070 */ 1071 n = btstack->nsplit; 1072 pxdlist.maxnpxd = pxdlist.npxd = 0; 1073 xlen = sbi->nbperpage; 1074 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1075 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1076 PXDaddress(pxd, xaddr); 1077 PXDlength(pxd, xlen); 1078 pxdlist.maxnpxd++; 1079 continue; 1080 } 1081 1082 DT_PUTPAGE(smp); 1083 1084 /* undo allocation */ 1085 goto splitOut; 1086 } 1087 1088 split->pxdlist = &pxdlist; 1089 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1090 DT_PUTPAGE(smp); 1091 1092 /* undo allocation */ 1093 goto splitOut; 1094 } 1095 1096 if (!DO_INDEX(ip)) 1097 ip->i_size += PSIZE; 1098 1099 /* 1100 * propagate up the router entry for the leaf page just split 1101 * 1102 * insert a router entry for the new page into the parent page, 1103 * propagate the insert/split up the tree by walking back the stack 1104 * of (bn of parent page, index of child page entry in parent page) 1105 * that were traversed during the search for the page that split. 1106 * 1107 * the propagation of insert/split up the tree stops if the root 1108 * splits or the page inserted into doesn't have to split to hold 1109 * the new entry. 1110 * 1111 * the parent entry for the split page remains the same, and 1112 * a new entry is inserted at its right with the first key and 1113 * block number of the new right page. 1114 * 1115 * There are a maximum of 4 pages pinned at any time: 1116 * two children, left parent and right parent (when the parent splits). 1117 * keep the child pages pinned while working on the parent. 1118 * make sure that all pins are released at exit. 1119 */ 1120 while ((parent = BT_POP(btstack)) != NULL) { 1121 /* parent page specified by stack frame <parent> */ 1122 1123 /* keep current child pages (<lp>, <rp>) pinned */ 1124 lmp = smp; 1125 lp = sp; 1126 1127 /* 1128 * insert router entry in parent for new right child page <rp> 1129 */ 1130 /* get the parent page <sp> */ 1131 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1132 if (rc) { 1133 DT_PUTPAGE(lmp); 1134 DT_PUTPAGE(rmp); 1135 goto splitOut; 1136 } 1137 1138 /* 1139 * The new key entry goes ONE AFTER the index of parent entry, 1140 * because the split was to the right. 1141 */ 1142 skip = parent->index + 1; 1143 1144 /* 1145 * compute the key for the router entry 1146 * 1147 * key suffix compression: 1148 * for internal pages that have leaf pages as children, 1149 * retain only what's needed to distinguish between 1150 * the new entry and the entry on the page to its left. 1151 * If the keys compare equal, retain the entire key. 1152 * 1153 * note that compression is performed only at computing 1154 * router key at the lowest internal level. 1155 * further compression of the key between pairs of higher 1156 * level internal pages loses too much information and 1157 * the search may fail. 1158 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1159 * results in two adjacent parent entries (a)(xx). 1160 * if split occurs between these two entries, and 1161 * if compression is applied, the router key of parent entry 1162 * of right page (x) will divert search for x into right 1163 * subtree and miss x in the left subtree.) 1164 * 1165 * the entire key must be retained for the next-to-leftmost 1166 * internal key at any level of the tree, or search may fail 1167 * (e.g., ?) 1168 */ 1169 switch (rp->header.flag & BT_TYPE) { 1170 case BT_LEAF: 1171 /* 1172 * compute the length of prefix for suffix compression 1173 * between last entry of left page and first entry 1174 * of right page 1175 */ 1176 if ((sp->header.flag & BT_ROOT && skip > 1) || 1177 sp->header.prev != 0 || skip > 1) { 1178 /* compute uppercase router prefix key */ 1179 rc = ciGetLeafPrefixKey(lp, 1180 lp->header.nextindex-1, 1181 rp, 0, &key, 1182 sbi->mntflag); 1183 if (rc) { 1184 DT_PUTPAGE(lmp); 1185 DT_PUTPAGE(rmp); 1186 DT_PUTPAGE(smp); 1187 goto splitOut; 1188 } 1189 } else { 1190 /* next to leftmost entry of 1191 lowest internal level */ 1192 1193 /* compute uppercase router key */ 1194 dtGetKey(rp, 0, &key, sbi->mntflag); 1195 key.name[key.namlen] = 0; 1196 1197 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1198 ciToUpper(&key); 1199 } 1200 1201 n = NDTINTERNAL(key.namlen); 1202 break; 1203 1204 case BT_INTERNAL: 1205 dtGetKey(rp, 0, &key, sbi->mntflag); 1206 n = NDTINTERNAL(key.namlen); 1207 break; 1208 1209 default: 1210 jfs_err("dtSplitUp(): UFO!"); 1211 break; 1212 } 1213 1214 /* unpin left child page */ 1215 DT_PUTPAGE(lmp); 1216 1217 /* 1218 * compute the data for the router entry 1219 */ 1220 data->xd = rpxd; /* child page xd */ 1221 1222 /* 1223 * parent page is full - split the parent page 1224 */ 1225 if (n > sp->header.freecnt) { 1226 /* init for parent page split */ 1227 split->mp = smp; 1228 split->index = skip; /* index at insert */ 1229 split->nslot = n; 1230 split->key = &key; 1231 /* split->data = data; */ 1232 1233 /* unpin right child page */ 1234 DT_PUTPAGE(rmp); 1235 1236 /* The split routines insert the new entry, 1237 * acquire txLock as appropriate. 1238 * return <rp> pinned and its block number <rbn>. 1239 */ 1240 rc = (sp->header.flag & BT_ROOT) ? 1241 dtSplitRoot(tid, ip, split, &rmp) : 1242 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1243 if (rc) { 1244 DT_PUTPAGE(smp); 1245 goto splitOut; 1246 } 1247 1248 /* smp and rmp are pinned */ 1249 } 1250 /* 1251 * parent page is not full - insert router entry in parent page 1252 */ 1253 else { 1254 BT_MARK_DIRTY(smp, ip); 1255 /* 1256 * acquire a transaction lock on the parent page 1257 */ 1258 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1259 dtlck = (struct dt_lock *) & tlck->lock; 1260 ASSERT(dtlck->index == 0); 1261 lv = & dtlck->lv[0]; 1262 1263 /* linelock header */ 1264 lv->offset = 0; 1265 lv->length = 1; 1266 dtlck->index++; 1267 1268 /* linelock stbl of non-root parent page */ 1269 if (!(sp->header.flag & BT_ROOT)) { 1270 lv++; 1271 n = skip >> L2DTSLOTSIZE; 1272 lv->offset = sp->header.stblindex + n; 1273 lv->length = 1274 ((sp->header.nextindex - 1275 1) >> L2DTSLOTSIZE) - n + 1; 1276 dtlck->index++; 1277 } 1278 1279 dtInsertEntry(sp, skip, &key, data, &dtlck); 1280 1281 /* exit propagate up */ 1282 break; 1283 } 1284 } 1285 1286 /* unpin current split and its right page */ 1287 DT_PUTPAGE(smp); 1288 DT_PUTPAGE(rmp); 1289 1290 /* 1291 * free remaining extents allocated for split 1292 */ 1293 splitOut: 1294 n = pxdlist.npxd; 1295 pxd = &pxdlist.pxd[n]; 1296 for (; n < pxdlist.maxnpxd; n++, pxd++) 1297 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1298 1299 freeKeyName: 1300 kfree(key.name); 1301 1302 /* Rollback quota allocation */ 1303 if (rc && quota_allocation) 1304 dquot_free_block(ip, quota_allocation); 1305 1306 dtSplitUp_Exit: 1307 1308 return rc; 1309 } 1310 1311 1312 /* 1313 * dtSplitPage() 1314 * 1315 * function: Split a non-root page of a btree. 1316 * 1317 * parameter: 1318 * 1319 * return: 0 - success; 1320 * errno - failure; 1321 * return split and new page pinned; 1322 */ 1323 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1324 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1325 { 1326 int rc = 0; 1327 struct metapage *smp; 1328 dtpage_t *sp; 1329 struct metapage *rmp; 1330 dtpage_t *rp; /* new right page allocated */ 1331 s64 rbn; /* new right page block number */ 1332 struct metapage *mp; 1333 dtpage_t *p; 1334 s64 nextbn; 1335 struct pxdlist *pxdlist; 1336 pxd_t *pxd; 1337 int skip, nextindex, half, left, nxt, off, si; 1338 struct ldtentry *ldtentry; 1339 struct idtentry *idtentry; 1340 u8 *stbl; 1341 struct dtslot *f; 1342 int fsi, stblsize; 1343 int n; 1344 struct dt_lock *sdtlck, *rdtlck; 1345 struct tlock *tlck; 1346 struct dt_lock *dtlck; 1347 struct lv *slv, *rlv, *lv; 1348 1349 /* get split page */ 1350 smp = split->mp; 1351 sp = DT_PAGE(ip, smp); 1352 1353 /* 1354 * allocate the new right page for the split 1355 */ 1356 pxdlist = split->pxdlist; 1357 pxd = &pxdlist->pxd[pxdlist->npxd]; 1358 pxdlist->npxd++; 1359 rbn = addressPXD(pxd); 1360 rmp = get_metapage(ip, rbn, PSIZE, 1); 1361 if (rmp == NULL) 1362 return -EIO; 1363 1364 /* Allocate blocks to quota. */ 1365 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1366 if (rc) { 1367 release_metapage(rmp); 1368 return rc; 1369 } 1370 1371 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1372 1373 BT_MARK_DIRTY(rmp, ip); 1374 /* 1375 * acquire a transaction lock on the new right page 1376 */ 1377 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1378 rdtlck = (struct dt_lock *) & tlck->lock; 1379 1380 rp = (dtpage_t *) rmp->data; 1381 *rpp = rp; 1382 rp->header.self = *pxd; 1383 1384 BT_MARK_DIRTY(smp, ip); 1385 /* 1386 * acquire a transaction lock on the split page 1387 * 1388 * action: 1389 */ 1390 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1391 sdtlck = (struct dt_lock *) & tlck->lock; 1392 1393 /* linelock header of split page */ 1394 ASSERT(sdtlck->index == 0); 1395 slv = & sdtlck->lv[0]; 1396 slv->offset = 0; 1397 slv->length = 1; 1398 sdtlck->index++; 1399 1400 /* 1401 * initialize/update sibling pointers between sp and rp 1402 */ 1403 nextbn = le64_to_cpu(sp->header.next); 1404 rp->header.next = cpu_to_le64(nextbn); 1405 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1406 sp->header.next = cpu_to_le64(rbn); 1407 1408 /* 1409 * initialize new right page 1410 */ 1411 rp->header.flag = sp->header.flag; 1412 1413 /* compute sorted entry table at start of extent data area */ 1414 rp->header.nextindex = 0; 1415 rp->header.stblindex = 1; 1416 1417 n = PSIZE >> L2DTSLOTSIZE; 1418 rp->header.maxslot = n; 1419 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1420 1421 /* init freelist */ 1422 fsi = rp->header.stblindex + stblsize; 1423 rp->header.freelist = fsi; 1424 rp->header.freecnt = rp->header.maxslot - fsi; 1425 1426 /* 1427 * sequential append at tail: append without split 1428 * 1429 * If splitting the last page on a level because of appending 1430 * a entry to it (skip is maxentry), it's likely that the access is 1431 * sequential. Adding an empty page on the side of the level is less 1432 * work and can push the fill factor much higher than normal. 1433 * If we're wrong it's no big deal, we'll just do the split the right 1434 * way next time. 1435 * (It may look like it's equally easy to do a similar hack for 1436 * reverse sorted data, that is, split the tree left, 1437 * but it's not. Be my guest.) 1438 */ 1439 if (nextbn == 0 && split->index == sp->header.nextindex) { 1440 /* linelock header + stbl (first slot) of new page */ 1441 rlv = & rdtlck->lv[rdtlck->index]; 1442 rlv->offset = 0; 1443 rlv->length = 2; 1444 rdtlck->index++; 1445 1446 /* 1447 * initialize freelist of new right page 1448 */ 1449 f = &rp->slot[fsi]; 1450 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1451 f->next = fsi; 1452 f->next = -1; 1453 1454 /* insert entry at the first entry of the new right page */ 1455 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1456 1457 goto out; 1458 } 1459 1460 /* 1461 * non-sequential insert (at possibly middle page) 1462 */ 1463 1464 /* 1465 * update prev pointer of previous right sibling page; 1466 */ 1467 if (nextbn != 0) { 1468 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1469 if (rc) { 1470 discard_metapage(rmp); 1471 return rc; 1472 } 1473 1474 BT_MARK_DIRTY(mp, ip); 1475 /* 1476 * acquire a transaction lock on the next page 1477 */ 1478 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1479 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1480 tlck, ip, mp); 1481 dtlck = (struct dt_lock *) & tlck->lock; 1482 1483 /* linelock header of previous right sibling page */ 1484 lv = & dtlck->lv[dtlck->index]; 1485 lv->offset = 0; 1486 lv->length = 1; 1487 dtlck->index++; 1488 1489 p->header.prev = cpu_to_le64(rbn); 1490 1491 DT_PUTPAGE(mp); 1492 } 1493 1494 /* 1495 * split the data between the split and right pages. 1496 */ 1497 skip = split->index; 1498 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1499 left = 0; 1500 1501 /* 1502 * compute fill factor for split pages 1503 * 1504 * <nxt> traces the next entry to move to rp 1505 * <off> traces the next entry to stay in sp 1506 */ 1507 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1508 nextindex = sp->header.nextindex; 1509 for (nxt = off = 0; nxt < nextindex; ++off) { 1510 if (off == skip) 1511 /* check for fill factor with new entry size */ 1512 n = split->nslot; 1513 else { 1514 si = stbl[nxt]; 1515 switch (sp->header.flag & BT_TYPE) { 1516 case BT_LEAF: 1517 ldtentry = (struct ldtentry *) & sp->slot[si]; 1518 if (DO_INDEX(ip)) 1519 n = NDTLEAF(ldtentry->namlen); 1520 else 1521 n = NDTLEAF_LEGACY(ldtentry-> 1522 namlen); 1523 break; 1524 1525 case BT_INTERNAL: 1526 idtentry = (struct idtentry *) & sp->slot[si]; 1527 n = NDTINTERNAL(idtentry->namlen); 1528 break; 1529 1530 default: 1531 break; 1532 } 1533 1534 ++nxt; /* advance to next entry to move in sp */ 1535 } 1536 1537 left += n; 1538 if (left >= half) 1539 break; 1540 } 1541 1542 /* <nxt> poins to the 1st entry to move */ 1543 1544 /* 1545 * move entries to right page 1546 * 1547 * dtMoveEntry() initializes rp and reserves entry for insertion 1548 * 1549 * split page moved out entries are linelocked; 1550 * new/right page moved in entries are linelocked; 1551 */ 1552 /* linelock header + stbl of new right page */ 1553 rlv = & rdtlck->lv[rdtlck->index]; 1554 rlv->offset = 0; 1555 rlv->length = 5; 1556 rdtlck->index++; 1557 1558 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1559 1560 sp->header.nextindex = nxt; 1561 1562 /* 1563 * finalize freelist of new right page 1564 */ 1565 fsi = rp->header.freelist; 1566 f = &rp->slot[fsi]; 1567 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1568 f->next = fsi; 1569 f->next = -1; 1570 1571 /* 1572 * Update directory index table for entries now in right page 1573 */ 1574 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1575 s64 lblock; 1576 1577 mp = NULL; 1578 stbl = DT_GETSTBL(rp); 1579 for (n = 0; n < rp->header.nextindex; n++) { 1580 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1581 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1582 rbn, n, &mp, &lblock); 1583 } 1584 if (mp) 1585 release_metapage(mp); 1586 } 1587 1588 /* 1589 * the skipped index was on the left page, 1590 */ 1591 if (skip <= off) { 1592 /* insert the new entry in the split page */ 1593 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1594 1595 /* linelock stbl of split page */ 1596 if (sdtlck->index >= sdtlck->maxcnt) 1597 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1598 slv = & sdtlck->lv[sdtlck->index]; 1599 n = skip >> L2DTSLOTSIZE; 1600 slv->offset = sp->header.stblindex + n; 1601 slv->length = 1602 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1603 sdtlck->index++; 1604 } 1605 /* 1606 * the skipped index was on the right page, 1607 */ 1608 else { 1609 /* adjust the skip index to reflect the new position */ 1610 skip -= nxt; 1611 1612 /* insert the new entry in the right page */ 1613 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1614 } 1615 1616 out: 1617 *rmpp = rmp; 1618 *rpxdp = *pxd; 1619 1620 return rc; 1621 } 1622 1623 1624 /* 1625 * dtExtendPage() 1626 * 1627 * function: extend 1st/only directory leaf page 1628 * 1629 * parameter: 1630 * 1631 * return: 0 - success; 1632 * errno - failure; 1633 * return extended page pinned; 1634 */ 1635 static int dtExtendPage(tid_t tid, 1636 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1637 { 1638 struct super_block *sb = ip->i_sb; 1639 int rc; 1640 struct metapage *smp, *pmp, *mp; 1641 dtpage_t *sp, *pp; 1642 struct pxdlist *pxdlist; 1643 pxd_t *pxd, *tpxd; 1644 int xlen, xsize; 1645 int newstblindex, newstblsize; 1646 int oldstblindex, oldstblsize; 1647 int fsi, last; 1648 struct dtslot *f; 1649 struct btframe *parent; 1650 int n; 1651 struct dt_lock *dtlck; 1652 s64 xaddr, txaddr; 1653 struct tlock *tlck; 1654 struct pxd_lock *pxdlock; 1655 struct lv *lv; 1656 uint type; 1657 struct ldtentry *ldtentry; 1658 u8 *stbl; 1659 1660 /* get page to extend */ 1661 smp = split->mp; 1662 sp = DT_PAGE(ip, smp); 1663 1664 /* get parent/root page */ 1665 parent = BT_POP(btstack); 1666 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1667 if (rc) 1668 return (rc); 1669 1670 /* 1671 * extend the extent 1672 */ 1673 pxdlist = split->pxdlist; 1674 pxd = &pxdlist->pxd[pxdlist->npxd]; 1675 pxdlist->npxd++; 1676 1677 xaddr = addressPXD(pxd); 1678 tpxd = &sp->header.self; 1679 txaddr = addressPXD(tpxd); 1680 /* in-place extension */ 1681 if (xaddr == txaddr) { 1682 type = tlckEXTEND; 1683 } 1684 /* relocation */ 1685 else { 1686 type = tlckNEW; 1687 1688 /* save moved extent descriptor for later free */ 1689 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1690 pxdlock = (struct pxd_lock *) & tlck->lock; 1691 pxdlock->flag = mlckFREEPXD; 1692 pxdlock->pxd = sp->header.self; 1693 pxdlock->index = 1; 1694 1695 /* 1696 * Update directory index table to reflect new page address 1697 */ 1698 if (DO_INDEX(ip)) { 1699 s64 lblock; 1700 1701 mp = NULL; 1702 stbl = DT_GETSTBL(sp); 1703 for (n = 0; n < sp->header.nextindex; n++) { 1704 ldtentry = 1705 (struct ldtentry *) & sp->slot[stbl[n]]; 1706 modify_index(tid, ip, 1707 le32_to_cpu(ldtentry->index), 1708 xaddr, n, &mp, &lblock); 1709 } 1710 if (mp) 1711 release_metapage(mp); 1712 } 1713 } 1714 1715 /* 1716 * extend the page 1717 */ 1718 sp->header.self = *pxd; 1719 1720 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1721 1722 BT_MARK_DIRTY(smp, ip); 1723 /* 1724 * acquire a transaction lock on the extended/leaf page 1725 */ 1726 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1727 dtlck = (struct dt_lock *) & tlck->lock; 1728 lv = & dtlck->lv[0]; 1729 1730 /* update buffer extent descriptor of extended page */ 1731 xlen = lengthPXD(pxd); 1732 xsize = xlen << JFS_SBI(sb)->l2bsize; 1733 1734 /* 1735 * copy old stbl to new stbl at start of extended area 1736 */ 1737 oldstblindex = sp->header.stblindex; 1738 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1739 newstblindex = sp->header.maxslot; 1740 n = xsize >> L2DTSLOTSIZE; 1741 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1742 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1743 sp->header.nextindex); 1744 1745 /* 1746 * in-line extension: linelock old area of extended page 1747 */ 1748 if (type == tlckEXTEND) { 1749 /* linelock header */ 1750 lv->offset = 0; 1751 lv->length = 1; 1752 dtlck->index++; 1753 lv++; 1754 1755 /* linelock new stbl of extended page */ 1756 lv->offset = newstblindex; 1757 lv->length = newstblsize; 1758 } 1759 /* 1760 * relocation: linelock whole relocated area 1761 */ 1762 else { 1763 lv->offset = 0; 1764 lv->length = sp->header.maxslot + newstblsize; 1765 } 1766 1767 dtlck->index++; 1768 1769 sp->header.maxslot = n; 1770 sp->header.stblindex = newstblindex; 1771 /* sp->header.nextindex remains the same */ 1772 1773 /* 1774 * add old stbl region at head of freelist 1775 */ 1776 fsi = oldstblindex; 1777 f = &sp->slot[fsi]; 1778 last = sp->header.freelist; 1779 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1780 f->next = last; 1781 last = fsi; 1782 } 1783 sp->header.freelist = last; 1784 sp->header.freecnt += oldstblsize; 1785 1786 /* 1787 * append free region of newly extended area at tail of freelist 1788 */ 1789 /* init free region of newly extended area */ 1790 fsi = n = newstblindex + newstblsize; 1791 f = &sp->slot[fsi]; 1792 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1793 f->next = fsi; 1794 f->next = -1; 1795 1796 /* append new free region at tail of old freelist */ 1797 fsi = sp->header.freelist; 1798 if (fsi == -1) 1799 sp->header.freelist = n; 1800 else { 1801 do { 1802 f = &sp->slot[fsi]; 1803 fsi = f->next; 1804 } while (fsi != -1); 1805 1806 f->next = n; 1807 } 1808 1809 sp->header.freecnt += sp->header.maxslot - n; 1810 1811 /* 1812 * insert the new entry 1813 */ 1814 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1815 1816 BT_MARK_DIRTY(pmp, ip); 1817 /* 1818 * linelock any freeslots residing in old extent 1819 */ 1820 if (type == tlckEXTEND) { 1821 n = sp->header.maxslot >> 2; 1822 if (sp->header.freelist < n) 1823 dtLinelockFreelist(sp, n, &dtlck); 1824 } 1825 1826 /* 1827 * update parent entry on the parent/root page 1828 */ 1829 /* 1830 * acquire a transaction lock on the parent/root page 1831 */ 1832 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1833 dtlck = (struct dt_lock *) & tlck->lock; 1834 lv = & dtlck->lv[dtlck->index]; 1835 1836 /* linelock parent entry - 1st slot */ 1837 lv->offset = 1; 1838 lv->length = 1; 1839 dtlck->index++; 1840 1841 /* update the parent pxd for page extension */ 1842 tpxd = (pxd_t *) & pp->slot[1]; 1843 *tpxd = *pxd; 1844 1845 DT_PUTPAGE(pmp); 1846 return 0; 1847 } 1848 1849 1850 /* 1851 * dtSplitRoot() 1852 * 1853 * function: 1854 * split the full root page into 1855 * original/root/split page and new right page 1856 * i.e., root remains fixed in tree anchor (inode) and 1857 * the root is copied to a single new right child page 1858 * since root page << non-root page, and 1859 * the split root page contains a single entry for the 1860 * new right child page. 1861 * 1862 * parameter: 1863 * 1864 * return: 0 - success; 1865 * errno - failure; 1866 * return new page pinned; 1867 */ 1868 static int dtSplitRoot(tid_t tid, 1869 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1870 { 1871 struct super_block *sb = ip->i_sb; 1872 struct metapage *smp; 1873 dtroot_t *sp; 1874 struct metapage *rmp; 1875 dtpage_t *rp; 1876 s64 rbn; 1877 int xlen; 1878 int xsize; 1879 struct dtslot *f; 1880 s8 *stbl; 1881 int fsi, stblsize, n; 1882 struct idtentry *s; 1883 pxd_t *ppxd; 1884 struct pxdlist *pxdlist; 1885 pxd_t *pxd; 1886 struct dt_lock *dtlck; 1887 struct tlock *tlck; 1888 struct lv *lv; 1889 int rc; 1890 1891 /* get split root page */ 1892 smp = split->mp; 1893 sp = &JFS_IP(ip)->i_dtroot; 1894 1895 /* 1896 * allocate/initialize a single (right) child page 1897 * 1898 * N.B. at first split, a one (or two) block to fit new entry 1899 * is allocated; at subsequent split, a full page is allocated; 1900 */ 1901 pxdlist = split->pxdlist; 1902 pxd = &pxdlist->pxd[pxdlist->npxd]; 1903 pxdlist->npxd++; 1904 rbn = addressPXD(pxd); 1905 xlen = lengthPXD(pxd); 1906 xsize = xlen << JFS_SBI(sb)->l2bsize; 1907 rmp = get_metapage(ip, rbn, xsize, 1); 1908 if (!rmp) 1909 return -EIO; 1910 1911 rp = rmp->data; 1912 1913 /* Allocate blocks to quota. */ 1914 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1915 if (rc) { 1916 release_metapage(rmp); 1917 return rc; 1918 } 1919 1920 BT_MARK_DIRTY(rmp, ip); 1921 /* 1922 * acquire a transaction lock on the new right page 1923 */ 1924 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1925 dtlck = (struct dt_lock *) & tlck->lock; 1926 1927 rp->header.flag = 1928 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1929 rp->header.self = *pxd; 1930 1931 /* initialize sibling pointers */ 1932 rp->header.next = 0; 1933 rp->header.prev = 0; 1934 1935 /* 1936 * move in-line root page into new right page extent 1937 */ 1938 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1939 ASSERT(dtlck->index == 0); 1940 lv = & dtlck->lv[0]; 1941 lv->offset = 0; 1942 lv->length = 10; /* 1 + 8 + 1 */ 1943 dtlck->index++; 1944 1945 n = xsize >> L2DTSLOTSIZE; 1946 rp->header.maxslot = n; 1947 stblsize = (n + 31) >> L2DTSLOTSIZE; 1948 1949 /* copy old stbl to new stbl at start of extended area */ 1950 rp->header.stblindex = DTROOTMAXSLOT; 1951 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1952 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1953 rp->header.nextindex = sp->header.nextindex; 1954 1955 /* copy old data area to start of new data area */ 1956 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1957 1958 /* 1959 * append free region of newly extended area at tail of freelist 1960 */ 1961 /* init free region of newly extended area */ 1962 fsi = n = DTROOTMAXSLOT + stblsize; 1963 f = &rp->slot[fsi]; 1964 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1965 f->next = fsi; 1966 f->next = -1; 1967 1968 /* append new free region at tail of old freelist */ 1969 fsi = sp->header.freelist; 1970 if (fsi == -1) 1971 rp->header.freelist = n; 1972 else { 1973 rp->header.freelist = fsi; 1974 1975 do { 1976 f = &rp->slot[fsi]; 1977 fsi = f->next; 1978 } while (fsi >= 0); 1979 1980 f->next = n; 1981 } 1982 1983 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1984 1985 /* 1986 * Update directory index table for entries now in right page 1987 */ 1988 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1989 s64 lblock; 1990 struct metapage *mp = NULL; 1991 struct ldtentry *ldtentry; 1992 1993 stbl = DT_GETSTBL(rp); 1994 for (n = 0; n < rp->header.nextindex; n++) { 1995 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1996 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1997 rbn, n, &mp, &lblock); 1998 } 1999 if (mp) 2000 release_metapage(mp); 2001 } 2002 /* 2003 * insert the new entry into the new right/child page 2004 * (skip index in the new right page will not change) 2005 */ 2006 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2007 2008 /* 2009 * reset parent/root page 2010 * 2011 * set the 1st entry offset to 0, which force the left-most key 2012 * at any level of the tree to be less than any search key. 2013 * 2014 * The btree comparison code guarantees that the left-most key on any 2015 * level of the tree is never used, so it doesn't need to be filled in. 2016 */ 2017 BT_MARK_DIRTY(smp, ip); 2018 /* 2019 * acquire a transaction lock on the root page (in-memory inode) 2020 */ 2021 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2022 dtlck = (struct dt_lock *) & tlck->lock; 2023 2024 /* linelock root */ 2025 ASSERT(dtlck->index == 0); 2026 lv = & dtlck->lv[0]; 2027 lv->offset = 0; 2028 lv->length = DTROOTMAXSLOT; 2029 dtlck->index++; 2030 2031 /* update page header of root */ 2032 if (sp->header.flag & BT_LEAF) { 2033 sp->header.flag &= ~BT_LEAF; 2034 sp->header.flag |= BT_INTERNAL; 2035 } 2036 2037 /* init the first entry */ 2038 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2039 ppxd = (pxd_t *) s; 2040 *ppxd = *pxd; 2041 s->next = -1; 2042 s->namlen = 0; 2043 2044 stbl = sp->header.stbl; 2045 stbl[0] = DTENTRYSTART; 2046 sp->header.nextindex = 1; 2047 2048 /* init freelist */ 2049 fsi = DTENTRYSTART + 1; 2050 f = &sp->slot[fsi]; 2051 2052 /* init free region of remaining area */ 2053 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2054 f->next = fsi; 2055 f->next = -1; 2056 2057 sp->header.freelist = DTENTRYSTART + 1; 2058 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2059 2060 *rmpp = rmp; 2061 2062 return 0; 2063 } 2064 2065 2066 /* 2067 * dtDelete() 2068 * 2069 * function: delete the entry(s) referenced by a key. 2070 * 2071 * parameter: 2072 * 2073 * return: 2074 */ 2075 int dtDelete(tid_t tid, 2076 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2077 { 2078 int rc = 0; 2079 s64 bn; 2080 struct metapage *mp, *imp; 2081 dtpage_t *p; 2082 int index; 2083 struct btstack btstack; 2084 struct dt_lock *dtlck; 2085 struct tlock *tlck; 2086 struct lv *lv; 2087 int i; 2088 struct ldtentry *ldtentry; 2089 u8 *stbl; 2090 u32 table_index, next_index; 2091 struct metapage *nmp; 2092 dtpage_t *np; 2093 2094 /* 2095 * search for the entry to delete: 2096 * 2097 * dtSearch() returns (leaf page pinned, index at which to delete). 2098 */ 2099 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2100 return rc; 2101 2102 /* retrieve search result */ 2103 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2104 2105 /* 2106 * We need to find put the index of the next entry into the 2107 * directory index table in order to resume a readdir from this 2108 * entry. 2109 */ 2110 if (DO_INDEX(ip)) { 2111 stbl = DT_GETSTBL(p); 2112 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2113 table_index = le32_to_cpu(ldtentry->index); 2114 if (index == (p->header.nextindex - 1)) { 2115 /* 2116 * Last entry in this leaf page 2117 */ 2118 if ((p->header.flag & BT_ROOT) 2119 || (p->header.next == 0)) 2120 next_index = -1; 2121 else { 2122 /* Read next leaf page */ 2123 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2124 nmp, PSIZE, np, rc); 2125 if (rc) 2126 next_index = -1; 2127 else { 2128 stbl = DT_GETSTBL(np); 2129 ldtentry = 2130 (struct ldtentry *) & np-> 2131 slot[stbl[0]]; 2132 next_index = 2133 le32_to_cpu(ldtentry->index); 2134 DT_PUTPAGE(nmp); 2135 } 2136 } 2137 } else { 2138 ldtentry = 2139 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2140 next_index = le32_to_cpu(ldtentry->index); 2141 } 2142 free_index(tid, ip, table_index, next_index); 2143 } 2144 /* 2145 * the leaf page becomes empty, delete the page 2146 */ 2147 if (p->header.nextindex == 1) { 2148 /* delete empty page */ 2149 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2150 } 2151 /* 2152 * the leaf page has other entries remaining: 2153 * 2154 * delete the entry from the leaf page. 2155 */ 2156 else { 2157 BT_MARK_DIRTY(mp, ip); 2158 /* 2159 * acquire a transaction lock on the leaf page 2160 */ 2161 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2162 dtlck = (struct dt_lock *) & tlck->lock; 2163 2164 /* 2165 * Do not assume that dtlck->index will be zero. During a 2166 * rename within a directory, this transaction may have 2167 * modified this page already when adding the new entry. 2168 */ 2169 2170 /* linelock header */ 2171 if (dtlck->index >= dtlck->maxcnt) 2172 dtlck = (struct dt_lock *) txLinelock(dtlck); 2173 lv = & dtlck->lv[dtlck->index]; 2174 lv->offset = 0; 2175 lv->length = 1; 2176 dtlck->index++; 2177 2178 /* linelock stbl of non-root leaf page */ 2179 if (!(p->header.flag & BT_ROOT)) { 2180 if (dtlck->index >= dtlck->maxcnt) 2181 dtlck = (struct dt_lock *) txLinelock(dtlck); 2182 lv = & dtlck->lv[dtlck->index]; 2183 i = index >> L2DTSLOTSIZE; 2184 lv->offset = p->header.stblindex + i; 2185 lv->length = 2186 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2187 i + 1; 2188 dtlck->index++; 2189 } 2190 2191 /* free the leaf entry */ 2192 dtDeleteEntry(p, index, &dtlck); 2193 2194 /* 2195 * Update directory index table for entries moved in stbl 2196 */ 2197 if (DO_INDEX(ip) && index < p->header.nextindex) { 2198 s64 lblock; 2199 2200 imp = NULL; 2201 stbl = DT_GETSTBL(p); 2202 for (i = index; i < p->header.nextindex; i++) { 2203 ldtentry = 2204 (struct ldtentry *) & p->slot[stbl[i]]; 2205 modify_index(tid, ip, 2206 le32_to_cpu(ldtentry->index), 2207 bn, i, &imp, &lblock); 2208 } 2209 if (imp) 2210 release_metapage(imp); 2211 } 2212 2213 DT_PUTPAGE(mp); 2214 } 2215 2216 return rc; 2217 } 2218 2219 2220 /* 2221 * dtDeleteUp() 2222 * 2223 * function: 2224 * free empty pages as propagating deletion up the tree 2225 * 2226 * parameter: 2227 * 2228 * return: 2229 */ 2230 static int dtDeleteUp(tid_t tid, struct inode *ip, 2231 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2232 { 2233 int rc = 0; 2234 struct metapage *mp; 2235 dtpage_t *p; 2236 int index, nextindex; 2237 int xlen; 2238 struct btframe *parent; 2239 struct dt_lock *dtlck; 2240 struct tlock *tlck; 2241 struct lv *lv; 2242 struct pxd_lock *pxdlock; 2243 int i; 2244 2245 /* 2246 * keep the root leaf page which has become empty 2247 */ 2248 if (BT_IS_ROOT(fmp)) { 2249 /* 2250 * reset the root 2251 * 2252 * dtInitRoot() acquires txlock on the root 2253 */ 2254 dtInitRoot(tid, ip, PARENT(ip)); 2255 2256 DT_PUTPAGE(fmp); 2257 2258 return 0; 2259 } 2260 2261 /* 2262 * free the non-root leaf page 2263 */ 2264 /* 2265 * acquire a transaction lock on the page 2266 * 2267 * write FREEXTENT|NOREDOPAGE log record 2268 * N.B. linelock is overlaid as freed extent descriptor, and 2269 * the buffer page is freed; 2270 */ 2271 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2272 pxdlock = (struct pxd_lock *) & tlck->lock; 2273 pxdlock->flag = mlckFREEPXD; 2274 pxdlock->pxd = fp->header.self; 2275 pxdlock->index = 1; 2276 2277 /* update sibling pointers */ 2278 if ((rc = dtRelink(tid, ip, fp))) { 2279 BT_PUTPAGE(fmp); 2280 return rc; 2281 } 2282 2283 xlen = lengthPXD(&fp->header.self); 2284 2285 /* Free quota allocation. */ 2286 dquot_free_block(ip, xlen); 2287 2288 /* free/invalidate its buffer page */ 2289 discard_metapage(fmp); 2290 2291 /* 2292 * propagate page deletion up the directory tree 2293 * 2294 * If the delete from the parent page makes it empty, 2295 * continue all the way up the tree. 2296 * stop if the root page is reached (which is never deleted) or 2297 * if the entry deletion does not empty the page. 2298 */ 2299 while ((parent = BT_POP(btstack)) != NULL) { 2300 /* pin the parent page <sp> */ 2301 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2302 if (rc) 2303 return rc; 2304 2305 /* 2306 * free the extent of the child page deleted 2307 */ 2308 index = parent->index; 2309 2310 /* 2311 * delete the entry for the child page from parent 2312 */ 2313 nextindex = p->header.nextindex; 2314 2315 /* 2316 * the parent has the single entry being deleted: 2317 * 2318 * free the parent page which has become empty. 2319 */ 2320 if (nextindex == 1) { 2321 /* 2322 * keep the root internal page which has become empty 2323 */ 2324 if (p->header.flag & BT_ROOT) { 2325 /* 2326 * reset the root 2327 * 2328 * dtInitRoot() acquires txlock on the root 2329 */ 2330 dtInitRoot(tid, ip, PARENT(ip)); 2331 2332 DT_PUTPAGE(mp); 2333 2334 return 0; 2335 } 2336 /* 2337 * free the parent page 2338 */ 2339 else { 2340 /* 2341 * acquire a transaction lock on the page 2342 * 2343 * write FREEXTENT|NOREDOPAGE log record 2344 */ 2345 tlck = 2346 txMaplock(tid, ip, 2347 tlckDTREE | tlckFREE); 2348 pxdlock = (struct pxd_lock *) & tlck->lock; 2349 pxdlock->flag = mlckFREEPXD; 2350 pxdlock->pxd = p->header.self; 2351 pxdlock->index = 1; 2352 2353 /* update sibling pointers */ 2354 if ((rc = dtRelink(tid, ip, p))) { 2355 DT_PUTPAGE(mp); 2356 return rc; 2357 } 2358 2359 xlen = lengthPXD(&p->header.self); 2360 2361 /* Free quota allocation */ 2362 dquot_free_block(ip, xlen); 2363 2364 /* free/invalidate its buffer page */ 2365 discard_metapage(mp); 2366 2367 /* propagate up */ 2368 continue; 2369 } 2370 } 2371 2372 /* 2373 * the parent has other entries remaining: 2374 * 2375 * delete the router entry from the parent page. 2376 */ 2377 BT_MARK_DIRTY(mp, ip); 2378 /* 2379 * acquire a transaction lock on the page 2380 * 2381 * action: router entry deletion 2382 */ 2383 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2384 dtlck = (struct dt_lock *) & tlck->lock; 2385 2386 /* linelock header */ 2387 if (dtlck->index >= dtlck->maxcnt) 2388 dtlck = (struct dt_lock *) txLinelock(dtlck); 2389 lv = & dtlck->lv[dtlck->index]; 2390 lv->offset = 0; 2391 lv->length = 1; 2392 dtlck->index++; 2393 2394 /* linelock stbl of non-root leaf page */ 2395 if (!(p->header.flag & BT_ROOT)) { 2396 if (dtlck->index < dtlck->maxcnt) 2397 lv++; 2398 else { 2399 dtlck = (struct dt_lock *) txLinelock(dtlck); 2400 lv = & dtlck->lv[0]; 2401 } 2402 i = index >> L2DTSLOTSIZE; 2403 lv->offset = p->header.stblindex + i; 2404 lv->length = 2405 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2406 i + 1; 2407 dtlck->index++; 2408 } 2409 2410 /* free the router entry */ 2411 dtDeleteEntry(p, index, &dtlck); 2412 2413 /* reset key of new leftmost entry of level (for consistency) */ 2414 if (index == 0 && 2415 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2416 dtTruncateEntry(p, 0, &dtlck); 2417 2418 /* unpin the parent page */ 2419 DT_PUTPAGE(mp); 2420 2421 /* exit propagation up */ 2422 break; 2423 } 2424 2425 if (!DO_INDEX(ip)) 2426 ip->i_size -= PSIZE; 2427 2428 return 0; 2429 } 2430 2431 /* 2432 * dtRelink() 2433 * 2434 * function: 2435 * link around a freed page. 2436 * 2437 * parameter: 2438 * fp: page to be freed 2439 * 2440 * return: 2441 */ 2442 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2443 { 2444 int rc; 2445 struct metapage *mp; 2446 s64 nextbn, prevbn; 2447 struct tlock *tlck; 2448 struct dt_lock *dtlck; 2449 struct lv *lv; 2450 2451 nextbn = le64_to_cpu(p->header.next); 2452 prevbn = le64_to_cpu(p->header.prev); 2453 2454 /* update prev pointer of the next page */ 2455 if (nextbn != 0) { 2456 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2457 if (rc) 2458 return rc; 2459 2460 BT_MARK_DIRTY(mp, ip); 2461 /* 2462 * acquire a transaction lock on the next page 2463 * 2464 * action: update prev pointer; 2465 */ 2466 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2467 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2468 tlck, ip, mp); 2469 dtlck = (struct dt_lock *) & tlck->lock; 2470 2471 /* linelock header */ 2472 if (dtlck->index >= dtlck->maxcnt) 2473 dtlck = (struct dt_lock *) txLinelock(dtlck); 2474 lv = & dtlck->lv[dtlck->index]; 2475 lv->offset = 0; 2476 lv->length = 1; 2477 dtlck->index++; 2478 2479 p->header.prev = cpu_to_le64(prevbn); 2480 DT_PUTPAGE(mp); 2481 } 2482 2483 /* update next pointer of the previous page */ 2484 if (prevbn != 0) { 2485 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2486 if (rc) 2487 return rc; 2488 2489 BT_MARK_DIRTY(mp, ip); 2490 /* 2491 * acquire a transaction lock on the prev page 2492 * 2493 * action: update next pointer; 2494 */ 2495 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2496 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2497 tlck, ip, mp); 2498 dtlck = (struct dt_lock *) & tlck->lock; 2499 2500 /* linelock header */ 2501 if (dtlck->index >= dtlck->maxcnt) 2502 dtlck = (struct dt_lock *) txLinelock(dtlck); 2503 lv = & dtlck->lv[dtlck->index]; 2504 lv->offset = 0; 2505 lv->length = 1; 2506 dtlck->index++; 2507 2508 p->header.next = cpu_to_le64(nextbn); 2509 DT_PUTPAGE(mp); 2510 } 2511 2512 return 0; 2513 } 2514 2515 2516 /* 2517 * dtInitRoot() 2518 * 2519 * initialize directory root (inline in inode) 2520 */ 2521 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2522 { 2523 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2524 dtroot_t *p; 2525 int fsi; 2526 struct dtslot *f; 2527 struct tlock *tlck; 2528 struct dt_lock *dtlck; 2529 struct lv *lv; 2530 u16 xflag_save; 2531 2532 /* 2533 * If this was previously an non-empty directory, we need to remove 2534 * the old directory table. 2535 */ 2536 if (DO_INDEX(ip)) { 2537 if (!jfs_dirtable_inline(ip)) { 2538 struct tblock *tblk = tid_to_tblock(tid); 2539 /* 2540 * We're playing games with the tid's xflag. If 2541 * we're removing a regular file, the file's xtree 2542 * is committed with COMMIT_PMAP, but we always 2543 * commit the directories xtree with COMMIT_PWMAP. 2544 */ 2545 xflag_save = tblk->xflag; 2546 tblk->xflag = 0; 2547 /* 2548 * xtTruncate isn't guaranteed to fully truncate 2549 * the xtree. The caller needs to check i_size 2550 * after committing the transaction to see if 2551 * additional truncation is needed. The 2552 * COMMIT_Stale flag tells caller that we 2553 * initiated the truncation. 2554 */ 2555 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2556 set_cflag(COMMIT_Stale, ip); 2557 2558 tblk->xflag = xflag_save; 2559 } else 2560 ip->i_size = 1; 2561 2562 jfs_ip->next_index = 2; 2563 } else 2564 ip->i_size = IDATASIZE; 2565 2566 /* 2567 * acquire a transaction lock on the root 2568 * 2569 * action: directory initialization; 2570 */ 2571 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2572 tlckDTREE | tlckENTRY | tlckBTROOT); 2573 dtlck = (struct dt_lock *) & tlck->lock; 2574 2575 /* linelock root */ 2576 ASSERT(dtlck->index == 0); 2577 lv = & dtlck->lv[0]; 2578 lv->offset = 0; 2579 lv->length = DTROOTMAXSLOT; 2580 dtlck->index++; 2581 2582 p = &jfs_ip->i_dtroot; 2583 2584 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2585 2586 p->header.nextindex = 0; 2587 2588 /* init freelist */ 2589 fsi = 1; 2590 f = &p->slot[fsi]; 2591 2592 /* init data area of root */ 2593 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2594 f->next = fsi; 2595 f->next = -1; 2596 2597 p->header.freelist = 1; 2598 p->header.freecnt = 8; 2599 2600 /* init '..' entry */ 2601 p->header.idotdot = cpu_to_le32(idotdot); 2602 2603 return; 2604 } 2605 2606 /* 2607 * add_missing_indices() 2608 * 2609 * function: Fix dtree page in which one or more entries has an invalid index. 2610 * fsck.jfs should really fix this, but it currently does not. 2611 * Called from jfs_readdir when bad index is detected. 2612 */ 2613 static int add_missing_indices(struct inode *inode, s64 bn) 2614 { 2615 struct ldtentry *d; 2616 struct dt_lock *dtlck; 2617 int i; 2618 uint index; 2619 struct lv *lv; 2620 struct metapage *mp; 2621 dtpage_t *p; 2622 int rc = 0; 2623 s8 *stbl; 2624 tid_t tid; 2625 struct tlock *tlck; 2626 2627 tid = txBegin(inode->i_sb, 0); 2628 2629 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2630 2631 if (rc) { 2632 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2633 goto end; 2634 } 2635 BT_MARK_DIRTY(mp, inode); 2636 2637 ASSERT(p->header.flag & BT_LEAF); 2638 2639 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2640 if (BT_IS_ROOT(mp)) 2641 tlck->type |= tlckBTROOT; 2642 2643 dtlck = (struct dt_lock *) &tlck->lock; 2644 2645 stbl = DT_GETSTBL(p); 2646 for (i = 0; i < p->header.nextindex; i++) { 2647 if (stbl[i] < 0) { 2648 jfs_err("jfs: add_missing_indices: Invalid stbl[%d] = %d for inode %ld, block = %lld", 2649 i, stbl[i], (long)inode->i_ino, (long long)bn); 2650 rc = -EIO; 2651 2652 DT_PUTPAGE(mp); 2653 txAbort(tid, 0); 2654 goto end; 2655 } 2656 2657 d = (struct ldtentry *) &p->slot[stbl[i]]; 2658 index = le32_to_cpu(d->index); 2659 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2660 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2661 if (dtlck->index >= dtlck->maxcnt) 2662 dtlck = (struct dt_lock *) txLinelock(dtlck); 2663 lv = &dtlck->lv[dtlck->index]; 2664 lv->offset = stbl[i]; 2665 lv->length = 1; 2666 dtlck->index++; 2667 } 2668 } 2669 2670 DT_PUTPAGE(mp); 2671 (void) txCommit(tid, 1, &inode, 0); 2672 end: 2673 txEnd(tid); 2674 return rc; 2675 } 2676 2677 /* 2678 * Buffer to hold directory entry info while traversing a dtree page 2679 * before being fed to the filldir function 2680 */ 2681 struct jfs_dirent { 2682 loff_t position; 2683 int ino; 2684 u16 name_len; 2685 char name[]; 2686 }; 2687 2688 /* 2689 * function to determine next variable-sized jfs_dirent in buffer 2690 */ 2691 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2692 { 2693 return (struct jfs_dirent *) 2694 ((char *)dirent + 2695 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2696 sizeof (loff_t) - 1) & 2697 ~(sizeof (loff_t) - 1))); 2698 } 2699 2700 /* 2701 * jfs_readdir() 2702 * 2703 * function: read directory entries sequentially 2704 * from the specified entry offset 2705 * 2706 * parameter: 2707 * 2708 * return: offset = (pn, index) of start entry 2709 * of next jfs_readdir()/dtRead() 2710 */ 2711 int jfs_readdir(struct file *file, struct dir_context *ctx) 2712 { 2713 struct inode *ip = file_inode(file); 2714 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2715 int rc = 0; 2716 loff_t dtpos; /* legacy OS/2 style position */ 2717 struct dtoffset { 2718 s16 pn; 2719 s16 index; 2720 s32 unused; 2721 } *dtoffset = (struct dtoffset *) &dtpos; 2722 s64 bn; 2723 struct metapage *mp; 2724 dtpage_t *p; 2725 int index; 2726 s8 *stbl; 2727 struct btstack btstack; 2728 int i, next; 2729 struct ldtentry *d; 2730 struct dtslot *t; 2731 int d_namleft, len, outlen; 2732 unsigned long dirent_buf; 2733 char *name_ptr; 2734 u32 dir_index; 2735 int do_index = 0; 2736 uint loop_count = 0; 2737 struct jfs_dirent *jfs_dirent; 2738 int jfs_dirents; 2739 int overflow, fix_page, page_fixed = 0; 2740 static int unique_pos = 2; /* If we can't fix broken index */ 2741 2742 if (ctx->pos == DIREND) 2743 return 0; 2744 2745 if (DO_INDEX(ip)) { 2746 /* 2747 * persistent index is stored in directory entries. 2748 * Special cases: 0 = . 2749 * 1 = .. 2750 * -1 = End of directory 2751 */ 2752 do_index = 1; 2753 2754 dir_index = (u32) ctx->pos; 2755 2756 /* 2757 * NFSv4 reserves cookies 1 and 2 for . and .. so the value 2758 * we return to the vfs is one greater than the one we use 2759 * internally. 2760 */ 2761 if (dir_index) 2762 dir_index--; 2763 2764 if (dir_index > 1) { 2765 struct dir_table_slot dirtab_slot; 2766 2767 if (dtEmpty(ip) || 2768 (dir_index >= JFS_IP(ip)->next_index)) { 2769 /* Stale position. Directory has shrunk */ 2770 ctx->pos = DIREND; 2771 return 0; 2772 } 2773 repeat: 2774 rc = read_index(ip, dir_index, &dirtab_slot); 2775 if (rc) { 2776 ctx->pos = DIREND; 2777 return rc; 2778 } 2779 if (dirtab_slot.flag == DIR_INDEX_FREE) { 2780 if (loop_count++ > JFS_IP(ip)->next_index) { 2781 jfs_err("jfs_readdir detected infinite loop!"); 2782 ctx->pos = DIREND; 2783 return 0; 2784 } 2785 dir_index = le32_to_cpu(dirtab_slot.addr2); 2786 if (dir_index == -1) { 2787 ctx->pos = DIREND; 2788 return 0; 2789 } 2790 goto repeat; 2791 } 2792 bn = addressDTS(&dirtab_slot); 2793 index = dirtab_slot.slot; 2794 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2795 if (rc) { 2796 ctx->pos = DIREND; 2797 return 0; 2798 } 2799 if (p->header.flag & BT_INTERNAL) { 2800 jfs_err("jfs_readdir: bad index table"); 2801 DT_PUTPAGE(mp); 2802 ctx->pos = DIREND; 2803 return 0; 2804 } 2805 } else { 2806 if (dir_index == 0) { 2807 /* 2808 * self "." 2809 */ 2810 ctx->pos = 1; 2811 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2812 return 0; 2813 } 2814 /* 2815 * parent ".." 2816 */ 2817 ctx->pos = 2; 2818 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2819 return 0; 2820 2821 /* 2822 * Find first entry of left-most leaf 2823 */ 2824 if (dtEmpty(ip)) { 2825 ctx->pos = DIREND; 2826 return 0; 2827 } 2828 2829 if ((rc = dtReadFirst(ip, &btstack))) 2830 return rc; 2831 2832 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2833 } 2834 } else { 2835 /* 2836 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 2837 * 2838 * pn = 0; index = 1: First entry "." 2839 * pn = 0; index = 2: Second entry ".." 2840 * pn > 0: Real entries, pn=1 -> leftmost page 2841 * pn = index = -1: No more entries 2842 */ 2843 dtpos = ctx->pos; 2844 if (dtpos < 2) { 2845 /* build "." entry */ 2846 ctx->pos = 1; 2847 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2848 return 0; 2849 dtoffset->index = 2; 2850 ctx->pos = dtpos; 2851 } 2852 2853 if (dtoffset->pn == 0) { 2854 if (dtoffset->index == 2) { 2855 /* build ".." entry */ 2856 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2857 return 0; 2858 } else { 2859 jfs_err("jfs_readdir called with invalid offset!"); 2860 } 2861 dtoffset->pn = 1; 2862 dtoffset->index = 0; 2863 ctx->pos = dtpos; 2864 } 2865 2866 if (dtEmpty(ip)) { 2867 ctx->pos = DIREND; 2868 return 0; 2869 } 2870 2871 if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { 2872 jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext", 2873 rc); 2874 ctx->pos = DIREND; 2875 return 0; 2876 } 2877 /* get start leaf page and index */ 2878 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2879 2880 /* offset beyond directory eof ? */ 2881 if (bn < 0) { 2882 ctx->pos = DIREND; 2883 return 0; 2884 } 2885 } 2886 2887 dirent_buf = __get_free_page(GFP_KERNEL); 2888 if (dirent_buf == 0) { 2889 DT_PUTPAGE(mp); 2890 jfs_warn("jfs_readdir: __get_free_page failed!"); 2891 ctx->pos = DIREND; 2892 return -ENOMEM; 2893 } 2894 2895 while (1) { 2896 jfs_dirent = (struct jfs_dirent *) dirent_buf; 2897 jfs_dirents = 0; 2898 overflow = fix_page = 0; 2899 2900 stbl = DT_GETSTBL(p); 2901 2902 for (i = index; i < p->header.nextindex; i++) { 2903 if (stbl[i] < 0) { 2904 jfs_err("JFS: Invalid stbl[%d] = %d for inode %ld, block = %lld", 2905 i, stbl[i], (long)ip->i_ino, (long long)bn); 2906 free_page(dirent_buf); 2907 DT_PUTPAGE(mp); 2908 return -EIO; 2909 } 2910 2911 d = (struct ldtentry *) & p->slot[stbl[i]]; 2912 2913 if (((long) jfs_dirent + d->namlen + 1) > 2914 (dirent_buf + PAGE_SIZE)) { 2915 /* DBCS codepages could overrun dirent_buf */ 2916 index = i; 2917 overflow = 1; 2918 break; 2919 } 2920 2921 d_namleft = d->namlen; 2922 name_ptr = jfs_dirent->name; 2923 jfs_dirent->ino = le32_to_cpu(d->inumber); 2924 2925 if (do_index) { 2926 len = min(d_namleft, DTLHDRDATALEN); 2927 jfs_dirent->position = le32_to_cpu(d->index); 2928 /* 2929 * d->index should always be valid, but it 2930 * isn't. fsck.jfs doesn't create the 2931 * directory index for the lost+found 2932 * directory. Rather than let it go, 2933 * we can try to fix it. 2934 */ 2935 if ((jfs_dirent->position < 2) || 2936 (jfs_dirent->position >= 2937 JFS_IP(ip)->next_index)) { 2938 if (!page_fixed && !isReadOnly(ip)) { 2939 fix_page = 1; 2940 /* 2941 * setting overflow and setting 2942 * index to i will cause the 2943 * same page to be processed 2944 * again starting here 2945 */ 2946 overflow = 1; 2947 index = i; 2948 break; 2949 } 2950 jfs_dirent->position = unique_pos++; 2951 } 2952 /* 2953 * We add 1 to the index because we may 2954 * use a value of 2 internally, and NFSv4 2955 * doesn't like that. 2956 */ 2957 jfs_dirent->position++; 2958 } else { 2959 jfs_dirent->position = dtpos; 2960 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 2961 } 2962 2963 /* copy the name of head/only segment */ 2964 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 2965 codepage); 2966 jfs_dirent->name_len = outlen; 2967 2968 /* copy name in the additional segment(s) */ 2969 next = d->next; 2970 while (next >= 0) { 2971 t = (struct dtslot *) & p->slot[next]; 2972 name_ptr += outlen; 2973 d_namleft -= len; 2974 /* Sanity Check */ 2975 if (d_namleft == 0) { 2976 jfs_error(ip->i_sb, 2977 "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", 2978 (long)ip->i_ino, 2979 (long long)bn, 2980 i); 2981 goto skip_one; 2982 } 2983 len = min(d_namleft, DTSLOTDATALEN); 2984 outlen = jfs_strfromUCS_le(name_ptr, t->name, 2985 len, codepage); 2986 jfs_dirent->name_len += outlen; 2987 2988 next = t->next; 2989 } 2990 2991 jfs_dirents++; 2992 jfs_dirent = next_jfs_dirent(jfs_dirent); 2993 skip_one: 2994 if (!do_index) 2995 dtoffset->index++; 2996 } 2997 2998 if (!overflow) { 2999 /* Point to next leaf page */ 3000 if (p->header.flag & BT_ROOT) 3001 bn = 0; 3002 else { 3003 bn = le64_to_cpu(p->header.next); 3004 index = 0; 3005 /* update offset (pn:index) for new page */ 3006 if (!do_index) { 3007 dtoffset->pn++; 3008 dtoffset->index = 0; 3009 } 3010 } 3011 page_fixed = 0; 3012 } 3013 3014 /* unpin previous leaf page */ 3015 DT_PUTPAGE(mp); 3016 3017 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3018 while (jfs_dirents--) { 3019 ctx->pos = jfs_dirent->position; 3020 if (!dir_emit(ctx, jfs_dirent->name, 3021 jfs_dirent->name_len, 3022 jfs_dirent->ino, DT_UNKNOWN)) 3023 goto out; 3024 jfs_dirent = next_jfs_dirent(jfs_dirent); 3025 } 3026 3027 if (fix_page) { 3028 if ((rc = add_missing_indices(ip, bn))) 3029 goto out; 3030 page_fixed = 1; 3031 } 3032 3033 if (!overflow && (bn == 0)) { 3034 ctx->pos = DIREND; 3035 break; 3036 } 3037 3038 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3039 if (rc) { 3040 free_page(dirent_buf); 3041 return rc; 3042 } 3043 } 3044 3045 out: 3046 free_page(dirent_buf); 3047 3048 return rc; 3049 } 3050 3051 3052 /* 3053 * dtReadFirst() 3054 * 3055 * function: get the leftmost page of the directory 3056 */ 3057 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3058 { 3059 int rc = 0; 3060 s64 bn; 3061 int psize = 288; /* initial in-line directory */ 3062 struct metapage *mp; 3063 dtpage_t *p; 3064 s8 *stbl; 3065 struct btframe *btsp; 3066 pxd_t *xd; 3067 3068 BT_CLR(btstack); /* reset stack */ 3069 3070 /* 3071 * descend leftmost path of the tree 3072 * 3073 * by convention, root bn = 0. 3074 */ 3075 for (bn = 0;;) { 3076 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3077 if (rc) 3078 return rc; 3079 3080 /* 3081 * leftmost leaf page 3082 */ 3083 if (p->header.flag & BT_LEAF) { 3084 /* return leftmost entry */ 3085 btsp = btstack->top; 3086 btsp->bn = bn; 3087 btsp->index = 0; 3088 btsp->mp = mp; 3089 3090 return 0; 3091 } 3092 3093 /* 3094 * descend down to leftmost child page 3095 */ 3096 if (BT_STACK_FULL(btstack)) { 3097 DT_PUTPAGE(mp); 3098 jfs_error(ip->i_sb, "btstack overrun\n"); 3099 BT_STACK_DUMP(btstack); 3100 return -EIO; 3101 } 3102 /* push (bn, index) of the parent page/entry */ 3103 BT_PUSH(btstack, bn, 0); 3104 3105 /* get the leftmost entry */ 3106 stbl = DT_GETSTBL(p); 3107 3108 if (stbl[0] < 0) { 3109 DT_PUTPAGE(mp); 3110 jfs_error(ip->i_sb, "stbl[0] out of bound\n"); 3111 return -EIO; 3112 } 3113 3114 xd = (pxd_t *) & p->slot[stbl[0]]; 3115 3116 /* get the child page block address */ 3117 bn = addressPXD(xd); 3118 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3119 3120 /* unpin the parent page */ 3121 DT_PUTPAGE(mp); 3122 } 3123 } 3124 3125 3126 /* 3127 * dtReadNext() 3128 * 3129 * function: get the page of the specified offset (pn:index) 3130 * 3131 * return: if (offset > eof), bn = -1; 3132 * 3133 * note: if index > nextindex of the target leaf page, 3134 * start with 1st entry of next leaf page; 3135 */ 3136 static int dtReadNext(struct inode *ip, loff_t * offset, 3137 struct btstack * btstack) 3138 { 3139 int rc = 0; 3140 struct dtoffset { 3141 s16 pn; 3142 s16 index; 3143 s32 unused; 3144 } *dtoffset = (struct dtoffset *) offset; 3145 s64 bn; 3146 struct metapage *mp; 3147 dtpage_t *p; 3148 int index; 3149 int pn; 3150 s8 *stbl; 3151 struct btframe *btsp, *parent; 3152 pxd_t *xd; 3153 3154 /* 3155 * get leftmost leaf page pinned 3156 */ 3157 if ((rc = dtReadFirst(ip, btstack))) 3158 return rc; 3159 3160 /* get leaf page */ 3161 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3162 3163 /* get the start offset (pn:index) */ 3164 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3165 index = dtoffset->index; 3166 3167 /* start at leftmost page ? */ 3168 if (pn == 0) { 3169 /* offset beyond eof ? */ 3170 if (index < p->header.nextindex) 3171 goto out; 3172 3173 if (p->header.flag & BT_ROOT) { 3174 bn = -1; 3175 goto out; 3176 } 3177 3178 /* start with 1st entry of next leaf page */ 3179 dtoffset->pn++; 3180 dtoffset->index = index = 0; 3181 goto a; 3182 } 3183 3184 /* start at non-leftmost page: scan parent pages for large pn */ 3185 if (p->header.flag & BT_ROOT) { 3186 bn = -1; 3187 goto out; 3188 } 3189 3190 /* start after next leaf page ? */ 3191 if (pn > 1) 3192 goto b; 3193 3194 /* get leaf page pn = 1 */ 3195 a: 3196 bn = le64_to_cpu(p->header.next); 3197 3198 /* unpin leaf page */ 3199 DT_PUTPAGE(mp); 3200 3201 /* offset beyond eof ? */ 3202 if (bn == 0) { 3203 bn = -1; 3204 goto out; 3205 } 3206 3207 goto c; 3208 3209 /* 3210 * scan last internal page level to get target leaf page 3211 */ 3212 b: 3213 /* unpin leftmost leaf page */ 3214 DT_PUTPAGE(mp); 3215 3216 /* get left most parent page */ 3217 btsp = btstack->top; 3218 parent = btsp - 1; 3219 bn = parent->bn; 3220 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3221 if (rc) 3222 return rc; 3223 3224 /* scan parent pages at last internal page level */ 3225 while (pn >= p->header.nextindex) { 3226 pn -= p->header.nextindex; 3227 3228 /* get next parent page address */ 3229 bn = le64_to_cpu(p->header.next); 3230 3231 /* unpin current parent page */ 3232 DT_PUTPAGE(mp); 3233 3234 /* offset beyond eof ? */ 3235 if (bn == 0) { 3236 bn = -1; 3237 goto out; 3238 } 3239 3240 /* get next parent page */ 3241 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3242 if (rc) 3243 return rc; 3244 3245 /* update parent page stack frame */ 3246 parent->bn = bn; 3247 } 3248 3249 /* get leaf page address */ 3250 stbl = DT_GETSTBL(p); 3251 xd = (pxd_t *) & p->slot[stbl[pn]]; 3252 bn = addressPXD(xd); 3253 3254 /* unpin parent page */ 3255 DT_PUTPAGE(mp); 3256 3257 /* 3258 * get target leaf page 3259 */ 3260 c: 3261 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3262 if (rc) 3263 return rc; 3264 3265 /* 3266 * leaf page has been completed: 3267 * start with 1st entry of next leaf page 3268 */ 3269 if (index >= p->header.nextindex) { 3270 bn = le64_to_cpu(p->header.next); 3271 3272 /* unpin leaf page */ 3273 DT_PUTPAGE(mp); 3274 3275 /* offset beyond eof ? */ 3276 if (bn == 0) { 3277 bn = -1; 3278 goto out; 3279 } 3280 3281 /* get next leaf page */ 3282 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3283 if (rc) 3284 return rc; 3285 3286 /* start with 1st entry of next leaf page */ 3287 dtoffset->pn++; 3288 dtoffset->index = 0; 3289 } 3290 3291 out: 3292 /* return target leaf page pinned */ 3293 btsp = btstack->top; 3294 btsp->bn = bn; 3295 btsp->index = dtoffset->index; 3296 btsp->mp = mp; 3297 3298 return 0; 3299 } 3300 3301 3302 /* 3303 * dtCompare() 3304 * 3305 * function: compare search key with an internal entry 3306 * 3307 * return: 3308 * < 0 if k is < record 3309 * = 0 if k is = record 3310 * > 0 if k is > record 3311 */ 3312 static int dtCompare(struct component_name * key, /* search key */ 3313 dtpage_t * p, /* directory page */ 3314 int si) 3315 { /* entry slot index */ 3316 wchar_t *kname; 3317 __le16 *name; 3318 int klen, namlen, len, rc; 3319 struct idtentry *ih; 3320 struct dtslot *t; 3321 3322 /* 3323 * force the left-most key on internal pages, at any level of 3324 * the tree, to be less than any search key. 3325 * this obviates having to update the leftmost key on an internal 3326 * page when the user inserts a new key in the tree smaller than 3327 * anything that has been stored. 3328 * 3329 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3330 * at any internal page at any level of the tree, 3331 * it descends to child of the entry anyway - 3332 * ? make the entry as min size dummy entry) 3333 * 3334 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3335 * return (1); 3336 */ 3337 3338 kname = key->name; 3339 klen = key->namlen; 3340 3341 ih = (struct idtentry *) & p->slot[si]; 3342 si = ih->next; 3343 name = ih->name; 3344 namlen = ih->namlen; 3345 len = min(namlen, DTIHDRDATALEN); 3346 3347 /* compare with head/only segment */ 3348 len = min(klen, len); 3349 if ((rc = UniStrncmp_le(kname, name, len))) 3350 return rc; 3351 3352 klen -= len; 3353 namlen -= len; 3354 3355 /* compare with additional segment(s) */ 3356 kname += len; 3357 while (klen > 0 && namlen > 0) { 3358 /* compare with next name segment */ 3359 t = (struct dtslot *) & p->slot[si]; 3360 len = min(namlen, DTSLOTDATALEN); 3361 len = min(klen, len); 3362 name = t->name; 3363 if ((rc = UniStrncmp_le(kname, name, len))) 3364 return rc; 3365 3366 klen -= len; 3367 namlen -= len; 3368 kname += len; 3369 si = t->next; 3370 } 3371 3372 return (klen - namlen); 3373 } 3374 3375 3376 3377 3378 /* 3379 * ciCompare() 3380 * 3381 * function: compare search key with an (leaf/internal) entry 3382 * 3383 * return: 3384 * < 0 if k is < record 3385 * = 0 if k is = record 3386 * > 0 if k is > record 3387 */ 3388 static int ciCompare(struct component_name * key, /* search key */ 3389 dtpage_t * p, /* directory page */ 3390 int si, /* entry slot index */ 3391 int flag) 3392 { 3393 wchar_t *kname, x; 3394 __le16 *name; 3395 int klen, namlen, len, rc; 3396 struct ldtentry *lh; 3397 struct idtentry *ih; 3398 struct dtslot *t; 3399 int i; 3400 3401 /* 3402 * force the left-most key on internal pages, at any level of 3403 * the tree, to be less than any search key. 3404 * this obviates having to update the leftmost key on an internal 3405 * page when the user inserts a new key in the tree smaller than 3406 * anything that has been stored. 3407 * 3408 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3409 * at any internal page at any level of the tree, 3410 * it descends to child of the entry anyway - 3411 * ? make the entry as min size dummy entry) 3412 * 3413 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3414 * return (1); 3415 */ 3416 3417 kname = key->name; 3418 klen = key->namlen; 3419 3420 /* 3421 * leaf page entry 3422 */ 3423 if (p->header.flag & BT_LEAF) { 3424 lh = (struct ldtentry *) & p->slot[si]; 3425 si = lh->next; 3426 name = lh->name; 3427 namlen = lh->namlen; 3428 if (flag & JFS_DIR_INDEX) 3429 len = min(namlen, DTLHDRDATALEN); 3430 else 3431 len = min(namlen, DTLHDRDATALEN_LEGACY); 3432 } 3433 /* 3434 * internal page entry 3435 */ 3436 else { 3437 ih = (struct idtentry *) & p->slot[si]; 3438 si = ih->next; 3439 name = ih->name; 3440 namlen = ih->namlen; 3441 len = min(namlen, DTIHDRDATALEN); 3442 } 3443 3444 /* compare with head/only segment */ 3445 len = min(klen, len); 3446 for (i = 0; i < len; i++, kname++, name++) { 3447 /* only uppercase if case-insensitive support is on */ 3448 if ((flag & JFS_OS2) == JFS_OS2) 3449 x = UniToupper(le16_to_cpu(*name)); 3450 else 3451 x = le16_to_cpu(*name); 3452 if ((rc = *kname - x)) 3453 return rc; 3454 } 3455 3456 klen -= len; 3457 namlen -= len; 3458 3459 /* compare with additional segment(s) */ 3460 while (klen > 0 && namlen > 0) { 3461 /* compare with next name segment */ 3462 t = (struct dtslot *) & p->slot[si]; 3463 len = min(namlen, DTSLOTDATALEN); 3464 len = min(klen, len); 3465 name = t->name; 3466 for (i = 0; i < len; i++, kname++, name++) { 3467 /* only uppercase if case-insensitive support is on */ 3468 if ((flag & JFS_OS2) == JFS_OS2) 3469 x = UniToupper(le16_to_cpu(*name)); 3470 else 3471 x = le16_to_cpu(*name); 3472 3473 if ((rc = *kname - x)) 3474 return rc; 3475 } 3476 3477 klen -= len; 3478 namlen -= len; 3479 si = t->next; 3480 } 3481 3482 return (klen - namlen); 3483 } 3484 3485 3486 /* 3487 * ciGetLeafPrefixKey() 3488 * 3489 * function: compute prefix of suffix compression 3490 * from two adjacent leaf entries 3491 * across page boundary 3492 * 3493 * return: non-zero on error 3494 * 3495 */ 3496 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3497 int ri, struct component_name * key, int flag) 3498 { 3499 int klen, namlen; 3500 wchar_t *pl, *pr, *kname; 3501 struct component_name lkey; 3502 struct component_name rkey; 3503 3504 lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3505 GFP_KERNEL); 3506 if (lkey.name == NULL) 3507 return -ENOMEM; 3508 3509 rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3510 GFP_KERNEL); 3511 if (rkey.name == NULL) { 3512 kfree(lkey.name); 3513 return -ENOMEM; 3514 } 3515 3516 /* get left and right key */ 3517 dtGetKey(lp, li, &lkey, flag); 3518 lkey.name[lkey.namlen] = 0; 3519 3520 if ((flag & JFS_OS2) == JFS_OS2) 3521 ciToUpper(&lkey); 3522 3523 dtGetKey(rp, ri, &rkey, flag); 3524 rkey.name[rkey.namlen] = 0; 3525 3526 3527 if ((flag & JFS_OS2) == JFS_OS2) 3528 ciToUpper(&rkey); 3529 3530 /* compute prefix */ 3531 klen = 0; 3532 kname = key->name; 3533 namlen = min(lkey.namlen, rkey.namlen); 3534 for (pl = lkey.name, pr = rkey.name; 3535 namlen; pl++, pr++, namlen--, klen++, kname++) { 3536 *kname = *pr; 3537 if (*pl != *pr) { 3538 key->namlen = klen + 1; 3539 goto free_names; 3540 } 3541 } 3542 3543 /* l->namlen <= r->namlen since l <= r */ 3544 if (lkey.namlen < rkey.namlen) { 3545 *kname = *pr; 3546 key->namlen = klen + 1; 3547 } else /* l->namelen == r->namelen */ 3548 key->namlen = klen; 3549 3550 free_names: 3551 kfree(lkey.name); 3552 kfree(rkey.name); 3553 return 0; 3554 } 3555 3556 3557 3558 /* 3559 * dtGetKey() 3560 * 3561 * function: get key of the entry 3562 */ 3563 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3564 struct component_name * key, int flag) 3565 { 3566 int si; 3567 s8 *stbl; 3568 struct ldtentry *lh; 3569 struct idtentry *ih; 3570 struct dtslot *t; 3571 int namlen, len; 3572 wchar_t *kname; 3573 __le16 *name; 3574 3575 /* get entry */ 3576 stbl = DT_GETSTBL(p); 3577 si = stbl[i]; 3578 if (p->header.flag & BT_LEAF) { 3579 lh = (struct ldtentry *) & p->slot[si]; 3580 si = lh->next; 3581 namlen = lh->namlen; 3582 name = lh->name; 3583 if (flag & JFS_DIR_INDEX) 3584 len = min(namlen, DTLHDRDATALEN); 3585 else 3586 len = min(namlen, DTLHDRDATALEN_LEGACY); 3587 } else { 3588 ih = (struct idtentry *) & p->slot[si]; 3589 si = ih->next; 3590 namlen = ih->namlen; 3591 name = ih->name; 3592 len = min(namlen, DTIHDRDATALEN); 3593 } 3594 3595 key->namlen = namlen; 3596 kname = key->name; 3597 3598 /* 3599 * move head/only segment 3600 */ 3601 UniStrncpy_from_le(kname, name, len); 3602 3603 /* 3604 * move additional segment(s) 3605 */ 3606 while (si >= 0) { 3607 /* get next segment */ 3608 t = &p->slot[si]; 3609 kname += len; 3610 namlen -= len; 3611 len = min(namlen, DTSLOTDATALEN); 3612 UniStrncpy_from_le(kname, t->name, len); 3613 3614 si = t->next; 3615 } 3616 } 3617 3618 3619 /* 3620 * dtInsertEntry() 3621 * 3622 * function: allocate free slot(s) and 3623 * write a leaf/internal entry 3624 * 3625 * return: entry slot index 3626 */ 3627 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3628 ddata_t * data, struct dt_lock ** dtlock) 3629 { 3630 struct dtslot *h, *t; 3631 struct ldtentry *lh = NULL; 3632 struct idtentry *ih = NULL; 3633 int hsi, fsi, klen, len, nextindex; 3634 wchar_t *kname; 3635 __le16 *name; 3636 s8 *stbl; 3637 pxd_t *xd; 3638 struct dt_lock *dtlck = *dtlock; 3639 struct lv *lv; 3640 int xsi, n; 3641 s64 bn = 0; 3642 struct metapage *mp = NULL; 3643 3644 klen = key->namlen; 3645 kname = key->name; 3646 3647 /* allocate a free slot */ 3648 hsi = fsi = p->header.freelist; 3649 h = &p->slot[fsi]; 3650 p->header.freelist = h->next; 3651 --p->header.freecnt; 3652 3653 /* open new linelock */ 3654 if (dtlck->index >= dtlck->maxcnt) 3655 dtlck = (struct dt_lock *) txLinelock(dtlck); 3656 3657 lv = & dtlck->lv[dtlck->index]; 3658 lv->offset = hsi; 3659 3660 /* write head/only segment */ 3661 if (p->header.flag & BT_LEAF) { 3662 lh = (struct ldtentry *) h; 3663 lh->next = h->next; 3664 lh->inumber = cpu_to_le32(data->leaf.ino); 3665 lh->namlen = klen; 3666 name = lh->name; 3667 if (data->leaf.ip) { 3668 len = min(klen, DTLHDRDATALEN); 3669 if (!(p->header.flag & BT_ROOT)) 3670 bn = addressPXD(&p->header.self); 3671 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3672 data->leaf.ip, 3673 bn, index)); 3674 } else 3675 len = min(klen, DTLHDRDATALEN_LEGACY); 3676 } else { 3677 ih = (struct idtentry *) h; 3678 ih->next = h->next; 3679 xd = (pxd_t *) ih; 3680 *xd = data->xd; 3681 ih->namlen = klen; 3682 name = ih->name; 3683 len = min(klen, DTIHDRDATALEN); 3684 } 3685 3686 UniStrncpy_to_le(name, kname, len); 3687 3688 n = 1; 3689 xsi = hsi; 3690 3691 /* write additional segment(s) */ 3692 t = h; 3693 klen -= len; 3694 while (klen) { 3695 /* get free slot */ 3696 fsi = p->header.freelist; 3697 t = &p->slot[fsi]; 3698 p->header.freelist = t->next; 3699 --p->header.freecnt; 3700 3701 /* is next slot contiguous ? */ 3702 if (fsi != xsi + 1) { 3703 /* close current linelock */ 3704 lv->length = n; 3705 dtlck->index++; 3706 3707 /* open new linelock */ 3708 if (dtlck->index < dtlck->maxcnt) 3709 lv++; 3710 else { 3711 dtlck = (struct dt_lock *) txLinelock(dtlck); 3712 lv = & dtlck->lv[0]; 3713 } 3714 3715 lv->offset = fsi; 3716 n = 0; 3717 } 3718 3719 kname += len; 3720 len = min(klen, DTSLOTDATALEN); 3721 UniStrncpy_to_le(t->name, kname, len); 3722 3723 n++; 3724 xsi = fsi; 3725 klen -= len; 3726 } 3727 3728 /* close current linelock */ 3729 lv->length = n; 3730 dtlck->index++; 3731 3732 *dtlock = dtlck; 3733 3734 /* terminate last/only segment */ 3735 if (h == t) { 3736 /* single segment entry */ 3737 if (p->header.flag & BT_LEAF) 3738 lh->next = -1; 3739 else 3740 ih->next = -1; 3741 } else 3742 /* multi-segment entry */ 3743 t->next = -1; 3744 3745 /* if insert into middle, shift right succeeding entries in stbl */ 3746 stbl = DT_GETSTBL(p); 3747 nextindex = p->header.nextindex; 3748 if (index < nextindex) { 3749 memmove(stbl + index + 1, stbl + index, nextindex - index); 3750 3751 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 3752 s64 lblock; 3753 3754 /* 3755 * Need to update slot number for entries that moved 3756 * in the stbl 3757 */ 3758 mp = NULL; 3759 for (n = index + 1; n <= nextindex; n++) { 3760 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 3761 modify_index(data->leaf.tid, data->leaf.ip, 3762 le32_to_cpu(lh->index), bn, n, 3763 &mp, &lblock); 3764 } 3765 if (mp) 3766 release_metapage(mp); 3767 } 3768 } 3769 3770 stbl[index] = hsi; 3771 3772 /* advance next available entry index of stbl */ 3773 ++p->header.nextindex; 3774 } 3775 3776 3777 /* 3778 * dtMoveEntry() 3779 * 3780 * function: move entries from split/left page to new/right page 3781 * 3782 * nextindex of dst page and freelist/freecnt of both pages 3783 * are updated. 3784 */ 3785 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 3786 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 3787 int do_index) 3788 { 3789 int ssi, next; /* src slot index */ 3790 int di; /* dst entry index */ 3791 int dsi; /* dst slot index */ 3792 s8 *sstbl, *dstbl; /* sorted entry table */ 3793 int snamlen, len; 3794 struct ldtentry *slh, *dlh = NULL; 3795 struct idtentry *sih, *dih = NULL; 3796 struct dtslot *h, *s, *d; 3797 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 3798 struct lv *slv, *dlv; 3799 int xssi, ns, nd; 3800 int sfsi; 3801 3802 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 3803 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 3804 3805 dsi = dp->header.freelist; /* first (whole page) free slot */ 3806 sfsi = sp->header.freelist; 3807 3808 /* linelock destination entry slot */ 3809 dlv = & ddtlck->lv[ddtlck->index]; 3810 dlv->offset = dsi; 3811 3812 /* linelock source entry slot */ 3813 slv = & sdtlck->lv[sdtlck->index]; 3814 slv->offset = sstbl[si]; 3815 xssi = slv->offset - 1; 3816 3817 /* 3818 * move entries 3819 */ 3820 ns = nd = 0; 3821 for (di = 0; si < sp->header.nextindex; si++, di++) { 3822 ssi = sstbl[si]; 3823 dstbl[di] = dsi; 3824 3825 /* is next slot contiguous ? */ 3826 if (ssi != xssi + 1) { 3827 /* close current linelock */ 3828 slv->length = ns; 3829 sdtlck->index++; 3830 3831 /* open new linelock */ 3832 if (sdtlck->index < sdtlck->maxcnt) 3833 slv++; 3834 else { 3835 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 3836 slv = & sdtlck->lv[0]; 3837 } 3838 3839 slv->offset = ssi; 3840 ns = 0; 3841 } 3842 3843 /* 3844 * move head/only segment of an entry 3845 */ 3846 /* get dst slot */ 3847 h = d = &dp->slot[dsi]; 3848 3849 /* get src slot and move */ 3850 s = &sp->slot[ssi]; 3851 if (sp->header.flag & BT_LEAF) { 3852 /* get source entry */ 3853 slh = (struct ldtentry *) s; 3854 dlh = (struct ldtentry *) h; 3855 snamlen = slh->namlen; 3856 3857 if (do_index) { 3858 len = min(snamlen, DTLHDRDATALEN); 3859 dlh->index = slh->index; /* little-endian */ 3860 } else 3861 len = min(snamlen, DTLHDRDATALEN_LEGACY); 3862 3863 memcpy(dlh, slh, 6 + len * 2); 3864 3865 next = slh->next; 3866 3867 /* update dst head/only segment next field */ 3868 dsi++; 3869 dlh->next = dsi; 3870 } else { 3871 sih = (struct idtentry *) s; 3872 snamlen = sih->namlen; 3873 3874 len = min(snamlen, DTIHDRDATALEN); 3875 dih = (struct idtentry *) h; 3876 memcpy(dih, sih, 10 + len * 2); 3877 next = sih->next; 3878 3879 dsi++; 3880 dih->next = dsi; 3881 } 3882 3883 /* free src head/only segment */ 3884 s->next = sfsi; 3885 s->cnt = 1; 3886 sfsi = ssi; 3887 3888 ns++; 3889 nd++; 3890 xssi = ssi; 3891 3892 /* 3893 * move additional segment(s) of the entry 3894 */ 3895 snamlen -= len; 3896 while ((ssi = next) >= 0) { 3897 /* is next slot contiguous ? */ 3898 if (ssi != xssi + 1) { 3899 /* close current linelock */ 3900 slv->length = ns; 3901 sdtlck->index++; 3902 3903 /* open new linelock */ 3904 if (sdtlck->index < sdtlck->maxcnt) 3905 slv++; 3906 else { 3907 sdtlck = 3908 (struct dt_lock *) 3909 txLinelock(sdtlck); 3910 slv = & sdtlck->lv[0]; 3911 } 3912 3913 slv->offset = ssi; 3914 ns = 0; 3915 } 3916 3917 /* get next source segment */ 3918 s = &sp->slot[ssi]; 3919 3920 /* get next destination free slot */ 3921 d++; 3922 3923 len = min(snamlen, DTSLOTDATALEN); 3924 UniStrncpy_le(d->name, s->name, len); 3925 3926 ns++; 3927 nd++; 3928 xssi = ssi; 3929 3930 dsi++; 3931 d->next = dsi; 3932 3933 /* free source segment */ 3934 next = s->next; 3935 s->next = sfsi; 3936 s->cnt = 1; 3937 sfsi = ssi; 3938 3939 snamlen -= len; 3940 } /* end while */ 3941 3942 /* terminate dst last/only segment */ 3943 if (h == d) { 3944 /* single segment entry */ 3945 if (dp->header.flag & BT_LEAF) 3946 dlh->next = -1; 3947 else 3948 dih->next = -1; 3949 } else 3950 /* multi-segment entry */ 3951 d->next = -1; 3952 } /* end for */ 3953 3954 /* close current linelock */ 3955 slv->length = ns; 3956 sdtlck->index++; 3957 *sdtlock = sdtlck; 3958 3959 dlv->length = nd; 3960 ddtlck->index++; 3961 *ddtlock = ddtlck; 3962 3963 /* update source header */ 3964 sp->header.freelist = sfsi; 3965 sp->header.freecnt += nd; 3966 3967 /* update destination header */ 3968 dp->header.nextindex = di; 3969 3970 dp->header.freelist = dsi; 3971 dp->header.freecnt -= nd; 3972 } 3973 3974 3975 /* 3976 * dtDeleteEntry() 3977 * 3978 * function: free a (leaf/internal) entry 3979 * 3980 * log freelist header, stbl, and each segment slot of entry 3981 * (even though last/only segment next field is modified, 3982 * physical image logging requires all segment slots of 3983 * the entry logged to avoid applying previous updates 3984 * to the same slots) 3985 */ 3986 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 3987 { 3988 int fsi; /* free entry slot index */ 3989 s8 *stbl; 3990 struct dtslot *t; 3991 int si, freecnt; 3992 struct dt_lock *dtlck = *dtlock; 3993 struct lv *lv; 3994 int xsi, n; 3995 3996 /* get free entry slot index */ 3997 stbl = DT_GETSTBL(p); 3998 fsi = stbl[fi]; 3999 4000 /* open new linelock */ 4001 if (dtlck->index >= dtlck->maxcnt) 4002 dtlck = (struct dt_lock *) txLinelock(dtlck); 4003 lv = & dtlck->lv[dtlck->index]; 4004 4005 lv->offset = fsi; 4006 4007 /* get the head/only segment */ 4008 t = &p->slot[fsi]; 4009 if (p->header.flag & BT_LEAF) 4010 si = ((struct ldtentry *) t)->next; 4011 else 4012 si = ((struct idtentry *) t)->next; 4013 t->next = si; 4014 t->cnt = 1; 4015 4016 n = freecnt = 1; 4017 xsi = fsi; 4018 4019 /* find the last/only segment */ 4020 while (si >= 0) { 4021 /* is next slot contiguous ? */ 4022 if (si != xsi + 1) { 4023 /* close current linelock */ 4024 lv->length = n; 4025 dtlck->index++; 4026 4027 /* open new linelock */ 4028 if (dtlck->index < dtlck->maxcnt) 4029 lv++; 4030 else { 4031 dtlck = (struct dt_lock *) txLinelock(dtlck); 4032 lv = & dtlck->lv[0]; 4033 } 4034 4035 lv->offset = si; 4036 n = 0; 4037 } 4038 4039 n++; 4040 xsi = si; 4041 freecnt++; 4042 4043 t = &p->slot[si]; 4044 t->cnt = 1; 4045 si = t->next; 4046 } 4047 4048 /* close current linelock */ 4049 lv->length = n; 4050 dtlck->index++; 4051 4052 *dtlock = dtlck; 4053 4054 /* update freelist */ 4055 t->next = p->header.freelist; 4056 p->header.freelist = fsi; 4057 p->header.freecnt += freecnt; 4058 4059 /* if delete from middle, 4060 * shift left the succedding entries in the stbl 4061 */ 4062 si = p->header.nextindex; 4063 if (fi < si - 1) 4064 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4065 4066 p->header.nextindex--; 4067 } 4068 4069 4070 /* 4071 * dtTruncateEntry() 4072 * 4073 * function: truncate a (leaf/internal) entry 4074 * 4075 * log freelist header, stbl, and each segment slot of entry 4076 * (even though last/only segment next field is modified, 4077 * physical image logging requires all segment slots of 4078 * the entry logged to avoid applying previous updates 4079 * to the same slots) 4080 */ 4081 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4082 { 4083 int tsi; /* truncate entry slot index */ 4084 s8 *stbl; 4085 struct dtslot *t; 4086 int si, freecnt; 4087 struct dt_lock *dtlck = *dtlock; 4088 struct lv *lv; 4089 int fsi, xsi, n; 4090 4091 /* get free entry slot index */ 4092 stbl = DT_GETSTBL(p); 4093 tsi = stbl[ti]; 4094 4095 /* open new linelock */ 4096 if (dtlck->index >= dtlck->maxcnt) 4097 dtlck = (struct dt_lock *) txLinelock(dtlck); 4098 lv = & dtlck->lv[dtlck->index]; 4099 4100 lv->offset = tsi; 4101 4102 /* get the head/only segment */ 4103 t = &p->slot[tsi]; 4104 ASSERT(p->header.flag & BT_INTERNAL); 4105 ((struct idtentry *) t)->namlen = 0; 4106 si = ((struct idtentry *) t)->next; 4107 ((struct idtentry *) t)->next = -1; 4108 4109 n = 1; 4110 freecnt = 0; 4111 fsi = si; 4112 xsi = tsi; 4113 4114 /* find the last/only segment */ 4115 while (si >= 0) { 4116 /* is next slot contiguous ? */ 4117 if (si != xsi + 1) { 4118 /* close current linelock */ 4119 lv->length = n; 4120 dtlck->index++; 4121 4122 /* open new linelock */ 4123 if (dtlck->index < dtlck->maxcnt) 4124 lv++; 4125 else { 4126 dtlck = (struct dt_lock *) txLinelock(dtlck); 4127 lv = & dtlck->lv[0]; 4128 } 4129 4130 lv->offset = si; 4131 n = 0; 4132 } 4133 4134 n++; 4135 xsi = si; 4136 freecnt++; 4137 4138 t = &p->slot[si]; 4139 t->cnt = 1; 4140 si = t->next; 4141 } 4142 4143 /* close current linelock */ 4144 lv->length = n; 4145 dtlck->index++; 4146 4147 *dtlock = dtlck; 4148 4149 /* update freelist */ 4150 if (freecnt == 0) 4151 return; 4152 t->next = p->header.freelist; 4153 p->header.freelist = fsi; 4154 p->header.freecnt += freecnt; 4155 } 4156 4157 4158 /* 4159 * dtLinelockFreelist() 4160 */ 4161 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4162 int m, /* max slot index */ 4163 struct dt_lock ** dtlock) 4164 { 4165 int fsi; /* free entry slot index */ 4166 struct dtslot *t; 4167 int si; 4168 struct dt_lock *dtlck = *dtlock; 4169 struct lv *lv; 4170 int xsi, n; 4171 4172 /* get free entry slot index */ 4173 fsi = p->header.freelist; 4174 4175 /* open new linelock */ 4176 if (dtlck->index >= dtlck->maxcnt) 4177 dtlck = (struct dt_lock *) txLinelock(dtlck); 4178 lv = & dtlck->lv[dtlck->index]; 4179 4180 lv->offset = fsi; 4181 4182 n = 1; 4183 xsi = fsi; 4184 4185 t = &p->slot[fsi]; 4186 si = t->next; 4187 4188 /* find the last/only segment */ 4189 while (si < m && si >= 0) { 4190 /* is next slot contiguous ? */ 4191 if (si != xsi + 1) { 4192 /* close current linelock */ 4193 lv->length = n; 4194 dtlck->index++; 4195 4196 /* open new linelock */ 4197 if (dtlck->index < dtlck->maxcnt) 4198 lv++; 4199 else { 4200 dtlck = (struct dt_lock *) txLinelock(dtlck); 4201 lv = & dtlck->lv[0]; 4202 } 4203 4204 lv->offset = si; 4205 n = 0; 4206 } 4207 4208 n++; 4209 xsi = si; 4210 4211 t = &p->slot[si]; 4212 si = t->next; 4213 } 4214 4215 /* close current linelock */ 4216 lv->length = n; 4217 dtlck->index++; 4218 4219 *dtlock = dtlck; 4220 } 4221 4222 4223 /* 4224 * NAME: dtModify 4225 * 4226 * FUNCTION: Modify the inode number part of a directory entry 4227 * 4228 * PARAMETERS: 4229 * tid - Transaction id 4230 * ip - Inode of parent directory 4231 * key - Name of entry to be modified 4232 * orig_ino - Original inode number expected in entry 4233 * new_ino - New inode number to put into entry 4234 * flag - JFS_RENAME 4235 * 4236 * RETURNS: 4237 * -ESTALE - If entry found does not match orig_ino passed in 4238 * -ENOENT - If no entry can be found to match key 4239 * 0 - If successfully modified entry 4240 */ 4241 int dtModify(tid_t tid, struct inode *ip, 4242 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4243 { 4244 int rc; 4245 s64 bn; 4246 struct metapage *mp; 4247 dtpage_t *p; 4248 int index; 4249 struct btstack btstack; 4250 struct tlock *tlck; 4251 struct dt_lock *dtlck; 4252 struct lv *lv; 4253 s8 *stbl; 4254 int entry_si; /* entry slot index */ 4255 struct ldtentry *entry; 4256 4257 /* 4258 * search for the entry to modify: 4259 * 4260 * dtSearch() returns (leaf page pinned, index at which to modify). 4261 */ 4262 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4263 return rc; 4264 4265 /* retrieve search result */ 4266 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4267 4268 BT_MARK_DIRTY(mp, ip); 4269 /* 4270 * acquire a transaction lock on the leaf page of named entry 4271 */ 4272 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4273 dtlck = (struct dt_lock *) & tlck->lock; 4274 4275 /* get slot index of the entry */ 4276 stbl = DT_GETSTBL(p); 4277 entry_si = stbl[index]; 4278 4279 /* linelock entry */ 4280 ASSERT(dtlck->index == 0); 4281 lv = & dtlck->lv[0]; 4282 lv->offset = entry_si; 4283 lv->length = 1; 4284 dtlck->index++; 4285 4286 /* get the head/only segment */ 4287 entry = (struct ldtentry *) & p->slot[entry_si]; 4288 4289 /* substitute the inode number of the entry */ 4290 entry->inumber = cpu_to_le32(new_ino); 4291 4292 /* unpin the leaf page */ 4293 DT_PUTPAGE(mp); 4294 4295 return 0; 4296 } 4297 4298 bool check_dtroot(dtroot_t *p) 4299 { 4300 DECLARE_BITMAP(bitmap, DTROOTMAXSLOT) = {0}; 4301 int i; 4302 4303 /* freecnt cannot be negative or exceed DTROOTMAXSLOT-1 4304 * (since slot[0] is occupied by the header). 4305 */ 4306 if (unlikely(p->header.freecnt < 0 || 4307 p->header.freecnt > DTROOTMAXSLOT - 1)) { 4308 jfs_err("Bad freecnt:%d in dtroot\n", p->header.freecnt); 4309 return false; 4310 } else if (p->header.freecnt == 0) { 4311 /* No free slots: freelist must be -1 */ 4312 if (unlikely(p->header.freelist != -1)) { 4313 jfs_err("freecnt=0, but freelist=%d in dtroot\n", 4314 p->header.freelist); 4315 return false; 4316 } 4317 } else { 4318 int fsi, i; 4319 /* When there are free slots, freelist must be a valid slot index in 4320 * 1~DTROOTMAXSLOT-1(since slot[0] is occupied by the header). 4321 */ 4322 if (unlikely(p->header.freelist < 1 || 4323 p->header.freelist >= DTROOTMAXSLOT)) { 4324 jfs_err("Bad freelist:%d in dtroot\n", p->header.freelist); 4325 return false; 4326 } 4327 4328 /* Traverse the free list to check validity of all node indices */ 4329 fsi = p->header.freelist; 4330 for (i = 0; i < p->header.freecnt - 1; i++) { 4331 /* Check for duplicate indices in the free list */ 4332 if (unlikely(__test_and_set_bit(fsi, bitmap))) { 4333 jfs_err("duplicate index%d in slot in dtroot\n", fsi); 4334 return false; 4335 } 4336 fsi = p->slot[fsi].next; 4337 4338 /* Ensure the next slot index in the free list is valid */ 4339 if (unlikely(fsi < 1 || fsi >= DTROOTMAXSLOT)) { 4340 jfs_err("Bad index:%d in slot in dtroot\n", fsi); 4341 return false; 4342 } 4343 } 4344 4345 /* The last node in the free list must terminate with next = -1 */ 4346 if (unlikely(p->slot[fsi].next != -1)) { 4347 jfs_err("Bad next:%d of the last slot in dtroot\n", 4348 p->slot[fsi].next); 4349 return false; 4350 } 4351 } 4352 4353 /* Validate nextindex (next free entry index in stbl) 4354 * stbl array has size 8 (indices 0~7). 4355 * It may get set to 8 when the last free slot has been filled. 4356 */ 4357 if (unlikely(p->header.nextindex > ARRAY_SIZE(p->header.stbl))) { 4358 jfs_err("Bad nextindex:%d in dtroot\n", p->header.nextindex); 4359 return false; 4360 } 4361 4362 /* Validate index validity of stbl array (8 elements) 4363 * Each entry in stbl is a slot index, with valid range: -1 (invalid) 4364 * or 0~8 (slot[0]~slot[8]) 4365 */ 4366 for (i = 0; i < p->header.nextindex; i++) { 4367 int idx = p->header.stbl[i]; 4368 4369 if (unlikely(idx < 0 || idx >= 9)) { 4370 jfs_err("Bad index:%d of stbl[%d] in dtroot\n", idx, i); 4371 return false; /* stbl entry points out of slot array range */ 4372 } 4373 4374 /* Check for duplicate valid indices (skip check for idx=0) */ 4375 if (unlikely(idx && __test_and_set_bit(idx, bitmap))) { 4376 jfs_err("Duplicate index:%d in stbl in dtroot\n", idx); 4377 return false; 4378 } 4379 } 4380 4381 return true; 4382 } 4383 4384 bool check_dtpage(dtpage_t *p) 4385 { 4386 DECLARE_BITMAP(bitmap, DTPAGEMAXSLOT) = {0}; 4387 const int stblsize = ((PSIZE >> L2DTSLOTSIZE) + 31) >> L2DTSLOTSIZE; 4388 int i; 4389 4390 /* Validate maxslot (maximum number of slots in the page) 4391 * dtpage_t slot array is defined to hold up to DTPAGEMAXSLOT (128) slots 4392 */ 4393 if (unlikely(p->header.maxslot != DTPAGEMAXSLOT)) { 4394 jfs_err("Bad maxslot:%d in dtpage (expected %d)\n", 4395 p->header.maxslot, DTPAGEMAXSLOT); 4396 return false; 4397 } 4398 4399 /* freecnt cannot be negative or exceed DTPAGEMAXSLOT-1 4400 * (since slot[0] is occupied by the header). 4401 */ 4402 if (unlikely(p->header.freecnt < 0 || 4403 p->header.freecnt > DTPAGEMAXSLOT - 1)) { 4404 jfs_err("Bad freecnt:%d in dtpage\n", p->header.freecnt); 4405 return false; 4406 } else if (p->header.freecnt == 0) { 4407 /* No free slots: freelist must be -1 */ 4408 if (unlikely(p->header.freelist != -1)) { 4409 jfs_err("freecnt=0 but freelist=%d in dtpage\n", 4410 p->header.freelist); 4411 return false; 4412 } 4413 } else { 4414 int fsi; 4415 4416 if (unlikely(p->header.freelist < 1)) { 4417 jfs_err("Bad freelist:%d in dtpage\n", p->header.freelist); 4418 return false; 4419 } 4420 4421 /* Traverse the free list to check validity of all node indices */ 4422 fsi = p->header.freelist; 4423 for (i = 0; i < p->header.freecnt - 1; i++) { 4424 /* Check for duplicate indices in the free list */ 4425 if (unlikely(__test_and_set_bit(fsi, bitmap))) { 4426 jfs_err("duplicate index%d in slot in dtpage\n", fsi); 4427 return false; 4428 } 4429 fsi = p->slot[fsi].next; 4430 4431 /* Ensure the next slot index in the free list is valid */ 4432 if (unlikely(fsi < 1 || fsi >= DTPAGEMAXSLOT)) { 4433 jfs_err("Bad index:%d in slot in dtpage\n", fsi); 4434 return false; 4435 } 4436 } 4437 4438 /* The last node in the free list must terminate with next = -1 */ 4439 if (unlikely(p->slot[fsi].next != -1)) { 4440 jfs_err("Bad next:%d of the last slot in dtpage\n", 4441 p->slot[fsi].next); 4442 return false; 4443 } 4444 } 4445 4446 /* stbl must be little then DTPAGEMAXSLOT */ 4447 if (unlikely(p->header.stblindex >= DTPAGEMAXSLOT - stblsize)) { 4448 jfs_err("Bad stblindex:%d in dtpage (stbl size %d)\n", 4449 p->header.stblindex, stblsize); 4450 return false; 4451 } 4452 4453 /* nextindex must be little then stblsize*32 */ 4454 if (unlikely(p->header.nextindex > (stblsize << L2DTSLOTSIZE))) { 4455 jfs_err("Bad nextindex:%d in dtpage (stbl size %d)\n", 4456 p->header.nextindex, stblsize); 4457 return false; 4458 } 4459 4460 /* Validate stbl entries 4461 * Each entry is a slot index, valid range: -1 (invalid) or 4462 * [0, nextindex-1] (valid data slots) 4463 * (stblindex and higher slots are reserved for stbl itself) 4464 */ 4465 for (i = 0; i < p->header.nextindex; i++) { 4466 int idx = DT_GETSTBL(p)[i]; 4467 4468 /* Check if index is out of valid data slot range */ 4469 if (unlikely(idx < 1 || idx >= DTPAGEMAXSLOT)) { 4470 jfs_err("Bad stbl[%d] index:%d (stblindex %d) in dtpage\n", 4471 i, idx, p->header.stblindex); 4472 return false; 4473 } 4474 4475 /* Check for duplicate valid indices (skip -1) */ 4476 if (unlikely(__test_and_set_bit(idx, bitmap))) { 4477 jfs_err("Duplicate index:%d in stbl of dtpage\n", idx); 4478 return false; 4479 } 4480 } 4481 4482 return true; 4483 } 4484