1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) International Business Machines Corp., 2000-2004 4 */ 5 6 /* 7 * jfs_dtree.c: directory B+-tree manager 8 * 9 * B+-tree with variable length key directory: 10 * 11 * each directory page is structured as an array of 32-byte 12 * directory entry slots initialized as a freelist 13 * to avoid search/compaction of free space at insertion. 14 * when an entry is inserted, a number of slots are allocated 15 * from the freelist as required to store variable length data 16 * of the entry; when the entry is deleted, slots of the entry 17 * are returned to freelist. 18 * 19 * leaf entry stores full name as key and file serial number 20 * (aka inode number) as data. 21 * internal/router entry stores sufffix compressed name 22 * as key and simple extent descriptor as data. 23 * 24 * each directory page maintains a sorted entry index table 25 * which stores the start slot index of sorted entries 26 * to allow binary search on the table. 27 * 28 * directory starts as a root/leaf page in on-disk inode 29 * inline data area. 30 * when it becomes full, it starts a leaf of a external extent 31 * of length of 1 block. each time the first leaf becomes full, 32 * it is extended rather than split (its size is doubled), 33 * until its length becoms 4 KBytes, from then the extent is split 34 * with new 4 Kbyte extent when it becomes full 35 * to reduce external fragmentation of small directories. 36 * 37 * blah, blah, blah, for linear scan of directory in pieces by 38 * readdir(). 39 * 40 * 41 * case-insensitive directory file system 42 * 43 * names are stored in case-sensitive way in leaf entry. 44 * but stored, searched and compared in case-insensitive (uppercase) order 45 * (i.e., both search key and entry key are folded for search/compare): 46 * (note that case-sensitive order is BROKEN in storage, e.g., 47 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 48 * 49 * entries which folds to the same key makes up a equivalent class 50 * whose members are stored as contiguous cluster (may cross page boundary) 51 * but whose order is arbitrary and acts as duplicate, e.g., 52 * abc, Abc, aBc, abC) 53 * 54 * once match is found at leaf, requires scan forward/backward 55 * either for, in case-insensitive search, duplicate 56 * or for, in case-sensitive search, for exact match 57 * 58 * router entry must be created/stored in case-insensitive way 59 * in internal entry: 60 * (right most key of left page and left most key of right page 61 * are folded, and its suffix compression is propagated as router 62 * key in parent) 63 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 64 * should be made the router key for the split) 65 * 66 * case-insensitive search: 67 * 68 * fold search key; 69 * 70 * case-insensitive search of B-tree: 71 * for internal entry, router key is already folded; 72 * for leaf entry, fold the entry key before comparison. 73 * 74 * if (leaf entry case-insensitive match found) 75 * if (next entry satisfies case-insensitive match) 76 * return EDUPLICATE; 77 * if (prev entry satisfies case-insensitive match) 78 * return EDUPLICATE; 79 * return match; 80 * else 81 * return no match; 82 * 83 * serialization: 84 * target directory inode lock is being held on entry/exit 85 * of all main directory service routines. 86 * 87 * log based recovery: 88 */ 89 90 #include <linux/fs.h> 91 #include <linux/quotaops.h> 92 #include <linux/slab.h> 93 #include "jfs_incore.h" 94 #include "jfs_superblock.h" 95 #include "jfs_filsys.h" 96 #include "jfs_metapage.h" 97 #include "jfs_dmap.h" 98 #include "jfs_unicode.h" 99 #include "jfs_debug.h" 100 101 /* dtree split parameter */ 102 struct dtsplit { 103 struct metapage *mp; 104 s16 index; 105 s16 nslot; 106 struct component_name *key; 107 ddata_t *data; 108 struct pxdlist *pxdlist; 109 }; 110 111 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 112 113 /* get page buffer for specified block address */ 114 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ 115 do { \ 116 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ 117 if (!(RC)) { \ 118 if (((P)->header.nextindex > \ 119 (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ 120 ((BN) && (((P)->header.maxslot > DTPAGEMAXSLOT) || \ 121 ((P)->header.stblindex >= DTPAGEMAXSLOT)))) { \ 122 BT_PUTPAGE(MP); \ 123 jfs_error((IP)->i_sb, \ 124 "DT_GETPAGE: dtree page corrupt\n"); \ 125 MP = NULL; \ 126 RC = -EIO; \ 127 } \ 128 } \ 129 } while (0) 130 131 /* for consistency */ 132 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 133 134 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 135 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 136 137 /* 138 * forward references 139 */ 140 static int dtSplitUp(tid_t tid, struct inode *ip, 141 struct dtsplit * split, struct btstack * btstack); 142 143 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 144 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 145 146 static int dtExtendPage(tid_t tid, struct inode *ip, 147 struct dtsplit * split, struct btstack * btstack); 148 149 static int dtSplitRoot(tid_t tid, struct inode *ip, 150 struct dtsplit * split, struct metapage ** rmpp); 151 152 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 153 dtpage_t * fp, struct btstack * btstack); 154 155 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 156 157 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 158 159 static int dtReadNext(struct inode *ip, 160 loff_t * offset, struct btstack * btstack); 161 162 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 163 164 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 165 int flag); 166 167 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 168 int flag); 169 170 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 171 int ri, struct component_name * key, int flag); 172 173 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 174 ddata_t * data, struct dt_lock **); 175 176 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 177 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 178 int do_index); 179 180 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 181 182 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 183 184 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 185 186 #define ciToUpper(c) UniStrupr((c)->name) 187 188 /* 189 * read_index_page() 190 * 191 * Reads a page of a directory's index table. 192 * Having metadata mapped into the directory inode's address space 193 * presents a multitude of problems. We avoid this by mapping to 194 * the absolute address space outside of the *_metapage routines 195 */ 196 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 197 { 198 int rc; 199 s64 xaddr; 200 int xflag; 201 s32 xlen; 202 203 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 204 if (rc || (xaddr == 0)) 205 return NULL; 206 207 return read_metapage(inode, xaddr, PSIZE, 1); 208 } 209 210 /* 211 * get_index_page() 212 * 213 * Same as get_index_page(), but get's a new page without reading 214 */ 215 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 216 { 217 int rc; 218 s64 xaddr; 219 int xflag; 220 s32 xlen; 221 222 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 223 if (rc || (xaddr == 0)) 224 return NULL; 225 226 return get_metapage(inode, xaddr, PSIZE, 1); 227 } 228 229 /* 230 * find_index() 231 * 232 * Returns dtree page containing directory table entry for specified 233 * index and pointer to its entry. 234 * 235 * mp must be released by caller. 236 */ 237 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 238 struct metapage ** mp, s64 *lblock) 239 { 240 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 241 s64 blkno; 242 s64 offset; 243 int page_offset; 244 struct dir_table_slot *slot; 245 static int maxWarnings = 10; 246 247 if (index < 2) { 248 if (maxWarnings) { 249 jfs_warn("find_entry called with index = %d", index); 250 maxWarnings--; 251 } 252 return NULL; 253 } 254 255 if (index >= jfs_ip->next_index) { 256 jfs_warn("find_entry called with index >= next_index"); 257 return NULL; 258 } 259 260 if (jfs_dirtable_inline(ip)) { 261 /* 262 * Inline directory table 263 */ 264 *mp = NULL; 265 slot = &jfs_ip->i_dirtable[index - 2]; 266 } else { 267 offset = (index - 2) * sizeof(struct dir_table_slot); 268 page_offset = offset & (PSIZE - 1); 269 blkno = ((offset + 1) >> L2PSIZE) << 270 JFS_SBI(ip->i_sb)->l2nbperpage; 271 272 if (*mp && (*lblock != blkno)) { 273 release_metapage(*mp); 274 *mp = NULL; 275 } 276 if (!(*mp)) { 277 *lblock = blkno; 278 *mp = read_index_page(ip, blkno); 279 } 280 if (!(*mp)) { 281 jfs_err("free_index: error reading directory table"); 282 return NULL; 283 } 284 285 slot = 286 (struct dir_table_slot *) ((char *) (*mp)->data + 287 page_offset); 288 } 289 return slot; 290 } 291 292 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 293 u32 index) 294 { 295 struct tlock *tlck; 296 struct linelock *llck; 297 struct lv *lv; 298 299 tlck = txLock(tid, ip, mp, tlckDATA); 300 llck = (struct linelock *) tlck->lock; 301 302 if (llck->index >= llck->maxcnt) 303 llck = txLinelock(llck); 304 lv = &llck->lv[llck->index]; 305 306 /* 307 * Linelock slot size is twice the size of directory table 308 * slot size. 512 entries per page. 309 */ 310 lv->offset = ((index - 2) & 511) >> 1; 311 lv->length = 1; 312 llck->index++; 313 } 314 315 /* 316 * add_index() 317 * 318 * Adds an entry to the directory index table. This is used to provide 319 * each directory entry with a persistent index in which to resume 320 * directory traversals 321 */ 322 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 323 { 324 struct super_block *sb = ip->i_sb; 325 struct jfs_sb_info *sbi = JFS_SBI(sb); 326 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 327 u64 blkno; 328 struct dir_table_slot *dirtab_slot; 329 u32 index; 330 struct linelock *llck; 331 struct lv *lv; 332 struct metapage *mp; 333 s64 offset; 334 uint page_offset; 335 struct tlock *tlck; 336 s64 xaddr; 337 338 ASSERT(DO_INDEX(ip)); 339 340 if (jfs_ip->next_index < 2) { 341 jfs_warn("add_index: next_index = %d. Resetting!", 342 jfs_ip->next_index); 343 jfs_ip->next_index = 2; 344 } 345 346 index = jfs_ip->next_index++; 347 348 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 349 /* 350 * i_size reflects size of index table, or 8 bytes per entry. 351 */ 352 ip->i_size = (loff_t) (index - 1) << 3; 353 354 /* 355 * dir table fits inline within inode 356 */ 357 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 358 dirtab_slot->flag = DIR_INDEX_VALID; 359 dirtab_slot->slot = slot; 360 DTSaddress(dirtab_slot, bn); 361 362 set_cflag(COMMIT_Dirtable, ip); 363 364 return index; 365 } 366 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 367 struct dir_table_slot temp_table[12]; 368 369 /* 370 * It's time to move the inline table to an external 371 * page and begin to build the xtree 372 */ 373 if (dquot_alloc_block(ip, sbi->nbperpage)) 374 goto clean_up; 375 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 376 dquot_free_block(ip, sbi->nbperpage); 377 goto clean_up; 378 } 379 380 /* 381 * Save the table, we're going to overwrite it with the 382 * xtree root 383 */ 384 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 385 386 /* 387 * Initialize empty x-tree 388 */ 389 xtInitRoot(tid, ip); 390 391 /* 392 * Add the first block to the xtree 393 */ 394 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 395 /* This really shouldn't fail */ 396 jfs_warn("add_index: xtInsert failed!"); 397 memcpy(&jfs_ip->i_dirtable, temp_table, 398 sizeof (temp_table)); 399 dbFree(ip, xaddr, sbi->nbperpage); 400 dquot_free_block(ip, sbi->nbperpage); 401 goto clean_up; 402 } 403 ip->i_size = PSIZE; 404 405 mp = get_index_page(ip, 0); 406 if (!mp) { 407 jfs_err("add_index: get_metapage failed!"); 408 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 409 memcpy(&jfs_ip->i_dirtable, temp_table, 410 sizeof (temp_table)); 411 goto clean_up; 412 } 413 tlck = txLock(tid, ip, mp, tlckDATA); 414 llck = (struct linelock *) & tlck->lock; 415 ASSERT(llck->index == 0); 416 lv = &llck->lv[0]; 417 418 lv->offset = 0; 419 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 420 llck->index++; 421 422 memcpy(mp->data, temp_table, sizeof(temp_table)); 423 424 mark_metapage_dirty(mp); 425 release_metapage(mp); 426 427 /* 428 * Logging is now directed by xtree tlocks 429 */ 430 clear_cflag(COMMIT_Dirtable, ip); 431 } 432 433 offset = (index - 2) * sizeof(struct dir_table_slot); 434 page_offset = offset & (PSIZE - 1); 435 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 436 if (page_offset == 0) { 437 /* 438 * This will be the beginning of a new page 439 */ 440 xaddr = 0; 441 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 442 jfs_warn("add_index: xtInsert failed!"); 443 goto clean_up; 444 } 445 ip->i_size += PSIZE; 446 447 if ((mp = get_index_page(ip, blkno))) 448 memset(mp->data, 0, PSIZE); /* Just looks better */ 449 else 450 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 451 } else 452 mp = read_index_page(ip, blkno); 453 454 if (!mp) { 455 jfs_err("add_index: get/read_metapage failed!"); 456 goto clean_up; 457 } 458 459 lock_index(tid, ip, mp, index); 460 461 dirtab_slot = 462 (struct dir_table_slot *) ((char *) mp->data + page_offset); 463 dirtab_slot->flag = DIR_INDEX_VALID; 464 dirtab_slot->slot = slot; 465 DTSaddress(dirtab_slot, bn); 466 467 mark_metapage_dirty(mp); 468 release_metapage(mp); 469 470 return index; 471 472 clean_up: 473 474 jfs_ip->next_index--; 475 476 return 0; 477 } 478 479 /* 480 * free_index() 481 * 482 * Marks an entry to the directory index table as free. 483 */ 484 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 485 { 486 struct dir_table_slot *dirtab_slot; 487 s64 lblock; 488 struct metapage *mp = NULL; 489 490 dirtab_slot = find_index(ip, index, &mp, &lblock); 491 492 if (!dirtab_slot) 493 return; 494 495 dirtab_slot->flag = DIR_INDEX_FREE; 496 dirtab_slot->slot = dirtab_slot->addr1 = 0; 497 dirtab_slot->addr2 = cpu_to_le32(next); 498 499 if (mp) { 500 lock_index(tid, ip, mp, index); 501 mark_metapage_dirty(mp); 502 release_metapage(mp); 503 } else 504 set_cflag(COMMIT_Dirtable, ip); 505 } 506 507 /* 508 * modify_index() 509 * 510 * Changes an entry in the directory index table 511 */ 512 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 513 int slot, struct metapage ** mp, s64 *lblock) 514 { 515 struct dir_table_slot *dirtab_slot; 516 517 dirtab_slot = find_index(ip, index, mp, lblock); 518 519 if (!dirtab_slot) 520 return; 521 522 DTSaddress(dirtab_slot, bn); 523 dirtab_slot->slot = slot; 524 525 if (*mp) { 526 lock_index(tid, ip, *mp, index); 527 mark_metapage_dirty(*mp); 528 } else 529 set_cflag(COMMIT_Dirtable, ip); 530 } 531 532 /* 533 * read_index() 534 * 535 * reads a directory table slot 536 */ 537 static int read_index(struct inode *ip, u32 index, 538 struct dir_table_slot * dirtab_slot) 539 { 540 s64 lblock; 541 struct metapage *mp = NULL; 542 struct dir_table_slot *slot; 543 544 slot = find_index(ip, index, &mp, &lblock); 545 if (!slot) { 546 return -EIO; 547 } 548 549 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 550 551 if (mp) 552 release_metapage(mp); 553 554 return 0; 555 } 556 557 /* 558 * dtSearch() 559 * 560 * function: 561 * Search for the entry with specified key 562 * 563 * parameter: 564 * 565 * return: 0 - search result on stack, leaf page pinned; 566 * errno - I/O error 567 */ 568 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 569 struct btstack * btstack, int flag) 570 { 571 int rc = 0; 572 int cmp = 1; /* init for empty page */ 573 s64 bn; 574 struct metapage *mp; 575 dtpage_t *p; 576 s8 *stbl; 577 int base, index, lim; 578 struct btframe *btsp; 579 pxd_t *pxd; 580 int psize = 288; /* initial in-line directory */ 581 ino_t inumber; 582 struct component_name ciKey; 583 struct super_block *sb = ip->i_sb; 584 585 ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 586 GFP_NOFS); 587 if (!ciKey.name) { 588 rc = -ENOMEM; 589 goto dtSearch_Exit2; 590 } 591 592 593 /* uppercase search key for c-i directory */ 594 UniStrcpy(ciKey.name, key->name); 595 ciKey.namlen = key->namlen; 596 597 /* only uppercase if case-insensitive support is on */ 598 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 599 ciToUpper(&ciKey); 600 } 601 BT_CLR(btstack); /* reset stack */ 602 603 /* init level count for max pages to split */ 604 btstack->nsplit = 1; 605 606 /* 607 * search down tree from root: 608 * 609 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 610 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 611 * 612 * if entry with search key K is not found 613 * internal page search find the entry with largest key Ki 614 * less than K which point to the child page to search; 615 * leaf page search find the entry with smallest key Kj 616 * greater than K so that the returned index is the position of 617 * the entry to be shifted right for insertion of new entry. 618 * for empty tree, search key is greater than any key of the tree. 619 * 620 * by convention, root bn = 0. 621 */ 622 for (bn = 0;;) { 623 /* get/pin the page to search */ 624 DT_GETPAGE(ip, bn, mp, psize, p, rc); 625 if (rc) 626 goto dtSearch_Exit1; 627 628 /* get sorted entry table of the page */ 629 stbl = DT_GETSTBL(p); 630 631 /* 632 * binary search with search key K on the current page. 633 */ 634 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 635 index = base + (lim >> 1); 636 637 if (stbl[index] < 0) { 638 rc = -EIO; 639 goto out; 640 } 641 642 if (p->header.flag & BT_LEAF) { 643 /* uppercase leaf name to compare */ 644 cmp = 645 ciCompare(&ciKey, p, stbl[index], 646 JFS_SBI(sb)->mntflag); 647 } else { 648 /* router key is in uppercase */ 649 650 cmp = dtCompare(&ciKey, p, stbl[index]); 651 652 653 } 654 if (cmp == 0) { 655 /* 656 * search hit 657 */ 658 /* search hit - leaf page: 659 * return the entry found 660 */ 661 if (p->header.flag & BT_LEAF) { 662 inumber = le32_to_cpu( 663 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 664 665 /* 666 * search for JFS_LOOKUP 667 */ 668 if (flag == JFS_LOOKUP) { 669 *data = inumber; 670 rc = 0; 671 goto out; 672 } 673 674 /* 675 * search for JFS_CREATE 676 */ 677 if (flag == JFS_CREATE) { 678 *data = inumber; 679 rc = -EEXIST; 680 goto out; 681 } 682 683 /* 684 * search for JFS_REMOVE or JFS_RENAME 685 */ 686 if ((flag == JFS_REMOVE || 687 flag == JFS_RENAME) && 688 *data != inumber) { 689 rc = -ESTALE; 690 goto out; 691 } 692 693 /* 694 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 695 */ 696 /* save search result */ 697 *data = inumber; 698 btsp = btstack->top; 699 btsp->bn = bn; 700 btsp->index = index; 701 btsp->mp = mp; 702 703 rc = 0; 704 goto dtSearch_Exit1; 705 } 706 707 /* search hit - internal page: 708 * descend/search its child page 709 */ 710 goto getChild; 711 } 712 713 if (cmp > 0) { 714 base = index + 1; 715 --lim; 716 } 717 } 718 719 /* 720 * search miss 721 * 722 * base is the smallest index with key (Kj) greater than 723 * search key (K) and may be zero or (maxindex + 1) index. 724 */ 725 /* 726 * search miss - leaf page 727 * 728 * return location of entry (base) where new entry with 729 * search key K is to be inserted. 730 */ 731 if (p->header.flag & BT_LEAF) { 732 /* 733 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 734 */ 735 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 736 flag == JFS_RENAME) { 737 rc = -ENOENT; 738 goto out; 739 } 740 741 /* 742 * search for JFS_CREATE|JFS_FINDDIR: 743 * 744 * save search result 745 */ 746 *data = 0; 747 btsp = btstack->top; 748 btsp->bn = bn; 749 btsp->index = base; 750 btsp->mp = mp; 751 752 rc = 0; 753 goto dtSearch_Exit1; 754 } 755 756 /* 757 * search miss - internal page 758 * 759 * if base is non-zero, decrement base by one to get the parent 760 * entry of the child page to search. 761 */ 762 index = base ? base - 1 : base; 763 764 /* 765 * go down to child page 766 */ 767 getChild: 768 /* update max. number of pages to split */ 769 if (BT_STACK_FULL(btstack)) { 770 /* Something's corrupted, mark filesystem dirty so 771 * chkdsk will fix it. 772 */ 773 jfs_error(sb, "stack overrun!\n"); 774 BT_STACK_DUMP(btstack); 775 rc = -EIO; 776 goto out; 777 } 778 btstack->nsplit++; 779 780 /* push (bn, index) of the parent page/entry */ 781 BT_PUSH(btstack, bn, index); 782 783 /* get the child page block number */ 784 pxd = (pxd_t *) & p->slot[stbl[index]]; 785 bn = addressPXD(pxd); 786 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 787 788 /* unpin the parent page */ 789 DT_PUTPAGE(mp); 790 } 791 792 out: 793 DT_PUTPAGE(mp); 794 795 dtSearch_Exit1: 796 797 kfree(ciKey.name); 798 799 dtSearch_Exit2: 800 801 return rc; 802 } 803 804 805 /* 806 * dtInsert() 807 * 808 * function: insert an entry to directory tree 809 * 810 * parameter: 811 * 812 * return: 0 - success; 813 * errno - failure; 814 */ 815 int dtInsert(tid_t tid, struct inode *ip, 816 struct component_name * name, ino_t * fsn, struct btstack * btstack) 817 { 818 int rc = 0; 819 struct metapage *mp; /* meta-page buffer */ 820 dtpage_t *p; /* base B+-tree index page */ 821 s64 bn; 822 int index; 823 struct dtsplit split; /* split information */ 824 ddata_t data; 825 struct dt_lock *dtlck; 826 int n; 827 struct tlock *tlck; 828 struct lv *lv; 829 830 /* 831 * retrieve search result 832 * 833 * dtSearch() returns (leaf page pinned, index at which to insert). 834 * n.b. dtSearch() may return index of (maxindex + 1) of 835 * the full page. 836 */ 837 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 838 if (p->header.freelist == 0) 839 return -EINVAL; 840 841 /* 842 * insert entry for new key 843 */ 844 if (DO_INDEX(ip)) { 845 if (JFS_IP(ip)->next_index == DIREND) { 846 DT_PUTPAGE(mp); 847 return -EMLINK; 848 } 849 n = NDTLEAF(name->namlen); 850 data.leaf.tid = tid; 851 data.leaf.ip = ip; 852 } else { 853 n = NDTLEAF_LEGACY(name->namlen); 854 data.leaf.ip = NULL; /* signifies legacy directory format */ 855 } 856 data.leaf.ino = *fsn; 857 858 /* 859 * leaf page does not have enough room for new entry: 860 * 861 * extend/split the leaf page; 862 * 863 * dtSplitUp() will insert the entry and unpin the leaf page. 864 */ 865 if (n > p->header.freecnt) { 866 split.mp = mp; 867 split.index = index; 868 split.nslot = n; 869 split.key = name; 870 split.data = &data; 871 rc = dtSplitUp(tid, ip, &split, btstack); 872 return rc; 873 } 874 875 /* 876 * leaf page does have enough room for new entry: 877 * 878 * insert the new data entry into the leaf page; 879 */ 880 BT_MARK_DIRTY(mp, ip); 881 /* 882 * acquire a transaction lock on the leaf page 883 */ 884 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 885 dtlck = (struct dt_lock *) & tlck->lock; 886 ASSERT(dtlck->index == 0); 887 lv = & dtlck->lv[0]; 888 889 /* linelock header */ 890 lv->offset = 0; 891 lv->length = 1; 892 dtlck->index++; 893 894 dtInsertEntry(p, index, name, &data, &dtlck); 895 896 /* linelock stbl of non-root leaf page */ 897 if (!(p->header.flag & BT_ROOT)) { 898 if (dtlck->index >= dtlck->maxcnt) 899 dtlck = (struct dt_lock *) txLinelock(dtlck); 900 lv = & dtlck->lv[dtlck->index]; 901 n = index >> L2DTSLOTSIZE; 902 lv->offset = p->header.stblindex + n; 903 lv->length = 904 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 905 dtlck->index++; 906 } 907 908 /* unpin the leaf page */ 909 DT_PUTPAGE(mp); 910 911 return 0; 912 } 913 914 915 /* 916 * dtSplitUp() 917 * 918 * function: propagate insertion bottom up; 919 * 920 * parameter: 921 * 922 * return: 0 - success; 923 * errno - failure; 924 * leaf page unpinned; 925 */ 926 static int dtSplitUp(tid_t tid, 927 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 928 { 929 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 930 int rc = 0; 931 struct metapage *smp; 932 dtpage_t *sp; /* split page */ 933 struct metapage *rmp; 934 dtpage_t *rp; /* new right page split from sp */ 935 pxd_t rpxd; /* new right page extent descriptor */ 936 struct metapage *lmp; 937 dtpage_t *lp; /* left child page */ 938 int skip; /* index of entry of insertion */ 939 struct btframe *parent; /* parent page entry on traverse stack */ 940 s64 xaddr, nxaddr; 941 int xlen, xsize; 942 struct pxdlist pxdlist; 943 pxd_t *pxd; 944 struct component_name key = { 0, NULL }; 945 ddata_t *data = split->data; 946 int n; 947 struct dt_lock *dtlck; 948 struct tlock *tlck; 949 struct lv *lv; 950 int quota_allocation = 0; 951 952 /* get split page */ 953 smp = split->mp; 954 sp = DT_PAGE(ip, smp); 955 956 key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS); 957 if (!key.name) { 958 DT_PUTPAGE(smp); 959 rc = -ENOMEM; 960 goto dtSplitUp_Exit; 961 } 962 963 /* 964 * split leaf page 965 * 966 * The split routines insert the new entry, and 967 * acquire txLock as appropriate. 968 */ 969 /* 970 * split root leaf page: 971 */ 972 if (sp->header.flag & BT_ROOT) { 973 /* 974 * allocate a single extent child page 975 */ 976 xlen = 1; 977 n = sbi->bsize >> L2DTSLOTSIZE; 978 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 979 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 980 if (n <= split->nslot) 981 xlen++; 982 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 983 DT_PUTPAGE(smp); 984 goto freeKeyName; 985 } 986 987 pxdlist.maxnpxd = 1; 988 pxdlist.npxd = 0; 989 pxd = &pxdlist.pxd[0]; 990 PXDaddress(pxd, xaddr); 991 PXDlength(pxd, xlen); 992 split->pxdlist = &pxdlist; 993 rc = dtSplitRoot(tid, ip, split, &rmp); 994 995 if (rc) 996 dbFree(ip, xaddr, xlen); 997 else 998 DT_PUTPAGE(rmp); 999 1000 DT_PUTPAGE(smp); 1001 1002 if (!DO_INDEX(ip)) 1003 ip->i_size = xlen << sbi->l2bsize; 1004 1005 goto freeKeyName; 1006 } 1007 1008 /* 1009 * extend first leaf page 1010 * 1011 * extend the 1st extent if less than buffer page size 1012 * (dtExtendPage() reurns leaf page unpinned) 1013 */ 1014 pxd = &sp->header.self; 1015 xlen = lengthPXD(pxd); 1016 xsize = xlen << sbi->l2bsize; 1017 if (xsize < PSIZE) { 1018 xaddr = addressPXD(pxd); 1019 n = xsize >> L2DTSLOTSIZE; 1020 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1021 if ((n + sp->header.freecnt) <= split->nslot) 1022 n = xlen + (xlen << 1); 1023 else 1024 n = xlen; 1025 1026 /* Allocate blocks to quota. */ 1027 rc = dquot_alloc_block(ip, n); 1028 if (rc) 1029 goto extendOut; 1030 quota_allocation += n; 1031 1032 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1033 (s64) n, &nxaddr))) 1034 goto extendOut; 1035 1036 pxdlist.maxnpxd = 1; 1037 pxdlist.npxd = 0; 1038 pxd = &pxdlist.pxd[0]; 1039 PXDaddress(pxd, nxaddr); 1040 PXDlength(pxd, xlen + n); 1041 split->pxdlist = &pxdlist; 1042 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1043 nxaddr = addressPXD(pxd); 1044 if (xaddr != nxaddr) { 1045 /* free relocated extent */ 1046 xlen = lengthPXD(pxd); 1047 dbFree(ip, nxaddr, (s64) xlen); 1048 } else { 1049 /* free extended delta */ 1050 xlen = lengthPXD(pxd) - n; 1051 xaddr = addressPXD(pxd) + xlen; 1052 dbFree(ip, xaddr, (s64) n); 1053 } 1054 } else if (!DO_INDEX(ip)) 1055 ip->i_size = lengthPXD(pxd) << sbi->l2bsize; 1056 1057 1058 extendOut: 1059 DT_PUTPAGE(smp); 1060 goto freeKeyName; 1061 } 1062 1063 /* 1064 * split leaf page <sp> into <sp> and a new right page <rp>. 1065 * 1066 * return <rp> pinned and its extent descriptor <rpxd> 1067 */ 1068 /* 1069 * allocate new directory page extent and 1070 * new index page(s) to cover page split(s) 1071 * 1072 * allocation hint: ? 1073 */ 1074 n = btstack->nsplit; 1075 pxdlist.maxnpxd = pxdlist.npxd = 0; 1076 xlen = sbi->nbperpage; 1077 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1078 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1079 PXDaddress(pxd, xaddr); 1080 PXDlength(pxd, xlen); 1081 pxdlist.maxnpxd++; 1082 continue; 1083 } 1084 1085 DT_PUTPAGE(smp); 1086 1087 /* undo allocation */ 1088 goto splitOut; 1089 } 1090 1091 split->pxdlist = &pxdlist; 1092 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1093 DT_PUTPAGE(smp); 1094 1095 /* undo allocation */ 1096 goto splitOut; 1097 } 1098 1099 if (!DO_INDEX(ip)) 1100 ip->i_size += PSIZE; 1101 1102 /* 1103 * propagate up the router entry for the leaf page just split 1104 * 1105 * insert a router entry for the new page into the parent page, 1106 * propagate the insert/split up the tree by walking back the stack 1107 * of (bn of parent page, index of child page entry in parent page) 1108 * that were traversed during the search for the page that split. 1109 * 1110 * the propagation of insert/split up the tree stops if the root 1111 * splits or the page inserted into doesn't have to split to hold 1112 * the new entry. 1113 * 1114 * the parent entry for the split page remains the same, and 1115 * a new entry is inserted at its right with the first key and 1116 * block number of the new right page. 1117 * 1118 * There are a maximum of 4 pages pinned at any time: 1119 * two children, left parent and right parent (when the parent splits). 1120 * keep the child pages pinned while working on the parent. 1121 * make sure that all pins are released at exit. 1122 */ 1123 while ((parent = BT_POP(btstack)) != NULL) { 1124 /* parent page specified by stack frame <parent> */ 1125 1126 /* keep current child pages (<lp>, <rp>) pinned */ 1127 lmp = smp; 1128 lp = sp; 1129 1130 /* 1131 * insert router entry in parent for new right child page <rp> 1132 */ 1133 /* get the parent page <sp> */ 1134 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1135 if (rc) { 1136 DT_PUTPAGE(lmp); 1137 DT_PUTPAGE(rmp); 1138 goto splitOut; 1139 } 1140 1141 /* 1142 * The new key entry goes ONE AFTER the index of parent entry, 1143 * because the split was to the right. 1144 */ 1145 skip = parent->index + 1; 1146 1147 /* 1148 * compute the key for the router entry 1149 * 1150 * key suffix compression: 1151 * for internal pages that have leaf pages as children, 1152 * retain only what's needed to distinguish between 1153 * the new entry and the entry on the page to its left. 1154 * If the keys compare equal, retain the entire key. 1155 * 1156 * note that compression is performed only at computing 1157 * router key at the lowest internal level. 1158 * further compression of the key between pairs of higher 1159 * level internal pages loses too much information and 1160 * the search may fail. 1161 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1162 * results in two adjacent parent entries (a)(xx). 1163 * if split occurs between these two entries, and 1164 * if compression is applied, the router key of parent entry 1165 * of right page (x) will divert search for x into right 1166 * subtree and miss x in the left subtree.) 1167 * 1168 * the entire key must be retained for the next-to-leftmost 1169 * internal key at any level of the tree, or search may fail 1170 * (e.g., ?) 1171 */ 1172 switch (rp->header.flag & BT_TYPE) { 1173 case BT_LEAF: 1174 /* 1175 * compute the length of prefix for suffix compression 1176 * between last entry of left page and first entry 1177 * of right page 1178 */ 1179 if ((sp->header.flag & BT_ROOT && skip > 1) || 1180 sp->header.prev != 0 || skip > 1) { 1181 /* compute uppercase router prefix key */ 1182 rc = ciGetLeafPrefixKey(lp, 1183 lp->header.nextindex-1, 1184 rp, 0, &key, 1185 sbi->mntflag); 1186 if (rc) { 1187 DT_PUTPAGE(lmp); 1188 DT_PUTPAGE(rmp); 1189 DT_PUTPAGE(smp); 1190 goto splitOut; 1191 } 1192 } else { 1193 /* next to leftmost entry of 1194 lowest internal level */ 1195 1196 /* compute uppercase router key */ 1197 dtGetKey(rp, 0, &key, sbi->mntflag); 1198 key.name[key.namlen] = 0; 1199 1200 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1201 ciToUpper(&key); 1202 } 1203 1204 n = NDTINTERNAL(key.namlen); 1205 break; 1206 1207 case BT_INTERNAL: 1208 dtGetKey(rp, 0, &key, sbi->mntflag); 1209 n = NDTINTERNAL(key.namlen); 1210 break; 1211 1212 default: 1213 jfs_err("dtSplitUp(): UFO!"); 1214 break; 1215 } 1216 1217 /* unpin left child page */ 1218 DT_PUTPAGE(lmp); 1219 1220 /* 1221 * compute the data for the router entry 1222 */ 1223 data->xd = rpxd; /* child page xd */ 1224 1225 /* 1226 * parent page is full - split the parent page 1227 */ 1228 if (n > sp->header.freecnt) { 1229 /* init for parent page split */ 1230 split->mp = smp; 1231 split->index = skip; /* index at insert */ 1232 split->nslot = n; 1233 split->key = &key; 1234 /* split->data = data; */ 1235 1236 /* unpin right child page */ 1237 DT_PUTPAGE(rmp); 1238 1239 /* The split routines insert the new entry, 1240 * acquire txLock as appropriate. 1241 * return <rp> pinned and its block number <rbn>. 1242 */ 1243 rc = (sp->header.flag & BT_ROOT) ? 1244 dtSplitRoot(tid, ip, split, &rmp) : 1245 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1246 if (rc) { 1247 DT_PUTPAGE(smp); 1248 goto splitOut; 1249 } 1250 1251 /* smp and rmp are pinned */ 1252 } 1253 /* 1254 * parent page is not full - insert router entry in parent page 1255 */ 1256 else { 1257 BT_MARK_DIRTY(smp, ip); 1258 /* 1259 * acquire a transaction lock on the parent page 1260 */ 1261 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1262 dtlck = (struct dt_lock *) & tlck->lock; 1263 ASSERT(dtlck->index == 0); 1264 lv = & dtlck->lv[0]; 1265 1266 /* linelock header */ 1267 lv->offset = 0; 1268 lv->length = 1; 1269 dtlck->index++; 1270 1271 /* linelock stbl of non-root parent page */ 1272 if (!(sp->header.flag & BT_ROOT)) { 1273 lv++; 1274 n = skip >> L2DTSLOTSIZE; 1275 lv->offset = sp->header.stblindex + n; 1276 lv->length = 1277 ((sp->header.nextindex - 1278 1) >> L2DTSLOTSIZE) - n + 1; 1279 dtlck->index++; 1280 } 1281 1282 dtInsertEntry(sp, skip, &key, data, &dtlck); 1283 1284 /* exit propagate up */ 1285 break; 1286 } 1287 } 1288 1289 /* unpin current split and its right page */ 1290 DT_PUTPAGE(smp); 1291 DT_PUTPAGE(rmp); 1292 1293 /* 1294 * free remaining extents allocated for split 1295 */ 1296 splitOut: 1297 n = pxdlist.npxd; 1298 pxd = &pxdlist.pxd[n]; 1299 for (; n < pxdlist.maxnpxd; n++, pxd++) 1300 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1301 1302 freeKeyName: 1303 kfree(key.name); 1304 1305 /* Rollback quota allocation */ 1306 if (rc && quota_allocation) 1307 dquot_free_block(ip, quota_allocation); 1308 1309 dtSplitUp_Exit: 1310 1311 return rc; 1312 } 1313 1314 1315 /* 1316 * dtSplitPage() 1317 * 1318 * function: Split a non-root page of a btree. 1319 * 1320 * parameter: 1321 * 1322 * return: 0 - success; 1323 * errno - failure; 1324 * return split and new page pinned; 1325 */ 1326 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1327 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1328 { 1329 int rc = 0; 1330 struct metapage *smp; 1331 dtpage_t *sp; 1332 struct metapage *rmp; 1333 dtpage_t *rp; /* new right page allocated */ 1334 s64 rbn; /* new right page block number */ 1335 struct metapage *mp; 1336 dtpage_t *p; 1337 s64 nextbn; 1338 struct pxdlist *pxdlist; 1339 pxd_t *pxd; 1340 int skip, nextindex, half, left, nxt, off, si; 1341 struct ldtentry *ldtentry; 1342 struct idtentry *idtentry; 1343 u8 *stbl; 1344 struct dtslot *f; 1345 int fsi, stblsize; 1346 int n; 1347 struct dt_lock *sdtlck, *rdtlck; 1348 struct tlock *tlck; 1349 struct dt_lock *dtlck; 1350 struct lv *slv, *rlv, *lv; 1351 1352 /* get split page */ 1353 smp = split->mp; 1354 sp = DT_PAGE(ip, smp); 1355 1356 /* 1357 * allocate the new right page for the split 1358 */ 1359 pxdlist = split->pxdlist; 1360 pxd = &pxdlist->pxd[pxdlist->npxd]; 1361 pxdlist->npxd++; 1362 rbn = addressPXD(pxd); 1363 rmp = get_metapage(ip, rbn, PSIZE, 1); 1364 if (rmp == NULL) 1365 return -EIO; 1366 1367 /* Allocate blocks to quota. */ 1368 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1369 if (rc) { 1370 release_metapage(rmp); 1371 return rc; 1372 } 1373 1374 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1375 1376 BT_MARK_DIRTY(rmp, ip); 1377 /* 1378 * acquire a transaction lock on the new right page 1379 */ 1380 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1381 rdtlck = (struct dt_lock *) & tlck->lock; 1382 1383 rp = (dtpage_t *) rmp->data; 1384 *rpp = rp; 1385 rp->header.self = *pxd; 1386 1387 BT_MARK_DIRTY(smp, ip); 1388 /* 1389 * acquire a transaction lock on the split page 1390 * 1391 * action: 1392 */ 1393 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1394 sdtlck = (struct dt_lock *) & tlck->lock; 1395 1396 /* linelock header of split page */ 1397 ASSERT(sdtlck->index == 0); 1398 slv = & sdtlck->lv[0]; 1399 slv->offset = 0; 1400 slv->length = 1; 1401 sdtlck->index++; 1402 1403 /* 1404 * initialize/update sibling pointers between sp and rp 1405 */ 1406 nextbn = le64_to_cpu(sp->header.next); 1407 rp->header.next = cpu_to_le64(nextbn); 1408 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1409 sp->header.next = cpu_to_le64(rbn); 1410 1411 /* 1412 * initialize new right page 1413 */ 1414 rp->header.flag = sp->header.flag; 1415 1416 /* compute sorted entry table at start of extent data area */ 1417 rp->header.nextindex = 0; 1418 rp->header.stblindex = 1; 1419 1420 n = PSIZE >> L2DTSLOTSIZE; 1421 rp->header.maxslot = n; 1422 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1423 1424 /* init freelist */ 1425 fsi = rp->header.stblindex + stblsize; 1426 rp->header.freelist = fsi; 1427 rp->header.freecnt = rp->header.maxslot - fsi; 1428 1429 /* 1430 * sequential append at tail: append without split 1431 * 1432 * If splitting the last page on a level because of appending 1433 * a entry to it (skip is maxentry), it's likely that the access is 1434 * sequential. Adding an empty page on the side of the level is less 1435 * work and can push the fill factor much higher than normal. 1436 * If we're wrong it's no big deal, we'll just do the split the right 1437 * way next time. 1438 * (It may look like it's equally easy to do a similar hack for 1439 * reverse sorted data, that is, split the tree left, 1440 * but it's not. Be my guest.) 1441 */ 1442 if (nextbn == 0 && split->index == sp->header.nextindex) { 1443 /* linelock header + stbl (first slot) of new page */ 1444 rlv = & rdtlck->lv[rdtlck->index]; 1445 rlv->offset = 0; 1446 rlv->length = 2; 1447 rdtlck->index++; 1448 1449 /* 1450 * initialize freelist of new right page 1451 */ 1452 f = &rp->slot[fsi]; 1453 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1454 f->next = fsi; 1455 f->next = -1; 1456 1457 /* insert entry at the first entry of the new right page */ 1458 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1459 1460 goto out; 1461 } 1462 1463 /* 1464 * non-sequential insert (at possibly middle page) 1465 */ 1466 1467 /* 1468 * update prev pointer of previous right sibling page; 1469 */ 1470 if (nextbn != 0) { 1471 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1472 if (rc) { 1473 discard_metapage(rmp); 1474 return rc; 1475 } 1476 1477 BT_MARK_DIRTY(mp, ip); 1478 /* 1479 * acquire a transaction lock on the next page 1480 */ 1481 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1482 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1483 tlck, ip, mp); 1484 dtlck = (struct dt_lock *) & tlck->lock; 1485 1486 /* linelock header of previous right sibling page */ 1487 lv = & dtlck->lv[dtlck->index]; 1488 lv->offset = 0; 1489 lv->length = 1; 1490 dtlck->index++; 1491 1492 p->header.prev = cpu_to_le64(rbn); 1493 1494 DT_PUTPAGE(mp); 1495 } 1496 1497 /* 1498 * split the data between the split and right pages. 1499 */ 1500 skip = split->index; 1501 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1502 left = 0; 1503 1504 /* 1505 * compute fill factor for split pages 1506 * 1507 * <nxt> traces the next entry to move to rp 1508 * <off> traces the next entry to stay in sp 1509 */ 1510 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1511 nextindex = sp->header.nextindex; 1512 for (nxt = off = 0; nxt < nextindex; ++off) { 1513 if (off == skip) 1514 /* check for fill factor with new entry size */ 1515 n = split->nslot; 1516 else { 1517 si = stbl[nxt]; 1518 switch (sp->header.flag & BT_TYPE) { 1519 case BT_LEAF: 1520 ldtentry = (struct ldtentry *) & sp->slot[si]; 1521 if (DO_INDEX(ip)) 1522 n = NDTLEAF(ldtentry->namlen); 1523 else 1524 n = NDTLEAF_LEGACY(ldtentry-> 1525 namlen); 1526 break; 1527 1528 case BT_INTERNAL: 1529 idtentry = (struct idtentry *) & sp->slot[si]; 1530 n = NDTINTERNAL(idtentry->namlen); 1531 break; 1532 1533 default: 1534 break; 1535 } 1536 1537 ++nxt; /* advance to next entry to move in sp */ 1538 } 1539 1540 left += n; 1541 if (left >= half) 1542 break; 1543 } 1544 1545 /* <nxt> poins to the 1st entry to move */ 1546 1547 /* 1548 * move entries to right page 1549 * 1550 * dtMoveEntry() initializes rp and reserves entry for insertion 1551 * 1552 * split page moved out entries are linelocked; 1553 * new/right page moved in entries are linelocked; 1554 */ 1555 /* linelock header + stbl of new right page */ 1556 rlv = & rdtlck->lv[rdtlck->index]; 1557 rlv->offset = 0; 1558 rlv->length = 5; 1559 rdtlck->index++; 1560 1561 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1562 1563 sp->header.nextindex = nxt; 1564 1565 /* 1566 * finalize freelist of new right page 1567 */ 1568 fsi = rp->header.freelist; 1569 f = &rp->slot[fsi]; 1570 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1571 f->next = fsi; 1572 f->next = -1; 1573 1574 /* 1575 * Update directory index table for entries now in right page 1576 */ 1577 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1578 s64 lblock; 1579 1580 mp = NULL; 1581 stbl = DT_GETSTBL(rp); 1582 for (n = 0; n < rp->header.nextindex; n++) { 1583 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1584 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1585 rbn, n, &mp, &lblock); 1586 } 1587 if (mp) 1588 release_metapage(mp); 1589 } 1590 1591 /* 1592 * the skipped index was on the left page, 1593 */ 1594 if (skip <= off) { 1595 /* insert the new entry in the split page */ 1596 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1597 1598 /* linelock stbl of split page */ 1599 if (sdtlck->index >= sdtlck->maxcnt) 1600 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1601 slv = & sdtlck->lv[sdtlck->index]; 1602 n = skip >> L2DTSLOTSIZE; 1603 slv->offset = sp->header.stblindex + n; 1604 slv->length = 1605 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1606 sdtlck->index++; 1607 } 1608 /* 1609 * the skipped index was on the right page, 1610 */ 1611 else { 1612 /* adjust the skip index to reflect the new position */ 1613 skip -= nxt; 1614 1615 /* insert the new entry in the right page */ 1616 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1617 } 1618 1619 out: 1620 *rmpp = rmp; 1621 *rpxdp = *pxd; 1622 1623 return rc; 1624 } 1625 1626 1627 /* 1628 * dtExtendPage() 1629 * 1630 * function: extend 1st/only directory leaf page 1631 * 1632 * parameter: 1633 * 1634 * return: 0 - success; 1635 * errno - failure; 1636 * return extended page pinned; 1637 */ 1638 static int dtExtendPage(tid_t tid, 1639 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1640 { 1641 struct super_block *sb = ip->i_sb; 1642 int rc; 1643 struct metapage *smp, *pmp, *mp; 1644 dtpage_t *sp, *pp; 1645 struct pxdlist *pxdlist; 1646 pxd_t *pxd, *tpxd; 1647 int xlen, xsize; 1648 int newstblindex, newstblsize; 1649 int oldstblindex, oldstblsize; 1650 int fsi, last; 1651 struct dtslot *f; 1652 struct btframe *parent; 1653 int n; 1654 struct dt_lock *dtlck; 1655 s64 xaddr, txaddr; 1656 struct tlock *tlck; 1657 struct pxd_lock *pxdlock; 1658 struct lv *lv; 1659 uint type; 1660 struct ldtentry *ldtentry; 1661 u8 *stbl; 1662 1663 /* get page to extend */ 1664 smp = split->mp; 1665 sp = DT_PAGE(ip, smp); 1666 1667 /* get parent/root page */ 1668 parent = BT_POP(btstack); 1669 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1670 if (rc) 1671 return (rc); 1672 1673 /* 1674 * extend the extent 1675 */ 1676 pxdlist = split->pxdlist; 1677 pxd = &pxdlist->pxd[pxdlist->npxd]; 1678 pxdlist->npxd++; 1679 1680 xaddr = addressPXD(pxd); 1681 tpxd = &sp->header.self; 1682 txaddr = addressPXD(tpxd); 1683 /* in-place extension */ 1684 if (xaddr == txaddr) { 1685 type = tlckEXTEND; 1686 } 1687 /* relocation */ 1688 else { 1689 type = tlckNEW; 1690 1691 /* save moved extent descriptor for later free */ 1692 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1693 pxdlock = (struct pxd_lock *) & tlck->lock; 1694 pxdlock->flag = mlckFREEPXD; 1695 pxdlock->pxd = sp->header.self; 1696 pxdlock->index = 1; 1697 1698 /* 1699 * Update directory index table to reflect new page address 1700 */ 1701 if (DO_INDEX(ip)) { 1702 s64 lblock; 1703 1704 mp = NULL; 1705 stbl = DT_GETSTBL(sp); 1706 for (n = 0; n < sp->header.nextindex; n++) { 1707 ldtentry = 1708 (struct ldtentry *) & sp->slot[stbl[n]]; 1709 modify_index(tid, ip, 1710 le32_to_cpu(ldtentry->index), 1711 xaddr, n, &mp, &lblock); 1712 } 1713 if (mp) 1714 release_metapage(mp); 1715 } 1716 } 1717 1718 /* 1719 * extend the page 1720 */ 1721 sp->header.self = *pxd; 1722 1723 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1724 1725 BT_MARK_DIRTY(smp, ip); 1726 /* 1727 * acquire a transaction lock on the extended/leaf page 1728 */ 1729 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1730 dtlck = (struct dt_lock *) & tlck->lock; 1731 lv = & dtlck->lv[0]; 1732 1733 /* update buffer extent descriptor of extended page */ 1734 xlen = lengthPXD(pxd); 1735 xsize = xlen << JFS_SBI(sb)->l2bsize; 1736 1737 /* 1738 * copy old stbl to new stbl at start of extended area 1739 */ 1740 oldstblindex = sp->header.stblindex; 1741 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1742 newstblindex = sp->header.maxslot; 1743 n = xsize >> L2DTSLOTSIZE; 1744 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1745 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1746 sp->header.nextindex); 1747 1748 /* 1749 * in-line extension: linelock old area of extended page 1750 */ 1751 if (type == tlckEXTEND) { 1752 /* linelock header */ 1753 lv->offset = 0; 1754 lv->length = 1; 1755 dtlck->index++; 1756 lv++; 1757 1758 /* linelock new stbl of extended page */ 1759 lv->offset = newstblindex; 1760 lv->length = newstblsize; 1761 } 1762 /* 1763 * relocation: linelock whole relocated area 1764 */ 1765 else { 1766 lv->offset = 0; 1767 lv->length = sp->header.maxslot + newstblsize; 1768 } 1769 1770 dtlck->index++; 1771 1772 sp->header.maxslot = n; 1773 sp->header.stblindex = newstblindex; 1774 /* sp->header.nextindex remains the same */ 1775 1776 /* 1777 * add old stbl region at head of freelist 1778 */ 1779 fsi = oldstblindex; 1780 f = &sp->slot[fsi]; 1781 last = sp->header.freelist; 1782 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1783 f->next = last; 1784 last = fsi; 1785 } 1786 sp->header.freelist = last; 1787 sp->header.freecnt += oldstblsize; 1788 1789 /* 1790 * append free region of newly extended area at tail of freelist 1791 */ 1792 /* init free region of newly extended area */ 1793 fsi = n = newstblindex + newstblsize; 1794 f = &sp->slot[fsi]; 1795 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1796 f->next = fsi; 1797 f->next = -1; 1798 1799 /* append new free region at tail of old freelist */ 1800 fsi = sp->header.freelist; 1801 if (fsi == -1) 1802 sp->header.freelist = n; 1803 else { 1804 do { 1805 f = &sp->slot[fsi]; 1806 fsi = f->next; 1807 } while (fsi != -1); 1808 1809 f->next = n; 1810 } 1811 1812 sp->header.freecnt += sp->header.maxslot - n; 1813 1814 /* 1815 * insert the new entry 1816 */ 1817 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1818 1819 BT_MARK_DIRTY(pmp, ip); 1820 /* 1821 * linelock any freeslots residing in old extent 1822 */ 1823 if (type == tlckEXTEND) { 1824 n = sp->header.maxslot >> 2; 1825 if (sp->header.freelist < n) 1826 dtLinelockFreelist(sp, n, &dtlck); 1827 } 1828 1829 /* 1830 * update parent entry on the parent/root page 1831 */ 1832 /* 1833 * acquire a transaction lock on the parent/root page 1834 */ 1835 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1836 dtlck = (struct dt_lock *) & tlck->lock; 1837 lv = & dtlck->lv[dtlck->index]; 1838 1839 /* linelock parent entry - 1st slot */ 1840 lv->offset = 1; 1841 lv->length = 1; 1842 dtlck->index++; 1843 1844 /* update the parent pxd for page extension */ 1845 tpxd = (pxd_t *) & pp->slot[1]; 1846 *tpxd = *pxd; 1847 1848 DT_PUTPAGE(pmp); 1849 return 0; 1850 } 1851 1852 1853 /* 1854 * dtSplitRoot() 1855 * 1856 * function: 1857 * split the full root page into 1858 * original/root/split page and new right page 1859 * i.e., root remains fixed in tree anchor (inode) and 1860 * the root is copied to a single new right child page 1861 * since root page << non-root page, and 1862 * the split root page contains a single entry for the 1863 * new right child page. 1864 * 1865 * parameter: 1866 * 1867 * return: 0 - success; 1868 * errno - failure; 1869 * return new page pinned; 1870 */ 1871 static int dtSplitRoot(tid_t tid, 1872 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1873 { 1874 struct super_block *sb = ip->i_sb; 1875 struct metapage *smp; 1876 dtroot_t *sp; 1877 struct metapage *rmp; 1878 dtpage_t *rp; 1879 s64 rbn; 1880 int xlen; 1881 int xsize; 1882 struct dtslot *f; 1883 s8 *stbl; 1884 int fsi, stblsize, n; 1885 struct idtentry *s; 1886 pxd_t *ppxd; 1887 struct pxdlist *pxdlist; 1888 pxd_t *pxd; 1889 struct dt_lock *dtlck; 1890 struct tlock *tlck; 1891 struct lv *lv; 1892 int rc; 1893 1894 /* get split root page */ 1895 smp = split->mp; 1896 sp = &JFS_IP(ip)->i_dtroot; 1897 1898 /* 1899 * allocate/initialize a single (right) child page 1900 * 1901 * N.B. at first split, a one (or two) block to fit new entry 1902 * is allocated; at subsequent split, a full page is allocated; 1903 */ 1904 pxdlist = split->pxdlist; 1905 pxd = &pxdlist->pxd[pxdlist->npxd]; 1906 pxdlist->npxd++; 1907 rbn = addressPXD(pxd); 1908 xlen = lengthPXD(pxd); 1909 xsize = xlen << JFS_SBI(sb)->l2bsize; 1910 rmp = get_metapage(ip, rbn, xsize, 1); 1911 if (!rmp) 1912 return -EIO; 1913 1914 rp = rmp->data; 1915 1916 /* Allocate blocks to quota. */ 1917 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1918 if (rc) { 1919 release_metapage(rmp); 1920 return rc; 1921 } 1922 1923 BT_MARK_DIRTY(rmp, ip); 1924 /* 1925 * acquire a transaction lock on the new right page 1926 */ 1927 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1928 dtlck = (struct dt_lock *) & tlck->lock; 1929 1930 rp->header.flag = 1931 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1932 rp->header.self = *pxd; 1933 1934 /* initialize sibling pointers */ 1935 rp->header.next = 0; 1936 rp->header.prev = 0; 1937 1938 /* 1939 * move in-line root page into new right page extent 1940 */ 1941 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1942 ASSERT(dtlck->index == 0); 1943 lv = & dtlck->lv[0]; 1944 lv->offset = 0; 1945 lv->length = 10; /* 1 + 8 + 1 */ 1946 dtlck->index++; 1947 1948 n = xsize >> L2DTSLOTSIZE; 1949 rp->header.maxslot = n; 1950 stblsize = (n + 31) >> L2DTSLOTSIZE; 1951 1952 /* copy old stbl to new stbl at start of extended area */ 1953 rp->header.stblindex = DTROOTMAXSLOT; 1954 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1955 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1956 rp->header.nextindex = sp->header.nextindex; 1957 1958 /* copy old data area to start of new data area */ 1959 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1960 1961 /* 1962 * append free region of newly extended area at tail of freelist 1963 */ 1964 /* init free region of newly extended area */ 1965 fsi = n = DTROOTMAXSLOT + stblsize; 1966 f = &rp->slot[fsi]; 1967 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1968 f->next = fsi; 1969 f->next = -1; 1970 1971 /* append new free region at tail of old freelist */ 1972 fsi = sp->header.freelist; 1973 if (fsi == -1) 1974 rp->header.freelist = n; 1975 else { 1976 rp->header.freelist = fsi; 1977 1978 do { 1979 f = &rp->slot[fsi]; 1980 fsi = f->next; 1981 } while (fsi >= 0); 1982 1983 f->next = n; 1984 } 1985 1986 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1987 1988 /* 1989 * Update directory index table for entries now in right page 1990 */ 1991 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1992 s64 lblock; 1993 struct metapage *mp = NULL; 1994 struct ldtentry *ldtentry; 1995 1996 stbl = DT_GETSTBL(rp); 1997 for (n = 0; n < rp->header.nextindex; n++) { 1998 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1999 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 2000 rbn, n, &mp, &lblock); 2001 } 2002 if (mp) 2003 release_metapage(mp); 2004 } 2005 /* 2006 * insert the new entry into the new right/child page 2007 * (skip index in the new right page will not change) 2008 */ 2009 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2010 2011 /* 2012 * reset parent/root page 2013 * 2014 * set the 1st entry offset to 0, which force the left-most key 2015 * at any level of the tree to be less than any search key. 2016 * 2017 * The btree comparison code guarantees that the left-most key on any 2018 * level of the tree is never used, so it doesn't need to be filled in. 2019 */ 2020 BT_MARK_DIRTY(smp, ip); 2021 /* 2022 * acquire a transaction lock on the root page (in-memory inode) 2023 */ 2024 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2025 dtlck = (struct dt_lock *) & tlck->lock; 2026 2027 /* linelock root */ 2028 ASSERT(dtlck->index == 0); 2029 lv = & dtlck->lv[0]; 2030 lv->offset = 0; 2031 lv->length = DTROOTMAXSLOT; 2032 dtlck->index++; 2033 2034 /* update page header of root */ 2035 if (sp->header.flag & BT_LEAF) { 2036 sp->header.flag &= ~BT_LEAF; 2037 sp->header.flag |= BT_INTERNAL; 2038 } 2039 2040 /* init the first entry */ 2041 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2042 ppxd = (pxd_t *) s; 2043 *ppxd = *pxd; 2044 s->next = -1; 2045 s->namlen = 0; 2046 2047 stbl = sp->header.stbl; 2048 stbl[0] = DTENTRYSTART; 2049 sp->header.nextindex = 1; 2050 2051 /* init freelist */ 2052 fsi = DTENTRYSTART + 1; 2053 f = &sp->slot[fsi]; 2054 2055 /* init free region of remaining area */ 2056 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2057 f->next = fsi; 2058 f->next = -1; 2059 2060 sp->header.freelist = DTENTRYSTART + 1; 2061 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2062 2063 *rmpp = rmp; 2064 2065 return 0; 2066 } 2067 2068 2069 /* 2070 * dtDelete() 2071 * 2072 * function: delete the entry(s) referenced by a key. 2073 * 2074 * parameter: 2075 * 2076 * return: 2077 */ 2078 int dtDelete(tid_t tid, 2079 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2080 { 2081 int rc = 0; 2082 s64 bn; 2083 struct metapage *mp, *imp; 2084 dtpage_t *p; 2085 int index; 2086 struct btstack btstack; 2087 struct dt_lock *dtlck; 2088 struct tlock *tlck; 2089 struct lv *lv; 2090 int i; 2091 struct ldtentry *ldtentry; 2092 u8 *stbl; 2093 u32 table_index, next_index; 2094 struct metapage *nmp; 2095 dtpage_t *np; 2096 2097 /* 2098 * search for the entry to delete: 2099 * 2100 * dtSearch() returns (leaf page pinned, index at which to delete). 2101 */ 2102 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2103 return rc; 2104 2105 /* retrieve search result */ 2106 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2107 2108 /* 2109 * We need to find put the index of the next entry into the 2110 * directory index table in order to resume a readdir from this 2111 * entry. 2112 */ 2113 if (DO_INDEX(ip)) { 2114 stbl = DT_GETSTBL(p); 2115 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2116 table_index = le32_to_cpu(ldtentry->index); 2117 if (index == (p->header.nextindex - 1)) { 2118 /* 2119 * Last entry in this leaf page 2120 */ 2121 if ((p->header.flag & BT_ROOT) 2122 || (p->header.next == 0)) 2123 next_index = -1; 2124 else { 2125 /* Read next leaf page */ 2126 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2127 nmp, PSIZE, np, rc); 2128 if (rc) 2129 next_index = -1; 2130 else { 2131 stbl = DT_GETSTBL(np); 2132 ldtentry = 2133 (struct ldtentry *) & np-> 2134 slot[stbl[0]]; 2135 next_index = 2136 le32_to_cpu(ldtentry->index); 2137 DT_PUTPAGE(nmp); 2138 } 2139 } 2140 } else { 2141 ldtentry = 2142 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2143 next_index = le32_to_cpu(ldtentry->index); 2144 } 2145 free_index(tid, ip, table_index, next_index); 2146 } 2147 /* 2148 * the leaf page becomes empty, delete the page 2149 */ 2150 if (p->header.nextindex == 1) { 2151 /* delete empty page */ 2152 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2153 } 2154 /* 2155 * the leaf page has other entries remaining: 2156 * 2157 * delete the entry from the leaf page. 2158 */ 2159 else { 2160 BT_MARK_DIRTY(mp, ip); 2161 /* 2162 * acquire a transaction lock on the leaf page 2163 */ 2164 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2165 dtlck = (struct dt_lock *) & tlck->lock; 2166 2167 /* 2168 * Do not assume that dtlck->index will be zero. During a 2169 * rename within a directory, this transaction may have 2170 * modified this page already when adding the new entry. 2171 */ 2172 2173 /* linelock header */ 2174 if (dtlck->index >= dtlck->maxcnt) 2175 dtlck = (struct dt_lock *) txLinelock(dtlck); 2176 lv = & dtlck->lv[dtlck->index]; 2177 lv->offset = 0; 2178 lv->length = 1; 2179 dtlck->index++; 2180 2181 /* linelock stbl of non-root leaf page */ 2182 if (!(p->header.flag & BT_ROOT)) { 2183 if (dtlck->index >= dtlck->maxcnt) 2184 dtlck = (struct dt_lock *) txLinelock(dtlck); 2185 lv = & dtlck->lv[dtlck->index]; 2186 i = index >> L2DTSLOTSIZE; 2187 lv->offset = p->header.stblindex + i; 2188 lv->length = 2189 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2190 i + 1; 2191 dtlck->index++; 2192 } 2193 2194 /* free the leaf entry */ 2195 dtDeleteEntry(p, index, &dtlck); 2196 2197 /* 2198 * Update directory index table for entries moved in stbl 2199 */ 2200 if (DO_INDEX(ip) && index < p->header.nextindex) { 2201 s64 lblock; 2202 2203 imp = NULL; 2204 stbl = DT_GETSTBL(p); 2205 for (i = index; i < p->header.nextindex; i++) { 2206 ldtentry = 2207 (struct ldtentry *) & p->slot[stbl[i]]; 2208 modify_index(tid, ip, 2209 le32_to_cpu(ldtentry->index), 2210 bn, i, &imp, &lblock); 2211 } 2212 if (imp) 2213 release_metapage(imp); 2214 } 2215 2216 DT_PUTPAGE(mp); 2217 } 2218 2219 return rc; 2220 } 2221 2222 2223 /* 2224 * dtDeleteUp() 2225 * 2226 * function: 2227 * free empty pages as propagating deletion up the tree 2228 * 2229 * parameter: 2230 * 2231 * return: 2232 */ 2233 static int dtDeleteUp(tid_t tid, struct inode *ip, 2234 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2235 { 2236 int rc = 0; 2237 struct metapage *mp; 2238 dtpage_t *p; 2239 int index, nextindex; 2240 int xlen; 2241 struct btframe *parent; 2242 struct dt_lock *dtlck; 2243 struct tlock *tlck; 2244 struct lv *lv; 2245 struct pxd_lock *pxdlock; 2246 int i; 2247 2248 /* 2249 * keep the root leaf page which has become empty 2250 */ 2251 if (BT_IS_ROOT(fmp)) { 2252 /* 2253 * reset the root 2254 * 2255 * dtInitRoot() acquires txlock on the root 2256 */ 2257 dtInitRoot(tid, ip, PARENT(ip)); 2258 2259 DT_PUTPAGE(fmp); 2260 2261 return 0; 2262 } 2263 2264 /* 2265 * free the non-root leaf page 2266 */ 2267 /* 2268 * acquire a transaction lock on the page 2269 * 2270 * write FREEXTENT|NOREDOPAGE log record 2271 * N.B. linelock is overlaid as freed extent descriptor, and 2272 * the buffer page is freed; 2273 */ 2274 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2275 pxdlock = (struct pxd_lock *) & tlck->lock; 2276 pxdlock->flag = mlckFREEPXD; 2277 pxdlock->pxd = fp->header.self; 2278 pxdlock->index = 1; 2279 2280 /* update sibling pointers */ 2281 if ((rc = dtRelink(tid, ip, fp))) { 2282 BT_PUTPAGE(fmp); 2283 return rc; 2284 } 2285 2286 xlen = lengthPXD(&fp->header.self); 2287 2288 /* Free quota allocation. */ 2289 dquot_free_block(ip, xlen); 2290 2291 /* free/invalidate its buffer page */ 2292 discard_metapage(fmp); 2293 2294 /* 2295 * propagate page deletion up the directory tree 2296 * 2297 * If the delete from the parent page makes it empty, 2298 * continue all the way up the tree. 2299 * stop if the root page is reached (which is never deleted) or 2300 * if the entry deletion does not empty the page. 2301 */ 2302 while ((parent = BT_POP(btstack)) != NULL) { 2303 /* pin the parent page <sp> */ 2304 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2305 if (rc) 2306 return rc; 2307 2308 /* 2309 * free the extent of the child page deleted 2310 */ 2311 index = parent->index; 2312 2313 /* 2314 * delete the entry for the child page from parent 2315 */ 2316 nextindex = p->header.nextindex; 2317 2318 /* 2319 * the parent has the single entry being deleted: 2320 * 2321 * free the parent page which has become empty. 2322 */ 2323 if (nextindex == 1) { 2324 /* 2325 * keep the root internal page which has become empty 2326 */ 2327 if (p->header.flag & BT_ROOT) { 2328 /* 2329 * reset the root 2330 * 2331 * dtInitRoot() acquires txlock on the root 2332 */ 2333 dtInitRoot(tid, ip, PARENT(ip)); 2334 2335 DT_PUTPAGE(mp); 2336 2337 return 0; 2338 } 2339 /* 2340 * free the parent page 2341 */ 2342 else { 2343 /* 2344 * acquire a transaction lock on the page 2345 * 2346 * write FREEXTENT|NOREDOPAGE log record 2347 */ 2348 tlck = 2349 txMaplock(tid, ip, 2350 tlckDTREE | tlckFREE); 2351 pxdlock = (struct pxd_lock *) & tlck->lock; 2352 pxdlock->flag = mlckFREEPXD; 2353 pxdlock->pxd = p->header.self; 2354 pxdlock->index = 1; 2355 2356 /* update sibling pointers */ 2357 if ((rc = dtRelink(tid, ip, p))) { 2358 DT_PUTPAGE(mp); 2359 return rc; 2360 } 2361 2362 xlen = lengthPXD(&p->header.self); 2363 2364 /* Free quota allocation */ 2365 dquot_free_block(ip, xlen); 2366 2367 /* free/invalidate its buffer page */ 2368 discard_metapage(mp); 2369 2370 /* propagate up */ 2371 continue; 2372 } 2373 } 2374 2375 /* 2376 * the parent has other entries remaining: 2377 * 2378 * delete the router entry from the parent page. 2379 */ 2380 BT_MARK_DIRTY(mp, ip); 2381 /* 2382 * acquire a transaction lock on the page 2383 * 2384 * action: router entry deletion 2385 */ 2386 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2387 dtlck = (struct dt_lock *) & tlck->lock; 2388 2389 /* linelock header */ 2390 if (dtlck->index >= dtlck->maxcnt) 2391 dtlck = (struct dt_lock *) txLinelock(dtlck); 2392 lv = & dtlck->lv[dtlck->index]; 2393 lv->offset = 0; 2394 lv->length = 1; 2395 dtlck->index++; 2396 2397 /* linelock stbl of non-root leaf page */ 2398 if (!(p->header.flag & BT_ROOT)) { 2399 if (dtlck->index < dtlck->maxcnt) 2400 lv++; 2401 else { 2402 dtlck = (struct dt_lock *) txLinelock(dtlck); 2403 lv = & dtlck->lv[0]; 2404 } 2405 i = index >> L2DTSLOTSIZE; 2406 lv->offset = p->header.stblindex + i; 2407 lv->length = 2408 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2409 i + 1; 2410 dtlck->index++; 2411 } 2412 2413 /* free the router entry */ 2414 dtDeleteEntry(p, index, &dtlck); 2415 2416 /* reset key of new leftmost entry of level (for consistency) */ 2417 if (index == 0 && 2418 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2419 dtTruncateEntry(p, 0, &dtlck); 2420 2421 /* unpin the parent page */ 2422 DT_PUTPAGE(mp); 2423 2424 /* exit propagation up */ 2425 break; 2426 } 2427 2428 if (!DO_INDEX(ip)) 2429 ip->i_size -= PSIZE; 2430 2431 return 0; 2432 } 2433 2434 /* 2435 * dtRelink() 2436 * 2437 * function: 2438 * link around a freed page. 2439 * 2440 * parameter: 2441 * fp: page to be freed 2442 * 2443 * return: 2444 */ 2445 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2446 { 2447 int rc; 2448 struct metapage *mp; 2449 s64 nextbn, prevbn; 2450 struct tlock *tlck; 2451 struct dt_lock *dtlck; 2452 struct lv *lv; 2453 2454 nextbn = le64_to_cpu(p->header.next); 2455 prevbn = le64_to_cpu(p->header.prev); 2456 2457 /* update prev pointer of the next page */ 2458 if (nextbn != 0) { 2459 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2460 if (rc) 2461 return rc; 2462 2463 BT_MARK_DIRTY(mp, ip); 2464 /* 2465 * acquire a transaction lock on the next page 2466 * 2467 * action: update prev pointer; 2468 */ 2469 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2470 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2471 tlck, ip, mp); 2472 dtlck = (struct dt_lock *) & tlck->lock; 2473 2474 /* linelock header */ 2475 if (dtlck->index >= dtlck->maxcnt) 2476 dtlck = (struct dt_lock *) txLinelock(dtlck); 2477 lv = & dtlck->lv[dtlck->index]; 2478 lv->offset = 0; 2479 lv->length = 1; 2480 dtlck->index++; 2481 2482 p->header.prev = cpu_to_le64(prevbn); 2483 DT_PUTPAGE(mp); 2484 } 2485 2486 /* update next pointer of the previous page */ 2487 if (prevbn != 0) { 2488 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2489 if (rc) 2490 return rc; 2491 2492 BT_MARK_DIRTY(mp, ip); 2493 /* 2494 * acquire a transaction lock on the prev page 2495 * 2496 * action: update next pointer; 2497 */ 2498 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2499 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2500 tlck, ip, mp); 2501 dtlck = (struct dt_lock *) & tlck->lock; 2502 2503 /* linelock header */ 2504 if (dtlck->index >= dtlck->maxcnt) 2505 dtlck = (struct dt_lock *) txLinelock(dtlck); 2506 lv = & dtlck->lv[dtlck->index]; 2507 lv->offset = 0; 2508 lv->length = 1; 2509 dtlck->index++; 2510 2511 p->header.next = cpu_to_le64(nextbn); 2512 DT_PUTPAGE(mp); 2513 } 2514 2515 return 0; 2516 } 2517 2518 2519 /* 2520 * dtInitRoot() 2521 * 2522 * initialize directory root (inline in inode) 2523 */ 2524 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2525 { 2526 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2527 dtroot_t *p; 2528 int fsi; 2529 struct dtslot *f; 2530 struct tlock *tlck; 2531 struct dt_lock *dtlck; 2532 struct lv *lv; 2533 u16 xflag_save; 2534 2535 /* 2536 * If this was previously an non-empty directory, we need to remove 2537 * the old directory table. 2538 */ 2539 if (DO_INDEX(ip)) { 2540 if (!jfs_dirtable_inline(ip)) { 2541 struct tblock *tblk = tid_to_tblock(tid); 2542 /* 2543 * We're playing games with the tid's xflag. If 2544 * we're removing a regular file, the file's xtree 2545 * is committed with COMMIT_PMAP, but we always 2546 * commit the directories xtree with COMMIT_PWMAP. 2547 */ 2548 xflag_save = tblk->xflag; 2549 tblk->xflag = 0; 2550 /* 2551 * xtTruncate isn't guaranteed to fully truncate 2552 * the xtree. The caller needs to check i_size 2553 * after committing the transaction to see if 2554 * additional truncation is needed. The 2555 * COMMIT_Stale flag tells caller that we 2556 * initiated the truncation. 2557 */ 2558 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2559 set_cflag(COMMIT_Stale, ip); 2560 2561 tblk->xflag = xflag_save; 2562 } else 2563 ip->i_size = 1; 2564 2565 jfs_ip->next_index = 2; 2566 } else 2567 ip->i_size = IDATASIZE; 2568 2569 /* 2570 * acquire a transaction lock on the root 2571 * 2572 * action: directory initialization; 2573 */ 2574 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2575 tlckDTREE | tlckENTRY | tlckBTROOT); 2576 dtlck = (struct dt_lock *) & tlck->lock; 2577 2578 /* linelock root */ 2579 ASSERT(dtlck->index == 0); 2580 lv = & dtlck->lv[0]; 2581 lv->offset = 0; 2582 lv->length = DTROOTMAXSLOT; 2583 dtlck->index++; 2584 2585 p = &jfs_ip->i_dtroot; 2586 2587 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2588 2589 p->header.nextindex = 0; 2590 2591 /* init freelist */ 2592 fsi = 1; 2593 f = &p->slot[fsi]; 2594 2595 /* init data area of root */ 2596 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2597 f->next = fsi; 2598 f->next = -1; 2599 2600 p->header.freelist = 1; 2601 p->header.freecnt = 8; 2602 2603 /* init '..' entry */ 2604 p->header.idotdot = cpu_to_le32(idotdot); 2605 2606 return; 2607 } 2608 2609 /* 2610 * add_missing_indices() 2611 * 2612 * function: Fix dtree page in which one or more entries has an invalid index. 2613 * fsck.jfs should really fix this, but it currently does not. 2614 * Called from jfs_readdir when bad index is detected. 2615 */ 2616 static int add_missing_indices(struct inode *inode, s64 bn) 2617 { 2618 struct ldtentry *d; 2619 struct dt_lock *dtlck; 2620 int i; 2621 uint index; 2622 struct lv *lv; 2623 struct metapage *mp; 2624 dtpage_t *p; 2625 int rc = 0; 2626 s8 *stbl; 2627 tid_t tid; 2628 struct tlock *tlck; 2629 2630 tid = txBegin(inode->i_sb, 0); 2631 2632 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2633 2634 if (rc) { 2635 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2636 goto end; 2637 } 2638 BT_MARK_DIRTY(mp, inode); 2639 2640 ASSERT(p->header.flag & BT_LEAF); 2641 2642 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2643 if (BT_IS_ROOT(mp)) 2644 tlck->type |= tlckBTROOT; 2645 2646 dtlck = (struct dt_lock *) &tlck->lock; 2647 2648 stbl = DT_GETSTBL(p); 2649 for (i = 0; i < p->header.nextindex; i++) { 2650 if (stbl[i] < 0) { 2651 jfs_err("jfs: add_missing_indices: Invalid stbl[%d] = %d for inode %ld, block = %lld", 2652 i, stbl[i], (long)inode->i_ino, (long long)bn); 2653 rc = -EIO; 2654 2655 DT_PUTPAGE(mp); 2656 txAbort(tid, 0); 2657 goto end; 2658 } 2659 2660 d = (struct ldtentry *) &p->slot[stbl[i]]; 2661 index = le32_to_cpu(d->index); 2662 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2663 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2664 if (dtlck->index >= dtlck->maxcnt) 2665 dtlck = (struct dt_lock *) txLinelock(dtlck); 2666 lv = &dtlck->lv[dtlck->index]; 2667 lv->offset = stbl[i]; 2668 lv->length = 1; 2669 dtlck->index++; 2670 } 2671 } 2672 2673 DT_PUTPAGE(mp); 2674 (void) txCommit(tid, 1, &inode, 0); 2675 end: 2676 txEnd(tid); 2677 return rc; 2678 } 2679 2680 /* 2681 * Buffer to hold directory entry info while traversing a dtree page 2682 * before being fed to the filldir function 2683 */ 2684 struct jfs_dirent { 2685 loff_t position; 2686 int ino; 2687 u16 name_len; 2688 char name[]; 2689 }; 2690 2691 /* 2692 * function to determine next variable-sized jfs_dirent in buffer 2693 */ 2694 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2695 { 2696 return (struct jfs_dirent *) 2697 ((char *)dirent + 2698 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2699 sizeof (loff_t) - 1) & 2700 ~(sizeof (loff_t) - 1))); 2701 } 2702 2703 /* 2704 * jfs_readdir() 2705 * 2706 * function: read directory entries sequentially 2707 * from the specified entry offset 2708 * 2709 * parameter: 2710 * 2711 * return: offset = (pn, index) of start entry 2712 * of next jfs_readdir()/dtRead() 2713 */ 2714 int jfs_readdir(struct file *file, struct dir_context *ctx) 2715 { 2716 struct inode *ip = file_inode(file); 2717 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2718 int rc = 0; 2719 loff_t dtpos; /* legacy OS/2 style position */ 2720 struct dtoffset { 2721 s16 pn; 2722 s16 index; 2723 s32 unused; 2724 } *dtoffset = (struct dtoffset *) &dtpos; 2725 s64 bn; 2726 struct metapage *mp; 2727 dtpage_t *p; 2728 int index; 2729 s8 *stbl; 2730 struct btstack btstack; 2731 int i, next; 2732 struct ldtentry *d; 2733 struct dtslot *t; 2734 int d_namleft, len, outlen; 2735 unsigned long dirent_buf; 2736 char *name_ptr; 2737 u32 dir_index; 2738 int do_index = 0; 2739 uint loop_count = 0; 2740 struct jfs_dirent *jfs_dirent; 2741 int jfs_dirents; 2742 int overflow, fix_page, page_fixed = 0; 2743 static int unique_pos = 2; /* If we can't fix broken index */ 2744 2745 if (ctx->pos == DIREND) 2746 return 0; 2747 2748 if (DO_INDEX(ip)) { 2749 /* 2750 * persistent index is stored in directory entries. 2751 * Special cases: 0 = . 2752 * 1 = .. 2753 * -1 = End of directory 2754 */ 2755 do_index = 1; 2756 2757 dir_index = (u32) ctx->pos; 2758 2759 /* 2760 * NFSv4 reserves cookies 1 and 2 for . and .. so the value 2761 * we return to the vfs is one greater than the one we use 2762 * internally. 2763 */ 2764 if (dir_index) 2765 dir_index--; 2766 2767 if (dir_index > 1) { 2768 struct dir_table_slot dirtab_slot; 2769 2770 if (dtEmpty(ip) || 2771 (dir_index >= JFS_IP(ip)->next_index)) { 2772 /* Stale position. Directory has shrunk */ 2773 ctx->pos = DIREND; 2774 return 0; 2775 } 2776 repeat: 2777 rc = read_index(ip, dir_index, &dirtab_slot); 2778 if (rc) { 2779 ctx->pos = DIREND; 2780 return rc; 2781 } 2782 if (dirtab_slot.flag == DIR_INDEX_FREE) { 2783 if (loop_count++ > JFS_IP(ip)->next_index) { 2784 jfs_err("jfs_readdir detected infinite loop!"); 2785 ctx->pos = DIREND; 2786 return 0; 2787 } 2788 dir_index = le32_to_cpu(dirtab_slot.addr2); 2789 if (dir_index == -1) { 2790 ctx->pos = DIREND; 2791 return 0; 2792 } 2793 goto repeat; 2794 } 2795 bn = addressDTS(&dirtab_slot); 2796 index = dirtab_slot.slot; 2797 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2798 if (rc) { 2799 ctx->pos = DIREND; 2800 return 0; 2801 } 2802 if (p->header.flag & BT_INTERNAL) { 2803 jfs_err("jfs_readdir: bad index table"); 2804 DT_PUTPAGE(mp); 2805 ctx->pos = DIREND; 2806 return 0; 2807 } 2808 } else { 2809 if (dir_index == 0) { 2810 /* 2811 * self "." 2812 */ 2813 ctx->pos = 1; 2814 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2815 return 0; 2816 } 2817 /* 2818 * parent ".." 2819 */ 2820 ctx->pos = 2; 2821 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2822 return 0; 2823 2824 /* 2825 * Find first entry of left-most leaf 2826 */ 2827 if (dtEmpty(ip)) { 2828 ctx->pos = DIREND; 2829 return 0; 2830 } 2831 2832 if ((rc = dtReadFirst(ip, &btstack))) 2833 return rc; 2834 2835 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2836 } 2837 } else { 2838 /* 2839 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 2840 * 2841 * pn = 0; index = 1: First entry "." 2842 * pn = 0; index = 2: Second entry ".." 2843 * pn > 0: Real entries, pn=1 -> leftmost page 2844 * pn = index = -1: No more entries 2845 */ 2846 dtpos = ctx->pos; 2847 if (dtpos < 2) { 2848 /* build "." entry */ 2849 ctx->pos = 1; 2850 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2851 return 0; 2852 dtoffset->index = 2; 2853 ctx->pos = dtpos; 2854 } 2855 2856 if (dtoffset->pn == 0) { 2857 if (dtoffset->index == 2) { 2858 /* build ".." entry */ 2859 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2860 return 0; 2861 } else { 2862 jfs_err("jfs_readdir called with invalid offset!"); 2863 } 2864 dtoffset->pn = 1; 2865 dtoffset->index = 0; 2866 ctx->pos = dtpos; 2867 } 2868 2869 if (dtEmpty(ip)) { 2870 ctx->pos = DIREND; 2871 return 0; 2872 } 2873 2874 if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { 2875 jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext", 2876 rc); 2877 ctx->pos = DIREND; 2878 return 0; 2879 } 2880 /* get start leaf page and index */ 2881 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2882 2883 /* offset beyond directory eof ? */ 2884 if (bn < 0) { 2885 ctx->pos = DIREND; 2886 return 0; 2887 } 2888 } 2889 2890 dirent_buf = __get_free_page(GFP_KERNEL); 2891 if (dirent_buf == 0) { 2892 DT_PUTPAGE(mp); 2893 jfs_warn("jfs_readdir: __get_free_page failed!"); 2894 ctx->pos = DIREND; 2895 return -ENOMEM; 2896 } 2897 2898 while (1) { 2899 jfs_dirent = (struct jfs_dirent *) dirent_buf; 2900 jfs_dirents = 0; 2901 overflow = fix_page = 0; 2902 2903 stbl = DT_GETSTBL(p); 2904 2905 for (i = index; i < p->header.nextindex; i++) { 2906 if (stbl[i] < 0 || stbl[i] >= DTPAGEMAXSLOT) { 2907 jfs_err("JFS: Invalid stbl[%d] = %d for inode %ld, block = %lld", 2908 i, stbl[i], (long)ip->i_ino, (long long)bn); 2909 free_page(dirent_buf); 2910 DT_PUTPAGE(mp); 2911 return -EIO; 2912 } 2913 2914 d = (struct ldtentry *) & p->slot[stbl[i]]; 2915 2916 if (((long) jfs_dirent + d->namlen + 1) > 2917 (dirent_buf + PAGE_SIZE)) { 2918 /* DBCS codepages could overrun dirent_buf */ 2919 index = i; 2920 overflow = 1; 2921 break; 2922 } 2923 2924 d_namleft = d->namlen; 2925 name_ptr = jfs_dirent->name; 2926 jfs_dirent->ino = le32_to_cpu(d->inumber); 2927 2928 if (do_index) { 2929 len = min(d_namleft, DTLHDRDATALEN); 2930 jfs_dirent->position = le32_to_cpu(d->index); 2931 /* 2932 * d->index should always be valid, but it 2933 * isn't. fsck.jfs doesn't create the 2934 * directory index for the lost+found 2935 * directory. Rather than let it go, 2936 * we can try to fix it. 2937 */ 2938 if ((jfs_dirent->position < 2) || 2939 (jfs_dirent->position >= 2940 JFS_IP(ip)->next_index)) { 2941 if (!page_fixed && !isReadOnly(ip)) { 2942 fix_page = 1; 2943 /* 2944 * setting overflow and setting 2945 * index to i will cause the 2946 * same page to be processed 2947 * again starting here 2948 */ 2949 overflow = 1; 2950 index = i; 2951 break; 2952 } 2953 jfs_dirent->position = unique_pos++; 2954 } 2955 /* 2956 * We add 1 to the index because we may 2957 * use a value of 2 internally, and NFSv4 2958 * doesn't like that. 2959 */ 2960 jfs_dirent->position++; 2961 } else { 2962 jfs_dirent->position = dtpos; 2963 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 2964 } 2965 2966 /* copy the name of head/only segment */ 2967 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 2968 codepage); 2969 jfs_dirent->name_len = outlen; 2970 2971 /* copy name in the additional segment(s) */ 2972 next = d->next; 2973 while (next >= 0) { 2974 t = (struct dtslot *) & p->slot[next]; 2975 name_ptr += outlen; 2976 d_namleft -= len; 2977 /* Sanity Check */ 2978 if (d_namleft == 0) { 2979 jfs_error(ip->i_sb, 2980 "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", 2981 (long)ip->i_ino, 2982 (long long)bn, 2983 i); 2984 goto skip_one; 2985 } 2986 len = min(d_namleft, DTSLOTDATALEN); 2987 outlen = jfs_strfromUCS_le(name_ptr, t->name, 2988 len, codepage); 2989 jfs_dirent->name_len += outlen; 2990 2991 next = t->next; 2992 } 2993 2994 jfs_dirents++; 2995 jfs_dirent = next_jfs_dirent(jfs_dirent); 2996 skip_one: 2997 if (!do_index) 2998 dtoffset->index++; 2999 } 3000 3001 if (!overflow) { 3002 /* Point to next leaf page */ 3003 if (p->header.flag & BT_ROOT) 3004 bn = 0; 3005 else { 3006 bn = le64_to_cpu(p->header.next); 3007 index = 0; 3008 /* update offset (pn:index) for new page */ 3009 if (!do_index) { 3010 dtoffset->pn++; 3011 dtoffset->index = 0; 3012 } 3013 } 3014 page_fixed = 0; 3015 } 3016 3017 /* unpin previous leaf page */ 3018 DT_PUTPAGE(mp); 3019 3020 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3021 while (jfs_dirents--) { 3022 ctx->pos = jfs_dirent->position; 3023 if (!dir_emit(ctx, jfs_dirent->name, 3024 jfs_dirent->name_len, 3025 jfs_dirent->ino, DT_UNKNOWN)) 3026 goto out; 3027 jfs_dirent = next_jfs_dirent(jfs_dirent); 3028 } 3029 3030 if (fix_page) { 3031 if ((rc = add_missing_indices(ip, bn))) 3032 goto out; 3033 page_fixed = 1; 3034 } 3035 3036 if (!overflow && (bn == 0)) { 3037 ctx->pos = DIREND; 3038 break; 3039 } 3040 3041 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3042 if (rc) { 3043 free_page(dirent_buf); 3044 return rc; 3045 } 3046 } 3047 3048 out: 3049 free_page(dirent_buf); 3050 3051 return rc; 3052 } 3053 3054 3055 /* 3056 * dtReadFirst() 3057 * 3058 * function: get the leftmost page of the directory 3059 */ 3060 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3061 { 3062 int rc = 0; 3063 s64 bn; 3064 int psize = 288; /* initial in-line directory */ 3065 struct metapage *mp; 3066 dtpage_t *p; 3067 s8 *stbl; 3068 struct btframe *btsp; 3069 pxd_t *xd; 3070 3071 BT_CLR(btstack); /* reset stack */ 3072 3073 /* 3074 * descend leftmost path of the tree 3075 * 3076 * by convention, root bn = 0. 3077 */ 3078 for (bn = 0;;) { 3079 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3080 if (rc) 3081 return rc; 3082 3083 /* 3084 * leftmost leaf page 3085 */ 3086 if (p->header.flag & BT_LEAF) { 3087 /* return leftmost entry */ 3088 btsp = btstack->top; 3089 btsp->bn = bn; 3090 btsp->index = 0; 3091 btsp->mp = mp; 3092 3093 return 0; 3094 } 3095 3096 /* 3097 * descend down to leftmost child page 3098 */ 3099 if (BT_STACK_FULL(btstack)) { 3100 DT_PUTPAGE(mp); 3101 jfs_error(ip->i_sb, "btstack overrun\n"); 3102 BT_STACK_DUMP(btstack); 3103 return -EIO; 3104 } 3105 /* push (bn, index) of the parent page/entry */ 3106 BT_PUSH(btstack, bn, 0); 3107 3108 /* get the leftmost entry */ 3109 stbl = DT_GETSTBL(p); 3110 3111 if (stbl[0] < 0 || stbl[0] >= DTPAGEMAXSLOT) { 3112 DT_PUTPAGE(mp); 3113 jfs_error(ip->i_sb, "stbl[0] out of bound\n"); 3114 return -EIO; 3115 } 3116 3117 xd = (pxd_t *) & p->slot[stbl[0]]; 3118 3119 /* get the child page block address */ 3120 bn = addressPXD(xd); 3121 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3122 3123 /* unpin the parent page */ 3124 DT_PUTPAGE(mp); 3125 } 3126 } 3127 3128 3129 /* 3130 * dtReadNext() 3131 * 3132 * function: get the page of the specified offset (pn:index) 3133 * 3134 * return: if (offset > eof), bn = -1; 3135 * 3136 * note: if index > nextindex of the target leaf page, 3137 * start with 1st entry of next leaf page; 3138 */ 3139 static int dtReadNext(struct inode *ip, loff_t * offset, 3140 struct btstack * btstack) 3141 { 3142 int rc = 0; 3143 struct dtoffset { 3144 s16 pn; 3145 s16 index; 3146 s32 unused; 3147 } *dtoffset = (struct dtoffset *) offset; 3148 s64 bn; 3149 struct metapage *mp; 3150 dtpage_t *p; 3151 int index; 3152 int pn; 3153 s8 *stbl; 3154 struct btframe *btsp, *parent; 3155 pxd_t *xd; 3156 3157 /* 3158 * get leftmost leaf page pinned 3159 */ 3160 if ((rc = dtReadFirst(ip, btstack))) 3161 return rc; 3162 3163 /* get leaf page */ 3164 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3165 3166 /* get the start offset (pn:index) */ 3167 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3168 index = dtoffset->index; 3169 3170 /* start at leftmost page ? */ 3171 if (pn == 0) { 3172 /* offset beyond eof ? */ 3173 if (index < p->header.nextindex) 3174 goto out; 3175 3176 if (p->header.flag & BT_ROOT) { 3177 bn = -1; 3178 goto out; 3179 } 3180 3181 /* start with 1st entry of next leaf page */ 3182 dtoffset->pn++; 3183 dtoffset->index = index = 0; 3184 goto a; 3185 } 3186 3187 /* start at non-leftmost page: scan parent pages for large pn */ 3188 if (p->header.flag & BT_ROOT) { 3189 bn = -1; 3190 goto out; 3191 } 3192 3193 /* start after next leaf page ? */ 3194 if (pn > 1) 3195 goto b; 3196 3197 /* get leaf page pn = 1 */ 3198 a: 3199 bn = le64_to_cpu(p->header.next); 3200 3201 /* unpin leaf page */ 3202 DT_PUTPAGE(mp); 3203 3204 /* offset beyond eof ? */ 3205 if (bn == 0) { 3206 bn = -1; 3207 goto out; 3208 } 3209 3210 goto c; 3211 3212 /* 3213 * scan last internal page level to get target leaf page 3214 */ 3215 b: 3216 /* unpin leftmost leaf page */ 3217 DT_PUTPAGE(mp); 3218 3219 /* get left most parent page */ 3220 btsp = btstack->top; 3221 parent = btsp - 1; 3222 bn = parent->bn; 3223 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3224 if (rc) 3225 return rc; 3226 3227 /* scan parent pages at last internal page level */ 3228 while (pn >= p->header.nextindex) { 3229 pn -= p->header.nextindex; 3230 3231 /* get next parent page address */ 3232 bn = le64_to_cpu(p->header.next); 3233 3234 /* unpin current parent page */ 3235 DT_PUTPAGE(mp); 3236 3237 /* offset beyond eof ? */ 3238 if (bn == 0) { 3239 bn = -1; 3240 goto out; 3241 } 3242 3243 /* get next parent page */ 3244 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3245 if (rc) 3246 return rc; 3247 3248 /* update parent page stack frame */ 3249 parent->bn = bn; 3250 } 3251 3252 /* get leaf page address */ 3253 stbl = DT_GETSTBL(p); 3254 xd = (pxd_t *) & p->slot[stbl[pn]]; 3255 bn = addressPXD(xd); 3256 3257 /* unpin parent page */ 3258 DT_PUTPAGE(mp); 3259 3260 /* 3261 * get target leaf page 3262 */ 3263 c: 3264 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3265 if (rc) 3266 return rc; 3267 3268 /* 3269 * leaf page has been completed: 3270 * start with 1st entry of next leaf page 3271 */ 3272 if (index >= p->header.nextindex) { 3273 bn = le64_to_cpu(p->header.next); 3274 3275 /* unpin leaf page */ 3276 DT_PUTPAGE(mp); 3277 3278 /* offset beyond eof ? */ 3279 if (bn == 0) { 3280 bn = -1; 3281 goto out; 3282 } 3283 3284 /* get next leaf page */ 3285 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3286 if (rc) 3287 return rc; 3288 3289 /* start with 1st entry of next leaf page */ 3290 dtoffset->pn++; 3291 dtoffset->index = 0; 3292 } 3293 3294 out: 3295 /* return target leaf page pinned */ 3296 btsp = btstack->top; 3297 btsp->bn = bn; 3298 btsp->index = dtoffset->index; 3299 btsp->mp = mp; 3300 3301 return 0; 3302 } 3303 3304 3305 /* 3306 * dtCompare() 3307 * 3308 * function: compare search key with an internal entry 3309 * 3310 * return: 3311 * < 0 if k is < record 3312 * = 0 if k is = record 3313 * > 0 if k is > record 3314 */ 3315 static int dtCompare(struct component_name * key, /* search key */ 3316 dtpage_t * p, /* directory page */ 3317 int si) 3318 { /* entry slot index */ 3319 wchar_t *kname; 3320 __le16 *name; 3321 int klen, namlen, len, rc; 3322 struct idtentry *ih; 3323 struct dtslot *t; 3324 3325 /* 3326 * force the left-most key on internal pages, at any level of 3327 * the tree, to be less than any search key. 3328 * this obviates having to update the leftmost key on an internal 3329 * page when the user inserts a new key in the tree smaller than 3330 * anything that has been stored. 3331 * 3332 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3333 * at any internal page at any level of the tree, 3334 * it descends to child of the entry anyway - 3335 * ? make the entry as min size dummy entry) 3336 * 3337 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3338 * return (1); 3339 */ 3340 3341 kname = key->name; 3342 klen = key->namlen; 3343 3344 ih = (struct idtentry *) & p->slot[si]; 3345 si = ih->next; 3346 name = ih->name; 3347 namlen = ih->namlen; 3348 len = min(namlen, DTIHDRDATALEN); 3349 3350 /* compare with head/only segment */ 3351 len = min(klen, len); 3352 if ((rc = UniStrncmp_le(kname, name, len))) 3353 return rc; 3354 3355 klen -= len; 3356 namlen -= len; 3357 3358 /* compare with additional segment(s) */ 3359 kname += len; 3360 while (klen > 0 && namlen > 0) { 3361 /* compare with next name segment */ 3362 t = (struct dtslot *) & p->slot[si]; 3363 len = min(namlen, DTSLOTDATALEN); 3364 len = min(klen, len); 3365 name = t->name; 3366 if ((rc = UniStrncmp_le(kname, name, len))) 3367 return rc; 3368 3369 klen -= len; 3370 namlen -= len; 3371 kname += len; 3372 si = t->next; 3373 } 3374 3375 return (klen - namlen); 3376 } 3377 3378 3379 3380 3381 /* 3382 * ciCompare() 3383 * 3384 * function: compare search key with an (leaf/internal) entry 3385 * 3386 * return: 3387 * < 0 if k is < record 3388 * = 0 if k is = record 3389 * > 0 if k is > record 3390 */ 3391 static int ciCompare(struct component_name * key, /* search key */ 3392 dtpage_t * p, /* directory page */ 3393 int si, /* entry slot index */ 3394 int flag) 3395 { 3396 wchar_t *kname, x; 3397 __le16 *name; 3398 int klen, namlen, len, rc; 3399 struct ldtentry *lh; 3400 struct idtentry *ih; 3401 struct dtslot *t; 3402 int i; 3403 3404 /* 3405 * force the left-most key on internal pages, at any level of 3406 * the tree, to be less than any search key. 3407 * this obviates having to update the leftmost key on an internal 3408 * page when the user inserts a new key in the tree smaller than 3409 * anything that has been stored. 3410 * 3411 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3412 * at any internal page at any level of the tree, 3413 * it descends to child of the entry anyway - 3414 * ? make the entry as min size dummy entry) 3415 * 3416 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3417 * return (1); 3418 */ 3419 3420 kname = key->name; 3421 klen = key->namlen; 3422 3423 /* 3424 * leaf page entry 3425 */ 3426 if (p->header.flag & BT_LEAF) { 3427 lh = (struct ldtentry *) & p->slot[si]; 3428 si = lh->next; 3429 name = lh->name; 3430 namlen = lh->namlen; 3431 if (flag & JFS_DIR_INDEX) 3432 len = min(namlen, DTLHDRDATALEN); 3433 else 3434 len = min(namlen, DTLHDRDATALEN_LEGACY); 3435 } 3436 /* 3437 * internal page entry 3438 */ 3439 else { 3440 ih = (struct idtentry *) & p->slot[si]; 3441 si = ih->next; 3442 name = ih->name; 3443 namlen = ih->namlen; 3444 len = min(namlen, DTIHDRDATALEN); 3445 } 3446 3447 /* compare with head/only segment */ 3448 len = min(klen, len); 3449 for (i = 0; i < len; i++, kname++, name++) { 3450 /* only uppercase if case-insensitive support is on */ 3451 if ((flag & JFS_OS2) == JFS_OS2) 3452 x = UniToupper(le16_to_cpu(*name)); 3453 else 3454 x = le16_to_cpu(*name); 3455 if ((rc = *kname - x)) 3456 return rc; 3457 } 3458 3459 klen -= len; 3460 namlen -= len; 3461 3462 /* compare with additional segment(s) */ 3463 while (klen > 0 && namlen > 0) { 3464 /* compare with next name segment */ 3465 t = (struct dtslot *) & p->slot[si]; 3466 len = min(namlen, DTSLOTDATALEN); 3467 len = min(klen, len); 3468 name = t->name; 3469 for (i = 0; i < len; i++, kname++, name++) { 3470 /* only uppercase if case-insensitive support is on */ 3471 if ((flag & JFS_OS2) == JFS_OS2) 3472 x = UniToupper(le16_to_cpu(*name)); 3473 else 3474 x = le16_to_cpu(*name); 3475 3476 if ((rc = *kname - x)) 3477 return rc; 3478 } 3479 3480 klen -= len; 3481 namlen -= len; 3482 si = t->next; 3483 } 3484 3485 return (klen - namlen); 3486 } 3487 3488 3489 /* 3490 * ciGetLeafPrefixKey() 3491 * 3492 * function: compute prefix of suffix compression 3493 * from two adjacent leaf entries 3494 * across page boundary 3495 * 3496 * return: non-zero on error 3497 * 3498 */ 3499 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3500 int ri, struct component_name * key, int flag) 3501 { 3502 int klen, namlen; 3503 wchar_t *pl, *pr, *kname; 3504 struct component_name lkey; 3505 struct component_name rkey; 3506 3507 lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3508 GFP_KERNEL); 3509 if (lkey.name == NULL) 3510 return -ENOMEM; 3511 3512 rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3513 GFP_KERNEL); 3514 if (rkey.name == NULL) { 3515 kfree(lkey.name); 3516 return -ENOMEM; 3517 } 3518 3519 /* get left and right key */ 3520 dtGetKey(lp, li, &lkey, flag); 3521 lkey.name[lkey.namlen] = 0; 3522 3523 if ((flag & JFS_OS2) == JFS_OS2) 3524 ciToUpper(&lkey); 3525 3526 dtGetKey(rp, ri, &rkey, flag); 3527 rkey.name[rkey.namlen] = 0; 3528 3529 3530 if ((flag & JFS_OS2) == JFS_OS2) 3531 ciToUpper(&rkey); 3532 3533 /* compute prefix */ 3534 klen = 0; 3535 kname = key->name; 3536 namlen = min(lkey.namlen, rkey.namlen); 3537 for (pl = lkey.name, pr = rkey.name; 3538 namlen; pl++, pr++, namlen--, klen++, kname++) { 3539 *kname = *pr; 3540 if (*pl != *pr) { 3541 key->namlen = klen + 1; 3542 goto free_names; 3543 } 3544 } 3545 3546 /* l->namlen <= r->namlen since l <= r */ 3547 if (lkey.namlen < rkey.namlen) { 3548 *kname = *pr; 3549 key->namlen = klen + 1; 3550 } else /* l->namelen == r->namelen */ 3551 key->namlen = klen; 3552 3553 free_names: 3554 kfree(lkey.name); 3555 kfree(rkey.name); 3556 return 0; 3557 } 3558 3559 3560 3561 /* 3562 * dtGetKey() 3563 * 3564 * function: get key of the entry 3565 */ 3566 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3567 struct component_name * key, int flag) 3568 { 3569 int si; 3570 s8 *stbl; 3571 struct ldtentry *lh; 3572 struct idtentry *ih; 3573 struct dtslot *t; 3574 int namlen, len; 3575 wchar_t *kname; 3576 __le16 *name; 3577 3578 /* get entry */ 3579 stbl = DT_GETSTBL(p); 3580 si = stbl[i]; 3581 if (p->header.flag & BT_LEAF) { 3582 lh = (struct ldtentry *) & p->slot[si]; 3583 si = lh->next; 3584 namlen = lh->namlen; 3585 name = lh->name; 3586 if (flag & JFS_DIR_INDEX) 3587 len = min(namlen, DTLHDRDATALEN); 3588 else 3589 len = min(namlen, DTLHDRDATALEN_LEGACY); 3590 } else { 3591 ih = (struct idtentry *) & p->slot[si]; 3592 si = ih->next; 3593 namlen = ih->namlen; 3594 name = ih->name; 3595 len = min(namlen, DTIHDRDATALEN); 3596 } 3597 3598 key->namlen = namlen; 3599 kname = key->name; 3600 3601 /* 3602 * move head/only segment 3603 */ 3604 UniStrncpy_from_le(kname, name, len); 3605 3606 /* 3607 * move additional segment(s) 3608 */ 3609 while (si >= 0) { 3610 /* get next segment */ 3611 t = &p->slot[si]; 3612 kname += len; 3613 namlen -= len; 3614 len = min(namlen, DTSLOTDATALEN); 3615 UniStrncpy_from_le(kname, t->name, len); 3616 3617 si = t->next; 3618 } 3619 } 3620 3621 3622 /* 3623 * dtInsertEntry() 3624 * 3625 * function: allocate free slot(s) and 3626 * write a leaf/internal entry 3627 * 3628 * return: entry slot index 3629 */ 3630 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3631 ddata_t * data, struct dt_lock ** dtlock) 3632 { 3633 struct dtslot *h, *t; 3634 struct ldtentry *lh = NULL; 3635 struct idtentry *ih = NULL; 3636 int hsi, fsi, klen, len, nextindex; 3637 wchar_t *kname; 3638 __le16 *name; 3639 s8 *stbl; 3640 pxd_t *xd; 3641 struct dt_lock *dtlck = *dtlock; 3642 struct lv *lv; 3643 int xsi, n; 3644 s64 bn = 0; 3645 struct metapage *mp = NULL; 3646 3647 klen = key->namlen; 3648 kname = key->name; 3649 3650 /* allocate a free slot */ 3651 hsi = fsi = p->header.freelist; 3652 h = &p->slot[fsi]; 3653 p->header.freelist = h->next; 3654 --p->header.freecnt; 3655 3656 /* open new linelock */ 3657 if (dtlck->index >= dtlck->maxcnt) 3658 dtlck = (struct dt_lock *) txLinelock(dtlck); 3659 3660 lv = & dtlck->lv[dtlck->index]; 3661 lv->offset = hsi; 3662 3663 /* write head/only segment */ 3664 if (p->header.flag & BT_LEAF) { 3665 lh = (struct ldtentry *) h; 3666 lh->next = h->next; 3667 lh->inumber = cpu_to_le32(data->leaf.ino); 3668 lh->namlen = klen; 3669 name = lh->name; 3670 if (data->leaf.ip) { 3671 len = min(klen, DTLHDRDATALEN); 3672 if (!(p->header.flag & BT_ROOT)) 3673 bn = addressPXD(&p->header.self); 3674 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3675 data->leaf.ip, 3676 bn, index)); 3677 } else 3678 len = min(klen, DTLHDRDATALEN_LEGACY); 3679 } else { 3680 ih = (struct idtentry *) h; 3681 ih->next = h->next; 3682 xd = (pxd_t *) ih; 3683 *xd = data->xd; 3684 ih->namlen = klen; 3685 name = ih->name; 3686 len = min(klen, DTIHDRDATALEN); 3687 } 3688 3689 UniStrncpy_to_le(name, kname, len); 3690 3691 n = 1; 3692 xsi = hsi; 3693 3694 /* write additional segment(s) */ 3695 t = h; 3696 klen -= len; 3697 while (klen) { 3698 /* get free slot */ 3699 fsi = p->header.freelist; 3700 t = &p->slot[fsi]; 3701 p->header.freelist = t->next; 3702 --p->header.freecnt; 3703 3704 /* is next slot contiguous ? */ 3705 if (fsi != xsi + 1) { 3706 /* close current linelock */ 3707 lv->length = n; 3708 dtlck->index++; 3709 3710 /* open new linelock */ 3711 if (dtlck->index < dtlck->maxcnt) 3712 lv++; 3713 else { 3714 dtlck = (struct dt_lock *) txLinelock(dtlck); 3715 lv = & dtlck->lv[0]; 3716 } 3717 3718 lv->offset = fsi; 3719 n = 0; 3720 } 3721 3722 kname += len; 3723 len = min(klen, DTSLOTDATALEN); 3724 UniStrncpy_to_le(t->name, kname, len); 3725 3726 n++; 3727 xsi = fsi; 3728 klen -= len; 3729 } 3730 3731 /* close current linelock */ 3732 lv->length = n; 3733 dtlck->index++; 3734 3735 *dtlock = dtlck; 3736 3737 /* terminate last/only segment */ 3738 if (h == t) { 3739 /* single segment entry */ 3740 if (p->header.flag & BT_LEAF) 3741 lh->next = -1; 3742 else 3743 ih->next = -1; 3744 } else 3745 /* multi-segment entry */ 3746 t->next = -1; 3747 3748 /* if insert into middle, shift right succeeding entries in stbl */ 3749 stbl = DT_GETSTBL(p); 3750 nextindex = p->header.nextindex; 3751 if (index < nextindex) { 3752 memmove(stbl + index + 1, stbl + index, nextindex - index); 3753 3754 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 3755 s64 lblock; 3756 3757 /* 3758 * Need to update slot number for entries that moved 3759 * in the stbl 3760 */ 3761 mp = NULL; 3762 for (n = index + 1; n <= nextindex; n++) { 3763 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 3764 modify_index(data->leaf.tid, data->leaf.ip, 3765 le32_to_cpu(lh->index), bn, n, 3766 &mp, &lblock); 3767 } 3768 if (mp) 3769 release_metapage(mp); 3770 } 3771 } 3772 3773 stbl[index] = hsi; 3774 3775 /* advance next available entry index of stbl */ 3776 ++p->header.nextindex; 3777 } 3778 3779 3780 /* 3781 * dtMoveEntry() 3782 * 3783 * function: move entries from split/left page to new/right page 3784 * 3785 * nextindex of dst page and freelist/freecnt of both pages 3786 * are updated. 3787 */ 3788 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 3789 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 3790 int do_index) 3791 { 3792 int ssi, next; /* src slot index */ 3793 int di; /* dst entry index */ 3794 int dsi; /* dst slot index */ 3795 s8 *sstbl, *dstbl; /* sorted entry table */ 3796 int snamlen, len; 3797 struct ldtentry *slh, *dlh = NULL; 3798 struct idtentry *sih, *dih = NULL; 3799 struct dtslot *h, *s, *d; 3800 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 3801 struct lv *slv, *dlv; 3802 int xssi, ns, nd; 3803 int sfsi; 3804 3805 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 3806 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 3807 3808 dsi = dp->header.freelist; /* first (whole page) free slot */ 3809 sfsi = sp->header.freelist; 3810 3811 /* linelock destination entry slot */ 3812 dlv = & ddtlck->lv[ddtlck->index]; 3813 dlv->offset = dsi; 3814 3815 /* linelock source entry slot */ 3816 slv = & sdtlck->lv[sdtlck->index]; 3817 slv->offset = sstbl[si]; 3818 xssi = slv->offset - 1; 3819 3820 /* 3821 * move entries 3822 */ 3823 ns = nd = 0; 3824 for (di = 0; si < sp->header.nextindex; si++, di++) { 3825 ssi = sstbl[si]; 3826 dstbl[di] = dsi; 3827 3828 /* is next slot contiguous ? */ 3829 if (ssi != xssi + 1) { 3830 /* close current linelock */ 3831 slv->length = ns; 3832 sdtlck->index++; 3833 3834 /* open new linelock */ 3835 if (sdtlck->index < sdtlck->maxcnt) 3836 slv++; 3837 else { 3838 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 3839 slv = & sdtlck->lv[0]; 3840 } 3841 3842 slv->offset = ssi; 3843 ns = 0; 3844 } 3845 3846 /* 3847 * move head/only segment of an entry 3848 */ 3849 /* get dst slot */ 3850 h = d = &dp->slot[dsi]; 3851 3852 /* get src slot and move */ 3853 s = &sp->slot[ssi]; 3854 if (sp->header.flag & BT_LEAF) { 3855 /* get source entry */ 3856 slh = (struct ldtentry *) s; 3857 dlh = (struct ldtentry *) h; 3858 snamlen = slh->namlen; 3859 3860 if (do_index) { 3861 len = min(snamlen, DTLHDRDATALEN); 3862 dlh->index = slh->index; /* little-endian */ 3863 } else 3864 len = min(snamlen, DTLHDRDATALEN_LEGACY); 3865 3866 memcpy(dlh, slh, 6 + len * 2); 3867 3868 next = slh->next; 3869 3870 /* update dst head/only segment next field */ 3871 dsi++; 3872 dlh->next = dsi; 3873 } else { 3874 sih = (struct idtentry *) s; 3875 snamlen = sih->namlen; 3876 3877 len = min(snamlen, DTIHDRDATALEN); 3878 dih = (struct idtentry *) h; 3879 memcpy(dih, sih, 10 + len * 2); 3880 next = sih->next; 3881 3882 dsi++; 3883 dih->next = dsi; 3884 } 3885 3886 /* free src head/only segment */ 3887 s->next = sfsi; 3888 s->cnt = 1; 3889 sfsi = ssi; 3890 3891 ns++; 3892 nd++; 3893 xssi = ssi; 3894 3895 /* 3896 * move additional segment(s) of the entry 3897 */ 3898 snamlen -= len; 3899 while ((ssi = next) >= 0) { 3900 /* is next slot contiguous ? */ 3901 if (ssi != xssi + 1) { 3902 /* close current linelock */ 3903 slv->length = ns; 3904 sdtlck->index++; 3905 3906 /* open new linelock */ 3907 if (sdtlck->index < sdtlck->maxcnt) 3908 slv++; 3909 else { 3910 sdtlck = 3911 (struct dt_lock *) 3912 txLinelock(sdtlck); 3913 slv = & sdtlck->lv[0]; 3914 } 3915 3916 slv->offset = ssi; 3917 ns = 0; 3918 } 3919 3920 /* get next source segment */ 3921 s = &sp->slot[ssi]; 3922 3923 /* get next destination free slot */ 3924 d++; 3925 3926 len = min(snamlen, DTSLOTDATALEN); 3927 UniStrncpy_le(d->name, s->name, len); 3928 3929 ns++; 3930 nd++; 3931 xssi = ssi; 3932 3933 dsi++; 3934 d->next = dsi; 3935 3936 /* free source segment */ 3937 next = s->next; 3938 s->next = sfsi; 3939 s->cnt = 1; 3940 sfsi = ssi; 3941 3942 snamlen -= len; 3943 } /* end while */ 3944 3945 /* terminate dst last/only segment */ 3946 if (h == d) { 3947 /* single segment entry */ 3948 if (dp->header.flag & BT_LEAF) 3949 dlh->next = -1; 3950 else 3951 dih->next = -1; 3952 } else 3953 /* multi-segment entry */ 3954 d->next = -1; 3955 } /* end for */ 3956 3957 /* close current linelock */ 3958 slv->length = ns; 3959 sdtlck->index++; 3960 *sdtlock = sdtlck; 3961 3962 dlv->length = nd; 3963 ddtlck->index++; 3964 *ddtlock = ddtlck; 3965 3966 /* update source header */ 3967 sp->header.freelist = sfsi; 3968 sp->header.freecnt += nd; 3969 3970 /* update destination header */ 3971 dp->header.nextindex = di; 3972 3973 dp->header.freelist = dsi; 3974 dp->header.freecnt -= nd; 3975 } 3976 3977 3978 /* 3979 * dtDeleteEntry() 3980 * 3981 * function: free a (leaf/internal) entry 3982 * 3983 * log freelist header, stbl, and each segment slot of entry 3984 * (even though last/only segment next field is modified, 3985 * physical image logging requires all segment slots of 3986 * the entry logged to avoid applying previous updates 3987 * to the same slots) 3988 */ 3989 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 3990 { 3991 int fsi; /* free entry slot index */ 3992 s8 *stbl; 3993 struct dtslot *t; 3994 int si, freecnt; 3995 struct dt_lock *dtlck = *dtlock; 3996 struct lv *lv; 3997 int xsi, n; 3998 3999 /* get free entry slot index */ 4000 stbl = DT_GETSTBL(p); 4001 fsi = stbl[fi]; 4002 4003 /* open new linelock */ 4004 if (dtlck->index >= dtlck->maxcnt) 4005 dtlck = (struct dt_lock *) txLinelock(dtlck); 4006 lv = & dtlck->lv[dtlck->index]; 4007 4008 lv->offset = fsi; 4009 4010 /* get the head/only segment */ 4011 t = &p->slot[fsi]; 4012 if (p->header.flag & BT_LEAF) 4013 si = ((struct ldtentry *) t)->next; 4014 else 4015 si = ((struct idtentry *) t)->next; 4016 t->next = si; 4017 t->cnt = 1; 4018 4019 n = freecnt = 1; 4020 xsi = fsi; 4021 4022 /* find the last/only segment */ 4023 while (si >= 0) { 4024 /* is next slot contiguous ? */ 4025 if (si != xsi + 1) { 4026 /* close current linelock */ 4027 lv->length = n; 4028 dtlck->index++; 4029 4030 /* open new linelock */ 4031 if (dtlck->index < dtlck->maxcnt) 4032 lv++; 4033 else { 4034 dtlck = (struct dt_lock *) txLinelock(dtlck); 4035 lv = & dtlck->lv[0]; 4036 } 4037 4038 lv->offset = si; 4039 n = 0; 4040 } 4041 4042 n++; 4043 xsi = si; 4044 freecnt++; 4045 4046 t = &p->slot[si]; 4047 t->cnt = 1; 4048 si = t->next; 4049 } 4050 4051 /* close current linelock */ 4052 lv->length = n; 4053 dtlck->index++; 4054 4055 *dtlock = dtlck; 4056 4057 /* update freelist */ 4058 t->next = p->header.freelist; 4059 p->header.freelist = fsi; 4060 p->header.freecnt += freecnt; 4061 4062 /* if delete from middle, 4063 * shift left the succedding entries in the stbl 4064 */ 4065 si = p->header.nextindex; 4066 if (fi < si - 1) 4067 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4068 4069 p->header.nextindex--; 4070 } 4071 4072 4073 /* 4074 * dtTruncateEntry() 4075 * 4076 * function: truncate a (leaf/internal) entry 4077 * 4078 * log freelist header, stbl, and each segment slot of entry 4079 * (even though last/only segment next field is modified, 4080 * physical image logging requires all segment slots of 4081 * the entry logged to avoid applying previous updates 4082 * to the same slots) 4083 */ 4084 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4085 { 4086 int tsi; /* truncate entry slot index */ 4087 s8 *stbl; 4088 struct dtslot *t; 4089 int si, freecnt; 4090 struct dt_lock *dtlck = *dtlock; 4091 struct lv *lv; 4092 int fsi, xsi, n; 4093 4094 /* get free entry slot index */ 4095 stbl = DT_GETSTBL(p); 4096 tsi = stbl[ti]; 4097 4098 /* open new linelock */ 4099 if (dtlck->index >= dtlck->maxcnt) 4100 dtlck = (struct dt_lock *) txLinelock(dtlck); 4101 lv = & dtlck->lv[dtlck->index]; 4102 4103 lv->offset = tsi; 4104 4105 /* get the head/only segment */ 4106 t = &p->slot[tsi]; 4107 ASSERT(p->header.flag & BT_INTERNAL); 4108 ((struct idtentry *) t)->namlen = 0; 4109 si = ((struct idtentry *) t)->next; 4110 ((struct idtentry *) t)->next = -1; 4111 4112 n = 1; 4113 freecnt = 0; 4114 fsi = si; 4115 xsi = tsi; 4116 4117 /* find the last/only segment */ 4118 while (si >= 0) { 4119 /* is next slot contiguous ? */ 4120 if (si != xsi + 1) { 4121 /* close current linelock */ 4122 lv->length = n; 4123 dtlck->index++; 4124 4125 /* open new linelock */ 4126 if (dtlck->index < dtlck->maxcnt) 4127 lv++; 4128 else { 4129 dtlck = (struct dt_lock *) txLinelock(dtlck); 4130 lv = & dtlck->lv[0]; 4131 } 4132 4133 lv->offset = si; 4134 n = 0; 4135 } 4136 4137 n++; 4138 xsi = si; 4139 freecnt++; 4140 4141 t = &p->slot[si]; 4142 t->cnt = 1; 4143 si = t->next; 4144 } 4145 4146 /* close current linelock */ 4147 lv->length = n; 4148 dtlck->index++; 4149 4150 *dtlock = dtlck; 4151 4152 /* update freelist */ 4153 if (freecnt == 0) 4154 return; 4155 t->next = p->header.freelist; 4156 p->header.freelist = fsi; 4157 p->header.freecnt += freecnt; 4158 } 4159 4160 4161 /* 4162 * dtLinelockFreelist() 4163 */ 4164 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4165 int m, /* max slot index */ 4166 struct dt_lock ** dtlock) 4167 { 4168 int fsi; /* free entry slot index */ 4169 struct dtslot *t; 4170 int si; 4171 struct dt_lock *dtlck = *dtlock; 4172 struct lv *lv; 4173 int xsi, n; 4174 4175 /* get free entry slot index */ 4176 fsi = p->header.freelist; 4177 4178 /* open new linelock */ 4179 if (dtlck->index >= dtlck->maxcnt) 4180 dtlck = (struct dt_lock *) txLinelock(dtlck); 4181 lv = & dtlck->lv[dtlck->index]; 4182 4183 lv->offset = fsi; 4184 4185 n = 1; 4186 xsi = fsi; 4187 4188 t = &p->slot[fsi]; 4189 si = t->next; 4190 4191 /* find the last/only segment */ 4192 while (si < m && si >= 0) { 4193 /* is next slot contiguous ? */ 4194 if (si != xsi + 1) { 4195 /* close current linelock */ 4196 lv->length = n; 4197 dtlck->index++; 4198 4199 /* open new linelock */ 4200 if (dtlck->index < dtlck->maxcnt) 4201 lv++; 4202 else { 4203 dtlck = (struct dt_lock *) txLinelock(dtlck); 4204 lv = & dtlck->lv[0]; 4205 } 4206 4207 lv->offset = si; 4208 n = 0; 4209 } 4210 4211 n++; 4212 xsi = si; 4213 4214 t = &p->slot[si]; 4215 si = t->next; 4216 } 4217 4218 /* close current linelock */ 4219 lv->length = n; 4220 dtlck->index++; 4221 4222 *dtlock = dtlck; 4223 } 4224 4225 4226 /* 4227 * NAME: dtModify 4228 * 4229 * FUNCTION: Modify the inode number part of a directory entry 4230 * 4231 * PARAMETERS: 4232 * tid - Transaction id 4233 * ip - Inode of parent directory 4234 * key - Name of entry to be modified 4235 * orig_ino - Original inode number expected in entry 4236 * new_ino - New inode number to put into entry 4237 * flag - JFS_RENAME 4238 * 4239 * RETURNS: 4240 * -ESTALE - If entry found does not match orig_ino passed in 4241 * -ENOENT - If no entry can be found to match key 4242 * 0 - If successfully modified entry 4243 */ 4244 int dtModify(tid_t tid, struct inode *ip, 4245 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4246 { 4247 int rc; 4248 s64 bn; 4249 struct metapage *mp; 4250 dtpage_t *p; 4251 int index; 4252 struct btstack btstack; 4253 struct tlock *tlck; 4254 struct dt_lock *dtlck; 4255 struct lv *lv; 4256 s8 *stbl; 4257 int entry_si; /* entry slot index */ 4258 struct ldtentry *entry; 4259 4260 /* 4261 * search for the entry to modify: 4262 * 4263 * dtSearch() returns (leaf page pinned, index at which to modify). 4264 */ 4265 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4266 return rc; 4267 4268 /* retrieve search result */ 4269 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4270 4271 BT_MARK_DIRTY(mp, ip); 4272 /* 4273 * acquire a transaction lock on the leaf page of named entry 4274 */ 4275 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4276 dtlck = (struct dt_lock *) & tlck->lock; 4277 4278 /* get slot index of the entry */ 4279 stbl = DT_GETSTBL(p); 4280 entry_si = stbl[index]; 4281 4282 /* linelock entry */ 4283 ASSERT(dtlck->index == 0); 4284 lv = & dtlck->lv[0]; 4285 lv->offset = entry_si; 4286 lv->length = 1; 4287 dtlck->index++; 4288 4289 /* get the head/only segment */ 4290 entry = (struct ldtentry *) & p->slot[entry_si]; 4291 4292 /* substitute the inode number of the entry */ 4293 entry->inumber = cpu_to_le32(new_ino); 4294 4295 /* unpin the leaf page */ 4296 DT_PUTPAGE(mp); 4297 4298 return 0; 4299 } 4300