// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 */

/*
 *	jfs_dtree.c: directory B+-tree manager
 *
 * B+-tree with variable length key directory:
 *
 * each directory page is structured as an array of 32-byte
 * directory entry slots initialized as a freelist
 * to avoid search/compaction of free space at insertion.
 * when an entry is inserted, a number of slots are allocated
 * from the freelist as required to store the variable length data
 * of the entry; when the entry is deleted, slots of the entry
 * are returned to the freelist.
 *
 * leaf entry stores full name as key and file serial number
 * (aka inode number) as data.
 * internal/router entry stores suffix compressed name
 * as key and simple extent descriptor as data.
 *
 * each directory page maintains a sorted entry index table
 * which stores the start slot index of sorted entries
 * to allow binary search on the table.
 *
 * directory starts as a root/leaf page in the on-disk inode
 * inline data area.
 * when it becomes full, it starts a leaf of an external extent
 * of length 1 block. each time the first leaf becomes full,
 * it is extended rather than split (its size is doubled),
 * until its length becomes 4 KBytes; from then on the extent is split
 * off into a new 4 KByte extent when it becomes full,
 * to reduce external fragmentation of small directories.
 *
 * blah, blah, blah, for linear scan of directory in pieces by
 * readdir().
 *
 *
 *	case-insensitive directory file system
 *
 * names are stored in case-sensitive way in the leaf entry,
 * but sorted, searched and compared in case-insensitive (uppercase) order
 * (i.e., both search key and entry key are folded for search/compare):
 * (note that case-sensitive order is BROKEN in storage, e.g.,
 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
 *
 *  entries which fold to the same key make up an equivalence class
 *  whose members are stored as a contiguous cluster (may cross page boundary)
 *  but whose order is arbitrary and who act as duplicates, e.g.,
 *  abc, Abc, aBc, abC)
 *
 * once a match is found at the leaf, a scan forward/backward is required,
 * either for duplicates (in a case-insensitive search)
 * or for the exact match (in a case-sensitive search)
 *
 * router entry must be created/stored in case-insensitive way
 * in internal entry:
 * (right most key of left page and left most key of right page
 * are folded, and the suffix-compressed result is propagated as the
 * router key in the parent)
 * (e.g., if split occurs between <abc> and <aBd>, <ABD> rather than <aB>
 * should be made the router key for the split)
 *
 * case-insensitive search:
 *
 *	fold search key;
 *
 *	case-insensitive search of B-tree:
 *	for internal entry, router key is already folded;
 *	for leaf entry, fold the entry key before comparison.
 *
 *	if (leaf entry case-insensitive match found)
 *		if (next entry satisfies case-insensitive match)
 *			return EDUPLICATE;
 *		if (prev entry satisfies case-insensitive match)
 *			return EDUPLICATE;
 *		return match;
 *	else
 *		return no match;
 *
 *	serialization:
 * target directory inode lock is being held on entry/exit
 * of all main directory service routines.
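 *
 * as a concrete illustration of the slot layout described above
 * (numbers assume a 4 KiB page, PSIZE, and 32-byte slots; the actual
 * computation lives in dtSplitPage()/dtSplitRoot()):
 *
 *	a 4 KiB page holds 4096/32 = 128 slots;
 *	slot 0 holds the page header;
 *	the sorted entry index table (stbl) needs one byte per slot,
 *	i.e. (128 + 31)/32 = 4 slots;
 *	the remaining ~123 slots start out chained on the freelist
 *	and are handed out to entries as they are inserted.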
86 * 87 * log based recovery: 88 */ 89 90 #include <linux/fs.h> 91 #include <linux/quotaops.h> 92 #include <linux/slab.h> 93 #include "jfs_incore.h" 94 #include "jfs_superblock.h" 95 #include "jfs_filsys.h" 96 #include "jfs_metapage.h" 97 #include "jfs_dmap.h" 98 #include "jfs_unicode.h" 99 #include "jfs_debug.h" 100 101 /* dtree split parameter */ 102 struct dtsplit { 103 struct metapage *mp; 104 s16 index; 105 s16 nslot; 106 struct component_name *key; 107 ddata_t *data; 108 struct pxdlist *pxdlist; 109 }; 110 111 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 112 113 /* get page buffer for specified block address */ 114 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ 115 do { \ 116 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ 117 if (!(RC)) { \ 118 if (((P)->header.nextindex > \ 119 (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ 120 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ 121 BT_PUTPAGE(MP); \ 122 jfs_error((IP)->i_sb, \ 123 "DT_GETPAGE: dtree page corrupt\n"); \ 124 MP = NULL; \ 125 RC = -EIO; \ 126 } \ 127 } \ 128 } while (0) 129 130 /* for consistency */ 131 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 132 133 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 134 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 135 136 /* 137 * forward references 138 */ 139 static int dtSplitUp(tid_t tid, struct inode *ip, 140 struct dtsplit * split, struct btstack * btstack); 141 142 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 143 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 144 145 static int dtExtendPage(tid_t tid, struct inode *ip, 146 struct dtsplit * split, struct btstack * btstack); 147 148 static int dtSplitRoot(tid_t tid, struct inode *ip, 149 struct dtsplit * split, struct metapage ** rmpp); 150 151 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 152 dtpage_t * fp, struct btstack * btstack); 153 154 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 155 156 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 157 158 static int dtReadNext(struct inode *ip, 159 loff_t * offset, struct btstack * btstack); 160 161 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 162 163 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 164 int flag); 165 166 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 167 int flag); 168 169 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 170 int ri, struct component_name * key, int flag); 171 172 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 173 ddata_t * data, struct dt_lock **); 174 175 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 176 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 177 int do_index); 178 179 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 180 181 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 182 183 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 184 185 #define ciToUpper(c) UniStrupr((c)->name) 186 187 /* 188 * read_index_page() 189 * 190 * Reads a page of a directory's index table. 191 * Having metadata mapped into the directory inode's address space 192 * presents a multitude of problems. 
 * We avoid this by mapping to the absolute address space outside of
 * the *_metapage routines.
 */
static struct metapage *read_index_page(struct inode *inode, s64 blkno)
{
	int rc;
	s64 xaddr;
	int xflag;
	s32 xlen;

	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
	if (rc || (xaddr == 0))
		return NULL;

	return read_metapage(inode, xaddr, PSIZE, 1);
}

/*
 * get_index_page()
 *
 * Same as read_index_page(), but gets a new page without reading it.
 */
static struct metapage *get_index_page(struct inode *inode, s64 blkno)
{
	int rc;
	s64 xaddr;
	int xflag;
	s32 xlen;

	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
	if (rc || (xaddr == 0))
		return NULL;

	return get_metapage(inode, xaddr, PSIZE, 1);
}

/*
 * find_index()
 *
 * Returns dtree page containing directory table entry for specified
 * index and pointer to its entry.
 *
 * mp must be released by caller.
 */
static struct dir_table_slot *find_index(struct inode *ip, u32 index,
					 struct metapage ** mp, s64 *lblock)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	s64 blkno;
	s64 offset;
	int page_offset;
	struct dir_table_slot *slot;
	static int maxWarnings = 10;

	if (index < 2) {
		if (maxWarnings) {
			jfs_warn("find_entry called with index = %d", index);
			maxWarnings--;
		}
		return NULL;
	}

	if (index >= jfs_ip->next_index) {
		jfs_warn("find_entry called with index >= next_index");
		return NULL;
	}

	if (jfs_dirtable_inline(ip)) {
		/*
		 * Inline directory table
		 */
		*mp = NULL;
		slot = &jfs_ip->i_dirtable[index - 2];
	} else {
		offset = (index - 2) * sizeof(struct dir_table_slot);
		page_offset = offset & (PSIZE - 1);
		blkno = ((offset + 1) >> L2PSIZE) <<
		    JFS_SBI(ip->i_sb)->l2nbperpage;

		if (*mp && (*lblock != blkno)) {
			release_metapage(*mp);
			*mp = NULL;
		}
		if (!(*mp)) {
			*lblock = blkno;
			*mp = read_index_page(ip, blkno);
		}
		if (!(*mp)) {
			jfs_err("free_index: error reading directory table");
			return NULL;
		}

		slot = (struct dir_table_slot *) ((char *) (*mp)->data +
						  page_offset);
	}
	return slot;
}

static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
			      u32 index)
{
	struct tlock *tlck;
	struct linelock *llck;
	struct lv *lv;

	tlck = txLock(tid, ip, mp, tlckDATA);
	llck = (struct linelock *) tlck->lock;

	if (llck->index >= llck->maxcnt)
		llck = txLinelock(llck);
	lv = &llck->lv[llck->index];

	/*
	 * Linelock slot size is twice the size of directory table
	 * slot size.  512 entries per page.
	 */
	lv->offset = ((index - 2) & 511) >> 1;
	lv->length = 1;
	llck->index++;
}

/*
 * add_index()
 *
 * Adds an entry to the directory index table.
This is used to provide 318 * each directory entry with a persistent index in which to resume 319 * directory traversals 320 */ 321 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 322 { 323 struct super_block *sb = ip->i_sb; 324 struct jfs_sb_info *sbi = JFS_SBI(sb); 325 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 326 u64 blkno; 327 struct dir_table_slot *dirtab_slot; 328 u32 index; 329 struct linelock *llck; 330 struct lv *lv; 331 struct metapage *mp; 332 s64 offset; 333 uint page_offset; 334 struct tlock *tlck; 335 s64 xaddr; 336 337 ASSERT(DO_INDEX(ip)); 338 339 if (jfs_ip->next_index < 2) { 340 jfs_warn("add_index: next_index = %d. Resetting!", 341 jfs_ip->next_index); 342 jfs_ip->next_index = 2; 343 } 344 345 index = jfs_ip->next_index++; 346 347 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 348 /* 349 * i_size reflects size of index table, or 8 bytes per entry. 350 */ 351 ip->i_size = (loff_t) (index - 1) << 3; 352 353 /* 354 * dir table fits inline within inode 355 */ 356 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 357 dirtab_slot->flag = DIR_INDEX_VALID; 358 dirtab_slot->slot = slot; 359 DTSaddress(dirtab_slot, bn); 360 361 set_cflag(COMMIT_Dirtable, ip); 362 363 return index; 364 } 365 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 366 struct dir_table_slot temp_table[12]; 367 368 /* 369 * It's time to move the inline table to an external 370 * page and begin to build the xtree 371 */ 372 if (dquot_alloc_block(ip, sbi->nbperpage)) 373 goto clean_up; 374 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 375 dquot_free_block(ip, sbi->nbperpage); 376 goto clean_up; 377 } 378 379 /* 380 * Save the table, we're going to overwrite it with the 381 * xtree root 382 */ 383 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 384 385 /* 386 * Initialize empty x-tree 387 */ 388 xtInitRoot(tid, ip); 389 390 /* 391 * Add the first block to the xtree 392 */ 393 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 394 /* This really shouldn't fail */ 395 jfs_warn("add_index: xtInsert failed!"); 396 memcpy(&jfs_ip->i_dirtable, temp_table, 397 sizeof (temp_table)); 398 dbFree(ip, xaddr, sbi->nbperpage); 399 dquot_free_block(ip, sbi->nbperpage); 400 goto clean_up; 401 } 402 ip->i_size = PSIZE; 403 404 mp = get_index_page(ip, 0); 405 if (!mp) { 406 jfs_err("add_index: get_metapage failed!"); 407 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 408 memcpy(&jfs_ip->i_dirtable, temp_table, 409 sizeof (temp_table)); 410 goto clean_up; 411 } 412 tlck = txLock(tid, ip, mp, tlckDATA); 413 llck = (struct linelock *) & tlck->lock; 414 ASSERT(llck->index == 0); 415 lv = &llck->lv[0]; 416 417 lv->offset = 0; 418 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 419 llck->index++; 420 421 memcpy(mp->data, temp_table, sizeof(temp_table)); 422 423 mark_metapage_dirty(mp); 424 release_metapage(mp); 425 426 /* 427 * Logging is now directed by xtree tlocks 428 */ 429 clear_cflag(COMMIT_Dirtable, ip); 430 } 431 432 offset = (index - 2) * sizeof(struct dir_table_slot); 433 page_offset = offset & (PSIZE - 1); 434 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 435 if (page_offset == 0) { 436 /* 437 * This will be the beginning of a new page 438 */ 439 xaddr = 0; 440 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 441 jfs_warn("add_index: xtInsert failed!"); 442 goto clean_up; 443 } 444 ip->i_size += PSIZE; 445 446 if ((mp = get_index_page(ip, blkno))) 447 memset(mp->data, 0, PSIZE); /* Just looks better */ 448 else 449 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 450 } else 451 mp = 
read_index_page(ip, blkno); 452 453 if (!mp) { 454 jfs_err("add_index: get/read_metapage failed!"); 455 goto clean_up; 456 } 457 458 lock_index(tid, ip, mp, index); 459 460 dirtab_slot = 461 (struct dir_table_slot *) ((char *) mp->data + page_offset); 462 dirtab_slot->flag = DIR_INDEX_VALID; 463 dirtab_slot->slot = slot; 464 DTSaddress(dirtab_slot, bn); 465 466 mark_metapage_dirty(mp); 467 release_metapage(mp); 468 469 return index; 470 471 clean_up: 472 473 jfs_ip->next_index--; 474 475 return 0; 476 } 477 478 /* 479 * free_index() 480 * 481 * Marks an entry to the directory index table as free. 482 */ 483 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 484 { 485 struct dir_table_slot *dirtab_slot; 486 s64 lblock; 487 struct metapage *mp = NULL; 488 489 dirtab_slot = find_index(ip, index, &mp, &lblock); 490 491 if (!dirtab_slot) 492 return; 493 494 dirtab_slot->flag = DIR_INDEX_FREE; 495 dirtab_slot->slot = dirtab_slot->addr1 = 0; 496 dirtab_slot->addr2 = cpu_to_le32(next); 497 498 if (mp) { 499 lock_index(tid, ip, mp, index); 500 mark_metapage_dirty(mp); 501 release_metapage(mp); 502 } else 503 set_cflag(COMMIT_Dirtable, ip); 504 } 505 506 /* 507 * modify_index() 508 * 509 * Changes an entry in the directory index table 510 */ 511 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 512 int slot, struct metapage ** mp, s64 *lblock) 513 { 514 struct dir_table_slot *dirtab_slot; 515 516 dirtab_slot = find_index(ip, index, mp, lblock); 517 518 if (!dirtab_slot) 519 return; 520 521 DTSaddress(dirtab_slot, bn); 522 dirtab_slot->slot = slot; 523 524 if (*mp) { 525 lock_index(tid, ip, *mp, index); 526 mark_metapage_dirty(*mp); 527 } else 528 set_cflag(COMMIT_Dirtable, ip); 529 } 530 531 /* 532 * read_index() 533 * 534 * reads a directory table slot 535 */ 536 static int read_index(struct inode *ip, u32 index, 537 struct dir_table_slot * dirtab_slot) 538 { 539 s64 lblock; 540 struct metapage *mp = NULL; 541 struct dir_table_slot *slot; 542 543 slot = find_index(ip, index, &mp, &lblock); 544 if (!slot) { 545 return -EIO; 546 } 547 548 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 549 550 if (mp) 551 release_metapage(mp); 552 553 return 0; 554 } 555 556 /* 557 * dtSearch() 558 * 559 * function: 560 * Search for the entry with specified key 561 * 562 * parameter: 563 * 564 * return: 0 - search result on stack, leaf page pinned; 565 * errno - I/O error 566 */ 567 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 568 struct btstack * btstack, int flag) 569 { 570 int rc = 0; 571 int cmp = 1; /* init for empty page */ 572 s64 bn; 573 struct metapage *mp; 574 dtpage_t *p; 575 s8 *stbl; 576 int base, index, lim; 577 struct btframe *btsp; 578 pxd_t *pxd; 579 int psize = 288; /* initial in-line directory */ 580 ino_t inumber; 581 struct component_name ciKey; 582 struct super_block *sb = ip->i_sb; 583 584 ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 585 GFP_NOFS); 586 if (!ciKey.name) { 587 rc = -ENOMEM; 588 goto dtSearch_Exit2; 589 } 590 591 592 /* uppercase search key for c-i directory */ 593 UniStrcpy(ciKey.name, key->name); 594 ciKey.namlen = key->namlen; 595 596 /* only uppercase if case-insensitive support is on */ 597 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 598 ciToUpper(&ciKey); 599 } 600 BT_CLR(btstack); /* reset stack */ 601 602 /* init level count for max pages to split */ 603 btstack->nsplit = 1; 604 605 /* 606 * search down tree from root: 607 * 608 * between two consecutive entries of 
<Ki, Pi> and <Kj, Pj> of 609 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 610 * 611 * if entry with search key K is not found 612 * internal page search find the entry with largest key Ki 613 * less than K which point to the child page to search; 614 * leaf page search find the entry with smallest key Kj 615 * greater than K so that the returned index is the position of 616 * the entry to be shifted right for insertion of new entry. 617 * for empty tree, search key is greater than any key of the tree. 618 * 619 * by convention, root bn = 0. 620 */ 621 for (bn = 0;;) { 622 /* get/pin the page to search */ 623 DT_GETPAGE(ip, bn, mp, psize, p, rc); 624 if (rc) 625 goto dtSearch_Exit1; 626 627 /* get sorted entry table of the page */ 628 stbl = DT_GETSTBL(p); 629 630 /* 631 * binary search with search key K on the current page. 632 */ 633 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 634 index = base + (lim >> 1); 635 636 if (stbl[index] < 0) { 637 rc = -EIO; 638 goto out; 639 } 640 641 if (p->header.flag & BT_LEAF) { 642 /* uppercase leaf name to compare */ 643 cmp = 644 ciCompare(&ciKey, p, stbl[index], 645 JFS_SBI(sb)->mntflag); 646 } else { 647 /* router key is in uppercase */ 648 649 cmp = dtCompare(&ciKey, p, stbl[index]); 650 651 652 } 653 if (cmp == 0) { 654 /* 655 * search hit 656 */ 657 /* search hit - leaf page: 658 * return the entry found 659 */ 660 if (p->header.flag & BT_LEAF) { 661 inumber = le32_to_cpu( 662 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 663 664 /* 665 * search for JFS_LOOKUP 666 */ 667 if (flag == JFS_LOOKUP) { 668 *data = inumber; 669 rc = 0; 670 goto out; 671 } 672 673 /* 674 * search for JFS_CREATE 675 */ 676 if (flag == JFS_CREATE) { 677 *data = inumber; 678 rc = -EEXIST; 679 goto out; 680 } 681 682 /* 683 * search for JFS_REMOVE or JFS_RENAME 684 */ 685 if ((flag == JFS_REMOVE || 686 flag == JFS_RENAME) && 687 *data != inumber) { 688 rc = -ESTALE; 689 goto out; 690 } 691 692 /* 693 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 694 */ 695 /* save search result */ 696 *data = inumber; 697 btsp = btstack->top; 698 btsp->bn = bn; 699 btsp->index = index; 700 btsp->mp = mp; 701 702 rc = 0; 703 goto dtSearch_Exit1; 704 } 705 706 /* search hit - internal page: 707 * descend/search its child page 708 */ 709 goto getChild; 710 } 711 712 if (cmp > 0) { 713 base = index + 1; 714 --lim; 715 } 716 } 717 718 /* 719 * search miss 720 * 721 * base is the smallest index with key (Kj) greater than 722 * search key (K) and may be zero or (maxindex + 1) index. 723 */ 724 /* 725 * search miss - leaf page 726 * 727 * return location of entry (base) where new entry with 728 * search key K is to be inserted. 729 */ 730 if (p->header.flag & BT_LEAF) { 731 /* 732 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 733 */ 734 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 735 flag == JFS_RENAME) { 736 rc = -ENOENT; 737 goto out; 738 } 739 740 /* 741 * search for JFS_CREATE|JFS_FINDDIR: 742 * 743 * save search result 744 */ 745 *data = 0; 746 btsp = btstack->top; 747 btsp->bn = bn; 748 btsp->index = base; 749 btsp->mp = mp; 750 751 rc = 0; 752 goto dtSearch_Exit1; 753 } 754 755 /* 756 * search miss - internal page 757 * 758 * if base is non-zero, decrement base by one to get the parent 759 * entry of the child page to search. 760 */ 761 index = base ? base - 1 : base; 762 763 /* 764 * go down to child page 765 */ 766 getChild: 767 /* update max. 
number of pages to split */ 768 if (BT_STACK_FULL(btstack)) { 769 /* Something's corrupted, mark filesystem dirty so 770 * chkdsk will fix it. 771 */ 772 jfs_error(sb, "stack overrun!\n"); 773 BT_STACK_DUMP(btstack); 774 rc = -EIO; 775 goto out; 776 } 777 btstack->nsplit++; 778 779 /* push (bn, index) of the parent page/entry */ 780 BT_PUSH(btstack, bn, index); 781 782 /* get the child page block number */ 783 pxd = (pxd_t *) & p->slot[stbl[index]]; 784 bn = addressPXD(pxd); 785 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 786 787 /* unpin the parent page */ 788 DT_PUTPAGE(mp); 789 } 790 791 out: 792 DT_PUTPAGE(mp); 793 794 dtSearch_Exit1: 795 796 kfree(ciKey.name); 797 798 dtSearch_Exit2: 799 800 return rc; 801 } 802 803 804 /* 805 * dtInsert() 806 * 807 * function: insert an entry to directory tree 808 * 809 * parameter: 810 * 811 * return: 0 - success; 812 * errno - failure; 813 */ 814 int dtInsert(tid_t tid, struct inode *ip, 815 struct component_name * name, ino_t * fsn, struct btstack * btstack) 816 { 817 int rc = 0; 818 struct metapage *mp; /* meta-page buffer */ 819 dtpage_t *p; /* base B+-tree index page */ 820 s64 bn; 821 int index; 822 struct dtsplit split; /* split information */ 823 ddata_t data; 824 struct dt_lock *dtlck; 825 int n; 826 struct tlock *tlck; 827 struct lv *lv; 828 829 /* 830 * retrieve search result 831 * 832 * dtSearch() returns (leaf page pinned, index at which to insert). 833 * n.b. dtSearch() may return index of (maxindex + 1) of 834 * the full page. 835 */ 836 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 837 if (p->header.freelist == 0) 838 return -EINVAL; 839 840 /* 841 * insert entry for new key 842 */ 843 if (DO_INDEX(ip)) { 844 if (JFS_IP(ip)->next_index == DIREND) { 845 DT_PUTPAGE(mp); 846 return -EMLINK; 847 } 848 n = NDTLEAF(name->namlen); 849 data.leaf.tid = tid; 850 data.leaf.ip = ip; 851 } else { 852 n = NDTLEAF_LEGACY(name->namlen); 853 data.leaf.ip = NULL; /* signifies legacy directory format */ 854 } 855 data.leaf.ino = *fsn; 856 857 /* 858 * leaf page does not have enough room for new entry: 859 * 860 * extend/split the leaf page; 861 * 862 * dtSplitUp() will insert the entry and unpin the leaf page. 
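 *
 * (illustrative slot arithmetic, assuming the usual on-disk entry
 * layout: a leaf entry's head segment holds about 11 UniChars of the
 * name, 13 for legacy non-indexed directories, and each continuation
 * dtslot holds 15 more; so e.g. a 20-character name in an indexed
 * directory needs NDTLEAF(20) == 2 slots, and that slot count is the
 * <n> compared against p->header.freecnt below)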
863 */ 864 if (n > p->header.freecnt) { 865 split.mp = mp; 866 split.index = index; 867 split.nslot = n; 868 split.key = name; 869 split.data = &data; 870 rc = dtSplitUp(tid, ip, &split, btstack); 871 return rc; 872 } 873 874 /* 875 * leaf page does have enough room for new entry: 876 * 877 * insert the new data entry into the leaf page; 878 */ 879 BT_MARK_DIRTY(mp, ip); 880 /* 881 * acquire a transaction lock on the leaf page 882 */ 883 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 884 dtlck = (struct dt_lock *) & tlck->lock; 885 ASSERT(dtlck->index == 0); 886 lv = & dtlck->lv[0]; 887 888 /* linelock header */ 889 lv->offset = 0; 890 lv->length = 1; 891 dtlck->index++; 892 893 dtInsertEntry(p, index, name, &data, &dtlck); 894 895 /* linelock stbl of non-root leaf page */ 896 if (!(p->header.flag & BT_ROOT)) { 897 if (dtlck->index >= dtlck->maxcnt) 898 dtlck = (struct dt_lock *) txLinelock(dtlck); 899 lv = & dtlck->lv[dtlck->index]; 900 n = index >> L2DTSLOTSIZE; 901 lv->offset = p->header.stblindex + n; 902 lv->length = 903 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 904 dtlck->index++; 905 } 906 907 /* unpin the leaf page */ 908 DT_PUTPAGE(mp); 909 910 return 0; 911 } 912 913 914 /* 915 * dtSplitUp() 916 * 917 * function: propagate insertion bottom up; 918 * 919 * parameter: 920 * 921 * return: 0 - success; 922 * errno - failure; 923 * leaf page unpinned; 924 */ 925 static int dtSplitUp(tid_t tid, 926 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 927 { 928 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 929 int rc = 0; 930 struct metapage *smp; 931 dtpage_t *sp; /* split page */ 932 struct metapage *rmp; 933 dtpage_t *rp; /* new right page split from sp */ 934 pxd_t rpxd; /* new right page extent descriptor */ 935 struct metapage *lmp; 936 dtpage_t *lp; /* left child page */ 937 int skip; /* index of entry of insertion */ 938 struct btframe *parent; /* parent page entry on traverse stack */ 939 s64 xaddr, nxaddr; 940 int xlen, xsize; 941 struct pxdlist pxdlist; 942 pxd_t *pxd; 943 struct component_name key = { 0, NULL }; 944 ddata_t *data = split->data; 945 int n; 946 struct dt_lock *dtlck; 947 struct tlock *tlck; 948 struct lv *lv; 949 int quota_allocation = 0; 950 951 /* get split page */ 952 smp = split->mp; 953 sp = DT_PAGE(ip, smp); 954 955 key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS); 956 if (!key.name) { 957 DT_PUTPAGE(smp); 958 rc = -ENOMEM; 959 goto dtSplitUp_Exit; 960 } 961 962 /* 963 * split leaf page 964 * 965 * The split routines insert the new entry, and 966 * acquire txLock as appropriate. 
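 *
 * the cases handled below, in the order they are tested:
 *	1) the page is the root leaf: allocate a child extent and split
 *	   the root into it (dtSplitRoot());
 *	2) the first/only leaf is still smaller than a full page: grow
 *	   the extent in place or relocate it, roughly doubling its
 *	   size (dtExtendPage());
 *	3) otherwise: split the leaf into a new right page and
 *	   propagate router entries up the traversal stack
 *	   (dtSplitPage()).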
967 */ 968 /* 969 * split root leaf page: 970 */ 971 if (sp->header.flag & BT_ROOT) { 972 /* 973 * allocate a single extent child page 974 */ 975 xlen = 1; 976 n = sbi->bsize >> L2DTSLOTSIZE; 977 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 978 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 979 if (n <= split->nslot) 980 xlen++; 981 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 982 DT_PUTPAGE(smp); 983 goto freeKeyName; 984 } 985 986 pxdlist.maxnpxd = 1; 987 pxdlist.npxd = 0; 988 pxd = &pxdlist.pxd[0]; 989 PXDaddress(pxd, xaddr); 990 PXDlength(pxd, xlen); 991 split->pxdlist = &pxdlist; 992 rc = dtSplitRoot(tid, ip, split, &rmp); 993 994 if (rc) 995 dbFree(ip, xaddr, xlen); 996 else 997 DT_PUTPAGE(rmp); 998 999 DT_PUTPAGE(smp); 1000 1001 if (!DO_INDEX(ip)) 1002 ip->i_size = xlen << sbi->l2bsize; 1003 1004 goto freeKeyName; 1005 } 1006 1007 /* 1008 * extend first leaf page 1009 * 1010 * extend the 1st extent if less than buffer page size 1011 * (dtExtendPage() reurns leaf page unpinned) 1012 */ 1013 pxd = &sp->header.self; 1014 xlen = lengthPXD(pxd); 1015 xsize = xlen << sbi->l2bsize; 1016 if (xsize < PSIZE) { 1017 xaddr = addressPXD(pxd); 1018 n = xsize >> L2DTSLOTSIZE; 1019 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1020 if ((n + sp->header.freecnt) <= split->nslot) 1021 n = xlen + (xlen << 1); 1022 else 1023 n = xlen; 1024 1025 /* Allocate blocks to quota. */ 1026 rc = dquot_alloc_block(ip, n); 1027 if (rc) 1028 goto extendOut; 1029 quota_allocation += n; 1030 1031 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1032 (s64) n, &nxaddr))) 1033 goto extendOut; 1034 1035 pxdlist.maxnpxd = 1; 1036 pxdlist.npxd = 0; 1037 pxd = &pxdlist.pxd[0]; 1038 PXDaddress(pxd, nxaddr); 1039 PXDlength(pxd, xlen + n); 1040 split->pxdlist = &pxdlist; 1041 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1042 nxaddr = addressPXD(pxd); 1043 if (xaddr != nxaddr) { 1044 /* free relocated extent */ 1045 xlen = lengthPXD(pxd); 1046 dbFree(ip, nxaddr, (s64) xlen); 1047 } else { 1048 /* free extended delta */ 1049 xlen = lengthPXD(pxd) - n; 1050 xaddr = addressPXD(pxd) + xlen; 1051 dbFree(ip, xaddr, (s64) n); 1052 } 1053 } else if (!DO_INDEX(ip)) 1054 ip->i_size = lengthPXD(pxd) << sbi->l2bsize; 1055 1056 1057 extendOut: 1058 DT_PUTPAGE(smp); 1059 goto freeKeyName; 1060 } 1061 1062 /* 1063 * split leaf page <sp> into <sp> and a new right page <rp>. 1064 * 1065 * return <rp> pinned and its extent descriptor <rpxd> 1066 */ 1067 /* 1068 * allocate new directory page extent and 1069 * new index page(s) to cover page split(s) 1070 * 1071 * allocation hint: ? 
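 *
 * (btstack->nsplit was initialized to 1 in dtSearch() and incremented
 * once per level descended, so allocating that many page-sized extents
 * up front covers the worst case of every page on the path splitting;
 * whatever remains unused is freed again at splitOut below)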
1072 */ 1073 n = btstack->nsplit; 1074 pxdlist.maxnpxd = pxdlist.npxd = 0; 1075 xlen = sbi->nbperpage; 1076 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1077 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1078 PXDaddress(pxd, xaddr); 1079 PXDlength(pxd, xlen); 1080 pxdlist.maxnpxd++; 1081 continue; 1082 } 1083 1084 DT_PUTPAGE(smp); 1085 1086 /* undo allocation */ 1087 goto splitOut; 1088 } 1089 1090 split->pxdlist = &pxdlist; 1091 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1092 DT_PUTPAGE(smp); 1093 1094 /* undo allocation */ 1095 goto splitOut; 1096 } 1097 1098 if (!DO_INDEX(ip)) 1099 ip->i_size += PSIZE; 1100 1101 /* 1102 * propagate up the router entry for the leaf page just split 1103 * 1104 * insert a router entry for the new page into the parent page, 1105 * propagate the insert/split up the tree by walking back the stack 1106 * of (bn of parent page, index of child page entry in parent page) 1107 * that were traversed during the search for the page that split. 1108 * 1109 * the propagation of insert/split up the tree stops if the root 1110 * splits or the page inserted into doesn't have to split to hold 1111 * the new entry. 1112 * 1113 * the parent entry for the split page remains the same, and 1114 * a new entry is inserted at its right with the first key and 1115 * block number of the new right page. 1116 * 1117 * There are a maximum of 4 pages pinned at any time: 1118 * two children, left parent and right parent (when the parent splits). 1119 * keep the child pages pinned while working on the parent. 1120 * make sure that all pins are released at exit. 1121 */ 1122 while ((parent = BT_POP(btstack)) != NULL) { 1123 /* parent page specified by stack frame <parent> */ 1124 1125 /* keep current child pages (<lp>, <rp>) pinned */ 1126 lmp = smp; 1127 lp = sp; 1128 1129 /* 1130 * insert router entry in parent for new right child page <rp> 1131 */ 1132 /* get the parent page <sp> */ 1133 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1134 if (rc) { 1135 DT_PUTPAGE(lmp); 1136 DT_PUTPAGE(rmp); 1137 goto splitOut; 1138 } 1139 1140 /* 1141 * The new key entry goes ONE AFTER the index of parent entry, 1142 * because the split was to the right. 1143 */ 1144 skip = parent->index + 1; 1145 1146 /* 1147 * compute the key for the router entry 1148 * 1149 * key suffix compression: 1150 * for internal pages that have leaf pages as children, 1151 * retain only what's needed to distinguish between 1152 * the new entry and the entry on the page to its left. 1153 * If the keys compare equal, retain the entire key. 1154 * 1155 * note that compression is performed only at computing 1156 * router key at the lowest internal level. 1157 * further compression of the key between pairs of higher 1158 * level internal pages loses too much information and 1159 * the search may fail. 1160 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1161 * results in two adjacent parent entries (a)(xx). 1162 * if split occurs between these two entries, and 1163 * if compression is applied, the router key of parent entry 1164 * of right page (x) will divert search for x into right 1165 * subtree and miss x in the left subtree.) 1166 * 1167 * the entire key must be retained for the next-to-leftmost 1168 * internal key at any level of the tree, or search may fail 1169 * (e.g., ?) 
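 *
 * (worked example of the compression itself: if the last key of the
 * left leaf is <apple> and the first key of the right leaf is
 * <apricot>, the shared prefix is <ap>, so a router key of <apr>,
 * one character past the shared prefix, uppercased to <APR> for
 * case-insensitive directories, is enough to separate the two pages)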
1170 */ 1171 switch (rp->header.flag & BT_TYPE) { 1172 case BT_LEAF: 1173 /* 1174 * compute the length of prefix for suffix compression 1175 * between last entry of left page and first entry 1176 * of right page 1177 */ 1178 if ((sp->header.flag & BT_ROOT && skip > 1) || 1179 sp->header.prev != 0 || skip > 1) { 1180 /* compute uppercase router prefix key */ 1181 rc = ciGetLeafPrefixKey(lp, 1182 lp->header.nextindex-1, 1183 rp, 0, &key, 1184 sbi->mntflag); 1185 if (rc) { 1186 DT_PUTPAGE(lmp); 1187 DT_PUTPAGE(rmp); 1188 DT_PUTPAGE(smp); 1189 goto splitOut; 1190 } 1191 } else { 1192 /* next to leftmost entry of 1193 lowest internal level */ 1194 1195 /* compute uppercase router key */ 1196 dtGetKey(rp, 0, &key, sbi->mntflag); 1197 key.name[key.namlen] = 0; 1198 1199 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1200 ciToUpper(&key); 1201 } 1202 1203 n = NDTINTERNAL(key.namlen); 1204 break; 1205 1206 case BT_INTERNAL: 1207 dtGetKey(rp, 0, &key, sbi->mntflag); 1208 n = NDTINTERNAL(key.namlen); 1209 break; 1210 1211 default: 1212 jfs_err("dtSplitUp(): UFO!"); 1213 break; 1214 } 1215 1216 /* unpin left child page */ 1217 DT_PUTPAGE(lmp); 1218 1219 /* 1220 * compute the data for the router entry 1221 */ 1222 data->xd = rpxd; /* child page xd */ 1223 1224 /* 1225 * parent page is full - split the parent page 1226 */ 1227 if (n > sp->header.freecnt) { 1228 /* init for parent page split */ 1229 split->mp = smp; 1230 split->index = skip; /* index at insert */ 1231 split->nslot = n; 1232 split->key = &key; 1233 /* split->data = data; */ 1234 1235 /* unpin right child page */ 1236 DT_PUTPAGE(rmp); 1237 1238 /* The split routines insert the new entry, 1239 * acquire txLock as appropriate. 1240 * return <rp> pinned and its block number <rbn>. 1241 */ 1242 rc = (sp->header.flag & BT_ROOT) ? 1243 dtSplitRoot(tid, ip, split, &rmp) : 1244 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1245 if (rc) { 1246 DT_PUTPAGE(smp); 1247 goto splitOut; 1248 } 1249 1250 /* smp and rmp are pinned */ 1251 } 1252 /* 1253 * parent page is not full - insert router entry in parent page 1254 */ 1255 else { 1256 BT_MARK_DIRTY(smp, ip); 1257 /* 1258 * acquire a transaction lock on the parent page 1259 */ 1260 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1261 dtlck = (struct dt_lock *) & tlck->lock; 1262 ASSERT(dtlck->index == 0); 1263 lv = & dtlck->lv[0]; 1264 1265 /* linelock header */ 1266 lv->offset = 0; 1267 lv->length = 1; 1268 dtlck->index++; 1269 1270 /* linelock stbl of non-root parent page */ 1271 if (!(sp->header.flag & BT_ROOT)) { 1272 lv++; 1273 n = skip >> L2DTSLOTSIZE; 1274 lv->offset = sp->header.stblindex + n; 1275 lv->length = 1276 ((sp->header.nextindex - 1277 1) >> L2DTSLOTSIZE) - n + 1; 1278 dtlck->index++; 1279 } 1280 1281 dtInsertEntry(sp, skip, &key, data, &dtlck); 1282 1283 /* exit propagate up */ 1284 break; 1285 } 1286 } 1287 1288 /* unpin current split and its right page */ 1289 DT_PUTPAGE(smp); 1290 DT_PUTPAGE(rmp); 1291 1292 /* 1293 * free remaining extents allocated for split 1294 */ 1295 splitOut: 1296 n = pxdlist.npxd; 1297 pxd = &pxdlist.pxd[n]; 1298 for (; n < pxdlist.maxnpxd; n++, pxd++) 1299 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1300 1301 freeKeyName: 1302 kfree(key.name); 1303 1304 /* Rollback quota allocation */ 1305 if (rc && quota_allocation) 1306 dquot_free_block(ip, quota_allocation); 1307 1308 dtSplitUp_Exit: 1309 1310 return rc; 1311 } 1312 1313 1314 /* 1315 * dtSplitPage() 1316 * 1317 * function: Split a non-root page of a btree. 
1318 * 1319 * parameter: 1320 * 1321 * return: 0 - success; 1322 * errno - failure; 1323 * return split and new page pinned; 1324 */ 1325 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1326 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1327 { 1328 int rc = 0; 1329 struct metapage *smp; 1330 dtpage_t *sp; 1331 struct metapage *rmp; 1332 dtpage_t *rp; /* new right page allocated */ 1333 s64 rbn; /* new right page block number */ 1334 struct metapage *mp; 1335 dtpage_t *p; 1336 s64 nextbn; 1337 struct pxdlist *pxdlist; 1338 pxd_t *pxd; 1339 int skip, nextindex, half, left, nxt, off, si; 1340 struct ldtentry *ldtentry; 1341 struct idtentry *idtentry; 1342 u8 *stbl; 1343 struct dtslot *f; 1344 int fsi, stblsize; 1345 int n; 1346 struct dt_lock *sdtlck, *rdtlck; 1347 struct tlock *tlck; 1348 struct dt_lock *dtlck; 1349 struct lv *slv, *rlv, *lv; 1350 1351 /* get split page */ 1352 smp = split->mp; 1353 sp = DT_PAGE(ip, smp); 1354 1355 /* 1356 * allocate the new right page for the split 1357 */ 1358 pxdlist = split->pxdlist; 1359 pxd = &pxdlist->pxd[pxdlist->npxd]; 1360 pxdlist->npxd++; 1361 rbn = addressPXD(pxd); 1362 rmp = get_metapage(ip, rbn, PSIZE, 1); 1363 if (rmp == NULL) 1364 return -EIO; 1365 1366 /* Allocate blocks to quota. */ 1367 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1368 if (rc) { 1369 release_metapage(rmp); 1370 return rc; 1371 } 1372 1373 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1374 1375 BT_MARK_DIRTY(rmp, ip); 1376 /* 1377 * acquire a transaction lock on the new right page 1378 */ 1379 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1380 rdtlck = (struct dt_lock *) & tlck->lock; 1381 1382 rp = (dtpage_t *) rmp->data; 1383 *rpp = rp; 1384 rp->header.self = *pxd; 1385 1386 BT_MARK_DIRTY(smp, ip); 1387 /* 1388 * acquire a transaction lock on the split page 1389 * 1390 * action: 1391 */ 1392 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1393 sdtlck = (struct dt_lock *) & tlck->lock; 1394 1395 /* linelock header of split page */ 1396 ASSERT(sdtlck->index == 0); 1397 slv = & sdtlck->lv[0]; 1398 slv->offset = 0; 1399 slv->length = 1; 1400 sdtlck->index++; 1401 1402 /* 1403 * initialize/update sibling pointers between sp and rp 1404 */ 1405 nextbn = le64_to_cpu(sp->header.next); 1406 rp->header.next = cpu_to_le64(nextbn); 1407 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1408 sp->header.next = cpu_to_le64(rbn); 1409 1410 /* 1411 * initialize new right page 1412 */ 1413 rp->header.flag = sp->header.flag; 1414 1415 /* compute sorted entry table at start of extent data area */ 1416 rp->header.nextindex = 0; 1417 rp->header.stblindex = 1; 1418 1419 n = PSIZE >> L2DTSLOTSIZE; 1420 rp->header.maxslot = n; 1421 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1422 1423 /* init freelist */ 1424 fsi = rp->header.stblindex + stblsize; 1425 rp->header.freelist = fsi; 1426 rp->header.freecnt = rp->header.maxslot - fsi; 1427 1428 /* 1429 * sequential append at tail: append without split 1430 * 1431 * If splitting the last page on a level because of appending 1432 * a entry to it (skip is maxentry), it's likely that the access is 1433 * sequential. Adding an empty page on the side of the level is less 1434 * work and can push the fill factor much higher than normal. 1435 * If we're wrong it's no big deal, we'll just do the split the right 1436 * way next time. 
1437 * (It may look like it's equally easy to do a similar hack for 1438 * reverse sorted data, that is, split the tree left, 1439 * but it's not. Be my guest.) 1440 */ 1441 if (nextbn == 0 && split->index == sp->header.nextindex) { 1442 /* linelock header + stbl (first slot) of new page */ 1443 rlv = & rdtlck->lv[rdtlck->index]; 1444 rlv->offset = 0; 1445 rlv->length = 2; 1446 rdtlck->index++; 1447 1448 /* 1449 * initialize freelist of new right page 1450 */ 1451 f = &rp->slot[fsi]; 1452 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1453 f->next = fsi; 1454 f->next = -1; 1455 1456 /* insert entry at the first entry of the new right page */ 1457 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1458 1459 goto out; 1460 } 1461 1462 /* 1463 * non-sequential insert (at possibly middle page) 1464 */ 1465 1466 /* 1467 * update prev pointer of previous right sibling page; 1468 */ 1469 if (nextbn != 0) { 1470 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1471 if (rc) { 1472 discard_metapage(rmp); 1473 return rc; 1474 } 1475 1476 BT_MARK_DIRTY(mp, ip); 1477 /* 1478 * acquire a transaction lock on the next page 1479 */ 1480 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1481 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1482 tlck, ip, mp); 1483 dtlck = (struct dt_lock *) & tlck->lock; 1484 1485 /* linelock header of previous right sibling page */ 1486 lv = & dtlck->lv[dtlck->index]; 1487 lv->offset = 0; 1488 lv->length = 1; 1489 dtlck->index++; 1490 1491 p->header.prev = cpu_to_le64(rbn); 1492 1493 DT_PUTPAGE(mp); 1494 } 1495 1496 /* 1497 * split the data between the split and right pages. 1498 */ 1499 skip = split->index; 1500 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1501 left = 0; 1502 1503 /* 1504 * compute fill factor for split pages 1505 * 1506 * <nxt> traces the next entry to move to rp 1507 * <off> traces the next entry to stay in sp 1508 */ 1509 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1510 nextindex = sp->header.nextindex; 1511 for (nxt = off = 0; nxt < nextindex; ++off) { 1512 if (off == skip) 1513 /* check for fill factor with new entry size */ 1514 n = split->nslot; 1515 else { 1516 si = stbl[nxt]; 1517 switch (sp->header.flag & BT_TYPE) { 1518 case BT_LEAF: 1519 ldtentry = (struct ldtentry *) & sp->slot[si]; 1520 if (DO_INDEX(ip)) 1521 n = NDTLEAF(ldtentry->namlen); 1522 else 1523 n = NDTLEAF_LEGACY(ldtentry-> 1524 namlen); 1525 break; 1526 1527 case BT_INTERNAL: 1528 idtentry = (struct idtentry *) & sp->slot[si]; 1529 n = NDTINTERNAL(idtentry->namlen); 1530 break; 1531 1532 default: 1533 break; 1534 } 1535 1536 ++nxt; /* advance to next entry to move in sp */ 1537 } 1538 1539 left += n; 1540 if (left >= half) 1541 break; 1542 } 1543 1544 /* <nxt> poins to the 1st entry to move */ 1545 1546 /* 1547 * move entries to right page 1548 * 1549 * dtMoveEntry() initializes rp and reserves entry for insertion 1550 * 1551 * split page moved out entries are linelocked; 1552 * new/right page moved in entries are linelocked; 1553 */ 1554 /* linelock header + stbl of new right page */ 1555 rlv = & rdtlck->lv[rdtlck->index]; 1556 rlv->offset = 0; 1557 rlv->length = 5; 1558 rdtlck->index++; 1559 1560 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1561 1562 sp->header.nextindex = nxt; 1563 1564 /* 1565 * finalize freelist of new right page 1566 */ 1567 fsi = rp->header.freelist; 1568 f = &rp->slot[fsi]; 1569 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1570 f->next = fsi; 1571 f->next = -1; 1572 1573 /* 1574 * Update directory index table for entries 
now in right page 1575 */ 1576 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1577 s64 lblock; 1578 1579 mp = NULL; 1580 stbl = DT_GETSTBL(rp); 1581 for (n = 0; n < rp->header.nextindex; n++) { 1582 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1583 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1584 rbn, n, &mp, &lblock); 1585 } 1586 if (mp) 1587 release_metapage(mp); 1588 } 1589 1590 /* 1591 * the skipped index was on the left page, 1592 */ 1593 if (skip <= off) { 1594 /* insert the new entry in the split page */ 1595 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1596 1597 /* linelock stbl of split page */ 1598 if (sdtlck->index >= sdtlck->maxcnt) 1599 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1600 slv = & sdtlck->lv[sdtlck->index]; 1601 n = skip >> L2DTSLOTSIZE; 1602 slv->offset = sp->header.stblindex + n; 1603 slv->length = 1604 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1605 sdtlck->index++; 1606 } 1607 /* 1608 * the skipped index was on the right page, 1609 */ 1610 else { 1611 /* adjust the skip index to reflect the new position */ 1612 skip -= nxt; 1613 1614 /* insert the new entry in the right page */ 1615 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1616 } 1617 1618 out: 1619 *rmpp = rmp; 1620 *rpxdp = *pxd; 1621 1622 return rc; 1623 } 1624 1625 1626 /* 1627 * dtExtendPage() 1628 * 1629 * function: extend 1st/only directory leaf page 1630 * 1631 * parameter: 1632 * 1633 * return: 0 - success; 1634 * errno - failure; 1635 * return extended page pinned; 1636 */ 1637 static int dtExtendPage(tid_t tid, 1638 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1639 { 1640 struct super_block *sb = ip->i_sb; 1641 int rc; 1642 struct metapage *smp, *pmp, *mp; 1643 dtpage_t *sp, *pp; 1644 struct pxdlist *pxdlist; 1645 pxd_t *pxd, *tpxd; 1646 int xlen, xsize; 1647 int newstblindex, newstblsize; 1648 int oldstblindex, oldstblsize; 1649 int fsi, last; 1650 struct dtslot *f; 1651 struct btframe *parent; 1652 int n; 1653 struct dt_lock *dtlck; 1654 s64 xaddr, txaddr; 1655 struct tlock *tlck; 1656 struct pxd_lock *pxdlock; 1657 struct lv *lv; 1658 uint type; 1659 struct ldtentry *ldtentry; 1660 u8 *stbl; 1661 1662 /* get page to extend */ 1663 smp = split->mp; 1664 sp = DT_PAGE(ip, smp); 1665 1666 /* get parent/root page */ 1667 parent = BT_POP(btstack); 1668 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1669 if (rc) 1670 return (rc); 1671 1672 /* 1673 * extend the extent 1674 */ 1675 pxdlist = split->pxdlist; 1676 pxd = &pxdlist->pxd[pxdlist->npxd]; 1677 pxdlist->npxd++; 1678 1679 xaddr = addressPXD(pxd); 1680 tpxd = &sp->header.self; 1681 txaddr = addressPXD(tpxd); 1682 /* in-place extension */ 1683 if (xaddr == txaddr) { 1684 type = tlckEXTEND; 1685 } 1686 /* relocation */ 1687 else { 1688 type = tlckNEW; 1689 1690 /* save moved extent descriptor for later free */ 1691 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1692 pxdlock = (struct pxd_lock *) & tlck->lock; 1693 pxdlock->flag = mlckFREEPXD; 1694 pxdlock->pxd = sp->header.self; 1695 pxdlock->index = 1; 1696 1697 /* 1698 * Update directory index table to reflect new page address 1699 */ 1700 if (DO_INDEX(ip)) { 1701 s64 lblock; 1702 1703 mp = NULL; 1704 stbl = DT_GETSTBL(sp); 1705 for (n = 0; n < sp->header.nextindex; n++) { 1706 ldtentry = 1707 (struct ldtentry *) & sp->slot[stbl[n]]; 1708 modify_index(tid, ip, 1709 le32_to_cpu(ldtentry->index), 1710 xaddr, n, &mp, &lblock); 1711 } 1712 if (mp) 1713 release_metapage(mp); 1714 } 1715 } 1716 1717 /* 1718 * 
extend the page 1719 */ 1720 sp->header.self = *pxd; 1721 1722 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1723 1724 BT_MARK_DIRTY(smp, ip); 1725 /* 1726 * acquire a transaction lock on the extended/leaf page 1727 */ 1728 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1729 dtlck = (struct dt_lock *) & tlck->lock; 1730 lv = & dtlck->lv[0]; 1731 1732 /* update buffer extent descriptor of extended page */ 1733 xlen = lengthPXD(pxd); 1734 xsize = xlen << JFS_SBI(sb)->l2bsize; 1735 1736 /* 1737 * copy old stbl to new stbl at start of extended area 1738 */ 1739 oldstblindex = sp->header.stblindex; 1740 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1741 newstblindex = sp->header.maxslot; 1742 n = xsize >> L2DTSLOTSIZE; 1743 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1744 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1745 sp->header.nextindex); 1746 1747 /* 1748 * in-line extension: linelock old area of extended page 1749 */ 1750 if (type == tlckEXTEND) { 1751 /* linelock header */ 1752 lv->offset = 0; 1753 lv->length = 1; 1754 dtlck->index++; 1755 lv++; 1756 1757 /* linelock new stbl of extended page */ 1758 lv->offset = newstblindex; 1759 lv->length = newstblsize; 1760 } 1761 /* 1762 * relocation: linelock whole relocated area 1763 */ 1764 else { 1765 lv->offset = 0; 1766 lv->length = sp->header.maxslot + newstblsize; 1767 } 1768 1769 dtlck->index++; 1770 1771 sp->header.maxslot = n; 1772 sp->header.stblindex = newstblindex; 1773 /* sp->header.nextindex remains the same */ 1774 1775 /* 1776 * add old stbl region at head of freelist 1777 */ 1778 fsi = oldstblindex; 1779 f = &sp->slot[fsi]; 1780 last = sp->header.freelist; 1781 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1782 f->next = last; 1783 last = fsi; 1784 } 1785 sp->header.freelist = last; 1786 sp->header.freecnt += oldstblsize; 1787 1788 /* 1789 * append free region of newly extended area at tail of freelist 1790 */ 1791 /* init free region of newly extended area */ 1792 fsi = n = newstblindex + newstblsize; 1793 f = &sp->slot[fsi]; 1794 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1795 f->next = fsi; 1796 f->next = -1; 1797 1798 /* append new free region at tail of old freelist */ 1799 fsi = sp->header.freelist; 1800 if (fsi == -1) 1801 sp->header.freelist = n; 1802 else { 1803 do { 1804 f = &sp->slot[fsi]; 1805 fsi = f->next; 1806 } while (fsi != -1); 1807 1808 f->next = n; 1809 } 1810 1811 sp->header.freecnt += sp->header.maxslot - n; 1812 1813 /* 1814 * insert the new entry 1815 */ 1816 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1817 1818 BT_MARK_DIRTY(pmp, ip); 1819 /* 1820 * linelock any freeslots residing in old extent 1821 */ 1822 if (type == tlckEXTEND) { 1823 n = sp->header.maxslot >> 2; 1824 if (sp->header.freelist < n) 1825 dtLinelockFreelist(sp, n, &dtlck); 1826 } 1827 1828 /* 1829 * update parent entry on the parent/root page 1830 */ 1831 /* 1832 * acquire a transaction lock on the parent/root page 1833 */ 1834 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1835 dtlck = (struct dt_lock *) & tlck->lock; 1836 lv = & dtlck->lv[dtlck->index]; 1837 1838 /* linelock parent entry - 1st slot */ 1839 lv->offset = 1; 1840 lv->length = 1; 1841 dtlck->index++; 1842 1843 /* update the parent pxd for page extension */ 1844 tpxd = (pxd_t *) & pp->slot[1]; 1845 *tpxd = *pxd; 1846 1847 DT_PUTPAGE(pmp); 1848 return 0; 1849 } 1850 1851 1852 /* 1853 * dtSplitRoot() 1854 * 1855 * function: 1856 * split the full root page into 1857 * original/root/split page and new 
right page 1858 * i.e., root remains fixed in tree anchor (inode) and 1859 * the root is copied to a single new right child page 1860 * since root page << non-root page, and 1861 * the split root page contains a single entry for the 1862 * new right child page. 1863 * 1864 * parameter: 1865 * 1866 * return: 0 - success; 1867 * errno - failure; 1868 * return new page pinned; 1869 */ 1870 static int dtSplitRoot(tid_t tid, 1871 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1872 { 1873 struct super_block *sb = ip->i_sb; 1874 struct metapage *smp; 1875 dtroot_t *sp; 1876 struct metapage *rmp; 1877 dtpage_t *rp; 1878 s64 rbn; 1879 int xlen; 1880 int xsize; 1881 struct dtslot *f; 1882 s8 *stbl; 1883 int fsi, stblsize, n; 1884 struct idtentry *s; 1885 pxd_t *ppxd; 1886 struct pxdlist *pxdlist; 1887 pxd_t *pxd; 1888 struct dt_lock *dtlck; 1889 struct tlock *tlck; 1890 struct lv *lv; 1891 int rc; 1892 1893 /* get split root page */ 1894 smp = split->mp; 1895 sp = &JFS_IP(ip)->i_dtroot; 1896 1897 /* 1898 * allocate/initialize a single (right) child page 1899 * 1900 * N.B. at first split, a one (or two) block to fit new entry 1901 * is allocated; at subsequent split, a full page is allocated; 1902 */ 1903 pxdlist = split->pxdlist; 1904 pxd = &pxdlist->pxd[pxdlist->npxd]; 1905 pxdlist->npxd++; 1906 rbn = addressPXD(pxd); 1907 xlen = lengthPXD(pxd); 1908 xsize = xlen << JFS_SBI(sb)->l2bsize; 1909 rmp = get_metapage(ip, rbn, xsize, 1); 1910 if (!rmp) 1911 return -EIO; 1912 1913 rp = rmp->data; 1914 1915 /* Allocate blocks to quota. */ 1916 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1917 if (rc) { 1918 release_metapage(rmp); 1919 return rc; 1920 } 1921 1922 BT_MARK_DIRTY(rmp, ip); 1923 /* 1924 * acquire a transaction lock on the new right page 1925 */ 1926 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1927 dtlck = (struct dt_lock *) & tlck->lock; 1928 1929 rp->header.flag = 1930 (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL; 1931 rp->header.self = *pxd; 1932 1933 /* initialize sibling pointers */ 1934 rp->header.next = 0; 1935 rp->header.prev = 0; 1936 1937 /* 1938 * move in-line root page into new right page extent 1939 */ 1940 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1941 ASSERT(dtlck->index == 0); 1942 lv = & dtlck->lv[0]; 1943 lv->offset = 0; 1944 lv->length = 10; /* 1 + 8 + 1 */ 1945 dtlck->index++; 1946 1947 n = xsize >> L2DTSLOTSIZE; 1948 rp->header.maxslot = n; 1949 stblsize = (n + 31) >> L2DTSLOTSIZE; 1950 1951 /* copy old stbl to new stbl at start of extended area */ 1952 rp->header.stblindex = DTROOTMAXSLOT; 1953 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1954 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1955 rp->header.nextindex = sp->header.nextindex; 1956 1957 /* copy old data area to start of new data area */ 1958 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1959 1960 /* 1961 * append free region of newly extended area at tail of freelist 1962 */ 1963 /* init free region of newly extended area */ 1964 fsi = n = DTROOTMAXSLOT + stblsize; 1965 f = &rp->slot[fsi]; 1966 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1967 f->next = fsi; 1968 f->next = -1; 1969 1970 /* append new free region at tail of old freelist */ 1971 fsi = sp->header.freelist; 1972 if (fsi == -1) 1973 rp->header.freelist = n; 1974 else { 1975 rp->header.freelist = fsi; 1976 1977 do { 1978 f = &rp->slot[fsi]; 1979 fsi = f->next; 1980 } while (fsi >= 0); 1981 1982 f->next = n; 1983 } 1984 1985 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1986 1987 /* 1988 * Update directory index table for entries now in right page 1989 */ 1990 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1991 s64 lblock; 1992 struct metapage *mp = NULL; 1993 struct ldtentry *ldtentry; 1994 1995 stbl = DT_GETSTBL(rp); 1996 for (n = 0; n < rp->header.nextindex; n++) { 1997 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1998 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1999 rbn, n, &mp, &lblock); 2000 } 2001 if (mp) 2002 release_metapage(mp); 2003 } 2004 /* 2005 * insert the new entry into the new right/child page 2006 * (skip index in the new right page will not change) 2007 */ 2008 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2009 2010 /* 2011 * reset parent/root page 2012 * 2013 * set the 1st entry offset to 0, which force the left-most key 2014 * at any level of the tree to be less than any search key. 2015 * 2016 * The btree comparison code guarantees that the left-most key on any 2017 * level of the tree is never used, so it doesn't need to be filled in. 
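 *
 * (this hinges on the search convention in dtSearch(): an empty key
 * compares lower than any search key, and on a search miss the child
 * taken is the one at base - 1, so the slot 0 entry only ever routes
 * "everything to the left" and its key content never matters)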
2018 */ 2019 BT_MARK_DIRTY(smp, ip); 2020 /* 2021 * acquire a transaction lock on the root page (in-memory inode) 2022 */ 2023 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2024 dtlck = (struct dt_lock *) & tlck->lock; 2025 2026 /* linelock root */ 2027 ASSERT(dtlck->index == 0); 2028 lv = & dtlck->lv[0]; 2029 lv->offset = 0; 2030 lv->length = DTROOTMAXSLOT; 2031 dtlck->index++; 2032 2033 /* update page header of root */ 2034 if (sp->header.flag & BT_LEAF) { 2035 sp->header.flag &= ~BT_LEAF; 2036 sp->header.flag |= BT_INTERNAL; 2037 } 2038 2039 /* init the first entry */ 2040 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2041 ppxd = (pxd_t *) s; 2042 *ppxd = *pxd; 2043 s->next = -1; 2044 s->namlen = 0; 2045 2046 stbl = sp->header.stbl; 2047 stbl[0] = DTENTRYSTART; 2048 sp->header.nextindex = 1; 2049 2050 /* init freelist */ 2051 fsi = DTENTRYSTART + 1; 2052 f = &sp->slot[fsi]; 2053 2054 /* init free region of remaining area */ 2055 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2056 f->next = fsi; 2057 f->next = -1; 2058 2059 sp->header.freelist = DTENTRYSTART + 1; 2060 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2061 2062 *rmpp = rmp; 2063 2064 return 0; 2065 } 2066 2067 2068 /* 2069 * dtDelete() 2070 * 2071 * function: delete the entry(s) referenced by a key. 2072 * 2073 * parameter: 2074 * 2075 * return: 2076 */ 2077 int dtDelete(tid_t tid, 2078 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2079 { 2080 int rc = 0; 2081 s64 bn; 2082 struct metapage *mp, *imp; 2083 dtpage_t *p; 2084 int index; 2085 struct btstack btstack; 2086 struct dt_lock *dtlck; 2087 struct tlock *tlck; 2088 struct lv *lv; 2089 int i; 2090 struct ldtentry *ldtentry; 2091 u8 *stbl; 2092 u32 table_index, next_index; 2093 struct metapage *nmp; 2094 dtpage_t *np; 2095 2096 /* 2097 * search for the entry to delete: 2098 * 2099 * dtSearch() returns (leaf page pinned, index at which to delete). 2100 */ 2101 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2102 return rc; 2103 2104 /* retrieve search result */ 2105 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2106 2107 /* 2108 * We need to find put the index of the next entry into the 2109 * directory index table in order to resume a readdir from this 2110 * entry. 2111 */ 2112 if (DO_INDEX(ip)) { 2113 stbl = DT_GETSTBL(p); 2114 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2115 table_index = le32_to_cpu(ldtentry->index); 2116 if (index == (p->header.nextindex - 1)) { 2117 /* 2118 * Last entry in this leaf page 2119 */ 2120 if ((p->header.flag & BT_ROOT) 2121 || (p->header.next == 0)) 2122 next_index = -1; 2123 else { 2124 /* Read next leaf page */ 2125 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2126 nmp, PSIZE, np, rc); 2127 if (rc) 2128 next_index = -1; 2129 else { 2130 stbl = DT_GETSTBL(np); 2131 ldtentry = 2132 (struct ldtentry *) & np-> 2133 slot[stbl[0]]; 2134 next_index = 2135 le32_to_cpu(ldtentry->index); 2136 DT_PUTPAGE(nmp); 2137 } 2138 } 2139 } else { 2140 ldtentry = 2141 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2142 next_index = le32_to_cpu(ldtentry->index); 2143 } 2144 free_index(tid, ip, table_index, next_index); 2145 } 2146 /* 2147 * the leaf page becomes empty, delete the page 2148 */ 2149 if (p->header.nextindex == 1) { 2150 /* delete empty page */ 2151 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2152 } 2153 /* 2154 * the leaf page has other entries remaining: 2155 * 2156 * delete the entry from the leaf page. 
2157 */ 2158 else { 2159 BT_MARK_DIRTY(mp, ip); 2160 /* 2161 * acquire a transaction lock on the leaf page 2162 */ 2163 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2164 dtlck = (struct dt_lock *) & tlck->lock; 2165 2166 /* 2167 * Do not assume that dtlck->index will be zero. During a 2168 * rename within a directory, this transaction may have 2169 * modified this page already when adding the new entry. 2170 */ 2171 2172 /* linelock header */ 2173 if (dtlck->index >= dtlck->maxcnt) 2174 dtlck = (struct dt_lock *) txLinelock(dtlck); 2175 lv = & dtlck->lv[dtlck->index]; 2176 lv->offset = 0; 2177 lv->length = 1; 2178 dtlck->index++; 2179 2180 /* linelock stbl of non-root leaf page */ 2181 if (!(p->header.flag & BT_ROOT)) { 2182 if (dtlck->index >= dtlck->maxcnt) 2183 dtlck = (struct dt_lock *) txLinelock(dtlck); 2184 lv = & dtlck->lv[dtlck->index]; 2185 i = index >> L2DTSLOTSIZE; 2186 lv->offset = p->header.stblindex + i; 2187 lv->length = 2188 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2189 i + 1; 2190 dtlck->index++; 2191 } 2192 2193 /* free the leaf entry */ 2194 dtDeleteEntry(p, index, &dtlck); 2195 2196 /* 2197 * Update directory index table for entries moved in stbl 2198 */ 2199 if (DO_INDEX(ip) && index < p->header.nextindex) { 2200 s64 lblock; 2201 2202 imp = NULL; 2203 stbl = DT_GETSTBL(p); 2204 for (i = index; i < p->header.nextindex; i++) { 2205 ldtentry = 2206 (struct ldtentry *) & p->slot[stbl[i]]; 2207 modify_index(tid, ip, 2208 le32_to_cpu(ldtentry->index), 2209 bn, i, &imp, &lblock); 2210 } 2211 if (imp) 2212 release_metapage(imp); 2213 } 2214 2215 DT_PUTPAGE(mp); 2216 } 2217 2218 return rc; 2219 } 2220 2221 2222 /* 2223 * dtDeleteUp() 2224 * 2225 * function: 2226 * free empty pages as propagating deletion up the tree 2227 * 2228 * parameter: 2229 * 2230 * return: 2231 */ 2232 static int dtDeleteUp(tid_t tid, struct inode *ip, 2233 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2234 { 2235 int rc = 0; 2236 struct metapage *mp; 2237 dtpage_t *p; 2238 int index, nextindex; 2239 int xlen; 2240 struct btframe *parent; 2241 struct dt_lock *dtlck; 2242 struct tlock *tlck; 2243 struct lv *lv; 2244 struct pxd_lock *pxdlock; 2245 int i; 2246 2247 /* 2248 * keep the root leaf page which has become empty 2249 */ 2250 if (BT_IS_ROOT(fmp)) { 2251 /* 2252 * reset the root 2253 * 2254 * dtInitRoot() acquires txlock on the root 2255 */ 2256 dtInitRoot(tid, ip, PARENT(ip)); 2257 2258 DT_PUTPAGE(fmp); 2259 2260 return 0; 2261 } 2262 2263 /* 2264 * free the non-root leaf page 2265 */ 2266 /* 2267 * acquire a transaction lock on the page 2268 * 2269 * write FREEXTENT|NOREDOPAGE log record 2270 * N.B. linelock is overlaid as freed extent descriptor, and 2271 * the buffer page is freed; 2272 */ 2273 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2274 pxdlock = (struct pxd_lock *) & tlck->lock; 2275 pxdlock->flag = mlckFREEPXD; 2276 pxdlock->pxd = fp->header.self; 2277 pxdlock->index = 1; 2278 2279 /* update sibling pointers */ 2280 if ((rc = dtRelink(tid, ip, fp))) { 2281 BT_PUTPAGE(fmp); 2282 return rc; 2283 } 2284 2285 xlen = lengthPXD(&fp->header.self); 2286 2287 /* Free quota allocation. */ 2288 dquot_free_block(ip, xlen); 2289 2290 /* free/invalidate its buffer page */ 2291 discard_metapage(fmp); 2292 2293 /* 2294 * propagate page deletion up the directory tree 2295 * 2296 * If the delete from the parent page makes it empty, 2297 * continue all the way up the tree. 
2298 * stop if the root page is reached (which is never deleted) or 2299 * if the entry deletion does not empty the page. 2300 */ 2301 while ((parent = BT_POP(btstack)) != NULL) { 2302 /* pin the parent page <sp> */ 2303 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2304 if (rc) 2305 return rc; 2306 2307 /* 2308 * free the extent of the child page deleted 2309 */ 2310 index = parent->index; 2311 2312 /* 2313 * delete the entry for the child page from parent 2314 */ 2315 nextindex = p->header.nextindex; 2316 2317 /* 2318 * the parent has the single entry being deleted: 2319 * 2320 * free the parent page which has become empty. 2321 */ 2322 if (nextindex == 1) { 2323 /* 2324 * keep the root internal page which has become empty 2325 */ 2326 if (p->header.flag & BT_ROOT) { 2327 /* 2328 * reset the root 2329 * 2330 * dtInitRoot() acquires txlock on the root 2331 */ 2332 dtInitRoot(tid, ip, PARENT(ip)); 2333 2334 DT_PUTPAGE(mp); 2335 2336 return 0; 2337 } 2338 /* 2339 * free the parent page 2340 */ 2341 else { 2342 /* 2343 * acquire a transaction lock on the page 2344 * 2345 * write FREEXTENT|NOREDOPAGE log record 2346 */ 2347 tlck = 2348 txMaplock(tid, ip, 2349 tlckDTREE | tlckFREE); 2350 pxdlock = (struct pxd_lock *) & tlck->lock; 2351 pxdlock->flag = mlckFREEPXD; 2352 pxdlock->pxd = p->header.self; 2353 pxdlock->index = 1; 2354 2355 /* update sibling pointers */ 2356 if ((rc = dtRelink(tid, ip, p))) { 2357 DT_PUTPAGE(mp); 2358 return rc; 2359 } 2360 2361 xlen = lengthPXD(&p->header.self); 2362 2363 /* Free quota allocation */ 2364 dquot_free_block(ip, xlen); 2365 2366 /* free/invalidate its buffer page */ 2367 discard_metapage(mp); 2368 2369 /* propagate up */ 2370 continue; 2371 } 2372 } 2373 2374 /* 2375 * the parent has other entries remaining: 2376 * 2377 * delete the router entry from the parent page. 2378 */ 2379 BT_MARK_DIRTY(mp, ip); 2380 /* 2381 * acquire a transaction lock on the page 2382 * 2383 * action: router entry deletion 2384 */ 2385 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2386 dtlck = (struct dt_lock *) & tlck->lock; 2387 2388 /* linelock header */ 2389 if (dtlck->index >= dtlck->maxcnt) 2390 dtlck = (struct dt_lock *) txLinelock(dtlck); 2391 lv = & dtlck->lv[dtlck->index]; 2392 lv->offset = 0; 2393 lv->length = 1; 2394 dtlck->index++; 2395 2396 /* linelock stbl of non-root leaf page */ 2397 if (!(p->header.flag & BT_ROOT)) { 2398 if (dtlck->index < dtlck->maxcnt) 2399 lv++; 2400 else { 2401 dtlck = (struct dt_lock *) txLinelock(dtlck); 2402 lv = & dtlck->lv[0]; 2403 } 2404 i = index >> L2DTSLOTSIZE; 2405 lv->offset = p->header.stblindex + i; 2406 lv->length = 2407 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2408 i + 1; 2409 dtlck->index++; 2410 } 2411 2412 /* free the router entry */ 2413 dtDeleteEntry(p, index, &dtlck); 2414 2415 /* reset key of new leftmost entry of level (for consistency) */ 2416 if (index == 0 && 2417 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2418 dtTruncateEntry(p, 0, &dtlck); 2419 2420 /* unpin the parent page */ 2421 DT_PUTPAGE(mp); 2422 2423 /* exit propagation up */ 2424 break; 2425 } 2426 2427 if (!DO_INDEX(ip)) 2428 ip->i_size -= PSIZE; 2429 2430 return 0; 2431 } 2432 2433 /* 2434 * dtRelink() 2435 * 2436 * function: 2437 * link around a freed page. 
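 * the prev pointer of the next sibling and the next pointer of the
 * prev sibling are updated to point past the page being freed, so
 * the sibling chain of the level stays consistent.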
2438 *
2439 * parameter:
2440 * p: page to be freed
2441 *
2442 * return:
2443 */
2444 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
2445 {
2446 int rc;
2447 struct metapage *mp;
2448 s64 nextbn, prevbn;
2449 struct tlock *tlck;
2450 struct dt_lock *dtlck;
2451 struct lv *lv;
2452
2453 nextbn = le64_to_cpu(p->header.next);
2454 prevbn = le64_to_cpu(p->header.prev);
2455
2456 /* update prev pointer of the next page */
2457 if (nextbn != 0) {
2458 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
2459 if (rc)
2460 return rc;
2461
2462 BT_MARK_DIRTY(mp, ip);
2463 /*
2464 * acquire a transaction lock on the next page
2465 *
2466 * action: update prev pointer;
2467 */
2468 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2469 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2470 tlck, ip, mp);
2471 dtlck = (struct dt_lock *) & tlck->lock;
2472
2473 /* linelock header */
2474 if (dtlck->index >= dtlck->maxcnt)
2475 dtlck = (struct dt_lock *) txLinelock(dtlck);
2476 lv = & dtlck->lv[dtlck->index];
2477 lv->offset = 0;
2478 lv->length = 1;
2479 dtlck->index++;
2480
2481 p->header.prev = cpu_to_le64(prevbn);
2482 DT_PUTPAGE(mp);
2483 }
2484
2485 /* update next pointer of the previous page */
2486 if (prevbn != 0) {
2487 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
2488 if (rc)
2489 return rc;
2490
2491 BT_MARK_DIRTY(mp, ip);
2492 /*
2493 * acquire a transaction lock on the prev page
2494 *
2495 * action: update next pointer;
2496 */
2497 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
2498 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
2499 tlck, ip, mp);
2500 dtlck = (struct dt_lock *) & tlck->lock;
2501
2502 /* linelock header */
2503 if (dtlck->index >= dtlck->maxcnt)
2504 dtlck = (struct dt_lock *) txLinelock(dtlck);
2505 lv = & dtlck->lv[dtlck->index];
2506 lv->offset = 0;
2507 lv->length = 1;
2508 dtlck->index++;
2509
2510 p->header.next = cpu_to_le64(nextbn);
2511 DT_PUTPAGE(mp);
2512 }
2513
2514 return 0;
2515 }
2516
2517
2518 /*
2519 * dtInitRoot()
2520 *
2521 * initialize directory root (inline in inode)
2522 */
2523 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
2524 {
2525 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2526 dtroot_t *p;
2527 int fsi;
2528 struct dtslot *f;
2529 struct tlock *tlck;
2530 struct dt_lock *dtlck;
2531 struct lv *lv;
2532 u16 xflag_save;
2533
2534 /*
2535 * If this was previously a non-empty directory, we need to remove
2536 * the old directory table.
2537 */
2538 if (DO_INDEX(ip)) {
2539 if (!jfs_dirtable_inline(ip)) {
2540 struct tblock *tblk = tid_to_tblock(tid);
2541 /*
2542 * We're playing games with the tid's xflag. If
2543 * we're removing a regular file, the file's xtree
2544 * is committed with COMMIT_PMAP, but we always
2545 * commit the directory's xtree with COMMIT_PWMAP.
2546 */
2547 xflag_save = tblk->xflag;
2548 tblk->xflag = 0;
2549 /*
2550 * xtTruncate isn't guaranteed to fully truncate
2551 * the xtree. The caller needs to check i_size
2552 * after committing the transaction to see if
2553 * additional truncation is needed. The
2554 * COMMIT_Stale flag tells the caller that we
2555 * initiated the truncation.
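 * (the saved xflag is restored below once xtTruncate() returns.)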
2556 */ 2557 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2558 set_cflag(COMMIT_Stale, ip); 2559 2560 tblk->xflag = xflag_save; 2561 } else 2562 ip->i_size = 1; 2563 2564 jfs_ip->next_index = 2; 2565 } else 2566 ip->i_size = IDATASIZE; 2567 2568 /* 2569 * acquire a transaction lock on the root 2570 * 2571 * action: directory initialization; 2572 */ 2573 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2574 tlckDTREE | tlckENTRY | tlckBTROOT); 2575 dtlck = (struct dt_lock *) & tlck->lock; 2576 2577 /* linelock root */ 2578 ASSERT(dtlck->index == 0); 2579 lv = & dtlck->lv[0]; 2580 lv->offset = 0; 2581 lv->length = DTROOTMAXSLOT; 2582 dtlck->index++; 2583 2584 p = &jfs_ip->i_dtroot; 2585 2586 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2587 2588 p->header.nextindex = 0; 2589 2590 /* init freelist */ 2591 fsi = 1; 2592 f = &p->slot[fsi]; 2593 2594 /* init data area of root */ 2595 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2596 f->next = fsi; 2597 f->next = -1; 2598 2599 p->header.freelist = 1; 2600 p->header.freecnt = 8; 2601 2602 /* init '..' entry */ 2603 p->header.idotdot = cpu_to_le32(idotdot); 2604 2605 return; 2606 } 2607 2608 /* 2609 * add_missing_indices() 2610 * 2611 * function: Fix dtree page in which one or more entries has an invalid index. 2612 * fsck.jfs should really fix this, but it currently does not. 2613 * Called from jfs_readdir when bad index is detected. 2614 */ 2615 static void add_missing_indices(struct inode *inode, s64 bn) 2616 { 2617 struct ldtentry *d; 2618 struct dt_lock *dtlck; 2619 int i; 2620 uint index; 2621 struct lv *lv; 2622 struct metapage *mp; 2623 dtpage_t *p; 2624 int rc; 2625 s8 *stbl; 2626 tid_t tid; 2627 struct tlock *tlck; 2628 2629 tid = txBegin(inode->i_sb, 0); 2630 2631 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2632 2633 if (rc) { 2634 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2635 goto end; 2636 } 2637 BT_MARK_DIRTY(mp, inode); 2638 2639 ASSERT(p->header.flag & BT_LEAF); 2640 2641 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2642 if (BT_IS_ROOT(mp)) 2643 tlck->type |= tlckBTROOT; 2644 2645 dtlck = (struct dt_lock *) &tlck->lock; 2646 2647 stbl = DT_GETSTBL(p); 2648 for (i = 0; i < p->header.nextindex; i++) { 2649 d = (struct ldtentry *) &p->slot[stbl[i]]; 2650 index = le32_to_cpu(d->index); 2651 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2652 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2653 if (dtlck->index >= dtlck->maxcnt) 2654 dtlck = (struct dt_lock *) txLinelock(dtlck); 2655 lv = &dtlck->lv[dtlck->index]; 2656 lv->offset = stbl[i]; 2657 lv->length = 1; 2658 dtlck->index++; 2659 } 2660 } 2661 2662 DT_PUTPAGE(mp); 2663 (void) txCommit(tid, 1, &inode, 0); 2664 end: 2665 txEnd(tid); 2666 } 2667 2668 /* 2669 * Buffer to hold directory entry info while traversing a dtree page 2670 * before being fed to the filldir function 2671 */ 2672 struct jfs_dirent { 2673 loff_t position; 2674 int ino; 2675 u16 name_len; 2676 char name[]; 2677 }; 2678 2679 /* 2680 * function to determine next variable-sized jfs_dirent in buffer 2681 */ 2682 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2683 { 2684 return (struct jfs_dirent *) 2685 ((char *)dirent + 2686 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2687 sizeof (loff_t) - 1) & 2688 ~(sizeof (loff_t) - 1))); 2689 } 2690 2691 /* 2692 * jfs_readdir() 2693 * 2694 * function: read directory entries sequentially 2695 * from the specified entry offset 2696 * 2697 * parameter: 2698 * 2699 * return: offset = (pn, index) of start entry 
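 *
 * note: for a directory with a persistent index (DO_INDEX), the
 * offset returned to the vfs is a directory index table cookie
 * (internal index + 1); for a legacy directory it is the packed
 * (pn, index) pair described by struct dtoffset below.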
2700 * of next jfs_readdir()/dtRead() 2701 */ 2702 int jfs_readdir(struct file *file, struct dir_context *ctx) 2703 { 2704 struct inode *ip = file_inode(file); 2705 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2706 int rc = 0; 2707 loff_t dtpos; /* legacy OS/2 style position */ 2708 struct dtoffset { 2709 s16 pn; 2710 s16 index; 2711 s32 unused; 2712 } *dtoffset = (struct dtoffset *) &dtpos; 2713 s64 bn; 2714 struct metapage *mp; 2715 dtpage_t *p; 2716 int index; 2717 s8 *stbl; 2718 struct btstack btstack; 2719 int i, next; 2720 struct ldtentry *d; 2721 struct dtslot *t; 2722 int d_namleft, len, outlen; 2723 unsigned long dirent_buf; 2724 char *name_ptr; 2725 u32 dir_index; 2726 int do_index = 0; 2727 uint loop_count = 0; 2728 struct jfs_dirent *jfs_dirent; 2729 int jfs_dirents; 2730 int overflow, fix_page, page_fixed = 0; 2731 static int unique_pos = 2; /* If we can't fix broken index */ 2732 2733 if (ctx->pos == DIREND) 2734 return 0; 2735 2736 if (DO_INDEX(ip)) { 2737 /* 2738 * persistent index is stored in directory entries. 2739 * Special cases: 0 = . 2740 * 1 = .. 2741 * -1 = End of directory 2742 */ 2743 do_index = 1; 2744 2745 dir_index = (u32) ctx->pos; 2746 2747 /* 2748 * NFSv4 reserves cookies 1 and 2 for . and .. so the value 2749 * we return to the vfs is one greater than the one we use 2750 * internally. 2751 */ 2752 if (dir_index) 2753 dir_index--; 2754 2755 if (dir_index > 1) { 2756 struct dir_table_slot dirtab_slot; 2757 2758 if (dtEmpty(ip) || 2759 (dir_index >= JFS_IP(ip)->next_index)) { 2760 /* Stale position. Directory has shrunk */ 2761 ctx->pos = DIREND; 2762 return 0; 2763 } 2764 repeat: 2765 rc = read_index(ip, dir_index, &dirtab_slot); 2766 if (rc) { 2767 ctx->pos = DIREND; 2768 return rc; 2769 } 2770 if (dirtab_slot.flag == DIR_INDEX_FREE) { 2771 if (loop_count++ > JFS_IP(ip)->next_index) { 2772 jfs_err("jfs_readdir detected infinite loop!"); 2773 ctx->pos = DIREND; 2774 return 0; 2775 } 2776 dir_index = le32_to_cpu(dirtab_slot.addr2); 2777 if (dir_index == -1) { 2778 ctx->pos = DIREND; 2779 return 0; 2780 } 2781 goto repeat; 2782 } 2783 bn = addressDTS(&dirtab_slot); 2784 index = dirtab_slot.slot; 2785 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2786 if (rc) { 2787 ctx->pos = DIREND; 2788 return 0; 2789 } 2790 if (p->header.flag & BT_INTERNAL) { 2791 jfs_err("jfs_readdir: bad index table"); 2792 DT_PUTPAGE(mp); 2793 ctx->pos = DIREND; 2794 return 0; 2795 } 2796 } else { 2797 if (dir_index == 0) { 2798 /* 2799 * self "." 2800 */ 2801 ctx->pos = 1; 2802 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2803 return 0; 2804 } 2805 /* 2806 * parent ".." 2807 */ 2808 ctx->pos = 2; 2809 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2810 return 0; 2811 2812 /* 2813 * Find first entry of left-most leaf 2814 */ 2815 if (dtEmpty(ip)) { 2816 ctx->pos = DIREND; 2817 return 0; 2818 } 2819 2820 if ((rc = dtReadFirst(ip, &btstack))) 2821 return rc; 2822 2823 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2824 } 2825 } else { 2826 /* 2827 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 2828 * 2829 * pn = 0; index = 1: First entry "." 2830 * pn = 0; index = 2: Second entry ".." 2831 * pn > 0: Real entries, pn=1 -> leftmost page 2832 * pn = index = -1: No more entries 2833 */ 2834 dtpos = ctx->pos; 2835 if (dtpos < 2) { 2836 /* build "." entry */ 2837 ctx->pos = 1; 2838 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 2839 return 0; 2840 dtoffset->index = 2; 2841 ctx->pos = dtpos; 2842 } 2843 2844 if (dtoffset->pn == 0) { 2845 if (dtoffset->index == 2) { 2846 /* build ".." 
entry */ 2847 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 2848 return 0; 2849 } else { 2850 jfs_err("jfs_readdir called with invalid offset!"); 2851 } 2852 dtoffset->pn = 1; 2853 dtoffset->index = 0; 2854 ctx->pos = dtpos; 2855 } 2856 2857 if (dtEmpty(ip)) { 2858 ctx->pos = DIREND; 2859 return 0; 2860 } 2861 2862 if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { 2863 jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext", 2864 rc); 2865 ctx->pos = DIREND; 2866 return 0; 2867 } 2868 /* get start leaf page and index */ 2869 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2870 2871 /* offset beyond directory eof ? */ 2872 if (bn < 0) { 2873 ctx->pos = DIREND; 2874 return 0; 2875 } 2876 } 2877 2878 dirent_buf = __get_free_page(GFP_KERNEL); 2879 if (dirent_buf == 0) { 2880 DT_PUTPAGE(mp); 2881 jfs_warn("jfs_readdir: __get_free_page failed!"); 2882 ctx->pos = DIREND; 2883 return -ENOMEM; 2884 } 2885 2886 while (1) { 2887 jfs_dirent = (struct jfs_dirent *) dirent_buf; 2888 jfs_dirents = 0; 2889 overflow = fix_page = 0; 2890 2891 stbl = DT_GETSTBL(p); 2892 2893 for (i = index; i < p->header.nextindex; i++) { 2894 if (stbl[i] < 0 || stbl[i] > 127) { 2895 jfs_err("JFS: Invalid stbl[%d] = %d for inode %ld, block = %lld", 2896 i, stbl[i], (long)ip->i_ino, (long long)bn); 2897 free_page(dirent_buf); 2898 DT_PUTPAGE(mp); 2899 return -EIO; 2900 } 2901 2902 d = (struct ldtentry *) & p->slot[stbl[i]]; 2903 2904 if (((long) jfs_dirent + d->namlen + 1) > 2905 (dirent_buf + PAGE_SIZE)) { 2906 /* DBCS codepages could overrun dirent_buf */ 2907 index = i; 2908 overflow = 1; 2909 break; 2910 } 2911 2912 d_namleft = d->namlen; 2913 name_ptr = jfs_dirent->name; 2914 jfs_dirent->ino = le32_to_cpu(d->inumber); 2915 2916 if (do_index) { 2917 len = min(d_namleft, DTLHDRDATALEN); 2918 jfs_dirent->position = le32_to_cpu(d->index); 2919 /* 2920 * d->index should always be valid, but it 2921 * isn't. fsck.jfs doesn't create the 2922 * directory index for the lost+found 2923 * directory. Rather than let it go, 2924 * we can try to fix it. 2925 */ 2926 if ((jfs_dirent->position < 2) || 2927 (jfs_dirent->position >= 2928 JFS_IP(ip)->next_index)) { 2929 if (!page_fixed && !isReadOnly(ip)) { 2930 fix_page = 1; 2931 /* 2932 * setting overflow and setting 2933 * index to i will cause the 2934 * same page to be processed 2935 * again starting here 2936 */ 2937 overflow = 1; 2938 index = i; 2939 break; 2940 } 2941 jfs_dirent->position = unique_pos++; 2942 } 2943 /* 2944 * We add 1 to the index because we may 2945 * use a value of 2 internally, and NFSv4 2946 * doesn't like that. 
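 * (the matching adjustment is made near the top of jfs_readdir(),
 * where a non-zero incoming ctx->pos is decremented before being
 * used as a directory index.)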
2947 */ 2948 jfs_dirent->position++; 2949 } else { 2950 jfs_dirent->position = dtpos; 2951 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 2952 } 2953 2954 /* copy the name of head/only segment */ 2955 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 2956 codepage); 2957 jfs_dirent->name_len = outlen; 2958 2959 /* copy name in the additional segment(s) */ 2960 next = d->next; 2961 while (next >= 0) { 2962 t = (struct dtslot *) & p->slot[next]; 2963 name_ptr += outlen; 2964 d_namleft -= len; 2965 /* Sanity Check */ 2966 if (d_namleft == 0) { 2967 jfs_error(ip->i_sb, 2968 "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", 2969 (long)ip->i_ino, 2970 (long long)bn, 2971 i); 2972 goto skip_one; 2973 } 2974 len = min(d_namleft, DTSLOTDATALEN); 2975 outlen = jfs_strfromUCS_le(name_ptr, t->name, 2976 len, codepage); 2977 jfs_dirent->name_len += outlen; 2978 2979 next = t->next; 2980 } 2981 2982 jfs_dirents++; 2983 jfs_dirent = next_jfs_dirent(jfs_dirent); 2984 skip_one: 2985 if (!do_index) 2986 dtoffset->index++; 2987 } 2988 2989 if (!overflow) { 2990 /* Point to next leaf page */ 2991 if (p->header.flag & BT_ROOT) 2992 bn = 0; 2993 else { 2994 bn = le64_to_cpu(p->header.next); 2995 index = 0; 2996 /* update offset (pn:index) for new page */ 2997 if (!do_index) { 2998 dtoffset->pn++; 2999 dtoffset->index = 0; 3000 } 3001 } 3002 page_fixed = 0; 3003 } 3004 3005 /* unpin previous leaf page */ 3006 DT_PUTPAGE(mp); 3007 3008 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3009 while (jfs_dirents--) { 3010 ctx->pos = jfs_dirent->position; 3011 if (!dir_emit(ctx, jfs_dirent->name, 3012 jfs_dirent->name_len, 3013 jfs_dirent->ino, DT_UNKNOWN)) 3014 goto out; 3015 jfs_dirent = next_jfs_dirent(jfs_dirent); 3016 } 3017 3018 if (fix_page) { 3019 add_missing_indices(ip, bn); 3020 page_fixed = 1; 3021 } 3022 3023 if (!overflow && (bn == 0)) { 3024 ctx->pos = DIREND; 3025 break; 3026 } 3027 3028 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3029 if (rc) { 3030 free_page(dirent_buf); 3031 return rc; 3032 } 3033 } 3034 3035 out: 3036 free_page(dirent_buf); 3037 3038 return rc; 3039 } 3040 3041 3042 /* 3043 * dtReadFirst() 3044 * 3045 * function: get the leftmost page of the directory 3046 */ 3047 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3048 { 3049 int rc = 0; 3050 s64 bn; 3051 int psize = 288; /* initial in-line directory */ 3052 struct metapage *mp; 3053 dtpage_t *p; 3054 s8 *stbl; 3055 struct btframe *btsp; 3056 pxd_t *xd; 3057 3058 BT_CLR(btstack); /* reset stack */ 3059 3060 /* 3061 * descend leftmost path of the tree 3062 * 3063 * by convention, root bn = 0. 
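 *
 * each pass reads the current page: an internal page is pushed on
 * the btstack and the descent follows the pxd of its leftmost
 * entry; the first leaf page reached is returned pinned.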
3064 */ 3065 for (bn = 0;;) { 3066 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3067 if (rc) 3068 return rc; 3069 3070 /* 3071 * leftmost leaf page 3072 */ 3073 if (p->header.flag & BT_LEAF) { 3074 /* return leftmost entry */ 3075 btsp = btstack->top; 3076 btsp->bn = bn; 3077 btsp->index = 0; 3078 btsp->mp = mp; 3079 3080 return 0; 3081 } 3082 3083 /* 3084 * descend down to leftmost child page 3085 */ 3086 if (BT_STACK_FULL(btstack)) { 3087 DT_PUTPAGE(mp); 3088 jfs_error(ip->i_sb, "btstack overrun\n"); 3089 BT_STACK_DUMP(btstack); 3090 return -EIO; 3091 } 3092 /* push (bn, index) of the parent page/entry */ 3093 BT_PUSH(btstack, bn, 0); 3094 3095 /* get the leftmost entry */ 3096 stbl = DT_GETSTBL(p); 3097 3098 if (stbl[0] < 0 || stbl[0] > 127) { 3099 DT_PUTPAGE(mp); 3100 jfs_error(ip->i_sb, "stbl[0] out of bound\n"); 3101 return -EIO; 3102 } 3103 3104 xd = (pxd_t *) & p->slot[stbl[0]]; 3105 3106 /* get the child page block address */ 3107 bn = addressPXD(xd); 3108 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3109 3110 /* unpin the parent page */ 3111 DT_PUTPAGE(mp); 3112 } 3113 } 3114 3115 3116 /* 3117 * dtReadNext() 3118 * 3119 * function: get the page of the specified offset (pn:index) 3120 * 3121 * return: if (offset > eof), bn = -1; 3122 * 3123 * note: if index > nextindex of the target leaf page, 3124 * start with 1st entry of next leaf page; 3125 */ 3126 static int dtReadNext(struct inode *ip, loff_t * offset, 3127 struct btstack * btstack) 3128 { 3129 int rc = 0; 3130 struct dtoffset { 3131 s16 pn; 3132 s16 index; 3133 s32 unused; 3134 } *dtoffset = (struct dtoffset *) offset; 3135 s64 bn; 3136 struct metapage *mp; 3137 dtpage_t *p; 3138 int index; 3139 int pn; 3140 s8 *stbl; 3141 struct btframe *btsp, *parent; 3142 pxd_t *xd; 3143 3144 /* 3145 * get leftmost leaf page pinned 3146 */ 3147 if ((rc = dtReadFirst(ip, btstack))) 3148 return rc; 3149 3150 /* get leaf page */ 3151 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3152 3153 /* get the start offset (pn:index) */ 3154 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3155 index = dtoffset->index; 3156 3157 /* start at leftmost page ? */ 3158 if (pn == 0) { 3159 /* offset beyond eof ? */ 3160 if (index < p->header.nextindex) 3161 goto out; 3162 3163 if (p->header.flag & BT_ROOT) { 3164 bn = -1; 3165 goto out; 3166 } 3167 3168 /* start with 1st entry of next leaf page */ 3169 dtoffset->pn++; 3170 dtoffset->index = index = 0; 3171 goto a; 3172 } 3173 3174 /* start at non-leftmost page: scan parent pages for large pn */ 3175 if (p->header.flag & BT_ROOT) { 3176 bn = -1; 3177 goto out; 3178 } 3179 3180 /* start after next leaf page ? */ 3181 if (pn > 1) 3182 goto b; 3183 3184 /* get leaf page pn = 1 */ 3185 a: 3186 bn = le64_to_cpu(p->header.next); 3187 3188 /* unpin leaf page */ 3189 DT_PUTPAGE(mp); 3190 3191 /* offset beyond eof ? 
*/ 3192 if (bn == 0) { 3193 bn = -1; 3194 goto out; 3195 } 3196 3197 goto c; 3198 3199 /* 3200 * scan last internal page level to get target leaf page 3201 */ 3202 b: 3203 /* unpin leftmost leaf page */ 3204 DT_PUTPAGE(mp); 3205 3206 /* get left most parent page */ 3207 btsp = btstack->top; 3208 parent = btsp - 1; 3209 bn = parent->bn; 3210 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3211 if (rc) 3212 return rc; 3213 3214 /* scan parent pages at last internal page level */ 3215 while (pn >= p->header.nextindex) { 3216 pn -= p->header.nextindex; 3217 3218 /* get next parent page address */ 3219 bn = le64_to_cpu(p->header.next); 3220 3221 /* unpin current parent page */ 3222 DT_PUTPAGE(mp); 3223 3224 /* offset beyond eof ? */ 3225 if (bn == 0) { 3226 bn = -1; 3227 goto out; 3228 } 3229 3230 /* get next parent page */ 3231 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3232 if (rc) 3233 return rc; 3234 3235 /* update parent page stack frame */ 3236 parent->bn = bn; 3237 } 3238 3239 /* get leaf page address */ 3240 stbl = DT_GETSTBL(p); 3241 xd = (pxd_t *) & p->slot[stbl[pn]]; 3242 bn = addressPXD(xd); 3243 3244 /* unpin parent page */ 3245 DT_PUTPAGE(mp); 3246 3247 /* 3248 * get target leaf page 3249 */ 3250 c: 3251 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3252 if (rc) 3253 return rc; 3254 3255 /* 3256 * leaf page has been completed: 3257 * start with 1st entry of next leaf page 3258 */ 3259 if (index >= p->header.nextindex) { 3260 bn = le64_to_cpu(p->header.next); 3261 3262 /* unpin leaf page */ 3263 DT_PUTPAGE(mp); 3264 3265 /* offset beyond eof ? */ 3266 if (bn == 0) { 3267 bn = -1; 3268 goto out; 3269 } 3270 3271 /* get next leaf page */ 3272 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3273 if (rc) 3274 return rc; 3275 3276 /* start with 1st entry of next leaf page */ 3277 dtoffset->pn++; 3278 dtoffset->index = 0; 3279 } 3280 3281 out: 3282 /* return target leaf page pinned */ 3283 btsp = btstack->top; 3284 btsp->bn = bn; 3285 btsp->index = dtoffset->index; 3286 btsp->mp = mp; 3287 3288 return 0; 3289 } 3290 3291 3292 /* 3293 * dtCompare() 3294 * 3295 * function: compare search key with an internal entry 3296 * 3297 * return: 3298 * < 0 if k is < record 3299 * = 0 if k is = record 3300 * > 0 if k is > record 3301 */ 3302 static int dtCompare(struct component_name * key, /* search key */ 3303 dtpage_t * p, /* directory page */ 3304 int si) 3305 { /* entry slot index */ 3306 wchar_t *kname; 3307 __le16 *name; 3308 int klen, namlen, len, rc; 3309 struct idtentry *ih; 3310 struct dtslot *t; 3311 3312 /* 3313 * force the left-most key on internal pages, at any level of 3314 * the tree, to be less than any search key. 3315 * this obviates having to update the leftmost key on an internal 3316 * page when the user inserts a new key in the tree smaller than 3317 * anything that has been stored. 3318 * 3319 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3320 * at any internal page at any level of the tree, 3321 * it descends to child of the entry anyway - 3322 * ? 
make the entry as min size dummy entry) 3323 * 3324 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3325 * return (1); 3326 */ 3327 3328 kname = key->name; 3329 klen = key->namlen; 3330 3331 ih = (struct idtentry *) & p->slot[si]; 3332 si = ih->next; 3333 name = ih->name; 3334 namlen = ih->namlen; 3335 len = min(namlen, DTIHDRDATALEN); 3336 3337 /* compare with head/only segment */ 3338 len = min(klen, len); 3339 if ((rc = UniStrncmp_le(kname, name, len))) 3340 return rc; 3341 3342 klen -= len; 3343 namlen -= len; 3344 3345 /* compare with additional segment(s) */ 3346 kname += len; 3347 while (klen > 0 && namlen > 0) { 3348 /* compare with next name segment */ 3349 t = (struct dtslot *) & p->slot[si]; 3350 len = min(namlen, DTSLOTDATALEN); 3351 len = min(klen, len); 3352 name = t->name; 3353 if ((rc = UniStrncmp_le(kname, name, len))) 3354 return rc; 3355 3356 klen -= len; 3357 namlen -= len; 3358 kname += len; 3359 si = t->next; 3360 } 3361 3362 return (klen - namlen); 3363 } 3364 3365 3366 3367 3368 /* 3369 * ciCompare() 3370 * 3371 * function: compare search key with an (leaf/internal) entry 3372 * 3373 * return: 3374 * < 0 if k is < record 3375 * = 0 if k is = record 3376 * > 0 if k is > record 3377 */ 3378 static int ciCompare(struct component_name * key, /* search key */ 3379 dtpage_t * p, /* directory page */ 3380 int si, /* entry slot index */ 3381 int flag) 3382 { 3383 wchar_t *kname, x; 3384 __le16 *name; 3385 int klen, namlen, len, rc; 3386 struct ldtentry *lh; 3387 struct idtentry *ih; 3388 struct dtslot *t; 3389 int i; 3390 3391 /* 3392 * force the left-most key on internal pages, at any level of 3393 * the tree, to be less than any search key. 3394 * this obviates having to update the leftmost key on an internal 3395 * page when the user inserts a new key in the tree smaller than 3396 * anything that has been stored. 3397 * 3398 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3399 * at any internal page at any level of the tree, 3400 * it descends to child of the entry anyway - 3401 * ? 
make the entry as min size dummy entry) 3402 * 3403 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3404 * return (1); 3405 */ 3406 3407 kname = key->name; 3408 klen = key->namlen; 3409 3410 /* 3411 * leaf page entry 3412 */ 3413 if (p->header.flag & BT_LEAF) { 3414 lh = (struct ldtentry *) & p->slot[si]; 3415 si = lh->next; 3416 name = lh->name; 3417 namlen = lh->namlen; 3418 if (flag & JFS_DIR_INDEX) 3419 len = min(namlen, DTLHDRDATALEN); 3420 else 3421 len = min(namlen, DTLHDRDATALEN_LEGACY); 3422 } 3423 /* 3424 * internal page entry 3425 */ 3426 else { 3427 ih = (struct idtentry *) & p->slot[si]; 3428 si = ih->next; 3429 name = ih->name; 3430 namlen = ih->namlen; 3431 len = min(namlen, DTIHDRDATALEN); 3432 } 3433 3434 /* compare with head/only segment */ 3435 len = min(klen, len); 3436 for (i = 0; i < len; i++, kname++, name++) { 3437 /* only uppercase if case-insensitive support is on */ 3438 if ((flag & JFS_OS2) == JFS_OS2) 3439 x = UniToupper(le16_to_cpu(*name)); 3440 else 3441 x = le16_to_cpu(*name); 3442 if ((rc = *kname - x)) 3443 return rc; 3444 } 3445 3446 klen -= len; 3447 namlen -= len; 3448 3449 /* compare with additional segment(s) */ 3450 while (klen > 0 && namlen > 0) { 3451 /* compare with next name segment */ 3452 t = (struct dtslot *) & p->slot[si]; 3453 len = min(namlen, DTSLOTDATALEN); 3454 len = min(klen, len); 3455 name = t->name; 3456 for (i = 0; i < len; i++, kname++, name++) { 3457 /* only uppercase if case-insensitive support is on */ 3458 if ((flag & JFS_OS2) == JFS_OS2) 3459 x = UniToupper(le16_to_cpu(*name)); 3460 else 3461 x = le16_to_cpu(*name); 3462 3463 if ((rc = *kname - x)) 3464 return rc; 3465 } 3466 3467 klen -= len; 3468 namlen -= len; 3469 si = t->next; 3470 } 3471 3472 return (klen - namlen); 3473 } 3474 3475 3476 /* 3477 * ciGetLeafPrefixKey() 3478 * 3479 * function: compute prefix of suffix compression 3480 * from two adjacent leaf entries 3481 * across page boundary 3482 * 3483 * return: non-zero on error 3484 * 3485 */ 3486 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3487 int ri, struct component_name * key, int flag) 3488 { 3489 int klen, namlen; 3490 wchar_t *pl, *pr, *kname; 3491 struct component_name lkey; 3492 struct component_name rkey; 3493 3494 lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3495 GFP_KERNEL); 3496 if (lkey.name == NULL) 3497 return -ENOMEM; 3498 3499 rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3500 GFP_KERNEL); 3501 if (rkey.name == NULL) { 3502 kfree(lkey.name); 3503 return -ENOMEM; 3504 } 3505 3506 /* get left and right key */ 3507 dtGetKey(lp, li, &lkey, flag); 3508 lkey.name[lkey.namlen] = 0; 3509 3510 if ((flag & JFS_OS2) == JFS_OS2) 3511 ciToUpper(&lkey); 3512 3513 dtGetKey(rp, ri, &rkey, flag); 3514 rkey.name[rkey.namlen] = 0; 3515 3516 3517 if ((flag & JFS_OS2) == JFS_OS2) 3518 ciToUpper(&rkey); 3519 3520 /* compute prefix */ 3521 klen = 0; 3522 kname = key->name; 3523 namlen = min(lkey.namlen, rkey.namlen); 3524 for (pl = lkey.name, pr = rkey.name; 3525 namlen; pl++, pr++, namlen--, klen++, kname++) { 3526 *kname = *pr; 3527 if (*pl != *pr) { 3528 key->namlen = klen + 1; 3529 goto free_names; 3530 } 3531 } 3532 3533 /* l->namlen <= r->namlen since l <= r */ 3534 if (lkey.namlen < rkey.namlen) { 3535 *kname = *pr; 3536 key->namlen = klen + 1; 3537 } else /* l->namelen == r->namelen */ 3538 key->namlen = klen; 3539 3540 free_names: 3541 kfree(lkey.name); 3542 kfree(rkey.name); 3543 return 0; 3544 } 3545 3546 3547 3548 /* 3549 * 
dtGetKey() 3550 * 3551 * function: get key of the entry 3552 */ 3553 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3554 struct component_name * key, int flag) 3555 { 3556 int si; 3557 s8 *stbl; 3558 struct ldtentry *lh; 3559 struct idtentry *ih; 3560 struct dtslot *t; 3561 int namlen, len; 3562 wchar_t *kname; 3563 __le16 *name; 3564 3565 /* get entry */ 3566 stbl = DT_GETSTBL(p); 3567 si = stbl[i]; 3568 if (p->header.flag & BT_LEAF) { 3569 lh = (struct ldtentry *) & p->slot[si]; 3570 si = lh->next; 3571 namlen = lh->namlen; 3572 name = lh->name; 3573 if (flag & JFS_DIR_INDEX) 3574 len = min(namlen, DTLHDRDATALEN); 3575 else 3576 len = min(namlen, DTLHDRDATALEN_LEGACY); 3577 } else { 3578 ih = (struct idtentry *) & p->slot[si]; 3579 si = ih->next; 3580 namlen = ih->namlen; 3581 name = ih->name; 3582 len = min(namlen, DTIHDRDATALEN); 3583 } 3584 3585 key->namlen = namlen; 3586 kname = key->name; 3587 3588 /* 3589 * move head/only segment 3590 */ 3591 UniStrncpy_from_le(kname, name, len); 3592 3593 /* 3594 * move additional segment(s) 3595 */ 3596 while (si >= 0) { 3597 /* get next segment */ 3598 t = &p->slot[si]; 3599 kname += len; 3600 namlen -= len; 3601 len = min(namlen, DTSLOTDATALEN); 3602 UniStrncpy_from_le(kname, t->name, len); 3603 3604 si = t->next; 3605 } 3606 } 3607 3608 3609 /* 3610 * dtInsertEntry() 3611 * 3612 * function: allocate free slot(s) and 3613 * write a leaf/internal entry 3614 * 3615 * return: entry slot index 3616 */ 3617 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3618 ddata_t * data, struct dt_lock ** dtlock) 3619 { 3620 struct dtslot *h, *t; 3621 struct ldtentry *lh = NULL; 3622 struct idtentry *ih = NULL; 3623 int hsi, fsi, klen, len, nextindex; 3624 wchar_t *kname; 3625 __le16 *name; 3626 s8 *stbl; 3627 pxd_t *xd; 3628 struct dt_lock *dtlck = *dtlock; 3629 struct lv *lv; 3630 int xsi, n; 3631 s64 bn = 0; 3632 struct metapage *mp = NULL; 3633 3634 klen = key->namlen; 3635 kname = key->name; 3636 3637 /* allocate a free slot */ 3638 hsi = fsi = p->header.freelist; 3639 h = &p->slot[fsi]; 3640 p->header.freelist = h->next; 3641 --p->header.freecnt; 3642 3643 /* open new linelock */ 3644 if (dtlck->index >= dtlck->maxcnt) 3645 dtlck = (struct dt_lock *) txLinelock(dtlck); 3646 3647 lv = & dtlck->lv[dtlck->index]; 3648 lv->offset = hsi; 3649 3650 /* write head/only segment */ 3651 if (p->header.flag & BT_LEAF) { 3652 lh = (struct ldtentry *) h; 3653 lh->next = h->next; 3654 lh->inumber = cpu_to_le32(data->leaf.ino); 3655 lh->namlen = klen; 3656 name = lh->name; 3657 if (data->leaf.ip) { 3658 len = min(klen, DTLHDRDATALEN); 3659 if (!(p->header.flag & BT_ROOT)) 3660 bn = addressPXD(&p->header.self); 3661 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3662 data->leaf.ip, 3663 bn, index)); 3664 } else 3665 len = min(klen, DTLHDRDATALEN_LEGACY); 3666 } else { 3667 ih = (struct idtentry *) h; 3668 ih->next = h->next; 3669 xd = (pxd_t *) ih; 3670 *xd = data->xd; 3671 ih->namlen = klen; 3672 name = ih->name; 3673 len = min(klen, DTIHDRDATALEN); 3674 } 3675 3676 UniStrncpy_to_le(name, kname, len); 3677 3678 n = 1; 3679 xsi = hsi; 3680 3681 /* write additional segment(s) */ 3682 t = h; 3683 klen -= len; 3684 while (klen) { 3685 /* get free slot */ 3686 fsi = p->header.freelist; 3687 t = &p->slot[fsi]; 3688 p->header.freelist = t->next; 3689 --p->header.freecnt; 3690 3691 /* is next slot contiguous ? 
*/ 3692 if (fsi != xsi + 1) { 3693 /* close current linelock */ 3694 lv->length = n; 3695 dtlck->index++; 3696 3697 /* open new linelock */ 3698 if (dtlck->index < dtlck->maxcnt) 3699 lv++; 3700 else { 3701 dtlck = (struct dt_lock *) txLinelock(dtlck); 3702 lv = & dtlck->lv[0]; 3703 } 3704 3705 lv->offset = fsi; 3706 n = 0; 3707 } 3708 3709 kname += len; 3710 len = min(klen, DTSLOTDATALEN); 3711 UniStrncpy_to_le(t->name, kname, len); 3712 3713 n++; 3714 xsi = fsi; 3715 klen -= len; 3716 } 3717 3718 /* close current linelock */ 3719 lv->length = n; 3720 dtlck->index++; 3721 3722 *dtlock = dtlck; 3723 3724 /* terminate last/only segment */ 3725 if (h == t) { 3726 /* single segment entry */ 3727 if (p->header.flag & BT_LEAF) 3728 lh->next = -1; 3729 else 3730 ih->next = -1; 3731 } else 3732 /* multi-segment entry */ 3733 t->next = -1; 3734 3735 /* if insert into middle, shift right succeeding entries in stbl */ 3736 stbl = DT_GETSTBL(p); 3737 nextindex = p->header.nextindex; 3738 if (index < nextindex) { 3739 memmove(stbl + index + 1, stbl + index, nextindex - index); 3740 3741 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 3742 s64 lblock; 3743 3744 /* 3745 * Need to update slot number for entries that moved 3746 * in the stbl 3747 */ 3748 mp = NULL; 3749 for (n = index + 1; n <= nextindex; n++) { 3750 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 3751 modify_index(data->leaf.tid, data->leaf.ip, 3752 le32_to_cpu(lh->index), bn, n, 3753 &mp, &lblock); 3754 } 3755 if (mp) 3756 release_metapage(mp); 3757 } 3758 } 3759 3760 stbl[index] = hsi; 3761 3762 /* advance next available entry index of stbl */ 3763 ++p->header.nextindex; 3764 } 3765 3766 3767 /* 3768 * dtMoveEntry() 3769 * 3770 * function: move entries from split/left page to new/right page 3771 * 3772 * nextindex of dst page and freelist/freecnt of both pages 3773 * are updated. 3774 */ 3775 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 3776 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 3777 int do_index) 3778 { 3779 int ssi, next; /* src slot index */ 3780 int di; /* dst entry index */ 3781 int dsi; /* dst slot index */ 3782 s8 *sstbl, *dstbl; /* sorted entry table */ 3783 int snamlen, len; 3784 struct ldtentry *slh, *dlh = NULL; 3785 struct idtentry *sih, *dih = NULL; 3786 struct dtslot *h, *s, *d; 3787 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 3788 struct lv *slv, *dlv; 3789 int xssi, ns, nd; 3790 int sfsi; 3791 3792 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 3793 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 3794 3795 dsi = dp->header.freelist; /* first (whole page) free slot */ 3796 sfsi = sp->header.freelist; 3797 3798 /* linelock destination entry slot */ 3799 dlv = & ddtlck->lv[ddtlck->index]; 3800 dlv->offset = dsi; 3801 3802 /* linelock source entry slot */ 3803 slv = & sdtlck->lv[sdtlck->index]; 3804 slv->offset = sstbl[si]; 3805 xssi = slv->offset - 1; 3806 3807 /* 3808 * move entries 3809 */ 3810 ns = nd = 0; 3811 for (di = 0; si < sp->header.nextindex; si++, di++) { 3812 ssi = sstbl[si]; 3813 dstbl[di] = dsi; 3814 3815 /* is next slot contiguous ? 
*/ 3816 if (ssi != xssi + 1) { 3817 /* close current linelock */ 3818 slv->length = ns; 3819 sdtlck->index++; 3820 3821 /* open new linelock */ 3822 if (sdtlck->index < sdtlck->maxcnt) 3823 slv++; 3824 else { 3825 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 3826 slv = & sdtlck->lv[0]; 3827 } 3828 3829 slv->offset = ssi; 3830 ns = 0; 3831 } 3832 3833 /* 3834 * move head/only segment of an entry 3835 */ 3836 /* get dst slot */ 3837 h = d = &dp->slot[dsi]; 3838 3839 /* get src slot and move */ 3840 s = &sp->slot[ssi]; 3841 if (sp->header.flag & BT_LEAF) { 3842 /* get source entry */ 3843 slh = (struct ldtentry *) s; 3844 dlh = (struct ldtentry *) h; 3845 snamlen = slh->namlen; 3846 3847 if (do_index) { 3848 len = min(snamlen, DTLHDRDATALEN); 3849 dlh->index = slh->index; /* little-endian */ 3850 } else 3851 len = min(snamlen, DTLHDRDATALEN_LEGACY); 3852 3853 memcpy(dlh, slh, 6 + len * 2); 3854 3855 next = slh->next; 3856 3857 /* update dst head/only segment next field */ 3858 dsi++; 3859 dlh->next = dsi; 3860 } else { 3861 sih = (struct idtentry *) s; 3862 snamlen = sih->namlen; 3863 3864 len = min(snamlen, DTIHDRDATALEN); 3865 dih = (struct idtentry *) h; 3866 memcpy(dih, sih, 10 + len * 2); 3867 next = sih->next; 3868 3869 dsi++; 3870 dih->next = dsi; 3871 } 3872 3873 /* free src head/only segment */ 3874 s->next = sfsi; 3875 s->cnt = 1; 3876 sfsi = ssi; 3877 3878 ns++; 3879 nd++; 3880 xssi = ssi; 3881 3882 /* 3883 * move additional segment(s) of the entry 3884 */ 3885 snamlen -= len; 3886 while ((ssi = next) >= 0) { 3887 /* is next slot contiguous ? */ 3888 if (ssi != xssi + 1) { 3889 /* close current linelock */ 3890 slv->length = ns; 3891 sdtlck->index++; 3892 3893 /* open new linelock */ 3894 if (sdtlck->index < sdtlck->maxcnt) 3895 slv++; 3896 else { 3897 sdtlck = 3898 (struct dt_lock *) 3899 txLinelock(sdtlck); 3900 slv = & sdtlck->lv[0]; 3901 } 3902 3903 slv->offset = ssi; 3904 ns = 0; 3905 } 3906 3907 /* get next source segment */ 3908 s = &sp->slot[ssi]; 3909 3910 /* get next destination free slot */ 3911 d++; 3912 3913 len = min(snamlen, DTSLOTDATALEN); 3914 UniStrncpy_le(d->name, s->name, len); 3915 3916 ns++; 3917 nd++; 3918 xssi = ssi; 3919 3920 dsi++; 3921 d->next = dsi; 3922 3923 /* free source segment */ 3924 next = s->next; 3925 s->next = sfsi; 3926 s->cnt = 1; 3927 sfsi = ssi; 3928 3929 snamlen -= len; 3930 } /* end while */ 3931 3932 /* terminate dst last/only segment */ 3933 if (h == d) { 3934 /* single segment entry */ 3935 if (dp->header.flag & BT_LEAF) 3936 dlh->next = -1; 3937 else 3938 dih->next = -1; 3939 } else 3940 /* multi-segment entry */ 3941 d->next = -1; 3942 } /* end for */ 3943 3944 /* close current linelock */ 3945 slv->length = ns; 3946 sdtlck->index++; 3947 *sdtlock = sdtlck; 3948 3949 dlv->length = nd; 3950 ddtlck->index++; 3951 *ddtlock = ddtlck; 3952 3953 /* update source header */ 3954 sp->header.freelist = sfsi; 3955 sp->header.freecnt += nd; 3956 3957 /* update destination header */ 3958 dp->header.nextindex = di; 3959 3960 dp->header.freelist = dsi; 3961 dp->header.freecnt -= nd; 3962 } 3963 3964 3965 /* 3966 * dtDeleteEntry() 3967 * 3968 * function: free a (leaf/internal) entry 3969 * 3970 * log freelist header, stbl, and each segment slot of entry 3971 * (even though last/only segment next field is modified, 3972 * physical image logging requires all segment slots of 3973 * the entry logged to avoid applying previous updates 3974 * to the same slots) 3975 */ 3976 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 3977 
{ 3978 int fsi; /* free entry slot index */
3979 s8 *stbl;
3980 struct dtslot *t;
3981 int si, freecnt;
3982 struct dt_lock *dtlck = *dtlock;
3983 struct lv *lv;
3984 int xsi, n;
3985
3986 /* get free entry slot index */
3987 stbl = DT_GETSTBL(p);
3988 fsi = stbl[fi];
3989
3990 /* open new linelock */
3991 if (dtlck->index >= dtlck->maxcnt)
3992 dtlck = (struct dt_lock *) txLinelock(dtlck);
3993 lv = & dtlck->lv[dtlck->index];
3994
3995 lv->offset = fsi;
3996
3997 /* get the head/only segment */
3998 t = &p->slot[fsi];
3999 if (p->header.flag & BT_LEAF)
4000 si = ((struct ldtentry *) t)->next;
4001 else
4002 si = ((struct idtentry *) t)->next;
4003 t->next = si;
4004 t->cnt = 1;
4005
4006 n = freecnt = 1;
4007 xsi = fsi;
4008
4009 /* find the last/only segment */
4010 while (si >= 0) {
4011 /* is next slot contiguous ? */
4012 if (si != xsi + 1) {
4013 /* close current linelock */
4014 lv->length = n;
4015 dtlck->index++;
4016
4017 /* open new linelock */
4018 if (dtlck->index < dtlck->maxcnt)
4019 lv++;
4020 else {
4021 dtlck = (struct dt_lock *) txLinelock(dtlck);
4022 lv = & dtlck->lv[0];
4023 }
4024
4025 lv->offset = si;
4026 n = 0;
4027 }
4028
4029 n++;
4030 xsi = si;
4031 freecnt++;
4032
4033 t = &p->slot[si];
4034 t->cnt = 1;
4035 si = t->next;
4036 }
4037
4038 /* close current linelock */
4039 lv->length = n;
4040 dtlck->index++;
4041
4042 *dtlock = dtlck;
4043
4044 /* update freelist */
4045 t->next = p->header.freelist;
4046 p->header.freelist = fsi;
4047 p->header.freecnt += freecnt;
4048
4049 /* if delete from middle,
4050 * shift left the succeeding entries in the stbl
4051 */
4052 si = p->header.nextindex;
4053 if (fi < si - 1)
4054 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
4055
4056 p->header.nextindex--;
4057 }
4058
4059
4060 /*
4061 * dtTruncateEntry()
4062 *
4063 * function: truncate a (leaf/internal) entry
4064 *
4065 * log freelist header, stbl, and each segment slot of entry
4066 * (even though last/only segment next field is modified,
4067 * physical image logging requires all segment slots of
4068 * the entry logged to avoid applying previous updates
4069 * to the same slots)
4070 */
4071 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
4072 {
4073 int tsi; /* truncate entry slot index */
4074 s8 *stbl;
4075 struct dtslot *t;
4076 int si, freecnt;
4077 struct dt_lock *dtlck = *dtlock;
4078 struct lv *lv;
4079 int fsi, xsi, n;
4080
4081 /* get the slot index of the entry to truncate */
4082 stbl = DT_GETSTBL(p);
4083 tsi = stbl[ti];
4084
4085 /* open new linelock */
4086 if (dtlck->index >= dtlck->maxcnt)
4087 dtlck = (struct dt_lock *) txLinelock(dtlck);
4088 lv = & dtlck->lv[dtlck->index];
4089
4090 lv->offset = tsi;
4091
4092 /* get the head/only segment */
4093 t = &p->slot[tsi];
4094 ASSERT(p->header.flag & BT_INTERNAL);
4095 ((struct idtentry *) t)->namlen = 0;
4096 si = ((struct idtentry *) t)->next;
4097 ((struct idtentry *) t)->next = -1;
4098
4099 n = 1;
4100 freecnt = 0;
4101 fsi = si;
4102 xsi = tsi;
4103
4104 /* find the last/only segment */
4105 while (si >= 0) {
4106 /* is next slot contiguous ?
*/ 4107 if (si != xsi + 1) { 4108 /* close current linelock */ 4109 lv->length = n; 4110 dtlck->index++; 4111 4112 /* open new linelock */ 4113 if (dtlck->index < dtlck->maxcnt) 4114 lv++; 4115 else { 4116 dtlck = (struct dt_lock *) txLinelock(dtlck); 4117 lv = & dtlck->lv[0]; 4118 } 4119 4120 lv->offset = si; 4121 n = 0; 4122 } 4123 4124 n++; 4125 xsi = si; 4126 freecnt++; 4127 4128 t = &p->slot[si]; 4129 t->cnt = 1; 4130 si = t->next; 4131 } 4132 4133 /* close current linelock */ 4134 lv->length = n; 4135 dtlck->index++; 4136 4137 *dtlock = dtlck; 4138 4139 /* update freelist */ 4140 if (freecnt == 0) 4141 return; 4142 t->next = p->header.freelist; 4143 p->header.freelist = fsi; 4144 p->header.freecnt += freecnt; 4145 } 4146 4147 4148 /* 4149 * dtLinelockFreelist() 4150 */ 4151 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4152 int m, /* max slot index */ 4153 struct dt_lock ** dtlock) 4154 { 4155 int fsi; /* free entry slot index */ 4156 struct dtslot *t; 4157 int si; 4158 struct dt_lock *dtlck = *dtlock; 4159 struct lv *lv; 4160 int xsi, n; 4161 4162 /* get free entry slot index */ 4163 fsi = p->header.freelist; 4164 4165 /* open new linelock */ 4166 if (dtlck->index >= dtlck->maxcnt) 4167 dtlck = (struct dt_lock *) txLinelock(dtlck); 4168 lv = & dtlck->lv[dtlck->index]; 4169 4170 lv->offset = fsi; 4171 4172 n = 1; 4173 xsi = fsi; 4174 4175 t = &p->slot[fsi]; 4176 si = t->next; 4177 4178 /* find the last/only segment */ 4179 while (si < m && si >= 0) { 4180 /* is next slot contiguous ? */ 4181 if (si != xsi + 1) { 4182 /* close current linelock */ 4183 lv->length = n; 4184 dtlck->index++; 4185 4186 /* open new linelock */ 4187 if (dtlck->index < dtlck->maxcnt) 4188 lv++; 4189 else { 4190 dtlck = (struct dt_lock *) txLinelock(dtlck); 4191 lv = & dtlck->lv[0]; 4192 } 4193 4194 lv->offset = si; 4195 n = 0; 4196 } 4197 4198 n++; 4199 xsi = si; 4200 4201 t = &p->slot[si]; 4202 si = t->next; 4203 } 4204 4205 /* close current linelock */ 4206 lv->length = n; 4207 dtlck->index++; 4208 4209 *dtlock = dtlck; 4210 } 4211 4212 4213 /* 4214 * NAME: dtModify 4215 * 4216 * FUNCTION: Modify the inode number part of a directory entry 4217 * 4218 * PARAMETERS: 4219 * tid - Transaction id 4220 * ip - Inode of parent directory 4221 * key - Name of entry to be modified 4222 * orig_ino - Original inode number expected in entry 4223 * new_ino - New inode number to put into entry 4224 * flag - JFS_RENAME 4225 * 4226 * RETURNS: 4227 * -ESTALE - If entry found does not match orig_ino passed in 4228 * -ENOENT - If no entry can be found to match key 4229 * 0 - If successfully modified entry 4230 */ 4231 int dtModify(tid_t tid, struct inode *ip, 4232 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4233 { 4234 int rc; 4235 s64 bn; 4236 struct metapage *mp; 4237 dtpage_t *p; 4238 int index; 4239 struct btstack btstack; 4240 struct tlock *tlck; 4241 struct dt_lock *dtlck; 4242 struct lv *lv; 4243 s8 *stbl; 4244 int entry_si; /* entry slot index */ 4245 struct ldtentry *entry; 4246 4247 /* 4248 * search for the entry to modify: 4249 * 4250 * dtSearch() returns (leaf page pinned, index at which to modify). 
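 *
 * only the inumber field of the entry's head segment is rewritten
 * below, so a single-slot linelock on that segment is sufficient.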
4251 */ 4252 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4253 return rc; 4254 4255 /* retrieve search result */ 4256 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4257 4258 BT_MARK_DIRTY(mp, ip); 4259 /* 4260 * acquire a transaction lock on the leaf page of named entry 4261 */ 4262 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4263 dtlck = (struct dt_lock *) & tlck->lock; 4264 4265 /* get slot index of the entry */ 4266 stbl = DT_GETSTBL(p); 4267 entry_si = stbl[index]; 4268 4269 /* linelock entry */ 4270 ASSERT(dtlck->index == 0); 4271 lv = & dtlck->lv[0]; 4272 lv->offset = entry_si; 4273 lv->length = 1; 4274 dtlck->index++; 4275 4276 /* get the head/only segment */ 4277 entry = (struct ldtentry *) & p->slot[entry_si]; 4278 4279 /* substitute the inode number of the entry */ 4280 entry->inumber = cpu_to_le32(new_ino); 4281 4282 /* unpin the leaf page */ 4283 DT_PUTPAGE(mp); 4284 4285 return 0; 4286 } 4287