1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * jfs_dtree.c: directory B+-tree manager 21 * 22 * B+-tree with variable length key directory: 23 * 24 * each directory page is structured as an array of 32-byte 25 * directory entry slots initialized as a freelist 26 * to avoid search/compaction of free space at insertion. 27 * when an entry is inserted, a number of slots are allocated 28 * from the freelist as required to store variable length data 29 * of the entry; when the entry is deleted, slots of the entry 30 * are returned to freelist. 31 * 32 * leaf entry stores full name as key and file serial number 33 * (aka inode number) as data. 34 * internal/router entry stores sufffix compressed name 35 * as key and simple extent descriptor as data. 36 * 37 * each directory page maintains a sorted entry index table 38 * which stores the start slot index of sorted entries 39 * to allow binary search on the table. 40 * 41 * directory starts as a root/leaf page in on-disk inode 42 * inline data area. 43 * when it becomes full, it starts a leaf of a external extent 44 * of length of 1 block. each time the first leaf becomes full, 45 * it is extended rather than split (its size is doubled), 46 * until its length becoms 4 KBytes, from then the extent is split 47 * with new 4 Kbyte extent when it becomes full 48 * to reduce external fragmentation of small directories. 49 * 50 * blah, blah, blah, for linear scan of directory in pieces by 51 * readdir(). 52 * 53 * 54 * case-insensitive directory file system 55 * 56 * names are stored in case-sensitive way in leaf entry. 57 * but stored, searched and compared in case-insensitive (uppercase) order 58 * (i.e., both search key and entry key are folded for search/compare): 59 * (note that case-sensitive order is BROKEN in storage, e.g., 60 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 61 * 62 * entries which folds to the same key makes up a equivalent class 63 * whose members are stored as contiguous cluster (may cross page boundary) 64 * but whose order is arbitrary and acts as duplicate, e.g., 65 * abc, Abc, aBc, abC) 66 * 67 * once match is found at leaf, requires scan forward/backward 68 * either for, in case-insensitive search, duplicate 69 * or for, in case-sensitive search, for exact match 70 * 71 * router entry must be created/stored in case-insensitive way 72 * in internal entry: 73 * (right most key of left page and left most key of right page 74 * are folded, and its suffix compression is propagated as router 75 * key in parent) 76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 77 * should be made the router key for the split) 78 * 79 * case-insensitive search: 80 * 81 * fold search key; 82 * 83 * case-insensitive search of B-tree: 84 * for internal entry, router key is already folded; 85 * for leaf entry, fold the entry key before comparison. 86 * 87 * if (leaf entry case-insensitive match found) 88 * if (next entry satisfies case-insensitive match) 89 * return EDUPLICATE; 90 * if (prev entry satisfies case-insensitive match) 91 * return EDUPLICATE; 92 * return match; 93 * else 94 * return no match; 95 * 96 * serialization: 97 * target directory inode lock is being held on entry/exit 98 * of all main directory service routines. 99 * 100 * log based recovery: 101 */ 102 103 #include <linux/fs.h> 104 #include <linux/quotaops.h> 105 #include "jfs_incore.h" 106 #include "jfs_superblock.h" 107 #include "jfs_filsys.h" 108 #include "jfs_metapage.h" 109 #include "jfs_dmap.h" 110 #include "jfs_unicode.h" 111 #include "jfs_debug.h" 112 113 /* dtree split parameter */ 114 struct dtsplit { 115 struct metapage *mp; 116 s16 index; 117 s16 nslot; 118 struct component_name *key; 119 ddata_t *data; 120 struct pxdlist *pxdlist; 121 }; 122 123 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 124 125 /* get page buffer for specified block address */ 126 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 127 {\ 128 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ 129 if (!(RC))\ 130 {\ 131 if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ 132 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ 133 {\ 134 BT_PUTPAGE(MP);\ 135 jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ 136 MP = NULL;\ 137 RC = -EIO;\ 138 }\ 139 }\ 140 } 141 142 /* for consistency */ 143 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 144 145 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 146 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 147 148 /* 149 * forward references 150 */ 151 static int dtSplitUp(tid_t tid, struct inode *ip, 152 struct dtsplit * split, struct btstack * btstack); 153 154 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 155 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 156 157 static int dtExtendPage(tid_t tid, struct inode *ip, 158 struct dtsplit * split, struct btstack * btstack); 159 160 static int dtSplitRoot(tid_t tid, struct inode *ip, 161 struct dtsplit * split, struct metapage ** rmpp); 162 163 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 164 dtpage_t * fp, struct btstack * btstack); 165 166 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 167 168 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 169 170 static int dtReadNext(struct inode *ip, 171 loff_t * offset, struct btstack * btstack); 172 173 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 174 175 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 176 int flag); 177 178 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 179 int flag); 180 181 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 182 int ri, struct component_name * key, int flag); 183 184 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 185 ddata_t * data, struct dt_lock **); 186 187 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 188 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 189 int do_index); 190 191 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 192 193 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 194 195 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 196 197 #define ciToUpper(c) UniStrupr((c)->name) 198 199 /* 200 * read_index_page() 201 * 202 * Reads a page of a directory's index table. 203 * Having metadata mapped into the directory inode's address space 204 * presents a multitude of problems. We avoid this by mapping to 205 * the absolute address space outside of the *_metapage routines 206 */ 207 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 208 { 209 int rc; 210 s64 xaddr; 211 int xflag; 212 s32 xlen; 213 214 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 215 if (rc || (xaddr == 0)) 216 return NULL; 217 218 return read_metapage(inode, xaddr, PSIZE, 1); 219 } 220 221 /* 222 * get_index_page() 223 * 224 * Same as get_index_page(), but get's a new page without reading 225 */ 226 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 227 { 228 int rc; 229 s64 xaddr; 230 int xflag; 231 s32 xlen; 232 233 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 234 if (rc || (xaddr == 0)) 235 return NULL; 236 237 return get_metapage(inode, xaddr, PSIZE, 1); 238 } 239 240 /* 241 * find_index() 242 * 243 * Returns dtree page containing directory table entry for specified 244 * index and pointer to its entry. 245 * 246 * mp must be released by caller. 247 */ 248 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 249 struct metapage ** mp, s64 *lblock) 250 { 251 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 252 s64 blkno; 253 s64 offset; 254 int page_offset; 255 struct dir_table_slot *slot; 256 static int maxWarnings = 10; 257 258 if (index < 2) { 259 if (maxWarnings) { 260 jfs_warn("find_entry called with index = %d", index); 261 maxWarnings--; 262 } 263 return NULL; 264 } 265 266 if (index >= jfs_ip->next_index) { 267 jfs_warn("find_entry called with index >= next_index"); 268 return NULL; 269 } 270 271 if (jfs_dirtable_inline(ip)) { 272 /* 273 * Inline directory table 274 */ 275 *mp = NULL; 276 slot = &jfs_ip->i_dirtable[index - 2]; 277 } else { 278 offset = (index - 2) * sizeof(struct dir_table_slot); 279 page_offset = offset & (PSIZE - 1); 280 blkno = ((offset + 1) >> L2PSIZE) << 281 JFS_SBI(ip->i_sb)->l2nbperpage; 282 283 if (*mp && (*lblock != blkno)) { 284 release_metapage(*mp); 285 *mp = NULL; 286 } 287 if (!(*mp)) { 288 *lblock = blkno; 289 *mp = read_index_page(ip, blkno); 290 } 291 if (!(*mp)) { 292 jfs_err("free_index: error reading directory table"); 293 return NULL; 294 } 295 296 slot = 297 (struct dir_table_slot *) ((char *) (*mp)->data + 298 page_offset); 299 } 300 return slot; 301 } 302 303 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 304 u32 index) 305 { 306 struct tlock *tlck; 307 struct linelock *llck; 308 struct lv *lv; 309 310 tlck = txLock(tid, ip, mp, tlckDATA); 311 llck = (struct linelock *) tlck->lock; 312 313 if (llck->index >= llck->maxcnt) 314 llck = txLinelock(llck); 315 lv = &llck->lv[llck->index]; 316 317 /* 318 * Linelock slot size is twice the size of directory table 319 * slot size. 512 entries per page. 320 */ 321 lv->offset = ((index - 2) & 511) >> 1; 322 lv->length = 1; 323 llck->index++; 324 } 325 326 /* 327 * add_index() 328 * 329 * Adds an entry to the directory index table. This is used to provide 330 * each directory entry with a persistent index in which to resume 331 * directory traversals 332 */ 333 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 334 { 335 struct super_block *sb = ip->i_sb; 336 struct jfs_sb_info *sbi = JFS_SBI(sb); 337 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 338 u64 blkno; 339 struct dir_table_slot *dirtab_slot; 340 u32 index; 341 struct linelock *llck; 342 struct lv *lv; 343 struct metapage *mp; 344 s64 offset; 345 uint page_offset; 346 struct tlock *tlck; 347 s64 xaddr; 348 349 ASSERT(DO_INDEX(ip)); 350 351 if (jfs_ip->next_index < 2) { 352 jfs_warn("add_index: next_index = %d. Resetting!", 353 jfs_ip->next_index); 354 jfs_ip->next_index = 2; 355 } 356 357 index = jfs_ip->next_index++; 358 359 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 360 /* 361 * i_size reflects size of index table, or 8 bytes per entry. 362 */ 363 ip->i_size = (loff_t) (index - 1) << 3; 364 365 /* 366 * dir table fits inline within inode 367 */ 368 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 369 dirtab_slot->flag = DIR_INDEX_VALID; 370 dirtab_slot->slot = slot; 371 DTSaddress(dirtab_slot, bn); 372 373 set_cflag(COMMIT_Dirtable, ip); 374 375 return index; 376 } 377 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 378 struct dir_table_slot temp_table[12]; 379 380 /* 381 * It's time to move the inline table to an external 382 * page and begin to build the xtree 383 */ 384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) 385 goto clean_up; 386 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 387 DQUOT_FREE_BLOCK(ip, sbi->nbperpage); 388 goto clean_up; 389 } 390 391 /* 392 * Save the table, we're going to overwrite it with the 393 * xtree root 394 */ 395 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 396 397 /* 398 * Initialize empty x-tree 399 */ 400 xtInitRoot(tid, ip); 401 402 /* 403 * Add the first block to the xtree 404 */ 405 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 406 /* This really shouldn't fail */ 407 jfs_warn("add_index: xtInsert failed!"); 408 memcpy(&jfs_ip->i_dirtable, temp_table, 409 sizeof (temp_table)); 410 dbFree(ip, xaddr, sbi->nbperpage); 411 DQUOT_FREE_BLOCK(ip, sbi->nbperpage); 412 goto clean_up; 413 } 414 ip->i_size = PSIZE; 415 416 mp = get_index_page(ip, 0); 417 if (!mp) { 418 jfs_err("add_index: get_metapage failed!"); 419 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 420 memcpy(&jfs_ip->i_dirtable, temp_table, 421 sizeof (temp_table)); 422 goto clean_up; 423 } 424 tlck = txLock(tid, ip, mp, tlckDATA); 425 llck = (struct linelock *) & tlck->lock; 426 ASSERT(llck->index == 0); 427 lv = &llck->lv[0]; 428 429 lv->offset = 0; 430 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 431 llck->index++; 432 433 memcpy(mp->data, temp_table, sizeof(temp_table)); 434 435 mark_metapage_dirty(mp); 436 release_metapage(mp); 437 438 /* 439 * Logging is now directed by xtree tlocks 440 */ 441 clear_cflag(COMMIT_Dirtable, ip); 442 } 443 444 offset = (index - 2) * sizeof(struct dir_table_slot); 445 page_offset = offset & (PSIZE - 1); 446 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 447 if (page_offset == 0) { 448 /* 449 * This will be the beginning of a new page 450 */ 451 xaddr = 0; 452 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 453 jfs_warn("add_index: xtInsert failed!"); 454 goto clean_up; 455 } 456 ip->i_size += PSIZE; 457 458 if ((mp = get_index_page(ip, blkno))) 459 memset(mp->data, 0, PSIZE); /* Just looks better */ 460 else 461 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 462 } else 463 mp = read_index_page(ip, blkno); 464 465 if (!mp) { 466 jfs_err("add_index: get/read_metapage failed!"); 467 goto clean_up; 468 } 469 470 lock_index(tid, ip, mp, index); 471 472 dirtab_slot = 473 (struct dir_table_slot *) ((char *) mp->data + page_offset); 474 dirtab_slot->flag = DIR_INDEX_VALID; 475 dirtab_slot->slot = slot; 476 DTSaddress(dirtab_slot, bn); 477 478 mark_metapage_dirty(mp); 479 release_metapage(mp); 480 481 return index; 482 483 clean_up: 484 485 jfs_ip->next_index--; 486 487 return 0; 488 } 489 490 /* 491 * free_index() 492 * 493 * Marks an entry to the directory index table as free. 494 */ 495 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 496 { 497 struct dir_table_slot *dirtab_slot; 498 s64 lblock; 499 struct metapage *mp = NULL; 500 501 dirtab_slot = find_index(ip, index, &mp, &lblock); 502 503 if (!dirtab_slot) 504 return; 505 506 dirtab_slot->flag = DIR_INDEX_FREE; 507 dirtab_slot->slot = dirtab_slot->addr1 = 0; 508 dirtab_slot->addr2 = cpu_to_le32(next); 509 510 if (mp) { 511 lock_index(tid, ip, mp, index); 512 mark_metapage_dirty(mp); 513 release_metapage(mp); 514 } else 515 set_cflag(COMMIT_Dirtable, ip); 516 } 517 518 /* 519 * modify_index() 520 * 521 * Changes an entry in the directory index table 522 */ 523 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 524 int slot, struct metapage ** mp, s64 *lblock) 525 { 526 struct dir_table_slot *dirtab_slot; 527 528 dirtab_slot = find_index(ip, index, mp, lblock); 529 530 if (!dirtab_slot) 531 return; 532 533 DTSaddress(dirtab_slot, bn); 534 dirtab_slot->slot = slot; 535 536 if (*mp) { 537 lock_index(tid, ip, *mp, index); 538 mark_metapage_dirty(*mp); 539 } else 540 set_cflag(COMMIT_Dirtable, ip); 541 } 542 543 /* 544 * read_index() 545 * 546 * reads a directory table slot 547 */ 548 static int read_index(struct inode *ip, u32 index, 549 struct dir_table_slot * dirtab_slot) 550 { 551 s64 lblock; 552 struct metapage *mp = NULL; 553 struct dir_table_slot *slot; 554 555 slot = find_index(ip, index, &mp, &lblock); 556 if (!slot) { 557 return -EIO; 558 } 559 560 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 561 562 if (mp) 563 release_metapage(mp); 564 565 return 0; 566 } 567 568 /* 569 * dtSearch() 570 * 571 * function: 572 * Search for the entry with specified key 573 * 574 * parameter: 575 * 576 * return: 0 - search result on stack, leaf page pinned; 577 * errno - I/O error 578 */ 579 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 580 struct btstack * btstack, int flag) 581 { 582 int rc = 0; 583 int cmp = 1; /* init for empty page */ 584 s64 bn; 585 struct metapage *mp; 586 dtpage_t *p; 587 s8 *stbl; 588 int base, index, lim; 589 struct btframe *btsp; 590 pxd_t *pxd; 591 int psize = 288; /* initial in-line directory */ 592 ino_t inumber; 593 struct component_name ciKey; 594 struct super_block *sb = ip->i_sb; 595 596 ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); 597 if (!ciKey.name) { 598 rc = -ENOMEM; 599 goto dtSearch_Exit2; 600 } 601 602 603 /* uppercase search key for c-i directory */ 604 UniStrcpy(ciKey.name, key->name); 605 ciKey.namlen = key->namlen; 606 607 /* only uppercase if case-insensitive support is on */ 608 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 609 ciToUpper(&ciKey); 610 } 611 BT_CLR(btstack); /* reset stack */ 612 613 /* init level count for max pages to split */ 614 btstack->nsplit = 1; 615 616 /* 617 * search down tree from root: 618 * 619 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 620 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 621 * 622 * if entry with search key K is not found 623 * internal page search find the entry with largest key Ki 624 * less than K which point to the child page to search; 625 * leaf page search find the entry with smallest key Kj 626 * greater than K so that the returned index is the position of 627 * the entry to be shifted right for insertion of new entry. 628 * for empty tree, search key is greater than any key of the tree. 629 * 630 * by convention, root bn = 0. 631 */ 632 for (bn = 0;;) { 633 /* get/pin the page to search */ 634 DT_GETPAGE(ip, bn, mp, psize, p, rc); 635 if (rc) 636 goto dtSearch_Exit1; 637 638 /* get sorted entry table of the page */ 639 stbl = DT_GETSTBL(p); 640 641 /* 642 * binary search with search key K on the current page. 643 */ 644 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 645 index = base + (lim >> 1); 646 647 if (p->header.flag & BT_LEAF) { 648 /* uppercase leaf name to compare */ 649 cmp = 650 ciCompare(&ciKey, p, stbl[index], 651 JFS_SBI(sb)->mntflag); 652 } else { 653 /* router key is in uppercase */ 654 655 cmp = dtCompare(&ciKey, p, stbl[index]); 656 657 658 } 659 if (cmp == 0) { 660 /* 661 * search hit 662 */ 663 /* search hit - leaf page: 664 * return the entry found 665 */ 666 if (p->header.flag & BT_LEAF) { 667 inumber = le32_to_cpu( 668 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 669 670 /* 671 * search for JFS_LOOKUP 672 */ 673 if (flag == JFS_LOOKUP) { 674 *data = inumber; 675 rc = 0; 676 goto out; 677 } 678 679 /* 680 * search for JFS_CREATE 681 */ 682 if (flag == JFS_CREATE) { 683 *data = inumber; 684 rc = -EEXIST; 685 goto out; 686 } 687 688 /* 689 * search for JFS_REMOVE or JFS_RENAME 690 */ 691 if ((flag == JFS_REMOVE || 692 flag == JFS_RENAME) && 693 *data != inumber) { 694 rc = -ESTALE; 695 goto out; 696 } 697 698 /* 699 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 700 */ 701 /* save search result */ 702 *data = inumber; 703 btsp = btstack->top; 704 btsp->bn = bn; 705 btsp->index = index; 706 btsp->mp = mp; 707 708 rc = 0; 709 goto dtSearch_Exit1; 710 } 711 712 /* search hit - internal page: 713 * descend/search its child page 714 */ 715 goto getChild; 716 } 717 718 if (cmp > 0) { 719 base = index + 1; 720 --lim; 721 } 722 } 723 724 /* 725 * search miss 726 * 727 * base is the smallest index with key (Kj) greater than 728 * search key (K) and may be zero or (maxindex + 1) index. 729 */ 730 /* 731 * search miss - leaf page 732 * 733 * return location of entry (base) where new entry with 734 * search key K is to be inserted. 735 */ 736 if (p->header.flag & BT_LEAF) { 737 /* 738 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 739 */ 740 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 741 flag == JFS_RENAME) { 742 rc = -ENOENT; 743 goto out; 744 } 745 746 /* 747 * search for JFS_CREATE|JFS_FINDDIR: 748 * 749 * save search result 750 */ 751 *data = 0; 752 btsp = btstack->top; 753 btsp->bn = bn; 754 btsp->index = base; 755 btsp->mp = mp; 756 757 rc = 0; 758 goto dtSearch_Exit1; 759 } 760 761 /* 762 * search miss - internal page 763 * 764 * if base is non-zero, decrement base by one to get the parent 765 * entry of the child page to search. 766 */ 767 index = base ? base - 1 : base; 768 769 /* 770 * go down to child page 771 */ 772 getChild: 773 /* update max. number of pages to split */ 774 if (BT_STACK_FULL(btstack)) { 775 /* Something's corrupted, mark filesystem dirty so 776 * chkdsk will fix it. 777 */ 778 jfs_error(sb, "stack overrun in dtSearch!"); 779 BT_STACK_DUMP(btstack); 780 rc = -EIO; 781 goto out; 782 } 783 btstack->nsplit++; 784 785 /* push (bn, index) of the parent page/entry */ 786 BT_PUSH(btstack, bn, index); 787 788 /* get the child page block number */ 789 pxd = (pxd_t *) & p->slot[stbl[index]]; 790 bn = addressPXD(pxd); 791 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 792 793 /* unpin the parent page */ 794 DT_PUTPAGE(mp); 795 } 796 797 out: 798 DT_PUTPAGE(mp); 799 800 dtSearch_Exit1: 801 802 kfree(ciKey.name); 803 804 dtSearch_Exit2: 805 806 return rc; 807 } 808 809 810 /* 811 * dtInsert() 812 * 813 * function: insert an entry to directory tree 814 * 815 * parameter: 816 * 817 * return: 0 - success; 818 * errno - failure; 819 */ 820 int dtInsert(tid_t tid, struct inode *ip, 821 struct component_name * name, ino_t * fsn, struct btstack * btstack) 822 { 823 int rc = 0; 824 struct metapage *mp; /* meta-page buffer */ 825 dtpage_t *p; /* base B+-tree index page */ 826 s64 bn; 827 int index; 828 struct dtsplit split; /* split information */ 829 ddata_t data; 830 struct dt_lock *dtlck; 831 int n; 832 struct tlock *tlck; 833 struct lv *lv; 834 835 /* 836 * retrieve search result 837 * 838 * dtSearch() returns (leaf page pinned, index at which to insert). 839 * n.b. dtSearch() may return index of (maxindex + 1) of 840 * the full page. 841 */ 842 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 843 844 /* 845 * insert entry for new key 846 */ 847 if (DO_INDEX(ip)) { 848 if (JFS_IP(ip)->next_index == DIREND) { 849 DT_PUTPAGE(mp); 850 return -EMLINK; 851 } 852 n = NDTLEAF(name->namlen); 853 data.leaf.tid = tid; 854 data.leaf.ip = ip; 855 } else { 856 n = NDTLEAF_LEGACY(name->namlen); 857 data.leaf.ip = NULL; /* signifies legacy directory format */ 858 } 859 data.leaf.ino = *fsn; 860 861 /* 862 * leaf page does not have enough room for new entry: 863 * 864 * extend/split the leaf page; 865 * 866 * dtSplitUp() will insert the entry and unpin the leaf page. 867 */ 868 if (n > p->header.freecnt) { 869 split.mp = mp; 870 split.index = index; 871 split.nslot = n; 872 split.key = name; 873 split.data = &data; 874 rc = dtSplitUp(tid, ip, &split, btstack); 875 return rc; 876 } 877 878 /* 879 * leaf page does have enough room for new entry: 880 * 881 * insert the new data entry into the leaf page; 882 */ 883 BT_MARK_DIRTY(mp, ip); 884 /* 885 * acquire a transaction lock on the leaf page 886 */ 887 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 888 dtlck = (struct dt_lock *) & tlck->lock; 889 ASSERT(dtlck->index == 0); 890 lv = & dtlck->lv[0]; 891 892 /* linelock header */ 893 lv->offset = 0; 894 lv->length = 1; 895 dtlck->index++; 896 897 dtInsertEntry(p, index, name, &data, &dtlck); 898 899 /* linelock stbl of non-root leaf page */ 900 if (!(p->header.flag & BT_ROOT)) { 901 if (dtlck->index >= dtlck->maxcnt) 902 dtlck = (struct dt_lock *) txLinelock(dtlck); 903 lv = & dtlck->lv[dtlck->index]; 904 n = index >> L2DTSLOTSIZE; 905 lv->offset = p->header.stblindex + n; 906 lv->length = 907 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 908 dtlck->index++; 909 } 910 911 /* unpin the leaf page */ 912 DT_PUTPAGE(mp); 913 914 return 0; 915 } 916 917 918 /* 919 * dtSplitUp() 920 * 921 * function: propagate insertion bottom up; 922 * 923 * parameter: 924 * 925 * return: 0 - success; 926 * errno - failure; 927 * leaf page unpinned; 928 */ 929 static int dtSplitUp(tid_t tid, 930 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 931 { 932 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 933 int rc = 0; 934 struct metapage *smp; 935 dtpage_t *sp; /* split page */ 936 struct metapage *rmp; 937 dtpage_t *rp; /* new right page split from sp */ 938 pxd_t rpxd; /* new right page extent descriptor */ 939 struct metapage *lmp; 940 dtpage_t *lp; /* left child page */ 941 int skip; /* index of entry of insertion */ 942 struct btframe *parent; /* parent page entry on traverse stack */ 943 s64 xaddr, nxaddr; 944 int xlen, xsize; 945 struct pxdlist pxdlist; 946 pxd_t *pxd; 947 struct component_name key = { 0, NULL }; 948 ddata_t *data = split->data; 949 int n; 950 struct dt_lock *dtlck; 951 struct tlock *tlck; 952 struct lv *lv; 953 int quota_allocation = 0; 954 955 /* get split page */ 956 smp = split->mp; 957 sp = DT_PAGE(ip, smp); 958 959 key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS); 960 if (!key.name) { 961 DT_PUTPAGE(smp); 962 rc = -ENOMEM; 963 goto dtSplitUp_Exit; 964 } 965 966 /* 967 * split leaf page 968 * 969 * The split routines insert the new entry, and 970 * acquire txLock as appropriate. 971 */ 972 /* 973 * split root leaf page: 974 */ 975 if (sp->header.flag & BT_ROOT) { 976 /* 977 * allocate a single extent child page 978 */ 979 xlen = 1; 980 n = sbi->bsize >> L2DTSLOTSIZE; 981 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 982 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 983 if (n <= split->nslot) 984 xlen++; 985 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 986 DT_PUTPAGE(smp); 987 goto freeKeyName; 988 } 989 990 pxdlist.maxnpxd = 1; 991 pxdlist.npxd = 0; 992 pxd = &pxdlist.pxd[0]; 993 PXDaddress(pxd, xaddr); 994 PXDlength(pxd, xlen); 995 split->pxdlist = &pxdlist; 996 rc = dtSplitRoot(tid, ip, split, &rmp); 997 998 if (rc) 999 dbFree(ip, xaddr, xlen); 1000 else 1001 DT_PUTPAGE(rmp); 1002 1003 DT_PUTPAGE(smp); 1004 1005 if (!DO_INDEX(ip)) 1006 ip->i_size = xlen << sbi->l2bsize; 1007 1008 goto freeKeyName; 1009 } 1010 1011 /* 1012 * extend first leaf page 1013 * 1014 * extend the 1st extent if less than buffer page size 1015 * (dtExtendPage() reurns leaf page unpinned) 1016 */ 1017 pxd = &sp->header.self; 1018 xlen = lengthPXD(pxd); 1019 xsize = xlen << sbi->l2bsize; 1020 if (xsize < PSIZE) { 1021 xaddr = addressPXD(pxd); 1022 n = xsize >> L2DTSLOTSIZE; 1023 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1024 if ((n + sp->header.freecnt) <= split->nslot) 1025 n = xlen + (xlen << 1); 1026 else 1027 n = xlen; 1028 1029 /* Allocate blocks to quota. */ 1030 if (DQUOT_ALLOC_BLOCK(ip, n)) { 1031 rc = -EDQUOT; 1032 goto extendOut; 1033 } 1034 quota_allocation += n; 1035 1036 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1037 (s64) n, &nxaddr))) 1038 goto extendOut; 1039 1040 pxdlist.maxnpxd = 1; 1041 pxdlist.npxd = 0; 1042 pxd = &pxdlist.pxd[0]; 1043 PXDaddress(pxd, nxaddr) 1044 PXDlength(pxd, xlen + n); 1045 split->pxdlist = &pxdlist; 1046 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1047 nxaddr = addressPXD(pxd); 1048 if (xaddr != nxaddr) { 1049 /* free relocated extent */ 1050 xlen = lengthPXD(pxd); 1051 dbFree(ip, nxaddr, (s64) xlen); 1052 } else { 1053 /* free extended delta */ 1054 xlen = lengthPXD(pxd) - n; 1055 xaddr = addressPXD(pxd) + xlen; 1056 dbFree(ip, xaddr, (s64) n); 1057 } 1058 } else if (!DO_INDEX(ip)) 1059 ip->i_size = lengthPXD(pxd) << sbi->l2bsize; 1060 1061 1062 extendOut: 1063 DT_PUTPAGE(smp); 1064 goto freeKeyName; 1065 } 1066 1067 /* 1068 * split leaf page <sp> into <sp> and a new right page <rp>. 1069 * 1070 * return <rp> pinned and its extent descriptor <rpxd> 1071 */ 1072 /* 1073 * allocate new directory page extent and 1074 * new index page(s) to cover page split(s) 1075 * 1076 * allocation hint: ? 1077 */ 1078 n = btstack->nsplit; 1079 pxdlist.maxnpxd = pxdlist.npxd = 0; 1080 xlen = sbi->nbperpage; 1081 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1082 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1083 PXDaddress(pxd, xaddr); 1084 PXDlength(pxd, xlen); 1085 pxdlist.maxnpxd++; 1086 continue; 1087 } 1088 1089 DT_PUTPAGE(smp); 1090 1091 /* undo allocation */ 1092 goto splitOut; 1093 } 1094 1095 split->pxdlist = &pxdlist; 1096 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1097 DT_PUTPAGE(smp); 1098 1099 /* undo allocation */ 1100 goto splitOut; 1101 } 1102 1103 if (!DO_INDEX(ip)) 1104 ip->i_size += PSIZE; 1105 1106 /* 1107 * propagate up the router entry for the leaf page just split 1108 * 1109 * insert a router entry for the new page into the parent page, 1110 * propagate the insert/split up the tree by walking back the stack 1111 * of (bn of parent page, index of child page entry in parent page) 1112 * that were traversed during the search for the page that split. 1113 * 1114 * the propagation of insert/split up the tree stops if the root 1115 * splits or the page inserted into doesn't have to split to hold 1116 * the new entry. 1117 * 1118 * the parent entry for the split page remains the same, and 1119 * a new entry is inserted at its right with the first key and 1120 * block number of the new right page. 1121 * 1122 * There are a maximum of 4 pages pinned at any time: 1123 * two children, left parent and right parent (when the parent splits). 1124 * keep the child pages pinned while working on the parent. 1125 * make sure that all pins are released at exit. 1126 */ 1127 while ((parent = BT_POP(btstack)) != NULL) { 1128 /* parent page specified by stack frame <parent> */ 1129 1130 /* keep current child pages (<lp>, <rp>) pinned */ 1131 lmp = smp; 1132 lp = sp; 1133 1134 /* 1135 * insert router entry in parent for new right child page <rp> 1136 */ 1137 /* get the parent page <sp> */ 1138 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1139 if (rc) { 1140 DT_PUTPAGE(lmp); 1141 DT_PUTPAGE(rmp); 1142 goto splitOut; 1143 } 1144 1145 /* 1146 * The new key entry goes ONE AFTER the index of parent entry, 1147 * because the split was to the right. 1148 */ 1149 skip = parent->index + 1; 1150 1151 /* 1152 * compute the key for the router entry 1153 * 1154 * key suffix compression: 1155 * for internal pages that have leaf pages as children, 1156 * retain only what's needed to distinguish between 1157 * the new entry and the entry on the page to its left. 1158 * If the keys compare equal, retain the entire key. 1159 * 1160 * note that compression is performed only at computing 1161 * router key at the lowest internal level. 1162 * further compression of the key between pairs of higher 1163 * level internal pages loses too much information and 1164 * the search may fail. 1165 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1166 * results in two adjacent parent entries (a)(xx). 1167 * if split occurs between these two entries, and 1168 * if compression is applied, the router key of parent entry 1169 * of right page (x) will divert search for x into right 1170 * subtree and miss x in the left subtree.) 1171 * 1172 * the entire key must be retained for the next-to-leftmost 1173 * internal key at any level of the tree, or search may fail 1174 * (e.g., ?) 1175 */ 1176 switch (rp->header.flag & BT_TYPE) { 1177 case BT_LEAF: 1178 /* 1179 * compute the length of prefix for suffix compression 1180 * between last entry of left page and first entry 1181 * of right page 1182 */ 1183 if ((sp->header.flag & BT_ROOT && skip > 1) || 1184 sp->header.prev != 0 || skip > 1) { 1185 /* compute uppercase router prefix key */ 1186 rc = ciGetLeafPrefixKey(lp, 1187 lp->header.nextindex-1, 1188 rp, 0, &key, 1189 sbi->mntflag); 1190 if (rc) { 1191 DT_PUTPAGE(lmp); 1192 DT_PUTPAGE(rmp); 1193 DT_PUTPAGE(smp); 1194 goto splitOut; 1195 } 1196 } else { 1197 /* next to leftmost entry of 1198 lowest internal level */ 1199 1200 /* compute uppercase router key */ 1201 dtGetKey(rp, 0, &key, sbi->mntflag); 1202 key.name[key.namlen] = 0; 1203 1204 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1205 ciToUpper(&key); 1206 } 1207 1208 n = NDTINTERNAL(key.namlen); 1209 break; 1210 1211 case BT_INTERNAL: 1212 dtGetKey(rp, 0, &key, sbi->mntflag); 1213 n = NDTINTERNAL(key.namlen); 1214 break; 1215 1216 default: 1217 jfs_err("dtSplitUp(): UFO!"); 1218 break; 1219 } 1220 1221 /* unpin left child page */ 1222 DT_PUTPAGE(lmp); 1223 1224 /* 1225 * compute the data for the router entry 1226 */ 1227 data->xd = rpxd; /* child page xd */ 1228 1229 /* 1230 * parent page is full - split the parent page 1231 */ 1232 if (n > sp->header.freecnt) { 1233 /* init for parent page split */ 1234 split->mp = smp; 1235 split->index = skip; /* index at insert */ 1236 split->nslot = n; 1237 split->key = &key; 1238 /* split->data = data; */ 1239 1240 /* unpin right child page */ 1241 DT_PUTPAGE(rmp); 1242 1243 /* The split routines insert the new entry, 1244 * acquire txLock as appropriate. 1245 * return <rp> pinned and its block number <rbn>. 1246 */ 1247 rc = (sp->header.flag & BT_ROOT) ? 1248 dtSplitRoot(tid, ip, split, &rmp) : 1249 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1250 if (rc) { 1251 DT_PUTPAGE(smp); 1252 goto splitOut; 1253 } 1254 1255 /* smp and rmp are pinned */ 1256 } 1257 /* 1258 * parent page is not full - insert router entry in parent page 1259 */ 1260 else { 1261 BT_MARK_DIRTY(smp, ip); 1262 /* 1263 * acquire a transaction lock on the parent page 1264 */ 1265 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1266 dtlck = (struct dt_lock *) & tlck->lock; 1267 ASSERT(dtlck->index == 0); 1268 lv = & dtlck->lv[0]; 1269 1270 /* linelock header */ 1271 lv->offset = 0; 1272 lv->length = 1; 1273 dtlck->index++; 1274 1275 /* linelock stbl of non-root parent page */ 1276 if (!(sp->header.flag & BT_ROOT)) { 1277 lv++; 1278 n = skip >> L2DTSLOTSIZE; 1279 lv->offset = sp->header.stblindex + n; 1280 lv->length = 1281 ((sp->header.nextindex - 1282 1) >> L2DTSLOTSIZE) - n + 1; 1283 dtlck->index++; 1284 } 1285 1286 dtInsertEntry(sp, skip, &key, data, &dtlck); 1287 1288 /* exit propagate up */ 1289 break; 1290 } 1291 } 1292 1293 /* unpin current split and its right page */ 1294 DT_PUTPAGE(smp); 1295 DT_PUTPAGE(rmp); 1296 1297 /* 1298 * free remaining extents allocated for split 1299 */ 1300 splitOut: 1301 n = pxdlist.npxd; 1302 pxd = &pxdlist.pxd[n]; 1303 for (; n < pxdlist.maxnpxd; n++, pxd++) 1304 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1305 1306 freeKeyName: 1307 kfree(key.name); 1308 1309 /* Rollback quota allocation */ 1310 if (rc && quota_allocation) 1311 DQUOT_FREE_BLOCK(ip, quota_allocation); 1312 1313 dtSplitUp_Exit: 1314 1315 return rc; 1316 } 1317 1318 1319 /* 1320 * dtSplitPage() 1321 * 1322 * function: Split a non-root page of a btree. 1323 * 1324 * parameter: 1325 * 1326 * return: 0 - success; 1327 * errno - failure; 1328 * return split and new page pinned; 1329 */ 1330 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1331 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1332 { 1333 int rc = 0; 1334 struct metapage *smp; 1335 dtpage_t *sp; 1336 struct metapage *rmp; 1337 dtpage_t *rp; /* new right page allocated */ 1338 s64 rbn; /* new right page block number */ 1339 struct metapage *mp; 1340 dtpage_t *p; 1341 s64 nextbn; 1342 struct pxdlist *pxdlist; 1343 pxd_t *pxd; 1344 int skip, nextindex, half, left, nxt, off, si; 1345 struct ldtentry *ldtentry; 1346 struct idtentry *idtentry; 1347 u8 *stbl; 1348 struct dtslot *f; 1349 int fsi, stblsize; 1350 int n; 1351 struct dt_lock *sdtlck, *rdtlck; 1352 struct tlock *tlck; 1353 struct dt_lock *dtlck; 1354 struct lv *slv, *rlv, *lv; 1355 1356 /* get split page */ 1357 smp = split->mp; 1358 sp = DT_PAGE(ip, smp); 1359 1360 /* 1361 * allocate the new right page for the split 1362 */ 1363 pxdlist = split->pxdlist; 1364 pxd = &pxdlist->pxd[pxdlist->npxd]; 1365 pxdlist->npxd++; 1366 rbn = addressPXD(pxd); 1367 rmp = get_metapage(ip, rbn, PSIZE, 1); 1368 if (rmp == NULL) 1369 return -EIO; 1370 1371 /* Allocate blocks to quota. */ 1372 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1373 release_metapage(rmp); 1374 return -EDQUOT; 1375 } 1376 1377 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1378 1379 BT_MARK_DIRTY(rmp, ip); 1380 /* 1381 * acquire a transaction lock on the new right page 1382 */ 1383 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1384 rdtlck = (struct dt_lock *) & tlck->lock; 1385 1386 rp = (dtpage_t *) rmp->data; 1387 *rpp = rp; 1388 rp->header.self = *pxd; 1389 1390 BT_MARK_DIRTY(smp, ip); 1391 /* 1392 * acquire a transaction lock on the split page 1393 * 1394 * action: 1395 */ 1396 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1397 sdtlck = (struct dt_lock *) & tlck->lock; 1398 1399 /* linelock header of split page */ 1400 ASSERT(sdtlck->index == 0); 1401 slv = & sdtlck->lv[0]; 1402 slv->offset = 0; 1403 slv->length = 1; 1404 sdtlck->index++; 1405 1406 /* 1407 * initialize/update sibling pointers between sp and rp 1408 */ 1409 nextbn = le64_to_cpu(sp->header.next); 1410 rp->header.next = cpu_to_le64(nextbn); 1411 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1412 sp->header.next = cpu_to_le64(rbn); 1413 1414 /* 1415 * initialize new right page 1416 */ 1417 rp->header.flag = sp->header.flag; 1418 1419 /* compute sorted entry table at start of extent data area */ 1420 rp->header.nextindex = 0; 1421 rp->header.stblindex = 1; 1422 1423 n = PSIZE >> L2DTSLOTSIZE; 1424 rp->header.maxslot = n; 1425 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1426 1427 /* init freelist */ 1428 fsi = rp->header.stblindex + stblsize; 1429 rp->header.freelist = fsi; 1430 rp->header.freecnt = rp->header.maxslot - fsi; 1431 1432 /* 1433 * sequential append at tail: append without split 1434 * 1435 * If splitting the last page on a level because of appending 1436 * a entry to it (skip is maxentry), it's likely that the access is 1437 * sequential. Adding an empty page on the side of the level is less 1438 * work and can push the fill factor much higher than normal. 1439 * If we're wrong it's no big deal, we'll just do the split the right 1440 * way next time. 1441 * (It may look like it's equally easy to do a similar hack for 1442 * reverse sorted data, that is, split the tree left, 1443 * but it's not. Be my guest.) 1444 */ 1445 if (nextbn == 0 && split->index == sp->header.nextindex) { 1446 /* linelock header + stbl (first slot) of new page */ 1447 rlv = & rdtlck->lv[rdtlck->index]; 1448 rlv->offset = 0; 1449 rlv->length = 2; 1450 rdtlck->index++; 1451 1452 /* 1453 * initialize freelist of new right page 1454 */ 1455 f = &rp->slot[fsi]; 1456 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1457 f->next = fsi; 1458 f->next = -1; 1459 1460 /* insert entry at the first entry of the new right page */ 1461 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1462 1463 goto out; 1464 } 1465 1466 /* 1467 * non-sequential insert (at possibly middle page) 1468 */ 1469 1470 /* 1471 * update prev pointer of previous right sibling page; 1472 */ 1473 if (nextbn != 0) { 1474 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1475 if (rc) { 1476 discard_metapage(rmp); 1477 return rc; 1478 } 1479 1480 BT_MARK_DIRTY(mp, ip); 1481 /* 1482 * acquire a transaction lock on the next page 1483 */ 1484 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1485 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1486 tlck, ip, mp); 1487 dtlck = (struct dt_lock *) & tlck->lock; 1488 1489 /* linelock header of previous right sibling page */ 1490 lv = & dtlck->lv[dtlck->index]; 1491 lv->offset = 0; 1492 lv->length = 1; 1493 dtlck->index++; 1494 1495 p->header.prev = cpu_to_le64(rbn); 1496 1497 DT_PUTPAGE(mp); 1498 } 1499 1500 /* 1501 * split the data between the split and right pages. 1502 */ 1503 skip = split->index; 1504 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1505 left = 0; 1506 1507 /* 1508 * compute fill factor for split pages 1509 * 1510 * <nxt> traces the next entry to move to rp 1511 * <off> traces the next entry to stay in sp 1512 */ 1513 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1514 nextindex = sp->header.nextindex; 1515 for (nxt = off = 0; nxt < nextindex; ++off) { 1516 if (off == skip) 1517 /* check for fill factor with new entry size */ 1518 n = split->nslot; 1519 else { 1520 si = stbl[nxt]; 1521 switch (sp->header.flag & BT_TYPE) { 1522 case BT_LEAF: 1523 ldtentry = (struct ldtentry *) & sp->slot[si]; 1524 if (DO_INDEX(ip)) 1525 n = NDTLEAF(ldtentry->namlen); 1526 else 1527 n = NDTLEAF_LEGACY(ldtentry-> 1528 namlen); 1529 break; 1530 1531 case BT_INTERNAL: 1532 idtentry = (struct idtentry *) & sp->slot[si]; 1533 n = NDTINTERNAL(idtentry->namlen); 1534 break; 1535 1536 default: 1537 break; 1538 } 1539 1540 ++nxt; /* advance to next entry to move in sp */ 1541 } 1542 1543 left += n; 1544 if (left >= half) 1545 break; 1546 } 1547 1548 /* <nxt> poins to the 1st entry to move */ 1549 1550 /* 1551 * move entries to right page 1552 * 1553 * dtMoveEntry() initializes rp and reserves entry for insertion 1554 * 1555 * split page moved out entries are linelocked; 1556 * new/right page moved in entries are linelocked; 1557 */ 1558 /* linelock header + stbl of new right page */ 1559 rlv = & rdtlck->lv[rdtlck->index]; 1560 rlv->offset = 0; 1561 rlv->length = 5; 1562 rdtlck->index++; 1563 1564 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1565 1566 sp->header.nextindex = nxt; 1567 1568 /* 1569 * finalize freelist of new right page 1570 */ 1571 fsi = rp->header.freelist; 1572 f = &rp->slot[fsi]; 1573 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1574 f->next = fsi; 1575 f->next = -1; 1576 1577 /* 1578 * Update directory index table for entries now in right page 1579 */ 1580 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1581 s64 lblock; 1582 1583 mp = NULL; 1584 stbl = DT_GETSTBL(rp); 1585 for (n = 0; n < rp->header.nextindex; n++) { 1586 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1587 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1588 rbn, n, &mp, &lblock); 1589 } 1590 if (mp) 1591 release_metapage(mp); 1592 } 1593 1594 /* 1595 * the skipped index was on the left page, 1596 */ 1597 if (skip <= off) { 1598 /* insert the new entry in the split page */ 1599 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1600 1601 /* linelock stbl of split page */ 1602 if (sdtlck->index >= sdtlck->maxcnt) 1603 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1604 slv = & sdtlck->lv[sdtlck->index]; 1605 n = skip >> L2DTSLOTSIZE; 1606 slv->offset = sp->header.stblindex + n; 1607 slv->length = 1608 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1609 sdtlck->index++; 1610 } 1611 /* 1612 * the skipped index was on the right page, 1613 */ 1614 else { 1615 /* adjust the skip index to reflect the new position */ 1616 skip -= nxt; 1617 1618 /* insert the new entry in the right page */ 1619 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1620 } 1621 1622 out: 1623 *rmpp = rmp; 1624 *rpxdp = *pxd; 1625 1626 return rc; 1627 } 1628 1629 1630 /* 1631 * dtExtendPage() 1632 * 1633 * function: extend 1st/only directory leaf page 1634 * 1635 * parameter: 1636 * 1637 * return: 0 - success; 1638 * errno - failure; 1639 * return extended page pinned; 1640 */ 1641 static int dtExtendPage(tid_t tid, 1642 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1643 { 1644 struct super_block *sb = ip->i_sb; 1645 int rc; 1646 struct metapage *smp, *pmp, *mp; 1647 dtpage_t *sp, *pp; 1648 struct pxdlist *pxdlist; 1649 pxd_t *pxd, *tpxd; 1650 int xlen, xsize; 1651 int newstblindex, newstblsize; 1652 int oldstblindex, oldstblsize; 1653 int fsi, last; 1654 struct dtslot *f; 1655 struct btframe *parent; 1656 int n; 1657 struct dt_lock *dtlck; 1658 s64 xaddr, txaddr; 1659 struct tlock *tlck; 1660 struct pxd_lock *pxdlock; 1661 struct lv *lv; 1662 uint type; 1663 struct ldtentry *ldtentry; 1664 u8 *stbl; 1665 1666 /* get page to extend */ 1667 smp = split->mp; 1668 sp = DT_PAGE(ip, smp); 1669 1670 /* get parent/root page */ 1671 parent = BT_POP(btstack); 1672 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1673 if (rc) 1674 return (rc); 1675 1676 /* 1677 * extend the extent 1678 */ 1679 pxdlist = split->pxdlist; 1680 pxd = &pxdlist->pxd[pxdlist->npxd]; 1681 pxdlist->npxd++; 1682 1683 xaddr = addressPXD(pxd); 1684 tpxd = &sp->header.self; 1685 txaddr = addressPXD(tpxd); 1686 /* in-place extension */ 1687 if (xaddr == txaddr) { 1688 type = tlckEXTEND; 1689 } 1690 /* relocation */ 1691 else { 1692 type = tlckNEW; 1693 1694 /* save moved extent descriptor for later free */ 1695 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1696 pxdlock = (struct pxd_lock *) & tlck->lock; 1697 pxdlock->flag = mlckFREEPXD; 1698 pxdlock->pxd = sp->header.self; 1699 pxdlock->index = 1; 1700 1701 /* 1702 * Update directory index table to reflect new page address 1703 */ 1704 if (DO_INDEX(ip)) { 1705 s64 lblock; 1706 1707 mp = NULL; 1708 stbl = DT_GETSTBL(sp); 1709 for (n = 0; n < sp->header.nextindex; n++) { 1710 ldtentry = 1711 (struct ldtentry *) & sp->slot[stbl[n]]; 1712 modify_index(tid, ip, 1713 le32_to_cpu(ldtentry->index), 1714 xaddr, n, &mp, &lblock); 1715 } 1716 if (mp) 1717 release_metapage(mp); 1718 } 1719 } 1720 1721 /* 1722 * extend the page 1723 */ 1724 sp->header.self = *pxd; 1725 1726 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1727 1728 BT_MARK_DIRTY(smp, ip); 1729 /* 1730 * acquire a transaction lock on the extended/leaf page 1731 */ 1732 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1733 dtlck = (struct dt_lock *) & tlck->lock; 1734 lv = & dtlck->lv[0]; 1735 1736 /* update buffer extent descriptor of extended page */ 1737 xlen = lengthPXD(pxd); 1738 xsize = xlen << JFS_SBI(sb)->l2bsize; 1739 1740 /* 1741 * copy old stbl to new stbl at start of extended area 1742 */ 1743 oldstblindex = sp->header.stblindex; 1744 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1745 newstblindex = sp->header.maxslot; 1746 n = xsize >> L2DTSLOTSIZE; 1747 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1748 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1749 sp->header.nextindex); 1750 1751 /* 1752 * in-line extension: linelock old area of extended page 1753 */ 1754 if (type == tlckEXTEND) { 1755 /* linelock header */ 1756 lv->offset = 0; 1757 lv->length = 1; 1758 dtlck->index++; 1759 lv++; 1760 1761 /* linelock new stbl of extended page */ 1762 lv->offset = newstblindex; 1763 lv->length = newstblsize; 1764 } 1765 /* 1766 * relocation: linelock whole relocated area 1767 */ 1768 else { 1769 lv->offset = 0; 1770 lv->length = sp->header.maxslot + newstblsize; 1771 } 1772 1773 dtlck->index++; 1774 1775 sp->header.maxslot = n; 1776 sp->header.stblindex = newstblindex; 1777 /* sp->header.nextindex remains the same */ 1778 1779 /* 1780 * add old stbl region at head of freelist 1781 */ 1782 fsi = oldstblindex; 1783 f = &sp->slot[fsi]; 1784 last = sp->header.freelist; 1785 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1786 f->next = last; 1787 last = fsi; 1788 } 1789 sp->header.freelist = last; 1790 sp->header.freecnt += oldstblsize; 1791 1792 /* 1793 * append free region of newly extended area at tail of freelist 1794 */ 1795 /* init free region of newly extended area */ 1796 fsi = n = newstblindex + newstblsize; 1797 f = &sp->slot[fsi]; 1798 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1799 f->next = fsi; 1800 f->next = -1; 1801 1802 /* append new free region at tail of old freelist */ 1803 fsi = sp->header.freelist; 1804 if (fsi == -1) 1805 sp->header.freelist = n; 1806 else { 1807 do { 1808 f = &sp->slot[fsi]; 1809 fsi = f->next; 1810 } while (fsi != -1); 1811 1812 f->next = n; 1813 } 1814 1815 sp->header.freecnt += sp->header.maxslot - n; 1816 1817 /* 1818 * insert the new entry 1819 */ 1820 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1821 1822 BT_MARK_DIRTY(pmp, ip); 1823 /* 1824 * linelock any freeslots residing in old extent 1825 */ 1826 if (type == tlckEXTEND) { 1827 n = sp->header.maxslot >> 2; 1828 if (sp->header.freelist < n) 1829 dtLinelockFreelist(sp, n, &dtlck); 1830 } 1831 1832 /* 1833 * update parent entry on the parent/root page 1834 */ 1835 /* 1836 * acquire a transaction lock on the parent/root page 1837 */ 1838 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1839 dtlck = (struct dt_lock *) & tlck->lock; 1840 lv = & dtlck->lv[dtlck->index]; 1841 1842 /* linelock parent entry - 1st slot */ 1843 lv->offset = 1; 1844 lv->length = 1; 1845 dtlck->index++; 1846 1847 /* update the parent pxd for page extension */ 1848 tpxd = (pxd_t *) & pp->slot[1]; 1849 *tpxd = *pxd; 1850 1851 DT_PUTPAGE(pmp); 1852 return 0; 1853 } 1854 1855 1856 /* 1857 * dtSplitRoot() 1858 * 1859 * function: 1860 * split the full root page into 1861 * original/root/split page and new right page 1862 * i.e., root remains fixed in tree anchor (inode) and 1863 * the root is copied to a single new right child page 1864 * since root page << non-root page, and 1865 * the split root page contains a single entry for the 1866 * new right child page. 1867 * 1868 * parameter: 1869 * 1870 * return: 0 - success; 1871 * errno - failure; 1872 * return new page pinned; 1873 */ 1874 static int dtSplitRoot(tid_t tid, 1875 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1876 { 1877 struct super_block *sb = ip->i_sb; 1878 struct metapage *smp; 1879 dtroot_t *sp; 1880 struct metapage *rmp; 1881 dtpage_t *rp; 1882 s64 rbn; 1883 int xlen; 1884 int xsize; 1885 struct dtslot *f; 1886 s8 *stbl; 1887 int fsi, stblsize, n; 1888 struct idtentry *s; 1889 pxd_t *ppxd; 1890 struct pxdlist *pxdlist; 1891 pxd_t *pxd; 1892 struct dt_lock *dtlck; 1893 struct tlock *tlck; 1894 struct lv *lv; 1895 1896 /* get split root page */ 1897 smp = split->mp; 1898 sp = &JFS_IP(ip)->i_dtroot; 1899 1900 /* 1901 * allocate/initialize a single (right) child page 1902 * 1903 * N.B. at first split, a one (or two) block to fit new entry 1904 * is allocated; at subsequent split, a full page is allocated; 1905 */ 1906 pxdlist = split->pxdlist; 1907 pxd = &pxdlist->pxd[pxdlist->npxd]; 1908 pxdlist->npxd++; 1909 rbn = addressPXD(pxd); 1910 xlen = lengthPXD(pxd); 1911 xsize = xlen << JFS_SBI(sb)->l2bsize; 1912 rmp = get_metapage(ip, rbn, xsize, 1); 1913 if (!rmp) 1914 return -EIO; 1915 1916 rp = rmp->data; 1917 1918 /* Allocate blocks to quota. */ 1919 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1920 release_metapage(rmp); 1921 return -EDQUOT; 1922 } 1923 1924 BT_MARK_DIRTY(rmp, ip); 1925 /* 1926 * acquire a transaction lock on the new right page 1927 */ 1928 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1929 dtlck = (struct dt_lock *) & tlck->lock; 1930 1931 rp->header.flag = 1932 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1933 rp->header.self = *pxd; 1934 1935 /* initialize sibling pointers */ 1936 rp->header.next = 0; 1937 rp->header.prev = 0; 1938 1939 /* 1940 * move in-line root page into new right page extent 1941 */ 1942 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1943 ASSERT(dtlck->index == 0); 1944 lv = & dtlck->lv[0]; 1945 lv->offset = 0; 1946 lv->length = 10; /* 1 + 8 + 1 */ 1947 dtlck->index++; 1948 1949 n = xsize >> L2DTSLOTSIZE; 1950 rp->header.maxslot = n; 1951 stblsize = (n + 31) >> L2DTSLOTSIZE; 1952 1953 /* copy old stbl to new stbl at start of extended area */ 1954 rp->header.stblindex = DTROOTMAXSLOT; 1955 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1956 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1957 rp->header.nextindex = sp->header.nextindex; 1958 1959 /* copy old data area to start of new data area */ 1960 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1961 1962 /* 1963 * append free region of newly extended area at tail of freelist 1964 */ 1965 /* init free region of newly extended area */ 1966 fsi = n = DTROOTMAXSLOT + stblsize; 1967 f = &rp->slot[fsi]; 1968 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1969 f->next = fsi; 1970 f->next = -1; 1971 1972 /* append new free region at tail of old freelist */ 1973 fsi = sp->header.freelist; 1974 if (fsi == -1) 1975 rp->header.freelist = n; 1976 else { 1977 rp->header.freelist = fsi; 1978 1979 do { 1980 f = &rp->slot[fsi]; 1981 fsi = f->next; 1982 } while (fsi != -1); 1983 1984 f->next = n; 1985 } 1986 1987 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1988 1989 /* 1990 * Update directory index table for entries now in right page 1991 */ 1992 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1993 s64 lblock; 1994 struct metapage *mp = NULL; 1995 struct ldtentry *ldtentry; 1996 1997 stbl = DT_GETSTBL(rp); 1998 for (n = 0; n < rp->header.nextindex; n++) { 1999 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 2000 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 2001 rbn, n, &mp, &lblock); 2002 } 2003 if (mp) 2004 release_metapage(mp); 2005 } 2006 /* 2007 * insert the new entry into the new right/child page 2008 * (skip index in the new right page will not change) 2009 */ 2010 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2011 2012 /* 2013 * reset parent/root page 2014 * 2015 * set the 1st entry offset to 0, which force the left-most key 2016 * at any level of the tree to be less than any search key. 2017 * 2018 * The btree comparison code guarantees that the left-most key on any 2019 * level of the tree is never used, so it doesn't need to be filled in. 2020 */ 2021 BT_MARK_DIRTY(smp, ip); 2022 /* 2023 * acquire a transaction lock on the root page (in-memory inode) 2024 */ 2025 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2026 dtlck = (struct dt_lock *) & tlck->lock; 2027 2028 /* linelock root */ 2029 ASSERT(dtlck->index == 0); 2030 lv = & dtlck->lv[0]; 2031 lv->offset = 0; 2032 lv->length = DTROOTMAXSLOT; 2033 dtlck->index++; 2034 2035 /* update page header of root */ 2036 if (sp->header.flag & BT_LEAF) { 2037 sp->header.flag &= ~BT_LEAF; 2038 sp->header.flag |= BT_INTERNAL; 2039 } 2040 2041 /* init the first entry */ 2042 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2043 ppxd = (pxd_t *) s; 2044 *ppxd = *pxd; 2045 s->next = -1; 2046 s->namlen = 0; 2047 2048 stbl = sp->header.stbl; 2049 stbl[0] = DTENTRYSTART; 2050 sp->header.nextindex = 1; 2051 2052 /* init freelist */ 2053 fsi = DTENTRYSTART + 1; 2054 f = &sp->slot[fsi]; 2055 2056 /* init free region of remaining area */ 2057 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2058 f->next = fsi; 2059 f->next = -1; 2060 2061 sp->header.freelist = DTENTRYSTART + 1; 2062 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2063 2064 *rmpp = rmp; 2065 2066 return 0; 2067 } 2068 2069 2070 /* 2071 * dtDelete() 2072 * 2073 * function: delete the entry(s) referenced by a key. 2074 * 2075 * parameter: 2076 * 2077 * return: 2078 */ 2079 int dtDelete(tid_t tid, 2080 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2081 { 2082 int rc = 0; 2083 s64 bn; 2084 struct metapage *mp, *imp; 2085 dtpage_t *p; 2086 int index; 2087 struct btstack btstack; 2088 struct dt_lock *dtlck; 2089 struct tlock *tlck; 2090 struct lv *lv; 2091 int i; 2092 struct ldtentry *ldtentry; 2093 u8 *stbl; 2094 u32 table_index, next_index; 2095 struct metapage *nmp; 2096 dtpage_t *np; 2097 2098 /* 2099 * search for the entry to delete: 2100 * 2101 * dtSearch() returns (leaf page pinned, index at which to delete). 2102 */ 2103 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2104 return rc; 2105 2106 /* retrieve search result */ 2107 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2108 2109 /* 2110 * We need to find put the index of the next entry into the 2111 * directory index table in order to resume a readdir from this 2112 * entry. 2113 */ 2114 if (DO_INDEX(ip)) { 2115 stbl = DT_GETSTBL(p); 2116 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2117 table_index = le32_to_cpu(ldtentry->index); 2118 if (index == (p->header.nextindex - 1)) { 2119 /* 2120 * Last entry in this leaf page 2121 */ 2122 if ((p->header.flag & BT_ROOT) 2123 || (p->header.next == 0)) 2124 next_index = -1; 2125 else { 2126 /* Read next leaf page */ 2127 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2128 nmp, PSIZE, np, rc); 2129 if (rc) 2130 next_index = -1; 2131 else { 2132 stbl = DT_GETSTBL(np); 2133 ldtentry = 2134 (struct ldtentry *) & np-> 2135 slot[stbl[0]]; 2136 next_index = 2137 le32_to_cpu(ldtentry->index); 2138 DT_PUTPAGE(nmp); 2139 } 2140 } 2141 } else { 2142 ldtentry = 2143 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2144 next_index = le32_to_cpu(ldtentry->index); 2145 } 2146 free_index(tid, ip, table_index, next_index); 2147 } 2148 /* 2149 * the leaf page becomes empty, delete the page 2150 */ 2151 if (p->header.nextindex == 1) { 2152 /* delete empty page */ 2153 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2154 } 2155 /* 2156 * the leaf page has other entries remaining: 2157 * 2158 * delete the entry from the leaf page. 2159 */ 2160 else { 2161 BT_MARK_DIRTY(mp, ip); 2162 /* 2163 * acquire a transaction lock on the leaf page 2164 */ 2165 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2166 dtlck = (struct dt_lock *) & tlck->lock; 2167 2168 /* 2169 * Do not assume that dtlck->index will be zero. During a 2170 * rename within a directory, this transaction may have 2171 * modified this page already when adding the new entry. 2172 */ 2173 2174 /* linelock header */ 2175 if (dtlck->index >= dtlck->maxcnt) 2176 dtlck = (struct dt_lock *) txLinelock(dtlck); 2177 lv = & dtlck->lv[dtlck->index]; 2178 lv->offset = 0; 2179 lv->length = 1; 2180 dtlck->index++; 2181 2182 /* linelock stbl of non-root leaf page */ 2183 if (!(p->header.flag & BT_ROOT)) { 2184 if (dtlck->index >= dtlck->maxcnt) 2185 dtlck = (struct dt_lock *) txLinelock(dtlck); 2186 lv = & dtlck->lv[dtlck->index]; 2187 i = index >> L2DTSLOTSIZE; 2188 lv->offset = p->header.stblindex + i; 2189 lv->length = 2190 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2191 i + 1; 2192 dtlck->index++; 2193 } 2194 2195 /* free the leaf entry */ 2196 dtDeleteEntry(p, index, &dtlck); 2197 2198 /* 2199 * Update directory index table for entries moved in stbl 2200 */ 2201 if (DO_INDEX(ip) && index < p->header.nextindex) { 2202 s64 lblock; 2203 2204 imp = NULL; 2205 stbl = DT_GETSTBL(p); 2206 for (i = index; i < p->header.nextindex; i++) { 2207 ldtentry = 2208 (struct ldtentry *) & p->slot[stbl[i]]; 2209 modify_index(tid, ip, 2210 le32_to_cpu(ldtentry->index), 2211 bn, i, &imp, &lblock); 2212 } 2213 if (imp) 2214 release_metapage(imp); 2215 } 2216 2217 DT_PUTPAGE(mp); 2218 } 2219 2220 return rc; 2221 } 2222 2223 2224 /* 2225 * dtDeleteUp() 2226 * 2227 * function: 2228 * free empty pages as propagating deletion up the tree 2229 * 2230 * parameter: 2231 * 2232 * return: 2233 */ 2234 static int dtDeleteUp(tid_t tid, struct inode *ip, 2235 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2236 { 2237 int rc = 0; 2238 struct metapage *mp; 2239 dtpage_t *p; 2240 int index, nextindex; 2241 int xlen; 2242 struct btframe *parent; 2243 struct dt_lock *dtlck; 2244 struct tlock *tlck; 2245 struct lv *lv; 2246 struct pxd_lock *pxdlock; 2247 int i; 2248 2249 /* 2250 * keep the root leaf page which has become empty 2251 */ 2252 if (BT_IS_ROOT(fmp)) { 2253 /* 2254 * reset the root 2255 * 2256 * dtInitRoot() acquires txlock on the root 2257 */ 2258 dtInitRoot(tid, ip, PARENT(ip)); 2259 2260 DT_PUTPAGE(fmp); 2261 2262 return 0; 2263 } 2264 2265 /* 2266 * free the non-root leaf page 2267 */ 2268 /* 2269 * acquire a transaction lock on the page 2270 * 2271 * write FREEXTENT|NOREDOPAGE log record 2272 * N.B. linelock is overlaid as freed extent descriptor, and 2273 * the buffer page is freed; 2274 */ 2275 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2276 pxdlock = (struct pxd_lock *) & tlck->lock; 2277 pxdlock->flag = mlckFREEPXD; 2278 pxdlock->pxd = fp->header.self; 2279 pxdlock->index = 1; 2280 2281 /* update sibling pointers */ 2282 if ((rc = dtRelink(tid, ip, fp))) { 2283 BT_PUTPAGE(fmp); 2284 return rc; 2285 } 2286 2287 xlen = lengthPXD(&fp->header.self); 2288 2289 /* Free quota allocation. */ 2290 DQUOT_FREE_BLOCK(ip, xlen); 2291 2292 /* free/invalidate its buffer page */ 2293 discard_metapage(fmp); 2294 2295 /* 2296 * propagate page deletion up the directory tree 2297 * 2298 * If the delete from the parent page makes it empty, 2299 * continue all the way up the tree. 2300 * stop if the root page is reached (which is never deleted) or 2301 * if the entry deletion does not empty the page. 2302 */ 2303 while ((parent = BT_POP(btstack)) != NULL) { 2304 /* pin the parent page <sp> */ 2305 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2306 if (rc) 2307 return rc; 2308 2309 /* 2310 * free the extent of the child page deleted 2311 */ 2312 index = parent->index; 2313 2314 /* 2315 * delete the entry for the child page from parent 2316 */ 2317 nextindex = p->header.nextindex; 2318 2319 /* 2320 * the parent has the single entry being deleted: 2321 * 2322 * free the parent page which has become empty. 2323 */ 2324 if (nextindex == 1) { 2325 /* 2326 * keep the root internal page which has become empty 2327 */ 2328 if (p->header.flag & BT_ROOT) { 2329 /* 2330 * reset the root 2331 * 2332 * dtInitRoot() acquires txlock on the root 2333 */ 2334 dtInitRoot(tid, ip, PARENT(ip)); 2335 2336 DT_PUTPAGE(mp); 2337 2338 return 0; 2339 } 2340 /* 2341 * free the parent page 2342 */ 2343 else { 2344 /* 2345 * acquire a transaction lock on the page 2346 * 2347 * write FREEXTENT|NOREDOPAGE log record 2348 */ 2349 tlck = 2350 txMaplock(tid, ip, 2351 tlckDTREE | tlckFREE); 2352 pxdlock = (struct pxd_lock *) & tlck->lock; 2353 pxdlock->flag = mlckFREEPXD; 2354 pxdlock->pxd = p->header.self; 2355 pxdlock->index = 1; 2356 2357 /* update sibling pointers */ 2358 if ((rc = dtRelink(tid, ip, p))) { 2359 DT_PUTPAGE(mp); 2360 return rc; 2361 } 2362 2363 xlen = lengthPXD(&p->header.self); 2364 2365 /* Free quota allocation */ 2366 DQUOT_FREE_BLOCK(ip, xlen); 2367 2368 /* free/invalidate its buffer page */ 2369 discard_metapage(mp); 2370 2371 /* propagate up */ 2372 continue; 2373 } 2374 } 2375 2376 /* 2377 * the parent has other entries remaining: 2378 * 2379 * delete the router entry from the parent page. 2380 */ 2381 BT_MARK_DIRTY(mp, ip); 2382 /* 2383 * acquire a transaction lock on the page 2384 * 2385 * action: router entry deletion 2386 */ 2387 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2388 dtlck = (struct dt_lock *) & tlck->lock; 2389 2390 /* linelock header */ 2391 if (dtlck->index >= dtlck->maxcnt) 2392 dtlck = (struct dt_lock *) txLinelock(dtlck); 2393 lv = & dtlck->lv[dtlck->index]; 2394 lv->offset = 0; 2395 lv->length = 1; 2396 dtlck->index++; 2397 2398 /* linelock stbl of non-root leaf page */ 2399 if (!(p->header.flag & BT_ROOT)) { 2400 if (dtlck->index < dtlck->maxcnt) 2401 lv++; 2402 else { 2403 dtlck = (struct dt_lock *) txLinelock(dtlck); 2404 lv = & dtlck->lv[0]; 2405 } 2406 i = index >> L2DTSLOTSIZE; 2407 lv->offset = p->header.stblindex + i; 2408 lv->length = 2409 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2410 i + 1; 2411 dtlck->index++; 2412 } 2413 2414 /* free the router entry */ 2415 dtDeleteEntry(p, index, &dtlck); 2416 2417 /* reset key of new leftmost entry of level (for consistency) */ 2418 if (index == 0 && 2419 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2420 dtTruncateEntry(p, 0, &dtlck); 2421 2422 /* unpin the parent page */ 2423 DT_PUTPAGE(mp); 2424 2425 /* exit propagation up */ 2426 break; 2427 } 2428 2429 if (!DO_INDEX(ip)) 2430 ip->i_size -= PSIZE; 2431 2432 return 0; 2433 } 2434 2435 #ifdef _NOTYET 2436 /* 2437 * NAME: dtRelocate() 2438 * 2439 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2440 * This function is mainly used by defragfs utility. 2441 */ 2442 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2443 s64 nxaddr) 2444 { 2445 int rc = 0; 2446 struct metapage *mp, *pmp, *lmp, *rmp; 2447 dtpage_t *p, *pp, *rp = 0, *lp= 0; 2448 s64 bn; 2449 int index; 2450 struct btstack btstack; 2451 pxd_t *pxd; 2452 s64 oxaddr, nextbn, prevbn; 2453 int xlen, xsize; 2454 struct tlock *tlck; 2455 struct dt_lock *dtlck; 2456 struct pxd_lock *pxdlock; 2457 s8 *stbl; 2458 struct lv *lv; 2459 2460 oxaddr = addressPXD(opxd); 2461 xlen = lengthPXD(opxd); 2462 2463 jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d", 2464 (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, 2465 xlen); 2466 2467 /* 2468 * 1. get the internal parent dtpage covering 2469 * router entry for the tartget page to be relocated; 2470 */ 2471 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2472 if (rc) 2473 return rc; 2474 2475 /* retrieve search result */ 2476 DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2477 jfs_info("dtRelocate: parent router entry validated."); 2478 2479 /* 2480 * 2. relocate the target dtpage 2481 */ 2482 /* read in the target page from src extent */ 2483 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2484 if (rc) { 2485 /* release the pinned parent page */ 2486 DT_PUTPAGE(pmp); 2487 return rc; 2488 } 2489 2490 /* 2491 * read in sibling pages if any to update sibling pointers; 2492 */ 2493 rmp = NULL; 2494 if (p->header.next) { 2495 nextbn = le64_to_cpu(p->header.next); 2496 DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2497 if (rc) { 2498 DT_PUTPAGE(mp); 2499 DT_PUTPAGE(pmp); 2500 return (rc); 2501 } 2502 } 2503 2504 lmp = NULL; 2505 if (p->header.prev) { 2506 prevbn = le64_to_cpu(p->header.prev); 2507 DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2508 if (rc) { 2509 DT_PUTPAGE(mp); 2510 DT_PUTPAGE(pmp); 2511 if (rmp) 2512 DT_PUTPAGE(rmp); 2513 return (rc); 2514 } 2515 } 2516 2517 /* at this point, all xtpages to be updated are in memory */ 2518 2519 /* 2520 * update sibling pointers of sibling dtpages if any; 2521 */ 2522 if (lmp) { 2523 tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); 2524 dtlck = (struct dt_lock *) & tlck->lock; 2525 /* linelock header */ 2526 ASSERT(dtlck->index == 0); 2527 lv = & dtlck->lv[0]; 2528 lv->offset = 0; 2529 lv->length = 1; 2530 dtlck->index++; 2531 2532 lp->header.next = cpu_to_le64(nxaddr); 2533 DT_PUTPAGE(lmp); 2534 } 2535 2536 if (rmp) { 2537 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); 2538 dtlck = (struct dt_lock *) & tlck->lock; 2539 /* linelock header */ 2540 ASSERT(dtlck->index == 0); 2541 lv = & dtlck->lv[0]; 2542 lv->offset = 0; 2543 lv->length = 1; 2544 dtlck->index++; 2545 2546 rp->header.prev = cpu_to_le64(nxaddr); 2547 DT_PUTPAGE(rmp); 2548 } 2549 2550 /* 2551 * update the target dtpage to be relocated 2552 * 2553 * write LOG_REDOPAGE of LOG_NEW type for dst page 2554 * for the whole target page (logredo() will apply 2555 * after image and update bmap for allocation of the 2556 * dst extent), and update bmap for allocation of 2557 * the dst extent; 2558 */ 2559 tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); 2560 dtlck = (struct dt_lock *) & tlck->lock; 2561 /* linelock header */ 2562 ASSERT(dtlck->index == 0); 2563 lv = & dtlck->lv[0]; 2564 2565 /* update the self address in the dtpage header */ 2566 pxd = &p->header.self; 2567 PXDaddress(pxd, nxaddr); 2568 2569 /* the dst page is the same as the src page, i.e., 2570 * linelock for afterimage of the whole page; 2571 */ 2572 lv->offset = 0; 2573 lv->length = p->header.maxslot; 2574 dtlck->index++; 2575 2576 /* update the buffer extent descriptor of the dtpage */ 2577 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2578 2579 /* unpin the relocated page */ 2580 DT_PUTPAGE(mp); 2581 jfs_info("dtRelocate: target dtpage relocated."); 2582 2583 /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec 2584 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec 2585 * will also force a bmap update ). 2586 */ 2587 2588 /* 2589 * 3. acquire maplock for the source extent to be freed; 2590 */ 2591 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2592 * for the source dtpage (logredo() will init NoRedoPage 2593 * filter and will also update bmap for free of the source 2594 * dtpage), and upadte bmap for free of the source dtpage; 2595 */ 2596 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2597 pxdlock = (struct pxd_lock *) & tlck->lock; 2598 pxdlock->flag = mlckFREEPXD; 2599 PXDaddress(&pxdlock->pxd, oxaddr); 2600 PXDlength(&pxdlock->pxd, xlen); 2601 pxdlock->index = 1; 2602 2603 /* 2604 * 4. update the parent router entry for relocation; 2605 * 2606 * acquire tlck for the parent entry covering the target dtpage; 2607 * write LOG_REDOPAGE to apply after image only; 2608 */ 2609 jfs_info("dtRelocate: update parent router entry."); 2610 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 2611 dtlck = (struct dt_lock *) & tlck->lock; 2612 lv = & dtlck->lv[dtlck->index]; 2613 2614 /* update the PXD with the new address */ 2615 stbl = DT_GETSTBL(pp); 2616 pxd = (pxd_t *) & pp->slot[stbl[index]]; 2617 PXDaddress(pxd, nxaddr); 2618 lv->offset = stbl[index]; 2619 lv->length = 1; 2620 dtlck->index++; 2621 2622 /* unpin the parent dtpage */ 2623 DT_PUTPAGE(pmp); 2624 2625 return rc; 2626 } 2627 2628 /* 2629 * NAME: dtSearchNode() 2630 * 2631 * FUNCTION: Search for an dtpage containing a specified address 2632 * This function is mainly used by defragfs utility. 2633 * 2634 * NOTE: Search result on stack, the found page is pinned at exit. 2635 * The result page must be an internal dtpage. 2636 * lmxaddr give the address of the left most page of the 2637 * dtree level, in which the required dtpage resides. 2638 */ 2639 static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, 2640 struct btstack * btstack) 2641 { 2642 int rc = 0; 2643 s64 bn; 2644 struct metapage *mp; 2645 dtpage_t *p; 2646 int psize = 288; /* initial in-line directory */ 2647 s8 *stbl; 2648 int i; 2649 pxd_t *pxd; 2650 struct btframe *btsp; 2651 2652 BT_CLR(btstack); /* reset stack */ 2653 2654 /* 2655 * descend tree to the level with specified leftmost page 2656 * 2657 * by convention, root bn = 0. 2658 */ 2659 for (bn = 0;;) { 2660 /* get/pin the page to search */ 2661 DT_GETPAGE(ip, bn, mp, psize, p, rc); 2662 if (rc) 2663 return rc; 2664 2665 /* does the xaddr of leftmost page of the levevl 2666 * matches levevl search key ? 2667 */ 2668 if (p->header.flag & BT_ROOT) { 2669 if (lmxaddr == 0) 2670 break; 2671 } else if (addressPXD(&p->header.self) == lmxaddr) 2672 break; 2673 2674 /* 2675 * descend down to leftmost child page 2676 */ 2677 if (p->header.flag & BT_LEAF) { 2678 DT_PUTPAGE(mp); 2679 return -ESTALE; 2680 } 2681 2682 /* get the leftmost entry */ 2683 stbl = DT_GETSTBL(p); 2684 pxd = (pxd_t *) & p->slot[stbl[0]]; 2685 2686 /* get the child page block address */ 2687 bn = addressPXD(pxd); 2688 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 2689 /* unpin the parent page */ 2690 DT_PUTPAGE(mp); 2691 } 2692 2693 /* 2694 * search each page at the current levevl 2695 */ 2696 loop: 2697 stbl = DT_GETSTBL(p); 2698 for (i = 0; i < p->header.nextindex; i++) { 2699 pxd = (pxd_t *) & p->slot[stbl[i]]; 2700 2701 /* found the specified router entry */ 2702 if (addressPXD(pxd) == addressPXD(kpxd) && 2703 lengthPXD(pxd) == lengthPXD(kpxd)) { 2704 btsp = btstack->top; 2705 btsp->bn = bn; 2706 btsp->index = i; 2707 btsp->mp = mp; 2708 2709 return 0; 2710 } 2711 } 2712 2713 /* get the right sibling page if any */ 2714 if (p->header.next) 2715 bn = le64_to_cpu(p->header.next); 2716 else { 2717 DT_PUTPAGE(mp); 2718 return -ESTALE; 2719 } 2720 2721 /* unpin current page */ 2722 DT_PUTPAGE(mp); 2723 2724 /* get the right sibling page */ 2725 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2726 if (rc) 2727 return rc; 2728 2729 goto loop; 2730 } 2731 #endif /* _NOTYET */ 2732 2733 /* 2734 * dtRelink() 2735 * 2736 * function: 2737 * link around a freed page. 2738 * 2739 * parameter: 2740 * fp: page to be freed 2741 * 2742 * return: 2743 */ 2744 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2745 { 2746 int rc; 2747 struct metapage *mp; 2748 s64 nextbn, prevbn; 2749 struct tlock *tlck; 2750 struct dt_lock *dtlck; 2751 struct lv *lv; 2752 2753 nextbn = le64_to_cpu(p->header.next); 2754 prevbn = le64_to_cpu(p->header.prev); 2755 2756 /* update prev pointer of the next page */ 2757 if (nextbn != 0) { 2758 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2759 if (rc) 2760 return rc; 2761 2762 BT_MARK_DIRTY(mp, ip); 2763 /* 2764 * acquire a transaction lock on the next page 2765 * 2766 * action: update prev pointer; 2767 */ 2768 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2769 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2770 tlck, ip, mp); 2771 dtlck = (struct dt_lock *) & tlck->lock; 2772 2773 /* linelock header */ 2774 if (dtlck->index >= dtlck->maxcnt) 2775 dtlck = (struct dt_lock *) txLinelock(dtlck); 2776 lv = & dtlck->lv[dtlck->index]; 2777 lv->offset = 0; 2778 lv->length = 1; 2779 dtlck->index++; 2780 2781 p->header.prev = cpu_to_le64(prevbn); 2782 DT_PUTPAGE(mp); 2783 } 2784 2785 /* update next pointer of the previous page */ 2786 if (prevbn != 0) { 2787 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2788 if (rc) 2789 return rc; 2790 2791 BT_MARK_DIRTY(mp, ip); 2792 /* 2793 * acquire a transaction lock on the prev page 2794 * 2795 * action: update next pointer; 2796 */ 2797 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2798 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2799 tlck, ip, mp); 2800 dtlck = (struct dt_lock *) & tlck->lock; 2801 2802 /* linelock header */ 2803 if (dtlck->index >= dtlck->maxcnt) 2804 dtlck = (struct dt_lock *) txLinelock(dtlck); 2805 lv = & dtlck->lv[dtlck->index]; 2806 lv->offset = 0; 2807 lv->length = 1; 2808 dtlck->index++; 2809 2810 p->header.next = cpu_to_le64(nextbn); 2811 DT_PUTPAGE(mp); 2812 } 2813 2814 return 0; 2815 } 2816 2817 2818 /* 2819 * dtInitRoot() 2820 * 2821 * initialize directory root (inline in inode) 2822 */ 2823 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2824 { 2825 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2826 dtroot_t *p; 2827 int fsi; 2828 struct dtslot *f; 2829 struct tlock *tlck; 2830 struct dt_lock *dtlck; 2831 struct lv *lv; 2832 u16 xflag_save; 2833 2834 /* 2835 * If this was previously an non-empty directory, we need to remove 2836 * the old directory table. 2837 */ 2838 if (DO_INDEX(ip)) { 2839 if (!jfs_dirtable_inline(ip)) { 2840 struct tblock *tblk = tid_to_tblock(tid); 2841 /* 2842 * We're playing games with the tid's xflag. If 2843 * we're removing a regular file, the file's xtree 2844 * is committed with COMMIT_PMAP, but we always 2845 * commit the directories xtree with COMMIT_PWMAP. 2846 */ 2847 xflag_save = tblk->xflag; 2848 tblk->xflag = 0; 2849 /* 2850 * xtTruncate isn't guaranteed to fully truncate 2851 * the xtree. The caller needs to check i_size 2852 * after committing the transaction to see if 2853 * additional truncation is needed. The 2854 * COMMIT_Stale flag tells caller that we 2855 * initiated the truncation. 2856 */ 2857 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2858 set_cflag(COMMIT_Stale, ip); 2859 2860 tblk->xflag = xflag_save; 2861 } else 2862 ip->i_size = 1; 2863 2864 jfs_ip->next_index = 2; 2865 } else 2866 ip->i_size = IDATASIZE; 2867 2868 /* 2869 * acquire a transaction lock on the root 2870 * 2871 * action: directory initialization; 2872 */ 2873 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2874 tlckDTREE | tlckENTRY | tlckBTROOT); 2875 dtlck = (struct dt_lock *) & tlck->lock; 2876 2877 /* linelock root */ 2878 ASSERT(dtlck->index == 0); 2879 lv = & dtlck->lv[0]; 2880 lv->offset = 0; 2881 lv->length = DTROOTMAXSLOT; 2882 dtlck->index++; 2883 2884 p = &jfs_ip->i_dtroot; 2885 2886 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2887 2888 p->header.nextindex = 0; 2889 2890 /* init freelist */ 2891 fsi = 1; 2892 f = &p->slot[fsi]; 2893 2894 /* init data area of root */ 2895 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2896 f->next = fsi; 2897 f->next = -1; 2898 2899 p->header.freelist = 1; 2900 p->header.freecnt = 8; 2901 2902 /* init '..' entry */ 2903 p->header.idotdot = cpu_to_le32(idotdot); 2904 2905 return; 2906 } 2907 2908 /* 2909 * add_missing_indices() 2910 * 2911 * function: Fix dtree page in which one or more entries has an invalid index. 2912 * fsck.jfs should really fix this, but it currently does not. 2913 * Called from jfs_readdir when bad index is detected. 2914 */ 2915 static void add_missing_indices(struct inode *inode, s64 bn) 2916 { 2917 struct ldtentry *d; 2918 struct dt_lock *dtlck; 2919 int i; 2920 uint index; 2921 struct lv *lv; 2922 struct metapage *mp; 2923 dtpage_t *p; 2924 int rc; 2925 s8 *stbl; 2926 tid_t tid; 2927 struct tlock *tlck; 2928 2929 tid = txBegin(inode->i_sb, 0); 2930 2931 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2932 2933 if (rc) { 2934 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2935 goto end; 2936 } 2937 BT_MARK_DIRTY(mp, inode); 2938 2939 ASSERT(p->header.flag & BT_LEAF); 2940 2941 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2942 if (BT_IS_ROOT(mp)) 2943 tlck->type |= tlckBTROOT; 2944 2945 dtlck = (struct dt_lock *) &tlck->lock; 2946 2947 stbl = DT_GETSTBL(p); 2948 for (i = 0; i < p->header.nextindex; i++) { 2949 d = (struct ldtentry *) &p->slot[stbl[i]]; 2950 index = le32_to_cpu(d->index); 2951 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2952 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2953 if (dtlck->index >= dtlck->maxcnt) 2954 dtlck = (struct dt_lock *) txLinelock(dtlck); 2955 lv = &dtlck->lv[dtlck->index]; 2956 lv->offset = stbl[i]; 2957 lv->length = 1; 2958 dtlck->index++; 2959 } 2960 } 2961 2962 DT_PUTPAGE(mp); 2963 (void) txCommit(tid, 1, &inode, 0); 2964 end: 2965 txEnd(tid); 2966 } 2967 2968 /* 2969 * Buffer to hold directory entry info while traversing a dtree page 2970 * before being fed to the filldir function 2971 */ 2972 struct jfs_dirent { 2973 loff_t position; 2974 int ino; 2975 u16 name_len; 2976 char name[0]; 2977 }; 2978 2979 /* 2980 * function to determine next variable-sized jfs_dirent in buffer 2981 */ 2982 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2983 { 2984 return (struct jfs_dirent *) 2985 ((char *)dirent + 2986 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2987 sizeof (loff_t) - 1) & 2988 ~(sizeof (loff_t) - 1))); 2989 } 2990 2991 /* 2992 * jfs_readdir() 2993 * 2994 * function: read directory entries sequentially 2995 * from the specified entry offset 2996 * 2997 * parameter: 2998 * 2999 * return: offset = (pn, index) of start entry 3000 * of next jfs_readdir()/dtRead() 3001 */ 3002 int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 3003 { 3004 struct inode *ip = filp->f_path.dentry->d_inode; 3005 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 3006 int rc = 0; 3007 loff_t dtpos; /* legacy OS/2 style position */ 3008 struct dtoffset { 3009 s16 pn; 3010 s16 index; 3011 s32 unused; 3012 } *dtoffset = (struct dtoffset *) &dtpos; 3013 s64 bn; 3014 struct metapage *mp; 3015 dtpage_t *p; 3016 int index; 3017 s8 *stbl; 3018 struct btstack btstack; 3019 int i, next; 3020 struct ldtentry *d; 3021 struct dtslot *t; 3022 int d_namleft, len, outlen; 3023 unsigned long dirent_buf; 3024 char *name_ptr; 3025 u32 dir_index; 3026 int do_index = 0; 3027 uint loop_count = 0; 3028 struct jfs_dirent *jfs_dirent; 3029 int jfs_dirents; 3030 int overflow, fix_page, page_fixed = 0; 3031 static int unique_pos = 2; /* If we can't fix broken index */ 3032 3033 if (filp->f_pos == DIREND) 3034 return 0; 3035 3036 if (DO_INDEX(ip)) { 3037 /* 3038 * persistent index is stored in directory entries. 3039 * Special cases: 0 = . 3040 * 1 = .. 3041 * -1 = End of directory 3042 */ 3043 do_index = 1; 3044 3045 dir_index = (u32) filp->f_pos; 3046 3047 if (dir_index > 1) { 3048 struct dir_table_slot dirtab_slot; 3049 3050 if (dtEmpty(ip) || 3051 (dir_index >= JFS_IP(ip)->next_index)) { 3052 /* Stale position. Directory has shrunk */ 3053 filp->f_pos = DIREND; 3054 return 0; 3055 } 3056 repeat: 3057 rc = read_index(ip, dir_index, &dirtab_slot); 3058 if (rc) { 3059 filp->f_pos = DIREND; 3060 return rc; 3061 } 3062 if (dirtab_slot.flag == DIR_INDEX_FREE) { 3063 if (loop_count++ > JFS_IP(ip)->next_index) { 3064 jfs_err("jfs_readdir detected " 3065 "infinite loop!"); 3066 filp->f_pos = DIREND; 3067 return 0; 3068 } 3069 dir_index = le32_to_cpu(dirtab_slot.addr2); 3070 if (dir_index == -1) { 3071 filp->f_pos = DIREND; 3072 return 0; 3073 } 3074 goto repeat; 3075 } 3076 bn = addressDTS(&dirtab_slot); 3077 index = dirtab_slot.slot; 3078 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3079 if (rc) { 3080 filp->f_pos = DIREND; 3081 return 0; 3082 } 3083 if (p->header.flag & BT_INTERNAL) { 3084 jfs_err("jfs_readdir: bad index table"); 3085 DT_PUTPAGE(mp); 3086 filp->f_pos = -1; 3087 return 0; 3088 } 3089 } else { 3090 if (dir_index == 0) { 3091 /* 3092 * self "." 3093 */ 3094 filp->f_pos = 0; 3095 if (filldir(dirent, ".", 1, 0, ip->i_ino, 3096 DT_DIR)) 3097 return 0; 3098 } 3099 /* 3100 * parent ".." 3101 */ 3102 filp->f_pos = 1; 3103 if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) 3104 return 0; 3105 3106 /* 3107 * Find first entry of left-most leaf 3108 */ 3109 if (dtEmpty(ip)) { 3110 filp->f_pos = DIREND; 3111 return 0; 3112 } 3113 3114 if ((rc = dtReadFirst(ip, &btstack))) 3115 return rc; 3116 3117 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3118 } 3119 } else { 3120 /* 3121 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3122 * 3123 * pn = index = 0: First entry "." 3124 * pn = 0; index = 1: Second entry ".." 3125 * pn > 0: Real entries, pn=1 -> leftmost page 3126 * pn = index = -1: No more entries 3127 */ 3128 dtpos = filp->f_pos; 3129 if (dtpos == 0) { 3130 /* build "." entry */ 3131 3132 if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, 3133 DT_DIR)) 3134 return 0; 3135 dtoffset->index = 1; 3136 filp->f_pos = dtpos; 3137 } 3138 3139 if (dtoffset->pn == 0) { 3140 if (dtoffset->index == 1) { 3141 /* build ".." entry */ 3142 3143 if (filldir(dirent, "..", 2, filp->f_pos, 3144 PARENT(ip), DT_DIR)) 3145 return 0; 3146 } else { 3147 jfs_err("jfs_readdir called with " 3148 "invalid offset!"); 3149 } 3150 dtoffset->pn = 1; 3151 dtoffset->index = 0; 3152 filp->f_pos = dtpos; 3153 } 3154 3155 if (dtEmpty(ip)) { 3156 filp->f_pos = DIREND; 3157 return 0; 3158 } 3159 3160 if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { 3161 jfs_err("jfs_readdir: unexpected rc = %d " 3162 "from dtReadNext", rc); 3163 filp->f_pos = DIREND; 3164 return 0; 3165 } 3166 /* get start leaf page and index */ 3167 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3168 3169 /* offset beyond directory eof ? */ 3170 if (bn < 0) { 3171 filp->f_pos = DIREND; 3172 return 0; 3173 } 3174 } 3175 3176 dirent_buf = __get_free_page(GFP_KERNEL); 3177 if (dirent_buf == 0) { 3178 DT_PUTPAGE(mp); 3179 jfs_warn("jfs_readdir: __get_free_page failed!"); 3180 filp->f_pos = DIREND; 3181 return -ENOMEM; 3182 } 3183 3184 while (1) { 3185 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3186 jfs_dirents = 0; 3187 overflow = fix_page = 0; 3188 3189 stbl = DT_GETSTBL(p); 3190 3191 for (i = index; i < p->header.nextindex; i++) { 3192 d = (struct ldtentry *) & p->slot[stbl[i]]; 3193 3194 if (((long) jfs_dirent + d->namlen + 1) > 3195 (dirent_buf + PAGE_SIZE)) { 3196 /* DBCS codepages could overrun dirent_buf */ 3197 index = i; 3198 overflow = 1; 3199 break; 3200 } 3201 3202 d_namleft = d->namlen; 3203 name_ptr = jfs_dirent->name; 3204 jfs_dirent->ino = le32_to_cpu(d->inumber); 3205 3206 if (do_index) { 3207 len = min(d_namleft, DTLHDRDATALEN); 3208 jfs_dirent->position = le32_to_cpu(d->index); 3209 /* 3210 * d->index should always be valid, but it 3211 * isn't. fsck.jfs doesn't create the 3212 * directory index for the lost+found 3213 * directory. Rather than let it go, 3214 * we can try to fix it. 3215 */ 3216 if ((jfs_dirent->position < 2) || 3217 (jfs_dirent->position >= 3218 JFS_IP(ip)->next_index)) { 3219 if (!page_fixed && !isReadOnly(ip)) { 3220 fix_page = 1; 3221 /* 3222 * setting overflow and setting 3223 * index to i will cause the 3224 * same page to be processed 3225 * again starting here 3226 */ 3227 overflow = 1; 3228 index = i; 3229 break; 3230 } 3231 jfs_dirent->position = unique_pos++; 3232 } 3233 } else { 3234 jfs_dirent->position = dtpos; 3235 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 3236 } 3237 3238 /* copy the name of head/only segment */ 3239 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 3240 codepage); 3241 jfs_dirent->name_len = outlen; 3242 3243 /* copy name in the additional segment(s) */ 3244 next = d->next; 3245 while (next >= 0) { 3246 t = (struct dtslot *) & p->slot[next]; 3247 name_ptr += outlen; 3248 d_namleft -= len; 3249 /* Sanity Check */ 3250 if (d_namleft == 0) { 3251 jfs_error(ip->i_sb, 3252 "JFS:Dtree error: ino = " 3253 "%ld, bn=%Ld, index = %d", 3254 (long)ip->i_ino, 3255 (long long)bn, 3256 i); 3257 goto skip_one; 3258 } 3259 len = min(d_namleft, DTSLOTDATALEN); 3260 outlen = jfs_strfromUCS_le(name_ptr, t->name, 3261 len, codepage); 3262 jfs_dirent->name_len += outlen; 3263 3264 next = t->next; 3265 } 3266 3267 jfs_dirents++; 3268 jfs_dirent = next_jfs_dirent(jfs_dirent); 3269 skip_one: 3270 if (!do_index) 3271 dtoffset->index++; 3272 } 3273 3274 if (!overflow) { 3275 /* Point to next leaf page */ 3276 if (p->header.flag & BT_ROOT) 3277 bn = 0; 3278 else { 3279 bn = le64_to_cpu(p->header.next); 3280 index = 0; 3281 /* update offset (pn:index) for new page */ 3282 if (!do_index) { 3283 dtoffset->pn++; 3284 dtoffset->index = 0; 3285 } 3286 } 3287 page_fixed = 0; 3288 } 3289 3290 /* unpin previous leaf page */ 3291 DT_PUTPAGE(mp); 3292 3293 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3294 while (jfs_dirents--) { 3295 filp->f_pos = jfs_dirent->position; 3296 if (filldir(dirent, jfs_dirent->name, 3297 jfs_dirent->name_len, filp->f_pos, 3298 jfs_dirent->ino, DT_UNKNOWN)) 3299 goto out; 3300 jfs_dirent = next_jfs_dirent(jfs_dirent); 3301 } 3302 3303 if (fix_page) { 3304 add_missing_indices(ip, bn); 3305 page_fixed = 1; 3306 } 3307 3308 if (!overflow && (bn == 0)) { 3309 filp->f_pos = DIREND; 3310 break; 3311 } 3312 3313 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3314 if (rc) { 3315 free_page(dirent_buf); 3316 return rc; 3317 } 3318 } 3319 3320 out: 3321 free_page(dirent_buf); 3322 3323 return rc; 3324 } 3325 3326 3327 /* 3328 * dtReadFirst() 3329 * 3330 * function: get the leftmost page of the directory 3331 */ 3332 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3333 { 3334 int rc = 0; 3335 s64 bn; 3336 int psize = 288; /* initial in-line directory */ 3337 struct metapage *mp; 3338 dtpage_t *p; 3339 s8 *stbl; 3340 struct btframe *btsp; 3341 pxd_t *xd; 3342 3343 BT_CLR(btstack); /* reset stack */ 3344 3345 /* 3346 * descend leftmost path of the tree 3347 * 3348 * by convention, root bn = 0. 3349 */ 3350 for (bn = 0;;) { 3351 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3352 if (rc) 3353 return rc; 3354 3355 /* 3356 * leftmost leaf page 3357 */ 3358 if (p->header.flag & BT_LEAF) { 3359 /* return leftmost entry */ 3360 btsp = btstack->top; 3361 btsp->bn = bn; 3362 btsp->index = 0; 3363 btsp->mp = mp; 3364 3365 return 0; 3366 } 3367 3368 /* 3369 * descend down to leftmost child page 3370 */ 3371 if (BT_STACK_FULL(btstack)) { 3372 DT_PUTPAGE(mp); 3373 jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); 3374 BT_STACK_DUMP(btstack); 3375 return -EIO; 3376 } 3377 /* push (bn, index) of the parent page/entry */ 3378 BT_PUSH(btstack, bn, 0); 3379 3380 /* get the leftmost entry */ 3381 stbl = DT_GETSTBL(p); 3382 xd = (pxd_t *) & p->slot[stbl[0]]; 3383 3384 /* get the child page block address */ 3385 bn = addressPXD(xd); 3386 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3387 3388 /* unpin the parent page */ 3389 DT_PUTPAGE(mp); 3390 } 3391 } 3392 3393 3394 /* 3395 * dtReadNext() 3396 * 3397 * function: get the page of the specified offset (pn:index) 3398 * 3399 * return: if (offset > eof), bn = -1; 3400 * 3401 * note: if index > nextindex of the target leaf page, 3402 * start with 1st entry of next leaf page; 3403 */ 3404 static int dtReadNext(struct inode *ip, loff_t * offset, 3405 struct btstack * btstack) 3406 { 3407 int rc = 0; 3408 struct dtoffset { 3409 s16 pn; 3410 s16 index; 3411 s32 unused; 3412 } *dtoffset = (struct dtoffset *) offset; 3413 s64 bn; 3414 struct metapage *mp; 3415 dtpage_t *p; 3416 int index; 3417 int pn; 3418 s8 *stbl; 3419 struct btframe *btsp, *parent; 3420 pxd_t *xd; 3421 3422 /* 3423 * get leftmost leaf page pinned 3424 */ 3425 if ((rc = dtReadFirst(ip, btstack))) 3426 return rc; 3427 3428 /* get leaf page */ 3429 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3430 3431 /* get the start offset (pn:index) */ 3432 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3433 index = dtoffset->index; 3434 3435 /* start at leftmost page ? */ 3436 if (pn == 0) { 3437 /* offset beyond eof ? */ 3438 if (index < p->header.nextindex) 3439 goto out; 3440 3441 if (p->header.flag & BT_ROOT) { 3442 bn = -1; 3443 goto out; 3444 } 3445 3446 /* start with 1st entry of next leaf page */ 3447 dtoffset->pn++; 3448 dtoffset->index = index = 0; 3449 goto a; 3450 } 3451 3452 /* start at non-leftmost page: scan parent pages for large pn */ 3453 if (p->header.flag & BT_ROOT) { 3454 bn = -1; 3455 goto out; 3456 } 3457 3458 /* start after next leaf page ? */ 3459 if (pn > 1) 3460 goto b; 3461 3462 /* get leaf page pn = 1 */ 3463 a: 3464 bn = le64_to_cpu(p->header.next); 3465 3466 /* unpin leaf page */ 3467 DT_PUTPAGE(mp); 3468 3469 /* offset beyond eof ? */ 3470 if (bn == 0) { 3471 bn = -1; 3472 goto out; 3473 } 3474 3475 goto c; 3476 3477 /* 3478 * scan last internal page level to get target leaf page 3479 */ 3480 b: 3481 /* unpin leftmost leaf page */ 3482 DT_PUTPAGE(mp); 3483 3484 /* get left most parent page */ 3485 btsp = btstack->top; 3486 parent = btsp - 1; 3487 bn = parent->bn; 3488 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3489 if (rc) 3490 return rc; 3491 3492 /* scan parent pages at last internal page level */ 3493 while (pn >= p->header.nextindex) { 3494 pn -= p->header.nextindex; 3495 3496 /* get next parent page address */ 3497 bn = le64_to_cpu(p->header.next); 3498 3499 /* unpin current parent page */ 3500 DT_PUTPAGE(mp); 3501 3502 /* offset beyond eof ? */ 3503 if (bn == 0) { 3504 bn = -1; 3505 goto out; 3506 } 3507 3508 /* get next parent page */ 3509 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3510 if (rc) 3511 return rc; 3512 3513 /* update parent page stack frame */ 3514 parent->bn = bn; 3515 } 3516 3517 /* get leaf page address */ 3518 stbl = DT_GETSTBL(p); 3519 xd = (pxd_t *) & p->slot[stbl[pn]]; 3520 bn = addressPXD(xd); 3521 3522 /* unpin parent page */ 3523 DT_PUTPAGE(mp); 3524 3525 /* 3526 * get target leaf page 3527 */ 3528 c: 3529 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3530 if (rc) 3531 return rc; 3532 3533 /* 3534 * leaf page has been completed: 3535 * start with 1st entry of next leaf page 3536 */ 3537 if (index >= p->header.nextindex) { 3538 bn = le64_to_cpu(p->header.next); 3539 3540 /* unpin leaf page */ 3541 DT_PUTPAGE(mp); 3542 3543 /* offset beyond eof ? */ 3544 if (bn == 0) { 3545 bn = -1; 3546 goto out; 3547 } 3548 3549 /* get next leaf page */ 3550 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3551 if (rc) 3552 return rc; 3553 3554 /* start with 1st entry of next leaf page */ 3555 dtoffset->pn++; 3556 dtoffset->index = 0; 3557 } 3558 3559 out: 3560 /* return target leaf page pinned */ 3561 btsp = btstack->top; 3562 btsp->bn = bn; 3563 btsp->index = dtoffset->index; 3564 btsp->mp = mp; 3565 3566 return 0; 3567 } 3568 3569 3570 /* 3571 * dtCompare() 3572 * 3573 * function: compare search key with an internal entry 3574 * 3575 * return: 3576 * < 0 if k is < record 3577 * = 0 if k is = record 3578 * > 0 if k is > record 3579 */ 3580 static int dtCompare(struct component_name * key, /* search key */ 3581 dtpage_t * p, /* directory page */ 3582 int si) 3583 { /* entry slot index */ 3584 wchar_t *kname; 3585 __le16 *name; 3586 int klen, namlen, len, rc; 3587 struct idtentry *ih; 3588 struct dtslot *t; 3589 3590 /* 3591 * force the left-most key on internal pages, at any level of 3592 * the tree, to be less than any search key. 3593 * this obviates having to update the leftmost key on an internal 3594 * page when the user inserts a new key in the tree smaller than 3595 * anything that has been stored. 3596 * 3597 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3598 * at any internal page at any level of the tree, 3599 * it descends to child of the entry anyway - 3600 * ? make the entry as min size dummy entry) 3601 * 3602 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3603 * return (1); 3604 */ 3605 3606 kname = key->name; 3607 klen = key->namlen; 3608 3609 ih = (struct idtentry *) & p->slot[si]; 3610 si = ih->next; 3611 name = ih->name; 3612 namlen = ih->namlen; 3613 len = min(namlen, DTIHDRDATALEN); 3614 3615 /* compare with head/only segment */ 3616 len = min(klen, len); 3617 if ((rc = UniStrncmp_le(kname, name, len))) 3618 return rc; 3619 3620 klen -= len; 3621 namlen -= len; 3622 3623 /* compare with additional segment(s) */ 3624 kname += len; 3625 while (klen > 0 && namlen > 0) { 3626 /* compare with next name segment */ 3627 t = (struct dtslot *) & p->slot[si]; 3628 len = min(namlen, DTSLOTDATALEN); 3629 len = min(klen, len); 3630 name = t->name; 3631 if ((rc = UniStrncmp_le(kname, name, len))) 3632 return rc; 3633 3634 klen -= len; 3635 namlen -= len; 3636 kname += len; 3637 si = t->next; 3638 } 3639 3640 return (klen - namlen); 3641 } 3642 3643 3644 3645 3646 /* 3647 * ciCompare() 3648 * 3649 * function: compare search key with an (leaf/internal) entry 3650 * 3651 * return: 3652 * < 0 if k is < record 3653 * = 0 if k is = record 3654 * > 0 if k is > record 3655 */ 3656 static int ciCompare(struct component_name * key, /* search key */ 3657 dtpage_t * p, /* directory page */ 3658 int si, /* entry slot index */ 3659 int flag) 3660 { 3661 wchar_t *kname, x; 3662 __le16 *name; 3663 int klen, namlen, len, rc; 3664 struct ldtentry *lh; 3665 struct idtentry *ih; 3666 struct dtslot *t; 3667 int i; 3668 3669 /* 3670 * force the left-most key on internal pages, at any level of 3671 * the tree, to be less than any search key. 3672 * this obviates having to update the leftmost key on an internal 3673 * page when the user inserts a new key in the tree smaller than 3674 * anything that has been stored. 3675 * 3676 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3677 * at any internal page at any level of the tree, 3678 * it descends to child of the entry anyway - 3679 * ? make the entry as min size dummy entry) 3680 * 3681 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3682 * return (1); 3683 */ 3684 3685 kname = key->name; 3686 klen = key->namlen; 3687 3688 /* 3689 * leaf page entry 3690 */ 3691 if (p->header.flag & BT_LEAF) { 3692 lh = (struct ldtentry *) & p->slot[si]; 3693 si = lh->next; 3694 name = lh->name; 3695 namlen = lh->namlen; 3696 if (flag & JFS_DIR_INDEX) 3697 len = min(namlen, DTLHDRDATALEN); 3698 else 3699 len = min(namlen, DTLHDRDATALEN_LEGACY); 3700 } 3701 /* 3702 * internal page entry 3703 */ 3704 else { 3705 ih = (struct idtentry *) & p->slot[si]; 3706 si = ih->next; 3707 name = ih->name; 3708 namlen = ih->namlen; 3709 len = min(namlen, DTIHDRDATALEN); 3710 } 3711 3712 /* compare with head/only segment */ 3713 len = min(klen, len); 3714 for (i = 0; i < len; i++, kname++, name++) { 3715 /* only uppercase if case-insensitive support is on */ 3716 if ((flag & JFS_OS2) == JFS_OS2) 3717 x = UniToupper(le16_to_cpu(*name)); 3718 else 3719 x = le16_to_cpu(*name); 3720 if ((rc = *kname - x)) 3721 return rc; 3722 } 3723 3724 klen -= len; 3725 namlen -= len; 3726 3727 /* compare with additional segment(s) */ 3728 while (klen > 0 && namlen > 0) { 3729 /* compare with next name segment */ 3730 t = (struct dtslot *) & p->slot[si]; 3731 len = min(namlen, DTSLOTDATALEN); 3732 len = min(klen, len); 3733 name = t->name; 3734 for (i = 0; i < len; i++, kname++, name++) { 3735 /* only uppercase if case-insensitive support is on */ 3736 if ((flag & JFS_OS2) == JFS_OS2) 3737 x = UniToupper(le16_to_cpu(*name)); 3738 else 3739 x = le16_to_cpu(*name); 3740 3741 if ((rc = *kname - x)) 3742 return rc; 3743 } 3744 3745 klen -= len; 3746 namlen -= len; 3747 si = t->next; 3748 } 3749 3750 return (klen - namlen); 3751 } 3752 3753 3754 /* 3755 * ciGetLeafPrefixKey() 3756 * 3757 * function: compute prefix of suffix compression 3758 * from two adjacent leaf entries 3759 * across page boundary 3760 * 3761 * return: non-zero on error 3762 * 3763 */ 3764 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3765 int ri, struct component_name * key, int flag) 3766 { 3767 int klen, namlen; 3768 wchar_t *pl, *pr, *kname; 3769 struct component_name lkey; 3770 struct component_name rkey; 3771 3772 lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3773 GFP_KERNEL); 3774 if (lkey.name == NULL) 3775 return -ENOMEM; 3776 3777 rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3778 GFP_KERNEL); 3779 if (rkey.name == NULL) { 3780 kfree(lkey.name); 3781 return -ENOMEM; 3782 } 3783 3784 /* get left and right key */ 3785 dtGetKey(lp, li, &lkey, flag); 3786 lkey.name[lkey.namlen] = 0; 3787 3788 if ((flag & JFS_OS2) == JFS_OS2) 3789 ciToUpper(&lkey); 3790 3791 dtGetKey(rp, ri, &rkey, flag); 3792 rkey.name[rkey.namlen] = 0; 3793 3794 3795 if ((flag & JFS_OS2) == JFS_OS2) 3796 ciToUpper(&rkey); 3797 3798 /* compute prefix */ 3799 klen = 0; 3800 kname = key->name; 3801 namlen = min(lkey.namlen, rkey.namlen); 3802 for (pl = lkey.name, pr = rkey.name; 3803 namlen; pl++, pr++, namlen--, klen++, kname++) { 3804 *kname = *pr; 3805 if (*pl != *pr) { 3806 key->namlen = klen + 1; 3807 goto free_names; 3808 } 3809 } 3810 3811 /* l->namlen <= r->namlen since l <= r */ 3812 if (lkey.namlen < rkey.namlen) { 3813 *kname = *pr; 3814 key->namlen = klen + 1; 3815 } else /* l->namelen == r->namelen */ 3816 key->namlen = klen; 3817 3818 free_names: 3819 kfree(lkey.name); 3820 kfree(rkey.name); 3821 return 0; 3822 } 3823 3824 3825 3826 /* 3827 * dtGetKey() 3828 * 3829 * function: get key of the entry 3830 */ 3831 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3832 struct component_name * key, int flag) 3833 { 3834 int si; 3835 s8 *stbl; 3836 struct ldtentry *lh; 3837 struct idtentry *ih; 3838 struct dtslot *t; 3839 int namlen, len; 3840 wchar_t *kname; 3841 __le16 *name; 3842 3843 /* get entry */ 3844 stbl = DT_GETSTBL(p); 3845 si = stbl[i]; 3846 if (p->header.flag & BT_LEAF) { 3847 lh = (struct ldtentry *) & p->slot[si]; 3848 si = lh->next; 3849 namlen = lh->namlen; 3850 name = lh->name; 3851 if (flag & JFS_DIR_INDEX) 3852 len = min(namlen, DTLHDRDATALEN); 3853 else 3854 len = min(namlen, DTLHDRDATALEN_LEGACY); 3855 } else { 3856 ih = (struct idtentry *) & p->slot[si]; 3857 si = ih->next; 3858 namlen = ih->namlen; 3859 name = ih->name; 3860 len = min(namlen, DTIHDRDATALEN); 3861 } 3862 3863 key->namlen = namlen; 3864 kname = key->name; 3865 3866 /* 3867 * move head/only segment 3868 */ 3869 UniStrncpy_from_le(kname, name, len); 3870 3871 /* 3872 * move additional segment(s) 3873 */ 3874 while (si >= 0) { 3875 /* get next segment */ 3876 t = &p->slot[si]; 3877 kname += len; 3878 namlen -= len; 3879 len = min(namlen, DTSLOTDATALEN); 3880 UniStrncpy_from_le(kname, t->name, len); 3881 3882 si = t->next; 3883 } 3884 } 3885 3886 3887 /* 3888 * dtInsertEntry() 3889 * 3890 * function: allocate free slot(s) and 3891 * write a leaf/internal entry 3892 * 3893 * return: entry slot index 3894 */ 3895 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3896 ddata_t * data, struct dt_lock ** dtlock) 3897 { 3898 struct dtslot *h, *t; 3899 struct ldtentry *lh = NULL; 3900 struct idtentry *ih = NULL; 3901 int hsi, fsi, klen, len, nextindex; 3902 wchar_t *kname; 3903 __le16 *name; 3904 s8 *stbl; 3905 pxd_t *xd; 3906 struct dt_lock *dtlck = *dtlock; 3907 struct lv *lv; 3908 int xsi, n; 3909 s64 bn = 0; 3910 struct metapage *mp = NULL; 3911 3912 klen = key->namlen; 3913 kname = key->name; 3914 3915 /* allocate a free slot */ 3916 hsi = fsi = p->header.freelist; 3917 h = &p->slot[fsi]; 3918 p->header.freelist = h->next; 3919 --p->header.freecnt; 3920 3921 /* open new linelock */ 3922 if (dtlck->index >= dtlck->maxcnt) 3923 dtlck = (struct dt_lock *) txLinelock(dtlck); 3924 3925 lv = & dtlck->lv[dtlck->index]; 3926 lv->offset = hsi; 3927 3928 /* write head/only segment */ 3929 if (p->header.flag & BT_LEAF) { 3930 lh = (struct ldtentry *) h; 3931 lh->next = h->next; 3932 lh->inumber = cpu_to_le32(data->leaf.ino); 3933 lh->namlen = klen; 3934 name = lh->name; 3935 if (data->leaf.ip) { 3936 len = min(klen, DTLHDRDATALEN); 3937 if (!(p->header.flag & BT_ROOT)) 3938 bn = addressPXD(&p->header.self); 3939 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3940 data->leaf.ip, 3941 bn, index)); 3942 } else 3943 len = min(klen, DTLHDRDATALEN_LEGACY); 3944 } else { 3945 ih = (struct idtentry *) h; 3946 ih->next = h->next; 3947 xd = (pxd_t *) ih; 3948 *xd = data->xd; 3949 ih->namlen = klen; 3950 name = ih->name; 3951 len = min(klen, DTIHDRDATALEN); 3952 } 3953 3954 UniStrncpy_to_le(name, kname, len); 3955 3956 n = 1; 3957 xsi = hsi; 3958 3959 /* write additional segment(s) */ 3960 t = h; 3961 klen -= len; 3962 while (klen) { 3963 /* get free slot */ 3964 fsi = p->header.freelist; 3965 t = &p->slot[fsi]; 3966 p->header.freelist = t->next; 3967 --p->header.freecnt; 3968 3969 /* is next slot contiguous ? */ 3970 if (fsi != xsi + 1) { 3971 /* close current linelock */ 3972 lv->length = n; 3973 dtlck->index++; 3974 3975 /* open new linelock */ 3976 if (dtlck->index < dtlck->maxcnt) 3977 lv++; 3978 else { 3979 dtlck = (struct dt_lock *) txLinelock(dtlck); 3980 lv = & dtlck->lv[0]; 3981 } 3982 3983 lv->offset = fsi; 3984 n = 0; 3985 } 3986 3987 kname += len; 3988 len = min(klen, DTSLOTDATALEN); 3989 UniStrncpy_to_le(t->name, kname, len); 3990 3991 n++; 3992 xsi = fsi; 3993 klen -= len; 3994 } 3995 3996 /* close current linelock */ 3997 lv->length = n; 3998 dtlck->index++; 3999 4000 *dtlock = dtlck; 4001 4002 /* terminate last/only segment */ 4003 if (h == t) { 4004 /* single segment entry */ 4005 if (p->header.flag & BT_LEAF) 4006 lh->next = -1; 4007 else 4008 ih->next = -1; 4009 } else 4010 /* multi-segment entry */ 4011 t->next = -1; 4012 4013 /* if insert into middle, shift right succeeding entries in stbl */ 4014 stbl = DT_GETSTBL(p); 4015 nextindex = p->header.nextindex; 4016 if (index < nextindex) { 4017 memmove(stbl + index + 1, stbl + index, nextindex - index); 4018 4019 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 4020 s64 lblock; 4021 4022 /* 4023 * Need to update slot number for entries that moved 4024 * in the stbl 4025 */ 4026 mp = NULL; 4027 for (n = index + 1; n <= nextindex; n++) { 4028 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 4029 modify_index(data->leaf.tid, data->leaf.ip, 4030 le32_to_cpu(lh->index), bn, n, 4031 &mp, &lblock); 4032 } 4033 if (mp) 4034 release_metapage(mp); 4035 } 4036 } 4037 4038 stbl[index] = hsi; 4039 4040 /* advance next available entry index of stbl */ 4041 ++p->header.nextindex; 4042 } 4043 4044 4045 /* 4046 * dtMoveEntry() 4047 * 4048 * function: move entries from split/left page to new/right page 4049 * 4050 * nextindex of dst page and freelist/freecnt of both pages 4051 * are updated. 4052 */ 4053 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 4054 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 4055 int do_index) 4056 { 4057 int ssi, next; /* src slot index */ 4058 int di; /* dst entry index */ 4059 int dsi; /* dst slot index */ 4060 s8 *sstbl, *dstbl; /* sorted entry table */ 4061 int snamlen, len; 4062 struct ldtentry *slh, *dlh = NULL; 4063 struct idtentry *sih, *dih = NULL; 4064 struct dtslot *h, *s, *d; 4065 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 4066 struct lv *slv, *dlv; 4067 int xssi, ns, nd; 4068 int sfsi; 4069 4070 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 4071 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 4072 4073 dsi = dp->header.freelist; /* first (whole page) free slot */ 4074 sfsi = sp->header.freelist; 4075 4076 /* linelock destination entry slot */ 4077 dlv = & ddtlck->lv[ddtlck->index]; 4078 dlv->offset = dsi; 4079 4080 /* linelock source entry slot */ 4081 slv = & sdtlck->lv[sdtlck->index]; 4082 slv->offset = sstbl[si]; 4083 xssi = slv->offset - 1; 4084 4085 /* 4086 * move entries 4087 */ 4088 ns = nd = 0; 4089 for (di = 0; si < sp->header.nextindex; si++, di++) { 4090 ssi = sstbl[si]; 4091 dstbl[di] = dsi; 4092 4093 /* is next slot contiguous ? */ 4094 if (ssi != xssi + 1) { 4095 /* close current linelock */ 4096 slv->length = ns; 4097 sdtlck->index++; 4098 4099 /* open new linelock */ 4100 if (sdtlck->index < sdtlck->maxcnt) 4101 slv++; 4102 else { 4103 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 4104 slv = & sdtlck->lv[0]; 4105 } 4106 4107 slv->offset = ssi; 4108 ns = 0; 4109 } 4110 4111 /* 4112 * move head/only segment of an entry 4113 */ 4114 /* get dst slot */ 4115 h = d = &dp->slot[dsi]; 4116 4117 /* get src slot and move */ 4118 s = &sp->slot[ssi]; 4119 if (sp->header.flag & BT_LEAF) { 4120 /* get source entry */ 4121 slh = (struct ldtentry *) s; 4122 dlh = (struct ldtentry *) h; 4123 snamlen = slh->namlen; 4124 4125 if (do_index) { 4126 len = min(snamlen, DTLHDRDATALEN); 4127 dlh->index = slh->index; /* little-endian */ 4128 } else 4129 len = min(snamlen, DTLHDRDATALEN_LEGACY); 4130 4131 memcpy(dlh, slh, 6 + len * 2); 4132 4133 next = slh->next; 4134 4135 /* update dst head/only segment next field */ 4136 dsi++; 4137 dlh->next = dsi; 4138 } else { 4139 sih = (struct idtentry *) s; 4140 snamlen = sih->namlen; 4141 4142 len = min(snamlen, DTIHDRDATALEN); 4143 dih = (struct idtentry *) h; 4144 memcpy(dih, sih, 10 + len * 2); 4145 next = sih->next; 4146 4147 dsi++; 4148 dih->next = dsi; 4149 } 4150 4151 /* free src head/only segment */ 4152 s->next = sfsi; 4153 s->cnt = 1; 4154 sfsi = ssi; 4155 4156 ns++; 4157 nd++; 4158 xssi = ssi; 4159 4160 /* 4161 * move additional segment(s) of the entry 4162 */ 4163 snamlen -= len; 4164 while ((ssi = next) >= 0) { 4165 /* is next slot contiguous ? */ 4166 if (ssi != xssi + 1) { 4167 /* close current linelock */ 4168 slv->length = ns; 4169 sdtlck->index++; 4170 4171 /* open new linelock */ 4172 if (sdtlck->index < sdtlck->maxcnt) 4173 slv++; 4174 else { 4175 sdtlck = 4176 (struct dt_lock *) 4177 txLinelock(sdtlck); 4178 slv = & sdtlck->lv[0]; 4179 } 4180 4181 slv->offset = ssi; 4182 ns = 0; 4183 } 4184 4185 /* get next source segment */ 4186 s = &sp->slot[ssi]; 4187 4188 /* get next destination free slot */ 4189 d++; 4190 4191 len = min(snamlen, DTSLOTDATALEN); 4192 UniStrncpy_le(d->name, s->name, len); 4193 4194 ns++; 4195 nd++; 4196 xssi = ssi; 4197 4198 dsi++; 4199 d->next = dsi; 4200 4201 /* free source segment */ 4202 next = s->next; 4203 s->next = sfsi; 4204 s->cnt = 1; 4205 sfsi = ssi; 4206 4207 snamlen -= len; 4208 } /* end while */ 4209 4210 /* terminate dst last/only segment */ 4211 if (h == d) { 4212 /* single segment entry */ 4213 if (dp->header.flag & BT_LEAF) 4214 dlh->next = -1; 4215 else 4216 dih->next = -1; 4217 } else 4218 /* multi-segment entry */ 4219 d->next = -1; 4220 } /* end for */ 4221 4222 /* close current linelock */ 4223 slv->length = ns; 4224 sdtlck->index++; 4225 *sdtlock = sdtlck; 4226 4227 dlv->length = nd; 4228 ddtlck->index++; 4229 *ddtlock = ddtlck; 4230 4231 /* update source header */ 4232 sp->header.freelist = sfsi; 4233 sp->header.freecnt += nd; 4234 4235 /* update destination header */ 4236 dp->header.nextindex = di; 4237 4238 dp->header.freelist = dsi; 4239 dp->header.freecnt -= nd; 4240 } 4241 4242 4243 /* 4244 * dtDeleteEntry() 4245 * 4246 * function: free a (leaf/internal) entry 4247 * 4248 * log freelist header, stbl, and each segment slot of entry 4249 * (even though last/only segment next field is modified, 4250 * physical image logging requires all segment slots of 4251 * the entry logged to avoid applying previous updates 4252 * to the same slots) 4253 */ 4254 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 4255 { 4256 int fsi; /* free entry slot index */ 4257 s8 *stbl; 4258 struct dtslot *t; 4259 int si, freecnt; 4260 struct dt_lock *dtlck = *dtlock; 4261 struct lv *lv; 4262 int xsi, n; 4263 4264 /* get free entry slot index */ 4265 stbl = DT_GETSTBL(p); 4266 fsi = stbl[fi]; 4267 4268 /* open new linelock */ 4269 if (dtlck->index >= dtlck->maxcnt) 4270 dtlck = (struct dt_lock *) txLinelock(dtlck); 4271 lv = & dtlck->lv[dtlck->index]; 4272 4273 lv->offset = fsi; 4274 4275 /* get the head/only segment */ 4276 t = &p->slot[fsi]; 4277 if (p->header.flag & BT_LEAF) 4278 si = ((struct ldtentry *) t)->next; 4279 else 4280 si = ((struct idtentry *) t)->next; 4281 t->next = si; 4282 t->cnt = 1; 4283 4284 n = freecnt = 1; 4285 xsi = fsi; 4286 4287 /* find the last/only segment */ 4288 while (si >= 0) { 4289 /* is next slot contiguous ? */ 4290 if (si != xsi + 1) { 4291 /* close current linelock */ 4292 lv->length = n; 4293 dtlck->index++; 4294 4295 /* open new linelock */ 4296 if (dtlck->index < dtlck->maxcnt) 4297 lv++; 4298 else { 4299 dtlck = (struct dt_lock *) txLinelock(dtlck); 4300 lv = & dtlck->lv[0]; 4301 } 4302 4303 lv->offset = si; 4304 n = 0; 4305 } 4306 4307 n++; 4308 xsi = si; 4309 freecnt++; 4310 4311 t = &p->slot[si]; 4312 t->cnt = 1; 4313 si = t->next; 4314 } 4315 4316 /* close current linelock */ 4317 lv->length = n; 4318 dtlck->index++; 4319 4320 *dtlock = dtlck; 4321 4322 /* update freelist */ 4323 t->next = p->header.freelist; 4324 p->header.freelist = fsi; 4325 p->header.freecnt += freecnt; 4326 4327 /* if delete from middle, 4328 * shift left the succedding entries in the stbl 4329 */ 4330 si = p->header.nextindex; 4331 if (fi < si - 1) 4332 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4333 4334 p->header.nextindex--; 4335 } 4336 4337 4338 /* 4339 * dtTruncateEntry() 4340 * 4341 * function: truncate a (leaf/internal) entry 4342 * 4343 * log freelist header, stbl, and each segment slot of entry 4344 * (even though last/only segment next field is modified, 4345 * physical image logging requires all segment slots of 4346 * the entry logged to avoid applying previous updates 4347 * to the same slots) 4348 */ 4349 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4350 { 4351 int tsi; /* truncate entry slot index */ 4352 s8 *stbl; 4353 struct dtslot *t; 4354 int si, freecnt; 4355 struct dt_lock *dtlck = *dtlock; 4356 struct lv *lv; 4357 int fsi, xsi, n; 4358 4359 /* get free entry slot index */ 4360 stbl = DT_GETSTBL(p); 4361 tsi = stbl[ti]; 4362 4363 /* open new linelock */ 4364 if (dtlck->index >= dtlck->maxcnt) 4365 dtlck = (struct dt_lock *) txLinelock(dtlck); 4366 lv = & dtlck->lv[dtlck->index]; 4367 4368 lv->offset = tsi; 4369 4370 /* get the head/only segment */ 4371 t = &p->slot[tsi]; 4372 ASSERT(p->header.flag & BT_INTERNAL); 4373 ((struct idtentry *) t)->namlen = 0; 4374 si = ((struct idtentry *) t)->next; 4375 ((struct idtentry *) t)->next = -1; 4376 4377 n = 1; 4378 freecnt = 0; 4379 fsi = si; 4380 xsi = tsi; 4381 4382 /* find the last/only segment */ 4383 while (si >= 0) { 4384 /* is next slot contiguous ? */ 4385 if (si != xsi + 1) { 4386 /* close current linelock */ 4387 lv->length = n; 4388 dtlck->index++; 4389 4390 /* open new linelock */ 4391 if (dtlck->index < dtlck->maxcnt) 4392 lv++; 4393 else { 4394 dtlck = (struct dt_lock *) txLinelock(dtlck); 4395 lv = & dtlck->lv[0]; 4396 } 4397 4398 lv->offset = si; 4399 n = 0; 4400 } 4401 4402 n++; 4403 xsi = si; 4404 freecnt++; 4405 4406 t = &p->slot[si]; 4407 t->cnt = 1; 4408 si = t->next; 4409 } 4410 4411 /* close current linelock */ 4412 lv->length = n; 4413 dtlck->index++; 4414 4415 *dtlock = dtlck; 4416 4417 /* update freelist */ 4418 if (freecnt == 0) 4419 return; 4420 t->next = p->header.freelist; 4421 p->header.freelist = fsi; 4422 p->header.freecnt += freecnt; 4423 } 4424 4425 4426 /* 4427 * dtLinelockFreelist() 4428 */ 4429 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4430 int m, /* max slot index */ 4431 struct dt_lock ** dtlock) 4432 { 4433 int fsi; /* free entry slot index */ 4434 struct dtslot *t; 4435 int si; 4436 struct dt_lock *dtlck = *dtlock; 4437 struct lv *lv; 4438 int xsi, n; 4439 4440 /* get free entry slot index */ 4441 fsi = p->header.freelist; 4442 4443 /* open new linelock */ 4444 if (dtlck->index >= dtlck->maxcnt) 4445 dtlck = (struct dt_lock *) txLinelock(dtlck); 4446 lv = & dtlck->lv[dtlck->index]; 4447 4448 lv->offset = fsi; 4449 4450 n = 1; 4451 xsi = fsi; 4452 4453 t = &p->slot[fsi]; 4454 si = t->next; 4455 4456 /* find the last/only segment */ 4457 while (si < m && si >= 0) { 4458 /* is next slot contiguous ? */ 4459 if (si != xsi + 1) { 4460 /* close current linelock */ 4461 lv->length = n; 4462 dtlck->index++; 4463 4464 /* open new linelock */ 4465 if (dtlck->index < dtlck->maxcnt) 4466 lv++; 4467 else { 4468 dtlck = (struct dt_lock *) txLinelock(dtlck); 4469 lv = & dtlck->lv[0]; 4470 } 4471 4472 lv->offset = si; 4473 n = 0; 4474 } 4475 4476 n++; 4477 xsi = si; 4478 4479 t = &p->slot[si]; 4480 si = t->next; 4481 } 4482 4483 /* close current linelock */ 4484 lv->length = n; 4485 dtlck->index++; 4486 4487 *dtlock = dtlck; 4488 } 4489 4490 4491 /* 4492 * NAME: dtModify 4493 * 4494 * FUNCTION: Modify the inode number part of a directory entry 4495 * 4496 * PARAMETERS: 4497 * tid - Transaction id 4498 * ip - Inode of parent directory 4499 * key - Name of entry to be modified 4500 * orig_ino - Original inode number expected in entry 4501 * new_ino - New inode number to put into entry 4502 * flag - JFS_RENAME 4503 * 4504 * RETURNS: 4505 * -ESTALE - If entry found does not match orig_ino passed in 4506 * -ENOENT - If no entry can be found to match key 4507 * 0 - If successfully modified entry 4508 */ 4509 int dtModify(tid_t tid, struct inode *ip, 4510 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4511 { 4512 int rc; 4513 s64 bn; 4514 struct metapage *mp; 4515 dtpage_t *p; 4516 int index; 4517 struct btstack btstack; 4518 struct tlock *tlck; 4519 struct dt_lock *dtlck; 4520 struct lv *lv; 4521 s8 *stbl; 4522 int entry_si; /* entry slot index */ 4523 struct ldtentry *entry; 4524 4525 /* 4526 * search for the entry to modify: 4527 * 4528 * dtSearch() returns (leaf page pinned, index at which to modify). 4529 */ 4530 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4531 return rc; 4532 4533 /* retrieve search result */ 4534 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4535 4536 BT_MARK_DIRTY(mp, ip); 4537 /* 4538 * acquire a transaction lock on the leaf page of named entry 4539 */ 4540 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4541 dtlck = (struct dt_lock *) & tlck->lock; 4542 4543 /* get slot index of the entry */ 4544 stbl = DT_GETSTBL(p); 4545 entry_si = stbl[index]; 4546 4547 /* linelock entry */ 4548 ASSERT(dtlck->index == 0); 4549 lv = & dtlck->lv[0]; 4550 lv->offset = entry_si; 4551 lv->length = 1; 4552 dtlck->index++; 4553 4554 /* get the head/only segment */ 4555 entry = (struct ldtentry *) & p->slot[entry_si]; 4556 4557 /* substitute the inode number of the entry */ 4558 entry->inumber = cpu_to_le32(new_ino); 4559 4560 /* unpin the leaf page */ 4561 DT_PUTPAGE(mp); 4562 4563 return 0; 4564 } 4565