1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * jfs_imap.c: inode allocation map manager 21 * 22 * Serialization: 23 * Each AG has a simple lock which is used to control the serialization of 24 * the AG level lists. This lock should be taken first whenever an AG 25 * level list will be modified or accessed. 26 * 27 * Each IAG is locked by obtaining the buffer for the IAG page. 28 * 29 * There is also a inode lock for the inode map inode. A read lock needs to 30 * be taken whenever an IAG is read from the map or the global level 31 * information is read. A write lock needs to be taken whenever the global 32 * level information is modified or an atomic operation needs to be used. 33 * 34 * If more than one IAG is read at one time, the read lock may not 35 * be given up until all of the IAG's are read. Otherwise, a deadlock 36 * may occur when trying to obtain the read lock while another thread 37 * holding the read lock is waiting on the IAG already being held. 38 * 39 * The control page of the inode map is read into memory by diMount(). 40 * Thereafter it should only be modified in memory and then it will be 41 * written out when the filesystem is unmounted by diUnmount(). 
42 */ 43 44 #include <linux/fs.h> 45 #include <linux/buffer_head.h> 46 #include <linux/pagemap.h> 47 #include <linux/quotaops.h> 48 49 #include "jfs_incore.h" 50 #include "jfs_inode.h" 51 #include "jfs_filsys.h" 52 #include "jfs_dinode.h" 53 #include "jfs_dmap.h" 54 #include "jfs_imap.h" 55 #include "jfs_metapage.h" 56 #include "jfs_superblock.h" 57 #include "jfs_debug.h" 58 59 /* 60 * __mark_inode_dirty expects inodes to be hashed. Since we don't want 61 * special inodes in the fileset inode space, we hash them to a dummy head 62 */ 63 static HLIST_HEAD(aggregate_hash); 64 65 /* 66 * imap locks 67 */ 68 /* iag free list lock */ 69 #define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) 70 #define IAGFREE_LOCK(imap) down(&imap->im_freelock) 71 #define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) 72 73 /* per ag iag list locks */ 74 #define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) 75 #define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) 76 #define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) 77 78 /* 79 * forward references 80 */ 81 static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); 82 static int diAllocAny(struct inomap *, int, boolean_t, struct inode *); 83 static int diAllocBit(struct inomap *, struct iag *, int); 84 static int diAllocExt(struct inomap *, int, struct inode *); 85 static int diAllocIno(struct inomap *, int, struct inode *); 86 static int diFindFree(u32, int); 87 static int diNewExt(struct inomap *, struct iag *, int); 88 static int diNewIAG(struct inomap *, int *, int, struct metapage **); 89 static void duplicateIXtree(struct super_block *, s64, int, s64 *); 90 91 static int diIAGRead(struct inomap * imap, int, struct metapage **); 92 static int copy_from_dinode(struct dinode *, struct inode *); 93 static void copy_to_dinode(struct dinode *, struct inode *); 94 95 /* 96 * NAME: diMount() 97 * 98 * FUNCTION: initialize the incore inode map control structures for 99 * a fileset or aggregate init 
time. 100 * 101 * the inode map's control structure (dinomap) is 102 * brought in from disk and placed in virtual memory. 103 * 104 * PARAMETERS: 105 * ipimap - pointer to inode map inode for the aggregate or fileset. 106 * 107 * RETURN VALUES: 108 * 0 - success 109 * -ENOMEM - insufficient free virtual memory. 110 * -EIO - i/o error. 111 */ 112 int diMount(struct inode *ipimap) 113 { 114 struct inomap *imap; 115 struct metapage *mp; 116 int index; 117 struct dinomap_disk *dinom_le; 118 119 /* 120 * allocate/initialize the in-memory inode map control structure 121 */ 122 /* allocate the in-memory inode map control structure. */ 123 imap = (struct inomap *) kmalloc(sizeof(struct inomap), GFP_KERNEL); 124 if (imap == NULL) { 125 jfs_err("diMount: kmalloc returned NULL!"); 126 return -ENOMEM; 127 } 128 129 /* read the on-disk inode map control structure. */ 130 131 mp = read_metapage(ipimap, 132 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 133 PSIZE, 0); 134 if (mp == NULL) { 135 kfree(imap); 136 return -EIO; 137 } 138 139 /* copy the on-disk version to the in-memory version. */ 140 dinom_le = (struct dinomap_disk *) mp->data; 141 imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); 142 imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); 143 atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); 144 atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); 145 imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); 146 imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); 147 for (index = 0; index < MAXAG; index++) { 148 imap->im_agctl[index].inofree = 149 le32_to_cpu(dinom_le->in_agctl[index].inofree); 150 imap->im_agctl[index].extfree = 151 le32_to_cpu(dinom_le->in_agctl[index].extfree); 152 imap->im_agctl[index].numinos = 153 le32_to_cpu(dinom_le->in_agctl[index].numinos); 154 imap->im_agctl[index].numfree = 155 le32_to_cpu(dinom_le->in_agctl[index].numfree); 156 } 157 158 /* release the buffer. 
*/ 159 release_metapage(mp); 160 161 /* 162 * allocate/initialize inode allocation map locks 163 */ 164 /* allocate and init iag free list lock */ 165 IAGFREE_LOCK_INIT(imap); 166 167 /* allocate and init ag list locks */ 168 for (index = 0; index < MAXAG; index++) { 169 AG_LOCK_INIT(imap, index); 170 } 171 172 /* bind the inode map inode and inode map control structure 173 * to each other. 174 */ 175 imap->im_ipimap = ipimap; 176 JFS_IP(ipimap)->i_imap = imap; 177 178 return (0); 179 } 180 181 182 /* 183 * NAME: diUnmount() 184 * 185 * FUNCTION: write to disk the incore inode map control structures for 186 * a fileset or aggregate at unmount time. 187 * 188 * PARAMETERS: 189 * ipimap - pointer to inode map inode for the aggregate or fileset. 190 * 191 * RETURN VALUES: 192 * 0 - success 193 * -ENOMEM - insufficient free virtual memory. 194 * -EIO - i/o error. 195 */ 196 int diUnmount(struct inode *ipimap, int mounterror) 197 { 198 struct inomap *imap = JFS_IP(ipimap)->i_imap; 199 200 /* 201 * update the on-disk inode map control structure 202 */ 203 204 if (!(mounterror || isReadOnly(ipimap))) 205 diSync(ipimap); 206 207 /* 208 * Invalidate the page cache buffers 209 */ 210 truncate_inode_pages(ipimap->i_mapping, 0); 211 212 /* 213 * free in-memory control structure 214 */ 215 kfree(imap); 216 217 return (0); 218 } 219 220 221 /* 222 * diSync() 223 */ 224 int diSync(struct inode *ipimap) 225 { 226 struct dinomap_disk *dinom_le; 227 struct inomap *imp = JFS_IP(ipimap)->i_imap; 228 struct metapage *mp; 229 int index; 230 231 /* 232 * write imap global conrol page 233 */ 234 /* read the on-disk inode map control structure */ 235 mp = get_metapage(ipimap, 236 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 237 PSIZE, 0); 238 if (mp == NULL) { 239 jfs_err("diSync: get_metapage failed!"); 240 return -EIO; 241 } 242 243 /* copy the in-memory version to the on-disk version */ 244 dinom_le = (struct dinomap_disk *) mp->data; 245 dinom_le->in_freeiag = 
cpu_to_le32(imp->im_freeiag); 246 dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); 247 dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); 248 dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); 249 dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); 250 dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); 251 for (index = 0; index < MAXAG; index++) { 252 dinom_le->in_agctl[index].inofree = 253 cpu_to_le32(imp->im_agctl[index].inofree); 254 dinom_le->in_agctl[index].extfree = 255 cpu_to_le32(imp->im_agctl[index].extfree); 256 dinom_le->in_agctl[index].numinos = 257 cpu_to_le32(imp->im_agctl[index].numinos); 258 dinom_le->in_agctl[index].numfree = 259 cpu_to_le32(imp->im_agctl[index].numfree); 260 } 261 262 /* write out the control structure */ 263 write_metapage(mp); 264 265 /* 266 * write out dirty pages of imap 267 */ 268 filemap_fdatawrite(ipimap->i_mapping); 269 filemap_fdatawait(ipimap->i_mapping); 270 271 diWriteSpecial(ipimap, 0); 272 273 return (0); 274 } 275 276 277 /* 278 * NAME: diRead() 279 * 280 * FUNCTION: initialize an incore inode from disk. 281 * 282 * on entry, the specifed incore inode should itself 283 * specify the disk inode number corresponding to the 284 * incore inode (i.e. i_number should be initialized). 285 * 286 * this routine handles incore inode initialization for 287 * both "special" and "regular" inodes. special inodes 288 * are those required early in the mount process and 289 * require special handling since much of the file system 290 * is not yet initialized. these "special" inodes are 291 * identified by a NULL inode map inode pointer and are 292 * actually initialized by a call to diReadSpecial(). 293 * 294 * for regular inodes, the iag describing the disk inode 295 * is read from disk to determine the inode extent address 296 * for the disk inode. 
with the inode extent address in
 *		hand, the page of the extent that contains the disk
 *		inode is read and the disk inode is copied to the
 *		incore inode.
 *
 * PARAMETERS:
 *	ip	- pointer to incore inode to be initialized from disk.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error.
 *	-ENOMEM	- insufficient memory
 *	-ESTALE	- the inode extent is not backed as expected, or the disk
 *		  inode has a zero link count.
 */
int diRead(struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	int iagno, ino, extno, rc;
	struct inode *ipimap;
	struct dinode *dp;
	struct iag *iagp;
	struct metapage *mp;
	s64 blkno, agstart;
	struct inomap *imap;
	int block_offset;
	int inodes_left;
	uint pageno;
	int rel_inode;

	jfs_info("diRead: ino = %ld", ip->i_ino);

	ipimap = sbi->ipimap;
	JFS_IP(ip)->ipimap = ipimap;

	/* determine the iag number for this inode (number) */
	iagno = INOTOIAG(ip->i_ino);

	/* read the iag */
	imap = JFS_IP(ipimap)->i_imap;
	IREAD_LOCK(ipimap);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc) {
		jfs_err("diRead: diIAGRead returned %d", rc);
		return (rc);
	}

	iagp = (struct iag *) mp->data;

	/* determine inode extent that holds the disk inode */
	ino = ip->i_ino & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;

	/* the extent must have the map's extent length and a real address */
	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
	    (addressPXD(&iagp->inoext[extno]) == 0)) {
		release_metapage(mp);
		return -ESTALE;
	}

	/* get disk block number of the page within the inode extent
	 * that holds the disk inode.
	 */
	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);

	/* get the ag for the iag */
	agstart = le64_to_cpu(iagp->agstart);

	/* everything needed from the iag has been copied out */
	release_metapage(mp);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	block_offset = (u32) blkno & (sbi->nbperpage - 1);
	if (block_offset) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}

	/* read the page of disk inode */
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		jfs_err("diRead: read_metapage failed");
		return -EIO;
	}

	/* locate the disk inode requested */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
		jfs_error(ip->i_sb, "diRead: i_ino != di_number");
		rc = -EIO;
	} else if (le32_to_cpu(dp->di_nlink) == 0)
		rc = -ESTALE;
	else
		/* copy the disk inode to the in-memory inode */
		rc = copy_from_dinode(dp, ip);

	release_metapage(mp);

	/* set the ag for the inode (done even when rc reports an error) */
	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
	JFS_IP(ip)->active_ag = -1;

	return (rc);
}


/*
 * NAME:	diReadSpecial()
 *
 * FUNCTION:	initialize a 'special' inode from disk.
 *
 *		this routine handles aggregate level inodes.  The
 *		inode cache cannot differentiate between the
 *		aggregate inodes and the filesystem inodes, so we
 *		handle these here.  We don't actually use the aggregate
 *		inode map, since these inodes are at a fixed location
 *		and in some cases the aggregate inode map isn't initialized
 *		yet.
425 * 426 * PARAMETERS: 427 * sb - filesystem superblock 428 * inum - aggregate inode number 429 * secondary - 1 if secondary aggregate inode table 430 * 431 * RETURN VALUES: 432 * new inode - success 433 * NULL - i/o error. 434 */ 435 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 436 { 437 struct jfs_sb_info *sbi = JFS_SBI(sb); 438 uint address; 439 struct dinode *dp; 440 struct inode *ip; 441 struct metapage *mp; 442 443 ip = new_inode(sb); 444 if (ip == NULL) { 445 jfs_err("diReadSpecial: new_inode returned NULL!"); 446 return ip; 447 } 448 449 if (secondary) { 450 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 451 JFS_IP(ip)->ipimap = sbi->ipaimap2; 452 } else { 453 address = AITBL_OFF >> L2PSIZE; 454 JFS_IP(ip)->ipimap = sbi->ipaimap; 455 } 456 457 ASSERT(inum < INOSPEREXT); 458 459 ip->i_ino = inum; 460 461 address += inum >> 3; /* 8 inodes per 4K page */ 462 463 /* read the page of fixed disk inode (AIT) in raw mode */ 464 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 465 if (mp == NULL) { 466 ip->i_nlink = 1; /* Don't want iput() deleting it */ 467 iput(ip); 468 return (NULL); 469 } 470 471 /* get the pointer to the disk inode of interest */ 472 dp = (struct dinode *) (mp->data); 473 dp += inum % 8; /* 8 inodes per 4K page */ 474 475 /* copy on-disk inode to in-memory inode */ 476 if ((copy_from_dinode(dp, ip)) != 0) { 477 /* handle bad return by returning NULL for ip */ 478 ip->i_nlink = 1; /* Don't want iput() deleting it */ 479 iput(ip); 480 /* release the page */ 481 release_metapage(mp); 482 return (NULL); 483 484 } 485 486 ip->i_mapping->a_ops = &jfs_metapage_aops; 487 mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); 488 489 /* Allocations to metadata inodes should not affect quotas */ 490 ip->i_flags |= S_NOQUOTA; 491 492 if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { 493 sbi->gengen = le32_to_cpu(dp->di_gengen); 494 sbi->inostamp = le32_to_cpu(dp->di_inostamp); 495 } 496 
497 /* release the page */ 498 release_metapage(mp); 499 500 hlist_add_head(&ip->i_hash, &aggregate_hash); 501 502 return (ip); 503 } 504 505 /* 506 * NAME: diWriteSpecial() 507 * 508 * FUNCTION: Write the special inode to disk 509 * 510 * PARAMETERS: 511 * ip - special inode 512 * secondary - 1 if secondary aggregate inode table 513 * 514 * RETURN VALUES: none 515 */ 516 517 void diWriteSpecial(struct inode *ip, int secondary) 518 { 519 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 520 uint address; 521 struct dinode *dp; 522 ino_t inum = ip->i_ino; 523 struct metapage *mp; 524 525 if (secondary) 526 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 527 else 528 address = AITBL_OFF >> L2PSIZE; 529 530 ASSERT(inum < INOSPEREXT); 531 532 address += inum >> 3; /* 8 inodes per 4K page */ 533 534 /* read the page of fixed disk inode (AIT) in raw mode */ 535 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 536 if (mp == NULL) { 537 jfs_err("diWriteSpecial: failed to read aggregate inode " 538 "extent!"); 539 return; 540 } 541 542 /* get the pointer to the disk inode of interest */ 543 dp = (struct dinode *) (mp->data); 544 dp += inum % 8; /* 8 inodes per 4K page */ 545 546 /* copy on-disk inode to in-memory inode */ 547 copy_to_dinode(dp, ip); 548 memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); 549 550 if (inum == FILESYSTEM_I) 551 dp->di_gengen = cpu_to_le32(sbi->gengen); 552 553 /* write the page */ 554 write_metapage(mp); 555 } 556 557 /* 558 * NAME: diFreeSpecial() 559 * 560 * FUNCTION: Free allocated space for special inode 561 */ 562 void diFreeSpecial(struct inode *ip) 563 { 564 if (ip == NULL) { 565 jfs_err("diFreeSpecial called with NULL ip!"); 566 return; 567 } 568 filemap_fdatawrite(ip->i_mapping); 569 filemap_fdatawait(ip->i_mapping); 570 truncate_inode_pages(ip->i_mapping, 0); 571 iput(ip); 572 } 573 574 575 576 /* 577 * NAME: diWrite() 578 * 579 * FUNCTION: write the on-disk inode portion of the in-memory inode 580 * to its 
corresponding on-disk inode. 581 * 582 * on entry, the specifed incore inode should itself 583 * specify the disk inode number corresponding to the 584 * incore inode (i.e. i_number should be initialized). 585 * 586 * the inode contains the inode extent address for the disk 587 * inode. with the inode extent address in hand, the 588 * page of the extent that contains the disk inode is 589 * read and the disk inode portion of the incore inode 590 * is copied to the disk inode. 591 * 592 * PARAMETERS: 593 * tid - transacation id 594 * ip - pointer to incore inode to be written to the inode extent. 595 * 596 * RETURN VALUES: 597 * 0 - success 598 * -EIO - i/o error. 599 */ 600 int diWrite(tid_t tid, struct inode *ip) 601 { 602 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 603 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 604 int rc = 0; 605 s32 ino; 606 struct dinode *dp; 607 s64 blkno; 608 int block_offset; 609 int inodes_left; 610 struct metapage *mp; 611 uint pageno; 612 int rel_inode; 613 int dioffset; 614 struct inode *ipimap; 615 uint type; 616 lid_t lid; 617 struct tlock *ditlck, *tlck; 618 struct linelock *dilinelock, *ilinelock; 619 struct lv *lv; 620 int n; 621 622 ipimap = jfs_ip->ipimap; 623 624 ino = ip->i_ino & (INOSPERIAG - 1); 625 626 if (!addressPXD(&(jfs_ip->ixpxd)) || 627 (lengthPXD(&(jfs_ip->ixpxd)) != 628 JFS_IP(ipimap)->i_imap->im_nbperiext)) { 629 jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); 630 return -EIO; 631 } 632 633 /* 634 * read the page of disk inode containing the specified inode: 635 */ 636 /* compute the block address of the page */ 637 blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); 638 639 rel_inode = (ino & (INOSPERPAGE - 1)); 640 pageno = blkno >> sbi->l2nbperpage; 641 642 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 643 /* 644 * OS/2 didn't always align inode extents on page boundaries 645 */ 646 inodes_left = 647 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 648 649 if (rel_inode < inodes_left) 650 
rel_inode += block_offset << sbi->l2niperblk; 651 else { 652 pageno += 1; 653 rel_inode -= inodes_left; 654 } 655 } 656 /* read the page of disk inode */ 657 retry: 658 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 659 if (mp == 0) 660 return -EIO; 661 662 /* get the pointer to the disk inode */ 663 dp = (struct dinode *) mp->data; 664 dp += rel_inode; 665 666 dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; 667 668 /* 669 * acquire transaction lock on the on-disk inode; 670 * N.B. tlock is acquired on ipimap not ip; 671 */ 672 if ((ditlck = 673 txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) 674 goto retry; 675 dilinelock = (struct linelock *) & ditlck->lock; 676 677 /* 678 * copy btree root from in-memory inode to on-disk inode 679 * 680 * (tlock is taken from inline B+-tree root in in-memory 681 * inode when the B+-tree root is updated, which is pointed 682 * by jfs_ip->blid as well as being on tx tlock list) 683 * 684 * further processing of btree root is based on the copy 685 * in in-memory inode, where txLog() will log from, and, 686 * for xtree root, txUpdateMap() will update map and reset 687 * XAD_NEW bit; 688 */ 689 690 if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { 691 /* 692 * This is the special xtree inside the directory for storing 693 * the directory table 694 */ 695 xtpage_t *p, *xp; 696 xad_t *xad; 697 698 jfs_ip->xtlid = 0; 699 tlck = lid_to_tlock(lid); 700 assert(tlck->type & tlckXTREE); 701 tlck->type |= tlckBTROOT; 702 tlck->mp = mp; 703 ilinelock = (struct linelock *) & tlck->lock; 704 705 /* 706 * copy xtree root from inode to dinode: 707 */ 708 p = &jfs_ip->i_xtroot; 709 xp = (xtpage_t *) &dp->di_dirtable; 710 lv = ilinelock->lv; 711 for (n = 0; n < ilinelock->index; n++, lv++) { 712 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 713 lv->length << L2XTSLOTSIZE); 714 } 715 716 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 717 xad = &xp->xad[XTENTRYSTART]; 718 for (n = XTENTRYSTART; 719 n < 
le16_to_cpu(xp->header.nextindex); n++, xad++) 720 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 721 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 722 } 723 724 if ((lid = jfs_ip->blid) == 0) 725 goto inlineData; 726 jfs_ip->blid = 0; 727 728 tlck = lid_to_tlock(lid); 729 type = tlck->type; 730 tlck->type |= tlckBTROOT; 731 tlck->mp = mp; 732 ilinelock = (struct linelock *) & tlck->lock; 733 734 /* 735 * regular file: 16 byte (XAD slot) granularity 736 */ 737 if (type & tlckXTREE) { 738 xtpage_t *p, *xp; 739 xad_t *xad; 740 741 /* 742 * copy xtree root from inode to dinode: 743 */ 744 p = &jfs_ip->i_xtroot; 745 xp = &dp->di_xtroot; 746 lv = ilinelock->lv; 747 for (n = 0; n < ilinelock->index; n++, lv++) { 748 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 749 lv->length << L2XTSLOTSIZE); 750 } 751 752 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 753 xad = &xp->xad[XTENTRYSTART]; 754 for (n = XTENTRYSTART; 755 n < le16_to_cpu(xp->header.nextindex); n++, xad++) 756 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 757 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 758 } 759 /* 760 * directory: 32 byte (directory entry slot) granularity 761 */ 762 else if (type & tlckDTREE) { 763 dtpage_t *p, *xp; 764 765 /* 766 * copy dtree root from inode to dinode: 767 */ 768 p = (dtpage_t *) &jfs_ip->i_dtroot; 769 xp = (dtpage_t *) & dp->di_dtroot; 770 lv = ilinelock->lv; 771 for (n = 0; n < ilinelock->index; n++, lv++) { 772 memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], 773 lv->length << L2DTSLOTSIZE); 774 } 775 } else { 776 jfs_err("diWrite: UFO tlock"); 777 } 778 779 inlineData: 780 /* 781 * copy inline symlink from in-memory inode to on-disk inode 782 */ 783 if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { 784 lv = & dilinelock->lv[dilinelock->index]; 785 lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; 786 lv->length = 2; 787 memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); 788 dilinelock->index++; 789 } 790 /* 791 * copy inline data from in-memory inode to on-disk 
inode: 792 * 128 byte slot granularity 793 */ 794 if (test_cflag(COMMIT_Inlineea, ip)) { 795 lv = & dilinelock->lv[dilinelock->index]; 796 lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; 797 lv->length = 1; 798 memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); 799 dilinelock->index++; 800 801 clear_cflag(COMMIT_Inlineea, ip); 802 } 803 804 /* 805 * lock/copy inode base: 128 byte slot granularity 806 */ 807 // baseDinode: 808 lv = & dilinelock->lv[dilinelock->index]; 809 lv->offset = dioffset >> L2INODESLOTSIZE; 810 copy_to_dinode(dp, ip); 811 if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { 812 lv->length = 2; 813 memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); 814 } else 815 lv->length = 1; 816 dilinelock->index++; 817 818 #ifdef _JFS_FASTDASD 819 /* 820 * We aren't logging changes to the DASD used in directory inodes, 821 * but we need to write them to disk. If we don't unmount cleanly, 822 * mount will recalculate the DASD used. 823 */ 824 if (S_ISDIR(ip->i_mode) 825 && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) 826 memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); 827 #endif /* _JFS_FASTDASD */ 828 829 /* release the buffer holding the updated on-disk inode. 830 * the buffer will be later written by commit processing. 831 */ 832 write_metapage(mp); 833 834 return (rc); 835 } 836 837 838 /* 839 * NAME: diFree(ip) 840 * 841 * FUNCTION: free a specified inode from the inode working map 842 * for a fileset or aggregate. 843 * 844 * if the inode to be freed represents the first (only) 845 * free inode within the iag, the iag will be placed on 846 * the ag free inode list. 847 * 848 * freeing the inode will cause the inode extent to be 849 * freed if the inode is the only allocated inode within 850 * the extent. in this case all the disk resource backing 851 * up the inode extent will be freed. in addition, the iag 852 * will be placed on the ag extent free list if the extent 853 * is the first free extent in the iag. 
if freeing the 854 * extent also means that no free inodes will exist for 855 * the iag, the iag will also be removed from the ag free 856 * inode list. 857 * 858 * the iag describing the inode will be freed if the extent 859 * is to be freed and it is the only backed extent within 860 * the iag. in this case, the iag will be removed from the 861 * ag free extent list and ag free inode list and placed on 862 * the inode map's free iag list. 863 * 864 * a careful update approach is used to provide consistency 865 * in the face of updates to multiple buffers. under this 866 * approach, all required buffers are obtained before making 867 * any updates and are held until all updates are complete. 868 * 869 * PARAMETERS: 870 * ip - inode to be freed. 871 * 872 * RETURN VALUES: 873 * 0 - success 874 * -EIO - i/o error. 875 */ 876 int diFree(struct inode *ip) 877 { 878 int rc; 879 ino_t inum = ip->i_ino; 880 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 881 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 882 int iagno, ino, extno, bitno, sword, agno; 883 int back, fwd; 884 u32 bitmap, mask; 885 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 886 struct inomap *imap = JFS_IP(ipimap)->i_imap; 887 pxd_t freepxd; 888 tid_t tid; 889 struct inode *iplist[3]; 890 struct tlock *tlck; 891 struct pxd_lock *pxdlock; 892 893 /* 894 * This is just to suppress compiler warnings. The same logic that 895 * references these variables is used to initialize them. 896 */ 897 aiagp = biagp = ciagp = diagp = NULL; 898 899 /* get the iag number containing the inode. 900 */ 901 iagno = INOTOIAG(inum); 902 903 /* make sure that the iag is contained within 904 * the map. 905 */ 906 if (iagno >= imap->im_nextiag) { 907 dump_mem("imap", imap, 32); 908 jfs_error(ip->i_sb, 909 "diFree: inum = %d, iagno = %d, nextiag = %d", 910 (uint) inum, iagno, imap->im_nextiag); 911 return -EIO; 912 } 913 914 /* get the allocation group for this ino. 
915 */ 916 agno = JFS_IP(ip)->agno; 917 918 /* Lock the AG specific inode map information 919 */ 920 AG_LOCK(imap, agno); 921 922 /* Obtain read lock in imap inode. Don't release it until we have 923 * read all of the IAG's that we are going to. 924 */ 925 IREAD_LOCK(ipimap); 926 927 /* read the iag. 928 */ 929 if ((rc = diIAGRead(imap, iagno, &mp))) { 930 IREAD_UNLOCK(ipimap); 931 AG_UNLOCK(imap, agno); 932 return (rc); 933 } 934 iagp = (struct iag *) mp->data; 935 936 /* get the inode number and extent number of the inode within 937 * the iag and the inode number within the extent. 938 */ 939 ino = inum & (INOSPERIAG - 1); 940 extno = ino >> L2INOSPEREXT; 941 bitno = ino & (INOSPEREXT - 1); 942 mask = HIGHORDER >> bitno; 943 944 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 945 jfs_error(ip->i_sb, 946 "diFree: wmap shows inode already free"); 947 } 948 949 if (!addressPXD(&iagp->inoext[extno])) { 950 release_metapage(mp); 951 IREAD_UNLOCK(ipimap); 952 AG_UNLOCK(imap, agno); 953 jfs_error(ip->i_sb, "diFree: invalid inoext"); 954 return -EIO; 955 } 956 957 /* compute the bitmap for the extent reflecting the freed inode. 958 */ 959 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 960 961 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 962 release_metapage(mp); 963 IREAD_UNLOCK(ipimap); 964 AG_UNLOCK(imap, agno); 965 jfs_error(ip->i_sb, "diFree: numfree > numinos"); 966 return -EIO; 967 } 968 /* 969 * inode extent still has some inodes or below low water mark: 970 * keep the inode extent; 971 */ 972 if (bitmap || 973 imap->im_agctl[agno].numfree < 96 || 974 (imap->im_agctl[agno].numfree < 288 && 975 (((imap->im_agctl[agno].numfree * 100) / 976 imap->im_agctl[agno].numinos) <= 25))) { 977 /* if the iag currently has no free inodes (i.e., 978 * the inode being freed is the first free inode of iag), 979 * insert the iag at head of the inode free list for the ag. 
980 */ 981 if (iagp->nfreeinos == 0) { 982 /* check if there are any iags on the ag inode 983 * free list. if so, read the first one so that 984 * we can link the current iag onto the list at 985 * the head. 986 */ 987 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 988 /* read the iag that currently is the head 989 * of the list. 990 */ 991 if ((rc = diIAGRead(imap, fwd, &))) { 992 IREAD_UNLOCK(ipimap); 993 AG_UNLOCK(imap, agno); 994 release_metapage(mp); 995 return (rc); 996 } 997 aiagp = (struct iag *) amp->data; 998 999 /* make current head point back to the iag. 1000 */ 1001 aiagp->inofreeback = cpu_to_le32(iagno); 1002 1003 write_metapage(amp); 1004 } 1005 1006 /* iag points forward to current head and iag 1007 * becomes the new head of the list. 1008 */ 1009 iagp->inofreefwd = 1010 cpu_to_le32(imap->im_agctl[agno].inofree); 1011 iagp->inofreeback = cpu_to_le32(-1); 1012 imap->im_agctl[agno].inofree = iagno; 1013 } 1014 IREAD_UNLOCK(ipimap); 1015 1016 /* update the free inode summary map for the extent if 1017 * freeing the inode means the extent will now have free 1018 * inodes (i.e., the inode being freed is the first free 1019 * inode of extent), 1020 */ 1021 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1022 sword = extno >> L2EXTSPERSUM; 1023 bitno = extno & (EXTSPERSUM - 1); 1024 iagp->inosmap[sword] &= 1025 cpu_to_le32(~(HIGHORDER >> bitno)); 1026 } 1027 1028 /* update the bitmap. 1029 */ 1030 iagp->wmap[extno] = cpu_to_le32(bitmap); 1031 1032 /* update the free inode counts at the iag, ag and 1033 * map level. 
1034 */ 1035 iagp->nfreeinos = 1036 cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); 1037 imap->im_agctl[agno].numfree += 1; 1038 atomic_inc(&imap->im_numfree); 1039 1040 /* release the AG inode map lock 1041 */ 1042 AG_UNLOCK(imap, agno); 1043 1044 /* write the iag */ 1045 write_metapage(mp); 1046 1047 return (0); 1048 } 1049 1050 1051 /* 1052 * inode extent has become free and above low water mark: 1053 * free the inode extent; 1054 */ 1055 1056 /* 1057 * prepare to update iag list(s) (careful update step 1) 1058 */ 1059 amp = bmp = cmp = dmp = NULL; 1060 fwd = back = -1; 1061 1062 /* check if the iag currently has no free extents. if so, 1063 * it will be placed on the head of the ag extent free list. 1064 */ 1065 if (iagp->nfreeexts == 0) { 1066 /* check if the ag extent free list has any iags. 1067 * if so, read the iag at the head of the list now. 1068 * this (head) iag will be updated later to reflect 1069 * the addition of the current iag at the head of 1070 * the list. 1071 */ 1072 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1073 if ((rc = diIAGRead(imap, fwd, &))) 1074 goto error_out; 1075 aiagp = (struct iag *) amp->data; 1076 } 1077 } else { 1078 /* iag has free extents. check if the addition of a free 1079 * extent will cause all extents to be free within this 1080 * iag. if so, the iag will be removed from the ag extent 1081 * free list and placed on the inode map's free iag list. 1082 */ 1083 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1084 /* in preparation for removing the iag from the 1085 * ag extent free list, read the iags preceeding 1086 * and following the iag on the ag extent free 1087 * list. 
1088 */ 1089 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1090 if ((rc = diIAGRead(imap, fwd, &))) 1091 goto error_out; 1092 aiagp = (struct iag *) amp->data; 1093 } 1094 1095 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1096 if ((rc = diIAGRead(imap, back, &bmp))) 1097 goto error_out; 1098 biagp = (struct iag *) bmp->data; 1099 } 1100 } 1101 } 1102 1103 /* remove the iag from the ag inode free list if freeing 1104 * this extent cause the iag to have no free inodes. 1105 */ 1106 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1107 int inofreeback = le32_to_cpu(iagp->inofreeback); 1108 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1109 1110 /* in preparation for removing the iag from the 1111 * ag inode free list, read the iags preceeding 1112 * and following the iag on the ag inode free 1113 * list. before reading these iags, we must make 1114 * sure that we already don't have them in hand 1115 * from up above, since re-reading an iag (buffer) 1116 * we are currently holding would cause a deadlock. 
1117 */ 1118 if (inofreefwd >= 0) { 1119 1120 if (inofreefwd == fwd) 1121 ciagp = (struct iag *) amp->data; 1122 else if (inofreefwd == back) 1123 ciagp = (struct iag *) bmp->data; 1124 else { 1125 if ((rc = 1126 diIAGRead(imap, inofreefwd, &cmp))) 1127 goto error_out; 1128 ciagp = (struct iag *) cmp->data; 1129 } 1130 assert(ciagp != NULL); 1131 } 1132 1133 if (inofreeback >= 0) { 1134 if (inofreeback == fwd) 1135 diagp = (struct iag *) amp->data; 1136 else if (inofreeback == back) 1137 diagp = (struct iag *) bmp->data; 1138 else { 1139 if ((rc = 1140 diIAGRead(imap, inofreeback, &dmp))) 1141 goto error_out; 1142 diagp = (struct iag *) dmp->data; 1143 } 1144 assert(diagp != NULL); 1145 } 1146 } 1147 1148 IREAD_UNLOCK(ipimap); 1149 1150 /* 1151 * invalidate any page of the inode extent freed from buffer cache; 1152 */ 1153 freepxd = iagp->inoext[extno]; 1154 invalidate_pxd_metapages(ip, freepxd); 1155 1156 /* 1157 * update iag list(s) (careful update step 2) 1158 */ 1159 /* add the iag to the ag extent free list if this is the 1160 * first free extent for the iag. 1161 */ 1162 if (iagp->nfreeexts == 0) { 1163 if (fwd >= 0) 1164 aiagp->extfreeback = cpu_to_le32(iagno); 1165 1166 iagp->extfreefwd = 1167 cpu_to_le32(imap->im_agctl[agno].extfree); 1168 iagp->extfreeback = cpu_to_le32(-1); 1169 imap->im_agctl[agno].extfree = iagno; 1170 } else { 1171 /* remove the iag from the ag extent list if all extents 1172 * are now free and place it on the inode map iag free list. 
1173 */ 1174 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1175 if (fwd >= 0) 1176 aiagp->extfreeback = iagp->extfreeback; 1177 1178 if (back >= 0) 1179 biagp->extfreefwd = iagp->extfreefwd; 1180 else 1181 imap->im_agctl[agno].extfree = 1182 le32_to_cpu(iagp->extfreefwd); 1183 1184 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1185 1186 IAGFREE_LOCK(imap); 1187 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1188 imap->im_freeiag = iagno; 1189 IAGFREE_UNLOCK(imap); 1190 } 1191 } 1192 1193 /* remove the iag from the ag inode free list if freeing 1194 * this extent causes the iag to have no free inodes. 1195 */ 1196 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1197 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1198 ciagp->inofreeback = iagp->inofreeback; 1199 1200 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1201 diagp->inofreefwd = iagp->inofreefwd; 1202 else 1203 imap->im_agctl[agno].inofree = 1204 le32_to_cpu(iagp->inofreefwd); 1205 1206 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1207 } 1208 1209 /* update the inode extent address and working map 1210 * to reflect the free extent. 1211 * the permanent map should have been updated already 1212 * for the inode being freed. 1213 */ 1214 if (iagp->pmap[extno] != 0) { 1215 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); 1216 } 1217 iagp->wmap[extno] = 0; 1218 PXDlength(&iagp->inoext[extno], 0); 1219 PXDaddress(&iagp->inoext[extno], 0); 1220 1221 /* update the free extent and free inode summary maps 1222 * to reflect the freed extent. 1223 * the inode summary map is marked to indicate no inodes 1224 * available for the freed extent. 1225 */ 1226 sword = extno >> L2EXTSPERSUM; 1227 bitno = extno & (EXTSPERSUM - 1); 1228 mask = HIGHORDER >> bitno; 1229 iagp->inosmap[sword] |= cpu_to_le32(mask); 1230 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1231 1232 /* update the number of free inodes and number of free extents 1233 * for the iag. 
1234 */ 1235 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1236 (INOSPEREXT - 1)); 1237 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); 1238 1239 /* update the number of free inodes and backed inodes 1240 * at the ag and inode map level. 1241 */ 1242 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1243 imap->im_agctl[agno].numinos -= INOSPEREXT; 1244 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1245 atomic_sub(INOSPEREXT, &imap->im_numinos); 1246 1247 if (amp) 1248 write_metapage(amp); 1249 if (bmp) 1250 write_metapage(bmp); 1251 if (cmp) 1252 write_metapage(cmp); 1253 if (dmp) 1254 write_metapage(dmp); 1255 1256 /* 1257 * start transaction to update block allocation map 1258 * for the inode extent freed; 1259 * 1260 * N.B. AG_LOCK is released and iag will be released below, and 1261 * other thread may allocate inode from/reusing the ixad freed 1262 * BUT with new/different backing inode extent from the extent 1263 * to be freed by the transaction; 1264 */ 1265 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1266 down(&JFS_IP(ipimap)->commit_sem); 1267 1268 /* acquire tlock of the iag page of the freed ixad 1269 * to force the page NOHOMEOK (even though no data is 1270 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1271 * for the free of the extent is committed; 1272 * write FREEXTENT|NOREDOPAGE log record 1273 * N.B. linelock is overlaid as freed extent descriptor; 1274 */ 1275 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1276 pxdlock = (struct pxd_lock *) & tlck->lock; 1277 pxdlock->flag = mlckFREEPXD; 1278 pxdlock->pxd = freepxd; 1279 pxdlock->index = 1; 1280 1281 write_metapage(mp); 1282 1283 iplist[0] = ipimap; 1284 1285 /* 1286 * logredo needs the IAG number and IAG extent index in order 1287 * to ensure that the IMap is consistent. The least disruptive 1288 * way to pass these values through to the transaction manager 1289 * is in the iplist array. 1290 * 1291 * It's not pretty, but it works. 
1292 */ 1293 iplist[1] = (struct inode *) (size_t)iagno; 1294 iplist[2] = (struct inode *) (size_t)extno; 1295 1296 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1297 1298 txEnd(tid); 1299 up(&JFS_IP(ipimap)->commit_sem); 1300 1301 /* unlock the AG inode map information */ 1302 AG_UNLOCK(imap, agno); 1303 1304 return (0); 1305 1306 error_out: 1307 IREAD_UNLOCK(ipimap); 1308 1309 if (amp) 1310 release_metapage(amp); 1311 if (bmp) 1312 release_metapage(bmp); 1313 if (cmp) 1314 release_metapage(cmp); 1315 if (dmp) 1316 release_metapage(dmp); 1317 1318 AG_UNLOCK(imap, agno); 1319 1320 release_metapage(mp); 1321 1322 return (rc); 1323 } 1324 1325 /* 1326 * There are several places in the diAlloc* routines where we initialize 1327 * the inode. 1328 */ 1329 static inline void 1330 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1331 { 1332 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 1333 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1334 1335 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1336 jfs_ip->ixpxd = iagp->inoext[extno]; 1337 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1338 jfs_ip->active_ag = -1; 1339 } 1340 1341 1342 /* 1343 * NAME: diAlloc(pip,dir,ip) 1344 * 1345 * FUNCTION: allocate a disk inode from the inode working map 1346 * for a fileset or aggregate. 1347 * 1348 * PARAMETERS: 1349 * pip - pointer to incore inode for the parent inode. 1350 * dir - TRUE if the new disk inode is for a directory. 1351 * ip - pointer to a new inode 1352 * 1353 * RETURN VALUES: 1354 * 0 - success. 1355 * -ENOSPC - insufficient disk resources. 1356 * -EIO - i/o error. 
1357 */ 1358 int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) 1359 { 1360 int rc, ino, iagno, addext, extno, bitno, sword; 1361 int nwords, rem, i, agno; 1362 u32 mask, inosmap, extsmap; 1363 struct inode *ipimap; 1364 struct metapage *mp; 1365 ino_t inum; 1366 struct iag *iagp; 1367 struct inomap *imap; 1368 1369 /* get the pointers to the inode map inode and the 1370 * corresponding imap control structure. 1371 */ 1372 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1373 imap = JFS_IP(ipimap)->i_imap; 1374 JFS_IP(ip)->ipimap = ipimap; 1375 JFS_IP(ip)->fileset = FILESYSTEM_I; 1376 1377 /* for a directory, the allocation policy is to start 1378 * at the ag level using the preferred ag. 1379 */ 1380 if (dir == TRUE) { 1381 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1382 AG_LOCK(imap, agno); 1383 goto tryag; 1384 } 1385 1386 /* for files, the policy starts off by trying to allocate from 1387 * the same iag containing the parent disk inode: 1388 * try to allocate the new disk inode close to the parent disk 1389 * inode, using parent disk inode number + 1 as the allocation 1390 * hint. (we use a left-to-right policy to attempt to avoid 1391 * moving backward on the disk.) compute the hint within the 1392 * file system and the iag. 1393 */ 1394 1395 /* get the ag number of this iag */ 1396 agno = JFS_IP(pip)->agno; 1397 1398 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1399 /* 1400 * There is an open file actively growing. We want to 1401 * allocate new inodes from a different ag to avoid 1402 * fragmentation problems. 
1403 */ 1404 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1405 AG_LOCK(imap, agno); 1406 goto tryag; 1407 } 1408 1409 inum = pip->i_ino + 1; 1410 ino = inum & (INOSPERIAG - 1); 1411 1412 /* back off the the hint if it is outside of the iag */ 1413 if (ino == 0) 1414 inum = pip->i_ino; 1415 1416 /* lock the AG inode map information */ 1417 AG_LOCK(imap, agno); 1418 1419 /* Get read lock on imap inode */ 1420 IREAD_LOCK(ipimap); 1421 1422 /* get the iag number and read the iag */ 1423 iagno = INOTOIAG(inum); 1424 if ((rc = diIAGRead(imap, iagno, &mp))) { 1425 IREAD_UNLOCK(ipimap); 1426 AG_UNLOCK(imap, agno); 1427 return (rc); 1428 } 1429 iagp = (struct iag *) mp->data; 1430 1431 /* determine if new inode extent is allowed to be added to the iag. 1432 * new inode extent can be added to the iag if the ag 1433 * has less than 32 free disk inodes and the iag has free extents. 1434 */ 1435 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1436 1437 /* 1438 * try to allocate from the IAG 1439 */ 1440 /* check if the inode may be allocated from the iag 1441 * (i.e. the inode has free inodes or new extent can be added). 1442 */ 1443 if (iagp->nfreeinos || addext) { 1444 /* determine the extent number of the hint. 1445 */ 1446 extno = ino >> L2INOSPEREXT; 1447 1448 /* check if the extent containing the hint has backed 1449 * inodes. if so, try to allocate within this extent. 1450 */ 1451 if (addressPXD(&iagp->inoext[extno])) { 1452 bitno = ino & (INOSPEREXT - 1); 1453 if ((bitno = 1454 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1455 bitno)) 1456 < INOSPEREXT) { 1457 ino = (extno << L2INOSPEREXT) + bitno; 1458 1459 /* a free inode (bit) was found within this 1460 * extent, so allocate it. 1461 */ 1462 rc = diAllocBit(imap, iagp, ino); 1463 IREAD_UNLOCK(ipimap); 1464 if (rc) { 1465 assert(rc == -EIO); 1466 } else { 1467 /* set the results of the allocation 1468 * and write the iag. 
1469 */ 1470 diInitInode(ip, iagno, ino, extno, 1471 iagp); 1472 mark_metapage_dirty(mp); 1473 } 1474 release_metapage(mp); 1475 1476 /* free the AG lock and return. 1477 */ 1478 AG_UNLOCK(imap, agno); 1479 return (rc); 1480 } 1481 1482 if (!addext) 1483 extno = 1484 (extno == 1485 EXTSPERIAG - 1) ? 0 : extno + 1; 1486 } 1487 1488 /* 1489 * no free inodes within the extent containing the hint. 1490 * 1491 * try to allocate from the backed extents following 1492 * hint or, if appropriate (i.e. addext is true), allocate 1493 * an extent of free inodes at or following the extent 1494 * containing the hint. 1495 * 1496 * the free inode and free extent summary maps are used 1497 * here, so determine the starting summary map position 1498 * and the number of words we'll have to examine. again, 1499 * the approach is to allocate following the hint, so we 1500 * might have to initially ignore prior bits of the summary 1501 * map that represent extents prior to the extent containing 1502 * the hint and later revisit these bits. 1503 */ 1504 bitno = extno & (EXTSPERSUM - 1); 1505 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1506 sword = extno >> L2EXTSPERSUM; 1507 1508 /* mask any prior bits for the starting words of the 1509 * summary map. 1510 */ 1511 mask = ONES << (EXTSPERSUM - bitno); 1512 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1513 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1514 1515 /* scan the free inode and free extent summary maps for 1516 * free resources. 1517 */ 1518 for (i = 0; i < nwords; i++) { 1519 /* check if this word of the free inode summary 1520 * map describes an extent with free inodes. 1521 */ 1522 if (~inosmap) { 1523 /* an extent with free inodes has been 1524 * found. determine the extent number 1525 * and the inode number within the extent. 
1526 */ 1527 rem = diFindFree(inosmap, 0); 1528 extno = (sword << L2EXTSPERSUM) + rem; 1529 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1530 0); 1531 if (rem >= INOSPEREXT) { 1532 IREAD_UNLOCK(ipimap); 1533 release_metapage(mp); 1534 AG_UNLOCK(imap, agno); 1535 jfs_error(ip->i_sb, 1536 "diAlloc: can't find free bit " 1537 "in wmap"); 1538 return EIO; 1539 } 1540 1541 /* determine the inode number within the 1542 * iag and allocate the inode from the 1543 * map. 1544 */ 1545 ino = (extno << L2INOSPEREXT) + rem; 1546 rc = diAllocBit(imap, iagp, ino); 1547 IREAD_UNLOCK(ipimap); 1548 if (rc) 1549 assert(rc == -EIO); 1550 else { 1551 /* set the results of the allocation 1552 * and write the iag. 1553 */ 1554 diInitInode(ip, iagno, ino, extno, 1555 iagp); 1556 mark_metapage_dirty(mp); 1557 } 1558 release_metapage(mp); 1559 1560 /* free the AG lock and return. 1561 */ 1562 AG_UNLOCK(imap, agno); 1563 return (rc); 1564 1565 } 1566 1567 /* check if we may allocate an extent of free 1568 * inodes and whether this word of the free 1569 * extents summary map describes a free extent. 1570 */ 1571 if (addext && ~extsmap) { 1572 /* a free extent has been found. determine 1573 * the extent number. 1574 */ 1575 rem = diFindFree(extsmap, 0); 1576 extno = (sword << L2EXTSPERSUM) + rem; 1577 1578 /* allocate an extent of free inodes. 1579 */ 1580 if ((rc = diNewExt(imap, iagp, extno))) { 1581 /* if there is no disk space for a 1582 * new extent, try to allocate the 1583 * disk inode from somewhere else. 1584 */ 1585 if (rc == -ENOSPC) 1586 break; 1587 1588 assert(rc == -EIO); 1589 } else { 1590 /* set the results of the allocation 1591 * and write the iag. 1592 */ 1593 diInitInode(ip, iagno, 1594 extno << L2INOSPEREXT, 1595 extno, iagp); 1596 mark_metapage_dirty(mp); 1597 } 1598 release_metapage(mp); 1599 /* free the imap inode & the AG lock & return. 
1600 */ 1601 IREAD_UNLOCK(ipimap); 1602 AG_UNLOCK(imap, agno); 1603 return (rc); 1604 } 1605 1606 /* move on to the next set of summary map words. 1607 */ 1608 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1609 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1610 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1611 } 1612 } 1613 /* unlock imap inode */ 1614 IREAD_UNLOCK(ipimap); 1615 1616 /* nothing doing in this iag, so release it. */ 1617 release_metapage(mp); 1618 1619 tryag: 1620 /* 1621 * try to allocate anywhere within the same AG as the parent inode. 1622 */ 1623 rc = diAllocAG(imap, agno, dir, ip); 1624 1625 AG_UNLOCK(imap, agno); 1626 1627 if (rc != -ENOSPC) 1628 return (rc); 1629 1630 /* 1631 * try to allocate in any AG. 1632 */ 1633 return (diAllocAny(imap, agno, dir, ip)); 1634 } 1635 1636 1637 /* 1638 * NAME: diAllocAG(imap,agno,dir,ip) 1639 * 1640 * FUNCTION: allocate a disk inode from the allocation group. 1641 * 1642 * this routine first determines if a new extent of free 1643 * inodes should be added for the allocation group, with 1644 * the current request satisfied from this extent. if this 1645 * is the case, an attempt will be made to do just that. if 1646 * this attempt fails or it has been determined that a new 1647 * extent should not be added, an attempt is made to satisfy 1648 * the request by allocating an existing (backed) free inode 1649 * from the allocation group. 1650 * 1651 * PRE CONDITION: Already have the AG lock for this AG. 1652 * 1653 * PARAMETERS: 1654 * imap - pointer to inode map control structure. 1655 * agno - allocation group to allocate from. 1656 * dir - TRUE if the new disk inode is for a directory. 1657 * ip - pointer to the new inode to be filled in on successful return 1658 * with the disk inode number allocated, its extent address 1659 * and the start of the ag. 1660 * 1661 * RETURN VALUES: 1662 * 0 - success. 1663 * -ENOSPC - insufficient disk resources. 1664 * -EIO - i/o error. 
1665 */ 1666 static int 1667 diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) 1668 { 1669 int rc, addext, numfree, numinos; 1670 1671 /* get the number of free and the number of backed disk 1672 * inodes currently within the ag. 1673 */ 1674 numfree = imap->im_agctl[agno].numfree; 1675 numinos = imap->im_agctl[agno].numinos; 1676 1677 if (numfree > numinos) { 1678 jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); 1679 return -EIO; 1680 } 1681 1682 /* determine if we should allocate a new extent of free inodes 1683 * within the ag: for directory inodes, add a new extent 1684 * if there are a small number of free inodes or number of free 1685 * inodes is a small percentage of the number of backed inodes. 1686 */ 1687 if (dir == TRUE) 1688 addext = (numfree < 64 || 1689 (numfree < 256 1690 && ((numfree * 100) / numinos) <= 20)); 1691 else 1692 addext = (numfree == 0); 1693 1694 /* 1695 * try to allocate a new extent of free inodes. 1696 */ 1697 if (addext) { 1698 /* if free space is not avaliable for this new extent, try 1699 * below to allocate a free and existing (already backed) 1700 * inode from the ag. 1701 */ 1702 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1703 return (rc); 1704 } 1705 1706 /* 1707 * try to allocate an existing free inode from the ag. 1708 */ 1709 return (diAllocIno(imap, agno, ip)); 1710 } 1711 1712 1713 /* 1714 * NAME: diAllocAny(imap,agno,dir,iap) 1715 * 1716 * FUNCTION: allocate a disk inode from any other allocation group. 1717 * 1718 * this routine is called when an allocation attempt within 1719 * the primary allocation group has failed. if attempts to 1720 * allocate an inode from any allocation group other than the 1721 * specified primary group. 1722 * 1723 * PARAMETERS: 1724 * imap - pointer to inode map control structure. 1725 * agno - primary allocation group (to avoid). 1726 * dir - TRUE if the new disk inode is for a directory. 
1727 * ip - pointer to a new inode to be filled in on successful return 1728 * with the disk inode number allocated, its extent address 1729 * and the start of the ag. 1730 * 1731 * RETURN VALUES: 1732 * 0 - success. 1733 * -ENOSPC - insufficient disk resources. 1734 * -EIO - i/o error. 1735 */ 1736 static int 1737 diAllocAny(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) 1738 { 1739 int ag, rc; 1740 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1741 1742 1743 /* try to allocate from the ags following agno up to 1744 * the maximum ag number. 1745 */ 1746 for (ag = agno + 1; ag <= maxag; ag++) { 1747 AG_LOCK(imap, ag); 1748 1749 rc = diAllocAG(imap, ag, dir, ip); 1750 1751 AG_UNLOCK(imap, ag); 1752 1753 if (rc != -ENOSPC) 1754 return (rc); 1755 } 1756 1757 /* try to allocate from the ags in front of agno. 1758 */ 1759 for (ag = 0; ag < agno; ag++) { 1760 AG_LOCK(imap, ag); 1761 1762 rc = diAllocAG(imap, ag, dir, ip); 1763 1764 AG_UNLOCK(imap, ag); 1765 1766 if (rc != -ENOSPC) 1767 return (rc); 1768 } 1769 1770 /* no free disk inodes. 1771 */ 1772 return -ENOSPC; 1773 } 1774 1775 1776 /* 1777 * NAME: diAllocIno(imap,agno,ip) 1778 * 1779 * FUNCTION: allocate a disk inode from the allocation group's free 1780 * inode list, returning an error if this free list is 1781 * empty (i.e. no iags on the list). 1782 * 1783 * allocation occurs from the first iag on the list using 1784 * the iag's free inode summary map to find the leftmost 1785 * free inode in the iag. 1786 * 1787 * PRE CONDITION: Already have AG lock for this AG. 1788 * 1789 * PARAMETERS: 1790 * imap - pointer to inode map control structure. 1791 * agno - allocation group. 1792 * ip - pointer to new inode to be filled in on successful return 1793 * with the disk inode number allocated, its extent address 1794 * and the start of the ag. 1795 * 1796 * RETURN VALUES: 1797 * 0 - success. 1798 * -ENOSPC - insufficient disk resources. 1799 * -EIO - i/o error. 
1800 */ 1801 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1802 { 1803 int iagno, ino, rc, rem, extno, sword; 1804 struct metapage *mp; 1805 struct iag *iagp; 1806 1807 /* check if there are iags on the ag's free inode list. 1808 */ 1809 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1810 return -ENOSPC; 1811 1812 /* obtain read lock on imap inode */ 1813 IREAD_LOCK(imap->im_ipimap); 1814 1815 /* read the iag at the head of the list. 1816 */ 1817 if ((rc = diIAGRead(imap, iagno, &mp))) { 1818 IREAD_UNLOCK(imap->im_ipimap); 1819 return (rc); 1820 } 1821 iagp = (struct iag *) mp->data; 1822 1823 /* better be free inodes in this iag if it is on the 1824 * list. 1825 */ 1826 if (!iagp->nfreeinos) { 1827 IREAD_UNLOCK(imap->im_ipimap); 1828 release_metapage(mp); 1829 jfs_error(ip->i_sb, 1830 "diAllocIno: nfreeinos = 0, but iag on freelist"); 1831 return -EIO; 1832 } 1833 1834 /* scan the free inode summary map to find an extent 1835 * with free inodes. 1836 */ 1837 for (sword = 0;; sword++) { 1838 if (sword >= SMAPSZ) { 1839 IREAD_UNLOCK(imap->im_ipimap); 1840 release_metapage(mp); 1841 jfs_error(ip->i_sb, 1842 "diAllocIno: free inode not found in summary map"); 1843 return -EIO; 1844 } 1845 1846 if (~iagp->inosmap[sword]) 1847 break; 1848 } 1849 1850 /* found a extent with free inodes. determine 1851 * the extent number. 1852 */ 1853 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1854 if (rem >= EXTSPERSUM) { 1855 IREAD_UNLOCK(imap->im_ipimap); 1856 release_metapage(mp); 1857 jfs_error(ip->i_sb, "diAllocIno: no free extent found"); 1858 return -EIO; 1859 } 1860 extno = (sword << L2EXTSPERSUM) + rem; 1861 1862 /* find the first free inode in the extent. 
1863 */ 1864 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1865 if (rem >= INOSPEREXT) { 1866 IREAD_UNLOCK(imap->im_ipimap); 1867 release_metapage(mp); 1868 jfs_error(ip->i_sb, "diAllocIno: free inode not found"); 1869 return -EIO; 1870 } 1871 1872 /* compute the inode number within the iag. 1873 */ 1874 ino = (extno << L2INOSPEREXT) + rem; 1875 1876 /* allocate the inode. 1877 */ 1878 rc = diAllocBit(imap, iagp, ino); 1879 IREAD_UNLOCK(imap->im_ipimap); 1880 if (rc) { 1881 release_metapage(mp); 1882 return (rc); 1883 } 1884 1885 /* set the results of the allocation and write the iag. 1886 */ 1887 diInitInode(ip, iagno, ino, extno, iagp); 1888 write_metapage(mp); 1889 1890 return (0); 1891 } 1892 1893 1894 /* 1895 * NAME: diAllocExt(imap,agno,ip) 1896 * 1897 * FUNCTION: add a new extent of free inodes to an iag, allocating 1898 * an inode from this extent to satisfy the current allocation 1899 * request. 1900 * 1901 * this routine first tries to find an existing iag with free 1902 * extents through the ag free extent list. if list is not 1903 * empty, the head of the list will be selected as the home 1904 * of the new extent of free inodes. otherwise (the list is 1905 * empty), a new iag will be allocated for the ag to contain 1906 * the extent. 1907 * 1908 * once an iag has been selected, the free extent summary map 1909 * is used to locate a free extent within the iag and diNewExt() 1910 * is called to initialize the extent, with initialization 1911 * including the allocation of the first inode of the extent 1912 * for the purpose of satisfying this request. 1913 * 1914 * PARAMETERS: 1915 * imap - pointer to inode map control structure. 1916 * agno - allocation group number. 1917 * ip - pointer to new inode to be filled in on successful return 1918 * with the disk inode number allocated, its extent address 1919 * and the start of the ag. 1920 * 1921 * RETURN VALUES: 1922 * 0 - success. 1923 * -ENOSPC - insufficient disk resources. 1924 * -EIO - i/o error. 
1925 */ 1926 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1927 { 1928 int rem, iagno, sword, extno, rc; 1929 struct metapage *mp; 1930 struct iag *iagp; 1931 1932 /* check if the ag has any iags with free extents. if not, 1933 * allocate a new iag for the ag. 1934 */ 1935 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1936 /* If successful, diNewIAG will obtain the read lock on the 1937 * imap inode. 1938 */ 1939 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1940 return (rc); 1941 } 1942 iagp = (struct iag *) mp->data; 1943 1944 /* set the ag number if this a brand new iag 1945 */ 1946 iagp->agstart = 1947 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1948 } else { 1949 /* read the iag. 1950 */ 1951 IREAD_LOCK(imap->im_ipimap); 1952 if ((rc = diIAGRead(imap, iagno, &mp))) { 1953 IREAD_UNLOCK(imap->im_ipimap); 1954 jfs_error(ip->i_sb, "diAllocExt: error reading iag"); 1955 return rc; 1956 } 1957 iagp = (struct iag *) mp->data; 1958 } 1959 1960 /* using the free extent summary map, find a free extent. 1961 */ 1962 for (sword = 0;; sword++) { 1963 if (sword >= SMAPSZ) { 1964 release_metapage(mp); 1965 IREAD_UNLOCK(imap->im_ipimap); 1966 jfs_error(ip->i_sb, 1967 "diAllocExt: free ext summary map not found"); 1968 return -EIO; 1969 } 1970 if (~iagp->extsmap[sword]) 1971 break; 1972 } 1973 1974 /* determine the extent number of the free extent. 1975 */ 1976 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1977 if (rem >= EXTSPERSUM) { 1978 release_metapage(mp); 1979 IREAD_UNLOCK(imap->im_ipimap); 1980 jfs_error(ip->i_sb, "diAllocExt: free extent not found"); 1981 return -EIO; 1982 } 1983 extno = (sword << L2EXTSPERSUM) + rem; 1984 1985 /* initialize the new extent. 1986 */ 1987 rc = diNewExt(imap, iagp, extno); 1988 IREAD_UNLOCK(imap->im_ipimap); 1989 if (rc) { 1990 /* something bad happened. if a new iag was allocated, 1991 * place it back on the inode map's iag free list, and 1992 * clear the ag number information. 
1993 */ 1994 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1995 IAGFREE_LOCK(imap); 1996 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1997 imap->im_freeiag = iagno; 1998 IAGFREE_UNLOCK(imap); 1999 } 2000 write_metapage(mp); 2001 return (rc); 2002 } 2003 2004 /* set the results of the allocation and write the iag. 2005 */ 2006 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 2007 2008 write_metapage(mp); 2009 2010 return (0); 2011 } 2012 2013 2014 /* 2015 * NAME: diAllocBit(imap,iagp,ino) 2016 * 2017 * FUNCTION: allocate a backed inode from an iag. 2018 * 2019 * this routine performs the mechanics of allocating a 2020 * specified inode from a backed extent. 2021 * 2022 * if the inode to be allocated represents the last free 2023 * inode within the iag, the iag will be removed from the 2024 * ag free inode list. 2025 * 2026 * a careful update approach is used to provide consistency 2027 * in the face of updates to multiple buffers. under this 2028 * approach, all required buffers are obtained before making 2029 * any updates and are held all are updates are complete. 2030 * 2031 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2032 * this AG. Must have read lock on imap inode. 2033 * 2034 * PARAMETERS: 2035 * imap - pointer to inode map control structure. 2036 * iagp - pointer to iag. 2037 * ino - inode number to be allocated within the iag. 2038 * 2039 * RETURN VALUES: 2040 * 0 - success. 2041 * -ENOSPC - insufficient disk resources. 2042 * -EIO - i/o error. 2043 */ 2044 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2045 { 2046 int extno, bitno, agno, sword, rc; 2047 struct metapage *amp = NULL, *bmp = NULL; 2048 struct iag *aiagp = NULL, *biagp = NULL; 2049 u32 mask; 2050 2051 /* check if this is the last free inode within the iag. 2052 * if so, it will have to be removed from the ag free 2053 * inode list, so get the iags preceeding and following 2054 * it on the list. 
2055 */ 2056 if (iagp->nfreeinos == cpu_to_le32(1)) { 2057 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2058 if ((rc = 2059 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2060 &))) 2061 return (rc); 2062 aiagp = (struct iag *) amp->data; 2063 } 2064 2065 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2066 if ((rc = 2067 diIAGRead(imap, 2068 le32_to_cpu(iagp->inofreeback), 2069 &bmp))) { 2070 if (amp) 2071 release_metapage(amp); 2072 return (rc); 2073 } 2074 biagp = (struct iag *) bmp->data; 2075 } 2076 } 2077 2078 /* get the ag number, extent number, inode number within 2079 * the extent. 2080 */ 2081 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2082 extno = ino >> L2INOSPEREXT; 2083 bitno = ino & (INOSPEREXT - 1); 2084 2085 /* compute the mask for setting the map. 2086 */ 2087 mask = HIGHORDER >> bitno; 2088 2089 /* the inode should be free and backed. 2090 */ 2091 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2092 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2093 (addressPXD(&iagp->inoext[extno]) == 0)) { 2094 if (amp) 2095 release_metapage(amp); 2096 if (bmp) 2097 release_metapage(bmp); 2098 2099 jfs_error(imap->im_ipimap->i_sb, 2100 "diAllocBit: iag inconsistent"); 2101 return -EIO; 2102 } 2103 2104 /* mark the inode as allocated in the working map. 2105 */ 2106 iagp->wmap[extno] |= cpu_to_le32(mask); 2107 2108 /* check if all inodes within the extent are now 2109 * allocated. if so, update the free inode summary 2110 * map to reflect this. 2111 */ 2112 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2113 sword = extno >> L2EXTSPERSUM; 2114 bitno = extno & (EXTSPERSUM - 1); 2115 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2116 } 2117 2118 /* if this was the last free inode in the iag, remove the 2119 * iag from the ag free inode list. 
2120 */ 2121 if (iagp->nfreeinos == cpu_to_le32(1)) { 2122 if (amp) { 2123 aiagp->inofreeback = iagp->inofreeback; 2124 write_metapage(amp); 2125 } 2126 2127 if (bmp) { 2128 biagp->inofreefwd = iagp->inofreefwd; 2129 write_metapage(bmp); 2130 } else { 2131 imap->im_agctl[agno].inofree = 2132 le32_to_cpu(iagp->inofreefwd); 2133 } 2134 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2135 } 2136 2137 /* update the free inode count at the iag, ag, inode 2138 * map levels. 2139 */ 2140 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); 2141 imap->im_agctl[agno].numfree -= 1; 2142 atomic_dec(&imap->im_numfree); 2143 2144 return (0); 2145 } 2146 2147 2148 /* 2149 * NAME: diNewExt(imap,iagp,extno) 2150 * 2151 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2152 * the first inode of the extent for use for the current 2153 * allocation request. 2154 * 2155 * disk resources are allocated for the new extent of inodes 2156 * and the inodes themselves are initialized to reflect their 2157 * existence within the extent (i.e. their inode numbers and 2158 * inode extent addresses are set) and their initial state 2159 * (mode and link count are set to zero). 2160 * 2161 * if the iag is new, it is not yet on an ag extent free list 2162 * but will now be placed on this list. 2163 * 2164 * if the allocation of the new extent causes the iag to 2165 * have no free extent, the iag will be removed from the 2166 * ag extent free list. 2167 * 2168 * if the iag has no free backed inodes, it will be placed 2169 * on the ag free inode list, since the addition of the new 2170 * extent will now cause it to have free inodes. 2171 * 2172 * a careful update approach is used to provide consistency 2173 * (i.e. list consistency) in the face of updates to multiple 2174 * buffers. under this approach, all required buffers are 2175 * obtained before making any updates and are held until all 2176 * updates are complete. 
2177 * 2178 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2179 * this AG. Must have read lock on imap inode. 2180 * 2181 * PARAMETERS: 2182 * imap - pointer to inode map control structure. 2183 * iagp - pointer to iag. 2184 * extno - extent number. 2185 * 2186 * RETURN VALUES: 2187 * 0 - success. 2188 * -ENOSPC - insufficient disk resources. 2189 * -EIO - i/o error. 2190 */ 2191 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2192 { 2193 int agno, iagno, fwd, back, freei = 0, sword, rc; 2194 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; 2195 struct metapage *amp, *bmp, *cmp, *dmp; 2196 struct inode *ipimap; 2197 s64 blkno, hint; 2198 int i, j; 2199 u32 mask; 2200 ino_t ino; 2201 struct dinode *dp; 2202 struct jfs_sb_info *sbi; 2203 2204 /* better have free extents. 2205 */ 2206 if (!iagp->nfreeexts) { 2207 jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); 2208 return -EIO; 2209 } 2210 2211 /* get the inode map inode. 2212 */ 2213 ipimap = imap->im_ipimap; 2214 sbi = JFS_SBI(ipimap->i_sb); 2215 2216 amp = bmp = cmp = NULL; 2217 2218 /* get the ag and iag numbers for this iag. 2219 */ 2220 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 2221 iagno = le32_to_cpu(iagp->iagnum); 2222 2223 /* check if this is the last free extent within the 2224 * iag. if so, the iag must be removed from the ag 2225 * free extent list, so get the iags preceeding and 2226 * following the iag on this list. 2227 */ 2228 if (iagp->nfreeexts == cpu_to_le32(1)) { 2229 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 2230 if ((rc = diIAGRead(imap, fwd, &))) 2231 return (rc); 2232 aiagp = (struct iag *) amp->data; 2233 } 2234 2235 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 2236 if ((rc = diIAGRead(imap, back, &bmp))) 2237 goto error_out; 2238 biagp = (struct iag *) bmp->data; 2239 } 2240 } else { 2241 /* the iag has free extents. 
if all extents are free 2242 * (as is the case for a newly allocated iag), the iag 2243 * must be added to the ag free extent list, so get 2244 * the iag at the head of the list in preparation for 2245 * adding this iag to this list. 2246 */ 2247 fwd = back = -1; 2248 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2249 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 2250 if ((rc = diIAGRead(imap, fwd, &))) 2251 goto error_out; 2252 aiagp = (struct iag *) amp->data; 2253 } 2254 } 2255 } 2256 2257 /* check if the iag has no free inodes. if so, the iag 2258 * will have to be added to the ag free inode list, so get 2259 * the iag at the head of the list in preparation for 2260 * adding this iag to this list. in doing this, we must 2261 * check if we already have the iag at the head of 2262 * the list in hand. 2263 */ 2264 if (iagp->nfreeinos == 0) { 2265 freei = imap->im_agctl[agno].inofree; 2266 2267 if (freei >= 0) { 2268 if (freei == fwd) { 2269 ciagp = aiagp; 2270 } else if (freei == back) { 2271 ciagp = biagp; 2272 } else { 2273 if ((rc = diIAGRead(imap, freei, &cmp))) 2274 goto error_out; 2275 ciagp = (struct iag *) cmp->data; 2276 } 2277 if (ciagp == NULL) { 2278 jfs_error(imap->im_ipimap->i_sb, 2279 "diNewExt: ciagp == NULL"); 2280 rc = -EIO; 2281 goto error_out; 2282 } 2283 } 2284 } 2285 2286 /* allocate disk space for the inode extent. 2287 */ 2288 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) 2289 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; 2290 else 2291 hint = addressPXD(&iagp->inoext[extno - 1]) + 2292 lengthPXD(&iagp->inoext[extno - 1]) - 1; 2293 2294 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) 2295 goto error_out; 2296 2297 /* compute the inode number of the first inode within the 2298 * extent. 2299 */ 2300 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); 2301 2302 /* initialize the inodes within the newly allocated extent a 2303 * page at a time. 
2304 */ 2305 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { 2306 /* get a buffer for this page of disk inodes. 2307 */ 2308 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); 2309 if (dmp == NULL) { 2310 rc = -EIO; 2311 goto error_out; 2312 } 2313 dp = (struct dinode *) dmp->data; 2314 2315 /* initialize the inode number, mode, link count and 2316 * inode extent address. 2317 */ 2318 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { 2319 dp->di_inostamp = cpu_to_le32(sbi->inostamp); 2320 dp->di_number = cpu_to_le32(ino); 2321 dp->di_fileset = cpu_to_le32(FILESYSTEM_I); 2322 dp->di_mode = 0; 2323 dp->di_nlink = 0; 2324 PXDaddress(&(dp->di_ixpxd), blkno); 2325 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); 2326 } 2327 write_metapage(dmp); 2328 } 2329 2330 /* if this is the last free extent within the iag, remove the 2331 * iag from the ag free extent list. 2332 */ 2333 if (iagp->nfreeexts == cpu_to_le32(1)) { 2334 if (fwd >= 0) 2335 aiagp->extfreeback = iagp->extfreeback; 2336 2337 if (back >= 0) 2338 biagp->extfreefwd = iagp->extfreefwd; 2339 else 2340 imap->im_agctl[agno].extfree = 2341 le32_to_cpu(iagp->extfreefwd); 2342 2343 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2344 } else { 2345 /* if the iag has all free extents (newly allocated iag), 2346 * add the iag to the ag free extent list. 2347 */ 2348 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2349 if (fwd >= 0) 2350 aiagp->extfreeback = cpu_to_le32(iagno); 2351 2352 iagp->extfreefwd = cpu_to_le32(fwd); 2353 iagp->extfreeback = cpu_to_le32(-1); 2354 imap->im_agctl[agno].extfree = iagno; 2355 } 2356 } 2357 2358 /* if the iag has no free inodes, add the iag to the 2359 * ag free inode list. 
2360 */ 2361 if (iagp->nfreeinos == 0) { 2362 if (freei >= 0) 2363 ciagp->inofreeback = cpu_to_le32(iagno); 2364 2365 iagp->inofreefwd = 2366 cpu_to_le32(imap->im_agctl[agno].inofree); 2367 iagp->inofreeback = cpu_to_le32(-1); 2368 imap->im_agctl[agno].inofree = iagno; 2369 } 2370 2371 /* initialize the extent descriptor of the extent. */ 2372 PXDlength(&iagp->inoext[extno], imap->im_nbperiext); 2373 PXDaddress(&iagp->inoext[extno], blkno); 2374 2375 /* initialize the working and persistent map of the extent. 2376 * the working map will be initialized such that 2377 * it indicates the first inode of the extent is allocated. 2378 */ 2379 iagp->wmap[extno] = cpu_to_le32(HIGHORDER); 2380 iagp->pmap[extno] = 0; 2381 2382 /* update the free inode and free extent summary maps 2383 * for the extent to indicate the extent has free inodes 2384 * and no longer represents a free extent. 2385 */ 2386 sword = extno >> L2EXTSPERSUM; 2387 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); 2388 iagp->extsmap[sword] |= cpu_to_le32(mask); 2389 iagp->inosmap[sword] &= cpu_to_le32(~mask); 2390 2391 /* update the free inode and free extent counts for the 2392 * iag. 2393 */ 2394 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 2395 (INOSPEREXT - 1)); 2396 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); 2397 2398 /* update the free and backed inode counts for the ag. 2399 */ 2400 imap->im_agctl[agno].numfree += (INOSPEREXT - 1); 2401 imap->im_agctl[agno].numinos += INOSPEREXT; 2402 2403 /* update the free and backed inode counts for the inode map. 2404 */ 2405 atomic_add(INOSPEREXT - 1, &imap->im_numfree); 2406 atomic_add(INOSPEREXT, &imap->im_numinos); 2407 2408 /* write the iags. 2409 */ 2410 if (amp) 2411 write_metapage(amp); 2412 if (bmp) 2413 write_metapage(bmp); 2414 if (cmp) 2415 write_metapage(cmp); 2416 2417 return (0); 2418 2419 error_out: 2420 2421 /* release the iags. 
2422 */ 2423 if (amp) 2424 release_metapage(amp); 2425 if (bmp) 2426 release_metapage(bmp); 2427 if (cmp) 2428 release_metapage(cmp); 2429 2430 return (rc); 2431 } 2432 2433 2434 /* 2435 * NAME: diNewIAG(imap,iagnop,agno) 2436 * 2437 * FUNCTION: allocate a new iag for an allocation group. 2438 * 2439 * first tries to allocate the iag from the inode map 2440 * iagfree list: 2441 * if the list has free iags, the head of the list is removed 2442 * and returned to satisfy the request. 2443 * if the inode map's iag free list is empty, the inode map 2444 * is extended to hold a new iag. this new iag is initialized 2445 * and returned to satisfy the request. 2446 * 2447 * PARAMETERS: 2448 * imap - pointer to inode map control structure. 2449 * iagnop - pointer to an iag number set with the number of the 2450 * newly allocated iag upon successful return. 2451 * agno - allocation group number. 2452 * bpp - Buffer pointer to be filled in with new IAG's buffer 2453 * 2454 * RETURN VALUES: 2455 * 0 - success. 2456 * -ENOSPC - insufficient disk resources. 2457 * -EIO - i/o error. 2458 * 2459 * serialization: 2460 * AG lock held on entry/exit; 2461 * write lock on the map is held inside; 2462 * read lock on the map is held on successful completion; 2463 * 2464 * note: new iag transaction: 2465 * . synchronously write iag; 2466 * . write log of xtree and inode of imap; 2467 * . commit; 2468 * . synchronous write of xtree (right to left, bottom to top); 2469 * . at start of logredo(): init in-memory imap with one additional iag page; 2470 * . 
at end of logredo(): re-read imap inode to determine 2471 * new imap size; 2472 */ 2473 static int 2474 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) 2475 { 2476 int rc; 2477 int iagno, i, xlen; 2478 struct inode *ipimap; 2479 struct super_block *sb; 2480 struct jfs_sb_info *sbi; 2481 struct metapage *mp; 2482 struct iag *iagp; 2483 s64 xaddr = 0; 2484 s64 blkno; 2485 tid_t tid; 2486 #ifdef _STILL_TO_PORT 2487 xad_t xad; 2488 #endif /* _STILL_TO_PORT */ 2489 struct inode *iplist[1]; 2490 2491 /* pick up pointers to the inode map and mount inodes */ 2492 ipimap = imap->im_ipimap; 2493 sb = ipimap->i_sb; 2494 sbi = JFS_SBI(sb); 2495 2496 /* acquire the free iag lock */ 2497 IAGFREE_LOCK(imap); 2498 2499 /* if there are any iags on the inode map free iag list, 2500 * allocate the iag from the head of the list. 2501 */ 2502 if (imap->im_freeiag >= 0) { 2503 /* pick up the iag number at the head of the list */ 2504 iagno = imap->im_freeiag; 2505 2506 /* determine the logical block number of the iag */ 2507 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2508 } else { 2509 /* no free iags. the inode map will have to be extented 2510 * to include a new iag. 2511 */ 2512 2513 /* acquire inode map lock */ 2514 IWRITE_LOCK(ipimap); 2515 2516 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { 2517 IWRITE_UNLOCK(ipimap); 2518 IAGFREE_UNLOCK(imap); 2519 jfs_error(imap->im_ipimap->i_sb, 2520 "diNewIAG: ipimap->i_size is wrong"); 2521 return -EIO; 2522 } 2523 2524 2525 /* get the next avaliable iag number */ 2526 iagno = imap->im_nextiag; 2527 2528 /* make sure that we have not exceeded the maximum inode 2529 * number limit. 2530 */ 2531 if (iagno > (MAXIAGS - 1)) { 2532 /* release the inode map lock */ 2533 IWRITE_UNLOCK(ipimap); 2534 2535 rc = -ENOSPC; 2536 goto out; 2537 } 2538 2539 /* 2540 * synchronously append new iag page. 
2541 */ 2542 /* determine the logical address of iag page to append */ 2543 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2544 2545 /* Allocate extent for new iag page */ 2546 xlen = sbi->nbperpage; 2547 if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { 2548 /* release the inode map lock */ 2549 IWRITE_UNLOCK(ipimap); 2550 2551 goto out; 2552 } 2553 2554 /* 2555 * start transaction of update of the inode map 2556 * addressing structure pointing to the new iag page; 2557 */ 2558 tid = txBegin(sb, COMMIT_FORCE); 2559 down(&JFS_IP(ipimap)->commit_sem); 2560 2561 /* update the inode map addressing structure to point to it */ 2562 if ((rc = 2563 xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { 2564 txEnd(tid); 2565 up(&JFS_IP(ipimap)->commit_sem); 2566 /* Free the blocks allocated for the iag since it was 2567 * not successfully added to the inode map 2568 */ 2569 dbFree(ipimap, xaddr, (s64) xlen); 2570 2571 /* release the inode map lock */ 2572 IWRITE_UNLOCK(ipimap); 2573 2574 goto out; 2575 } 2576 2577 /* update the inode map's inode to reflect the extension */ 2578 ipimap->i_size += PSIZE; 2579 inode_add_bytes(ipimap, PSIZE); 2580 2581 /* assign a buffer for the page */ 2582 mp = get_metapage(ipimap, blkno, PSIZE, 0); 2583 if (!mp) { 2584 /* 2585 * This is very unlikely since we just created the 2586 * extent, but let's try to handle it correctly 2587 */ 2588 xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, 2589 COMMIT_PWMAP); 2590 2591 txAbort(tid, 0); 2592 txEnd(tid); 2593 2594 /* release the inode map lock */ 2595 IWRITE_UNLOCK(ipimap); 2596 2597 rc = -EIO; 2598 goto out; 2599 } 2600 iagp = (struct iag *) mp->data; 2601 2602 /* init the iag */ 2603 memset(iagp, 0, sizeof(struct iag)); 2604 iagp->iagnum = cpu_to_le32(iagno); 2605 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2606 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2607 iagp->iagfree = cpu_to_le32(-1); 2608 iagp->nfreeinos = 0; 2609 iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); 2610 
2611 /* initialize the free inode summary map (free extent 2612 * summary map initialization handled by bzero). 2613 */ 2614 for (i = 0; i < SMAPSZ; i++) 2615 iagp->inosmap[i] = cpu_to_le32(ONES); 2616 2617 /* 2618 * Write and sync the metapage 2619 */ 2620 flush_metapage(mp); 2621 2622 /* 2623 * txCommit(COMMIT_FORCE) will synchronously write address 2624 * index pages and inode after commit in careful update order 2625 * of address index pages (right to left, bottom up); 2626 */ 2627 iplist[0] = ipimap; 2628 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 2629 2630 txEnd(tid); 2631 up(&JFS_IP(ipimap)->commit_sem); 2632 2633 duplicateIXtree(sb, blkno, xlen, &xaddr); 2634 2635 /* update the next avaliable iag number */ 2636 imap->im_nextiag += 1; 2637 2638 /* Add the iag to the iag free list so we don't lose the iag 2639 * if a failure happens now. 2640 */ 2641 imap->im_freeiag = iagno; 2642 2643 /* Until we have logredo working, we want the imap inode & 2644 * control page to be up to date. 2645 */ 2646 diSync(ipimap); 2647 2648 /* release the inode map lock */ 2649 IWRITE_UNLOCK(ipimap); 2650 } 2651 2652 /* obtain read lock on map */ 2653 IREAD_LOCK(ipimap); 2654 2655 /* read the iag */ 2656 if ((rc = diIAGRead(imap, iagno, &mp))) { 2657 IREAD_UNLOCK(ipimap); 2658 rc = -EIO; 2659 goto out; 2660 } 2661 iagp = (struct iag *) mp->data; 2662 2663 /* remove the iag from the iag free list */ 2664 imap->im_freeiag = le32_to_cpu(iagp->iagfree); 2665 iagp->iagfree = cpu_to_le32(-1); 2666 2667 /* set the return iag number and buffer pointer */ 2668 *iagnop = iagno; 2669 *mpp = mp; 2670 2671 out: 2672 /* release the iag free lock */ 2673 IAGFREE_UNLOCK(imap); 2674 2675 return (rc); 2676 } 2677 2678 /* 2679 * NAME: diIAGRead() 2680 * 2681 * FUNCTION: get the buffer for the specified iag within a fileset 2682 * or aggregate inode map. 2683 * 2684 * PARAMETERS: 2685 * imap - pointer to inode map control structure. 2686 * iagno - iag number. 
2687 * bpp - point to buffer pointer to be filled in on successful 2688 * exit. 2689 * 2690 * SERIALIZATION: 2691 * must have read lock on imap inode 2692 * (When called by diExtendFS, the filesystem is quiesced, therefore 2693 * the read lock is unnecessary.) 2694 * 2695 * RETURN VALUES: 2696 * 0 - success. 2697 * -EIO - i/o error. 2698 */ 2699 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2700 { 2701 struct inode *ipimap = imap->im_ipimap; 2702 s64 blkno; 2703 2704 /* compute the logical block number of the iag. */ 2705 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2706 2707 /* read the iag. */ 2708 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2709 if (*mpp == NULL) { 2710 return -EIO; 2711 } 2712 2713 return (0); 2714 } 2715 2716 /* 2717 * NAME: diFindFree() 2718 * 2719 * FUNCTION: find the first free bit in a word starting at 2720 * the specified bit position. 2721 * 2722 * PARAMETERS: 2723 * word - word to be examined. 2724 * start - starting bit position. 2725 * 2726 * RETURN VALUES: 2727 * bit position of first free bit in the word or 32 if 2728 * no free bits were found. 2729 */ 2730 static int diFindFree(u32 word, int start) 2731 { 2732 int bitno; 2733 assert(start < 32); 2734 /* scan the word for the first free bit. */ 2735 for (word <<= start, bitno = start; bitno < 32; 2736 bitno++, word <<= 1) { 2737 if ((word & HIGHORDER) == 0) 2738 break; 2739 } 2740 return (bitno); 2741 } 2742 2743 /* 2744 * NAME: diUpdatePMap() 2745 * 2746 * FUNCTION: Update the persistent map in an IAG for the allocation or 2747 * freeing of the specified inode. 2748 * 2749 * PRE CONDITIONS: Working map has already been updated for allocate. 2750 * 2751 * PARAMETERS: 2752 * ipimap - Incore inode map inode 2753 * inum - Number of inode to mark in permanent map 2754 * is_free - If TRUE indicates inode should be marked freed, otherwise 2755 * indicates inode should be marked allocated. 
2756 * 2757 * RETURN VALUES: 2758 * 0 for success 2759 */ 2760 int 2761 diUpdatePMap(struct inode *ipimap, 2762 unsigned long inum, boolean_t is_free, struct tblock * tblk) 2763 { 2764 int rc; 2765 struct iag *iagp; 2766 struct metapage *mp; 2767 int iagno, ino, extno, bitno; 2768 struct inomap *imap; 2769 u32 mask; 2770 struct jfs_log *log; 2771 int lsn, difft, diffp; 2772 unsigned long flags; 2773 2774 imap = JFS_IP(ipimap)->i_imap; 2775 /* get the iag number containing the inode */ 2776 iagno = INOTOIAG(inum); 2777 /* make sure that the iag is contained within the map */ 2778 if (iagno >= imap->im_nextiag) { 2779 jfs_error(ipimap->i_sb, 2780 "diUpdatePMap: the iag is outside the map"); 2781 return -EIO; 2782 } 2783 /* read the iag */ 2784 IREAD_LOCK(ipimap); 2785 rc = diIAGRead(imap, iagno, &mp); 2786 IREAD_UNLOCK(ipimap); 2787 if (rc) 2788 return (rc); 2789 metapage_wait_for_io(mp); 2790 iagp = (struct iag *) mp->data; 2791 /* get the inode number and extent number of the inode within 2792 * the iag and the inode number within the extent. 
2793 */ 2794 ino = inum & (INOSPERIAG - 1); 2795 extno = ino >> L2INOSPEREXT; 2796 bitno = ino & (INOSPEREXT - 1); 2797 mask = HIGHORDER >> bitno; 2798 /* 2799 * mark the inode free in persistent map: 2800 */ 2801 if (is_free == TRUE) { 2802 /* The inode should have been allocated both in working 2803 * map and in persistent map; 2804 * the inode will be freed from working map at the release 2805 * of last reference release; 2806 */ 2807 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2808 jfs_error(ipimap->i_sb, 2809 "diUpdatePMap: inode %ld not marked as " 2810 "allocated in wmap!", inum); 2811 } 2812 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { 2813 jfs_error(ipimap->i_sb, 2814 "diUpdatePMap: inode %ld not marked as " 2815 "allocated in pmap!", inum); 2816 } 2817 /* update the bitmap for the extent of the freed inode */ 2818 iagp->pmap[extno] &= cpu_to_le32(~mask); 2819 } 2820 /* 2821 * mark the inode allocated in persistent map: 2822 */ 2823 else { 2824 /* The inode should be already allocated in the working map 2825 * and should be free in persistent map; 2826 */ 2827 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2828 release_metapage(mp); 2829 jfs_error(ipimap->i_sb, 2830 "diUpdatePMap: the inode is not allocated in " 2831 "the working map"); 2832 return -EIO; 2833 } 2834 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { 2835 release_metapage(mp); 2836 jfs_error(ipimap->i_sb, 2837 "diUpdatePMap: the inode is not free in the " 2838 "persistent map"); 2839 return -EIO; 2840 } 2841 /* update the bitmap for the extent of the allocated inode */ 2842 iagp->pmap[extno] |= cpu_to_le32(mask); 2843 } 2844 /* 2845 * update iag lsn 2846 */ 2847 lsn = tblk->lsn; 2848 log = JFS_SBI(tblk->sb)->log; 2849 if (mp->lsn != 0) { 2850 /* inherit older/smaller lsn */ 2851 logdiff(difft, lsn, log); 2852 logdiff(diffp, mp->lsn, log); 2853 LOGSYNC_LOCK(log, flags); 2854 if (difft < diffp) { 2855 mp->lsn = lsn; 2856 /* move mp after tblock in logsync list */ 2857 
list_move(&mp->synclist, &tblk->synclist); 2858 } 2859 /* inherit younger/larger clsn */ 2860 assert(mp->clsn); 2861 logdiff(difft, tblk->clsn, log); 2862 logdiff(diffp, mp->clsn, log); 2863 if (difft > diffp) 2864 mp->clsn = tblk->clsn; 2865 LOGSYNC_UNLOCK(log, flags); 2866 } else { 2867 mp->log = log; 2868 mp->lsn = lsn; 2869 /* insert mp after tblock in logsync list */ 2870 LOGSYNC_LOCK(log, flags); 2871 log->count++; 2872 list_add(&mp->synclist, &tblk->synclist); 2873 mp->clsn = tblk->clsn; 2874 LOGSYNC_UNLOCK(log, flags); 2875 } 2876 write_metapage(mp); 2877 return (0); 2878 } 2879 2880 /* 2881 * diExtendFS() 2882 * 2883 * function: update imap for extendfs(); 2884 * 2885 * note: AG size has been increased s.t. each k old contiguous AGs are 2886 * coalesced into a new AG; 2887 */ 2888 int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2889 { 2890 int rc, rcx = 0; 2891 struct inomap *imap = JFS_IP(ipimap)->i_imap; 2892 struct iag *iagp = NULL, *hiagp = NULL; 2893 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; 2894 struct metapage *bp, *hbp; 2895 int i, n, head; 2896 int numinos, xnuminos = 0, xnumfree = 0; 2897 s64 agstart; 2898 2899 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", 2900 imap->im_nextiag, atomic_read(&imap->im_numinos), 2901 atomic_read(&imap->im_numfree)); 2902 2903 /* 2904 * reconstruct imap 2905 * 2906 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2907 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2908 * note: new AG size = old AG size * (2**x). 2909 */ 2910 2911 /* init per AG control information im_agctl[] */ 2912 for (i = 0; i < MAXAG; i++) { 2913 imap->im_agctl[i].inofree = -1; 2914 imap->im_agctl[i].extfree = -1; 2915 imap->im_agctl[i].numinos = 0; /* number of backed inodes */ 2916 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ 2917 } 2918 2919 /* 2920 * process each iag page of the map. 
2921 * 2922 * rebuild AG Free Inode List, AG Free Inode Extent List; 2923 */ 2924 for (i = 0; i < imap->im_nextiag; i++) { 2925 if ((rc = diIAGRead(imap, i, &bp))) { 2926 rcx = rc; 2927 continue; 2928 } 2929 iagp = (struct iag *) bp->data; 2930 if (le32_to_cpu(iagp->iagnum) != i) { 2931 release_metapage(bp); 2932 jfs_error(ipimap->i_sb, 2933 "diExtendFs: unexpected value of iagnum"); 2934 return -EIO; 2935 } 2936 2937 /* leave free iag in the free iag list */ 2938 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2939 release_metapage(bp); 2940 continue; 2941 } 2942 2943 /* agstart that computes to the same ag is treated as same; */ 2944 agstart = le64_to_cpu(iagp->agstart); 2945 /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ 2946 n = agstart >> mp->db_agl2size; 2947 2948 /* compute backed inodes */ 2949 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2950 << L2INOSPEREXT; 2951 if (numinos > 0) { 2952 /* merge AG backed inodes */ 2953 imap->im_agctl[n].numinos += numinos; 2954 xnuminos += numinos; 2955 } 2956 2957 /* if any backed free inodes, insert at AG free inode list */ 2958 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { 2959 if ((head = imap->im_agctl[n].inofree) == -1) { 2960 iagp->inofreefwd = cpu_to_le32(-1); 2961 iagp->inofreeback = cpu_to_le32(-1); 2962 } else { 2963 if ((rc = diIAGRead(imap, head, &hbp))) { 2964 rcx = rc; 2965 goto nextiag; 2966 } 2967 hiagp = (struct iag *) hbp->data; 2968 hiagp->inofreeback = iagp->iagnum; 2969 iagp->inofreefwd = cpu_to_le32(head); 2970 iagp->inofreeback = cpu_to_le32(-1); 2971 write_metapage(hbp); 2972 } 2973 2974 imap->im_agctl[n].inofree = 2975 le32_to_cpu(iagp->iagnum); 2976 2977 /* merge AG backed free inodes */ 2978 imap->im_agctl[n].numfree += 2979 le32_to_cpu(iagp->nfreeinos); 2980 xnumfree += le32_to_cpu(iagp->nfreeinos); 2981 } 2982 2983 /* if any free extents, insert at AG free extent list */ 2984 if (le32_to_cpu(iagp->nfreeexts) > 0) { 2985 if ((head = imap->im_agctl[n].extfree) == -1) { 2986 
iagp->extfreefwd = cpu_to_le32(-1); 2987 iagp->extfreeback = cpu_to_le32(-1); 2988 } else { 2989 if ((rc = diIAGRead(imap, head, &hbp))) { 2990 rcx = rc; 2991 goto nextiag; 2992 } 2993 hiagp = (struct iag *) hbp->data; 2994 hiagp->extfreeback = iagp->iagnum; 2995 iagp->extfreefwd = cpu_to_le32(head); 2996 iagp->extfreeback = cpu_to_le32(-1); 2997 write_metapage(hbp); 2998 } 2999 3000 imap->im_agctl[n].extfree = 3001 le32_to_cpu(iagp->iagnum); 3002 } 3003 3004 nextiag: 3005 write_metapage(bp); 3006 } 3007 3008 if (xnuminos != atomic_read(&imap->im_numinos) || 3009 xnumfree != atomic_read(&imap->im_numfree)) { 3010 jfs_error(ipimap->i_sb, 3011 "diExtendFs: numinos or numfree incorrect"); 3012 return -EIO; 3013 } 3014 3015 return rcx; 3016 } 3017 3018 3019 /* 3020 * duplicateIXtree() 3021 * 3022 * serialization: IWRITE_LOCK held on entry/exit 3023 * 3024 * note: shadow page with regular inode (rel.2); 3025 */ 3026 static void duplicateIXtree(struct super_block *sb, s64 blkno, 3027 int xlen, s64 *xaddr) 3028 { 3029 struct jfs_superblock *j_sb; 3030 struct buffer_head *bh; 3031 struct inode *ip; 3032 tid_t tid; 3033 3034 /* if AIT2 ipmap2 is bad, do not try to update it */ 3035 if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ 3036 return; 3037 ip = diReadSpecial(sb, FILESYSTEM_I, 1); 3038 if (ip == NULL) { 3039 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3040 if (readSuper(sb, &bh)) 3041 return; 3042 j_sb = (struct jfs_superblock *)bh->b_data; 3043 j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); 3044 3045 mark_buffer_dirty(bh); 3046 sync_dirty_buffer(bh); 3047 brelse(bh); 3048 return; 3049 } 3050 3051 /* start transaction */ 3052 tid = txBegin(sb, COMMIT_FORCE); 3053 /* update the inode map addressing structure to point to it */ 3054 if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { 3055 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3056 txAbort(tid, 1); 3057 goto cleanup; 3058 3059 } 3060 /* update the inode map's inode to reflect the extension */ 3061 ip->i_size += PSIZE; 3062 
inode_add_bytes(ip, PSIZE); 3063 txCommit(tid, 1, &ip, COMMIT_FORCE); 3064 cleanup: 3065 txEnd(tid); 3066 diFreeSpecial(ip); 3067 } 3068 3069 /* 3070 * NAME: copy_from_dinode() 3071 * 3072 * FUNCTION: Copies inode info from disk inode to in-memory inode 3073 * 3074 * RETURN VALUES: 3075 * 0 - success 3076 * -ENOMEM - insufficient memory 3077 */ 3078 static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3079 { 3080 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3081 3082 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3083 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3084 3085 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3086 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3087 ip->i_uid = le32_to_cpu(dip->di_uid); 3088 ip->i_gid = le32_to_cpu(dip->di_gid); 3089 ip->i_size = le64_to_cpu(dip->di_size); 3090 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); 3091 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); 3092 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); 3093 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3094 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3095 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3096 ip->i_blksize = ip->i_sb->s_blocksize; 3097 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3098 ip->i_generation = le32_to_cpu(dip->di_gen); 3099 3100 jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ 3101 jfs_ip->acl = dip->di_acl; /* as are dxd's */ 3102 jfs_ip->ea = dip->di_ea; 3103 jfs_ip->next_index = le32_to_cpu(dip->di_next_index); 3104 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); 3105 jfs_ip->acltype = le32_to_cpu(dip->di_acltype); 3106 3107 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { 3108 jfs_ip->dev = le32_to_cpu(dip->di_rdev); 3109 ip->i_rdev = new_decode_dev(jfs_ip->dev); 3110 } 3111 3112 if (S_ISDIR(ip->i_mode)) { 3113 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); 3114 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) 
{ 3115 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); 3116 } else 3117 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); 3118 3119 /* Zero the in-memory-only stuff */ 3120 jfs_ip->cflag = 0; 3121 jfs_ip->btindex = 0; 3122 jfs_ip->btorder = 0; 3123 jfs_ip->bxflag = 0; 3124 jfs_ip->blid = 0; 3125 jfs_ip->atlhead = 0; 3126 jfs_ip->atltail = 0; 3127 jfs_ip->xtlid = 0; 3128 return (0); 3129 } 3130 3131 /* 3132 * NAME: copy_to_dinode() 3133 * 3134 * FUNCTION: Copies inode info from in-memory inode to disk inode 3135 */ 3136 static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3137 { 3138 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3139 3140 dip->di_fileset = cpu_to_le32(jfs_ip->fileset); 3141 dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); 3142 dip->di_number = cpu_to_le32(ip->i_ino); 3143 dip->di_gen = cpu_to_le32(ip->i_generation); 3144 dip->di_size = cpu_to_le64(ip->i_size); 3145 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3146 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3147 dip->di_uid = cpu_to_le32(ip->i_uid); 3148 dip->di_gid = cpu_to_le32(ip->i_gid); 3149 /* 3150 * mode2 is only needed for storing the higher order bits. 
3151 * Trust i_mode for the lower order ones 3152 */ 3153 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); 3154 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); 3155 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); 3156 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); 3157 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); 3158 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); 3159 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); 3160 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ 3161 dip->di_acl = jfs_ip->acl; /* as are dxd's */ 3162 dip->di_ea = jfs_ip->ea; 3163 dip->di_next_index = cpu_to_le32(jfs_ip->next_index); 3164 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); 3165 dip->di_otime.tv_nsec = 0; 3166 dip->di_acltype = cpu_to_le32(jfs_ip->acltype); 3167 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3168 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3169 } 3170