/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *	jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 *   Each AG has a simple lock which is used to control the serialization of
 *	the AG level lists.  This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *   Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *   There is also an inode lock for the inode map inode.  A read lock needs to
 *	be taken whenever an IAG is read from the map or the global level
 *	information is read.  A write lock needs to be taken whenever the global
 *	level information is modified or an atomic operation needs to be used.
 *
 *	If more than one IAG is read at one time, the read lock may not
 *	be given up until all of the IAGs are read.  Otherwise, a deadlock
 *	may occur when trying to obtain the read lock while another thread
 *	holding the read lock is waiting on the IAG already being held.
 *
 *   The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
42 */ 43 44 #include <linux/fs.h> 45 #include <linux/buffer_head.h> 46 #include <linux/pagemap.h> 47 #include <linux/quotaops.h> 48 49 #include "jfs_incore.h" 50 #include "jfs_inode.h" 51 #include "jfs_filsys.h" 52 #include "jfs_dinode.h" 53 #include "jfs_dmap.h" 54 #include "jfs_imap.h" 55 #include "jfs_metapage.h" 56 #include "jfs_superblock.h" 57 #include "jfs_debug.h" 58 59 /* 60 * imap locks 61 */ 62 /* iag free list lock */ 63 #define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) 64 #define IAGFREE_LOCK(imap) down(&imap->im_freelock) 65 #define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) 66 67 /* per ag iag list locks */ 68 #define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) 69 #define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) 70 #define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) 71 72 /* 73 * forward references 74 */ 75 static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); 76 static int diAllocAny(struct inomap *, int, boolean_t, struct inode *); 77 static int diAllocBit(struct inomap *, struct iag *, int); 78 static int diAllocExt(struct inomap *, int, struct inode *); 79 static int diAllocIno(struct inomap *, int, struct inode *); 80 static int diFindFree(u32, int); 81 static int diNewExt(struct inomap *, struct iag *, int); 82 static int diNewIAG(struct inomap *, int *, int, struct metapage **); 83 static void duplicateIXtree(struct super_block *, s64, int, s64 *); 84 85 static int diIAGRead(struct inomap * imap, int, struct metapage **); 86 static int copy_from_dinode(struct dinode *, struct inode *); 87 static void copy_to_dinode(struct dinode *, struct inode *); 88 89 /* 90 * NAME: diMount() 91 * 92 * FUNCTION: initialize the incore inode map control structures for 93 * a fileset or aggregate init time. 94 * 95 * the inode map's control structure (dinomap) is 96 * brought in from disk and placed in virtual memory. 
97 * 98 * PARAMETERS: 99 * ipimap - pointer to inode map inode for the aggregate or fileset. 100 * 101 * RETURN VALUES: 102 * 0 - success 103 * -ENOMEM - insufficient free virtual memory. 104 * -EIO - i/o error. 105 */ 106 int diMount(struct inode *ipimap) 107 { 108 struct inomap *imap; 109 struct metapage *mp; 110 int index; 111 struct dinomap_disk *dinom_le; 112 113 /* 114 * allocate/initialize the in-memory inode map control structure 115 */ 116 /* allocate the in-memory inode map control structure. */ 117 imap = (struct inomap *) kmalloc(sizeof(struct inomap), GFP_KERNEL); 118 if (imap == NULL) { 119 jfs_err("diMount: kmalloc returned NULL!"); 120 return -ENOMEM; 121 } 122 123 /* read the on-disk inode map control structure. */ 124 125 mp = read_metapage(ipimap, 126 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 127 PSIZE, 0); 128 if (mp == NULL) { 129 kfree(imap); 130 return -EIO; 131 } 132 133 /* copy the on-disk version to the in-memory version. */ 134 dinom_le = (struct dinomap_disk *) mp->data; 135 imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); 136 imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); 137 atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); 138 atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); 139 imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); 140 imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); 141 for (index = 0; index < MAXAG; index++) { 142 imap->im_agctl[index].inofree = 143 le32_to_cpu(dinom_le->in_agctl[index].inofree); 144 imap->im_agctl[index].extfree = 145 le32_to_cpu(dinom_le->in_agctl[index].extfree); 146 imap->im_agctl[index].numinos = 147 le32_to_cpu(dinom_le->in_agctl[index].numinos); 148 imap->im_agctl[index].numfree = 149 le32_to_cpu(dinom_le->in_agctl[index].numfree); 150 } 151 152 /* release the buffer. 
*/ 153 release_metapage(mp); 154 155 /* 156 * allocate/initialize inode allocation map locks 157 */ 158 /* allocate and init iag free list lock */ 159 IAGFREE_LOCK_INIT(imap); 160 161 /* allocate and init ag list locks */ 162 for (index = 0; index < MAXAG; index++) { 163 AG_LOCK_INIT(imap, index); 164 } 165 166 /* bind the inode map inode and inode map control structure 167 * to each other. 168 */ 169 imap->im_ipimap = ipimap; 170 JFS_IP(ipimap)->i_imap = imap; 171 172 return (0); 173 } 174 175 176 /* 177 * NAME: diUnmount() 178 * 179 * FUNCTION: write to disk the incore inode map control structures for 180 * a fileset or aggregate at unmount time. 181 * 182 * PARAMETERS: 183 * ipimap - pointer to inode map inode for the aggregate or fileset. 184 * 185 * RETURN VALUES: 186 * 0 - success 187 * -ENOMEM - insufficient free virtual memory. 188 * -EIO - i/o error. 189 */ 190 int diUnmount(struct inode *ipimap, int mounterror) 191 { 192 struct inomap *imap = JFS_IP(ipimap)->i_imap; 193 194 /* 195 * update the on-disk inode map control structure 196 */ 197 198 if (!(mounterror || isReadOnly(ipimap))) 199 diSync(ipimap); 200 201 /* 202 * Invalidate the page cache buffers 203 */ 204 truncate_inode_pages(ipimap->i_mapping, 0); 205 206 /* 207 * free in-memory control structure 208 */ 209 kfree(imap); 210 211 return (0); 212 } 213 214 215 /* 216 * diSync() 217 */ 218 int diSync(struct inode *ipimap) 219 { 220 struct dinomap_disk *dinom_le; 221 struct inomap *imp = JFS_IP(ipimap)->i_imap; 222 struct metapage *mp; 223 int index; 224 225 /* 226 * write imap global conrol page 227 */ 228 /* read the on-disk inode map control structure */ 229 mp = get_metapage(ipimap, 230 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 231 PSIZE, 0); 232 if (mp == NULL) { 233 jfs_err("diSync: get_metapage failed!"); 234 return -EIO; 235 } 236 237 /* copy the in-memory version to the on-disk version */ 238 dinom_le = (struct dinomap_disk *) mp->data; 239 dinom_le->in_freeiag = 
cpu_to_le32(imp->im_freeiag); 240 dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); 241 dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); 242 dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); 243 dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); 244 dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); 245 for (index = 0; index < MAXAG; index++) { 246 dinom_le->in_agctl[index].inofree = 247 cpu_to_le32(imp->im_agctl[index].inofree); 248 dinom_le->in_agctl[index].extfree = 249 cpu_to_le32(imp->im_agctl[index].extfree); 250 dinom_le->in_agctl[index].numinos = 251 cpu_to_le32(imp->im_agctl[index].numinos); 252 dinom_le->in_agctl[index].numfree = 253 cpu_to_le32(imp->im_agctl[index].numfree); 254 } 255 256 /* write out the control structure */ 257 write_metapage(mp); 258 259 /* 260 * write out dirty pages of imap 261 */ 262 filemap_fdatawrite(ipimap->i_mapping); 263 filemap_fdatawait(ipimap->i_mapping); 264 265 diWriteSpecial(ipimap, 0); 266 267 return (0); 268 } 269 270 271 /* 272 * NAME: diRead() 273 * 274 * FUNCTION: initialize an incore inode from disk. 275 * 276 * on entry, the specifed incore inode should itself 277 * specify the disk inode number corresponding to the 278 * incore inode (i.e. i_number should be initialized). 279 * 280 * this routine handles incore inode initialization for 281 * both "special" and "regular" inodes. special inodes 282 * are those required early in the mount process and 283 * require special handling since much of the file system 284 * is not yet initialized. these "special" inodes are 285 * identified by a NULL inode map inode pointer and are 286 * actually initialized by a call to diReadSpecial(). 287 * 288 * for regular inodes, the iag describing the disk inode 289 * is read from disk to determine the inode extent address 290 * for the disk inode. 
with the inode extent address in
 *		hand, the page of the extent that contains the disk
 *		inode is read and the disk inode is copied to the
 *		incore inode.
 *
 * PARAMETERS:
 *	ip	- pointer to incore inode to be initialized from disk.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error.
 *	-ENOMEM	- insufficient memory
 *	-ESTALE	- the inode extent is not backed as expected, or the
 *		  disk inode has a link count of zero.
 *
 */
int diRead(struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct inode *ipimap = sbi->ipimap;
	struct inomap *imap;
	struct metapage *mp;
	struct iag *iagp;
	struct dinode *dp;
	pxd_t *ixpxd;
	s64 blkno, agstart;
	uint pageno;
	int iagno, ino, extno, rc;
	int block_offset, inodes_left, rel_inode;

	jfs_info("diRead: ino = %ld", ip->i_ino);

	JFS_IP(ip)->ipimap = ipimap;

	/* the iag that describes this inode number */
	iagno = INOTOIAG(ip->i_ino);

	/* read that iag while holding the read lock of the map inode */
	imap = JFS_IP(ipimap)->i_imap;
	IREAD_LOCK(ipimap);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc) {
		jfs_err("diRead: diIAGRead returned %d", rc);
		return rc;
	}
	iagp = (struct iag *) mp->data;

	/* position of the inode in the iag and the extent holding it */
	ino = ip->i_ino & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;
	ixpxd = &iagp->inoext[extno];

	/* the extent must have the map's extent length and an address */
	if ((lengthPXD(ixpxd) != imap->im_nbperiext) ||
	    (addressPXD(ixpxd) == 0)) {
		release_metapage(mp);
		return -ESTALE;
	}

	/* disk block number of the page, within the inode extent,
	 * that holds the disk inode.
	 */
	blkno = INOPBLK(ixpxd, ino, sbi->l2nbperpage);

	/* remember the ag start of the iag before letting the iag go */
	agstart = le64_to_cpu(iagp->agstart);

	release_metapage(mp);

	rel_inode = ino & (INOSPERPAGE - 1);
	pageno = blkno >> sbi->l2nbperpage;

	block_offset = (u32) blkno & (sbi->nbperpage - 1);
	if (block_offset) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode >= inodes_left) {
			/* the inode lives in the following page */
			pageno += 1;
			rel_inode -= inodes_left;
		} else {
			rel_inode += block_offset << sbi->l2niperblk;
		}
	}

	/* read the page of disk inodes */
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp) {
		jfs_err("diRead: read_metapage failed");
		return -EIO;
	}

	/* step to the requested disk inode within the page */
	dp = (struct dinode *) mp->data + rel_inode;

	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
		jfs_error(ip->i_sb, "diRead: i_ino != di_number");
		rc = -EIO;
	} else if (le32_to_cpu(dp->di_nlink) == 0) {
		rc = -ESTALE;
	} else {
		/* copy the disk inode to the in-memory inode */
		rc = copy_from_dinode(dp, ip);
	}

	release_metapage(mp);

	/* record the ag of the inode; this is done whatever rc holds */
	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
	JFS_IP(ip)->active_ag = -1;

	return rc;
}


/*
 * NAME:	diReadSpecial()
 *
 * FUNCTION:	initialize a 'special' inode from disk.
 *
 *		this routines handles aggregate level inodes.  The
 *		inode cache cannot differentiate between the
 *		aggregate inodes and the filesystem inodes, so we
 *		handle these here.  We don't actually use the aggregate
 *		inode map, since these inodes are at a fixed location
 *		and in some cases the aggregate inode map isn't initialized
 *		yet.
419 * 420 * PARAMETERS: 421 * sb - filesystem superblock 422 * inum - aggregate inode number 423 * secondary - 1 if secondary aggregate inode table 424 * 425 * RETURN VALUES: 426 * new inode - success 427 * NULL - i/o error. 428 */ 429 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 430 { 431 struct jfs_sb_info *sbi = JFS_SBI(sb); 432 uint address; 433 struct dinode *dp; 434 struct inode *ip; 435 struct metapage *mp; 436 437 ip = new_inode(sb); 438 if (ip == NULL) { 439 jfs_err("diReadSpecial: new_inode returned NULL!"); 440 return ip; 441 } 442 443 if (secondary) { 444 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 445 JFS_IP(ip)->ipimap = sbi->ipaimap2; 446 } else { 447 address = AITBL_OFF >> L2PSIZE; 448 JFS_IP(ip)->ipimap = sbi->ipaimap; 449 } 450 451 ASSERT(inum < INOSPEREXT); 452 453 ip->i_ino = inum; 454 455 address += inum >> 3; /* 8 inodes per 4K page */ 456 457 /* read the page of fixed disk inode (AIT) in raw mode */ 458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 459 if (mp == NULL) { 460 ip->i_nlink = 1; /* Don't want iput() deleting it */ 461 iput(ip); 462 return (NULL); 463 } 464 465 /* get the pointer to the disk inode of interest */ 466 dp = (struct dinode *) (mp->data); 467 dp += inum % 8; /* 8 inodes per 4K page */ 468 469 /* copy on-disk inode to in-memory inode */ 470 if ((copy_from_dinode(dp, ip)) != 0) { 471 /* handle bad return by returning NULL for ip */ 472 ip->i_nlink = 1; /* Don't want iput() deleting it */ 473 iput(ip); 474 /* release the page */ 475 release_metapage(mp); 476 return (NULL); 477 478 } 479 480 ip->i_mapping->a_ops = &jfs_metapage_aops; 481 mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); 482 483 /* Allocations to metadata inodes should not affect quotas */ 484 ip->i_flags |= S_NOQUOTA; 485 486 if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { 487 sbi->gengen = le32_to_cpu(dp->di_gengen); 488 sbi->inostamp = le32_to_cpu(dp->di_inostamp); 489 } 490 
491 /* release the page */ 492 release_metapage(mp); 493 494 return (ip); 495 } 496 497 /* 498 * NAME: diWriteSpecial() 499 * 500 * FUNCTION: Write the special inode to disk 501 * 502 * PARAMETERS: 503 * ip - special inode 504 * secondary - 1 if secondary aggregate inode table 505 * 506 * RETURN VALUES: none 507 */ 508 509 void diWriteSpecial(struct inode *ip, int secondary) 510 { 511 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 512 uint address; 513 struct dinode *dp; 514 ino_t inum = ip->i_ino; 515 struct metapage *mp; 516 517 ip->i_state &= ~I_DIRTY; 518 519 if (secondary) 520 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 521 else 522 address = AITBL_OFF >> L2PSIZE; 523 524 ASSERT(inum < INOSPEREXT); 525 526 address += inum >> 3; /* 8 inodes per 4K page */ 527 528 /* read the page of fixed disk inode (AIT) in raw mode */ 529 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 530 if (mp == NULL) { 531 jfs_err("diWriteSpecial: failed to read aggregate inode " 532 "extent!"); 533 return; 534 } 535 536 /* get the pointer to the disk inode of interest */ 537 dp = (struct dinode *) (mp->data); 538 dp += inum % 8; /* 8 inodes per 4K page */ 539 540 /* copy on-disk inode to in-memory inode */ 541 copy_to_dinode(dp, ip); 542 memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); 543 544 if (inum == FILESYSTEM_I) 545 dp->di_gengen = cpu_to_le32(sbi->gengen); 546 547 /* write the page */ 548 write_metapage(mp); 549 } 550 551 /* 552 * NAME: diFreeSpecial() 553 * 554 * FUNCTION: Free allocated space for special inode 555 */ 556 void diFreeSpecial(struct inode *ip) 557 { 558 if (ip == NULL) { 559 jfs_err("diFreeSpecial called with NULL ip!"); 560 return; 561 } 562 filemap_fdatawrite(ip->i_mapping); 563 filemap_fdatawait(ip->i_mapping); 564 truncate_inode_pages(ip->i_mapping, 0); 565 iput(ip); 566 } 567 568 569 570 /* 571 * NAME: diWrite() 572 * 573 * FUNCTION: write the on-disk inode portion of the in-memory inode 574 * to its corresponding on-disk inode. 
*
 *		on entry, the specified incore inode should itself
 *		specify the disk inode number corresponding to the
 *		incore inode (i.e. i_number should be initialized).
 *
 *		the inode contains the inode extent address for the disk
 *		inode.  with the inode extent address in hand, the
 *		page of the extent that contains the disk inode is
 *		read and the disk inode portion of the incore inode
 *		is copied to the disk inode.
 *
 *		each region of the disk inode that is copied is also
 *		recorded as an (offset, length) entry in the linelock of
 *		the transaction lock taken on the inode page.
 *
 * PARAMETERS:
 *	tid	- transaction id
 *	ip	- pointer to incore inode to be written to the inode extent.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error, or the inode extent descriptor (ixpxd) of
 *		  the inode is invalid.
 */
int diWrite(tid_t tid, struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;		/* never changed below; returned as is */
	s32 ino;
	struct dinode *dp;
	s64 blkno;
	int block_offset;
	int inodes_left;
	struct metapage *mp;
	uint pageno;
	int rel_inode;
	int dioffset;
	struct inode *ipimap;
	uint type;
	lid_t lid;
	struct tlock *ditlck, *tlck;
	struct linelock *dilinelock, *ilinelock;
	struct lv *lv;
	int n;

	ipimap = jfs_ip->ipimap;

	/* inode number relative to its iag */
	ino = ip->i_ino & (INOSPERIAG - 1);

	/* the inode extent must have an address and the extent length
	 * recorded in the inode map
	 */
	if (!addressPXD(&(jfs_ip->ixpxd)) ||
	    (lengthPXD(&(jfs_ip->ixpxd)) !=
	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
		jfs_error(ip->i_sb, "diWrite: ixpxd invalid");
		return -EIO;
	}

	/*
	 * read the page of disk inode containing the specified inode:
	 */
	/* compute the block address of the page */
	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);

	/* inode index within the page, and the page number */
	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			/* the inode lives in the following page */
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}
	/* read the page of disk inode */
      retry:
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (mp == 0)
		return -EIO;

	/* get the pointer to the disk inode */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	/*
	 * byte offset used for the linelock entries recorded below.
	 * NOTE(review): this is derived from ino, whereas dp above is
	 * advanced by rel_inode, which is adjusted when block_offset is
	 * non-zero; the two agree only for page-aligned extents - confirm
	 * that this is intended.
	 */
	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;

	/*
	 * acquire transaction lock on the on-disk inode;
	 * N.B. tlock is acquired on ipimap not ip;
	 *
	 * when txLock() returns NULL the page is read again and the
	 * lock is retried.
	 */
	if ((ditlck =
	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
		goto retry;
	dilinelock = (struct linelock *) & ditlck->lock;

	/*
	 * copy btree root from in-memory inode to on-disk inode
	 *
	 * (tlock is taken from inline B+-tree root in in-memory
	 * inode when the B+-tree root is updated, which is pointed
	 * by jfs_ip->blid as well as being on tx tlock list)
	 *
	 * further processing of btree root is based on the copy
	 * in in-memory inode, where txLog() will log from, and,
	 * for xtree root, txUpdateMap() will update map and reset
	 * XAD_NEW bit;
	 */

	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
		/*
		 * This is the special xtree inside the directory for storing
		 * the directory table
		 */
		xtpage_t *p, *xp;
		xad_t *xad;

		/* take over the tlock named by xtlid and point it at mp */
		jfs_ip->xtlid = 0;
		tlck = lid_to_tlock(lid);
		assert(tlck->type & tlckXTREE);
		tlck->type |= tlckBTROOT;
		tlck->mp = mp;
		ilinelock = (struct linelock *) & tlck->lock;

		/*
		 * copy xtree root from inode to dinode:
		 * one memcpy per entry of the tlock's linelock
		 */
		p = &jfs_ip->i_xtroot;
		xp = (xtpage_t *) &dp->di_dirtable;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}

	/* no tlock on the inline btree root: nothing more to copy for it */
	if ((lid = jfs_ip->blid) == 0)
		goto inlineData;
	jfs_ip->blid = 0;

	/* take over the tlock named by blid and point it at mp; the
	 * type is sampled before tlckBTROOT is added
	 */
	tlck = lid_to_tlock(lid);
	type = tlck->type;
	tlck->type |= tlckBTROOT;
	tlck->mp = mp;
	ilinelock = (struct linelock *) & tlck->lock;

	/*
	 *      regular file: 16 byte (XAD slot) granularity
	 */
	if (type & tlckXTREE) {
		xtpage_t *p, *xp;
		xad_t *xad;

		/*
		 * copy xtree root from inode to dinode:
		 */
		p = &jfs_ip->i_xtroot;
		xp = &dp->di_xtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}
	/*
	 *      directory: 32 byte (directory entry slot) granularity
	 */
	else if (type & tlckDTREE) {
		dtpage_t *p, *xp;

		/*
		 * copy dtree root from inode to dinode:
		 */
		p = (dtpage_t *) &jfs_ip->i_dtroot;
		xp = (dtpage_t *) & dp->di_dtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
			       lv->length << L2DTSLOTSIZE);
		}
	} else {
		/* neither an xtree nor a dtree tlock: only logged */
		jfs_err("diWrite: UFO tlock");
	}

      inlineData:
	/*
	 * copy inline symlink from in-memory inode to on-disk inode;
	 * recorded as two slots starting 2 * 128 bytes into the inode
	 */
	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
		lv->length = 2;
		memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
		dilinelock->index++;
	}
	/*
	 * copy inline data from in-memory inode to on-disk inode:
	 * 128 byte slot granularity; recorded as one slot starting
	 * 3 * 128 bytes into the inode
	 */
	if (test_cflag(COMMIT_Inlineea, ip)) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
		lv->length = 1;
		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
		dilinelock->index++;

		clear_cflag(COMMIT_Inlineea, ip);
	}

	/*
	 *      lock/copy inode base: 128 byte slot granularity
	 *
	 * one slot is recorded, or two when COMMIT_Dirtable was set and
	 * the directory table is copied as well
	 */
	/* baseDinode: */
	lv = & dilinelock->lv[dilinelock->index];
	lv->offset = dioffset >> L2INODESLOTSIZE;
	copy_to_dinode(dp, ip);
	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
		lv->length = 2;
		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
	} else
		lv->length = 1;
	dilinelock->index++;

#ifdef _JFS_FASTDASD
	/*
	 * We aren't logging changes to the DASD used in directory inodes,
	 * but we need to write them to disk.  If we don't unmount cleanly,
	 * mount will recalculate the DASD used.
	 */
	if (S_ISDIR(ip->i_mode)
	    && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
		memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
#endif				/*  _JFS_FASTDASD */

	/* release the buffer holding the updated on-disk inode.
	 * the buffer will be later written by commit processing.
	 */
	write_metapage(mp);

	return (rc);
}


/*
 * NAME:	diFree(ip)
 *
 * FUNCTION:	free a specified inode from the inode working map
 *		for a fileset or aggregate.
 *
 *		if the inode to be freed represents the first (only)
 *		free inode within the iag, the iag will be placed on
 *		the ag free inode list.
 *
 *		freeing the inode will cause the inode extent to be
 *		freed if the inode is the only allocated inode within
 *		the extent.  in this case all the disk resource backing
 *		up the inode extent will be freed. in addition, the iag
 *		will be placed on the ag extent free list if the extent
 *		is the first free extent in the iag.
if freeing the 848 * extent also means that no free inodes will exist for 849 * the iag, the iag will also be removed from the ag free 850 * inode list. 851 * 852 * the iag describing the inode will be freed if the extent 853 * is to be freed and it is the only backed extent within 854 * the iag. in this case, the iag will be removed from the 855 * ag free extent list and ag free inode list and placed on 856 * the inode map's free iag list. 857 * 858 * a careful update approach is used to provide consistency 859 * in the face of updates to multiple buffers. under this 860 * approach, all required buffers are obtained before making 861 * any updates and are held until all updates are complete. 862 * 863 * PARAMETERS: 864 * ip - inode to be freed. 865 * 866 * RETURN VALUES: 867 * 0 - success 868 * -EIO - i/o error. 869 */ 870 int diFree(struct inode *ip) 871 { 872 int rc; 873 ino_t inum = ip->i_ino; 874 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 875 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 876 int iagno, ino, extno, bitno, sword, agno; 877 int back, fwd; 878 u32 bitmap, mask; 879 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 880 struct inomap *imap = JFS_IP(ipimap)->i_imap; 881 pxd_t freepxd; 882 tid_t tid; 883 struct inode *iplist[3]; 884 struct tlock *tlck; 885 struct pxd_lock *pxdlock; 886 887 /* 888 * This is just to suppress compiler warnings. The same logic that 889 * references these variables is used to initialize them. 890 */ 891 aiagp = biagp = ciagp = diagp = NULL; 892 893 /* get the iag number containing the inode. 894 */ 895 iagno = INOTOIAG(inum); 896 897 /* make sure that the iag is contained within 898 * the map. 899 */ 900 if (iagno >= imap->im_nextiag) { 901 dump_mem("imap", imap, 32); 902 jfs_error(ip->i_sb, 903 "diFree: inum = %d, iagno = %d, nextiag = %d", 904 (uint) inum, iagno, imap->im_nextiag); 905 return -EIO; 906 } 907 908 /* get the allocation group for this ino. 
909 */ 910 agno = JFS_IP(ip)->agno; 911 912 /* Lock the AG specific inode map information 913 */ 914 AG_LOCK(imap, agno); 915 916 /* Obtain read lock in imap inode. Don't release it until we have 917 * read all of the IAG's that we are going to. 918 */ 919 IREAD_LOCK(ipimap); 920 921 /* read the iag. 922 */ 923 if ((rc = diIAGRead(imap, iagno, &mp))) { 924 IREAD_UNLOCK(ipimap); 925 AG_UNLOCK(imap, agno); 926 return (rc); 927 } 928 iagp = (struct iag *) mp->data; 929 930 /* get the inode number and extent number of the inode within 931 * the iag and the inode number within the extent. 932 */ 933 ino = inum & (INOSPERIAG - 1); 934 extno = ino >> L2INOSPEREXT; 935 bitno = ino & (INOSPEREXT - 1); 936 mask = HIGHORDER >> bitno; 937 938 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 939 jfs_error(ip->i_sb, 940 "diFree: wmap shows inode already free"); 941 } 942 943 if (!addressPXD(&iagp->inoext[extno])) { 944 release_metapage(mp); 945 IREAD_UNLOCK(ipimap); 946 AG_UNLOCK(imap, agno); 947 jfs_error(ip->i_sb, "diFree: invalid inoext"); 948 return -EIO; 949 } 950 951 /* compute the bitmap for the extent reflecting the freed inode. 952 */ 953 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 954 955 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 956 release_metapage(mp); 957 IREAD_UNLOCK(ipimap); 958 AG_UNLOCK(imap, agno); 959 jfs_error(ip->i_sb, "diFree: numfree > numinos"); 960 return -EIO; 961 } 962 /* 963 * inode extent still has some inodes or below low water mark: 964 * keep the inode extent; 965 */ 966 if (bitmap || 967 imap->im_agctl[agno].numfree < 96 || 968 (imap->im_agctl[agno].numfree < 288 && 969 (((imap->im_agctl[agno].numfree * 100) / 970 imap->im_agctl[agno].numinos) <= 25))) { 971 /* if the iag currently has no free inodes (i.e., 972 * the inode being freed is the first free inode of iag), 973 * insert the iag at head of the inode free list for the ag. 
974 */ 975 if (iagp->nfreeinos == 0) { 976 /* check if there are any iags on the ag inode 977 * free list. if so, read the first one so that 978 * we can link the current iag onto the list at 979 * the head. 980 */ 981 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 982 /* read the iag that currently is the head 983 * of the list. 984 */ 985 if ((rc = diIAGRead(imap, fwd, &))) { 986 IREAD_UNLOCK(ipimap); 987 AG_UNLOCK(imap, agno); 988 release_metapage(mp); 989 return (rc); 990 } 991 aiagp = (struct iag *) amp->data; 992 993 /* make current head point back to the iag. 994 */ 995 aiagp->inofreeback = cpu_to_le32(iagno); 996 997 write_metapage(amp); 998 } 999 1000 /* iag points forward to current head and iag 1001 * becomes the new head of the list. 1002 */ 1003 iagp->inofreefwd = 1004 cpu_to_le32(imap->im_agctl[agno].inofree); 1005 iagp->inofreeback = cpu_to_le32(-1); 1006 imap->im_agctl[agno].inofree = iagno; 1007 } 1008 IREAD_UNLOCK(ipimap); 1009 1010 /* update the free inode summary map for the extent if 1011 * freeing the inode means the extent will now have free 1012 * inodes (i.e., the inode being freed is the first free 1013 * inode of extent), 1014 */ 1015 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1016 sword = extno >> L2EXTSPERSUM; 1017 bitno = extno & (EXTSPERSUM - 1); 1018 iagp->inosmap[sword] &= 1019 cpu_to_le32(~(HIGHORDER >> bitno)); 1020 } 1021 1022 /* update the bitmap. 1023 */ 1024 iagp->wmap[extno] = cpu_to_le32(bitmap); 1025 1026 /* update the free inode counts at the iag, ag and 1027 * map level. 
1028 */ 1029 iagp->nfreeinos = 1030 cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); 1031 imap->im_agctl[agno].numfree += 1; 1032 atomic_inc(&imap->im_numfree); 1033 1034 /* release the AG inode map lock 1035 */ 1036 AG_UNLOCK(imap, agno); 1037 1038 /* write the iag */ 1039 write_metapage(mp); 1040 1041 return (0); 1042 } 1043 1044 1045 /* 1046 * inode extent has become free and above low water mark: 1047 * free the inode extent; 1048 */ 1049 1050 /* 1051 * prepare to update iag list(s) (careful update step 1) 1052 */ 1053 amp = bmp = cmp = dmp = NULL; 1054 fwd = back = -1; 1055 1056 /* check if the iag currently has no free extents. if so, 1057 * it will be placed on the head of the ag extent free list. 1058 */ 1059 if (iagp->nfreeexts == 0) { 1060 /* check if the ag extent free list has any iags. 1061 * if so, read the iag at the head of the list now. 1062 * this (head) iag will be updated later to reflect 1063 * the addition of the current iag at the head of 1064 * the list. 1065 */ 1066 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1067 if ((rc = diIAGRead(imap, fwd, &))) 1068 goto error_out; 1069 aiagp = (struct iag *) amp->data; 1070 } 1071 } else { 1072 /* iag has free extents. check if the addition of a free 1073 * extent will cause all extents to be free within this 1074 * iag. if so, the iag will be removed from the ag extent 1075 * free list and placed on the inode map's free iag list. 1076 */ 1077 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1078 /* in preparation for removing the iag from the 1079 * ag extent free list, read the iags preceeding 1080 * and following the iag on the ag extent free 1081 * list. 
1082 */ 1083 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1084 if ((rc = diIAGRead(imap, fwd, &))) 1085 goto error_out; 1086 aiagp = (struct iag *) amp->data; 1087 } 1088 1089 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1090 if ((rc = diIAGRead(imap, back, &bmp))) 1091 goto error_out; 1092 biagp = (struct iag *) bmp->data; 1093 } 1094 } 1095 } 1096 1097 /* remove the iag from the ag inode free list if freeing 1098 * this extent cause the iag to have no free inodes. 1099 */ 1100 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1101 int inofreeback = le32_to_cpu(iagp->inofreeback); 1102 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1103 1104 /* in preparation for removing the iag from the 1105 * ag inode free list, read the iags preceeding 1106 * and following the iag on the ag inode free 1107 * list. before reading these iags, we must make 1108 * sure that we already don't have them in hand 1109 * from up above, since re-reading an iag (buffer) 1110 * we are currently holding would cause a deadlock. 
1111 */ 1112 if (inofreefwd >= 0) { 1113 1114 if (inofreefwd == fwd) 1115 ciagp = (struct iag *) amp->data; 1116 else if (inofreefwd == back) 1117 ciagp = (struct iag *) bmp->data; 1118 else { 1119 if ((rc = 1120 diIAGRead(imap, inofreefwd, &cmp))) 1121 goto error_out; 1122 ciagp = (struct iag *) cmp->data; 1123 } 1124 assert(ciagp != NULL); 1125 } 1126 1127 if (inofreeback >= 0) { 1128 if (inofreeback == fwd) 1129 diagp = (struct iag *) amp->data; 1130 else if (inofreeback == back) 1131 diagp = (struct iag *) bmp->data; 1132 else { 1133 if ((rc = 1134 diIAGRead(imap, inofreeback, &dmp))) 1135 goto error_out; 1136 diagp = (struct iag *) dmp->data; 1137 } 1138 assert(diagp != NULL); 1139 } 1140 } 1141 1142 IREAD_UNLOCK(ipimap); 1143 1144 /* 1145 * invalidate any page of the inode extent freed from buffer cache; 1146 */ 1147 freepxd = iagp->inoext[extno]; 1148 invalidate_pxd_metapages(ip, freepxd); 1149 1150 /* 1151 * update iag list(s) (careful update step 2) 1152 */ 1153 /* add the iag to the ag extent free list if this is the 1154 * first free extent for the iag. 1155 */ 1156 if (iagp->nfreeexts == 0) { 1157 if (fwd >= 0) 1158 aiagp->extfreeback = cpu_to_le32(iagno); 1159 1160 iagp->extfreefwd = 1161 cpu_to_le32(imap->im_agctl[agno].extfree); 1162 iagp->extfreeback = cpu_to_le32(-1); 1163 imap->im_agctl[agno].extfree = iagno; 1164 } else { 1165 /* remove the iag from the ag extent list if all extents 1166 * are now free and place it on the inode map iag free list. 
1167 */ 1168 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1169 if (fwd >= 0) 1170 aiagp->extfreeback = iagp->extfreeback; 1171 1172 if (back >= 0) 1173 biagp->extfreefwd = iagp->extfreefwd; 1174 else 1175 imap->im_agctl[agno].extfree = 1176 le32_to_cpu(iagp->extfreefwd); 1177 1178 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1179 1180 IAGFREE_LOCK(imap); 1181 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1182 imap->im_freeiag = iagno; 1183 IAGFREE_UNLOCK(imap); 1184 } 1185 } 1186 1187 /* remove the iag from the ag inode free list if freeing 1188 * this extent causes the iag to have no free inodes. 1189 */ 1190 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1191 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1192 ciagp->inofreeback = iagp->inofreeback; 1193 1194 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1195 diagp->inofreefwd = iagp->inofreefwd; 1196 else 1197 imap->im_agctl[agno].inofree = 1198 le32_to_cpu(iagp->inofreefwd); 1199 1200 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1201 } 1202 1203 /* update the inode extent address and working map 1204 * to reflect the free extent. 1205 * the permanent map should have been updated already 1206 * for the inode being freed. 1207 */ 1208 if (iagp->pmap[extno] != 0) { 1209 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); 1210 } 1211 iagp->wmap[extno] = 0; 1212 PXDlength(&iagp->inoext[extno], 0); 1213 PXDaddress(&iagp->inoext[extno], 0); 1214 1215 /* update the free extent and free inode summary maps 1216 * to reflect the freed extent. 1217 * the inode summary map is marked to indicate no inodes 1218 * available for the freed extent. 1219 */ 1220 sword = extno >> L2EXTSPERSUM; 1221 bitno = extno & (EXTSPERSUM - 1); 1222 mask = HIGHORDER >> bitno; 1223 iagp->inosmap[sword] |= cpu_to_le32(mask); 1224 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1225 1226 /* update the number of free inodes and number of free extents 1227 * for the iag. 
1228 */ 1229 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1230 (INOSPEREXT - 1)); 1231 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); 1232 1233 /* update the number of free inodes and backed inodes 1234 * at the ag and inode map level. 1235 */ 1236 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1237 imap->im_agctl[agno].numinos -= INOSPEREXT; 1238 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1239 atomic_sub(INOSPEREXT, &imap->im_numinos); 1240 1241 if (amp) 1242 write_metapage(amp); 1243 if (bmp) 1244 write_metapage(bmp); 1245 if (cmp) 1246 write_metapage(cmp); 1247 if (dmp) 1248 write_metapage(dmp); 1249 1250 /* 1251 * start transaction to update block allocation map 1252 * for the inode extent freed; 1253 * 1254 * N.B. AG_LOCK is released and iag will be released below, and 1255 * other thread may allocate inode from/reusing the ixad freed 1256 * BUT with new/different backing inode extent from the extent 1257 * to be freed by the transaction; 1258 */ 1259 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1260 down(&JFS_IP(ipimap)->commit_sem); 1261 1262 /* acquire tlock of the iag page of the freed ixad 1263 * to force the page NOHOMEOK (even though no data is 1264 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1265 * for the free of the extent is committed; 1266 * write FREEXTENT|NOREDOPAGE log record 1267 * N.B. linelock is overlaid as freed extent descriptor; 1268 */ 1269 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1270 pxdlock = (struct pxd_lock *) & tlck->lock; 1271 pxdlock->flag = mlckFREEPXD; 1272 pxdlock->pxd = freepxd; 1273 pxdlock->index = 1; 1274 1275 write_metapage(mp); 1276 1277 iplist[0] = ipimap; 1278 1279 /* 1280 * logredo needs the IAG number and IAG extent index in order 1281 * to ensure that the IMap is consistent. The least disruptive 1282 * way to pass these values through to the transaction manager 1283 * is in the iplist array. 1284 * 1285 * It's not pretty, but it works. 
1286 */ 1287 iplist[1] = (struct inode *) (size_t)iagno; 1288 iplist[2] = (struct inode *) (size_t)extno; 1289 1290 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1291 1292 txEnd(tid); 1293 up(&JFS_IP(ipimap)->commit_sem); 1294 1295 /* unlock the AG inode map information */ 1296 AG_UNLOCK(imap, agno); 1297 1298 return (0); 1299 1300 error_out: 1301 IREAD_UNLOCK(ipimap); 1302 1303 if (amp) 1304 release_metapage(amp); 1305 if (bmp) 1306 release_metapage(bmp); 1307 if (cmp) 1308 release_metapage(cmp); 1309 if (dmp) 1310 release_metapage(dmp); 1311 1312 AG_UNLOCK(imap, agno); 1313 1314 release_metapage(mp); 1315 1316 return (rc); 1317 } 1318 1319 /* 1320 * There are several places in the diAlloc* routines where we initialize 1321 * the inode. 1322 */ 1323 static inline void 1324 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1325 { 1326 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 1327 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1328 1329 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1330 jfs_ip->ixpxd = iagp->inoext[extno]; 1331 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1332 jfs_ip->active_ag = -1; 1333 } 1334 1335 1336 /* 1337 * NAME: diAlloc(pip,dir,ip) 1338 * 1339 * FUNCTION: allocate a disk inode from the inode working map 1340 * for a fileset or aggregate. 1341 * 1342 * PARAMETERS: 1343 * pip - pointer to incore inode for the parent inode. 1344 * dir - TRUE if the new disk inode is for a directory. 1345 * ip - pointer to a new inode 1346 * 1347 * RETURN VALUES: 1348 * 0 - success. 1349 * -ENOSPC - insufficient disk resources. 1350 * -EIO - i/o error. 
1351 */ 1352 int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) 1353 { 1354 int rc, ino, iagno, addext, extno, bitno, sword; 1355 int nwords, rem, i, agno; 1356 u32 mask, inosmap, extsmap; 1357 struct inode *ipimap; 1358 struct metapage *mp; 1359 ino_t inum; 1360 struct iag *iagp; 1361 struct inomap *imap; 1362 1363 /* get the pointers to the inode map inode and the 1364 * corresponding imap control structure. 1365 */ 1366 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1367 imap = JFS_IP(ipimap)->i_imap; 1368 JFS_IP(ip)->ipimap = ipimap; 1369 JFS_IP(ip)->fileset = FILESYSTEM_I; 1370 1371 /* for a directory, the allocation policy is to start 1372 * at the ag level using the preferred ag. 1373 */ 1374 if (dir == TRUE) { 1375 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1376 AG_LOCK(imap, agno); 1377 goto tryag; 1378 } 1379 1380 /* for files, the policy starts off by trying to allocate from 1381 * the same iag containing the parent disk inode: 1382 * try to allocate the new disk inode close to the parent disk 1383 * inode, using parent disk inode number + 1 as the allocation 1384 * hint. (we use a left-to-right policy to attempt to avoid 1385 * moving backward on the disk.) compute the hint within the 1386 * file system and the iag. 1387 */ 1388 1389 /* get the ag number of this iag */ 1390 agno = JFS_IP(pip)->agno; 1391 1392 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1393 /* 1394 * There is an open file actively growing. We want to 1395 * allocate new inodes from a different ag to avoid 1396 * fragmentation problems. 
1397 */ 1398 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1399 AG_LOCK(imap, agno); 1400 goto tryag; 1401 } 1402 1403 inum = pip->i_ino + 1; 1404 ino = inum & (INOSPERIAG - 1); 1405 1406 /* back off the the hint if it is outside of the iag */ 1407 if (ino == 0) 1408 inum = pip->i_ino; 1409 1410 /* lock the AG inode map information */ 1411 AG_LOCK(imap, agno); 1412 1413 /* Get read lock on imap inode */ 1414 IREAD_LOCK(ipimap); 1415 1416 /* get the iag number and read the iag */ 1417 iagno = INOTOIAG(inum); 1418 if ((rc = diIAGRead(imap, iagno, &mp))) { 1419 IREAD_UNLOCK(ipimap); 1420 AG_UNLOCK(imap, agno); 1421 return (rc); 1422 } 1423 iagp = (struct iag *) mp->data; 1424 1425 /* determine if new inode extent is allowed to be added to the iag. 1426 * new inode extent can be added to the iag if the ag 1427 * has less than 32 free disk inodes and the iag has free extents. 1428 */ 1429 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1430 1431 /* 1432 * try to allocate from the IAG 1433 */ 1434 /* check if the inode may be allocated from the iag 1435 * (i.e. the inode has free inodes or new extent can be added). 1436 */ 1437 if (iagp->nfreeinos || addext) { 1438 /* determine the extent number of the hint. 1439 */ 1440 extno = ino >> L2INOSPEREXT; 1441 1442 /* check if the extent containing the hint has backed 1443 * inodes. if so, try to allocate within this extent. 1444 */ 1445 if (addressPXD(&iagp->inoext[extno])) { 1446 bitno = ino & (INOSPEREXT - 1); 1447 if ((bitno = 1448 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1449 bitno)) 1450 < INOSPEREXT) { 1451 ino = (extno << L2INOSPEREXT) + bitno; 1452 1453 /* a free inode (bit) was found within this 1454 * extent, so allocate it. 1455 */ 1456 rc = diAllocBit(imap, iagp, ino); 1457 IREAD_UNLOCK(ipimap); 1458 if (rc) { 1459 assert(rc == -EIO); 1460 } else { 1461 /* set the results of the allocation 1462 * and write the iag. 
1463 */ 1464 diInitInode(ip, iagno, ino, extno, 1465 iagp); 1466 mark_metapage_dirty(mp); 1467 } 1468 release_metapage(mp); 1469 1470 /* free the AG lock and return. 1471 */ 1472 AG_UNLOCK(imap, agno); 1473 return (rc); 1474 } 1475 1476 if (!addext) 1477 extno = 1478 (extno == 1479 EXTSPERIAG - 1) ? 0 : extno + 1; 1480 } 1481 1482 /* 1483 * no free inodes within the extent containing the hint. 1484 * 1485 * try to allocate from the backed extents following 1486 * hint or, if appropriate (i.e. addext is true), allocate 1487 * an extent of free inodes at or following the extent 1488 * containing the hint. 1489 * 1490 * the free inode and free extent summary maps are used 1491 * here, so determine the starting summary map position 1492 * and the number of words we'll have to examine. again, 1493 * the approach is to allocate following the hint, so we 1494 * might have to initially ignore prior bits of the summary 1495 * map that represent extents prior to the extent containing 1496 * the hint and later revisit these bits. 1497 */ 1498 bitno = extno & (EXTSPERSUM - 1); 1499 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1500 sword = extno >> L2EXTSPERSUM; 1501 1502 /* mask any prior bits for the starting words of the 1503 * summary map. 1504 */ 1505 mask = ONES << (EXTSPERSUM - bitno); 1506 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1507 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1508 1509 /* scan the free inode and free extent summary maps for 1510 * free resources. 1511 */ 1512 for (i = 0; i < nwords; i++) { 1513 /* check if this word of the free inode summary 1514 * map describes an extent with free inodes. 1515 */ 1516 if (~inosmap) { 1517 /* an extent with free inodes has been 1518 * found. determine the extent number 1519 * and the inode number within the extent. 
1520 */ 1521 rem = diFindFree(inosmap, 0); 1522 extno = (sword << L2EXTSPERSUM) + rem; 1523 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1524 0); 1525 if (rem >= INOSPEREXT) { 1526 IREAD_UNLOCK(ipimap); 1527 release_metapage(mp); 1528 AG_UNLOCK(imap, agno); 1529 jfs_error(ip->i_sb, 1530 "diAlloc: can't find free bit " 1531 "in wmap"); 1532 return EIO; 1533 } 1534 1535 /* determine the inode number within the 1536 * iag and allocate the inode from the 1537 * map. 1538 */ 1539 ino = (extno << L2INOSPEREXT) + rem; 1540 rc = diAllocBit(imap, iagp, ino); 1541 IREAD_UNLOCK(ipimap); 1542 if (rc) 1543 assert(rc == -EIO); 1544 else { 1545 /* set the results of the allocation 1546 * and write the iag. 1547 */ 1548 diInitInode(ip, iagno, ino, extno, 1549 iagp); 1550 mark_metapage_dirty(mp); 1551 } 1552 release_metapage(mp); 1553 1554 /* free the AG lock and return. 1555 */ 1556 AG_UNLOCK(imap, agno); 1557 return (rc); 1558 1559 } 1560 1561 /* check if we may allocate an extent of free 1562 * inodes and whether this word of the free 1563 * extents summary map describes a free extent. 1564 */ 1565 if (addext && ~extsmap) { 1566 /* a free extent has been found. determine 1567 * the extent number. 1568 */ 1569 rem = diFindFree(extsmap, 0); 1570 extno = (sword << L2EXTSPERSUM) + rem; 1571 1572 /* allocate an extent of free inodes. 1573 */ 1574 if ((rc = diNewExt(imap, iagp, extno))) { 1575 /* if there is no disk space for a 1576 * new extent, try to allocate the 1577 * disk inode from somewhere else. 1578 */ 1579 if (rc == -ENOSPC) 1580 break; 1581 1582 assert(rc == -EIO); 1583 } else { 1584 /* set the results of the allocation 1585 * and write the iag. 1586 */ 1587 diInitInode(ip, iagno, 1588 extno << L2INOSPEREXT, 1589 extno, iagp); 1590 mark_metapage_dirty(mp); 1591 } 1592 release_metapage(mp); 1593 /* free the imap inode & the AG lock & return. 
1594 */ 1595 IREAD_UNLOCK(ipimap); 1596 AG_UNLOCK(imap, agno); 1597 return (rc); 1598 } 1599 1600 /* move on to the next set of summary map words. 1601 */ 1602 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1603 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1604 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1605 } 1606 } 1607 /* unlock imap inode */ 1608 IREAD_UNLOCK(ipimap); 1609 1610 /* nothing doing in this iag, so release it. */ 1611 release_metapage(mp); 1612 1613 tryag: 1614 /* 1615 * try to allocate anywhere within the same AG as the parent inode. 1616 */ 1617 rc = diAllocAG(imap, agno, dir, ip); 1618 1619 AG_UNLOCK(imap, agno); 1620 1621 if (rc != -ENOSPC) 1622 return (rc); 1623 1624 /* 1625 * try to allocate in any AG. 1626 */ 1627 return (diAllocAny(imap, agno, dir, ip)); 1628 } 1629 1630 1631 /* 1632 * NAME: diAllocAG(imap,agno,dir,ip) 1633 * 1634 * FUNCTION: allocate a disk inode from the allocation group. 1635 * 1636 * this routine first determines if a new extent of free 1637 * inodes should be added for the allocation group, with 1638 * the current request satisfied from this extent. if this 1639 * is the case, an attempt will be made to do just that. if 1640 * this attempt fails or it has been determined that a new 1641 * extent should not be added, an attempt is made to satisfy 1642 * the request by allocating an existing (backed) free inode 1643 * from the allocation group. 1644 * 1645 * PRE CONDITION: Already have the AG lock for this AG. 1646 * 1647 * PARAMETERS: 1648 * imap - pointer to inode map control structure. 1649 * agno - allocation group to allocate from. 1650 * dir - TRUE if the new disk inode is for a directory. 1651 * ip - pointer to the new inode to be filled in on successful return 1652 * with the disk inode number allocated, its extent address 1653 * and the start of the ag. 1654 * 1655 * RETURN VALUES: 1656 * 0 - success. 1657 * -ENOSPC - insufficient disk resources. 1658 * -EIO - i/o error. 
1659 */ 1660 static int 1661 diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) 1662 { 1663 int rc, addext, numfree, numinos; 1664 1665 /* get the number of free and the number of backed disk 1666 * inodes currently within the ag. 1667 */ 1668 numfree = imap->im_agctl[agno].numfree; 1669 numinos = imap->im_agctl[agno].numinos; 1670 1671 if (numfree > numinos) { 1672 jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); 1673 return -EIO; 1674 } 1675 1676 /* determine if we should allocate a new extent of free inodes 1677 * within the ag: for directory inodes, add a new extent 1678 * if there are a small number of free inodes or number of free 1679 * inodes is a small percentage of the number of backed inodes. 1680 */ 1681 if (dir == TRUE) 1682 addext = (numfree < 64 || 1683 (numfree < 256 1684 && ((numfree * 100) / numinos) <= 20)); 1685 else 1686 addext = (numfree == 0); 1687 1688 /* 1689 * try to allocate a new extent of free inodes. 1690 */ 1691 if (addext) { 1692 /* if free space is not avaliable for this new extent, try 1693 * below to allocate a free and existing (already backed) 1694 * inode from the ag. 1695 */ 1696 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1697 return (rc); 1698 } 1699 1700 /* 1701 * try to allocate an existing free inode from the ag. 1702 */ 1703 return (diAllocIno(imap, agno, ip)); 1704 } 1705 1706 1707 /* 1708 * NAME: diAllocAny(imap,agno,dir,iap) 1709 * 1710 * FUNCTION: allocate a disk inode from any other allocation group. 1711 * 1712 * this routine is called when an allocation attempt within 1713 * the primary allocation group has failed. if attempts to 1714 * allocate an inode from any allocation group other than the 1715 * specified primary group. 1716 * 1717 * PARAMETERS: 1718 * imap - pointer to inode map control structure. 1719 * agno - primary allocation group (to avoid). 1720 * dir - TRUE if the new disk inode is for a directory. 
1721 * ip - pointer to a new inode to be filled in on successful return 1722 * with the disk inode number allocated, its extent address 1723 * and the start of the ag. 1724 * 1725 * RETURN VALUES: 1726 * 0 - success. 1727 * -ENOSPC - insufficient disk resources. 1728 * -EIO - i/o error. 1729 */ 1730 static int 1731 diAllocAny(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) 1732 { 1733 int ag, rc; 1734 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1735 1736 1737 /* try to allocate from the ags following agno up to 1738 * the maximum ag number. 1739 */ 1740 for (ag = agno + 1; ag <= maxag; ag++) { 1741 AG_LOCK(imap, ag); 1742 1743 rc = diAllocAG(imap, ag, dir, ip); 1744 1745 AG_UNLOCK(imap, ag); 1746 1747 if (rc != -ENOSPC) 1748 return (rc); 1749 } 1750 1751 /* try to allocate from the ags in front of agno. 1752 */ 1753 for (ag = 0; ag < agno; ag++) { 1754 AG_LOCK(imap, ag); 1755 1756 rc = diAllocAG(imap, ag, dir, ip); 1757 1758 AG_UNLOCK(imap, ag); 1759 1760 if (rc != -ENOSPC) 1761 return (rc); 1762 } 1763 1764 /* no free disk inodes. 1765 */ 1766 return -ENOSPC; 1767 } 1768 1769 1770 /* 1771 * NAME: diAllocIno(imap,agno,ip) 1772 * 1773 * FUNCTION: allocate a disk inode from the allocation group's free 1774 * inode list, returning an error if this free list is 1775 * empty (i.e. no iags on the list). 1776 * 1777 * allocation occurs from the first iag on the list using 1778 * the iag's free inode summary map to find the leftmost 1779 * free inode in the iag. 1780 * 1781 * PRE CONDITION: Already have AG lock for this AG. 1782 * 1783 * PARAMETERS: 1784 * imap - pointer to inode map control structure. 1785 * agno - allocation group. 1786 * ip - pointer to new inode to be filled in on successful return 1787 * with the disk inode number allocated, its extent address 1788 * and the start of the ag. 1789 * 1790 * RETURN VALUES: 1791 * 0 - success. 1792 * -ENOSPC - insufficient disk resources. 1793 * -EIO - i/o error. 
1794 */ 1795 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1796 { 1797 int iagno, ino, rc, rem, extno, sword; 1798 struct metapage *mp; 1799 struct iag *iagp; 1800 1801 /* check if there are iags on the ag's free inode list. 1802 */ 1803 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1804 return -ENOSPC; 1805 1806 /* obtain read lock on imap inode */ 1807 IREAD_LOCK(imap->im_ipimap); 1808 1809 /* read the iag at the head of the list. 1810 */ 1811 if ((rc = diIAGRead(imap, iagno, &mp))) { 1812 IREAD_UNLOCK(imap->im_ipimap); 1813 return (rc); 1814 } 1815 iagp = (struct iag *) mp->data; 1816 1817 /* better be free inodes in this iag if it is on the 1818 * list. 1819 */ 1820 if (!iagp->nfreeinos) { 1821 IREAD_UNLOCK(imap->im_ipimap); 1822 release_metapage(mp); 1823 jfs_error(ip->i_sb, 1824 "diAllocIno: nfreeinos = 0, but iag on freelist"); 1825 return -EIO; 1826 } 1827 1828 /* scan the free inode summary map to find an extent 1829 * with free inodes. 1830 */ 1831 for (sword = 0;; sword++) { 1832 if (sword >= SMAPSZ) { 1833 IREAD_UNLOCK(imap->im_ipimap); 1834 release_metapage(mp); 1835 jfs_error(ip->i_sb, 1836 "diAllocIno: free inode not found in summary map"); 1837 return -EIO; 1838 } 1839 1840 if (~iagp->inosmap[sword]) 1841 break; 1842 } 1843 1844 /* found a extent with free inodes. determine 1845 * the extent number. 1846 */ 1847 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1848 if (rem >= EXTSPERSUM) { 1849 IREAD_UNLOCK(imap->im_ipimap); 1850 release_metapage(mp); 1851 jfs_error(ip->i_sb, "diAllocIno: no free extent found"); 1852 return -EIO; 1853 } 1854 extno = (sword << L2EXTSPERSUM) + rem; 1855 1856 /* find the first free inode in the extent. 
1857 */ 1858 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1859 if (rem >= INOSPEREXT) { 1860 IREAD_UNLOCK(imap->im_ipimap); 1861 release_metapage(mp); 1862 jfs_error(ip->i_sb, "diAllocIno: free inode not found"); 1863 return -EIO; 1864 } 1865 1866 /* compute the inode number within the iag. 1867 */ 1868 ino = (extno << L2INOSPEREXT) + rem; 1869 1870 /* allocate the inode. 1871 */ 1872 rc = diAllocBit(imap, iagp, ino); 1873 IREAD_UNLOCK(imap->im_ipimap); 1874 if (rc) { 1875 release_metapage(mp); 1876 return (rc); 1877 } 1878 1879 /* set the results of the allocation and write the iag. 1880 */ 1881 diInitInode(ip, iagno, ino, extno, iagp); 1882 write_metapage(mp); 1883 1884 return (0); 1885 } 1886 1887 1888 /* 1889 * NAME: diAllocExt(imap,agno,ip) 1890 * 1891 * FUNCTION: add a new extent of free inodes to an iag, allocating 1892 * an inode from this extent to satisfy the current allocation 1893 * request. 1894 * 1895 * this routine first tries to find an existing iag with free 1896 * extents through the ag free extent list. if list is not 1897 * empty, the head of the list will be selected as the home 1898 * of the new extent of free inodes. otherwise (the list is 1899 * empty), a new iag will be allocated for the ag to contain 1900 * the extent. 1901 * 1902 * once an iag has been selected, the free extent summary map 1903 * is used to locate a free extent within the iag and diNewExt() 1904 * is called to initialize the extent, with initialization 1905 * including the allocation of the first inode of the extent 1906 * for the purpose of satisfying this request. 1907 * 1908 * PARAMETERS: 1909 * imap - pointer to inode map control structure. 1910 * agno - allocation group number. 1911 * ip - pointer to new inode to be filled in on successful return 1912 * with the disk inode number allocated, its extent address 1913 * and the start of the ag. 1914 * 1915 * RETURN VALUES: 1916 * 0 - success. 1917 * -ENOSPC - insufficient disk resources. 1918 * -EIO - i/o error. 
1919 */ 1920 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1921 { 1922 int rem, iagno, sword, extno, rc; 1923 struct metapage *mp; 1924 struct iag *iagp; 1925 1926 /* check if the ag has any iags with free extents. if not, 1927 * allocate a new iag for the ag. 1928 */ 1929 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1930 /* If successful, diNewIAG will obtain the read lock on the 1931 * imap inode. 1932 */ 1933 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1934 return (rc); 1935 } 1936 iagp = (struct iag *) mp->data; 1937 1938 /* set the ag number if this a brand new iag 1939 */ 1940 iagp->agstart = 1941 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1942 } else { 1943 /* read the iag. 1944 */ 1945 IREAD_LOCK(imap->im_ipimap); 1946 if ((rc = diIAGRead(imap, iagno, &mp))) { 1947 IREAD_UNLOCK(imap->im_ipimap); 1948 jfs_error(ip->i_sb, "diAllocExt: error reading iag"); 1949 return rc; 1950 } 1951 iagp = (struct iag *) mp->data; 1952 } 1953 1954 /* using the free extent summary map, find a free extent. 1955 */ 1956 for (sword = 0;; sword++) { 1957 if (sword >= SMAPSZ) { 1958 release_metapage(mp); 1959 IREAD_UNLOCK(imap->im_ipimap); 1960 jfs_error(ip->i_sb, 1961 "diAllocExt: free ext summary map not found"); 1962 return -EIO; 1963 } 1964 if (~iagp->extsmap[sword]) 1965 break; 1966 } 1967 1968 /* determine the extent number of the free extent. 1969 */ 1970 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1971 if (rem >= EXTSPERSUM) { 1972 release_metapage(mp); 1973 IREAD_UNLOCK(imap->im_ipimap); 1974 jfs_error(ip->i_sb, "diAllocExt: free extent not found"); 1975 return -EIO; 1976 } 1977 extno = (sword << L2EXTSPERSUM) + rem; 1978 1979 /* initialize the new extent. 1980 */ 1981 rc = diNewExt(imap, iagp, extno); 1982 IREAD_UNLOCK(imap->im_ipimap); 1983 if (rc) { 1984 /* something bad happened. if a new iag was allocated, 1985 * place it back on the inode map's iag free list, and 1986 * clear the ag number information. 
1987 */ 1988 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1989 IAGFREE_LOCK(imap); 1990 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1991 imap->im_freeiag = iagno; 1992 IAGFREE_UNLOCK(imap); 1993 } 1994 write_metapage(mp); 1995 return (rc); 1996 } 1997 1998 /* set the results of the allocation and write the iag. 1999 */ 2000 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 2001 2002 write_metapage(mp); 2003 2004 return (0); 2005 } 2006 2007 2008 /* 2009 * NAME: diAllocBit(imap,iagp,ino) 2010 * 2011 * FUNCTION: allocate a backed inode from an iag. 2012 * 2013 * this routine performs the mechanics of allocating a 2014 * specified inode from a backed extent. 2015 * 2016 * if the inode to be allocated represents the last free 2017 * inode within the iag, the iag will be removed from the 2018 * ag free inode list. 2019 * 2020 * a careful update approach is used to provide consistency 2021 * in the face of updates to multiple buffers. under this 2022 * approach, all required buffers are obtained before making 2023 * any updates and are held all are updates are complete. 2024 * 2025 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2026 * this AG. Must have read lock on imap inode. 2027 * 2028 * PARAMETERS: 2029 * imap - pointer to inode map control structure. 2030 * iagp - pointer to iag. 2031 * ino - inode number to be allocated within the iag. 2032 * 2033 * RETURN VALUES: 2034 * 0 - success. 2035 * -ENOSPC - insufficient disk resources. 2036 * -EIO - i/o error. 2037 */ 2038 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2039 { 2040 int extno, bitno, agno, sword, rc; 2041 struct metapage *amp = NULL, *bmp = NULL; 2042 struct iag *aiagp = NULL, *biagp = NULL; 2043 u32 mask; 2044 2045 /* check if this is the last free inode within the iag. 2046 * if so, it will have to be removed from the ag free 2047 * inode list, so get the iags preceeding and following 2048 * it on the list. 
2049 */ 2050 if (iagp->nfreeinos == cpu_to_le32(1)) { 2051 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2052 if ((rc = 2053 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2054 &))) 2055 return (rc); 2056 aiagp = (struct iag *) amp->data; 2057 } 2058 2059 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2060 if ((rc = 2061 diIAGRead(imap, 2062 le32_to_cpu(iagp->inofreeback), 2063 &bmp))) { 2064 if (amp) 2065 release_metapage(amp); 2066 return (rc); 2067 } 2068 biagp = (struct iag *) bmp->data; 2069 } 2070 } 2071 2072 /* get the ag number, extent number, inode number within 2073 * the extent. 2074 */ 2075 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2076 extno = ino >> L2INOSPEREXT; 2077 bitno = ino & (INOSPEREXT - 1); 2078 2079 /* compute the mask for setting the map. 2080 */ 2081 mask = HIGHORDER >> bitno; 2082 2083 /* the inode should be free and backed. 2084 */ 2085 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2086 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2087 (addressPXD(&iagp->inoext[extno]) == 0)) { 2088 if (amp) 2089 release_metapage(amp); 2090 if (bmp) 2091 release_metapage(bmp); 2092 2093 jfs_error(imap->im_ipimap->i_sb, 2094 "diAllocBit: iag inconsistent"); 2095 return -EIO; 2096 } 2097 2098 /* mark the inode as allocated in the working map. 2099 */ 2100 iagp->wmap[extno] |= cpu_to_le32(mask); 2101 2102 /* check if all inodes within the extent are now 2103 * allocated. if so, update the free inode summary 2104 * map to reflect this. 2105 */ 2106 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2107 sword = extno >> L2EXTSPERSUM; 2108 bitno = extno & (EXTSPERSUM - 1); 2109 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2110 } 2111 2112 /* if this was the last free inode in the iag, remove the 2113 * iag from the ag free inode list. 
2114 */ 2115 if (iagp->nfreeinos == cpu_to_le32(1)) { 2116 if (amp) { 2117 aiagp->inofreeback = iagp->inofreeback; 2118 write_metapage(amp); 2119 } 2120 2121 if (bmp) { 2122 biagp->inofreefwd = iagp->inofreefwd; 2123 write_metapage(bmp); 2124 } else { 2125 imap->im_agctl[agno].inofree = 2126 le32_to_cpu(iagp->inofreefwd); 2127 } 2128 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2129 } 2130 2131 /* update the free inode count at the iag, ag, inode 2132 * map levels. 2133 */ 2134 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); 2135 imap->im_agctl[agno].numfree -= 1; 2136 atomic_dec(&imap->im_numfree); 2137 2138 return (0); 2139 } 2140 2141 2142 /* 2143 * NAME: diNewExt(imap,iagp,extno) 2144 * 2145 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2146 * the first inode of the extent for use for the current 2147 * allocation request. 2148 * 2149 * disk resources are allocated for the new extent of inodes 2150 * and the inodes themselves are initialized to reflect their 2151 * existence within the extent (i.e. their inode numbers and 2152 * inode extent addresses are set) and their initial state 2153 * (mode and link count are set to zero). 2154 * 2155 * if the iag is new, it is not yet on an ag extent free list 2156 * but will now be placed on this list. 2157 * 2158 * if the allocation of the new extent causes the iag to 2159 * have no free extent, the iag will be removed from the 2160 * ag extent free list. 2161 * 2162 * if the iag has no free backed inodes, it will be placed 2163 * on the ag free inode list, since the addition of the new 2164 * extent will now cause it to have free inodes. 2165 * 2166 * a careful update approach is used to provide consistency 2167 * (i.e. list consistency) in the face of updates to multiple 2168 * buffers. under this approach, all required buffers are 2169 * obtained before making any updates and are held until all 2170 * updates are complete. 
2171 * 2172 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2173 * this AG. Must have read lock on imap inode. 2174 * 2175 * PARAMETERS: 2176 * imap - pointer to inode map control structure. 2177 * iagp - pointer to iag. 2178 * extno - extent number. 2179 * 2180 * RETURN VALUES: 2181 * 0 - success. 2182 * -ENOSPC - insufficient disk resources. 2183 * -EIO - i/o error. 2184 */ 2185 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2186 { 2187 int agno, iagno, fwd, back, freei = 0, sword, rc; 2188 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; 2189 struct metapage *amp, *bmp, *cmp, *dmp; 2190 struct inode *ipimap; 2191 s64 blkno, hint; 2192 int i, j; 2193 u32 mask; 2194 ino_t ino; 2195 struct dinode *dp; 2196 struct jfs_sb_info *sbi; 2197 2198 /* better have free extents. 2199 */ 2200 if (!iagp->nfreeexts) { 2201 jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); 2202 return -EIO; 2203 } 2204 2205 /* get the inode map inode. 2206 */ 2207 ipimap = imap->im_ipimap; 2208 sbi = JFS_SBI(ipimap->i_sb); 2209 2210 amp = bmp = cmp = NULL; 2211 2212 /* get the ag and iag numbers for this iag. 2213 */ 2214 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 2215 iagno = le32_to_cpu(iagp->iagnum); 2216 2217 /* check if this is the last free extent within the 2218 * iag. if so, the iag must be removed from the ag 2219 * free extent list, so get the iags preceeding and 2220 * following the iag on this list. 2221 */ 2222 if (iagp->nfreeexts == cpu_to_le32(1)) { 2223 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 2224 if ((rc = diIAGRead(imap, fwd, &))) 2225 return (rc); 2226 aiagp = (struct iag *) amp->data; 2227 } 2228 2229 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 2230 if ((rc = diIAGRead(imap, back, &bmp))) 2231 goto error_out; 2232 biagp = (struct iag *) bmp->data; 2233 } 2234 } else { 2235 /* the iag has free extents. 
if all extents are free 2236 * (as is the case for a newly allocated iag), the iag 2237 * must be added to the ag free extent list, so get 2238 * the iag at the head of the list in preparation for 2239 * adding this iag to this list. 2240 */ 2241 fwd = back = -1; 2242 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2243 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 2244 if ((rc = diIAGRead(imap, fwd, &))) 2245 goto error_out; 2246 aiagp = (struct iag *) amp->data; 2247 } 2248 } 2249 } 2250 2251 /* check if the iag has no free inodes. if so, the iag 2252 * will have to be added to the ag free inode list, so get 2253 * the iag at the head of the list in preparation for 2254 * adding this iag to this list. in doing this, we must 2255 * check if we already have the iag at the head of 2256 * the list in hand. 2257 */ 2258 if (iagp->nfreeinos == 0) { 2259 freei = imap->im_agctl[agno].inofree; 2260 2261 if (freei >= 0) { 2262 if (freei == fwd) { 2263 ciagp = aiagp; 2264 } else if (freei == back) { 2265 ciagp = biagp; 2266 } else { 2267 if ((rc = diIAGRead(imap, freei, &cmp))) 2268 goto error_out; 2269 ciagp = (struct iag *) cmp->data; 2270 } 2271 if (ciagp == NULL) { 2272 jfs_error(imap->im_ipimap->i_sb, 2273 "diNewExt: ciagp == NULL"); 2274 rc = -EIO; 2275 goto error_out; 2276 } 2277 } 2278 } 2279 2280 /* allocate disk space for the inode extent. 2281 */ 2282 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) 2283 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; 2284 else 2285 hint = addressPXD(&iagp->inoext[extno - 1]) + 2286 lengthPXD(&iagp->inoext[extno - 1]) - 1; 2287 2288 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) 2289 goto error_out; 2290 2291 /* compute the inode number of the first inode within the 2292 * extent. 2293 */ 2294 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); 2295 2296 /* initialize the inodes within the newly allocated extent a 2297 * page at a time. 
2298 */ 2299 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { 2300 /* get a buffer for this page of disk inodes. 2301 */ 2302 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); 2303 if (dmp == NULL) { 2304 rc = -EIO; 2305 goto error_out; 2306 } 2307 dp = (struct dinode *) dmp->data; 2308 2309 /* initialize the inode number, mode, link count and 2310 * inode extent address. 2311 */ 2312 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { 2313 dp->di_inostamp = cpu_to_le32(sbi->inostamp); 2314 dp->di_number = cpu_to_le32(ino); 2315 dp->di_fileset = cpu_to_le32(FILESYSTEM_I); 2316 dp->di_mode = 0; 2317 dp->di_nlink = 0; 2318 PXDaddress(&(dp->di_ixpxd), blkno); 2319 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); 2320 } 2321 write_metapage(dmp); 2322 } 2323 2324 /* if this is the last free extent within the iag, remove the 2325 * iag from the ag free extent list. 2326 */ 2327 if (iagp->nfreeexts == cpu_to_le32(1)) { 2328 if (fwd >= 0) 2329 aiagp->extfreeback = iagp->extfreeback; 2330 2331 if (back >= 0) 2332 biagp->extfreefwd = iagp->extfreefwd; 2333 else 2334 imap->im_agctl[agno].extfree = 2335 le32_to_cpu(iagp->extfreefwd); 2336 2337 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2338 } else { 2339 /* if the iag has all free extents (newly allocated iag), 2340 * add the iag to the ag free extent list. 2341 */ 2342 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2343 if (fwd >= 0) 2344 aiagp->extfreeback = cpu_to_le32(iagno); 2345 2346 iagp->extfreefwd = cpu_to_le32(fwd); 2347 iagp->extfreeback = cpu_to_le32(-1); 2348 imap->im_agctl[agno].extfree = iagno; 2349 } 2350 } 2351 2352 /* if the iag has no free inodes, add the iag to the 2353 * ag free inode list. 
2354 */ 2355 if (iagp->nfreeinos == 0) { 2356 if (freei >= 0) 2357 ciagp->inofreeback = cpu_to_le32(iagno); 2358 2359 iagp->inofreefwd = 2360 cpu_to_le32(imap->im_agctl[agno].inofree); 2361 iagp->inofreeback = cpu_to_le32(-1); 2362 imap->im_agctl[agno].inofree = iagno; 2363 } 2364 2365 /* initialize the extent descriptor of the extent. */ 2366 PXDlength(&iagp->inoext[extno], imap->im_nbperiext); 2367 PXDaddress(&iagp->inoext[extno], blkno); 2368 2369 /* initialize the working and persistent map of the extent. 2370 * the working map will be initialized such that 2371 * it indicates the first inode of the extent is allocated. 2372 */ 2373 iagp->wmap[extno] = cpu_to_le32(HIGHORDER); 2374 iagp->pmap[extno] = 0; 2375 2376 /* update the free inode and free extent summary maps 2377 * for the extent to indicate the extent has free inodes 2378 * and no longer represents a free extent. 2379 */ 2380 sword = extno >> L2EXTSPERSUM; 2381 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); 2382 iagp->extsmap[sword] |= cpu_to_le32(mask); 2383 iagp->inosmap[sword] &= cpu_to_le32(~mask); 2384 2385 /* update the free inode and free extent counts for the 2386 * iag. 2387 */ 2388 iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 2389 (INOSPEREXT - 1)); 2390 iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); 2391 2392 /* update the free and backed inode counts for the ag. 2393 */ 2394 imap->im_agctl[agno].numfree += (INOSPEREXT - 1); 2395 imap->im_agctl[agno].numinos += INOSPEREXT; 2396 2397 /* update the free and backed inode counts for the inode map. 2398 */ 2399 atomic_add(INOSPEREXT - 1, &imap->im_numfree); 2400 atomic_add(INOSPEREXT, &imap->im_numinos); 2401 2402 /* write the iags. 2403 */ 2404 if (amp) 2405 write_metapage(amp); 2406 if (bmp) 2407 write_metapage(bmp); 2408 if (cmp) 2409 write_metapage(cmp); 2410 2411 return (0); 2412 2413 error_out: 2414 2415 /* release the iags. 
2416 */ 2417 if (amp) 2418 release_metapage(amp); 2419 if (bmp) 2420 release_metapage(bmp); 2421 if (cmp) 2422 release_metapage(cmp); 2423 2424 return (rc); 2425 } 2426 2427 2428 /* 2429 * NAME: diNewIAG(imap,iagnop,agno) 2430 * 2431 * FUNCTION: allocate a new iag for an allocation group. 2432 * 2433 * first tries to allocate the iag from the inode map 2434 * iagfree list: 2435 * if the list has free iags, the head of the list is removed 2436 * and returned to satisfy the request. 2437 * if the inode map's iag free list is empty, the inode map 2438 * is extended to hold a new iag. this new iag is initialized 2439 * and returned to satisfy the request. 2440 * 2441 * PARAMETERS: 2442 * imap - pointer to inode map control structure. 2443 * iagnop - pointer to an iag number set with the number of the 2444 * newly allocated iag upon successful return. 2445 * agno - allocation group number. 2446 * bpp - Buffer pointer to be filled in with new IAG's buffer 2447 * 2448 * RETURN VALUES: 2449 * 0 - success. 2450 * -ENOSPC - insufficient disk resources. 2451 * -EIO - i/o error. 2452 * 2453 * serialization: 2454 * AG lock held on entry/exit; 2455 * write lock on the map is held inside; 2456 * read lock on the map is held on successful completion; 2457 * 2458 * note: new iag transaction: 2459 * . synchronously write iag; 2460 * . write log of xtree and inode of imap; 2461 * . commit; 2462 * . synchronous write of xtree (right to left, bottom to top); 2463 * . at start of logredo(): init in-memory imap with one additional iag page; 2464 * . 
at end of logredo(): re-read imap inode to determine 2465 * new imap size; 2466 */ 2467 static int 2468 diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) 2469 { 2470 int rc; 2471 int iagno, i, xlen; 2472 struct inode *ipimap; 2473 struct super_block *sb; 2474 struct jfs_sb_info *sbi; 2475 struct metapage *mp; 2476 struct iag *iagp; 2477 s64 xaddr = 0; 2478 s64 blkno; 2479 tid_t tid; 2480 #ifdef _STILL_TO_PORT 2481 xad_t xad; 2482 #endif /* _STILL_TO_PORT */ 2483 struct inode *iplist[1]; 2484 2485 /* pick up pointers to the inode map and mount inodes */ 2486 ipimap = imap->im_ipimap; 2487 sb = ipimap->i_sb; 2488 sbi = JFS_SBI(sb); 2489 2490 /* acquire the free iag lock */ 2491 IAGFREE_LOCK(imap); 2492 2493 /* if there are any iags on the inode map free iag list, 2494 * allocate the iag from the head of the list. 2495 */ 2496 if (imap->im_freeiag >= 0) { 2497 /* pick up the iag number at the head of the list */ 2498 iagno = imap->im_freeiag; 2499 2500 /* determine the logical block number of the iag */ 2501 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2502 } else { 2503 /* no free iags. the inode map will have to be extented 2504 * to include a new iag. 2505 */ 2506 2507 /* acquire inode map lock */ 2508 IWRITE_LOCK(ipimap); 2509 2510 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { 2511 IWRITE_UNLOCK(ipimap); 2512 IAGFREE_UNLOCK(imap); 2513 jfs_error(imap->im_ipimap->i_sb, 2514 "diNewIAG: ipimap->i_size is wrong"); 2515 return -EIO; 2516 } 2517 2518 2519 /* get the next avaliable iag number */ 2520 iagno = imap->im_nextiag; 2521 2522 /* make sure that we have not exceeded the maximum inode 2523 * number limit. 2524 */ 2525 if (iagno > (MAXIAGS - 1)) { 2526 /* release the inode map lock */ 2527 IWRITE_UNLOCK(ipimap); 2528 2529 rc = -ENOSPC; 2530 goto out; 2531 } 2532 2533 /* 2534 * synchronously append new iag page. 
2535 */ 2536 /* determine the logical address of iag page to append */ 2537 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2538 2539 /* Allocate extent for new iag page */ 2540 xlen = sbi->nbperpage; 2541 if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { 2542 /* release the inode map lock */ 2543 IWRITE_UNLOCK(ipimap); 2544 2545 goto out; 2546 } 2547 2548 /* 2549 * start transaction of update of the inode map 2550 * addressing structure pointing to the new iag page; 2551 */ 2552 tid = txBegin(sb, COMMIT_FORCE); 2553 down(&JFS_IP(ipimap)->commit_sem); 2554 2555 /* update the inode map addressing structure to point to it */ 2556 if ((rc = 2557 xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { 2558 txEnd(tid); 2559 up(&JFS_IP(ipimap)->commit_sem); 2560 /* Free the blocks allocated for the iag since it was 2561 * not successfully added to the inode map 2562 */ 2563 dbFree(ipimap, xaddr, (s64) xlen); 2564 2565 /* release the inode map lock */ 2566 IWRITE_UNLOCK(ipimap); 2567 2568 goto out; 2569 } 2570 2571 /* update the inode map's inode to reflect the extension */ 2572 ipimap->i_size += PSIZE; 2573 inode_add_bytes(ipimap, PSIZE); 2574 2575 /* assign a buffer for the page */ 2576 mp = get_metapage(ipimap, blkno, PSIZE, 0); 2577 if (!mp) { 2578 /* 2579 * This is very unlikely since we just created the 2580 * extent, but let's try to handle it correctly 2581 */ 2582 xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, 2583 COMMIT_PWMAP); 2584 2585 txAbort(tid, 0); 2586 txEnd(tid); 2587 2588 /* release the inode map lock */ 2589 IWRITE_UNLOCK(ipimap); 2590 2591 rc = -EIO; 2592 goto out; 2593 } 2594 iagp = (struct iag *) mp->data; 2595 2596 /* init the iag */ 2597 memset(iagp, 0, sizeof(struct iag)); 2598 iagp->iagnum = cpu_to_le32(iagno); 2599 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2600 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2601 iagp->iagfree = cpu_to_le32(-1); 2602 iagp->nfreeinos = 0; 2603 iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); 2604 
2605 /* initialize the free inode summary map (free extent 2606 * summary map initialization handled by bzero). 2607 */ 2608 for (i = 0; i < SMAPSZ; i++) 2609 iagp->inosmap[i] = cpu_to_le32(ONES); 2610 2611 /* 2612 * Write and sync the metapage 2613 */ 2614 flush_metapage(mp); 2615 2616 /* 2617 * txCommit(COMMIT_FORCE) will synchronously write address 2618 * index pages and inode after commit in careful update order 2619 * of address index pages (right to left, bottom up); 2620 */ 2621 iplist[0] = ipimap; 2622 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 2623 2624 txEnd(tid); 2625 up(&JFS_IP(ipimap)->commit_sem); 2626 2627 duplicateIXtree(sb, blkno, xlen, &xaddr); 2628 2629 /* update the next avaliable iag number */ 2630 imap->im_nextiag += 1; 2631 2632 /* Add the iag to the iag free list so we don't lose the iag 2633 * if a failure happens now. 2634 */ 2635 imap->im_freeiag = iagno; 2636 2637 /* Until we have logredo working, we want the imap inode & 2638 * control page to be up to date. 2639 */ 2640 diSync(ipimap); 2641 2642 /* release the inode map lock */ 2643 IWRITE_UNLOCK(ipimap); 2644 } 2645 2646 /* obtain read lock on map */ 2647 IREAD_LOCK(ipimap); 2648 2649 /* read the iag */ 2650 if ((rc = diIAGRead(imap, iagno, &mp))) { 2651 IREAD_UNLOCK(ipimap); 2652 rc = -EIO; 2653 goto out; 2654 } 2655 iagp = (struct iag *) mp->data; 2656 2657 /* remove the iag from the iag free list */ 2658 imap->im_freeiag = le32_to_cpu(iagp->iagfree); 2659 iagp->iagfree = cpu_to_le32(-1); 2660 2661 /* set the return iag number and buffer pointer */ 2662 *iagnop = iagno; 2663 *mpp = mp; 2664 2665 out: 2666 /* release the iag free lock */ 2667 IAGFREE_UNLOCK(imap); 2668 2669 return (rc); 2670 } 2671 2672 /* 2673 * NAME: diIAGRead() 2674 * 2675 * FUNCTION: get the buffer for the specified iag within a fileset 2676 * or aggregate inode map. 2677 * 2678 * PARAMETERS: 2679 * imap - pointer to inode map control structure. 2680 * iagno - iag number. 
2681 * bpp - point to buffer pointer to be filled in on successful 2682 * exit. 2683 * 2684 * SERIALIZATION: 2685 * must have read lock on imap inode 2686 * (When called by diExtendFS, the filesystem is quiesced, therefore 2687 * the read lock is unnecessary.) 2688 * 2689 * RETURN VALUES: 2690 * 0 - success. 2691 * -EIO - i/o error. 2692 */ 2693 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2694 { 2695 struct inode *ipimap = imap->im_ipimap; 2696 s64 blkno; 2697 2698 /* compute the logical block number of the iag. */ 2699 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2700 2701 /* read the iag. */ 2702 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2703 if (*mpp == NULL) { 2704 return -EIO; 2705 } 2706 2707 return (0); 2708 } 2709 2710 /* 2711 * NAME: diFindFree() 2712 * 2713 * FUNCTION: find the first free bit in a word starting at 2714 * the specified bit position. 2715 * 2716 * PARAMETERS: 2717 * word - word to be examined. 2718 * start - starting bit position. 2719 * 2720 * RETURN VALUES: 2721 * bit position of first free bit in the word or 32 if 2722 * no free bits were found. 2723 */ 2724 static int diFindFree(u32 word, int start) 2725 { 2726 int bitno; 2727 assert(start < 32); 2728 /* scan the word for the first free bit. */ 2729 for (word <<= start, bitno = start; bitno < 32; 2730 bitno++, word <<= 1) { 2731 if ((word & HIGHORDER) == 0) 2732 break; 2733 } 2734 return (bitno); 2735 } 2736 2737 /* 2738 * NAME: diUpdatePMap() 2739 * 2740 * FUNCTION: Update the persistent map in an IAG for the allocation or 2741 * freeing of the specified inode. 2742 * 2743 * PRE CONDITIONS: Working map has already been updated for allocate. 2744 * 2745 * PARAMETERS: 2746 * ipimap - Incore inode map inode 2747 * inum - Number of inode to mark in permanent map 2748 * is_free - If TRUE indicates inode should be marked freed, otherwise 2749 * indicates inode should be marked allocated. 
2750 * 2751 * RETURN VALUES: 2752 * 0 for success 2753 */ 2754 int 2755 diUpdatePMap(struct inode *ipimap, 2756 unsigned long inum, boolean_t is_free, struct tblock * tblk) 2757 { 2758 int rc; 2759 struct iag *iagp; 2760 struct metapage *mp; 2761 int iagno, ino, extno, bitno; 2762 struct inomap *imap; 2763 u32 mask; 2764 struct jfs_log *log; 2765 int lsn, difft, diffp; 2766 unsigned long flags; 2767 2768 imap = JFS_IP(ipimap)->i_imap; 2769 /* get the iag number containing the inode */ 2770 iagno = INOTOIAG(inum); 2771 /* make sure that the iag is contained within the map */ 2772 if (iagno >= imap->im_nextiag) { 2773 jfs_error(ipimap->i_sb, 2774 "diUpdatePMap: the iag is outside the map"); 2775 return -EIO; 2776 } 2777 /* read the iag */ 2778 IREAD_LOCK(ipimap); 2779 rc = diIAGRead(imap, iagno, &mp); 2780 IREAD_UNLOCK(ipimap); 2781 if (rc) 2782 return (rc); 2783 metapage_wait_for_io(mp); 2784 iagp = (struct iag *) mp->data; 2785 /* get the inode number and extent number of the inode within 2786 * the iag and the inode number within the extent. 
2787 */ 2788 ino = inum & (INOSPERIAG - 1); 2789 extno = ino >> L2INOSPEREXT; 2790 bitno = ino & (INOSPEREXT - 1); 2791 mask = HIGHORDER >> bitno; 2792 /* 2793 * mark the inode free in persistent map: 2794 */ 2795 if (is_free == TRUE) { 2796 /* The inode should have been allocated both in working 2797 * map and in persistent map; 2798 * the inode will be freed from working map at the release 2799 * of last reference release; 2800 */ 2801 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2802 jfs_error(ipimap->i_sb, 2803 "diUpdatePMap: inode %ld not marked as " 2804 "allocated in wmap!", inum); 2805 } 2806 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { 2807 jfs_error(ipimap->i_sb, 2808 "diUpdatePMap: inode %ld not marked as " 2809 "allocated in pmap!", inum); 2810 } 2811 /* update the bitmap for the extent of the freed inode */ 2812 iagp->pmap[extno] &= cpu_to_le32(~mask); 2813 } 2814 /* 2815 * mark the inode allocated in persistent map: 2816 */ 2817 else { 2818 /* The inode should be already allocated in the working map 2819 * and should be free in persistent map; 2820 */ 2821 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2822 release_metapage(mp); 2823 jfs_error(ipimap->i_sb, 2824 "diUpdatePMap: the inode is not allocated in " 2825 "the working map"); 2826 return -EIO; 2827 } 2828 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { 2829 release_metapage(mp); 2830 jfs_error(ipimap->i_sb, 2831 "diUpdatePMap: the inode is not free in the " 2832 "persistent map"); 2833 return -EIO; 2834 } 2835 /* update the bitmap for the extent of the allocated inode */ 2836 iagp->pmap[extno] |= cpu_to_le32(mask); 2837 } 2838 /* 2839 * update iag lsn 2840 */ 2841 lsn = tblk->lsn; 2842 log = JFS_SBI(tblk->sb)->log; 2843 if (mp->lsn != 0) { 2844 /* inherit older/smaller lsn */ 2845 logdiff(difft, lsn, log); 2846 logdiff(diffp, mp->lsn, log); 2847 LOGSYNC_LOCK(log, flags); 2848 if (difft < diffp) { 2849 mp->lsn = lsn; 2850 /* move mp after tblock in logsync list */ 2851 
list_move(&mp->synclist, &tblk->synclist); 2852 } 2853 /* inherit younger/larger clsn */ 2854 assert(mp->clsn); 2855 logdiff(difft, tblk->clsn, log); 2856 logdiff(diffp, mp->clsn, log); 2857 if (difft > diffp) 2858 mp->clsn = tblk->clsn; 2859 LOGSYNC_UNLOCK(log, flags); 2860 } else { 2861 mp->log = log; 2862 mp->lsn = lsn; 2863 /* insert mp after tblock in logsync list */ 2864 LOGSYNC_LOCK(log, flags); 2865 log->count++; 2866 list_add(&mp->synclist, &tblk->synclist); 2867 mp->clsn = tblk->clsn; 2868 LOGSYNC_UNLOCK(log, flags); 2869 } 2870 write_metapage(mp); 2871 return (0); 2872 } 2873 2874 /* 2875 * diExtendFS() 2876 * 2877 * function: update imap for extendfs(); 2878 * 2879 * note: AG size has been increased s.t. each k old contiguous AGs are 2880 * coalesced into a new AG; 2881 */ 2882 int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2883 { 2884 int rc, rcx = 0; 2885 struct inomap *imap = JFS_IP(ipimap)->i_imap; 2886 struct iag *iagp = NULL, *hiagp = NULL; 2887 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; 2888 struct metapage *bp, *hbp; 2889 int i, n, head; 2890 int numinos, xnuminos = 0, xnumfree = 0; 2891 s64 agstart; 2892 2893 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", 2894 imap->im_nextiag, atomic_read(&imap->im_numinos), 2895 atomic_read(&imap->im_numfree)); 2896 2897 /* 2898 * reconstruct imap 2899 * 2900 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2901 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2902 * note: new AG size = old AG size * (2**x). 2903 */ 2904 2905 /* init per AG control information im_agctl[] */ 2906 for (i = 0; i < MAXAG; i++) { 2907 imap->im_agctl[i].inofree = -1; 2908 imap->im_agctl[i].extfree = -1; 2909 imap->im_agctl[i].numinos = 0; /* number of backed inodes */ 2910 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ 2911 } 2912 2913 /* 2914 * process each iag page of the map. 
2915 * 2916 * rebuild AG Free Inode List, AG Free Inode Extent List; 2917 */ 2918 for (i = 0; i < imap->im_nextiag; i++) { 2919 if ((rc = diIAGRead(imap, i, &bp))) { 2920 rcx = rc; 2921 continue; 2922 } 2923 iagp = (struct iag *) bp->data; 2924 if (le32_to_cpu(iagp->iagnum) != i) { 2925 release_metapage(bp); 2926 jfs_error(ipimap->i_sb, 2927 "diExtendFs: unexpected value of iagnum"); 2928 return -EIO; 2929 } 2930 2931 /* leave free iag in the free iag list */ 2932 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2933 release_metapage(bp); 2934 continue; 2935 } 2936 2937 /* agstart that computes to the same ag is treated as same; */ 2938 agstart = le64_to_cpu(iagp->agstart); 2939 /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ 2940 n = agstart >> mp->db_agl2size; 2941 2942 /* compute backed inodes */ 2943 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2944 << L2INOSPEREXT; 2945 if (numinos > 0) { 2946 /* merge AG backed inodes */ 2947 imap->im_agctl[n].numinos += numinos; 2948 xnuminos += numinos; 2949 } 2950 2951 /* if any backed free inodes, insert at AG free inode list */ 2952 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { 2953 if ((head = imap->im_agctl[n].inofree) == -1) { 2954 iagp->inofreefwd = cpu_to_le32(-1); 2955 iagp->inofreeback = cpu_to_le32(-1); 2956 } else { 2957 if ((rc = diIAGRead(imap, head, &hbp))) { 2958 rcx = rc; 2959 goto nextiag; 2960 } 2961 hiagp = (struct iag *) hbp->data; 2962 hiagp->inofreeback = iagp->iagnum; 2963 iagp->inofreefwd = cpu_to_le32(head); 2964 iagp->inofreeback = cpu_to_le32(-1); 2965 write_metapage(hbp); 2966 } 2967 2968 imap->im_agctl[n].inofree = 2969 le32_to_cpu(iagp->iagnum); 2970 2971 /* merge AG backed free inodes */ 2972 imap->im_agctl[n].numfree += 2973 le32_to_cpu(iagp->nfreeinos); 2974 xnumfree += le32_to_cpu(iagp->nfreeinos); 2975 } 2976 2977 /* if any free extents, insert at AG free extent list */ 2978 if (le32_to_cpu(iagp->nfreeexts) > 0) { 2979 if ((head = imap->im_agctl[n].extfree) == -1) { 2980 
iagp->extfreefwd = cpu_to_le32(-1); 2981 iagp->extfreeback = cpu_to_le32(-1); 2982 } else { 2983 if ((rc = diIAGRead(imap, head, &hbp))) { 2984 rcx = rc; 2985 goto nextiag; 2986 } 2987 hiagp = (struct iag *) hbp->data; 2988 hiagp->extfreeback = iagp->iagnum; 2989 iagp->extfreefwd = cpu_to_le32(head); 2990 iagp->extfreeback = cpu_to_le32(-1); 2991 write_metapage(hbp); 2992 } 2993 2994 imap->im_agctl[n].extfree = 2995 le32_to_cpu(iagp->iagnum); 2996 } 2997 2998 nextiag: 2999 write_metapage(bp); 3000 } 3001 3002 if (xnuminos != atomic_read(&imap->im_numinos) || 3003 xnumfree != atomic_read(&imap->im_numfree)) { 3004 jfs_error(ipimap->i_sb, 3005 "diExtendFs: numinos or numfree incorrect"); 3006 return -EIO; 3007 } 3008 3009 return rcx; 3010 } 3011 3012 3013 /* 3014 * duplicateIXtree() 3015 * 3016 * serialization: IWRITE_LOCK held on entry/exit 3017 * 3018 * note: shadow page with regular inode (rel.2); 3019 */ 3020 static void duplicateIXtree(struct super_block *sb, s64 blkno, 3021 int xlen, s64 *xaddr) 3022 { 3023 struct jfs_superblock *j_sb; 3024 struct buffer_head *bh; 3025 struct inode *ip; 3026 tid_t tid; 3027 3028 /* if AIT2 ipmap2 is bad, do not try to update it */ 3029 if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ 3030 return; 3031 ip = diReadSpecial(sb, FILESYSTEM_I, 1); 3032 if (ip == NULL) { 3033 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3034 if (readSuper(sb, &bh)) 3035 return; 3036 j_sb = (struct jfs_superblock *)bh->b_data; 3037 j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); 3038 3039 mark_buffer_dirty(bh); 3040 sync_dirty_buffer(bh); 3041 brelse(bh); 3042 return; 3043 } 3044 3045 /* start transaction */ 3046 tid = txBegin(sb, COMMIT_FORCE); 3047 /* update the inode map addressing structure to point to it */ 3048 if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { 3049 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3050 txAbort(tid, 1); 3051 goto cleanup; 3052 3053 } 3054 /* update the inode map's inode to reflect the extension */ 3055 ip->i_size += PSIZE; 3056 
inode_add_bytes(ip, PSIZE); 3057 txCommit(tid, 1, &ip, COMMIT_FORCE); 3058 cleanup: 3059 txEnd(tid); 3060 diFreeSpecial(ip); 3061 } 3062 3063 /* 3064 * NAME: copy_from_dinode() 3065 * 3066 * FUNCTION: Copies inode info from disk inode to in-memory inode 3067 * 3068 * RETURN VALUES: 3069 * 0 - success 3070 * -ENOMEM - insufficient memory 3071 */ 3072 static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3073 { 3074 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3075 3076 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3077 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3078 3079 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3080 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3081 ip->i_uid = le32_to_cpu(dip->di_uid); 3082 ip->i_gid = le32_to_cpu(dip->di_gid); 3083 ip->i_size = le64_to_cpu(dip->di_size); 3084 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); 3085 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); 3086 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); 3087 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3088 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3089 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3090 ip->i_blksize = ip->i_sb->s_blocksize; 3091 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3092 ip->i_generation = le32_to_cpu(dip->di_gen); 3093 3094 jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ 3095 jfs_ip->acl = dip->di_acl; /* as are dxd's */ 3096 jfs_ip->ea = dip->di_ea; 3097 jfs_ip->next_index = le32_to_cpu(dip->di_next_index); 3098 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); 3099 jfs_ip->acltype = le32_to_cpu(dip->di_acltype); 3100 3101 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { 3102 jfs_ip->dev = le32_to_cpu(dip->di_rdev); 3103 ip->i_rdev = new_decode_dev(jfs_ip->dev); 3104 } 3105 3106 if (S_ISDIR(ip->i_mode)) { 3107 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); 3108 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) 
{ 3109 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); 3110 } else 3111 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); 3112 3113 /* Zero the in-memory-only stuff */ 3114 jfs_ip->cflag = 0; 3115 jfs_ip->btindex = 0; 3116 jfs_ip->btorder = 0; 3117 jfs_ip->bxflag = 0; 3118 jfs_ip->blid = 0; 3119 jfs_ip->atlhead = 0; 3120 jfs_ip->atltail = 0; 3121 jfs_ip->xtlid = 0; 3122 return (0); 3123 } 3124 3125 /* 3126 * NAME: copy_to_dinode() 3127 * 3128 * FUNCTION: Copies inode info from in-memory inode to disk inode 3129 */ 3130 static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3131 { 3132 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3133 3134 dip->di_fileset = cpu_to_le32(jfs_ip->fileset); 3135 dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); 3136 dip->di_number = cpu_to_le32(ip->i_ino); 3137 dip->di_gen = cpu_to_le32(ip->i_generation); 3138 dip->di_size = cpu_to_le64(ip->i_size); 3139 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3140 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3141 dip->di_uid = cpu_to_le32(ip->i_uid); 3142 dip->di_gid = cpu_to_le32(ip->i_gid); 3143 /* 3144 * mode2 is only needed for storing the higher order bits. 
3145 * Trust i_mode for the lower order ones 3146 */ 3147 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); 3148 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); 3149 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); 3150 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); 3151 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); 3152 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); 3153 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); 3154 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ 3155 dip->di_acl = jfs_ip->acl; /* as are dxd's */ 3156 dip->di_ea = jfs_ip->ea; 3157 dip->di_next_index = cpu_to_le32(jfs_ip->next_index); 3158 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); 3159 dip->di_otime.tv_nsec = 0; 3160 dip->di_acltype = cpu_to_le32(jfs_ip->acltype); 3161 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3162 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3163 } 3164