/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */


#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/disp.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_bio.h>
#include <vm/seg.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>

/*
 * This structure is used to track blocks as we allocate them, so that
 * we can free them if we encounter an error during allocation.  We
 * keep track of five pieces of information for each allocated block:
 *   - The number of the newly allocated block
 *   - The size of the block (lets us deal with fragments if we want)
 *   - The number of the block containing a pointer to it; or whether
 *     the pointer is in the inode
 *   - The offset within the block (or inode) containing a pointer to it.
 *   - A flag indicating the usage of the block.  (Logging needs to know
 *     this to avoid overwriting a data block if it was previously used
 *     for metadata.)
 */

enum ufs_owner_type {
	ufs_no_owner,		/* Owner has not yet been updated */
	ufs_inode_direct,	/* Listed in inode's direct block table */
	ufs_inode_indirect,	/* Listed in inode's indirect block table */
	ufs_indirect_block	/* Listed in an indirect block */
};

struct ufs_allocated_block {
	daddr_t	this_block;		/* Number of this block */
	off_t	block_size;		/* Size of this block, in bytes */
	enum ufs_owner_type owner;	/* Who points to this block? */
	daddr_t	owner_block;		/* Number of the owning block */
	uint_t	owner_offset;		/* Offset within that block or inode */
	int	usage_flags;		/* Usage flags, as expected by free() */
};
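
/*
 * Illustrative example (hypothetical block numbers): after bmap_write
 * below allocates a first-level indirect block and then a data block
 * referenced from it, the tracking table could hold entries like:
 *
 *	{ this_block = 5120, block_size = 8192, owner = ufs_inode_indirect,
 *	  owner_offset = 0, usage_flags = I_IBLK }
 *	{ this_block = 5128, block_size = 8192, owner = ufs_indirect_block,
 *	  owner_block = 5120, owner_offset = 3, usage_flags = 0 }
 */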

static	int	findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp,
		int maxtrans);

static	void	ufs_undo_allocation(inode_t *ip, int block_count,
	struct ufs_allocated_block table[], int inode_sector_adjust);

/*
 * Find the extent and the matching block number.
 *
 * bsize > PAGESIZE
 *	boff indicates that we want a page in the middle
 *	min expression is supposed to make sure no extra page[s] after EOF
 * PAGESIZE >= bsize
 *	we assume that a page is a multiple of bsize, i.e.,
 *	boff always == 0
 *
 * We always return a length that is suitable for a disk transfer.
 */
#define	DOEXTENT(fs, lbn, boff, bnp, lenp, size, tblp, n, chkfrag, maxtrans) {\
	register daddr32_t *dp = (tblp);				\
	register int _chkfrag = chkfrag; /* for lint. sigh */		\
									\
	if (*dp == 0) {							\
		*(bnp) = UFS_HOLE;					\
	} else {							\
		register int len;					\
									\
		len = findextent(fs, dp, (int)(n), lenp, maxtrans) <<	\
		    (fs)->fs_bshift;					\
		if (_chkfrag) {						\
			register u_offset_t tmp;			\
									\
			tmp = fragroundup((fs), size) -			\
			    (((u_offset_t)lbn) << fs->fs_bshift);	\
			len = (int)MIN(tmp, len);			\
		}							\
		len -= (boff);						\
		if (len <= 0) {						\
			*(bnp) = UFS_HOLE;				\
		} else {						\
			*(bnp) = fsbtodb(fs, *dp) + btodb(boff);	\
			*(lenp) = len;					\
		}							\
	}								\
}
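
/*
 * A worked example of what DOEXTENT computes (illustrative values,
 * ignoring the maxtrans/maxcontig clamping inside findextent): with
 * fs_bsize = 8192 (fs_bshift = 13), fs_frag = 8, boff = 0 and a block
 * table tblp = { 64, 72, 80, 0, ... }, findextent() finds 3 contiguous
 * file system blocks, so len = 3 << 13 = 24576 bytes, and the caller
 * gets back *bnp = fsbtodb(fs, 64) and *lenp = 24576.  If *tblp were
 * 0, *bnp would be set to UFS_HOLE instead.
 */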

/*
 * The maximum supported file size is actually somewhat less than 1
 * terabyte.  This is because the total number of blocks used for the
 * file and its metadata must fit into the ic_blocks field of the
 * inode, which is a signed 32-bit quantity.  The metadata allocated
 * for a file (that is, the single, double, and triple indirect blocks
 * used to reference the file blocks) is actually quite small, but just
 * to make sure, we check for overflow in the ic_blocks field for all
 * files whose total block count is within 1 GB of a terabyte.
 * VERYLARGEFILESIZE below is the number of 512-byte blocks in a
 * terabyte (2^31), minus the number of 512-byte blocks in a gigabyte
 * (2^21).  We only check for overflow in the ic_blocks field if the
 * number of blocks currently allocated to the file is greater than
 * VERYLARGEFILESIZE.
 *
 * Note that file "size" is not the same as file "length".  A file's
 * "size" is the number of blocks allocated to it.  A file's "length"
 * is the maximum offset in the file.  A UFS file can have a length of
 * a terabyte, but the size is limited to somewhat less than a
 * terabyte, as described above.
 */
#define	VERYLARGEFILESIZE	0x7FE00000
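
/*
 * Sanity check of the constant above: 2^31 (512-byte blocks in a
 * terabyte) is 0x80000000, and 2^21 (512-byte blocks in a gigabyte)
 * is 0x00200000; their difference is 0x7FE00000, the value of
 * VERYLARGEFILESIZE.
 */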

/*
 * bmap{read,write} define the structure of file system storage by mapping
 * a logical offset in a file to a physical block number on the device.
 * It should be called with a locked inode when allocation is to be
 * done (bmap_write).  Note this strangeness: bmap_write is always called from
 * getpage(), not putpage(), since getpage() is where all the allocation
 * is done.
 *
 * S_READ, S_OTHER -> bmap_read; S_WRITE -> bmap_write.
 *
 * NOTICE: the block number returned is the disk block number, not the
 * file system block number.  All the worries about block offsets and
 * page/block sizes are hidden inside of bmap.  Well, not quite,
 * unfortunately.  It's impossible to find one place to hide all this
 * mess.  There are 3 cases:
 *
 * PAGESIZE < bsize
 *	In this case, the {get,put}page routines will attempt to align to
 *	a file system block boundary (XXX - maybe this is a mistake?).  Since
 *	the kluster routines may be out of memory, we don't always get all
 *	the pages we wanted.  If we called bmap first, to find out how much
 *	to kluster, we handed in the block aligned offset.  If we didn't get
 *	all the pages, we have to chop off the amount we didn't get from the
 *	amount handed back by bmap.
 *
 * PAGESIZE == bsize
 *	Life is quite pleasant here, no extra work needed, mainly because we
 *	(probably?) won't kluster backwards, just forwards.
 *
 * PAGESIZE > bsize
 *	This one has a different set of problems, specifically, we may have to
 *	do N reads to fill one page.  Let us hope that Sun will stay with small
 *	pages.
 *
 * Returns 0 on success, or a non-zero errno if an error occurs.
 *
 * TODO
 *	LMXXX - add a bmap cache.  This could be a couple of extents in the
 *	inode.  Two is nice for PAGESIZE > bsize.
 */

int
bmap_read(struct inode *ip, u_offset_t off, daddr_t *bnp, int *lenp)
{
	daddr_t lbn;
	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
	struct fs *fs = ufsvfsp->vfs_fs;
	struct buf *bp;
	int i, j, boff;
	int shft;			/* we maintain sh = 1 << shft */
	daddr_t ob, nb, tbn;
	daddr32_t *bap;
	int nindirshift, nindiroffset;

	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbn = (daddr_t)lblkno(fs, off);
	boff = (int)blkoff(fs, off);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		DOEXTENT(fs, lbn, boff, bnp, lenp,
		    ip->i_size, &ip->i_db[lbn], NDADDR - lbn, 1,
		    ufsvfsp->vfs_iotransz);
		return (0);
	}

	nindirshift = ufsvfsp->vfs_nindirshift;
	nindiroffset = ufsvfsp->vfs_nindiroffset;
	/*
	 * Determine how many levels of indirection.
	 */
	shft = 0;			/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;	/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}
	if (j == 0)
		return (EFBIG);

	/*
	 * Fetch the first indirect block.
	 */
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		*bnp = UFS_HOLE;
		return (0);
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; j <= NIADDR; j++) {
		ob = nb;
		bp = UFS_BREAD(ufsvfsp,
		    ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			return (EIO);
		}
		bap = bp->b_un.b_daddr;

		ASSERT(!ufs_indir_badblock(ip, bap));

		shft -= nindirshift;	/* sh / nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
		nb = bap[i];
		if (nb == 0) {
			*bnp = UFS_HOLE;
			brelse(bp);
			return (0);
		}
		if (j != NIADDR)
			brelse(bp);
	}
	DOEXTENT(fs, lbn, boff, bnp, lenp, ip->i_size, &bap[i],
	    MIN(NINDIR(fs) - i, (daddr_t)lblkno(fs, ip->i_size - 1) - lbn + 1),
	    0, ufsvfsp->vfs_iotransz);
	brelse(bp);
	return (0);
}
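
/*
 * Worked example of the level-of-indirection loop above (also used by
 * bmap_write and bmap_find), assuming fs_bsize = 8192 so that
 * nindir = 2048 and nindirshift = 11, with NDADDR = 12, NIADDR = 3.
 * For lbn = 13000:
 *
 *	tbn = 13000 - NDADDR = 12988
 *	j = 3: sh = 1 << 11 = 2048; tbn >= sh, so tbn -= 2048 -> 10940
 *	j = 2: sh = 1 << 22;        tbn < sh, so the loop stops
 *
 * The block therefore lives under the double indirect block
 * (ip->i_ib[NIADDR - j] == ip->i_ib[1]), and at each level
 * (tbn >> shft) & nindiroffset selects the slot within the
 * indirect block.
 */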

/*
 * See bmap_read for general notes.
 *
 * The block must be at least size bytes and will be extended or
 * allocated as needed.  If alloc_type is BI_ALLOC_ONLY, then bmap
 * will not create any in-core pages that correspond to the new disk
 * allocation.  If alloc_type is BI_FALLOCATE, blocks will be stored as
 * (-1) * block addr, and security is maintained because pages read
 * through a negative block number are zeroed.  For all other
 * allocation types (BI_NORMAL) the in-core pages will be created and
 * initialized as needed.
 *
 * Returns 0 on success, or a non-zero errno if an error occurs.
 */
int
bmap_write(struct inode *ip, u_offset_t off, int size,
    enum bi_type alloc_type, daddr_t *allocblk, struct cred *cr)
{
	struct fs *fs;
	struct buf *bp;
	int i;
	struct buf *nbp;
	int j;
	int shft;			/* we maintain sh = 1 << shft */
	daddr_t ob, nb, pref, lbn, llbn, tbn;
	daddr32_t *bap;
	struct vnode *vp = ITOV(ip);
	long bsize = VBSIZE(vp);
	long osize, nsize;
	int issync, metaflag, isdirquota;
	int err;
	dev_t dev;
	struct fbuf *fbp;
	int nindirshift;
	int nindiroffset;
	struct ufsvfs *ufsvfsp;
	int added_sectors;		/* sectors added to this inode */
	int alloced_blocks;		/* fs blocks newly allocated */
	struct ufs_allocated_block undo_table[NIADDR+1];
	int verylargefile = 0;

	ASSERT(RW_WRITE_HELD(&ip->i_contents));

	if (allocblk)
		*allocblk = 0;

	ufsvfsp = ip->i_ufsvfs;
	fs = ufsvfsp->vfs_bufp->b_un.b_fs;
	lbn = (daddr_t)lblkno(fs, off);
	if (lbn < 0)
		return (EFBIG);
	if (ip->i_blocks >= VERYLARGEFILESIZE)
		verylargefile = 1;
	llbn = (daddr_t)((ip->i_size) ? lblkno(fs, ip->i_size - 1) : 0);
	metaflag = isdirquota = 0;
	if (((ip->i_mode & IFMT) == IFDIR) ||
	    ((ip->i_mode & IFMT) == IFATTRDIR))
		isdirquota = metaflag = I_DIR;
	else if ((ip->i_mode & IFMT) == IFSHAD)
		metaflag = I_SHAD;
	else if (ip->i_ufsvfs->vfs_qinod == ip)
		isdirquota = metaflag = I_QUOTA;

	issync = ((ip->i_flag & ISYNC) != 0);

	if (isdirquota || issync) {
		alloc_type = BI_NORMAL;	/* make sure */
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment, this
	 * fragment has to be extended to be a full block.
	 */
	if (llbn < NDADDR && llbn < lbn && (ob = ip->i_db[llbn]) != 0) {
		osize = blksize(fs, ip, llbn);
		if (osize < bsize && osize > 0) {
			/*
			 * Check to see if doing this will make the file too
			 * big.  Only check if we are dealing with a very
			 * large file.
			 */
			if (verylargefile == 1) {
				if (((unsigned)ip->i_blocks +
				    btodb(bsize - osize)) > INT_MAX) {
					return (EFBIG);
				}
			}
			/*
			 * Make sure we have all needed pages setup correctly.
			 *
			 * We pass S_OTHER to fbread here because we want
			 * an exclusive lock on the page in question
			 * (see ufs_getpage).  I/O to the old block location
			 * may still be in progress and we are about to free
			 * the old block.  We don't want anyone else to get
			 * a hold of the old block once we free it until
			 * the I/O is complete.
			 */
			err =
			    fbread(ITOV(ip), ((offset_t)llbn << fs->fs_bshift),
			    (uint_t)bsize, S_OTHER, &fbp);
			if (err)
				return (err);
			pref = blkpref(ip, llbn, (int)llbn, &ip->i_db[0]);
			err = realloccg(ip, ob, pref, (int)osize, (int)bsize,
			    &nb, cr);
			if (err) {
				if (fbp)
					fbrelse(fbp, S_OTHER);
				return (err);
			}
			ASSERT(!ufs_badblock(ip, nb));

			/*
			 * Update the inode before releasing the
			 * lock on the page.  If we released the page
			 * lock first, the data could be written to its
			 * old address and then destroyed.
			 */
			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
			ip->i_db[llbn] = nb;
			UFS_SET_ISIZE(((u_offset_t)(llbn + 1)) << fs->fs_bshift,
			    ip);
			ip->i_blocks += btodb(bsize - osize);
			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
			TRANS_INODE(ufsvfsp, ip);
			ip->i_flag |= IUPD | ICHG | IATTCHG;

			/* Caller is responsible for updating i_seq */
			/*
			 * Don't check metaflag here, directories won't do this
			 */
			if (issync) {
				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
			} else {
				ASSERT(fbp);
				fbrelse(fbp, S_WRITE);
			}

			if (nb != ob) {
				(void) free(ip, ob, (off_t)osize, metaflag);
			}
		}
	}
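
	/*
	 * Worked example of the fragment extension above, assuming
	 * fs_bsize = 8192 and fs_fsize = 1024: if the file's last
	 * (direct) block is a 3-fragment allocation, osize = 3072.
	 * When a write extends the file past that block, realloccg()
	 * grows the fragment to a full block, and i_blocks grows by
	 * btodb(8192 - 3072) = 10 sectors.
	 */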

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb == 0 ||
		    ip->i_size < ((u_offset_t)(lbn + 1)) << fs->fs_bshift) {
			if (nb != 0) {
				/* consider need to reallocate a frag */
				osize = fragroundup(fs, blkoff(fs, ip->i_size));
				nsize = fragroundup(fs, size);
				if (nsize <= osize)
					goto gotit;
				/*
				 * Check to see if doing this will make the
				 * file too big.  Only check if we are dealing
				 * with a very large file.
				 */
				if (verylargefile == 1) {
					if (((unsigned)ip->i_blocks +
					    btodb(nsize - osize)) > INT_MAX) {
						return (EFBIG);
					}
				}
				/*
				 * need to re-allocate a block or frag
				 */
				ob = nb;
				pref = blkpref(ip, lbn, (int)lbn,
				    &ip->i_db[0]);
				err = realloccg(ip, ob, pref, (int)osize,
				    (int)nsize, &nb, cr);
				if (err)
					return (err);
				if (allocblk)
					*allocblk = nb;
				ASSERT(!ufs_badblock(ip, nb));

			} else {
				/*
				 * need to allocate a block or frag
				 */
				osize = 0;
				if (ip->i_size <
				    ((u_offset_t)(lbn + 1)) << fs->fs_bshift)
					nsize = fragroundup(fs, size);
				else
					nsize = bsize;
				/*
				 * Check to see if doing this will make the
				 * file too big.  Only check if we are dealing
				 * with a very large file.
				 */
				if (verylargefile == 1) {
					if (((unsigned)ip->i_blocks +
					    btodb(nsize - osize)) > INT_MAX) {
						return (EFBIG);
					}
				}
				pref = blkpref(ip, lbn, (int)lbn, &ip->i_db[0]);
				err = alloc(ip, pref, (int)nsize, &nb, cr);
				if (err)
					return (err);
				if (allocblk)
					*allocblk = nb;
				ASSERT(!ufs_badblock(ip, nb));
				ob = nb;
			}

			/*
			 * Read old/create new zero pages
			 */
			fbp = NULL;
			if (osize == 0) {
				/*
				 * mmap S_WRITE faults always enter here
				 */
				/*
				 * We zero it if it's also BI_FALLOCATE, but
				 * only for direct blocks!
				 */
				if (alloc_type == BI_NORMAL ||
				    alloc_type == BI_FALLOCATE ||
				    P2ROUNDUP_TYPED(size,
				    PAGESIZE, u_offset_t) < nsize) {
					/* fbzero doesn't cause a pagefault */
					fbzero(ITOV(ip),
					    ((offset_t)lbn << fs->fs_bshift),
					    (uint_t)nsize, &fbp);
				}
			} else {
				err = fbread(vp,
				    ((offset_t)lbn << fs->fs_bshift),
				    (uint_t)nsize, S_OTHER, &fbp);
				if (err) {
					if (nb != ob) {
						(void) free(ip, nb,
						    (off_t)nsize, metaflag);
					} else {
						(void) free(ip,
						    ob + numfrags(fs, osize),
						    (off_t)(nsize - osize),
						    metaflag);
					}
					ASSERT(nsize >= osize);
					(void) chkdq(ip,
					    -(long)btodb(nsize - osize),
					    0, cr, (char **)NULL,
					    (size_t *)NULL);
					return (err);
				}
			}
			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, nsize, 0);
			ip->i_db[lbn] = nb;
			ip->i_blocks += btodb(nsize - osize);
			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
			TRANS_INODE(ufsvfsp, ip);
			ip->i_flag |= IUPD | ICHG | IATTCHG;

			/* Caller is responsible for updating i_seq */

			/*
			 * Write directory and shadow blocks synchronously so
			 * that they never appear with garbage in them on the
			 * disk.
			 */
			if (isdirquota && (ip->i_size ||
			    TRANS_ISTRANS(ufsvfsp))) {
				/*
				 * XXX may not be necessary with harpy trans
				 * bug id 1130055
				 */
				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
			} else if (fbp) {
				fbrelse(fbp, S_WRITE);
			}

			if (nb != ob)
				(void) free(ip, ob, (off_t)osize, metaflag);
		}
gotit:
		return (0);
	}
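
	/*
	 * A small numeric illustration of the zeroing condition above,
	 * assuming PAGESIZE = 4096: if a full block (nsize = 8192) is
	 * allocated for a 2000-byte request, P2ROUNDUP(2000, 4096) =
	 * 4096 < 8192, so the pages are fbzero'd even for BI_ALLOC_ONLY,
	 * presumably so the untouched tail of the new block is never
	 * left holding stale contents.
	 */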

	added_sectors = alloced_blocks = 0;	/* No blocks alloced yet */

	/*
	 * Determine how many levels of indirection.
	 */
	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
	pref = 0;
	shft = 0;				/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;		/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}

	if (j == 0)
		return (EFBIG);

	/*
	 * Fetch the first indirect block.
	 */
	dev = ip->i_dev;
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		/*
		 * Check to see if doing this will make the
		 * file too big.  Only check if we are dealing
		 * with a very large file.
		 */
		if (verylargefile == 1) {
			if (((unsigned)ip->i_blocks + btodb(bsize))
			    > INT_MAX) {
				return (EFBIG);
			}
		}
		/*
		 * Need to allocate an indirect block.
		 */
		pref = blkpref(ip, lbn, 0, (daddr32_t *)0);
		err = alloc(ip, pref, (int)bsize, &nb, cr);
		if (err)
			return (err);
		TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
		ASSERT(!ufs_badblock(ip, nb));

		/*
		 * Keep track of this allocation so we can undo it if we
		 * get an error later.
		 */

		ASSERT(alloced_blocks <= NIADDR);

		undo_table[alloced_blocks].this_block = nb;
		undo_table[alloced_blocks].block_size = bsize;
		undo_table[alloced_blocks].owner = ufs_no_owner;
		undo_table[alloced_blocks].usage_flags = metaflag | I_IBLK;

		alloced_blocks++;

		/*
		 * Write zero block synchronously so that
		 * indirect blocks never point at garbage.
		 */
		bp = UFS_GETBLK(ufsvfsp, dev, fsbtodb(fs, nb), bsize);

		clrbuf(bp);
		/* XXX Maybe special-case this? */
		TRANS_BUF(ufsvfsp, 0, bsize, bp, DT_ABZERO);
		UFS_BWRITE2(ufsvfsp, bp);
		if (bp->b_flags & B_ERROR) {
			err = geterror(bp);
			brelse(bp);
			ufs_undo_allocation(ip, alloced_blocks,
			    undo_table, added_sectors);
			return (err);
		}
		brelse(bp);

		ip->i_ib[NIADDR - j] = nb;
		added_sectors += btodb(bsize);
		ip->i_blocks += btodb(bsize);
		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
		TRANS_INODE(ufsvfsp, ip);
		ip->i_flag |= IUPD | ICHG | IATTCHG;
		/* Caller is responsible for updating i_seq */

		/*
		 * Update the 'undo table' now that we've linked this block
		 * to an inode.
		 */

		undo_table[alloced_blocks-1].owner = ufs_inode_indirect;
		undo_table[alloced_blocks-1].owner_offset = NIADDR - j;

		/*
		 * In the ISYNC case, wrip will notice that the block
		 * count on the inode has changed and will be sure to
		 * ufs_iupdat the inode at the end of wrip.
		 */
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; j <= NIADDR; j++) {
		ob = nb;
		bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, ob), bsize);

		if (bp->b_flags & B_ERROR) {
			err = geterror(bp);
			brelse(bp);
			/*
			 * Return any partial allocations.
			 *
			 * It is possible that we have not yet made any
			 * allocations at this point (if this is the first
			 * pass through the loop and we didn't have to
			 * allocate the first indirect block, above).
			 * In this case, alloced_blocks and added_sectors will
			 * be zero, and ufs_undo_allocation will do nothing.
			 */
			ufs_undo_allocation(ip, alloced_blocks,
			    undo_table, added_sectors);
			return (err);
		}
		bap = bp->b_un.b_daddr;
		shft -= nindirshift;		/* sh /= nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
		nb = bap[i];

		if (nb == 0) {
			/*
			 * Check to see if doing this will make the
			 * file too big.  Only check if we are dealing
			 * with a very large file.
			 */
			if (verylargefile == 1) {
				if (((unsigned)ip->i_blocks + btodb(bsize))
				    > INT_MAX) {
					brelse(bp);
					ufs_undo_allocation(ip, alloced_blocks,
					    undo_table, added_sectors);
					return (EFBIG);
				}
			}
			if (pref == 0) {
				if (j < NIADDR) {
					/* Indirect block */
					pref = blkpref(ip, lbn, 0,
					    (daddr32_t *)0);
				} else {
					/* Data block */
					pref = blkpref(ip, lbn, i, &bap[0]);
				}
			}

			/*
			 * release "bp" buf to avoid deadlock (re-bread later)
			 */
			brelse(bp);

			err = alloc(ip, pref, (int)bsize, &nb, cr);
			if (err) {
				/*
				 * Return any partial allocations.
				 */
				ufs_undo_allocation(ip, alloced_blocks,
				    undo_table, added_sectors);
				return (err);
			}

			ASSERT(!ufs_badblock(ip, nb));
			ASSERT(alloced_blocks <= NIADDR);

			if (allocblk)
				*allocblk = nb;

			undo_table[alloced_blocks].this_block = nb;
			undo_table[alloced_blocks].block_size = bsize;
			undo_table[alloced_blocks].owner = ufs_no_owner;
			undo_table[alloced_blocks].usage_flags = metaflag |
			    ((j < NIADDR) ? I_IBLK : 0);

			alloced_blocks++;

			if (j < NIADDR) {
				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
				/*
				 * Write synchronously so indirect
				 * blocks never point at garbage.
				 */
				nbp = UFS_GETBLK(
				    ufsvfsp, dev, fsbtodb(fs, nb), bsize);

				clrbuf(nbp);
				/* XXX Maybe special-case this? */
				TRANS_BUF(ufsvfsp, 0, bsize, nbp, DT_ABZERO);
				UFS_BWRITE2(ufsvfsp, nbp);
				if (nbp->b_flags & B_ERROR) {
					err = geterror(nbp);
					brelse(nbp);
					/*
					 * Return any partial
					 * allocations.
					 */
					ufs_undo_allocation(ip,
					    alloced_blocks,
					    undo_table, added_sectors);
					return (err);
				}
				brelse(nbp);
			} else if (alloc_type == BI_NORMAL ||
			    P2ROUNDUP_TYPED(size,
			    PAGESIZE, u_offset_t) < bsize) {
				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
				fbzero(ITOV(ip),
				    ((offset_t)lbn << fs->fs_bshift),
				    (uint_t)bsize, &fbp);

				/*
				 * Cases which we need to do a synchronous
				 * write of the zeroed data pages:
				 *
				 * 1) If we are writing a directory then we
				 * want to write synchronously so blocks in
				 * directories never contain garbage.
				 *
				 * 2) If we are filling in a hole and the
				 * indirect block is going to be synchronously
				 * written back below we need to make sure
				 * that the zeroes are written here before
				 * the indirect block is updated so that if
				 * we crash before the real data is pushed
				 * we will not end up with random data in
				 * the middle of the file.
				 *
				 * 3) If the size of the request rounded up
				 * to the system page size is smaller than
				 * the file system block size, we want to
				 * write out all the pages now so that
				 * they are not aborted before they actually
				 * make it to ufs_putpage since the length
				 * of the inode will not include the pages.
				 */

				if (isdirquota || (issync &&
				    lbn < llbn))
					(void) ufs_fbiwrite(fbp, ip, nb,
					    fs->fs_fsize);
				else
					fbrelse(fbp, S_WRITE);
			}

			/*
			 * re-acquire "bp" buf
			 */
			bp = UFS_BREAD(ufsvfsp,
			    ip->i_dev, fsbtodb(fs, ob), bsize);
			if (bp->b_flags & B_ERROR) {
				err = geterror(bp);
				brelse(bp);
				/*
				 * Return any partial allocations.
				 */
				ufs_undo_allocation(ip,
				    alloced_blocks,
				    undo_table, added_sectors);
				return (err);
			}
			bap = bp->b_un.b_daddr;
			bap[i] = nb;

			/*
			 * The magic explained: j will be equal to NIADDR
			 * when we are at the lowest level, this is where the
			 * array entries point directly to data blocks.  Since
			 * we will be 'fallocate'ing we will go ahead and negate
			 * the addresses.
			 */
			if (alloc_type == BI_FALLOCATE && j == NIADDR)
				bap[i] = -bap[i];

			TRANS_BUF_ITEM_128(ufsvfsp, bap[i], bap, bp, DT_AB);
			added_sectors += btodb(bsize);
			ip->i_blocks += btodb(bsize);
			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
			TRANS_INODE(ufsvfsp, ip);
			ip->i_flag |= IUPD | ICHG | IATTCHG;

			/* Caller is responsible for updating i_seq */

			undo_table[alloced_blocks-1].owner =
			    ufs_indirect_block;
			undo_table[alloced_blocks-1].owner_block = ob;
			undo_table[alloced_blocks-1].owner_offset = i;

			if (issync) {
				UFS_BWRITE2(ufsvfsp, bp);
				if (bp->b_flags & B_ERROR) {
					err = geterror(bp);
					brelse(bp);
					/*
					 * Return any partial
					 * allocations.
					 */
					ufs_undo_allocation(ip,
					    alloced_blocks,
					    undo_table, added_sectors);
					return (err);
				}
				brelse(bp);
			} else {
				bdrwrite(bp);
			}
		} else {
			brelse(bp);
		}
	}
	return (0);
}
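
/*
 * Illustration of the BI_FALLOCATE convention above (hypothetical
 * address): if a fallocate'd data block lands at fragment address
 * 5128, the indirect-block slot stores -5128.  As noted in the
 * bmap_write comment, a reader that sees a negative entry treats the
 * block as unwritten and supplies zeroed pages, so preallocated space
 * never exposes stale disk contents.
 */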

/*
 * Return 1 if inode has unmapped blocks (UFS holes) or if another thread
 * is in the critical region of wrip().
 */
int
bmap_has_holes(struct inode *ip)
{
	struct fs *fs = ip->i_fs;
	uint_t	dblks;			/* # of data blocks */
	uint_t	mblks;			/* # of data + metadata blocks */
	int	nindirshift;
	int	nindiroffset;
	uint_t	cnt;
	int	n, j, shft;
	uint_t	nindirblks;

	int	fsbshift = fs->fs_bshift;
	int	fsboffset = (1 << fsbshift) - 1;

	/*
	 * Check for a writer in the critical region; if one is found,
	 * we cannot trust the values of i_size and i_blocks, so we
	 * simply return true.
	 */
	if (ip->i_writer != NULL && ip->i_writer != curthread) {
		return (1);
	}

	dblks = (ip->i_size + fsboffset) >> fsbshift;
	mblks = (ldbtob((u_offset_t)ip->i_blocks) + fsboffset) >> fsbshift;

	/*
	 * File has only direct blocks.
	 */
	if (dblks <= NDADDR)
		return (mblks < dblks);
	nindirshift = ip->i_ufsvfs->vfs_nindirshift;

	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
	nindirblks = nindiroffset + 1;

	dblks -= NDADDR;
	shft = 0;
	/*
	 * Determine how many levels of indirection.
	 */
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;	/* sh *= nindir */
		sh = 1LL << shft;
		if (dblks <= sh)
			break;
		dblks -= sh;
	}
	/* LINTED: warning: logical expression always true: op "||" */
	ASSERT(NIADDR <= 3);
	ASSERT(j <= NIADDR);
	if (j == NIADDR)	/* single level indirection */
		cnt = NDADDR + 1 + dblks;
	else if (j == NIADDR-1) /* double indirection */
		cnt = NDADDR + 1 + nindirblks +
		    1 + (dblks + nindiroffset)/nindirblks + dblks;
	else if (j == NIADDR-2) { /* triple indirection */
		n = (dblks + nindiroffset)/nindirblks;
		cnt = NDADDR + 1 + nindirblks +
		    1 + nindirblks + nindirblks*nindirblks +
		    1 + (n + nindiroffset)/nindirblks + n + dblks;
	}

	return (mblks < cnt);
}
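
/*
 * Worked example for the single-indirect case above, assuming
 * fs_bsize = 8192 and NDADDR = 12: a fully-populated 20-block file has
 * dblks = 20; after subtracting NDADDR, the loop leaves dblks = 8 with
 * j == NIADDR, so cnt = NDADDR + 1 + 8 = 21 (the "+ 1" is the single
 * indirect block itself).  If mblks, derived from i_blocks, is less
 * than that, some block must be unmapped, i.e. the file has a hole.
 */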

/*
 * Find some contiguous blocks starting at *sbp, going for at most
 * min(n, max_contig) blocks; return the number of blocks (not frags)
 * found.  The array passed in must be at least [0..n-1].
 */
static int
findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp, int maxtransfer)
{
	register daddr_t bn, nextbn;
	register daddr32_t *bp;
	register int diff;
	int maxtransblk;

	if (n <= 0)
		return (0);
	bn = *sbp;
	if (bn == 0)
		return (0);

	diff = fs->fs_frag;
	if (*lenp) {
		n = MIN(n, lblkno(fs, *lenp));
	} else {
		/*
		 * If the user has set the value for maxcontig lower than
		 * the drive transfer size, then assume they want this
		 * to be the maximum value for the size of the data transfer.
		 */
		maxtransblk = maxtransfer >> DEV_BSHIFT;
		if (fs->fs_maxcontig < maxtransblk) {
			n = MIN(n, fs->fs_maxcontig);
		} else {
			n = MIN(n, maxtransblk);
		}
	}
	bp = sbp;
	while (--n > 0) {
		nextbn = *(bp + 1);
		if (nextbn == 0 || bn + diff != nextbn)
			break;
		bn = nextbn;
		bp++;
	}
	return ((int)(bp - sbp) + 1);
}
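
/*
 * Example of findextent() behaviour, assuming fs_frag = 8 and ignoring
 * the *lenp/maxtransfer clamping: for sbp = { 64, 72, 80, 96 } and
 * n = 4, successive entries advance by fs_frag up to 80, but
 * 80 + 8 != 96, so the extent ends there and the return value is 3.
 */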

/*
 * Free any blocks which had been successfully allocated.  Always called
 * as a result of an error, so we don't bother returning an error code
 * from here.
 *
 * If block_count and inode_sector_adjust are both zero, we'll do nothing.
 * Thus it is safe to call this as part of error handling, whether or not
 * any blocks have been allocated.
 *
 * The ufs_inode_direct case is currently unused.
 */

static void
ufs_undo_allocation(
	inode_t *ip,
	int block_count,
	struct ufs_allocated_block table[],
	int inode_sector_adjust)
{
	int i;
	int inode_changed;
	int error_updating_pointers;
	struct ufsvfs *ufsvfsp;

	inode_changed = 0;
	error_updating_pointers = 0;

	ufsvfsp = ip->i_ufsvfs;

	/*
	 * Update pointers on disk before freeing blocks.  If we fail,
	 * some blocks may remain busy; but they will be reclaimed by
	 * an fsck.  (This is better than letting a block wind up with
	 * two owners if we successfully freed it but could not remove
	 * the pointer to it.)
	 */

	for (i = 0; i < block_count; i++) {
		switch (table[i].owner) {
		case ufs_no_owner:
			/* Nothing to do here, nobody points to us */
			break;
		case ufs_inode_direct:
			ASSERT(table[i].owner_offset < NDADDR);
			ip->i_db[table[i].owner_offset] = 0;
			inode_changed = 1;
			break;
		case ufs_inode_indirect:
			ASSERT(table[i].owner_offset < NIADDR);
			ip->i_ib[table[i].owner_offset] = 0;
			inode_changed = 1;
			break;
		case ufs_indirect_block: {
			buf_t *bp;
			daddr32_t *block_data;

			/* Read/modify/log/write. */

			ASSERT(table[i].owner_offset <
			    (VBSIZE(ITOV(ip)) / sizeof (daddr32_t)));

			bp = UFS_BREAD(ufsvfsp, ip->i_dev,
			    fsbtodb(ufsvfsp->vfs_fs, table[i].owner_block),
			    VBSIZE(ITOV(ip)));

			if (bp->b_flags & B_ERROR) {
				/* Couldn't read this block; give up. */
				error_updating_pointers = 1;
				brelse(bp);
				break;	/* out of SWITCH */
			}

			block_data = bp->b_un.b_daddr;
			block_data[table[i].owner_offset] = 0;

			/* Write a log entry which includes the zero. */
			/* It might be possible to optimize this by using */
			/* TRANS_BUF directly and zeroing only the four */
			/* bytes involved, but an attempt to do that led */
			/* to panics in the logging code.  The attempt was */
			/* TRANS_BUF(ufsvfsp, */
			/*	table[i].owner_offset * sizeof (daddr32_t), */
			/*	sizeof (daddr32_t), */
			/*	bp, */
			/*	DT_ABZERO); */

			TRANS_BUF_ITEM_128(ufsvfsp,
			    block_data[table[i].owner_offset],
			    block_data, bp, DT_AB);

			/* Now we can write the buffer itself. */

			UFS_BWRITE2(ufsvfsp, bp);

			if (bp->b_flags & B_ERROR) {
				error_updating_pointers = 1;
			}

			brelse(bp);
			break;
		}
		default:
			(void) ufs_fault(ITOV(ip),
			    "ufs_undo_allocation failure\n");
			break;
		}
	}

	/*
	 * If the inode changed, or if we need to update its block count,
	 * then do that now.  We update the inode synchronously on disk
	 * to ensure that it won't transiently point at a block we've
	 * freed (only necessary if we're not logging).
	 *
	 * NOTE: Currently ufs_iupdat() does not check for errors.  When
	 * it is fixed, we should verify that we successfully updated the
	 * inode before freeing blocks below.
	 */

	if (inode_changed || (inode_sector_adjust != 0)) {
		ip->i_blocks -= inode_sector_adjust;
		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
		TRANS_INODE(ufsvfsp, ip);
		ip->i_flag |= IUPD | ICHG | IATTCHG;
		ip->i_seq++;
		if (!TRANS_ISTRANS(ufsvfsp))
			ufs_iupdat(ip, I_SYNC);
	}

	/*
	 * Now we go through and actually free the blocks, but only if we
	 * successfully removed the pointers to them.
	 */

	if (!error_updating_pointers) {
		for (i = 0; i < block_count; i++) {
			free(ip, table[i].this_block, table[i].block_size,
			    table[i].usage_flags);
		}
	}
}

/*
 * Find the next hole or data block in file starting at *off.
 * Return found offset in *off, which can be less than the
 * starting offset if not block aligned.
 * This code is based on bmap_read().
 * Errors: ENXIO for end of file
 *	   EIO for block read error.
 */
int
bmap_find(struct inode *ip, boolean_t hole, u_offset_t *off)
{
	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
	struct fs *fs = ufsvfsp->vfs_fs;
	buf_t *bp[NIADDR];
	int i, j;
	int shft;			/* we maintain sh = 1 << shft */
	int nindirshift, nindiroffset;
	daddr_t	ob, nb, tbn, lbn, skip;
	daddr32_t *bap;
	u_offset_t isz = (offset_t)ip->i_size;
	int32_t bs = fs->fs_bsize;	/* file system block size */
	int32_t nindir = fs->fs_nindir;
	dev_t dev;
	int error = 0;
	daddr_t limits[NIADDR];

	ASSERT(*off < isz);
	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbn = (daddr_t)lblkno(fs, *off);
	ASSERT(lbn >= 0);

	for (i = 0; i < NIADDR; i++)
		bp[i] = NULL;

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		for (; lbn < NDADDR; lbn++) {
			if ((hole && (ip->i_db[lbn] == 0)) ||
			    (!hole && (ip->i_db[lbn] != 0))) {
				goto out;
			}
		}
		if ((u_offset_t)lbn << fs->fs_bshift >= isz)
			goto out;
	}

	nindir = fs->fs_nindir;
	nindirshift = ufsvfsp->vfs_nindirshift;
	nindiroffset = ufsvfsp->vfs_nindiroffset;
	dev = ip->i_dev;

	/* Set up limits array */
	for (limits[0] = NDADDR, j = 1; j < NIADDR; j++)
		limits[j] = limits[j-1] + (1ULL << (nindirshift * j));

loop:
	/*
	 * Determine how many levels of indirection.
	 */
	shft = 0;			/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;	/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}
	if (j == 0) {
		/* must have passed end of file */
		ASSERT(((u_offset_t)lbn << fs->fs_bshift) >= isz);
		goto out;
	}

	/*
	 * Fetch the first indirect block.
	 */
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		if (hole) {
			lbn = limits[NIADDR - j];
			goto out;
		} else {
			lbn = limits[NIADDR - j + 1];
			if ((u_offset_t)lbn << fs->fs_bshift >= isz)
				goto out;
			goto loop;
		}
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; ((j <= NIADDR) && (nb != 0)); j++) {
		ob = nb;
		/*
		 * if there's a different block at this level then release
		 * the old one and read in the new one.
		 */
		if ((bp[j-1] == NULL) || bp[j-1]->b_blkno != fsbtodb(fs, ob)) {
			if (bp[j-1] != NULL)
				brelse(bp[j-1]);
			bp[j-1] = UFS_BREAD(ufsvfsp, dev, fsbtodb(fs, ob), bs);
			if (bp[j-1]->b_flags & B_ERROR) {
				error = EIO;
				goto out;
			}
		}
		bap = bp[j-1]->b_un.b_daddr;

		shft -= nindirshift;	/* sh / nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
		nb = bap[i];
		skip = 1LL << (nindirshift * (NIADDR - j));
	}

	/*
	 * Scan through the blocks in this array.
	 */
	for (; i < nindir; i++, lbn += skip) {
		if (hole && (bap[i] == 0))
			goto out;
		if (!hole && (bap[i] != 0)) {
			if (skip == 1) {
				/* we're at the lowest level */
				goto out;
			} else {
				goto loop;
			}
		}
	}
	if (((u_offset_t)lbn << fs->fs_bshift) < isz)
		goto loop;
out:
	for (i = 0; i < NIADDR; i++) {
		if (bp[i])
			brelse(bp[i]);
	}
	if (error == 0) {
		if (((u_offset_t)lbn << fs->fs_bshift) >= isz) {
			error = ENXIO;
		} else {
			/* success */
			*off = (u_offset_t)lbn << fs->fs_bshift;
		}
	}
	return (error);
}
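
/*
 * A minimal usage sketch for bmap_find() (illustrative, not part of
 * this file): a SEEK_HOLE-style caller holding i_contents, with a
 * starting offset below i_size (the ASSERT above requires this),
 * might do
 *
 *	u_offset_t off = start_offset;
 *	int err = bmap_find(ip, B_TRUE, &off);
 *
 * On success, off holds the offset of the first hole (possibly
 * rounded down to a block boundary); ENXIO means no hole was found
 * before the end of the file.
 */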

/*
 * Set a particular offset in the inode list to be a certain block.
 * User is responsible for calling TRANS* functions
 */
int
bmap_set_bn(struct vnode *vp, u_offset_t off, daddr32_t bn)
{
	daddr_t lbn;
	struct inode *ip;
	ufsvfs_t *ufsvfsp;
	struct fs *fs;
	struct buf *bp;
	int i, j;
	int shft;			/* we maintain sh = 1 << shft */
	int err;
	daddr_t	ob, nb, tbn;
	daddr32_t *bap;
	int nindirshift, nindiroffset;

	ip = VTOI(vp);
	ufsvfsp = ip->i_ufsvfs;
	fs = ufsvfsp->vfs_fs;
	lbn = (daddr_t)lblkno(fs, off);

	ASSERT(RW_LOCK_HELD(&ip->i_contents));

	if (lbn < 0)
		return (EFBIG);

	/*
	 * Take care of direct block assignment
	 */
	if (lbn < NDADDR) {
		ip->i_db[lbn] = bn;
		return (0);
	}

	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
	/*
	 * Determine how many levels of indirection.
	 */
	shft = 0;			/* sh = 1 */
	tbn = lbn - NDADDR;
	for (j = NIADDR; j > 0; j--) {
		longlong_t	sh;

		shft += nindirshift;	/* sh *= nindir */
		sh = 1LL << shft;
		if (tbn < sh)
			break;
		tbn -= sh;
	}
	if (j == 0)
		return (EFBIG);

	/*
	 * Fetch the first indirect block.
	 */
	nb = ip->i_ib[NIADDR - j];
	if (nb == 0) {
		err = ufs_fault(ITOV(ip), "ufs_set_bn: nb == UFS_HOLE");
		return (err);
	}

	/*
	 * Fetch through the indirect blocks.
	 */
	for (; j <= NIADDR; j++) {
		ob = nb;
		bp = UFS_BREAD(ufsvfsp,
		    ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
		if (bp->b_flags & B_ERROR) {
			err = geterror(bp);
			brelse(bp);
			return (err);
		}
		bap = bp->b_un.b_daddr;

		ASSERT(!ufs_indir_badblock(ip, bap));

		shft -= nindirshift;	/* sh / nindir */
		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */

		nb = bap[i];
		if (nb == 0) {
			err = ufs_fault(ITOV(ip), "ufs_set_bn: nb == UFS_HOLE");
			return (err);
		}

		if (j == NIADDR) {
			bap[i] = bn;
			bdrwrite(bp);
			return (0);
		}

		brelse(bp);
	}
	return (0);
}