1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
 *
 * @(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * vp/startoffset/size identify the byte range being written; cred is
 * charged for any new allocation (quota); flags carries IO_SYNC,
 * BA_CLRBUF, BA_METAONLY, BA_SEQMASK hints, etc.  On success the
 * buffer covering the range is returned locked in *bpp.  Returns 0 or
 * an errno; on failure any partially allocated blocks are unwound.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/* Track every block allocated in this call so it can be unwound. */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Size of the allocation: offset within the block plus write size. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);	/* UFS1 has no external attribute area */
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): UFS_LOCK is taken with no visible
			 * unlock here; presumably ffs_realloccg/ffs_alloc
			 * release it on all paths — confirm in ffs_alloc.c.
			 */
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			    cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and full-sized: just read. */
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the data). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * Save the prior state of TDP_INBDFLUSH as a mask: if the flag was
	 * already set, saved_inbdflush is all ones (the later &= restore is
	 * a no-op); if it was clear, the &= restore clears it again.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Remember where the inode points so a failure can undo it. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;	/* bp now holds the last-level indirect block */
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * Data block already exists.  If requested, clear invalid portions
	 * of the buffer via a read; cluster the read in the sequential case.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	    blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Undo the inode's pointer to the first new indirect block. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		/* Clear the deepest on-disk pointer to the new chain. */
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);	/* not reached: panic does not return */
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Same contract as ffs_balloc_ufs1, with two additions: 64-bit block
 * pointers, and support for the external attribute area (IO_EXT),
 * whose blocks live at negative logical block numbers (-1 - lbn) and
 * are flagged BX_ALTDATA.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/* Track every block allocated in this call so it can be unwound. */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Size of the allocation: offset within the block plus write size. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment,
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and full-sized: just read. */
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment (may move the data). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			/* Block already allocated and full-sized: just read. */
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the data). */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * Save the prior state of TDP_INBDFLUSH as a mask (see the UFS1
	 * version above for the trick), then set the flag.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Remember where the inode points so a failure can undo it. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;	/* bp now holds the last-level indirect block */
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer. If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	    blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Undo the inode's pointer to the first new indirect block. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		/* Clear the deepest on-disk pointer to the new chain. */
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);	/* not reached: panic does not return */
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}