1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * vp          - vnode of the file being allocated into
 * startoffset - byte offset in the file where the write will start
 * size        - number of bytes to be written at startoffset; together
 *               they select the logical block (and fragment size) needed
 * cred        - credentials charged for the allocation (quota)
 * flags       - IO_SYNC/IO_EXT writer flags plus BA_CLRBUF, BA_METAONLY
 *               and the BA_SEQMASK read-ahead hint
 * bpp         - on success *bpp is a buffer for the allocated block
 *
 * Returns 0 on success, or an errno: EOPNOTSUPP for IO_EXT (UFS1 has no
 * external attribute area), EFBIG for a negative logical block number,
 * or the error from the underlying read/allocation.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/* Blocks allocated in this call, tracked for unwind on failure. */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Total span within the block: offset into the block plus size. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): UFS_LOCK(ump) is taken here with no
			 * visible unlock; presumably ffs_realloccg/ffs_alloc
			 * release it internally -- confirm in ffs_alloc.c.
			 */
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by the file. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment to the new size. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this block
			 * would end past EOF, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * saved_inbdflush is a mask: all bits set except TDP_INBDFLUSH,
	 * which keeps its prior value.  ANDing it back into td_pflags on
	 * every exit path restores TDP_INBDFLUSH to its previous state
	 * while leaving all other flag bits untouched.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			/* Level already allocated; descend to the next. */
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/*
		 * Only now that the child is safely on disk (or tracked by
		 * soft updates) is the parent pointer filled in.
		 */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * Data block already exists: read it (with read-ahead in the
	 * sequential case) if the caller wants valid contents, otherwise
	 * just hand back a buffer mapped to the right disk address.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* The inode's own indirect pointer was set; clear it. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): panic(9) does not return, so the
			 * brelse() below is unreachable.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Parameters are as for ffs_balloc_ufs1() above, except that IO_EXT
 * is supported: it allocates in the inode's external attribute area
 * (di_extb, read/written at negative buffer lbns -1 - lbn and marked
 * BX_ALTDATA) rather than in the file data.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/* Blocks allocated in this call, tracked for unwind on failure. */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Total span within the block: offset into the block plus size. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				/*
				 * NOTE(review): UFS_LOCK(ump) is taken with
				 * no visible unlock; presumably ffs_realloccg
				 * and ffs_alloc release it internally --
				 * confirm in ffs_alloc.c.
				 */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment to the new size. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: fragment if it would end past the
			 * end of the ext area, otherwise a full block.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by the file. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment to the new size. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this block
			 * would end past EOF, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * saved_inbdflush is a mask that restores TDP_INBDFLUSH to its
	 * prior state when ANDed back into td_pflags on exit, leaving
	 * all other flag bits untouched.
	 */
	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
	    TDP_INBDFLUSH);
	curthread->td_pflags |= TDP_INBDFLUSH;
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			curthread->td_pflags &= saved_inbdflush;
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			/* Level already allocated; descend to the next. */
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/*
		 * Only now that the child is safely on disk (or tracked by
		 * soft updates) is the parent pointer filled in.
		 */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread->td_pflags &= saved_inbdflush;
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread->td_pflags &= saved_inbdflush;
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread->td_pflags &= saved_inbdflush;
	*bpp = nbp;
	return (0);
fail:
	curthread->td_pflags &= saved_inbdflush;
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* The inode's own indirect pointer was set; clear it. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): panic(9) does not return, so the
			 * brelse() below is unreachable.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number);
	}
	return (error);
}