1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/bio.h> 68 #include <sys/buf.h> 69 #include <sys/lock.h> 70 #include <sys/mount.h> 71 #include <sys/vnode.h> 72 73 #include <ufs/ufs/quota.h> 74 #include <ufs/ufs/inode.h> 75 #include <ufs/ufs/ufs_extern.h> 76 #include <ufs/ufs/extattr.h> 77 #include <ufs/ufs/ufsmount.h> 78 79 #include <ufs/ffs/fs.h> 80 #include <ufs/ffs/ffs_extern.h> 81 82 /* 83 * Balloc defines the structure of filesystem storage 84 * by allocating the physical blocks on a device given 85 * the inode and the logical block number in a file. 86 * This is the allocation strategy for UFS1. Below is 87 * the allocation strategy for UFS2. 88 */ 89 int 90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 91 struct ucred *cred, int flags, struct buf **bpp) 92 { 93 struct inode *ip; 94 struct ufs1_dinode *dp; 95 ufs_lbn_t lbn, lastlbn; 96 struct fs *fs; 97 ufs1_daddr_t nb; 98 struct buf *bp, *nbp; 99 struct ufsmount *ump; 100 struct indir indirs[NIADDR + 2]; 101 int deallocated, osize, nsize, num, i, error; 102 ufs2_daddr_t newb; 103 ufs1_daddr_t *bap, pref; 104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 106 int unwindidx = -1; 107 int saved_inbdflush; 108 int reclaimed; 109 110 ip = VTOI(vp); 111 dp = ip->i_din1; 112 fs = ip->i_fs; 113 ump = ip->i_ump; 114 lbn = lblkno(fs, startoffset); 115 size = blkoff(fs, startoffset) + size; 116 reclaimed = 0; 117 if (size > fs->fs_bsize) 118 panic("ffs_balloc_ufs1: blk too big"); 119 *bpp = NULL; 120 if (flags & IO_EXT) 121 return (EOPNOTSUPP); 122 if (lbn < 0) 123 return (EFBIG); 124 125 if (DOINGSOFTDEP(vp)) 126 softdep_prealloc(vp, MNT_WAIT); 127 /* 128 * If the next write will extend the file into a new block, 129 * and the file is currently composed of a fragment 130 * this fragment has to be extended to be a full block. 131 */ 132 lastlbn = lblkno(fs, ip->i_size); 133 if (lastlbn < NDADDR && lastlbn < lbn) { 134 nb = lastlbn; 135 osize = blksize(fs, ip, nb); 136 if (osize < fs->fs_bsize && osize > 0) { 137 UFS_LOCK(ump); 138 error = ffs_realloccg(ip, nb, dp->di_db[nb], 139 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 140 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, 141 cred, &bp); 142 if (error) 143 return (error); 144 if (DOINGSOFTDEP(vp)) 145 softdep_setup_allocdirect(ip, nb, 146 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 147 fs->fs_bsize, osize, bp); 148 ip->i_size = smalllblktosize(fs, nb + 1); 149 dp->di_size = ip->i_size; 150 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 151 ip->i_flag |= IN_CHANGE | IN_UPDATE; 152 if (flags & IO_SYNC) 153 bwrite(bp); 154 else 155 bawrite(bp); 156 } 157 } 158 /* 159 * The first NDADDR blocks are direct blocks 160 */ 161 if (lbn < NDADDR) { 162 if (flags & BA_METAONLY) 163 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 164 nb = dp->di_db[lbn]; 165 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 166 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 167 if (error) { 168 brelse(bp); 169 return (error); 170 } 171 bp->b_blkno = fsbtodb(fs, nb); 172 *bpp = bp; 173 return (0); 174 } 175 if (nb != 0) { 176 /* 177 * Consider need to reallocate a fragment. 178 */ 179 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 180 nsize = fragroundup(fs, size); 181 if (nsize <= osize) { 182 error = bread(vp, lbn, osize, NOCRED, &bp); 183 if (error) { 184 brelse(bp); 185 return (error); 186 } 187 bp->b_blkno = fsbtodb(fs, nb); 188 } else { 189 UFS_LOCK(ump); 190 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 191 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 192 &dp->di_db[0]), osize, nsize, flags, 193 cred, &bp); 194 if (error) 195 return (error); 196 if (DOINGSOFTDEP(vp)) 197 softdep_setup_allocdirect(ip, lbn, 198 dbtofsb(fs, bp->b_blkno), nb, 199 nsize, osize, bp); 200 } 201 } else { 202 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 203 nsize = fragroundup(fs, size); 204 else 205 nsize = fs->fs_bsize; 206 UFS_LOCK(ump); 207 error = ffs_alloc(ip, lbn, 208 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 209 nsize, flags, cred, &newb); 210 if (error) 211 return (error); 212 bp = getblk(vp, lbn, nsize, 0, 0, 0); 213 bp->b_blkno = fsbtodb(fs, newb); 214 if (flags & BA_CLRBUF) 215 vfs_bio_clrbuf(bp); 216 if (DOINGSOFTDEP(vp)) 217 softdep_setup_allocdirect(ip, lbn, newb, 0, 218 nsize, 0, bp); 219 } 220 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 221 ip->i_flag |= IN_CHANGE | IN_UPDATE; 222 *bpp = bp; 223 return (0); 224 } 225 /* 226 * Determine the number of levels of indirection. 227 */ 228 pref = 0; 229 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 230 return(error); 231 #ifdef INVARIANTS 232 if (num < 1) 233 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 234 #endif 235 saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags & 236 TDP_INBDFLUSH); 237 curthread->td_pflags |= TDP_INBDFLUSH; 238 /* 239 * Fetch the first indirect block allocating if necessary. 240 */ 241 --num; 242 nb = dp->di_ib[indirs[0].in_off]; 243 allocib = NULL; 244 allocblk = allociblk; 245 lbns_remfree = lbns; 246 if (nb == 0) { 247 UFS_LOCK(ump); 248 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 249 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 250 flags, cred, &newb)) != 0) { 251 curthread->td_pflags &= saved_inbdflush; 252 return (error); 253 } 254 nb = newb; 255 *allocblk++ = nb; 256 *lbns_remfree++ = indirs[1].in_lbn; 257 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); 258 bp->b_blkno = fsbtodb(fs, nb); 259 vfs_bio_clrbuf(bp); 260 if (DOINGSOFTDEP(vp)) { 261 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 262 newb, 0, fs->fs_bsize, 0, bp); 263 bdwrite(bp); 264 } else { 265 /* 266 * Write synchronously so that indirect blocks 267 * never point at garbage. 268 */ 269 if (DOINGASYNC(vp)) 270 bdwrite(bp); 271 else if ((error = bwrite(bp)) != 0) 272 goto fail; 273 } 274 allocib = &dp->di_ib[indirs[0].in_off]; 275 *allocib = nb; 276 ip->i_flag |= IN_CHANGE | IN_UPDATE; 277 } 278 /* 279 * Fetch through the indirect blocks, allocating as necessary. 280 */ 281 retry: 282 for (i = 1;;) { 283 error = bread(vp, 284 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 285 if (error) { 286 brelse(bp); 287 goto fail; 288 } 289 bap = (ufs1_daddr_t *)bp->b_data; 290 nb = bap[indirs[i].in_off]; 291 if (i == num) 292 break; 293 i += 1; 294 if (nb != 0) { 295 bqrelse(bp); 296 continue; 297 } 298 UFS_LOCK(ump); 299 if (pref == 0) 300 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 301 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 302 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 303 brelse(bp); 304 if (++reclaimed == 1) { 305 UFS_LOCK(ump); 306 softdep_request_cleanup(fs, vp, cred, 307 FLUSH_BLOCKS_WAIT); 308 UFS_UNLOCK(ump); 309 goto retry; 310 } 311 goto fail; 312 } 313 nb = newb; 314 *allocblk++ = nb; 315 *lbns_remfree++ = indirs[i].in_lbn; 316 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 317 nbp->b_blkno = fsbtodb(fs, nb); 318 vfs_bio_clrbuf(nbp); 319 if (DOINGSOFTDEP(vp)) { 320 softdep_setup_allocindir_meta(nbp, ip, bp, 321 indirs[i - 1].in_off, nb); 322 bdwrite(nbp); 323 } else { 324 /* 325 * Write synchronously so that indirect blocks 326 * never point at garbage. 327 */ 328 if ((error = bwrite(nbp)) != 0) { 329 brelse(bp); 330 goto fail; 331 } 332 } 333 bap[indirs[i - 1].in_off] = nb; 334 if (allocib == NULL && unwindidx < 0) 335 unwindidx = i - 1; 336 /* 337 * If required, write synchronously, otherwise use 338 * delayed write. 339 */ 340 if (flags & IO_SYNC) { 341 bwrite(bp); 342 } else { 343 if (bp->b_bufsize == fs->fs_bsize) 344 bp->b_flags |= B_CLUSTEROK; 345 bdwrite(bp); 346 } 347 } 348 /* 349 * If asked only for the indirect block, then return it. 350 */ 351 if (flags & BA_METAONLY) { 352 curthread->td_pflags &= saved_inbdflush; 353 *bpp = bp; 354 return (0); 355 } 356 /* 357 * Get the data block, allocating if necessary. 358 */ 359 if (nb == 0) { 360 UFS_LOCK(ump); 361 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); 362 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 363 flags | IO_BUFLOCKED, cred, &newb); 364 if (error) { 365 brelse(bp); 366 if (++reclaimed == 1) { 367 UFS_LOCK(ump); 368 softdep_request_cleanup(fs, vp, cred, 369 FLUSH_BLOCKS_WAIT); 370 UFS_UNLOCK(ump); 371 goto retry; 372 } 373 goto fail; 374 } 375 nb = newb; 376 *allocblk++ = nb; 377 *lbns_remfree++ = lbn; 378 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 379 nbp->b_blkno = fsbtodb(fs, nb); 380 if (flags & BA_CLRBUF) 381 vfs_bio_clrbuf(nbp); 382 if (DOINGSOFTDEP(vp)) 383 softdep_setup_allocindir_page(ip, lbn, bp, 384 indirs[i].in_off, nb, 0, nbp); 385 bap[indirs[i].in_off] = nb; 386 /* 387 * If required, write synchronously, otherwise use 388 * delayed write. 389 */ 390 if (flags & IO_SYNC) { 391 bwrite(bp); 392 } else { 393 if (bp->b_bufsize == fs->fs_bsize) 394 bp->b_flags |= B_CLUSTEROK; 395 bdwrite(bp); 396 } 397 curthread->td_pflags &= saved_inbdflush; 398 *bpp = nbp; 399 return (0); 400 } 401 brelse(bp); 402 if (flags & BA_CLRBUF) { 403 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 404 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 405 error = cluster_read(vp, ip->i_size, lbn, 406 (int)fs->fs_bsize, NOCRED, 407 MAXBSIZE, seqcount, &nbp); 408 } else { 409 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 410 } 411 if (error) { 412 brelse(nbp); 413 goto fail; 414 } 415 } else { 416 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 417 nbp->b_blkno = fsbtodb(fs, nb); 418 } 419 curthread->td_pflags &= saved_inbdflush; 420 *bpp = nbp; 421 return (0); 422 fail: 423 curthread->td_pflags &= saved_inbdflush; 424 /* 425 * If we have failed to allocate any blocks, simply return the error. 426 * This is the usual case and avoids the need to fsync the file. 427 */ 428 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 429 return (error); 430 /* 431 * If we have failed part way through block allocation, we 432 * have to deallocate any indirect blocks that we have allocated. 433 * We have to fsync the file before we start to get rid of all 434 * of its dependencies so that we do not leave them dangling. 435 * We have to sync it at the end so that the soft updates code 436 * does not find any untracked changes. Although this is really 437 * slow, running out of disk space is not expected to be a common 438 * occurence. The error return from fsync is ignored as we already 439 * have an error to return to the user. 440 * 441 * XXX Still have to journal the free below 442 */ 443 (void) ffs_syncvnode(vp, MNT_WAIT); 444 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 445 blkp < allocblk; blkp++, lbns_remfree++) { 446 /* 447 * We shall not leave the freed blocks on the vnode 448 * buffer object lists. 449 */ 450 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 451 if (bp != NULL) { 452 bp->b_flags |= (B_INVAL | B_RELBUF); 453 bp->b_flags &= ~B_ASYNC; 454 brelse(bp); 455 } 456 deallocated += fs->fs_bsize; 457 } 458 if (allocib != NULL) { 459 *allocib = 0; 460 } else if (unwindidx >= 0) { 461 int r; 462 463 r = bread(vp, indirs[unwindidx].in_lbn, 464 (int)fs->fs_bsize, NOCRED, &bp); 465 if (r) { 466 panic("Could not unwind indirect block, error %d", r); 467 brelse(bp); 468 } else { 469 bap = (ufs1_daddr_t *)bp->b_data; 470 bap[indirs[unwindidx].in_off] = 0; 471 if (flags & IO_SYNC) { 472 bwrite(bp); 473 } else { 474 if (bp->b_bufsize == fs->fs_bsize) 475 bp->b_flags |= B_CLUSTEROK; 476 bdwrite(bp); 477 } 478 } 479 } 480 if (deallocated) { 481 #ifdef QUOTA 482 /* 483 * Restore user's disk quota because allocation failed. 484 */ 485 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 486 #endif 487 dp->di_blocks -= btodb(deallocated); 488 ip->i_flag |= IN_CHANGE | IN_UPDATE; 489 } 490 (void) ffs_syncvnode(vp, MNT_WAIT); 491 /* 492 * After the buffers are invalidated and on-disk pointers are 493 * cleared, free the blocks. 494 */ 495 for (blkp = allociblk; blkp < allocblk; blkp++) { 496 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 497 ip->i_number, NULL); 498 } 499 return (error); 500 } 501 502 /* 503 * Balloc defines the structure of file system storage 504 * by allocating the physical blocks on a device given 505 * the inode and the logical block number in a file. 506 * This is the allocation strategy for UFS2. Above is 507 * the allocation strategy for UFS1. 508 */ 509 int 510 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 511 struct ucred *cred, int flags, struct buf **bpp) 512 { 513 struct inode *ip; 514 struct ufs2_dinode *dp; 515 ufs_lbn_t lbn, lastlbn; 516 struct fs *fs; 517 struct buf *bp, *nbp; 518 struct ufsmount *ump; 519 struct indir indirs[NIADDR + 2]; 520 ufs2_daddr_t nb, newb, *bap, pref; 521 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 522 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 523 int deallocated, osize, nsize, num, i, error; 524 int unwindidx = -1; 525 int saved_inbdflush; 526 int reclaimed; 527 528 ip = VTOI(vp); 529 dp = ip->i_din2; 530 fs = ip->i_fs; 531 ump = ip->i_ump; 532 lbn = lblkno(fs, startoffset); 533 size = blkoff(fs, startoffset) + size; 534 reclaimed = 0; 535 if (size > fs->fs_bsize) 536 panic("ffs_balloc_ufs2: blk too big"); 537 *bpp = NULL; 538 if (lbn < 0) 539 return (EFBIG); 540 541 if (DOINGSOFTDEP(vp)) 542 softdep_prealloc(vp, MNT_WAIT); 543 544 /* 545 * Check for allocating external data. 546 */ 547 if (flags & IO_EXT) { 548 if (lbn >= NXADDR) 549 return (EFBIG); 550 /* 551 * If the next write will extend the data into a new block, 552 * and the data is currently composed of a fragment 553 * this fragment has to be extended to be a full block. 554 */ 555 lastlbn = lblkno(fs, dp->di_extsize); 556 if (lastlbn < lbn) { 557 nb = lastlbn; 558 osize = sblksize(fs, dp->di_extsize, nb); 559 if (osize < fs->fs_bsize && osize > 0) { 560 UFS_LOCK(ump); 561 error = ffs_realloccg(ip, -1 - nb, 562 dp->di_extb[nb], 563 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 564 &dp->di_extb[0]), osize, 565 (int)fs->fs_bsize, flags, cred, &bp); 566 if (error) 567 return (error); 568 if (DOINGSOFTDEP(vp)) 569 softdep_setup_allocext(ip, nb, 570 dbtofsb(fs, bp->b_blkno), 571 dp->di_extb[nb], 572 fs->fs_bsize, osize, bp); 573 dp->di_extsize = smalllblktosize(fs, nb + 1); 574 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 575 bp->b_xflags |= BX_ALTDATA; 576 ip->i_flag |= IN_CHANGE; 577 if (flags & IO_SYNC) 578 bwrite(bp); 579 else 580 bawrite(bp); 581 } 582 } 583 /* 584 * All blocks are direct blocks 585 */ 586 if (flags & BA_METAONLY) 587 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 588 nb = dp->di_extb[lbn]; 589 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 590 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); 591 if (error) { 592 brelse(bp); 593 return (error); 594 } 595 bp->b_blkno = fsbtodb(fs, nb); 596 bp->b_xflags |= BX_ALTDATA; 597 *bpp = bp; 598 return (0); 599 } 600 if (nb != 0) { 601 /* 602 * Consider need to reallocate a fragment. 603 */ 604 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 605 nsize = fragroundup(fs, size); 606 if (nsize <= osize) { 607 error = bread(vp, -1 - lbn, osize, NOCRED, &bp); 608 if (error) { 609 brelse(bp); 610 return (error); 611 } 612 bp->b_blkno = fsbtodb(fs, nb); 613 bp->b_xflags |= BX_ALTDATA; 614 } else { 615 UFS_LOCK(ump); 616 error = ffs_realloccg(ip, -1 - lbn, 617 dp->di_extb[lbn], 618 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 619 &dp->di_extb[0]), osize, nsize, flags, 620 cred, &bp); 621 if (error) 622 return (error); 623 bp->b_xflags |= BX_ALTDATA; 624 if (DOINGSOFTDEP(vp)) 625 softdep_setup_allocext(ip, lbn, 626 dbtofsb(fs, bp->b_blkno), nb, 627 nsize, osize, bp); 628 } 629 } else { 630 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 631 nsize = fragroundup(fs, size); 632 else 633 nsize = fs->fs_bsize; 634 UFS_LOCK(ump); 635 error = ffs_alloc(ip, lbn, 636 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 637 nsize, flags, cred, &newb); 638 if (error) 639 return (error); 640 bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0); 641 bp->b_blkno = fsbtodb(fs, newb); 642 bp->b_xflags |= BX_ALTDATA; 643 if (flags & BA_CLRBUF) 644 vfs_bio_clrbuf(bp); 645 if (DOINGSOFTDEP(vp)) 646 softdep_setup_allocext(ip, lbn, newb, 0, 647 nsize, 0, bp); 648 } 649 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 650 ip->i_flag |= IN_CHANGE; 651 *bpp = bp; 652 return (0); 653 } 654 /* 655 * If the next write will extend the file into a new block, 656 * and the file is currently composed of a fragment 657 * this fragment has to be extended to be a full block. 658 */ 659 lastlbn = lblkno(fs, ip->i_size); 660 if (lastlbn < NDADDR && lastlbn < lbn) { 661 nb = lastlbn; 662 osize = blksize(fs, ip, nb); 663 if (osize < fs->fs_bsize && osize > 0) { 664 UFS_LOCK(ump); 665 error = ffs_realloccg(ip, nb, dp->di_db[nb], 666 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 667 &dp->di_db[0]), osize, (int)fs->fs_bsize, 668 flags, cred, &bp); 669 if (error) 670 return (error); 671 if (DOINGSOFTDEP(vp)) 672 softdep_setup_allocdirect(ip, nb, 673 dbtofsb(fs, bp->b_blkno), 674 dp->di_db[nb], 675 fs->fs_bsize, osize, bp); 676 ip->i_size = smalllblktosize(fs, nb + 1); 677 dp->di_size = ip->i_size; 678 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 679 ip->i_flag |= IN_CHANGE | IN_UPDATE; 680 if (flags & IO_SYNC) 681 bwrite(bp); 682 else 683 bawrite(bp); 684 } 685 } 686 /* 687 * The first NDADDR blocks are direct blocks 688 */ 689 if (lbn < NDADDR) { 690 if (flags & BA_METAONLY) 691 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 692 nb = dp->di_db[lbn]; 693 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 694 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 695 if (error) { 696 brelse(bp); 697 return (error); 698 } 699 bp->b_blkno = fsbtodb(fs, nb); 700 *bpp = bp; 701 return (0); 702 } 703 if (nb != 0) { 704 /* 705 * Consider need to reallocate a fragment. 706 */ 707 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 708 nsize = fragroundup(fs, size); 709 if (nsize <= osize) { 710 error = bread(vp, lbn, osize, NOCRED, &bp); 711 if (error) { 712 brelse(bp); 713 return (error); 714 } 715 bp->b_blkno = fsbtodb(fs, nb); 716 } else { 717 UFS_LOCK(ump); 718 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 719 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 720 &dp->di_db[0]), osize, nsize, flags, 721 cred, &bp); 722 if (error) 723 return (error); 724 if (DOINGSOFTDEP(vp)) 725 softdep_setup_allocdirect(ip, lbn, 726 dbtofsb(fs, bp->b_blkno), nb, 727 nsize, osize, bp); 728 } 729 } else { 730 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 731 nsize = fragroundup(fs, size); 732 else 733 nsize = fs->fs_bsize; 734 UFS_LOCK(ump); 735 error = ffs_alloc(ip, lbn, 736 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 737 &dp->di_db[0]), nsize, flags, cred, &newb); 738 if (error) 739 return (error); 740 bp = getblk(vp, lbn, nsize, 0, 0, 0); 741 bp->b_blkno = fsbtodb(fs, newb); 742 if (flags & BA_CLRBUF) 743 vfs_bio_clrbuf(bp); 744 if (DOINGSOFTDEP(vp)) 745 softdep_setup_allocdirect(ip, lbn, newb, 0, 746 nsize, 0, bp); 747 } 748 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 749 ip->i_flag |= IN_CHANGE | IN_UPDATE; 750 *bpp = bp; 751 return (0); 752 } 753 /* 754 * Determine the number of levels of indirection. 755 */ 756 pref = 0; 757 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 758 return(error); 759 #ifdef INVARIANTS 760 if (num < 1) 761 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 762 #endif 763 saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags & 764 TDP_INBDFLUSH); 765 curthread->td_pflags |= TDP_INBDFLUSH; 766 /* 767 * Fetch the first indirect block allocating if necessary. 768 */ 769 --num; 770 nb = dp->di_ib[indirs[0].in_off]; 771 allocib = NULL; 772 allocblk = allociblk; 773 lbns_remfree = lbns; 774 if (nb == 0) { 775 UFS_LOCK(ump); 776 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 777 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 778 flags, cred, &newb)) != 0) { 779 curthread->td_pflags &= saved_inbdflush; 780 return (error); 781 } 782 nb = newb; 783 *allocblk++ = nb; 784 *lbns_remfree++ = indirs[1].in_lbn; 785 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); 786 bp->b_blkno = fsbtodb(fs, nb); 787 vfs_bio_clrbuf(bp); 788 if (DOINGSOFTDEP(vp)) { 789 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 790 newb, 0, fs->fs_bsize, 0, bp); 791 bdwrite(bp); 792 } else { 793 /* 794 * Write synchronously so that indirect blocks 795 * never point at garbage. 796 */ 797 if (DOINGASYNC(vp)) 798 bdwrite(bp); 799 else if ((error = bwrite(bp)) != 0) 800 goto fail; 801 } 802 allocib = &dp->di_ib[indirs[0].in_off]; 803 *allocib = nb; 804 ip->i_flag |= IN_CHANGE | IN_UPDATE; 805 } 806 /* 807 * Fetch through the indirect blocks, allocating as necessary. 808 */ 809 retry: 810 for (i = 1;;) { 811 error = bread(vp, 812 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 813 if (error) { 814 brelse(bp); 815 goto fail; 816 } 817 bap = (ufs2_daddr_t *)bp->b_data; 818 nb = bap[indirs[i].in_off]; 819 if (i == num) 820 break; 821 i += 1; 822 if (nb != 0) { 823 bqrelse(bp); 824 continue; 825 } 826 UFS_LOCK(ump); 827 if (pref == 0) 828 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 829 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 830 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 831 brelse(bp); 832 if (++reclaimed == 1) { 833 UFS_LOCK(ump); 834 softdep_request_cleanup(fs, vp, cred, 835 FLUSH_BLOCKS_WAIT); 836 UFS_UNLOCK(ump); 837 goto retry; 838 } 839 goto fail; 840 } 841 nb = newb; 842 *allocblk++ = nb; 843 *lbns_remfree++ = indirs[i].in_lbn; 844 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 845 nbp->b_blkno = fsbtodb(fs, nb); 846 vfs_bio_clrbuf(nbp); 847 if (DOINGSOFTDEP(vp)) { 848 softdep_setup_allocindir_meta(nbp, ip, bp, 849 indirs[i - 1].in_off, nb); 850 bdwrite(nbp); 851 } else { 852 /* 853 * Write synchronously so that indirect blocks 854 * never point at garbage. 855 */ 856 if ((error = bwrite(nbp)) != 0) { 857 brelse(bp); 858 goto fail; 859 } 860 } 861 bap[indirs[i - 1].in_off] = nb; 862 if (allocib == NULL && unwindidx < 0) 863 unwindidx = i - 1; 864 /* 865 * If required, write synchronously, otherwise use 866 * delayed write. 867 */ 868 if (flags & IO_SYNC) { 869 bwrite(bp); 870 } else { 871 if (bp->b_bufsize == fs->fs_bsize) 872 bp->b_flags |= B_CLUSTEROK; 873 bdwrite(bp); 874 } 875 } 876 /* 877 * If asked only for the indirect block, then return it. 878 */ 879 if (flags & BA_METAONLY) { 880 curthread->td_pflags &= saved_inbdflush; 881 *bpp = bp; 882 return (0); 883 } 884 /* 885 * Get the data block, allocating if necessary. 886 */ 887 if (nb == 0) { 888 UFS_LOCK(ump); 889 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); 890 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 891 flags | IO_BUFLOCKED, cred, &newb); 892 if (error) { 893 brelse(bp); 894 if (++reclaimed == 1) { 895 UFS_LOCK(ump); 896 softdep_request_cleanup(fs, vp, cred, 897 FLUSH_BLOCKS_WAIT); 898 UFS_UNLOCK(ump); 899 goto retry; 900 } 901 goto fail; 902 } 903 nb = newb; 904 *allocblk++ = nb; 905 *lbns_remfree++ = lbn; 906 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 907 nbp->b_blkno = fsbtodb(fs, nb); 908 if (flags & BA_CLRBUF) 909 vfs_bio_clrbuf(nbp); 910 if (DOINGSOFTDEP(vp)) 911 softdep_setup_allocindir_page(ip, lbn, bp, 912 indirs[i].in_off, nb, 0, nbp); 913 bap[indirs[i].in_off] = nb; 914 /* 915 * If required, write synchronously, otherwise use 916 * delayed write. 917 */ 918 if (flags & IO_SYNC) { 919 bwrite(bp); 920 } else { 921 if (bp->b_bufsize == fs->fs_bsize) 922 bp->b_flags |= B_CLUSTEROK; 923 bdwrite(bp); 924 } 925 curthread->td_pflags &= saved_inbdflush; 926 *bpp = nbp; 927 return (0); 928 } 929 brelse(bp); 930 /* 931 * If requested clear invalid portions of the buffer. If we 932 * have to do a read-before-write (typical if BA_CLRBUF is set), 933 * try to do some read-ahead in the sequential case to reduce 934 * the number of I/O transactions. 935 */ 936 if (flags & BA_CLRBUF) { 937 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 938 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 939 error = cluster_read(vp, ip->i_size, lbn, 940 (int)fs->fs_bsize, NOCRED, 941 MAXBSIZE, seqcount, &nbp); 942 } else { 943 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 944 } 945 if (error) { 946 brelse(nbp); 947 goto fail; 948 } 949 } else { 950 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 951 nbp->b_blkno = fsbtodb(fs, nb); 952 } 953 curthread->td_pflags &= saved_inbdflush; 954 *bpp = nbp; 955 return (0); 956 fail: 957 curthread->td_pflags &= saved_inbdflush; 958 /* 959 * If we have failed to allocate any blocks, simply return the error. 960 * This is the usual case and avoids the need to fsync the file. 961 */ 962 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 963 return (error); 964 /* 965 * If we have failed part way through block allocation, we 966 * have to deallocate any indirect blocks that we have allocated. 967 * We have to fsync the file before we start to get rid of all 968 * of its dependencies so that we do not leave them dangling. 969 * We have to sync it at the end so that the soft updates code 970 * does not find any untracked changes. Although this is really 971 * slow, running out of disk space is not expected to be a common 972 * occurence. The error return from fsync is ignored as we already 973 * have an error to return to the user. 974 * 975 * XXX Still have to journal the free below 976 */ 977 (void) ffs_syncvnode(vp, MNT_WAIT); 978 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 979 blkp < allocblk; blkp++, lbns_remfree++) { 980 /* 981 * We shall not leave the freed blocks on the vnode 982 * buffer object lists. 983 */ 984 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 985 if (bp != NULL) { 986 bp->b_flags |= (B_INVAL | B_RELBUF); 987 bp->b_flags &= ~B_ASYNC; 988 brelse(bp); 989 } 990 deallocated += fs->fs_bsize; 991 } 992 if (allocib != NULL) { 993 *allocib = 0; 994 } else if (unwindidx >= 0) { 995 int r; 996 997 r = bread(vp, indirs[unwindidx].in_lbn, 998 (int)fs->fs_bsize, NOCRED, &bp); 999 if (r) { 1000 panic("Could not unwind indirect block, error %d", r); 1001 brelse(bp); 1002 } else { 1003 bap = (ufs2_daddr_t *)bp->b_data; 1004 bap[indirs[unwindidx].in_off] = 0; 1005 if (flags & IO_SYNC) { 1006 bwrite(bp); 1007 } else { 1008 if (bp->b_bufsize == fs->fs_bsize) 1009 bp->b_flags |= B_CLUSTEROK; 1010 bdwrite(bp); 1011 } 1012 } 1013 } 1014 if (deallocated) { 1015 #ifdef QUOTA 1016 /* 1017 * Restore user's disk quota because allocation failed. 1018 */ 1019 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 1020 #endif 1021 dp->di_blocks -= btodb(deallocated); 1022 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1023 } 1024 (void) ffs_syncvnode(vp, MNT_WAIT); 1025 /* 1026 * After the buffers are invalidated and on-disk pointers are 1027 * cleared, free the blocks. 1028 */ 1029 for (blkp = allociblk; blkp < allocblk; blkp++) { 1030 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 1031 ip->i_number, NULL); 1032 } 1033 return (error); 1034 } 1035