/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
58 * 59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/bio.h> 68 #include <sys/buf.h> 69 #include <sys/lock.h> 70 #include <sys/mount.h> 71 #include <sys/vnode.h> 72 73 #include <ufs/ufs/quota.h> 74 #include <ufs/ufs/inode.h> 75 #include <ufs/ufs/ufs_extern.h> 76 #include <ufs/ufs/extattr.h> 77 #include <ufs/ufs/ufsmount.h> 78 79 #include <ufs/ffs/fs.h> 80 #include <ufs/ffs/ffs_extern.h> 81 82 /* 83 * Balloc defines the structure of filesystem storage 84 * by allocating the physical blocks on a device given 85 * the inode and the logical block number in a file. 86 * This is the allocation strategy for UFS1. Below is 87 * the allocation strategy for UFS2. 88 */ 89 int 90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 91 struct ucred *cred, int flags, struct buf **bpp) 92 { 93 struct inode *ip; 94 struct ufs1_dinode *dp; 95 ufs_lbn_t lbn, lastlbn; 96 struct fs *fs; 97 ufs1_daddr_t nb; 98 struct buf *bp, *nbp; 99 struct ufsmount *ump; 100 struct indir indirs[NIADDR + 2]; 101 int deallocated, osize, nsize, num, i, error; 102 ufs2_daddr_t newb; 103 ufs1_daddr_t *bap, pref; 104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 106 int unwindidx = -1; 107 int saved_inbdflush; 108 static struct timeval lastfail; 109 static int curfail; 110 int gbflags, reclaimed; 111 112 ip = VTOI(vp); 113 dp = ip->i_din1; 114 fs = ip->i_fs; 115 ump = ip->i_ump; 116 lbn = lblkno(fs, startoffset); 117 size = blkoff(fs, startoffset) + size; 118 reclaimed = 0; 119 if (size > fs->fs_bsize) 120 panic("ffs_balloc_ufs1: blk too big"); 121 *bpp = NULL; 122 if (flags & IO_EXT) 123 return (EOPNOTSUPP); 124 if (lbn < 0) 125 return (EFBIG); 126 gbflags = (flags & BA_UNMAPPED) != 0 ? 
GB_UNMAPPED : 0; 127 128 if (DOINGSOFTDEP(vp)) 129 softdep_prealloc(vp, MNT_WAIT); 130 /* 131 * If the next write will extend the file into a new block, 132 * and the file is currently composed of a fragment 133 * this fragment has to be extended to be a full block. 134 */ 135 lastlbn = lblkno(fs, ip->i_size); 136 if (lastlbn < NDADDR && lastlbn < lbn) { 137 nb = lastlbn; 138 osize = blksize(fs, ip, nb); 139 if (osize < fs->fs_bsize && osize > 0) { 140 UFS_LOCK(ump); 141 error = ffs_realloccg(ip, nb, dp->di_db[nb], 142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, 144 cred, &bp); 145 if (error) 146 return (error); 147 if (DOINGSOFTDEP(vp)) 148 softdep_setup_allocdirect(ip, nb, 149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 150 fs->fs_bsize, osize, bp); 151 ip->i_size = smalllblktosize(fs, nb + 1); 152 dp->di_size = ip->i_size; 153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 154 ip->i_flag |= IN_CHANGE | IN_UPDATE; 155 if (flags & IO_SYNC) 156 bwrite(bp); 157 else 158 bawrite(bp); 159 } 160 } 161 /* 162 * The first NDADDR blocks are direct blocks 163 */ 164 if (lbn < NDADDR) { 165 if (flags & BA_METAONLY) 166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 167 nb = dp->di_db[lbn]; 168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 170 if (error) { 171 brelse(bp); 172 return (error); 173 } 174 bp->b_blkno = fsbtodb(fs, nb); 175 *bpp = bp; 176 return (0); 177 } 178 if (nb != 0) { 179 /* 180 * Consider need to reallocate a fragment. 
181 */ 182 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 183 nsize = fragroundup(fs, size); 184 if (nsize <= osize) { 185 error = bread(vp, lbn, osize, NOCRED, &bp); 186 if (error) { 187 brelse(bp); 188 return (error); 189 } 190 bp->b_blkno = fsbtodb(fs, nb); 191 } else { 192 UFS_LOCK(ump); 193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 194 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 195 &dp->di_db[0]), osize, nsize, flags, 196 cred, &bp); 197 if (error) 198 return (error); 199 if (DOINGSOFTDEP(vp)) 200 softdep_setup_allocdirect(ip, lbn, 201 dbtofsb(fs, bp->b_blkno), nb, 202 nsize, osize, bp); 203 } 204 } else { 205 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 206 nsize = fragroundup(fs, size); 207 else 208 nsize = fs->fs_bsize; 209 UFS_LOCK(ump); 210 error = ffs_alloc(ip, lbn, 211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 212 nsize, flags, cred, &newb); 213 if (error) 214 return (error); 215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 216 bp->b_blkno = fsbtodb(fs, newb); 217 if (flags & BA_CLRBUF) 218 vfs_bio_clrbuf(bp); 219 if (DOINGSOFTDEP(vp)) 220 softdep_setup_allocdirect(ip, lbn, newb, 0, 221 nsize, 0, bp); 222 } 223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 224 ip->i_flag |= IN_CHANGE | IN_UPDATE; 225 *bpp = bp; 226 return (0); 227 } 228 /* 229 * Determine the number of levels of indirection. 230 */ 231 pref = 0; 232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 233 return(error); 234 #ifdef INVARIANTS 235 if (num < 1) 236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 237 #endif 238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 239 /* 240 * Fetch the first indirect block allocating if necessary. 
241 */ 242 --num; 243 nb = dp->di_ib[indirs[0].in_off]; 244 allocib = NULL; 245 allocblk = allociblk; 246 lbns_remfree = lbns; 247 if (nb == 0) { 248 UFS_LOCK(ump); 249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, 250 (ufs1_daddr_t *)0); 251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 252 flags, cred, &newb)) != 0) { 253 curthread_pflags_restore(saved_inbdflush); 254 return (error); 255 } 256 pref = newb + fs->fs_frag; 257 nb = newb; 258 *allocblk++ = nb; 259 *lbns_remfree++ = indirs[1].in_lbn; 260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); 261 bp->b_blkno = fsbtodb(fs, nb); 262 vfs_bio_clrbuf(bp); 263 if (DOINGSOFTDEP(vp)) { 264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 265 newb, 0, fs->fs_bsize, 0, bp); 266 bdwrite(bp); 267 } else { 268 /* 269 * Write synchronously so that indirect blocks 270 * never point at garbage. 271 */ 272 if (DOINGASYNC(vp)) 273 bdwrite(bp); 274 else if ((error = bwrite(bp)) != 0) 275 goto fail; 276 } 277 allocib = &dp->di_ib[indirs[0].in_off]; 278 *allocib = nb; 279 ip->i_flag |= IN_CHANGE | IN_UPDATE; 280 } 281 /* 282 * Fetch through the indirect blocks, allocating as necessary. 
283 */ 284 retry: 285 for (i = 1;;) { 286 error = bread(vp, 287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 288 if (error) { 289 brelse(bp); 290 goto fail; 291 } 292 bap = (ufs1_daddr_t *)bp->b_data; 293 nb = bap[indirs[i].in_off]; 294 if (i == num) 295 break; 296 i += 1; 297 if (nb != 0) { 298 bqrelse(bp); 299 continue; 300 } 301 UFS_LOCK(ump); 302 if (pref == 0) 303 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, 304 (ufs1_daddr_t *)0); 305 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 306 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 307 brelse(bp); 308 if (++reclaimed == 1) { 309 UFS_LOCK(ump); 310 softdep_request_cleanup(fs, vp, cred, 311 FLUSH_BLOCKS_WAIT); 312 UFS_UNLOCK(ump); 313 goto retry; 314 } 315 if (ppsratecheck(&lastfail, &curfail, 1)) { 316 ffs_fserr(fs, ip->i_number, "filesystem full"); 317 uprintf("\n%s: write failed, filesystem " 318 "is full\n", fs->fs_fsmnt); 319 } 320 goto fail; 321 } 322 pref = newb + fs->fs_frag; 323 nb = newb; 324 *allocblk++ = nb; 325 *lbns_remfree++ = indirs[i].in_lbn; 326 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 327 nbp->b_blkno = fsbtodb(fs, nb); 328 vfs_bio_clrbuf(nbp); 329 if (DOINGSOFTDEP(vp)) { 330 softdep_setup_allocindir_meta(nbp, ip, bp, 331 indirs[i - 1].in_off, nb); 332 bdwrite(nbp); 333 } else { 334 /* 335 * Write synchronously so that indirect blocks 336 * never point at garbage. 337 */ 338 if ((error = bwrite(nbp)) != 0) { 339 brelse(bp); 340 goto fail; 341 } 342 } 343 bap[indirs[i - 1].in_off] = nb; 344 if (allocib == NULL && unwindidx < 0) 345 unwindidx = i - 1; 346 /* 347 * If required, write synchronously, otherwise use 348 * delayed write. 349 */ 350 if (flags & IO_SYNC) { 351 bwrite(bp); 352 } else { 353 if (bp->b_bufsize == fs->fs_bsize) 354 bp->b_flags |= B_CLUSTEROK; 355 bdwrite(bp); 356 } 357 } 358 /* 359 * If asked only for the indirect block, then return it. 
360 */ 361 if (flags & BA_METAONLY) { 362 curthread_pflags_restore(saved_inbdflush); 363 *bpp = bp; 364 return (0); 365 } 366 /* 367 * Get the data block, allocating if necessary. 368 */ 369 if (nb == 0) { 370 UFS_LOCK(ump); 371 if (pref == 0) 372 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, 373 &bap[0]); 374 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 375 flags | IO_BUFLOCKED, cred, &newb); 376 if (error) { 377 brelse(bp); 378 if (++reclaimed == 1) { 379 UFS_LOCK(ump); 380 softdep_request_cleanup(fs, vp, cred, 381 FLUSH_BLOCKS_WAIT); 382 UFS_UNLOCK(ump); 383 goto retry; 384 } 385 if (ppsratecheck(&lastfail, &curfail, 1)) { 386 ffs_fserr(fs, ip->i_number, "filesystem full"); 387 uprintf("\n%s: write failed, filesystem " 388 "is full\n", fs->fs_fsmnt); 389 } 390 goto fail; 391 } 392 nb = newb; 393 *allocblk++ = nb; 394 *lbns_remfree++ = lbn; 395 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 396 nbp->b_blkno = fsbtodb(fs, nb); 397 if (flags & BA_CLRBUF) 398 vfs_bio_clrbuf(nbp); 399 if (DOINGSOFTDEP(vp)) 400 softdep_setup_allocindir_page(ip, lbn, bp, 401 indirs[i].in_off, nb, 0, nbp); 402 bap[indirs[i].in_off] = nb; 403 /* 404 * If required, write synchronously, otherwise use 405 * delayed write. 
406 */ 407 if (flags & IO_SYNC) { 408 bwrite(bp); 409 } else { 410 if (bp->b_bufsize == fs->fs_bsize) 411 bp->b_flags |= B_CLUSTEROK; 412 bdwrite(bp); 413 } 414 curthread_pflags_restore(saved_inbdflush); 415 *bpp = nbp; 416 return (0); 417 } 418 brelse(bp); 419 if (flags & BA_CLRBUF) { 420 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 421 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 422 error = cluster_read(vp, ip->i_size, lbn, 423 (int)fs->fs_bsize, NOCRED, 424 MAXBSIZE, seqcount, gbflags, &nbp); 425 } else { 426 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, 427 gbflags, &nbp); 428 } 429 if (error) { 430 brelse(nbp); 431 goto fail; 432 } 433 } else { 434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 435 nbp->b_blkno = fsbtodb(fs, nb); 436 } 437 curthread_pflags_restore(saved_inbdflush); 438 *bpp = nbp; 439 return (0); 440 fail: 441 curthread_pflags_restore(saved_inbdflush); 442 /* 443 * If we have failed to allocate any blocks, simply return the error. 444 * This is the usual case and avoids the need to fsync the file. 445 */ 446 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 447 return (error); 448 /* 449 * If we have failed part way through block allocation, we 450 * have to deallocate any indirect blocks that we have allocated. 451 * We have to fsync the file before we start to get rid of all 452 * of its dependencies so that we do not leave them dangling. 453 * We have to sync it at the end so that the soft updates code 454 * does not find any untracked changes. Although this is really 455 * slow, running out of disk space is not expected to be a common 456 * occurrence. The error return from fsync is ignored as we already 457 * have an error to return to the user. 
458 * 459 * XXX Still have to journal the free below 460 */ 461 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 462 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 463 blkp < allocblk; blkp++, lbns_remfree++) { 464 /* 465 * We shall not leave the freed blocks on the vnode 466 * buffer object lists. 467 */ 468 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 469 if (bp != NULL) { 470 bp->b_flags |= (B_INVAL | B_RELBUF); 471 bp->b_flags &= ~B_ASYNC; 472 brelse(bp); 473 } 474 deallocated += fs->fs_bsize; 475 } 476 if (allocib != NULL) { 477 *allocib = 0; 478 } else if (unwindidx >= 0) { 479 int r; 480 481 r = bread(vp, indirs[unwindidx].in_lbn, 482 (int)fs->fs_bsize, NOCRED, &bp); 483 if (r) { 484 panic("Could not unwind indirect block, error %d", r); 485 brelse(bp); 486 } else { 487 bap = (ufs1_daddr_t *)bp->b_data; 488 bap[indirs[unwindidx].in_off] = 0; 489 if (flags & IO_SYNC) { 490 bwrite(bp); 491 } else { 492 if (bp->b_bufsize == fs->fs_bsize) 493 bp->b_flags |= B_CLUSTEROK; 494 bdwrite(bp); 495 } 496 } 497 } 498 if (deallocated) { 499 #ifdef QUOTA 500 /* 501 * Restore user's disk quota because allocation failed. 502 */ 503 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 504 #endif 505 dp->di_blocks -= btodb(deallocated); 506 ip->i_flag |= IN_CHANGE | IN_UPDATE; 507 } 508 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 509 /* 510 * After the buffers are invalidated and on-disk pointers are 511 * cleared, free the blocks. 512 */ 513 for (blkp = allociblk; blkp < allocblk; blkp++) { 514 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 515 ip->i_number, vp->v_type, NULL); 516 } 517 return (error); 518 } 519 520 /* 521 * Balloc defines the structure of file system storage 522 * by allocating the physical blocks on a device given 523 * the inode and the logical block number in a file. 524 * This is the allocation strategy for UFS2. Above is 525 * the allocation strategy for UFS1. 
526 */ 527 int 528 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 529 struct ucred *cred, int flags, struct buf **bpp) 530 { 531 struct inode *ip; 532 struct ufs2_dinode *dp; 533 ufs_lbn_t lbn, lastlbn; 534 struct fs *fs; 535 struct buf *bp, *nbp; 536 struct ufsmount *ump; 537 struct indir indirs[NIADDR + 2]; 538 ufs2_daddr_t nb, newb, *bap, pref; 539 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 540 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 541 int deallocated, osize, nsize, num, i, error; 542 int unwindidx = -1; 543 int saved_inbdflush; 544 static struct timeval lastfail; 545 static int curfail; 546 int gbflags, reclaimed; 547 548 ip = VTOI(vp); 549 dp = ip->i_din2; 550 fs = ip->i_fs; 551 ump = ip->i_ump; 552 lbn = lblkno(fs, startoffset); 553 size = blkoff(fs, startoffset) + size; 554 reclaimed = 0; 555 if (size > fs->fs_bsize) 556 panic("ffs_balloc_ufs2: blk too big"); 557 *bpp = NULL; 558 if (lbn < 0) 559 return (EFBIG); 560 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; 561 562 if (DOINGSOFTDEP(vp)) 563 softdep_prealloc(vp, MNT_WAIT); 564 565 /* 566 * Check for allocating external data. 567 */ 568 if (flags & IO_EXT) { 569 if (lbn >= NXADDR) 570 return (EFBIG); 571 /* 572 * If the next write will extend the data into a new block, 573 * and the data is currently composed of a fragment 574 * this fragment has to be extended to be a full block. 
575 */ 576 lastlbn = lblkno(fs, dp->di_extsize); 577 if (lastlbn < lbn) { 578 nb = lastlbn; 579 osize = sblksize(fs, dp->di_extsize, nb); 580 if (osize < fs->fs_bsize && osize > 0) { 581 UFS_LOCK(ump); 582 error = ffs_realloccg(ip, -1 - nb, 583 dp->di_extb[nb], 584 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 585 &dp->di_extb[0]), osize, 586 (int)fs->fs_bsize, flags, cred, &bp); 587 if (error) 588 return (error); 589 if (DOINGSOFTDEP(vp)) 590 softdep_setup_allocext(ip, nb, 591 dbtofsb(fs, bp->b_blkno), 592 dp->di_extb[nb], 593 fs->fs_bsize, osize, bp); 594 dp->di_extsize = smalllblktosize(fs, nb + 1); 595 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 596 bp->b_xflags |= BX_ALTDATA; 597 ip->i_flag |= IN_CHANGE; 598 if (flags & IO_SYNC) 599 bwrite(bp); 600 else 601 bawrite(bp); 602 } 603 } 604 /* 605 * All blocks are direct blocks 606 */ 607 if (flags & BA_METAONLY) 608 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 609 nb = dp->di_extb[lbn]; 610 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 611 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, 612 gbflags, &bp); 613 if (error) { 614 brelse(bp); 615 return (error); 616 } 617 bp->b_blkno = fsbtodb(fs, nb); 618 bp->b_xflags |= BX_ALTDATA; 619 *bpp = bp; 620 return (0); 621 } 622 if (nb != 0) { 623 /* 624 * Consider need to reallocate a fragment. 
625 */ 626 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 627 nsize = fragroundup(fs, size); 628 if (nsize <= osize) { 629 error = bread_gb(vp, -1 - lbn, osize, NOCRED, 630 gbflags, &bp); 631 if (error) { 632 brelse(bp); 633 return (error); 634 } 635 bp->b_blkno = fsbtodb(fs, nb); 636 bp->b_xflags |= BX_ALTDATA; 637 } else { 638 UFS_LOCK(ump); 639 error = ffs_realloccg(ip, -1 - lbn, 640 dp->di_extb[lbn], 641 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 642 &dp->di_extb[0]), osize, nsize, flags, 643 cred, &bp); 644 if (error) 645 return (error); 646 bp->b_xflags |= BX_ALTDATA; 647 if (DOINGSOFTDEP(vp)) 648 softdep_setup_allocext(ip, lbn, 649 dbtofsb(fs, bp->b_blkno), nb, 650 nsize, osize, bp); 651 } 652 } else { 653 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 654 nsize = fragroundup(fs, size); 655 else 656 nsize = fs->fs_bsize; 657 UFS_LOCK(ump); 658 error = ffs_alloc(ip, lbn, 659 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 660 nsize, flags, cred, &newb); 661 if (error) 662 return (error); 663 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); 664 bp->b_blkno = fsbtodb(fs, newb); 665 bp->b_xflags |= BX_ALTDATA; 666 if (flags & BA_CLRBUF) 667 vfs_bio_clrbuf(bp); 668 if (DOINGSOFTDEP(vp)) 669 softdep_setup_allocext(ip, lbn, newb, 0, 670 nsize, 0, bp); 671 } 672 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 673 ip->i_flag |= IN_CHANGE; 674 *bpp = bp; 675 return (0); 676 } 677 /* 678 * If the next write will extend the file into a new block, 679 * and the file is currently composed of a fragment 680 * this fragment has to be extended to be a full block. 
681 */ 682 lastlbn = lblkno(fs, ip->i_size); 683 if (lastlbn < NDADDR && lastlbn < lbn) { 684 nb = lastlbn; 685 osize = blksize(fs, ip, nb); 686 if (osize < fs->fs_bsize && osize > 0) { 687 UFS_LOCK(ump); 688 error = ffs_realloccg(ip, nb, dp->di_db[nb], 689 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 690 &dp->di_db[0]), osize, (int)fs->fs_bsize, 691 flags, cred, &bp); 692 if (error) 693 return (error); 694 if (DOINGSOFTDEP(vp)) 695 softdep_setup_allocdirect(ip, nb, 696 dbtofsb(fs, bp->b_blkno), 697 dp->di_db[nb], 698 fs->fs_bsize, osize, bp); 699 ip->i_size = smalllblktosize(fs, nb + 1); 700 dp->di_size = ip->i_size; 701 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 702 ip->i_flag |= IN_CHANGE | IN_UPDATE; 703 if (flags & IO_SYNC) 704 bwrite(bp); 705 else 706 bawrite(bp); 707 } 708 } 709 /* 710 * The first NDADDR blocks are direct blocks 711 */ 712 if (lbn < NDADDR) { 713 if (flags & BA_METAONLY) 714 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 715 nb = dp->di_db[lbn]; 716 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 717 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, 718 gbflags, &bp); 719 if (error) { 720 brelse(bp); 721 return (error); 722 } 723 bp->b_blkno = fsbtodb(fs, nb); 724 *bpp = bp; 725 return (0); 726 } 727 if (nb != 0) { 728 /* 729 * Consider need to reallocate a fragment. 
730 */ 731 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 732 nsize = fragroundup(fs, size); 733 if (nsize <= osize) { 734 error = bread_gb(vp, lbn, osize, NOCRED, 735 gbflags, &bp); 736 if (error) { 737 brelse(bp); 738 return (error); 739 } 740 bp->b_blkno = fsbtodb(fs, nb); 741 } else { 742 UFS_LOCK(ump); 743 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 744 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 745 &dp->di_db[0]), osize, nsize, flags, 746 cred, &bp); 747 if (error) 748 return (error); 749 if (DOINGSOFTDEP(vp)) 750 softdep_setup_allocdirect(ip, lbn, 751 dbtofsb(fs, bp->b_blkno), nb, 752 nsize, osize, bp); 753 } 754 } else { 755 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 756 nsize = fragroundup(fs, size); 757 else 758 nsize = fs->fs_bsize; 759 UFS_LOCK(ump); 760 error = ffs_alloc(ip, lbn, 761 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 762 &dp->di_db[0]), nsize, flags, cred, &newb); 763 if (error) 764 return (error); 765 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 766 bp->b_blkno = fsbtodb(fs, newb); 767 if (flags & BA_CLRBUF) 768 vfs_bio_clrbuf(bp); 769 if (DOINGSOFTDEP(vp)) 770 softdep_setup_allocdirect(ip, lbn, newb, 0, 771 nsize, 0, bp); 772 } 773 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 774 ip->i_flag |= IN_CHANGE | IN_UPDATE; 775 *bpp = bp; 776 return (0); 777 } 778 /* 779 * Determine the number of levels of indirection. 780 */ 781 pref = 0; 782 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 783 return(error); 784 #ifdef INVARIANTS 785 if (num < 1) 786 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 787 #endif 788 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 789 /* 790 * Fetch the first indirect block allocating if necessary. 
791 */ 792 --num; 793 nb = dp->di_ib[indirs[0].in_off]; 794 allocib = NULL; 795 allocblk = allociblk; 796 lbns_remfree = lbns; 797 if (nb == 0) { 798 UFS_LOCK(ump); 799 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, 800 (ufs2_daddr_t *)0); 801 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 802 flags, cred, &newb)) != 0) { 803 curthread_pflags_restore(saved_inbdflush); 804 return (error); 805 } 806 pref = newb + fs->fs_frag; 807 nb = newb; 808 *allocblk++ = nb; 809 *lbns_remfree++ = indirs[1].in_lbn; 810 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 811 GB_UNMAPPED); 812 bp->b_blkno = fsbtodb(fs, nb); 813 vfs_bio_clrbuf(bp); 814 if (DOINGSOFTDEP(vp)) { 815 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 816 newb, 0, fs->fs_bsize, 0, bp); 817 bdwrite(bp); 818 } else { 819 /* 820 * Write synchronously so that indirect blocks 821 * never point at garbage. 822 */ 823 if (DOINGASYNC(vp)) 824 bdwrite(bp); 825 else if ((error = bwrite(bp)) != 0) 826 goto fail; 827 } 828 allocib = &dp->di_ib[indirs[0].in_off]; 829 *allocib = nb; 830 ip->i_flag |= IN_CHANGE | IN_UPDATE; 831 } 832 /* 833 * Fetch through the indirect blocks, allocating as necessary. 
834 */ 835 retry: 836 for (i = 1;;) { 837 error = bread(vp, 838 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 839 if (error) { 840 brelse(bp); 841 goto fail; 842 } 843 bap = (ufs2_daddr_t *)bp->b_data; 844 nb = bap[indirs[i].in_off]; 845 if (i == num) 846 break; 847 i += 1; 848 if (nb != 0) { 849 bqrelse(bp); 850 continue; 851 } 852 UFS_LOCK(ump); 853 if (pref == 0) 854 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, 855 (ufs2_daddr_t *)0); 856 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 857 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 858 brelse(bp); 859 if (++reclaimed == 1) { 860 UFS_LOCK(ump); 861 softdep_request_cleanup(fs, vp, cred, 862 FLUSH_BLOCKS_WAIT); 863 UFS_UNLOCK(ump); 864 goto retry; 865 } 866 if (ppsratecheck(&lastfail, &curfail, 1)) { 867 ffs_fserr(fs, ip->i_number, "filesystem full"); 868 uprintf("\n%s: write failed, filesystem " 869 "is full\n", fs->fs_fsmnt); 870 } 871 goto fail; 872 } 873 pref = newb + fs->fs_frag; 874 nb = newb; 875 *allocblk++ = nb; 876 *lbns_remfree++ = indirs[i].in_lbn; 877 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 878 GB_UNMAPPED); 879 nbp->b_blkno = fsbtodb(fs, nb); 880 vfs_bio_clrbuf(nbp); 881 if (DOINGSOFTDEP(vp)) { 882 softdep_setup_allocindir_meta(nbp, ip, bp, 883 indirs[i - 1].in_off, nb); 884 bdwrite(nbp); 885 } else { 886 /* 887 * Write synchronously so that indirect blocks 888 * never point at garbage. 889 */ 890 if ((error = bwrite(nbp)) != 0) { 891 brelse(bp); 892 goto fail; 893 } 894 } 895 bap[indirs[i - 1].in_off] = nb; 896 if (allocib == NULL && unwindidx < 0) 897 unwindidx = i - 1; 898 /* 899 * If required, write synchronously, otherwise use 900 * delayed write. 901 */ 902 if (flags & IO_SYNC) { 903 bwrite(bp); 904 } else { 905 if (bp->b_bufsize == fs->fs_bsize) 906 bp->b_flags |= B_CLUSTEROK; 907 bdwrite(bp); 908 } 909 } 910 /* 911 * If asked only for the indirect block, then return it. 
912 */ 913 if (flags & BA_METAONLY) { 914 curthread_pflags_restore(saved_inbdflush); 915 *bpp = bp; 916 return (0); 917 } 918 /* 919 * Get the data block, allocating if necessary. 920 */ 921 if (nb == 0) { 922 UFS_LOCK(ump); 923 if (pref == 0) 924 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, 925 &bap[0]); 926 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 927 flags | IO_BUFLOCKED, cred, &newb); 928 if (error) { 929 brelse(bp); 930 if (++reclaimed == 1) { 931 UFS_LOCK(ump); 932 softdep_request_cleanup(fs, vp, cred, 933 FLUSH_BLOCKS_WAIT); 934 UFS_UNLOCK(ump); 935 goto retry; 936 } 937 if (ppsratecheck(&lastfail, &curfail, 1)) { 938 ffs_fserr(fs, ip->i_number, "filesystem full"); 939 uprintf("\n%s: write failed, filesystem " 940 "is full\n", fs->fs_fsmnt); 941 } 942 goto fail; 943 } 944 nb = newb; 945 *allocblk++ = nb; 946 *lbns_remfree++ = lbn; 947 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 948 nbp->b_blkno = fsbtodb(fs, nb); 949 if (flags & BA_CLRBUF) 950 vfs_bio_clrbuf(nbp); 951 if (DOINGSOFTDEP(vp)) 952 softdep_setup_allocindir_page(ip, lbn, bp, 953 indirs[i].in_off, nb, 0, nbp); 954 bap[indirs[i].in_off] = nb; 955 /* 956 * If required, write synchronously, otherwise use 957 * delayed write. 958 */ 959 if (flags & IO_SYNC) { 960 bwrite(bp); 961 } else { 962 if (bp->b_bufsize == fs->fs_bsize) 963 bp->b_flags |= B_CLUSTEROK; 964 bdwrite(bp); 965 } 966 curthread_pflags_restore(saved_inbdflush); 967 *bpp = nbp; 968 return (0); 969 } 970 brelse(bp); 971 /* 972 * If requested clear invalid portions of the buffer. If we 973 * have to do a read-before-write (typical if BA_CLRBUF is set), 974 * try to do some read-ahead in the sequential case to reduce 975 * the number of I/O transactions. 
976 */ 977 if (flags & BA_CLRBUF) { 978 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 979 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 980 error = cluster_read(vp, ip->i_size, lbn, 981 (int)fs->fs_bsize, NOCRED, 982 MAXBSIZE, seqcount, gbflags, &nbp); 983 } else { 984 error = bread_gb(vp, lbn, (int)fs->fs_bsize, 985 NOCRED, gbflags, &nbp); 986 } 987 if (error) { 988 brelse(nbp); 989 goto fail; 990 } 991 } else { 992 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 993 nbp->b_blkno = fsbtodb(fs, nb); 994 } 995 curthread_pflags_restore(saved_inbdflush); 996 *bpp = nbp; 997 return (0); 998 fail: 999 curthread_pflags_restore(saved_inbdflush); 1000 /* 1001 * If we have failed to allocate any blocks, simply return the error. 1002 * This is the usual case and avoids the need to fsync the file. 1003 */ 1004 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 1005 return (error); 1006 /* 1007 * If we have failed part way through block allocation, we 1008 * have to deallocate any indirect blocks that we have allocated. 1009 * We have to fsync the file before we start to get rid of all 1010 * of its dependencies so that we do not leave them dangling. 1011 * We have to sync it at the end so that the soft updates code 1012 * does not find any untracked changes. Although this is really 1013 * slow, running out of disk space is not expected to be a common 1014 * occurrence. The error return from fsync is ignored as we already 1015 * have an error to return to the user. 1016 * 1017 * XXX Still have to journal the free below 1018 */ 1019 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1020 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 1021 blkp < allocblk; blkp++, lbns_remfree++) { 1022 /* 1023 * We shall not leave the freed blocks on the vnode 1024 * buffer object lists. 
1025 */ 1026 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 1027 if (bp != NULL) { 1028 bp->b_flags |= (B_INVAL | B_RELBUF); 1029 bp->b_flags &= ~B_ASYNC; 1030 brelse(bp); 1031 } 1032 deallocated += fs->fs_bsize; 1033 } 1034 if (allocib != NULL) { 1035 *allocib = 0; 1036 } else if (unwindidx >= 0) { 1037 int r; 1038 1039 r = bread(vp, indirs[unwindidx].in_lbn, 1040 (int)fs->fs_bsize, NOCRED, &bp); 1041 if (r) { 1042 panic("Could not unwind indirect block, error %d", r); 1043 brelse(bp); 1044 } else { 1045 bap = (ufs2_daddr_t *)bp->b_data; 1046 bap[indirs[unwindidx].in_off] = 0; 1047 if (flags & IO_SYNC) { 1048 bwrite(bp); 1049 } else { 1050 if (bp->b_bufsize == fs->fs_bsize) 1051 bp->b_flags |= B_CLUSTEROK; 1052 bdwrite(bp); 1053 } 1054 } 1055 } 1056 if (deallocated) { 1057 #ifdef QUOTA 1058 /* 1059 * Restore user's disk quota because allocation failed. 1060 */ 1061 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 1062 #endif 1063 dp->di_blocks -= btodb(deallocated); 1064 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1065 } 1066 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1067 /* 1068 * After the buffers are invalidated and on-disk pointers are 1069 * cleared, free the blocks. 1070 */ 1071 for (blkp = allociblk; blkp < allocblk; blkp++) { 1072 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 1073 ip->i_number, vp->v_type, NULL); 1074 } 1075 return (error); 1076 } 1077