1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * Arguments:
 *	vp		vnode of the file; its inode is obtained with VTOI().
 *	startoffset	byte offset in the file; selects the logical block.
 *	size		byte count starting at startoffset. The offset within
 *			the block plus size must not exceed fs_bsize, otherwise
 *			the function panics.
 *	cred		credentials handed to the allocator and quota calls.
 *	flags		tested here for IO_EXT, IO_SYNC, BA_METAONLY, BA_CLRBUF
 *			and the BA_SEQMASK sequential-access count; also passed
 *			through to the allocator.
 *	bpp		set to NULL on entry; on success receives the buffer
 *			for the requested block (or, with BA_METAONLY, the
 *			buffer of the last indirect block).
 *
 * Returns 0 on success, EOPNOTSUPP if IO_EXT is requested, EFBIG for a
 * negative logical block number, or the error reported by a callee.
 *
 * NOTE(review): UFS_LOCK(ump) is taken immediately before each call to
 * ffs_alloc()/ffs_realloccg() and no matching UFS_UNLOCK appears in this
 * function on those paths; presumably the allocator releases the lock
 * before returning -- confirm against ffs_alloc.c.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/*
	 * Unwind bookkeeping for the indirect path:
	 *   allociblk/allocblk	 disk addresses of every block allocated by
	 *			 this call (allocblk is the fill pointer).
	 *   lbns/lbns_remfree	 logical block numbers of the buffers that
	 *			 go with those addresses, in the same order.
	 *   allocib		 address of the inode's indirect pointer if
	 *			 this call filled it in, else NULL.
	 *   unwindidx		 index into indirs[] of the first indirect
	 *			 block entry written by this call when the
	 *			 inode pointer itself was not newly set.
	 */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;
	/* Rate-limit state for the "filesystem full" message below. */
	static struct timeval lastfail;
	static int curfail;
	/* Counts allocation failures; only the first one triggers a retry. */
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* From here on, size is measured from the start of the block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			   cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			/* The file now ends exactly at the end of block nb. */
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block exists and the file already covers all of it. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this block
			 * lies at or beyond the current end of file,
			 * otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/* Restored with curthread_pflags_restore() on every exit below. */
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			/* Nothing allocated yet, so no unwinding is needed. */
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		/* Prefer the block right after this one for the next alloc. */
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/* Last level reached: nb is the data block address (or 0). */
		if (i == num)
			break;
		/*
		 * i now names the next-level block; bp/bap still refer to
		 * the parent, whose slot is indirs[i - 1].in_off.
		 */
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/*
			 * On the first failure ask soft updates to flush
			 * and restart the walk from the top; a second
			 * failure is reported and unwound.
			 */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the first parent entry written by this call so
		 * the fail path can clear it; not needed when the inode's
		 * own pointer (allocib) is the one to clear.
		 */
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot cleanup-and-retry as in the loop. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	/* Data block already exists; the indirect block is no longer needed. */
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		/* Caller does not need the old contents: skip the read. */
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	/* Clear the pointer that leads to the blocks being released. */
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): brelse() follows panic(); it is
			 * unreachable if panic() does not return.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Arguments:
 *	vp		vnode of the file; its inode is obtained with VTOI().
 *	startoffset	byte offset in the file (or, with IO_EXT, in the
 *			external attribute area); selects the logical block.
 *	size		byte count starting at startoffset. The offset within
 *			the block plus size must not exceed fs_bsize, otherwise
 *			the function panics.
 *	cred		credentials handed to the allocator and quota calls.
 *	flags		tested here for IO_EXT, IO_SYNC, BA_METAONLY, BA_CLRBUF
 *			and the BA_SEQMASK sequential-access count; also passed
 *			through to the allocator.
 *	bpp		set to NULL on entry; on success receives the buffer
 *			for the requested block (or, with BA_METAONLY, the
 *			buffer of the last indirect block).
 *
 * Returns 0 on success, EFBIG for a negative logical block number or an
 * external-data block at or beyond NXADDR, or the error reported by a
 * callee.
 *
 * NOTE(review): UFS_LOCK(ump) is taken immediately before each call to
 * ffs_alloc()/ffs_realloccg() and no matching UFS_UNLOCK appears in this
 * function on those paths; presumably the allocator releases the lock
 * before returning -- confirm against ffs_alloc.c.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/*
	 * Unwind bookkeeping for the indirect path:
	 *   allociblk/allocblk	 disk addresses of every block allocated by
	 *			 this call (allocblk is the fill pointer).
	 *   lbns/lbns_remfree	 logical block numbers of the buffers that
	 *			 go with those addresses, in the same order.
	 *   allocib		 address of the inode's indirect pointer if
	 *			 this call filled it in, else NULL.
	 *   unwindidx		 index into indirs[] of the first indirect
	 *			 block entry written by this call when the
	 *			 inode pointer itself was not newly set.
	 */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	/* Rate-limit state for the "filesystem full" message below. */
	static struct timeval lastfail;
	static int curfail;
	/* Counts allocation failures; only the first one triggers a retry. */
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* From here on, size is measured from the start of the block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				/*
				 * External-data buffers are addressed with
				 * the negative logical block -1 - n.
				 */
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		/* Block exists and the ext area already covers all of it. */
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this block
			 * lies at or beyond the current end of the ext
			 * area, otherwise a full block.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			/*
			 * NOTE(review): lbn is passed here as-is while the
			 * buffer calls use -1 - lbn; confirm this matches
			 * what ffs_alloc() expects.
			 */
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
				ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_db[0]), osize, (int)fs->fs_bsize,
				    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			/* The file now ends exactly at the end of block nb. */
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block exists and the file already covers all of it. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				       &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this block
			 * lies at or beyond the current end of file,
			 * otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/* Restored with curthread_pflags_restore() on every exit below. */
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			/* Nothing allocated yet, so no unwinding is needed. */
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		/* Prefer the block right after this one for the next alloc. */
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/* Last level reached: nb is the data block address (or 0). */
		if (i == num)
			break;
		/*
		 * i now names the next-level block; bp/bap still refer to
		 * the parent, whose slot is indirs[i - 1].in_off.
		 */
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/*
			 * On the first failure ask soft updates to flush
			 * and restart the walk from the top; a second
			 * failure is reported and unwound.
			 */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the first parent entry written by this call so
		 * the fail path can clear it; not needed when the inode's
		 * own pointer (allocib) is the one to clear.
		 */
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot cleanup-and-retry as in the loop. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	/* Data block already exists; the indirect block is no longer needed. */
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		/* Caller does not need the old contents: skip the read. */
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	/* Clear the pointer that leads to the blocks being released. */
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): brelse() follows panic(); it is
			 * unreachable if panic() does not return.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}