1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1.  Below is
 * the allocation strategy for UFS2.
 *
 * Parameters:
 *	vp		vnode of the file being allocated into
 *	startoffset	byte offset in the file of the coming write
 *	size		number of bytes to be written; together with the
 *			intra-block offset it must not exceed fs_bsize
 *	cred		credentials charged for the allocation
 *	flags		IO_SYNC, BA_CLRBUF, BA_METAONLY, BA_SEQMASK, ...
 *	bpp		on success *bpp is the locked buffer for the
 *			allocated data block (or, for BA_METAONLY, the
 *			last indirect block on the path)
 *
 * Returns 0 on success, EOPNOTSUPP for IO_EXT (UFS1 has no extended
 * attribute area), EFBIG for a negative logical block, or an error
 * propagated from bread()/ffs_alloc()/ffs_realloccg().
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/* Record of blocks allocated in this call, for unwinding on error. */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;
	/* Rate-limit "filesystem full" console messages across all callers. */
	static struct timeval lastfail;
	static int curfail;
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Size of the allocation measured from the start of the block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): UFS_LOCK is taken here with no
			 * visible unlock on the success path; presumably
			 * ffs_realloccg() consumes/releases the ump lock —
			 * confirm against ffs_alloc.c.
			 */
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			   cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by the file. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment in place or move it. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * Mark this thread as in balloc so the buf daemon does not
	 * recurse; restored on every exit path below.
	 */
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * On allocation failure we retry once after asking softdep to
	 * flush reclaimable blocks (see "++reclaimed" below).
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/* First failure: try to reclaim space, then retry. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot reclaim/retry as above. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Clear the inode's pointer to the first new indirect. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/* NOTE(review): brelse() is unreachable after panic. */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2.  Above is
 * the allocation strategy for UFS1.
 *
 * Parameters mirror ffs_balloc_ufs1() above; unlike UFS1 this
 * routine also supports IO_EXT, allocating in the inode's extended
 * attribute area (di_extb[], addressed with negative logical block
 * numbers, at most NXADDR direct blocks, no indirection).
 *
 * Returns 0 on success, EFBIG for a negative logical block or an
 * out-of-range IO_EXT block, or an error propagated from
 * bread()/ffs_alloc()/ffs_realloccg().
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/* Record of blocks allocated in this call, for unwinding on error. */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	/* Rate-limit "filesystem full" console messages across all callers. */
	static struct timeval lastfail;
	static int curfail;
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	/* Size of the allocation measured from the start of the block. */
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				/* Ext blocks use negated lbns: -1 - nb. */
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		/* Block already allocated and fully covered by ext data. */
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment in place or move it. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Block already allocated and fully covered by the file. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment in place or move it. */
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/* No block yet: allocate a fragment or full block. */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * Mark this thread as in balloc so the buf daemon does not
	 * recurse; restored on every exit path below.
	 */
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * On allocation failure we retry once after asking softdep to
	 * flush reclaimable blocks (see "++reclaimed" below).
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			/* First failure: try to reclaim space, then retry. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Hook the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			/* Same one-shot reclaim/retry as above. */
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
		if (bp != NULL) {
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		/* Clear the inode's pointer to the first new indirect. */
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/* NOTE(review): brelse() is unreachable after panic. */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL);
	}
	return (error);
}