1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 *
 * vp          - vnode of the file being allocated into
 * startoffset - byte offset in the file where the write will begin
 * size        - number of bytes to be written at startoffset
 * cred        - credentials charged for any new allocation
 * flags       - IO_SYNC / IO_EXT plus BA_CLRBUF, BA_METAONLY and the
 *               BA_SEQMASK sequential-access hint
 * bpp         - on success *bpp is the buffer covering the target block
 *
 * Returns 0 on success, or an errno: EOPNOTSUPP for IO_EXT (UFS1 has no
 * external-attribute area), EFBIG for a negative block number, or the
 * error from an allocation or read.  On a partial failure every block
 * allocated by this call is unwound and freed (see the fail: label).
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
    struct inode *ip;
    struct ufs1_dinode *dp;
    ufs_lbn_t lbn, lastlbn;
    struct fs *fs;
    ufs1_daddr_t nb;
    struct buf *bp, *nbp;
    struct ufsmount *ump;
    struct indir indirs[NIADDR + 2];
    int deallocated, osize, nsize, num, i, error;
    ufs2_daddr_t newb;
    ufs1_daddr_t *bap, pref;
    /* allociblk[]/lbns[] record every block allocated by this call so the
     * fail: path can invalidate the buffers and free the blocks again. */
    ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
    ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
    int unwindidx = -1;

    ip = VTOI(vp);
    dp = ip->i_din1;
    fs = ip->i_fs;
    ump = ip->i_ump;
    lbn = lblkno(fs, startoffset);
    /* size now spans from the start of the block to the end of the write */
    size = blkoff(fs, startoffset) + size;
    if (size > fs->fs_bsize)
        panic("ffs_balloc_ufs1: blk too big");
    *bpp = NULL;
    if (flags & IO_EXT)
        return (EOPNOTSUPP);
    if (lbn < 0)
        return (EFBIG);

    /*
     * If the next write will extend the file into a new block,
     * and the file is currently composed of a fragment
     * this fragment has to be extended to be a full block.
     */
    lastlbn = lblkno(fs, ip->i_size);
    if (lastlbn < NDADDR && lastlbn < lbn) {
        nb = lastlbn;
        osize = blksize(fs, ip, nb);
        if (osize < fs->fs_bsize && osize > 0) {
            /*
             * NOTE(review): UFS_LOCK is taken here and never released on
             * this path; presumably ffs_realloccg/ffs_alloc consume and
             * drop it internally -- confirm against ffs_alloc.c.
             */
            UFS_LOCK(ump);
            error = ffs_realloccg(ip, nb, dp->di_db[nb],
                ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
                &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
            if (error)
                return (error);
            if (DOINGSOFTDEP(vp))
                softdep_setup_allocdirect(ip, nb,
                    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
                    fs->fs_bsize, osize, bp);
            ip->i_size = smalllblktosize(fs, nb + 1);
            dp->di_size = ip->i_size;
            dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
            ip->i_flag |= IN_CHANGE | IN_UPDATE;
            if (flags & IO_SYNC)
                bwrite(bp);
            else
                bawrite(bp);
        }
    }
    /*
     * The first NDADDR blocks are direct blocks
     */
    if (lbn < NDADDR) {
        if (flags & BA_METAONLY)
            panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
        nb = dp->di_db[lbn];
        /* Block exists and is full-sized: just read it and return. */
        if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
            error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
            if (error) {
                brelse(bp);
                return (error);
            }
            bp->b_blkno = fsbtodb(fs, nb);
            *bpp = bp;
            return (0);
        }
        if (nb != 0) {
            /*
             * Consider need to reallocate a fragment.
             */
            osize = fragroundup(fs, blkoff(fs, ip->i_size));
            nsize = fragroundup(fs, size);
            if (nsize <= osize) {
                /* Existing fragment is already big enough. */
                error = bread(vp, lbn, osize, NOCRED, &bp);
                if (error) {
                    brelse(bp);
                    return (error);
                }
                bp->b_blkno = fsbtodb(fs, nb);
            } else {
                /* Grow the fragment, possibly relocating it. */
                UFS_LOCK(ump);
                error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
                    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
                    &dp->di_db[0]), osize, nsize, cred, &bp);
                if (error)
                    return (error);
                if (DOINGSOFTDEP(vp))
                    softdep_setup_allocdirect(ip, lbn,
                        dbtofsb(fs, bp->b_blkno), nb,
                        nsize, osize, bp);
            }
        } else {
            /* No block yet: allocate a fragment or a full block
             * depending on whether this is the last block of the file. */
            if (ip->i_size < smalllblktosize(fs, lbn + 1))
                nsize = fragroundup(fs, size);
            else
                nsize = fs->fs_bsize;
            UFS_LOCK(ump);
            error = ffs_alloc(ip, lbn,
                ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
                nsize, cred, &newb);
            if (error)
                return (error);
            bp = getblk(vp, lbn, nsize, 0, 0, 0);
            bp->b_blkno = fsbtodb(fs, newb);
            if (flags & BA_CLRBUF)
                vfs_bio_clrbuf(bp);
            if (DOINGSOFTDEP(vp))
                softdep_setup_allocdirect(ip, lbn, newb, 0,
                    nsize, 0, bp);
        }
        dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
        *bpp = bp;
        return (0);
    }
    /*
     * Determine the number of levels of indirection.
     */
    pref = 0;
    if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
        return (error);
#ifdef INVARIANTS
    if (num < 1)
        panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
    /*
     * Fetch the first indirect block allocating if necessary.
     */
    --num;
    nb = dp->di_ib[indirs[0].in_off];
    allocib = NULL;
    allocblk = allociblk;
    lbns_remfree = lbns;
    if (nb == 0) {
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
            cred, &newb)) != 0)
            return (error);
        nb = newb;
        /* Record the block and its lbn for the fail: unwind path. */
        *allocblk++ = nb;
        *lbns_remfree++ = indirs[1].in_lbn;
        bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
        bp->b_blkno = fsbtodb(fs, nb);
        vfs_bio_clrbuf(bp);
        if (DOINGSOFTDEP(vp)) {
            softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
                newb, 0, fs->fs_bsize, 0, bp);
            bdwrite(bp);
        } else {
            /*
             * Write synchronously so that indirect blocks
             * never point at garbage.
             */
            if (DOINGASYNC(vp))
                bdwrite(bp);
            else if ((error = bwrite(bp)) != 0)
                goto fail;
        }
        allocib = &dp->di_ib[indirs[0].in_off];
        *allocib = nb;
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    /*
     * Fetch through the indirect blocks, allocating as necessary.
     */
    for (i = 1;;) {
        error = bread(vp,
            indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
        if (error) {
            brelse(bp);
            goto fail;
        }
        bap = (ufs1_daddr_t *)bp->b_data;
        nb = bap[indirs[i].in_off];
        if (i == num)
            break;
        i += 1;
        if (nb != 0) {
            bqrelse(bp);
            continue;
        }
        UFS_LOCK(ump);
        if (pref == 0)
            pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
        if ((error =
            ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
            brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
        *lbns_remfree++ = indirs[i].in_lbn;
        nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
        vfs_bio_clrbuf(nbp);
        if (DOINGSOFTDEP(vp)) {
            softdep_setup_allocindir_meta(nbp, ip, bp,
                indirs[i - 1].in_off, nb);
            bdwrite(nbp);
        } else {
            /*
             * Write synchronously so that indirect blocks
             * never point at garbage.
             */
            if ((error = bwrite(nbp)) != 0) {
                brelse(bp);
                goto fail;
            }
        }
        /* Link the new indirect block into its parent. */
        bap[indirs[i - 1].in_off] = nb;
        if (allocib == NULL && unwindidx < 0)
            unwindidx = i - 1;
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if (flags & IO_SYNC) {
            bwrite(bp);
        } else {
            if (bp->b_bufsize == fs->fs_bsize)
                bp->b_flags |= B_CLUSTEROK;
            bdwrite(bp);
        }
    }
    /*
     * If asked only for the indirect block, then return it.
     */
    if (flags & BA_METAONLY) {
        *bpp = bp;
        return (0);
    }
    /*
     * Get the data block, allocating if necessary.
     */
    if (nb == 0) {
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
        error = ffs_alloc(ip,
            lbn, pref, (int)fs->fs_bsize, cred, &newb);
        if (error) {
            brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
        *lbns_remfree++ = lbn;
        nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
        if (flags & BA_CLRBUF)
            vfs_bio_clrbuf(nbp);
        if (DOINGSOFTDEP(vp))
            softdep_setup_allocindir_page(ip, lbn, bp,
                indirs[i].in_off, nb, 0, nbp);
        bap[indirs[i].in_off] = nb;
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if (flags & IO_SYNC) {
            bwrite(bp);
        } else {
            if (bp->b_bufsize == fs->fs_bsize)
                bp->b_flags |= B_CLUSTEROK;
            bdwrite(bp);
        }
        *bpp = nbp;
        return (0);
    }
    brelse(bp);
    /* Data block already exists: read it (with optional read-ahead when
     * the access pattern looks sequential) or just map it. */
    if (flags & BA_CLRBUF) {
        int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
        if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
            error = cluster_read(vp, ip->i_size, lbn,
                (int)fs->fs_bsize, NOCRED,
                MAXBSIZE, seqcount, &nbp);
        } else {
            error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
        }
        if (error) {
            brelse(nbp);
            goto fail;
        }
    } else {
        nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
    }
    *bpp = nbp;
    return (0);
fail:
    /*
     * If we have failed to allocate any blocks, simply return the error.
     * This is the usual case and avoids the need to fsync the file.
     */
    if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
        return (error);
    /*
     * If we have failed part way through block allocation, we
     * have to deallocate any indirect blocks that we have allocated.
     * We have to fsync the file before we start to get rid of all
     * of its dependencies so that we do not leave them dangling.
     * We have to sync it at the end so that the soft updates code
     * does not find any untracked changes. Although this is really
     * slow, running out of disk space is not expected to be a common
     * occurence. The error return from fsync is ignored as we already
     * have an error to return to the user.
     */
    (void) ffs_syncvnode(vp, MNT_WAIT);
    for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
         blkp < allocblk; blkp++, lbns_remfree++) {
        /*
         * We shall not leave the freed blocks on the vnode
         * buffer object lists.
         */
        bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
        if (bp != NULL) {
            bp->b_flags |= (B_INVAL | B_RELBUF);
            bp->b_flags &= ~B_ASYNC;
            brelse(bp);
        }
        deallocated += fs->fs_bsize;
    }
    if (allocib != NULL) {
        /* First indirect was new: clearing the inode pointer suffices. */
        *allocib = 0;
    } else if (unwindidx >= 0) {
        int r;

        r = bread(vp, indirs[unwindidx].in_lbn,
            (int)fs->fs_bsize, NOCRED, &bp);
        if (r) {
            panic("Could not unwind indirect block, error %d", r);
            /* NOTE(review): unreachable if panic() does not return. */
            brelse(bp);
        } else {
            bap = (ufs1_daddr_t *)bp->b_data;
            bap[indirs[unwindidx].in_off] = 0;
            if (flags & IO_SYNC) {
                bwrite(bp);
            } else {
                if (bp->b_bufsize == fs->fs_bsize)
                    bp->b_flags |= B_CLUSTEROK;
                bdwrite(bp);
            }
        }
    }
    if (deallocated) {
#ifdef QUOTA
        /*
         * Restore user's disk quota because allocation failed.
         */
        (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
        dp->di_blocks -= btodb(deallocated);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    (void) ffs_syncvnode(vp, MNT_WAIT);
    /*
     * After the buffers are invalidated and on-disk pointers are
     * cleared, free the blocks.
     */
    for (blkp = allociblk; blkp < allocblk; blkp++) {
        ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
            ip->i_number);
    }
    return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 *
 * Same contract as ffs_balloc_ufs1() except that IO_EXT is supported:
 * it allocates in the inode's external-attribute area (di_extb[]),
 * whose blocks are addressed with negative logical block numbers
 * (-1 - lbn) and whose buffers are tagged BX_ALTDATA.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
    struct inode *ip;
    struct ufs2_dinode *dp;
    ufs_lbn_t lbn, lastlbn;
    struct fs *fs;
    struct buf *bp, *nbp;
    struct ufsmount *ump;
    struct indir indirs[NIADDR + 2];
    ufs2_daddr_t nb, newb, *bap, pref;
    /* allociblk[]/lbns[] record every block allocated by this call so the
     * fail: path can invalidate the buffers and free the blocks again. */
    ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
    ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
    int deallocated, osize, nsize, num, i, error;
    int unwindidx = -1;

    ip = VTOI(vp);
    dp = ip->i_din2;
    fs = ip->i_fs;
    ump = ip->i_ump;
    lbn = lblkno(fs, startoffset);
    /* size now spans from the start of the block to the end of the write */
    size = blkoff(fs, startoffset) + size;
    if (size > fs->fs_bsize)
        panic("ffs_balloc_ufs2: blk too big");
    *bpp = NULL;
    if (lbn < 0)
        return (EFBIG);

    /*
     * Check for allocating external data.
     */
    if (flags & IO_EXT) {
        if (lbn >= NXADDR)
            return (EFBIG);
        /*
         * If the next write will extend the data into a new block,
         * and the data is currently composed of a fragment
         * this fragment has to be extended to be a full block.
         */
        lastlbn = lblkno(fs, dp->di_extsize);
        if (lastlbn < lbn) {
            nb = lastlbn;
            osize = sblksize(fs, dp->di_extsize, nb);
            if (osize < fs->fs_bsize && osize > 0) {
                /*
                 * NOTE(review): UFS_LOCK taken here, not released on
                 * this path; presumably ffs_realloccg/ffs_alloc drop
                 * it internally -- confirm against ffs_alloc.c.
                 */
                UFS_LOCK(ump);
                error = ffs_realloccg(ip, -1 - nb,
                    dp->di_extb[nb],
                    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
                    &dp->di_extb[0]), osize,
                    (int)fs->fs_bsize, cred, &bp);
                if (error)
                    return (error);
                if (DOINGSOFTDEP(vp))
                    softdep_setup_allocext(ip, nb,
                        dbtofsb(fs, bp->b_blkno),
                        dp->di_extb[nb],
                        fs->fs_bsize, osize, bp);
                dp->di_extsize = smalllblktosize(fs, nb + 1);
                dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
                bp->b_xflags |= BX_ALTDATA;
                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                if (flags & IO_SYNC)
                    bwrite(bp);
                else
                    bawrite(bp);
            }
        }
        /*
         * All blocks are direct blocks
         */
        if (flags & BA_METAONLY)
            panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
        nb = dp->di_extb[lbn];
        /* Block exists and is full-sized: just read it and return. */
        if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
            error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
            if (error) {
                brelse(bp);
                return (error);
            }
            bp->b_blkno = fsbtodb(fs, nb);
            bp->b_xflags |= BX_ALTDATA;
            *bpp = bp;
            return (0);
        }
        if (nb != 0) {
            /*
             * Consider need to reallocate a fragment.
             */
            osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
            nsize = fragroundup(fs, size);
            if (nsize <= osize) {
                /* Existing fragment is already big enough. */
                error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
                if (error) {
                    brelse(bp);
                    return (error);
                }
                bp->b_blkno = fsbtodb(fs, nb);
                bp->b_xflags |= BX_ALTDATA;
            } else {
                /* Grow the fragment, possibly relocating it. */
                UFS_LOCK(ump);
                error = ffs_realloccg(ip, -1 - lbn,
                    dp->di_extb[lbn],
                    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
                    &dp->di_extb[0]), osize, nsize, cred, &bp);
                if (error)
                    return (error);
                bp->b_xflags |= BX_ALTDATA;
                if (DOINGSOFTDEP(vp))
                    softdep_setup_allocext(ip, lbn,
                        dbtofsb(fs, bp->b_blkno), nb,
                        nsize, osize, bp);
            }
        } else {
            /* No block yet: allocate a fragment or a full block
             * depending on whether this is the last ext block. */
            if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
                nsize = fragroundup(fs, size);
            else
                nsize = fs->fs_bsize;
            UFS_LOCK(ump);
            error = ffs_alloc(ip, lbn,
                ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
                nsize, cred, &newb);
            if (error)
                return (error);
            bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
            bp->b_blkno = fsbtodb(fs, newb);
            bp->b_xflags |= BX_ALTDATA;
            if (flags & BA_CLRBUF)
                vfs_bio_clrbuf(bp);
            if (DOINGSOFTDEP(vp))
                softdep_setup_allocext(ip, lbn, newb, 0,
                    nsize, 0, bp);
        }
        dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
        *bpp = bp;
        return (0);
    }
    /*
     * If the next write will extend the file into a new block,
     * and the file is currently composed of a fragment
     * this fragment has to be extended to be a full block.
     */
    lastlbn = lblkno(fs, ip->i_size);
    if (lastlbn < NDADDR && lastlbn < lbn) {
        nb = lastlbn;
        osize = blksize(fs, ip, nb);
        if (osize < fs->fs_bsize && osize > 0) {
            UFS_LOCK(ump);
            error = ffs_realloccg(ip, nb, dp->di_db[nb],
                ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
                &dp->di_db[0]), osize, (int)fs->fs_bsize,
                cred, &bp);
            if (error)
                return (error);
            if (DOINGSOFTDEP(vp))
                softdep_setup_allocdirect(ip, nb,
                    dbtofsb(fs, bp->b_blkno),
                    dp->di_db[nb],
                    fs->fs_bsize, osize, bp);
            ip->i_size = smalllblktosize(fs, nb + 1);
            dp->di_size = ip->i_size;
            dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
            ip->i_flag |= IN_CHANGE | IN_UPDATE;
            if (flags & IO_SYNC)
                bwrite(bp);
            else
                bawrite(bp);
        }
    }
    /*
     * The first NDADDR blocks are direct blocks
     */
    if (lbn < NDADDR) {
        if (flags & BA_METAONLY)
            panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
        nb = dp->di_db[lbn];
        /* Block exists and is full-sized: just read it and return. */
        if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
            error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
            if (error) {
                brelse(bp);
                return (error);
            }
            bp->b_blkno = fsbtodb(fs, nb);
            *bpp = bp;
            return (0);
        }
        if (nb != 0) {
            /*
             * Consider need to reallocate a fragment.
             */
            osize = fragroundup(fs, blkoff(fs, ip->i_size));
            nsize = fragroundup(fs, size);
            if (nsize <= osize) {
                error = bread(vp, lbn, osize, NOCRED, &bp);
                if (error) {
                    brelse(bp);
                    return (error);
                }
                bp->b_blkno = fsbtodb(fs, nb);
            } else {
                UFS_LOCK(ump);
                error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
                    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
                    &dp->di_db[0]), osize, nsize, cred, &bp);
                if (error)
                    return (error);
                if (DOINGSOFTDEP(vp))
                    softdep_setup_allocdirect(ip, lbn,
                        dbtofsb(fs, bp->b_blkno), nb,
                        nsize, osize, bp);
            }
        } else {
            if (ip->i_size < smalllblktosize(fs, lbn + 1))
                nsize = fragroundup(fs, size);
            else
                nsize = fs->fs_bsize;
            UFS_LOCK(ump);
            error = ffs_alloc(ip, lbn,
                ffs_blkpref_ufs2(ip, lbn, (int)lbn,
                &dp->di_db[0]), nsize, cred, &newb);
            if (error)
                return (error);
            bp = getblk(vp, lbn, nsize, 0, 0, 0);
            bp->b_blkno = fsbtodb(fs, newb);
            if (flags & BA_CLRBUF)
                vfs_bio_clrbuf(bp);
            if (DOINGSOFTDEP(vp))
                softdep_setup_allocdirect(ip, lbn, newb, 0,
                    nsize, 0, bp);
        }
        dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
        *bpp = bp;
        return (0);
    }
    /*
     * Determine the number of levels of indirection.
     */
    pref = 0;
    if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
        return (error);
#ifdef INVARIANTS
    if (num < 1)
        panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
    /*
     * Fetch the first indirect block allocating if necessary.
     */
    --num;
    nb = dp->di_ib[indirs[0].in_off];
    allocib = NULL;
    allocblk = allociblk;
    lbns_remfree = lbns;
    if (nb == 0) {
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
            cred, &newb)) != 0)
            return (error);
        nb = newb;
        /* Record the block and its lbn for the fail: unwind path. */
        *allocblk++ = nb;
        *lbns_remfree++ = indirs[1].in_lbn;
        bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
        bp->b_blkno = fsbtodb(fs, nb);
        vfs_bio_clrbuf(bp);
        if (DOINGSOFTDEP(vp)) {
            softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
                newb, 0, fs->fs_bsize, 0, bp);
            bdwrite(bp);
        } else {
            /*
             * Write synchronously so that indirect blocks
             * never point at garbage.
             */
            if (DOINGASYNC(vp))
                bdwrite(bp);
            else if ((error = bwrite(bp)) != 0)
                goto fail;
        }
        allocib = &dp->di_ib[indirs[0].in_off];
        *allocib = nb;
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    /*
     * Fetch through the indirect blocks, allocating as necessary.
     */
    for (i = 1;;) {
        error = bread(vp,
            indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
        if (error) {
            brelse(bp);
            goto fail;
        }
        bap = (ufs2_daddr_t *)bp->b_data;
        nb = bap[indirs[i].in_off];
        if (i == num)
            break;
        i += 1;
        if (nb != 0) {
            bqrelse(bp);
            continue;
        }
        UFS_LOCK(ump);
        if (pref == 0)
            pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
        if ((error =
            ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
            brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
        *lbns_remfree++ = indirs[i].in_lbn;
        nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
        vfs_bio_clrbuf(nbp);
        if (DOINGSOFTDEP(vp)) {
            softdep_setup_allocindir_meta(nbp, ip, bp,
                indirs[i - 1].in_off, nb);
            bdwrite(nbp);
        } else {
            /*
             * Write synchronously so that indirect blocks
             * never point at garbage.
             */
            if ((error = bwrite(nbp)) != 0) {
                brelse(bp);
                goto fail;
            }
        }
        /* Link the new indirect block into its parent. */
        bap[indirs[i - 1].in_off] = nb;
        if (allocib == NULL && unwindidx < 0)
            unwindidx = i - 1;
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if (flags & IO_SYNC) {
            bwrite(bp);
        } else {
            if (bp->b_bufsize == fs->fs_bsize)
                bp->b_flags |= B_CLUSTEROK;
            bdwrite(bp);
        }
    }
    /*
     * If asked only for the indirect block, then return it.
     */
    if (flags & BA_METAONLY) {
        *bpp = bp;
        return (0);
    }
    /*
     * Get the data block, allocating if necessary.
     */
    if (nb == 0) {
        UFS_LOCK(ump);
        pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
        error = ffs_alloc(ip,
            lbn, pref, (int)fs->fs_bsize, cred, &newb);
        if (error) {
            brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
        *lbns_remfree++ = lbn;
        nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
        if (flags & BA_CLRBUF)
            vfs_bio_clrbuf(nbp);
        if (DOINGSOFTDEP(vp))
            softdep_setup_allocindir_page(ip, lbn, bp,
                indirs[i].in_off, nb, 0, nbp);
        bap[indirs[i].in_off] = nb;
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if (flags & IO_SYNC) {
            bwrite(bp);
        } else {
            if (bp->b_bufsize == fs->fs_bsize)
                bp->b_flags |= B_CLUSTEROK;
            bdwrite(bp);
        }
        *bpp = nbp;
        return (0);
    }
    brelse(bp);
    /*
     * If requested clear invalid portions of the buffer. If we
     * have to do a read-before-write (typical if BA_CLRBUF is set),
     * try to do some read-ahead in the sequential case to reduce
     * the number of I/O transactions.
     */
    if (flags & BA_CLRBUF) {
        int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
        if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
            error = cluster_read(vp, ip->i_size, lbn,
                (int)fs->fs_bsize, NOCRED,
                MAXBSIZE, seqcount, &nbp);
        } else {
            error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
        }
        if (error) {
            brelse(nbp);
            goto fail;
        }
    } else {
        nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
        nbp->b_blkno = fsbtodb(fs, nb);
    }
    *bpp = nbp;
    return (0);
fail:
    /*
     * If we have failed to allocate any blocks, simply return the error.
     * This is the usual case and avoids the need to fsync the file.
     */
    if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
        return (error);
    /*
     * If we have failed part way through block allocation, we
     * have to deallocate any indirect blocks that we have allocated.
     * We have to fsync the file before we start to get rid of all
     * of its dependencies so that we do not leave them dangling.
     * We have to sync it at the end so that the soft updates code
     * does not find any untracked changes. Although this is really
     * slow, running out of disk space is not expected to be a common
     * occurence. The error return from fsync is ignored as we already
     * have an error to return to the user.
     */
    (void) ffs_syncvnode(vp, MNT_WAIT);
    for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
         blkp < allocblk; blkp++, lbns_remfree++) {
        /*
         * We shall not leave the freed blocks on the vnode
         * buffer object lists.
         */
        bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
        if (bp != NULL) {
            bp->b_flags |= (B_INVAL | B_RELBUF);
            bp->b_flags &= ~B_ASYNC;
            brelse(bp);
        }
        deallocated += fs->fs_bsize;
    }
    if (allocib != NULL) {
        /* First indirect was new: clearing the inode pointer suffices. */
        *allocib = 0;
    } else if (unwindidx >= 0) {
        int r;

        r = bread(vp, indirs[unwindidx].in_lbn,
            (int)fs->fs_bsize, NOCRED, &bp);
        if (r) {
            panic("Could not unwind indirect block, error %d", r);
            /* NOTE(review): unreachable if panic() does not return. */
            brelse(bp);
        } else {
            bap = (ufs2_daddr_t *)bp->b_data;
            bap[indirs[unwindidx].in_off] = 0;
            if (flags & IO_SYNC) {
                bwrite(bp);
            } else {
                if (bp->b_bufsize == fs->fs_bsize)
                    bp->b_flags |= B_CLUSTEROK;
                bdwrite(bp);
            }
        }
    }
    if (deallocated) {
#ifdef QUOTA
        /*
         * Restore user's disk quota because allocation failed.
         */
        (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
        dp->di_blocks -= btodb(deallocated);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    (void) ffs_syncvnode(vp, MNT_WAIT);
    /*
     * After the buffers are invalidated and on-disk pointers are
     * cleared, free the blocks.
     */
    for (blkp = allociblk; blkp < allocblk; blkp++) {
        ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
            ip->i_number);
    }
    return (error);
}