1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/bio.h> 68 #include <sys/buf.h> 69 #include <sys/lock.h> 70 #include <sys/mount.h> 71 #include <sys/vnode.h> 72 73 #include <ufs/ufs/quota.h> 74 #include <ufs/ufs/inode.h> 75 #include <ufs/ufs/ufs_extern.h> 76 #include <ufs/ufs/extattr.h> 77 #include <ufs/ufs/ufsmount.h> 78 79 #include <ufs/ffs/fs.h> 80 #include <ufs/ffs/ffs_extern.h> 81 82 /* 83 * Balloc defines the structure of filesystem storage 84 * by allocating the physical blocks on a device given 85 * the inode and the logical block number in a file. 86 * This is the allocation strategy for UFS1. Below is 87 * the allocation strategy for UFS2. 88 */ 89 int 90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 91 struct ucred *cred, int flags, struct buf **bpp) 92 { 93 struct inode *ip; 94 struct ufs1_dinode *dp; 95 ufs_lbn_t lbn, lastlbn; 96 struct fs *fs; 97 ufs1_daddr_t nb; 98 struct buf *bp, *nbp; 99 struct ufsmount *ump; 100 struct indir indirs[NIADDR + 2]; 101 int deallocated, osize, nsize, num, i, error; 102 ufs2_daddr_t newb; 103 ufs1_daddr_t *bap, pref; 104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 106 int unwindidx = -1; 107 int saved_inbdflush; 108 109 ip = VTOI(vp); 110 dp = ip->i_din1; 111 fs = ip->i_fs; 112 ump = ip->i_ump; 113 lbn = lblkno(fs, startoffset); 114 size = blkoff(fs, startoffset) + size; 115 if (size > fs->fs_bsize) 116 panic("ffs_balloc_ufs1: blk too big"); 117 *bpp = NULL; 118 if (flags & IO_EXT) 119 return (EOPNOTSUPP); 120 if (lbn < 0) 121 return (EFBIG); 122 123 if (DOINGSOFTDEP(vp)) 124 softdep_prealloc(vp, MNT_WAIT); 125 /* 126 * If the next write will extend the file into a new block, 127 * and the file is currently composed of a fragment 128 * this fragment has to be extended to be a full block. 129 */ 130 lastlbn = lblkno(fs, ip->i_size); 131 if (lastlbn < NDADDR && lastlbn < lbn) { 132 nb = lastlbn; 133 osize = blksize(fs, ip, nb); 134 if (osize < fs->fs_bsize && osize > 0) { 135 UFS_LOCK(ump); 136 error = ffs_realloccg(ip, nb, dp->di_db[nb], 137 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 138 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, 139 cred, &bp); 140 if (error) 141 return (error); 142 if (DOINGSOFTDEP(vp)) 143 softdep_setup_allocdirect(ip, nb, 144 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 145 fs->fs_bsize, osize, bp); 146 ip->i_size = smalllblktosize(fs, nb + 1); 147 dp->di_size = ip->i_size; 148 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 149 ip->i_flag |= IN_CHANGE | IN_UPDATE; 150 if (flags & IO_SYNC) 151 bwrite(bp); 152 else 153 bawrite(bp); 154 } 155 } 156 /* 157 * The first NDADDR blocks are direct blocks 158 */ 159 if (lbn < NDADDR) { 160 if (flags & BA_METAONLY) 161 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 162 nb = dp->di_db[lbn]; 163 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 164 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 165 if (error) { 166 brelse(bp); 167 return (error); 168 } 169 bp->b_blkno = fsbtodb(fs, nb); 170 *bpp = bp; 171 return (0); 172 } 173 if (nb != 0) { 174 /* 175 * Consider need to reallocate a fragment. 176 */ 177 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 178 nsize = fragroundup(fs, size); 179 if (nsize <= osize) { 180 error = bread(vp, lbn, osize, NOCRED, &bp); 181 if (error) { 182 brelse(bp); 183 return (error); 184 } 185 bp->b_blkno = fsbtodb(fs, nb); 186 } else { 187 UFS_LOCK(ump); 188 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 189 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 190 &dp->di_db[0]), osize, nsize, flags, 191 cred, &bp); 192 if (error) 193 return (error); 194 if (DOINGSOFTDEP(vp)) 195 softdep_setup_allocdirect(ip, lbn, 196 dbtofsb(fs, bp->b_blkno), nb, 197 nsize, osize, bp); 198 } 199 } else { 200 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 201 nsize = fragroundup(fs, size); 202 else 203 nsize = fs->fs_bsize; 204 UFS_LOCK(ump); 205 error = ffs_alloc(ip, lbn, 206 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 207 nsize, flags, cred, &newb); 208 if (error) 209 return (error); 210 bp = getblk(vp, lbn, nsize, 0, 0, 0); 211 bp->b_blkno = fsbtodb(fs, newb); 212 if (flags & BA_CLRBUF) 213 vfs_bio_clrbuf(bp); 214 if (DOINGSOFTDEP(vp)) 215 softdep_setup_allocdirect(ip, lbn, newb, 0, 216 nsize, 0, bp); 217 } 218 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 219 ip->i_flag |= IN_CHANGE | IN_UPDATE; 220 *bpp = bp; 221 return (0); 222 } 223 /* 224 * Determine the number of levels of indirection. 225 */ 226 pref = 0; 227 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 228 return(error); 229 #ifdef INVARIANTS 230 if (num < 1) 231 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 232 #endif 233 saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags & 234 TDP_INBDFLUSH); 235 curthread->td_pflags |= TDP_INBDFLUSH; 236 /* 237 * Fetch the first indirect block allocating if necessary. 238 */ 239 --num; 240 nb = dp->di_ib[indirs[0].in_off]; 241 allocib = NULL; 242 allocblk = allociblk; 243 lbns_remfree = lbns; 244 if (nb == 0) { 245 UFS_LOCK(ump); 246 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 247 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 248 flags, cred, &newb)) != 0) { 249 curthread->td_pflags &= saved_inbdflush; 250 return (error); 251 } 252 nb = newb; 253 *allocblk++ = nb; 254 *lbns_remfree++ = indirs[1].in_lbn; 255 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); 256 bp->b_blkno = fsbtodb(fs, nb); 257 vfs_bio_clrbuf(bp); 258 if (DOINGSOFTDEP(vp)) { 259 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 260 newb, 0, fs->fs_bsize, 0, bp); 261 bdwrite(bp); 262 } else { 263 /* 264 * Write synchronously so that indirect blocks 265 * never point at garbage. 266 */ 267 if (DOINGASYNC(vp)) 268 bdwrite(bp); 269 else if ((error = bwrite(bp)) != 0) 270 goto fail; 271 } 272 allocib = &dp->di_ib[indirs[0].in_off]; 273 *allocib = nb; 274 ip->i_flag |= IN_CHANGE | IN_UPDATE; 275 } 276 /* 277 * Fetch through the indirect blocks, allocating as necessary. 278 */ 279 for (i = 1;;) { 280 error = bread(vp, 281 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 282 if (error) { 283 brelse(bp); 284 goto fail; 285 } 286 bap = (ufs1_daddr_t *)bp->b_data; 287 nb = bap[indirs[i].in_off]; 288 if (i == num) 289 break; 290 i += 1; 291 if (nb != 0) { 292 bqrelse(bp); 293 continue; 294 } 295 UFS_LOCK(ump); 296 if (pref == 0) 297 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 298 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 299 flags, cred, &newb)) != 0) { 300 brelse(bp); 301 goto fail; 302 } 303 nb = newb; 304 *allocblk++ = nb; 305 *lbns_remfree++ = indirs[i].in_lbn; 306 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 307 nbp->b_blkno = fsbtodb(fs, nb); 308 vfs_bio_clrbuf(nbp); 309 if (DOINGSOFTDEP(vp)) { 310 softdep_setup_allocindir_meta(nbp, ip, bp, 311 indirs[i - 1].in_off, nb); 312 bdwrite(nbp); 313 } else { 314 /* 315 * Write synchronously so that indirect blocks 316 * never point at garbage. 317 */ 318 if ((error = bwrite(nbp)) != 0) { 319 brelse(bp); 320 goto fail; 321 } 322 } 323 bap[indirs[i - 1].in_off] = nb; 324 if (allocib == NULL && unwindidx < 0) 325 unwindidx = i - 1; 326 /* 327 * If required, write synchronously, otherwise use 328 * delayed write. 329 */ 330 if (flags & IO_SYNC) { 331 bwrite(bp); 332 } else { 333 if (bp->b_bufsize == fs->fs_bsize) 334 bp->b_flags |= B_CLUSTEROK; 335 bdwrite(bp); 336 } 337 } 338 /* 339 * If asked only for the indirect block, then return it. 340 */ 341 if (flags & BA_METAONLY) { 342 curthread->td_pflags &= saved_inbdflush; 343 *bpp = bp; 344 return (0); 345 } 346 /* 347 * Get the data block, allocating if necessary. 348 */ 349 if (nb == 0) { 350 UFS_LOCK(ump); 351 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); 352 error = ffs_alloc(ip, 353 lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); 354 if (error) { 355 brelse(bp); 356 goto fail; 357 } 358 nb = newb; 359 *allocblk++ = nb; 360 *lbns_remfree++ = lbn; 361 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 362 nbp->b_blkno = fsbtodb(fs, nb); 363 if (flags & BA_CLRBUF) 364 vfs_bio_clrbuf(nbp); 365 if (DOINGSOFTDEP(vp)) 366 softdep_setup_allocindir_page(ip, lbn, bp, 367 indirs[i].in_off, nb, 0, nbp); 368 bap[indirs[i].in_off] = nb; 369 /* 370 * If required, write synchronously, otherwise use 371 * delayed write. 372 */ 373 if (flags & IO_SYNC) { 374 bwrite(bp); 375 } else { 376 if (bp->b_bufsize == fs->fs_bsize) 377 bp->b_flags |= B_CLUSTEROK; 378 bdwrite(bp); 379 } 380 curthread->td_pflags &= saved_inbdflush; 381 *bpp = nbp; 382 return (0); 383 } 384 brelse(bp); 385 if (flags & BA_CLRBUF) { 386 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 387 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 388 error = cluster_read(vp, ip->i_size, lbn, 389 (int)fs->fs_bsize, NOCRED, 390 MAXBSIZE, seqcount, &nbp); 391 } else { 392 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 393 } 394 if (error) { 395 brelse(nbp); 396 goto fail; 397 } 398 } else { 399 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 400 nbp->b_blkno = fsbtodb(fs, nb); 401 } 402 curthread->td_pflags &= saved_inbdflush; 403 *bpp = nbp; 404 return (0); 405 fail: 406 curthread->td_pflags &= saved_inbdflush; 407 /* 408 * If we have failed to allocate any blocks, simply return the error. 409 * This is the usual case and avoids the need to fsync the file. 410 */ 411 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 412 return (error); 413 /* 414 * If we have failed part way through block allocation, we 415 * have to deallocate any indirect blocks that we have allocated. 416 * We have to fsync the file before we start to get rid of all 417 * of its dependencies so that we do not leave them dangling. 418 * We have to sync it at the end so that the soft updates code 419 * does not find any untracked changes. Although this is really 420 * slow, running out of disk space is not expected to be a common 421 * occurence. The error return from fsync is ignored as we already 422 * have an error to return to the user. 423 * 424 * XXX Still have to journal the free below 425 */ 426 (void) ffs_syncvnode(vp, MNT_WAIT); 427 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 428 blkp < allocblk; blkp++, lbns_remfree++) { 429 /* 430 * We shall not leave the freed blocks on the vnode 431 * buffer object lists. 432 */ 433 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 434 if (bp != NULL) { 435 bp->b_flags |= (B_INVAL | B_RELBUF); 436 bp->b_flags &= ~B_ASYNC; 437 brelse(bp); 438 } 439 deallocated += fs->fs_bsize; 440 } 441 if (allocib != NULL) { 442 *allocib = 0; 443 } else if (unwindidx >= 0) { 444 int r; 445 446 r = bread(vp, indirs[unwindidx].in_lbn, 447 (int)fs->fs_bsize, NOCRED, &bp); 448 if (r) { 449 panic("Could not unwind indirect block, error %d", r); 450 brelse(bp); 451 } else { 452 bap = (ufs1_daddr_t *)bp->b_data; 453 bap[indirs[unwindidx].in_off] = 0; 454 if (flags & IO_SYNC) { 455 bwrite(bp); 456 } else { 457 if (bp->b_bufsize == fs->fs_bsize) 458 bp->b_flags |= B_CLUSTEROK; 459 bdwrite(bp); 460 } 461 } 462 } 463 if (deallocated) { 464 #ifdef QUOTA 465 /* 466 * Restore user's disk quota because allocation failed. 467 */ 468 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 469 #endif 470 dp->di_blocks -= btodb(deallocated); 471 ip->i_flag |= IN_CHANGE | IN_UPDATE; 472 } 473 (void) ffs_syncvnode(vp, MNT_WAIT); 474 /* 475 * After the buffers are invalidated and on-disk pointers are 476 * cleared, free the blocks. 477 */ 478 for (blkp = allociblk; blkp < allocblk; blkp++) { 479 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 480 ip->i_number, NULL); 481 } 482 return (error); 483 } 484 485 /* 486 * Balloc defines the structure of file system storage 487 * by allocating the physical blocks on a device given 488 * the inode and the logical block number in a file. 489 * This is the allocation strategy for UFS2. Above is 490 * the allocation strategy for UFS1. 491 */ 492 int 493 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 494 struct ucred *cred, int flags, struct buf **bpp) 495 { 496 struct inode *ip; 497 struct ufs2_dinode *dp; 498 ufs_lbn_t lbn, lastlbn; 499 struct fs *fs; 500 struct buf *bp, *nbp; 501 struct ufsmount *ump; 502 struct indir indirs[NIADDR + 2]; 503 ufs2_daddr_t nb, newb, *bap, pref; 504 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 505 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 506 int deallocated, osize, nsize, num, i, error; 507 int unwindidx = -1; 508 int saved_inbdflush; 509 510 ip = VTOI(vp); 511 dp = ip->i_din2; 512 fs = ip->i_fs; 513 ump = ip->i_ump; 514 lbn = lblkno(fs, startoffset); 515 size = blkoff(fs, startoffset) + size; 516 if (size > fs->fs_bsize) 517 panic("ffs_balloc_ufs2: blk too big"); 518 *bpp = NULL; 519 if (lbn < 0) 520 return (EFBIG); 521 522 if (DOINGSOFTDEP(vp)) 523 softdep_prealloc(vp, MNT_WAIT); 524 525 /* 526 * Check for allocating external data. 527 */ 528 if (flags & IO_EXT) { 529 if (lbn >= NXADDR) 530 return (EFBIG); 531 /* 532 * If the next write will extend the data into a new block, 533 * and the data is currently composed of a fragment 534 * this fragment has to be extended to be a full block. 535 */ 536 lastlbn = lblkno(fs, dp->di_extsize); 537 if (lastlbn < lbn) { 538 nb = lastlbn; 539 osize = sblksize(fs, dp->di_extsize, nb); 540 if (osize < fs->fs_bsize && osize > 0) { 541 UFS_LOCK(ump); 542 error = ffs_realloccg(ip, -1 - nb, 543 dp->di_extb[nb], 544 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 545 &dp->di_extb[0]), osize, 546 (int)fs->fs_bsize, flags, cred, &bp); 547 if (error) 548 return (error); 549 if (DOINGSOFTDEP(vp)) 550 softdep_setup_allocext(ip, nb, 551 dbtofsb(fs, bp->b_blkno), 552 dp->di_extb[nb], 553 fs->fs_bsize, osize, bp); 554 dp->di_extsize = smalllblktosize(fs, nb + 1); 555 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 556 bp->b_xflags |= BX_ALTDATA; 557 ip->i_flag |= IN_CHANGE; 558 if (flags & IO_SYNC) 559 bwrite(bp); 560 else 561 bawrite(bp); 562 } 563 } 564 /* 565 * All blocks are direct blocks 566 */ 567 if (flags & BA_METAONLY) 568 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 569 nb = dp->di_extb[lbn]; 570 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 571 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); 572 if (error) { 573 brelse(bp); 574 return (error); 575 } 576 bp->b_blkno = fsbtodb(fs, nb); 577 bp->b_xflags |= BX_ALTDATA; 578 *bpp = bp; 579 return (0); 580 } 581 if (nb != 0) { 582 /* 583 * Consider need to reallocate a fragment. 584 */ 585 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 586 nsize = fragroundup(fs, size); 587 if (nsize <= osize) { 588 error = bread(vp, -1 - lbn, osize, NOCRED, &bp); 589 if (error) { 590 brelse(bp); 591 return (error); 592 } 593 bp->b_blkno = fsbtodb(fs, nb); 594 bp->b_xflags |= BX_ALTDATA; 595 } else { 596 UFS_LOCK(ump); 597 error = ffs_realloccg(ip, -1 - lbn, 598 dp->di_extb[lbn], 599 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 600 &dp->di_extb[0]), osize, nsize, flags, 601 cred, &bp); 602 if (error) 603 return (error); 604 bp->b_xflags |= BX_ALTDATA; 605 if (DOINGSOFTDEP(vp)) 606 softdep_setup_allocext(ip, lbn, 607 dbtofsb(fs, bp->b_blkno), nb, 608 nsize, osize, bp); 609 } 610 } else { 611 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 612 nsize = fragroundup(fs, size); 613 else 614 nsize = fs->fs_bsize; 615 UFS_LOCK(ump); 616 error = ffs_alloc(ip, lbn, 617 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 618 nsize, flags, cred, &newb); 619 if (error) 620 return (error); 621 bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0); 622 bp->b_blkno = fsbtodb(fs, newb); 623 bp->b_xflags |= BX_ALTDATA; 624 if (flags & BA_CLRBUF) 625 vfs_bio_clrbuf(bp); 626 if (DOINGSOFTDEP(vp)) 627 softdep_setup_allocext(ip, lbn, newb, 0, 628 nsize, 0, bp); 629 } 630 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 631 ip->i_flag |= IN_CHANGE; 632 *bpp = bp; 633 return (0); 634 } 635 /* 636 * If the next write will extend the file into a new block, 637 * and the file is currently composed of a fragment 638 * this fragment has to be extended to be a full block. 639 */ 640 lastlbn = lblkno(fs, ip->i_size); 641 if (lastlbn < NDADDR && lastlbn < lbn) { 642 nb = lastlbn; 643 osize = blksize(fs, ip, nb); 644 if (osize < fs->fs_bsize && osize > 0) { 645 UFS_LOCK(ump); 646 error = ffs_realloccg(ip, nb, dp->di_db[nb], 647 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 648 &dp->di_db[0]), osize, (int)fs->fs_bsize, 649 flags, cred, &bp); 650 if (error) 651 return (error); 652 if (DOINGSOFTDEP(vp)) 653 softdep_setup_allocdirect(ip, nb, 654 dbtofsb(fs, bp->b_blkno), 655 dp->di_db[nb], 656 fs->fs_bsize, osize, bp); 657 ip->i_size = smalllblktosize(fs, nb + 1); 658 dp->di_size = ip->i_size; 659 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 660 ip->i_flag |= IN_CHANGE | IN_UPDATE; 661 if (flags & IO_SYNC) 662 bwrite(bp); 663 else 664 bawrite(bp); 665 } 666 } 667 /* 668 * The first NDADDR blocks are direct blocks 669 */ 670 if (lbn < NDADDR) { 671 if (flags & BA_METAONLY) 672 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 673 nb = dp->di_db[lbn]; 674 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 675 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 676 if (error) { 677 brelse(bp); 678 return (error); 679 } 680 bp->b_blkno = fsbtodb(fs, nb); 681 *bpp = bp; 682 return (0); 683 } 684 if (nb != 0) { 685 /* 686 * Consider need to reallocate a fragment. 687 */ 688 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 689 nsize = fragroundup(fs, size); 690 if (nsize <= osize) { 691 error = bread(vp, lbn, osize, NOCRED, &bp); 692 if (error) { 693 brelse(bp); 694 return (error); 695 } 696 bp->b_blkno = fsbtodb(fs, nb); 697 } else { 698 UFS_LOCK(ump); 699 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 700 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 701 &dp->di_db[0]), osize, nsize, flags, 702 cred, &bp); 703 if (error) 704 return (error); 705 if (DOINGSOFTDEP(vp)) 706 softdep_setup_allocdirect(ip, lbn, 707 dbtofsb(fs, bp->b_blkno), nb, 708 nsize, osize, bp); 709 } 710 } else { 711 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 712 nsize = fragroundup(fs, size); 713 else 714 nsize = fs->fs_bsize; 715 UFS_LOCK(ump); 716 error = ffs_alloc(ip, lbn, 717 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 718 &dp->di_db[0]), nsize, flags, cred, &newb); 719 if (error) 720 return (error); 721 bp = getblk(vp, lbn, nsize, 0, 0, 0); 722 bp->b_blkno = fsbtodb(fs, newb); 723 if (flags & BA_CLRBUF) 724 vfs_bio_clrbuf(bp); 725 if (DOINGSOFTDEP(vp)) 726 softdep_setup_allocdirect(ip, lbn, newb, 0, 727 nsize, 0, bp); 728 } 729 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 730 ip->i_flag |= IN_CHANGE | IN_UPDATE; 731 *bpp = bp; 732 return (0); 733 } 734 /* 735 * Determine the number of levels of indirection. 736 */ 737 pref = 0; 738 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 739 return(error); 740 #ifdef INVARIANTS 741 if (num < 1) 742 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 743 #endif 744 saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags & 745 TDP_INBDFLUSH); 746 curthread->td_pflags |= TDP_INBDFLUSH; 747 /* 748 * Fetch the first indirect block allocating if necessary. 749 */ 750 --num; 751 nb = dp->di_ib[indirs[0].in_off]; 752 allocib = NULL; 753 allocblk = allociblk; 754 lbns_remfree = lbns; 755 if (nb == 0) { 756 UFS_LOCK(ump); 757 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 758 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 759 flags, cred, &newb)) != 0) { 760 curthread->td_pflags &= saved_inbdflush; 761 return (error); 762 } 763 nb = newb; 764 *allocblk++ = nb; 765 *lbns_remfree++ = indirs[1].in_lbn; 766 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); 767 bp->b_blkno = fsbtodb(fs, nb); 768 vfs_bio_clrbuf(bp); 769 if (DOINGSOFTDEP(vp)) { 770 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 771 newb, 0, fs->fs_bsize, 0, bp); 772 bdwrite(bp); 773 } else { 774 /* 775 * Write synchronously so that indirect blocks 776 * never point at garbage. 777 */ 778 if (DOINGASYNC(vp)) 779 bdwrite(bp); 780 else if ((error = bwrite(bp)) != 0) 781 goto fail; 782 } 783 allocib = &dp->di_ib[indirs[0].in_off]; 784 *allocib = nb; 785 ip->i_flag |= IN_CHANGE | IN_UPDATE; 786 } 787 /* 788 * Fetch through the indirect blocks, allocating as necessary. 789 */ 790 for (i = 1;;) { 791 error = bread(vp, 792 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 793 if (error) { 794 brelse(bp); 795 goto fail; 796 } 797 bap = (ufs2_daddr_t *)bp->b_data; 798 nb = bap[indirs[i].in_off]; 799 if (i == num) 800 break; 801 i += 1; 802 if (nb != 0) { 803 bqrelse(bp); 804 continue; 805 } 806 UFS_LOCK(ump); 807 if (pref == 0) 808 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 809 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 810 flags, cred, &newb)) != 0) { 811 brelse(bp); 812 goto fail; 813 } 814 nb = newb; 815 *allocblk++ = nb; 816 *lbns_remfree++ = indirs[i].in_lbn; 817 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 818 nbp->b_blkno = fsbtodb(fs, nb); 819 vfs_bio_clrbuf(nbp); 820 if (DOINGSOFTDEP(vp)) { 821 softdep_setup_allocindir_meta(nbp, ip, bp, 822 indirs[i - 1].in_off, nb); 823 bdwrite(nbp); 824 } else { 825 /* 826 * Write synchronously so that indirect blocks 827 * never point at garbage. 828 */ 829 if ((error = bwrite(nbp)) != 0) { 830 brelse(bp); 831 goto fail; 832 } 833 } 834 bap[indirs[i - 1].in_off] = nb; 835 if (allocib == NULL && unwindidx < 0) 836 unwindidx = i - 1; 837 /* 838 * If required, write synchronously, otherwise use 839 * delayed write. 840 */ 841 if (flags & IO_SYNC) { 842 bwrite(bp); 843 } else { 844 if (bp->b_bufsize == fs->fs_bsize) 845 bp->b_flags |= B_CLUSTEROK; 846 bdwrite(bp); 847 } 848 } 849 /* 850 * If asked only for the indirect block, then return it. 851 */ 852 if (flags & BA_METAONLY) { 853 curthread->td_pflags &= saved_inbdflush; 854 *bpp = bp; 855 return (0); 856 } 857 /* 858 * Get the data block, allocating if necessary. 859 */ 860 if (nb == 0) { 861 UFS_LOCK(ump); 862 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); 863 error = ffs_alloc(ip, 864 lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); 865 if (error) { 866 brelse(bp); 867 goto fail; 868 } 869 nb = newb; 870 *allocblk++ = nb; 871 *lbns_remfree++ = lbn; 872 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 873 nbp->b_blkno = fsbtodb(fs, nb); 874 if (flags & BA_CLRBUF) 875 vfs_bio_clrbuf(nbp); 876 if (DOINGSOFTDEP(vp)) 877 softdep_setup_allocindir_page(ip, lbn, bp, 878 indirs[i].in_off, nb, 0, nbp); 879 bap[indirs[i].in_off] = nb; 880 /* 881 * If required, write synchronously, otherwise use 882 * delayed write. 883 */ 884 if (flags & IO_SYNC) { 885 bwrite(bp); 886 } else { 887 if (bp->b_bufsize == fs->fs_bsize) 888 bp->b_flags |= B_CLUSTEROK; 889 bdwrite(bp); 890 } 891 curthread->td_pflags &= saved_inbdflush; 892 *bpp = nbp; 893 return (0); 894 } 895 brelse(bp); 896 /* 897 * If requested clear invalid portions of the buffer. If we 898 * have to do a read-before-write (typical if BA_CLRBUF is set), 899 * try to do some read-ahead in the sequential case to reduce 900 * the number of I/O transactions. 901 */ 902 if (flags & BA_CLRBUF) { 903 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 904 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 905 error = cluster_read(vp, ip->i_size, lbn, 906 (int)fs->fs_bsize, NOCRED, 907 MAXBSIZE, seqcount, &nbp); 908 } else { 909 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 910 } 911 if (error) { 912 brelse(nbp); 913 goto fail; 914 } 915 } else { 916 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); 917 nbp->b_blkno = fsbtodb(fs, nb); 918 } 919 curthread->td_pflags &= saved_inbdflush; 920 *bpp = nbp; 921 return (0); 922 fail: 923 curthread->td_pflags &= saved_inbdflush; 924 /* 925 * If we have failed to allocate any blocks, simply return the error. 926 * This is the usual case and avoids the need to fsync the file. 927 */ 928 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 929 return (error); 930 /* 931 * If we have failed part way through block allocation, we 932 * have to deallocate any indirect blocks that we have allocated. 933 * We have to fsync the file before we start to get rid of all 934 * of its dependencies so that we do not leave them dangling. 935 * We have to sync it at the end so that the soft updates code 936 * does not find any untracked changes. Although this is really 937 * slow, running out of disk space is not expected to be a common 938 * occurence. The error return from fsync is ignored as we already 939 * have an error to return to the user. 940 * 941 * XXX Still have to journal the free below 942 */ 943 (void) ffs_syncvnode(vp, MNT_WAIT); 944 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 945 blkp < allocblk; blkp++, lbns_remfree++) { 946 /* 947 * We shall not leave the freed blocks on the vnode 948 * buffer object lists. 949 */ 950 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 951 if (bp != NULL) { 952 bp->b_flags |= (B_INVAL | B_RELBUF); 953 bp->b_flags &= ~B_ASYNC; 954 brelse(bp); 955 } 956 deallocated += fs->fs_bsize; 957 } 958 if (allocib != NULL) { 959 *allocib = 0; 960 } else if (unwindidx >= 0) { 961 int r; 962 963 r = bread(vp, indirs[unwindidx].in_lbn, 964 (int)fs->fs_bsize, NOCRED, &bp); 965 if (r) { 966 panic("Could not unwind indirect block, error %d", r); 967 brelse(bp); 968 } else { 969 bap = (ufs2_daddr_t *)bp->b_data; 970 bap[indirs[unwindidx].in_off] = 0; 971 if (flags & IO_SYNC) { 972 bwrite(bp); 973 } else { 974 if (bp->b_bufsize == fs->fs_bsize) 975 bp->b_flags |= B_CLUSTEROK; 976 bdwrite(bp); 977 } 978 } 979 } 980 if (deallocated) { 981 #ifdef QUOTA 982 /* 983 * Restore user's disk quota because allocation failed. 984 */ 985 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 986 #endif 987 dp->di_blocks -= btodb(deallocated); 988 ip->i_flag |= IN_CHANGE | IN_UPDATE; 989 } 990 (void) ffs_syncvnode(vp, MNT_WAIT); 991 /* 992 * After the buffers are invalidated and on-disk pointers are 993 * cleared, free the blocks. 994 */ 995 for (blkp = allociblk; blkp < allocblk; blkp++) { 996 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 997 ip->i_number, NULL); 998 } 999 return (error); 1000 } 1001