1 /* 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Copyright (c) 1982, 1986, 1989, 1993 12 * The Regents of the University of California. All rights reserved. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 43 * $FreeBSD$ 44 */ 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/bio.h> 49 #include <sys/buf.h> 50 #include <sys/lock.h> 51 #include <sys/mount.h> 52 #include <sys/vnode.h> 53 54 #include <ufs/ufs/quota.h> 55 #include <ufs/ufs/inode.h> 56 #include <ufs/ufs/ufs_extern.h> 57 58 #include <ufs/ffs/fs.h> 59 #include <ufs/ffs/ffs_extern.h> 60 61 /* 62 * Balloc defines the structure of filesystem storage 63 * by allocating the physical blocks on a device given 64 * the inode and the logical block number in a file. 65 * This is the allocation strategy for UFS1. Below is 66 * the allocation strategy for UFS2. 
 */
/*
 * Allocate/fetch the block containing "startoffset" for a write of "size"
 * bytes.  Flags: BA_CLRBUF zeroes newly allocated (or read-before-write)
 * buffers, BA_METAONLY returns the enclosing indirect block instead of the
 * data block, IO_SYNC forces synchronous metadata writes, BA_SEQMASK carries
 * the sequential-access hint used for read-ahead.  On success the locked
 * buffer for the requested block is returned in *bpp; on failure every block
 * allocated along the way is released again (see "fail:" below).
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	/*
	 * allociblk[] records every block allocated in this call so the
	 * "fail:" path can free them; allocib/unwindidx identify the single
	 * on-disk pointer (inode slot or indirect entry) to clear on unwind.
	 */
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int unwindidx = -1;
	struct thread *td = curthread;	/* XXX */

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	lbn = lblkno(fs, startoffset);
	/* "size" becomes the in-use byte count of the block once written. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	/* UFS1 has no external (extended attribute) data area; see UFS2. */
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Fully allocated full-size block: just read and return it. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the block). */
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last block of the file, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Hook the new indirect block into the inode only now. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/* Reached the last level: bp holds the final indirect block. */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Child is safely on disk; now link it into the parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * Block already exists.  If the caller wants valid contents
	 * (BA_CLRBUF) we must read it, clustering when sequential access
	 * is hinted; otherwise a bare buffer suffices.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*bpp = nbp;
	return (0);
fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/* NOTE(review): brelse() is unreachable after panic(). */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs1_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	return (error);
}

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 */
/*
 * UFS2 twin of ffs_balloc_ufs1() above: same contract and flag handling,
 * but operating on the 64-bit ufs2_dinode.  Additionally supports IO_EXT,
 * which allocates in the inode's external-attribute area (di_extb[]); those
 * blocks are addressed with negative logical block numbers (-1 - lbn) and
 * their buffers are tagged BX_ALTDATA.
 */
int
ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	/*
	 * allociblk[] records every block allocated in this call so the
	 * "fail:" path can free them; allocib/unwindidx identify the single
	 * on-disk pointer (inode slot or indirect entry) to clear on unwind.
	 */
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	struct thread *td = curthread;	/* XXX */

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ip->i_fs;
	lbn = lblkno(fs, startoffset);
	/* "size" becomes the in-use byte count of the block once written. */
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		/* Fully allocated full-size block: just read and return it. */
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				/* Grow the fragment (may move the block). */
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last block of the ext area, else a full block.
			 */
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		/* Fully allocated full-size block: just read and return it. */
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* Existing fragment is already big enough. */
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/* Grow the fragment (may move the block). */
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			/*
			 * No block yet: allocate a fragment if this is the
			 * last block of the file, otherwise a full block.
			 */
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
			    &dp->di_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* Hook the new indirect block into the inode only now. */
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		/* Reached the last level: bp holds the final indirect block. */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Child is safely on disk; now link it into the parent. */
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer. If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*bpp = nbp;
	return (0);
fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/* NOTE(review): brelse() is unreachable after panic(). */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
	return (error);
}