1 /* 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Copyright (c) 1982, 1989, 1993 12 * The Regents of the University of California. All rights reserved. 13 * (c) UNIX System Laboratories, Inc. 14 * Copyright (c) 1982, 1986, 1989, 1993 15 * The Regents of the University of California. All rights reserved. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. All advertising materials mentioning features or use of this software 26 * must display the following acknowledgement: 27 * This product includes software developed by the University of 28 * California, Berkeley and its contributors. 29 * 4. Neither the name of the University nor the names of its contributors 30 * may be used to endorse or promote products derived from this software 31 * without specific prior written permission. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 37 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 43 * SUCH DAMAGE. 44 * 45 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 46 * $FreeBSD$ 47 */ 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/bio.h> 52 #include <sys/buf.h> 53 #include <sys/lock.h> 54 #include <sys/mount.h> 55 #include <sys/vnode.h> 56 57 #include <ufs/ufs/quota.h> 58 #include <ufs/ufs/inode.h> 59 #include <ufs/ufs/ufs_extern.h> 60 61 #include <ufs/ffs/fs.h> 62 #include <ufs/ffs/ffs_extern.h> 63 64 /* 65 * Balloc defines the structure of filesystem storage 66 * by allocating the physical blocks on a device given 67 * the inode and the logical block number in a file. 68 * This is the allocation strategy for UFS1. Below is 69 * the allocation strategy for UFS2. 70 */ 71 int 72 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 73 struct ucred *cred, int flags, struct buf **bpp) 74 { 75 struct inode *ip; 76 struct ufs1_dinode *dp; 77 ufs_lbn_t lbn, lastlbn; 78 struct fs *fs; 79 ufs1_daddr_t nb; 80 struct buf *bp, *nbp; 81 struct indir indirs[NIADDR + 2]; 82 int deallocated, osize, nsize, num, i, error; 83 ufs2_daddr_t newb; 84 ufs1_daddr_t *bap, pref; 85 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 86 int unwindidx = -1; 87 struct thread *td = curthread; /* XXX */ 88 89 ip = VTOI(vp); 90 dp = ip->i_din1; 91 fs = ip->i_fs; 92 lbn = lblkno(fs, startoffset); 93 size = blkoff(fs, startoffset) + size; 94 if (size > fs->fs_bsize) 95 panic("ffs_balloc_ufs1: blk too big"); 96 *bpp = NULL; 97 if (flags & IO_EXT) 98 return (EOPNOTSUPP); 99 if (lbn < 0) 100 return (EFBIG); 101 102 /* 103 * If the next write will extend the file into a new block, 104 * and the file is currently composed of a fragment 105 * this fragment has to be extended to be a full block. 106 */ 107 lastlbn = lblkno(fs, ip->i_size); 108 if (lastlbn < NDADDR && lastlbn < lbn) { 109 nb = lastlbn; 110 osize = blksize(fs, ip, nb); 111 if (osize < fs->fs_bsize && osize > 0) { 112 error = ffs_realloccg(ip, nb, dp->di_db[nb], 113 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 114 &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp); 115 if (error) 116 return (error); 117 if (DOINGSOFTDEP(vp)) 118 softdep_setup_allocdirect(ip, nb, 119 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 120 fs->fs_bsize, osize, bp); 121 ip->i_size = smalllblktosize(fs, nb + 1); 122 dp->di_size = ip->i_size; 123 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 124 ip->i_flag |= IN_CHANGE | IN_UPDATE; 125 if (flags & IO_SYNC) 126 bwrite(bp); 127 else 128 bawrite(bp); 129 } 130 } 131 /* 132 * The first NDADDR blocks are direct blocks 133 */ 134 if (lbn < NDADDR) { 135 if (flags & BA_METAONLY) 136 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 137 nb = dp->di_db[lbn]; 138 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 139 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 140 if (error) { 141 brelse(bp); 142 return (error); 143 } 144 bp->b_blkno = fsbtodb(fs, nb); 145 *bpp = bp; 146 return (0); 147 } 148 if (nb != 0) { 149 /* 150 * Consider need to reallocate a fragment. 151 */ 152 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 153 nsize = fragroundup(fs, size); 154 if (nsize <= osize) { 155 error = bread(vp, lbn, osize, NOCRED, &bp); 156 if (error) { 157 brelse(bp); 158 return (error); 159 } 160 bp->b_blkno = fsbtodb(fs, nb); 161 } else { 162 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 163 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 164 &dp->di_db[0]), osize, nsize, cred, &bp); 165 if (error) 166 return (error); 167 if (DOINGSOFTDEP(vp)) 168 softdep_setup_allocdirect(ip, lbn, 169 dbtofsb(fs, bp->b_blkno), nb, 170 nsize, osize, bp); 171 } 172 } else { 173 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 174 nsize = fragroundup(fs, size); 175 else 176 nsize = fs->fs_bsize; 177 error = ffs_alloc(ip, lbn, 178 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 179 nsize, cred, &newb); 180 if (error) 181 return (error); 182 bp = getblk(vp, lbn, nsize, 0, 0); 183 bp->b_blkno = fsbtodb(fs, newb); 184 if (flags & BA_CLRBUF) 185 vfs_bio_clrbuf(bp); 186 if (DOINGSOFTDEP(vp)) 187 softdep_setup_allocdirect(ip, lbn, newb, 0, 188 nsize, 0, bp); 189 } 190 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 191 ip->i_flag |= IN_CHANGE | IN_UPDATE; 192 *bpp = bp; 193 return (0); 194 } 195 /* 196 * Determine the number of levels of indirection. 197 */ 198 pref = 0; 199 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 200 return(error); 201 #ifdef DIAGNOSTIC 202 if (num < 1) 203 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 204 #endif 205 /* 206 * Fetch the first indirect block allocating if necessary. 207 */ 208 --num; 209 nb = dp->di_ib[indirs[0].in_off]; 210 allocib = NULL; 211 allocblk = allociblk; 212 if (nb == 0) { 213 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 214 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 215 cred, &newb)) != 0) 216 return (error); 217 nb = newb; 218 *allocblk++ = nb; 219 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); 220 bp->b_blkno = fsbtodb(fs, nb); 221 vfs_bio_clrbuf(bp); 222 if (DOINGSOFTDEP(vp)) { 223 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 224 newb, 0, fs->fs_bsize, 0, bp); 225 bdwrite(bp); 226 } else { 227 /* 228 * Write synchronously so that indirect blocks 229 * never point at garbage. 230 */ 231 if (DOINGASYNC(vp)) 232 bdwrite(bp); 233 else if ((error = bwrite(bp)) != 0) 234 goto fail; 235 } 236 allocib = &dp->di_ib[indirs[0].in_off]; 237 *allocib = nb; 238 ip->i_flag |= IN_CHANGE | IN_UPDATE; 239 } 240 /* 241 * Fetch through the indirect blocks, allocating as necessary. 242 */ 243 for (i = 1;;) { 244 error = bread(vp, 245 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 246 if (error) { 247 brelse(bp); 248 goto fail; 249 } 250 bap = (ufs1_daddr_t *)bp->b_data; 251 nb = bap[indirs[i].in_off]; 252 if (i == num) 253 break; 254 i += 1; 255 if (nb != 0) { 256 bqrelse(bp); 257 continue; 258 } 259 if (pref == 0) 260 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); 261 if ((error = 262 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { 263 brelse(bp); 264 goto fail; 265 } 266 nb = newb; 267 *allocblk++ = nb; 268 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); 269 nbp->b_blkno = fsbtodb(fs, nb); 270 vfs_bio_clrbuf(nbp); 271 if (DOINGSOFTDEP(vp)) { 272 softdep_setup_allocindir_meta(nbp, ip, bp, 273 indirs[i - 1].in_off, nb); 274 bdwrite(nbp); 275 } else { 276 /* 277 * Write synchronously so that indirect blocks 278 * never point at garbage. 279 */ 280 if ((error = bwrite(nbp)) != 0) { 281 brelse(bp); 282 goto fail; 283 } 284 } 285 bap[indirs[i - 1].in_off] = nb; 286 if (allocib == NULL && unwindidx < 0) 287 unwindidx = i - 1; 288 /* 289 * If required, write synchronously, otherwise use 290 * delayed write. 291 */ 292 if (flags & IO_SYNC) { 293 bwrite(bp); 294 } else { 295 if (bp->b_bufsize == fs->fs_bsize) 296 bp->b_flags |= B_CLUSTEROK; 297 bdwrite(bp); 298 } 299 } 300 /* 301 * If asked only for the indirect block, then return it. 302 */ 303 if (flags & BA_METAONLY) { 304 *bpp = bp; 305 return (0); 306 } 307 /* 308 * Get the data block, allocating if necessary. 309 */ 310 if (nb == 0) { 311 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); 312 error = ffs_alloc(ip, 313 lbn, pref, (int)fs->fs_bsize, cred, &newb); 314 if (error) { 315 brelse(bp); 316 goto fail; 317 } 318 nb = newb; 319 *allocblk++ = nb; 320 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 321 nbp->b_blkno = fsbtodb(fs, nb); 322 if (flags & BA_CLRBUF) 323 vfs_bio_clrbuf(nbp); 324 if (DOINGSOFTDEP(vp)) 325 softdep_setup_allocindir_page(ip, lbn, bp, 326 indirs[i].in_off, nb, 0, nbp); 327 bap[indirs[i].in_off] = nb; 328 /* 329 * If required, write synchronously, otherwise use 330 * delayed write. 331 */ 332 if (flags & IO_SYNC) { 333 bwrite(bp); 334 } else { 335 if (bp->b_bufsize == fs->fs_bsize) 336 bp->b_flags |= B_CLUSTEROK; 337 bdwrite(bp); 338 } 339 *bpp = nbp; 340 return (0); 341 } 342 brelse(bp); 343 if (flags & BA_CLRBUF) { 344 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 345 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 346 error = cluster_read(vp, ip->i_size, lbn, 347 (int)fs->fs_bsize, NOCRED, 348 MAXBSIZE, seqcount, &nbp); 349 } else { 350 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 351 } 352 if (error) { 353 brelse(nbp); 354 goto fail; 355 } 356 } else { 357 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 358 nbp->b_blkno = fsbtodb(fs, nb); 359 } 360 *bpp = nbp; 361 return (0); 362 fail: 363 /* 364 * If we have failed to allocate any blocks, simply return the error. 365 * This is the usual case and avoids the need to fsync the file. 366 */ 367 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 368 return (error); 369 /* 370 * If we have failed part way through block allocation, we 371 * have to deallocate any indirect blocks that we have allocated. 372 * We have to fsync the file before we start to get rid of all 373 * of its dependencies so that we do not leave them dangling. 374 * We have to sync it at the end so that the soft updates code 375 * does not find any untracked changes. Although this is really 376 * slow, running out of disk space is not expected to be a common 377 * occurence. The error return from fsync is ignored as we already 378 * have an error to return to the user. 379 */ 380 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 381 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 382 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); 383 deallocated += fs->fs_bsize; 384 } 385 if (allocib != NULL) { 386 *allocib = 0; 387 } else if (unwindidx >= 0) { 388 int r; 389 390 r = bread(vp, indirs[unwindidx].in_lbn, 391 (int)fs->fs_bsize, NOCRED, &bp); 392 if (r) { 393 panic("Could not unwind indirect block, error %d", r); 394 brelse(bp); 395 } else { 396 bap = (ufs1_daddr_t *)bp->b_data; 397 bap[indirs[unwindidx].in_off] = 0; 398 if (flags & IO_SYNC) { 399 bwrite(bp); 400 } else { 401 if (bp->b_bufsize == fs->fs_bsize) 402 bp->b_flags |= B_CLUSTEROK; 403 bdwrite(bp); 404 } 405 } 406 } 407 if (deallocated) { 408 #ifdef QUOTA 409 /* 410 * Restore user's disk quota because allocation failed. 411 */ 412 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 413 #endif 414 dp->di_blocks -= btodb(deallocated); 415 ip->i_flag |= IN_CHANGE | IN_UPDATE; 416 } 417 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 418 return (error); 419 } 420 421 /* 422 * Balloc defines the structure of file system storage 423 * by allocating the physical blocks on a device given 424 * the inode and the logical block number in a file. 425 * This is the allocation strategy for UFS2. Above is 426 * the allocation strategy for UFS1. 427 */ 428 int 429 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 430 struct ucred *cred, int flags, struct buf **bpp) 431 { 432 struct inode *ip; 433 struct ufs2_dinode *dp; 434 ufs_lbn_t lbn, lastlbn; 435 struct fs *fs; 436 struct buf *bp, *nbp; 437 struct indir indirs[NIADDR + 2]; 438 ufs2_daddr_t nb, newb, *bap, pref; 439 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 440 int deallocated, osize, nsize, num, i, error; 441 int unwindidx = -1; 442 struct thread *td = curthread; /* XXX */ 443 444 ip = VTOI(vp); 445 dp = ip->i_din2; 446 fs = ip->i_fs; 447 lbn = lblkno(fs, startoffset); 448 size = blkoff(fs, startoffset) + size; 449 if (size > fs->fs_bsize) 450 panic("ffs_balloc_ufs2: blk too big"); 451 *bpp = NULL; 452 if (lbn < 0) 453 return (EFBIG); 454 455 /* 456 * Check for allocating external data. 457 */ 458 if (flags & IO_EXT) { 459 if (lbn >= NXADDR) 460 return (EFBIG); 461 /* 462 * If the next write will extend the data into a new block, 463 * and the data is currently composed of a fragment 464 * this fragment has to be extended to be a full block. 465 */ 466 lastlbn = lblkno(fs, dp->di_extsize); 467 if (lastlbn < lbn) { 468 nb = lastlbn; 469 osize = sblksize(fs, dp->di_extsize, nb); 470 if (osize < fs->fs_bsize && osize > 0) { 471 error = ffs_realloccg(ip, -1 - nb, 472 dp->di_extb[nb], 473 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 474 &dp->di_extb[0]), osize, 475 (int)fs->fs_bsize, cred, &bp); 476 if (error) 477 return (error); 478 if (DOINGSOFTDEP(vp)) 479 softdep_setup_allocext(ip, nb, 480 dbtofsb(fs, bp->b_blkno), 481 dp->di_extb[nb], 482 fs->fs_bsize, osize, bp); 483 dp->di_extsize = smalllblktosize(fs, nb + 1); 484 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 485 bp->b_xflags |= BX_ALTDATA; 486 ip->i_flag |= IN_CHANGE | IN_UPDATE; 487 if (flags & IO_SYNC) 488 bwrite(bp); 489 else 490 bawrite(bp); 491 } 492 } 493 /* 494 * All blocks are direct blocks 495 */ 496 if (flags & BA_METAONLY) 497 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 498 nb = dp->di_extb[lbn]; 499 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 500 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp); 501 if (error) { 502 brelse(bp); 503 return (error); 504 } 505 bp->b_blkno = fsbtodb(fs, nb); 506 bp->b_xflags |= BX_ALTDATA; 507 *bpp = bp; 508 return (0); 509 } 510 if (nb != 0) { 511 /* 512 * Consider need to reallocate a fragment. 513 */ 514 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 515 nsize = fragroundup(fs, size); 516 if (nsize <= osize) { 517 error = bread(vp, -1 - lbn, osize, NOCRED, &bp); 518 if (error) { 519 brelse(bp); 520 return (error); 521 } 522 bp->b_blkno = fsbtodb(fs, nb); 523 bp->b_xflags |= BX_ALTDATA; 524 } else { 525 error = ffs_realloccg(ip, -1 - lbn, 526 dp->di_extb[lbn], 527 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 528 &dp->di_extb[0]), osize, nsize, cred, &bp); 529 if (error) 530 return (error); 531 bp->b_xflags |= BX_ALTDATA; 532 if (DOINGSOFTDEP(vp)) 533 softdep_setup_allocext(ip, lbn, 534 dbtofsb(fs, bp->b_blkno), nb, 535 nsize, osize, bp); 536 } 537 } else { 538 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 539 nsize = fragroundup(fs, size); 540 else 541 nsize = fs->fs_bsize; 542 error = ffs_alloc(ip, lbn, 543 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 544 nsize, cred, &newb); 545 if (error) 546 return (error); 547 bp = getblk(vp, -1 - lbn, nsize, 0, 0); 548 bp->b_blkno = fsbtodb(fs, newb); 549 bp->b_xflags |= BX_ALTDATA; 550 if (flags & BA_CLRBUF) 551 vfs_bio_clrbuf(bp); 552 if (DOINGSOFTDEP(vp)) 553 softdep_setup_allocext(ip, lbn, newb, 0, 554 nsize, 0, bp); 555 } 556 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 557 ip->i_flag |= IN_CHANGE | IN_UPDATE; 558 *bpp = bp; 559 return (0); 560 } 561 /* 562 * If the next write will extend the file into a new block, 563 * and the file is currently composed of a fragment 564 * this fragment has to be extended to be a full block. 565 */ 566 lastlbn = lblkno(fs, ip->i_size); 567 if (lastlbn < NDADDR && lastlbn < lbn) { 568 nb = lastlbn; 569 osize = blksize(fs, ip, nb); 570 if (osize < fs->fs_bsize && osize > 0) { 571 error = ffs_realloccg(ip, nb, dp->di_db[nb], 572 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 573 &dp->di_db[0]), osize, (int)fs->fs_bsize, 574 cred, &bp); 575 if (error) 576 return (error); 577 if (DOINGSOFTDEP(vp)) 578 softdep_setup_allocdirect(ip, nb, 579 dbtofsb(fs, bp->b_blkno), 580 dp->di_db[nb], 581 fs->fs_bsize, osize, bp); 582 ip->i_size = smalllblktosize(fs, nb + 1); 583 dp->di_size = ip->i_size; 584 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 585 ip->i_flag |= IN_CHANGE | IN_UPDATE; 586 if (flags & IO_SYNC) 587 bwrite(bp); 588 else 589 bawrite(bp); 590 } 591 } 592 /* 593 * The first NDADDR blocks are direct blocks 594 */ 595 if (lbn < NDADDR) { 596 if (flags & BA_METAONLY) 597 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 598 nb = dp->di_db[lbn]; 599 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 600 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 601 if (error) { 602 brelse(bp); 603 return (error); 604 } 605 bp->b_blkno = fsbtodb(fs, nb); 606 *bpp = bp; 607 return (0); 608 } 609 if (nb != 0) { 610 /* 611 * Consider need to reallocate a fragment. 612 */ 613 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 614 nsize = fragroundup(fs, size); 615 if (nsize <= osize) { 616 error = bread(vp, lbn, osize, NOCRED, &bp); 617 if (error) { 618 brelse(bp); 619 return (error); 620 } 621 bp->b_blkno = fsbtodb(fs, nb); 622 } else { 623 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 624 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 625 &dp->di_db[0]), osize, nsize, cred, &bp); 626 if (error) 627 return (error); 628 if (DOINGSOFTDEP(vp)) 629 softdep_setup_allocdirect(ip, lbn, 630 dbtofsb(fs, bp->b_blkno), nb, 631 nsize, osize, bp); 632 } 633 } else { 634 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 635 nsize = fragroundup(fs, size); 636 else 637 nsize = fs->fs_bsize; 638 error = ffs_alloc(ip, lbn, 639 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 640 &dp->di_db[0]), nsize, cred, &newb); 641 if (error) 642 return (error); 643 bp = getblk(vp, lbn, nsize, 0, 0); 644 bp->b_blkno = fsbtodb(fs, newb); 645 if (flags & BA_CLRBUF) 646 vfs_bio_clrbuf(bp); 647 if (DOINGSOFTDEP(vp)) 648 softdep_setup_allocdirect(ip, lbn, newb, 0, 649 nsize, 0, bp); 650 } 651 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 652 ip->i_flag |= IN_CHANGE | IN_UPDATE; 653 *bpp = bp; 654 return (0); 655 } 656 /* 657 * Determine the number of levels of indirection. 658 */ 659 pref = 0; 660 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 661 return(error); 662 #ifdef DIAGNOSTIC 663 if (num < 1) 664 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 665 #endif 666 /* 667 * Fetch the first indirect block allocating if necessary. 668 */ 669 --num; 670 nb = dp->di_ib[indirs[0].in_off]; 671 allocib = NULL; 672 allocblk = allociblk; 673 if (nb == 0) { 674 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 675 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 676 cred, &newb)) != 0) 677 return (error); 678 nb = newb; 679 *allocblk++ = nb; 680 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); 681 bp->b_blkno = fsbtodb(fs, nb); 682 vfs_bio_clrbuf(bp); 683 if (DOINGSOFTDEP(vp)) { 684 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 685 newb, 0, fs->fs_bsize, 0, bp); 686 bdwrite(bp); 687 } else { 688 /* 689 * Write synchronously so that indirect blocks 690 * never point at garbage. 691 */ 692 if (DOINGASYNC(vp)) 693 bdwrite(bp); 694 else if ((error = bwrite(bp)) != 0) 695 goto fail; 696 } 697 allocib = &dp->di_ib[indirs[0].in_off]; 698 *allocib = nb; 699 ip->i_flag |= IN_CHANGE | IN_UPDATE; 700 } 701 /* 702 * Fetch through the indirect blocks, allocating as necessary. 703 */ 704 for (i = 1;;) { 705 error = bread(vp, 706 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 707 if (error) { 708 brelse(bp); 709 goto fail; 710 } 711 bap = (ufs2_daddr_t *)bp->b_data; 712 nb = bap[indirs[i].in_off]; 713 if (i == num) 714 break; 715 i += 1; 716 if (nb != 0) { 717 bqrelse(bp); 718 continue; 719 } 720 if (pref == 0) 721 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); 722 if ((error = 723 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { 724 brelse(bp); 725 goto fail; 726 } 727 nb = newb; 728 *allocblk++ = nb; 729 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); 730 nbp->b_blkno = fsbtodb(fs, nb); 731 vfs_bio_clrbuf(nbp); 732 if (DOINGSOFTDEP(vp)) { 733 softdep_setup_allocindir_meta(nbp, ip, bp, 734 indirs[i - 1].in_off, nb); 735 bdwrite(nbp); 736 } else { 737 /* 738 * Write synchronously so that indirect blocks 739 * never point at garbage. 740 */ 741 if ((error = bwrite(nbp)) != 0) { 742 brelse(bp); 743 goto fail; 744 } 745 } 746 bap[indirs[i - 1].in_off] = nb; 747 if (allocib == NULL && unwindidx < 0) 748 unwindidx = i - 1; 749 /* 750 * If required, write synchronously, otherwise use 751 * delayed write. 752 */ 753 if (flags & IO_SYNC) { 754 bwrite(bp); 755 } else { 756 if (bp->b_bufsize == fs->fs_bsize) 757 bp->b_flags |= B_CLUSTEROK; 758 bdwrite(bp); 759 } 760 } 761 /* 762 * If asked only for the indirect block, then return it. 763 */ 764 if (flags & BA_METAONLY) { 765 *bpp = bp; 766 return (0); 767 } 768 /* 769 * Get the data block, allocating if necessary. 770 */ 771 if (nb == 0) { 772 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); 773 error = ffs_alloc(ip, 774 lbn, pref, (int)fs->fs_bsize, cred, &newb); 775 if (error) { 776 brelse(bp); 777 goto fail; 778 } 779 nb = newb; 780 *allocblk++ = nb; 781 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 782 nbp->b_blkno = fsbtodb(fs, nb); 783 if (flags & BA_CLRBUF) 784 vfs_bio_clrbuf(nbp); 785 if (DOINGSOFTDEP(vp)) 786 softdep_setup_allocindir_page(ip, lbn, bp, 787 indirs[i].in_off, nb, 0, nbp); 788 bap[indirs[i].in_off] = nb; 789 /* 790 * If required, write synchronously, otherwise use 791 * delayed write. 792 */ 793 if (flags & IO_SYNC) { 794 bwrite(bp); 795 } else { 796 if (bp->b_bufsize == fs->fs_bsize) 797 bp->b_flags |= B_CLUSTEROK; 798 bdwrite(bp); 799 } 800 *bpp = nbp; 801 return (0); 802 } 803 brelse(bp); 804 /* 805 * If requested clear invalid portions of the buffer. If we 806 * have to do a read-before-write (typical if BA_CLRBUF is set), 807 * try to do some read-ahead in the sequential case to reduce 808 * the number of I/O transactions. 809 */ 810 if (flags & BA_CLRBUF) { 811 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 812 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 813 error = cluster_read(vp, ip->i_size, lbn, 814 (int)fs->fs_bsize, NOCRED, 815 MAXBSIZE, seqcount, &nbp); 816 } else { 817 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); 818 } 819 if (error) { 820 brelse(nbp); 821 goto fail; 822 } 823 } else { 824 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 825 nbp->b_blkno = fsbtodb(fs, nb); 826 } 827 *bpp = nbp; 828 return (0); 829 fail: 830 /* 831 * If we have failed to allocate any blocks, simply return the error. 832 * This is the usual case and avoids the need to fsync the file. 833 */ 834 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 835 return (error); 836 /* 837 * If we have failed part way through block allocation, we 838 * have to deallocate any indirect blocks that we have allocated. 839 * We have to fsync the file before we start to get rid of all 840 * of its dependencies so that we do not leave them dangling. 841 * We have to sync it at the end so that the soft updates code 842 * does not find any untracked changes. Although this is really 843 * slow, running out of disk space is not expected to be a common 844 * occurence. The error return from fsync is ignored as we already 845 * have an error to return to the user. 846 */ 847 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 848 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 849 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number); 850 deallocated += fs->fs_bsize; 851 } 852 if (allocib != NULL) { 853 *allocib = 0; 854 } else if (unwindidx >= 0) { 855 int r; 856 857 r = bread(vp, indirs[unwindidx].in_lbn, 858 (int)fs->fs_bsize, NOCRED, &bp); 859 if (r) { 860 panic("Could not unwind indirect block, error %d", r); 861 brelse(bp); 862 } else { 863 bap = (ufs2_daddr_t *)bp->b_data; 864 bap[indirs[unwindidx].in_off] = 0; 865 if (flags & IO_SYNC) { 866 bwrite(bp); 867 } else { 868 if (bp->b_bufsize == fs->fs_bsize) 869 bp->b_flags |= B_CLUSTEROK; 870 bdwrite(bp); 871 } 872 } 873 } 874 if (deallocated) { 875 #ifdef QUOTA 876 /* 877 * Restore user's disk quota because allocation failed. 878 */ 879 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 880 #endif 881 dp->di_blocks -= btodb(deallocated); 882 ip->i_flag |= IN_CHANGE | IN_UPDATE; 883 } 884 (void) VOP_FSYNC(vp, cred, MNT_WAIT, td); 885 return (error); 886 } 887