/*-
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
58 * 59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/bio.h> 68 #include <sys/buf.h> 69 #include <sys/lock.h> 70 #include <sys/mount.h> 71 #include <sys/vnode.h> 72 73 #include <ufs/ufs/quota.h> 74 #include <ufs/ufs/inode.h> 75 #include <ufs/ufs/ufs_extern.h> 76 #include <ufs/ufs/extattr.h> 77 #include <ufs/ufs/ufsmount.h> 78 79 #include <ufs/ffs/fs.h> 80 #include <ufs/ffs/ffs_extern.h> 81 82 /* 83 * Balloc defines the structure of filesystem storage 84 * by allocating the physical blocks on a device given 85 * the inode and the logical block number in a file. 86 * This is the allocation strategy for UFS1. Below is 87 * the allocation strategy for UFS2. 88 */ 89 int 90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 91 struct ucred *cred, int flags, struct buf **bpp) 92 { 93 struct inode *ip; 94 struct ufs1_dinode *dp; 95 ufs_lbn_t lbn, lastlbn; 96 struct fs *fs; 97 ufs1_daddr_t nb; 98 struct buf *bp, *nbp; 99 struct ufsmount *ump; 100 struct indir indirs[NIADDR + 2]; 101 int deallocated, osize, nsize, num, i, error; 102 ufs2_daddr_t newb; 103 ufs1_daddr_t *bap, pref; 104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 106 int unwindidx = -1; 107 int saved_inbdflush; 108 static struct timeval lastfail; 109 static int curfail; 110 int gbflags, reclaimed; 111 112 ip = VTOI(vp); 113 dp = ip->i_din1; 114 fs = ip->i_fs; 115 ump = ip->i_ump; 116 lbn = lblkno(fs, startoffset); 117 size = blkoff(fs, startoffset) + size; 118 reclaimed = 0; 119 if (size > fs->fs_bsize) 120 panic("ffs_balloc_ufs1: blk too big"); 121 *bpp = NULL; 122 if (flags & IO_EXT) 123 return (EOPNOTSUPP); 124 if (lbn < 0) 125 return (EFBIG); 126 gbflags = (flags & BA_UNMAPPED) != 0 ? 
GB_UNMAPPED : 0; 127 128 if (DOINGSOFTDEP(vp)) 129 softdep_prealloc(vp, MNT_WAIT); 130 /* 131 * If the next write will extend the file into a new block, 132 * and the file is currently composed of a fragment 133 * this fragment has to be extended to be a full block. 134 */ 135 lastlbn = lblkno(fs, ip->i_size); 136 if (lastlbn < NDADDR && lastlbn < lbn) { 137 nb = lastlbn; 138 osize = blksize(fs, ip, nb); 139 if (osize < fs->fs_bsize && osize > 0) { 140 UFS_LOCK(ump); 141 error = ffs_realloccg(ip, nb, dp->di_db[nb], 142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, 144 cred, &bp); 145 if (error) 146 return (error); 147 if (DOINGSOFTDEP(vp)) 148 softdep_setup_allocdirect(ip, nb, 149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 150 fs->fs_bsize, osize, bp); 151 ip->i_size = smalllblktosize(fs, nb + 1); 152 dp->di_size = ip->i_size; 153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 154 ip->i_flag |= IN_CHANGE | IN_UPDATE; 155 if (flags & IO_SYNC) 156 bwrite(bp); 157 else 158 bawrite(bp); 159 } 160 } 161 /* 162 * The first NDADDR blocks are direct blocks 163 */ 164 if (lbn < NDADDR) { 165 if (flags & BA_METAONLY) 166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 167 nb = dp->di_db[lbn]; 168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 170 if (error) { 171 brelse(bp); 172 return (error); 173 } 174 bp->b_blkno = fsbtodb(fs, nb); 175 *bpp = bp; 176 return (0); 177 } 178 if (nb != 0) { 179 /* 180 * Consider need to reallocate a fragment. 
181 */ 182 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 183 nsize = fragroundup(fs, size); 184 if (nsize <= osize) { 185 error = bread(vp, lbn, osize, NOCRED, &bp); 186 if (error) { 187 brelse(bp); 188 return (error); 189 } 190 bp->b_blkno = fsbtodb(fs, nb); 191 } else { 192 UFS_LOCK(ump); 193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 194 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 195 &dp->di_db[0]), osize, nsize, flags, 196 cred, &bp); 197 if (error) 198 return (error); 199 if (DOINGSOFTDEP(vp)) 200 softdep_setup_allocdirect(ip, lbn, 201 dbtofsb(fs, bp->b_blkno), nb, 202 nsize, osize, bp); 203 } 204 } else { 205 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 206 nsize = fragroundup(fs, size); 207 else 208 nsize = fs->fs_bsize; 209 UFS_LOCK(ump); 210 error = ffs_alloc(ip, lbn, 211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 212 nsize, flags, cred, &newb); 213 if (error) 214 return (error); 215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 216 bp->b_blkno = fsbtodb(fs, newb); 217 if (flags & BA_CLRBUF) 218 vfs_bio_clrbuf(bp); 219 if (DOINGSOFTDEP(vp)) 220 softdep_setup_allocdirect(ip, lbn, newb, 0, 221 nsize, 0, bp); 222 } 223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 224 ip->i_flag |= IN_CHANGE | IN_UPDATE; 225 *bpp = bp; 226 return (0); 227 } 228 /* 229 * Determine the number of levels of indirection. 230 */ 231 pref = 0; 232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 233 return(error); 234 #ifdef INVARIANTS 235 if (num < 1) 236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 237 #endif 238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 239 /* 240 * Fetch the first indirect block allocating if necessary. 
241 */ 242 --num; 243 nb = dp->di_ib[indirs[0].in_off]; 244 allocib = NULL; 245 allocblk = allociblk; 246 lbns_remfree = lbns; 247 if (nb == 0) { 248 UFS_LOCK(ump); 249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, 250 (ufs1_daddr_t *)0); 251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 252 flags, cred, &newb)) != 0) { 253 curthread_pflags_restore(saved_inbdflush); 254 return (error); 255 } 256 pref = newb + fs->fs_frag; 257 nb = newb; 258 *allocblk++ = nb; 259 *lbns_remfree++ = indirs[1].in_lbn; 260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); 261 bp->b_blkno = fsbtodb(fs, nb); 262 vfs_bio_clrbuf(bp); 263 if (DOINGSOFTDEP(vp)) { 264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 265 newb, 0, fs->fs_bsize, 0, bp); 266 bdwrite(bp); 267 } else { 268 /* 269 * Write synchronously so that indirect blocks 270 * never point at garbage. 271 */ 272 if (DOINGASYNC(vp)) 273 bdwrite(bp); 274 else if ((error = bwrite(bp)) != 0) 275 goto fail; 276 } 277 allocib = &dp->di_ib[indirs[0].in_off]; 278 *allocib = nb; 279 ip->i_flag |= IN_CHANGE | IN_UPDATE; 280 } 281 /* 282 * Fetch through the indirect blocks, allocating as necessary. 283 */ 284 retry: 285 for (i = 1;;) { 286 error = bread(vp, 287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 288 if (error) { 289 brelse(bp); 290 goto fail; 291 } 292 bap = (ufs1_daddr_t *)bp->b_data; 293 nb = bap[indirs[i].in_off]; 294 if (i == num) 295 break; 296 i += 1; 297 if (nb != 0) { 298 bqrelse(bp); 299 continue; 300 } 301 UFS_LOCK(ump); 302 /* 303 * If parent indirect has just been allocated, try to cluster 304 * immediately following it. 
305 */ 306 if (pref == 0) 307 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, 308 (ufs1_daddr_t *)0); 309 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 310 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 311 brelse(bp); 312 if (++reclaimed == 1) { 313 UFS_LOCK(ump); 314 softdep_request_cleanup(fs, vp, cred, 315 FLUSH_BLOCKS_WAIT); 316 UFS_UNLOCK(ump); 317 goto retry; 318 } 319 if (ppsratecheck(&lastfail, &curfail, 1)) { 320 ffs_fserr(fs, ip->i_number, "filesystem full"); 321 uprintf("\n%s: write failed, filesystem " 322 "is full\n", fs->fs_fsmnt); 323 } 324 goto fail; 325 } 326 pref = newb + fs->fs_frag; 327 nb = newb; 328 *allocblk++ = nb; 329 *lbns_remfree++ = indirs[i].in_lbn; 330 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 331 nbp->b_blkno = fsbtodb(fs, nb); 332 vfs_bio_clrbuf(nbp); 333 if (DOINGSOFTDEP(vp)) { 334 softdep_setup_allocindir_meta(nbp, ip, bp, 335 indirs[i - 1].in_off, nb); 336 bdwrite(nbp); 337 } else { 338 /* 339 * Write synchronously so that indirect blocks 340 * never point at garbage. 341 */ 342 if ((error = bwrite(nbp)) != 0) { 343 brelse(bp); 344 goto fail; 345 } 346 } 347 bap[indirs[i - 1].in_off] = nb; 348 if (allocib == NULL && unwindidx < 0) 349 unwindidx = i - 1; 350 /* 351 * If required, write synchronously, otherwise use 352 * delayed write. 353 */ 354 if (flags & IO_SYNC) { 355 bwrite(bp); 356 } else { 357 if (bp->b_bufsize == fs->fs_bsize) 358 bp->b_flags |= B_CLUSTEROK; 359 bdwrite(bp); 360 } 361 } 362 /* 363 * If asked only for the indirect block, then return it. 364 */ 365 if (flags & BA_METAONLY) { 366 curthread_pflags_restore(saved_inbdflush); 367 *bpp = bp; 368 return (0); 369 } 370 /* 371 * Get the data block, allocating if necessary. 372 */ 373 if (nb == 0) { 374 UFS_LOCK(ump); 375 /* 376 * If allocating metadata at the front of the cylinder 377 * group and parent indirect block has just been allocated, 378 * then cluster next to it if it is the first indirect in 379 * the file. 
Otherwise it has been allocated in the metadata 380 * area, so we want to find our own place out in the data area. 381 */ 382 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) 383 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, 384 &bap[0]); 385 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 386 flags | IO_BUFLOCKED, cred, &newb); 387 if (error) { 388 brelse(bp); 389 if (++reclaimed == 1) { 390 UFS_LOCK(ump); 391 softdep_request_cleanup(fs, vp, cred, 392 FLUSH_BLOCKS_WAIT); 393 UFS_UNLOCK(ump); 394 goto retry; 395 } 396 if (ppsratecheck(&lastfail, &curfail, 1)) { 397 ffs_fserr(fs, ip->i_number, "filesystem full"); 398 uprintf("\n%s: write failed, filesystem " 399 "is full\n", fs->fs_fsmnt); 400 } 401 goto fail; 402 } 403 nb = newb; 404 *allocblk++ = nb; 405 *lbns_remfree++ = lbn; 406 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 407 nbp->b_blkno = fsbtodb(fs, nb); 408 if (flags & BA_CLRBUF) 409 vfs_bio_clrbuf(nbp); 410 if (DOINGSOFTDEP(vp)) 411 softdep_setup_allocindir_page(ip, lbn, bp, 412 indirs[i].in_off, nb, 0, nbp); 413 bap[indirs[i].in_off] = nb; 414 /* 415 * If required, write synchronously, otherwise use 416 * delayed write. 
417 */ 418 if (flags & IO_SYNC) { 419 bwrite(bp); 420 } else { 421 if (bp->b_bufsize == fs->fs_bsize) 422 bp->b_flags |= B_CLUSTEROK; 423 bdwrite(bp); 424 } 425 curthread_pflags_restore(saved_inbdflush); 426 *bpp = nbp; 427 return (0); 428 } 429 brelse(bp); 430 if (flags & BA_CLRBUF) { 431 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 432 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 433 error = cluster_read(vp, ip->i_size, lbn, 434 (int)fs->fs_bsize, NOCRED, 435 MAXBSIZE, seqcount, gbflags, &nbp); 436 } else { 437 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, 438 gbflags, &nbp); 439 } 440 if (error) { 441 brelse(nbp); 442 goto fail; 443 } 444 } else { 445 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 446 nbp->b_blkno = fsbtodb(fs, nb); 447 } 448 curthread_pflags_restore(saved_inbdflush); 449 *bpp = nbp; 450 return (0); 451 fail: 452 curthread_pflags_restore(saved_inbdflush); 453 /* 454 * If we have failed to allocate any blocks, simply return the error. 455 * This is the usual case and avoids the need to fsync the file. 456 */ 457 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 458 return (error); 459 /* 460 * If we have failed part way through block allocation, we 461 * have to deallocate any indirect blocks that we have allocated. 462 * We have to fsync the file before we start to get rid of all 463 * of its dependencies so that we do not leave them dangling. 464 * We have to sync it at the end so that the soft updates code 465 * does not find any untracked changes. Although this is really 466 * slow, running out of disk space is not expected to be a common 467 * occurrence. The error return from fsync is ignored as we already 468 * have an error to return to the user. 
469 * 470 * XXX Still have to journal the free below 471 */ 472 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 473 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 474 blkp < allocblk; blkp++, lbns_remfree++) { 475 /* 476 * We shall not leave the freed blocks on the vnode 477 * buffer object lists. 478 */ 479 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 480 if (bp != NULL) { 481 bp->b_flags |= (B_INVAL | B_RELBUF); 482 bp->b_flags &= ~B_ASYNC; 483 brelse(bp); 484 } 485 deallocated += fs->fs_bsize; 486 } 487 if (allocib != NULL) { 488 *allocib = 0; 489 } else if (unwindidx >= 0) { 490 int r; 491 492 r = bread(vp, indirs[unwindidx].in_lbn, 493 (int)fs->fs_bsize, NOCRED, &bp); 494 if (r) { 495 panic("Could not unwind indirect block, error %d", r); 496 brelse(bp); 497 } else { 498 bap = (ufs1_daddr_t *)bp->b_data; 499 bap[indirs[unwindidx].in_off] = 0; 500 if (flags & IO_SYNC) { 501 bwrite(bp); 502 } else { 503 if (bp->b_bufsize == fs->fs_bsize) 504 bp->b_flags |= B_CLUSTEROK; 505 bdwrite(bp); 506 } 507 } 508 } 509 if (deallocated) { 510 #ifdef QUOTA 511 /* 512 * Restore user's disk quota because allocation failed. 513 */ 514 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 515 #endif 516 dp->di_blocks -= btodb(deallocated); 517 ip->i_flag |= IN_CHANGE | IN_UPDATE; 518 } 519 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 520 /* 521 * After the buffers are invalidated and on-disk pointers are 522 * cleared, free the blocks. 523 */ 524 for (blkp = allociblk; blkp < allocblk; blkp++) { 525 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 526 ip->i_number, vp->v_type, NULL); 527 } 528 return (error); 529 } 530 531 /* 532 * Balloc defines the structure of file system storage 533 * by allocating the physical blocks on a device given 534 * the inode and the logical block number in a file. 535 * This is the allocation strategy for UFS2. Above is 536 * the allocation strategy for UFS1. 
537 */ 538 int 539 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 540 struct ucred *cred, int flags, struct buf **bpp) 541 { 542 struct inode *ip; 543 struct ufs2_dinode *dp; 544 ufs_lbn_t lbn, lastlbn; 545 struct fs *fs; 546 struct buf *bp, *nbp; 547 struct ufsmount *ump; 548 struct indir indirs[NIADDR + 2]; 549 ufs2_daddr_t nb, newb, *bap, pref; 550 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 551 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 552 int deallocated, osize, nsize, num, i, error; 553 int unwindidx = -1; 554 int saved_inbdflush; 555 static struct timeval lastfail; 556 static int curfail; 557 int gbflags, reclaimed; 558 559 ip = VTOI(vp); 560 dp = ip->i_din2; 561 fs = ip->i_fs; 562 ump = ip->i_ump; 563 lbn = lblkno(fs, startoffset); 564 size = blkoff(fs, startoffset) + size; 565 reclaimed = 0; 566 if (size > fs->fs_bsize) 567 panic("ffs_balloc_ufs2: blk too big"); 568 *bpp = NULL; 569 if (lbn < 0) 570 return (EFBIG); 571 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; 572 573 if (DOINGSOFTDEP(vp)) 574 softdep_prealloc(vp, MNT_WAIT); 575 576 /* 577 * Check for allocating external data. 578 */ 579 if (flags & IO_EXT) { 580 if (lbn >= NXADDR) 581 return (EFBIG); 582 /* 583 * If the next write will extend the data into a new block, 584 * and the data is currently composed of a fragment 585 * this fragment has to be extended to be a full block. 
586 */ 587 lastlbn = lblkno(fs, dp->di_extsize); 588 if (lastlbn < lbn) { 589 nb = lastlbn; 590 osize = sblksize(fs, dp->di_extsize, nb); 591 if (osize < fs->fs_bsize && osize > 0) { 592 UFS_LOCK(ump); 593 error = ffs_realloccg(ip, -1 - nb, 594 dp->di_extb[nb], 595 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 596 &dp->di_extb[0]), osize, 597 (int)fs->fs_bsize, flags, cred, &bp); 598 if (error) 599 return (error); 600 if (DOINGSOFTDEP(vp)) 601 softdep_setup_allocext(ip, nb, 602 dbtofsb(fs, bp->b_blkno), 603 dp->di_extb[nb], 604 fs->fs_bsize, osize, bp); 605 dp->di_extsize = smalllblktosize(fs, nb + 1); 606 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 607 bp->b_xflags |= BX_ALTDATA; 608 ip->i_flag |= IN_CHANGE; 609 if (flags & IO_SYNC) 610 bwrite(bp); 611 else 612 bawrite(bp); 613 } 614 } 615 /* 616 * All blocks are direct blocks 617 */ 618 if (flags & BA_METAONLY) 619 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 620 nb = dp->di_extb[lbn]; 621 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 622 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, 623 gbflags, &bp); 624 if (error) { 625 brelse(bp); 626 return (error); 627 } 628 bp->b_blkno = fsbtodb(fs, nb); 629 bp->b_xflags |= BX_ALTDATA; 630 *bpp = bp; 631 return (0); 632 } 633 if (nb != 0) { 634 /* 635 * Consider need to reallocate a fragment. 
636 */ 637 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 638 nsize = fragroundup(fs, size); 639 if (nsize <= osize) { 640 error = bread_gb(vp, -1 - lbn, osize, NOCRED, 641 gbflags, &bp); 642 if (error) { 643 brelse(bp); 644 return (error); 645 } 646 bp->b_blkno = fsbtodb(fs, nb); 647 bp->b_xflags |= BX_ALTDATA; 648 } else { 649 UFS_LOCK(ump); 650 error = ffs_realloccg(ip, -1 - lbn, 651 dp->di_extb[lbn], 652 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 653 &dp->di_extb[0]), osize, nsize, flags, 654 cred, &bp); 655 if (error) 656 return (error); 657 bp->b_xflags |= BX_ALTDATA; 658 if (DOINGSOFTDEP(vp)) 659 softdep_setup_allocext(ip, lbn, 660 dbtofsb(fs, bp->b_blkno), nb, 661 nsize, osize, bp); 662 } 663 } else { 664 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 665 nsize = fragroundup(fs, size); 666 else 667 nsize = fs->fs_bsize; 668 UFS_LOCK(ump); 669 error = ffs_alloc(ip, lbn, 670 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 671 nsize, flags, cred, &newb); 672 if (error) 673 return (error); 674 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); 675 bp->b_blkno = fsbtodb(fs, newb); 676 bp->b_xflags |= BX_ALTDATA; 677 if (flags & BA_CLRBUF) 678 vfs_bio_clrbuf(bp); 679 if (DOINGSOFTDEP(vp)) 680 softdep_setup_allocext(ip, lbn, newb, 0, 681 nsize, 0, bp); 682 } 683 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 684 ip->i_flag |= IN_CHANGE; 685 *bpp = bp; 686 return (0); 687 } 688 /* 689 * If the next write will extend the file into a new block, 690 * and the file is currently composed of a fragment 691 * this fragment has to be extended to be a full block. 
692 */ 693 lastlbn = lblkno(fs, ip->i_size); 694 if (lastlbn < NDADDR && lastlbn < lbn) { 695 nb = lastlbn; 696 osize = blksize(fs, ip, nb); 697 if (osize < fs->fs_bsize && osize > 0) { 698 UFS_LOCK(ump); 699 error = ffs_realloccg(ip, nb, dp->di_db[nb], 700 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 701 &dp->di_db[0]), osize, (int)fs->fs_bsize, 702 flags, cred, &bp); 703 if (error) 704 return (error); 705 if (DOINGSOFTDEP(vp)) 706 softdep_setup_allocdirect(ip, nb, 707 dbtofsb(fs, bp->b_blkno), 708 dp->di_db[nb], 709 fs->fs_bsize, osize, bp); 710 ip->i_size = smalllblktosize(fs, nb + 1); 711 dp->di_size = ip->i_size; 712 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 713 ip->i_flag |= IN_CHANGE | IN_UPDATE; 714 if (flags & IO_SYNC) 715 bwrite(bp); 716 else 717 bawrite(bp); 718 } 719 } 720 /* 721 * The first NDADDR blocks are direct blocks 722 */ 723 if (lbn < NDADDR) { 724 if (flags & BA_METAONLY) 725 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 726 nb = dp->di_db[lbn]; 727 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 728 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, 729 gbflags, &bp); 730 if (error) { 731 brelse(bp); 732 return (error); 733 } 734 bp->b_blkno = fsbtodb(fs, nb); 735 *bpp = bp; 736 return (0); 737 } 738 if (nb != 0) { 739 /* 740 * Consider need to reallocate a fragment. 
741 */ 742 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 743 nsize = fragroundup(fs, size); 744 if (nsize <= osize) { 745 error = bread_gb(vp, lbn, osize, NOCRED, 746 gbflags, &bp); 747 if (error) { 748 brelse(bp); 749 return (error); 750 } 751 bp->b_blkno = fsbtodb(fs, nb); 752 } else { 753 UFS_LOCK(ump); 754 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 755 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 756 &dp->di_db[0]), osize, nsize, flags, 757 cred, &bp); 758 if (error) 759 return (error); 760 if (DOINGSOFTDEP(vp)) 761 softdep_setup_allocdirect(ip, lbn, 762 dbtofsb(fs, bp->b_blkno), nb, 763 nsize, osize, bp); 764 } 765 } else { 766 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 767 nsize = fragroundup(fs, size); 768 else 769 nsize = fs->fs_bsize; 770 UFS_LOCK(ump); 771 error = ffs_alloc(ip, lbn, 772 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 773 &dp->di_db[0]), nsize, flags, cred, &newb); 774 if (error) 775 return (error); 776 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 777 bp->b_blkno = fsbtodb(fs, newb); 778 if (flags & BA_CLRBUF) 779 vfs_bio_clrbuf(bp); 780 if (DOINGSOFTDEP(vp)) 781 softdep_setup_allocdirect(ip, lbn, newb, 0, 782 nsize, 0, bp); 783 } 784 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 785 ip->i_flag |= IN_CHANGE | IN_UPDATE; 786 *bpp = bp; 787 return (0); 788 } 789 /* 790 * Determine the number of levels of indirection. 791 */ 792 pref = 0; 793 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 794 return(error); 795 #ifdef INVARIANTS 796 if (num < 1) 797 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 798 #endif 799 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 800 /* 801 * Fetch the first indirect block allocating if necessary. 
802 */ 803 --num; 804 nb = dp->di_ib[indirs[0].in_off]; 805 allocib = NULL; 806 allocblk = allociblk; 807 lbns_remfree = lbns; 808 if (nb == 0) { 809 UFS_LOCK(ump); 810 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, 811 (ufs2_daddr_t *)0); 812 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 813 flags, cred, &newb)) != 0) { 814 curthread_pflags_restore(saved_inbdflush); 815 return (error); 816 } 817 pref = newb + fs->fs_frag; 818 nb = newb; 819 *allocblk++ = nb; 820 *lbns_remfree++ = indirs[1].in_lbn; 821 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 822 GB_UNMAPPED); 823 bp->b_blkno = fsbtodb(fs, nb); 824 vfs_bio_clrbuf(bp); 825 if (DOINGSOFTDEP(vp)) { 826 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 827 newb, 0, fs->fs_bsize, 0, bp); 828 bdwrite(bp); 829 } else { 830 /* 831 * Write synchronously so that indirect blocks 832 * never point at garbage. 833 */ 834 if (DOINGASYNC(vp)) 835 bdwrite(bp); 836 else if ((error = bwrite(bp)) != 0) 837 goto fail; 838 } 839 allocib = &dp->di_ib[indirs[0].in_off]; 840 *allocib = nb; 841 ip->i_flag |= IN_CHANGE | IN_UPDATE; 842 } 843 /* 844 * Fetch through the indirect blocks, allocating as necessary. 845 */ 846 retry: 847 for (i = 1;;) { 848 error = bread(vp, 849 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 850 if (error) { 851 brelse(bp); 852 goto fail; 853 } 854 bap = (ufs2_daddr_t *)bp->b_data; 855 nb = bap[indirs[i].in_off]; 856 if (i == num) 857 break; 858 i += 1; 859 if (nb != 0) { 860 bqrelse(bp); 861 continue; 862 } 863 UFS_LOCK(ump); 864 /* 865 * If parent indirect has just been allocated, try to cluster 866 * immediately following it. 
867 */ 868 if (pref == 0) 869 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, 870 (ufs2_daddr_t *)0); 871 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 872 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 873 brelse(bp); 874 if (++reclaimed == 1) { 875 UFS_LOCK(ump); 876 softdep_request_cleanup(fs, vp, cred, 877 FLUSH_BLOCKS_WAIT); 878 UFS_UNLOCK(ump); 879 goto retry; 880 } 881 if (ppsratecheck(&lastfail, &curfail, 1)) { 882 ffs_fserr(fs, ip->i_number, "filesystem full"); 883 uprintf("\n%s: write failed, filesystem " 884 "is full\n", fs->fs_fsmnt); 885 } 886 goto fail; 887 } 888 pref = newb + fs->fs_frag; 889 nb = newb; 890 *allocblk++ = nb; 891 *lbns_remfree++ = indirs[i].in_lbn; 892 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 893 GB_UNMAPPED); 894 nbp->b_blkno = fsbtodb(fs, nb); 895 vfs_bio_clrbuf(nbp); 896 if (DOINGSOFTDEP(vp)) { 897 softdep_setup_allocindir_meta(nbp, ip, bp, 898 indirs[i - 1].in_off, nb); 899 bdwrite(nbp); 900 } else { 901 /* 902 * Write synchronously so that indirect blocks 903 * never point at garbage. 904 */ 905 if ((error = bwrite(nbp)) != 0) { 906 brelse(bp); 907 goto fail; 908 } 909 } 910 bap[indirs[i - 1].in_off] = nb; 911 if (allocib == NULL && unwindidx < 0) 912 unwindidx = i - 1; 913 /* 914 * If required, write synchronously, otherwise use 915 * delayed write. 916 */ 917 if (flags & IO_SYNC) { 918 bwrite(bp); 919 } else { 920 if (bp->b_bufsize == fs->fs_bsize) 921 bp->b_flags |= B_CLUSTEROK; 922 bdwrite(bp); 923 } 924 } 925 /* 926 * If asked only for the indirect block, then return it. 927 */ 928 if (flags & BA_METAONLY) { 929 curthread_pflags_restore(saved_inbdflush); 930 *bpp = bp; 931 return (0); 932 } 933 /* 934 * Get the data block, allocating if necessary. 935 */ 936 if (nb == 0) { 937 UFS_LOCK(ump); 938 /* 939 * If allocating metadata at the front of the cylinder 940 * group and parent indirect block has just been allocated, 941 * then cluster next to it if it is the first indirect in 942 * the file. 
Otherwise it has been allocated in the metadata 943 * area, so we want to find our own place out in the data area. 944 */ 945 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) 946 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, 947 &bap[0]); 948 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 949 flags | IO_BUFLOCKED, cred, &newb); 950 if (error) { 951 brelse(bp); 952 if (++reclaimed == 1) { 953 UFS_LOCK(ump); 954 softdep_request_cleanup(fs, vp, cred, 955 FLUSH_BLOCKS_WAIT); 956 UFS_UNLOCK(ump); 957 goto retry; 958 } 959 if (ppsratecheck(&lastfail, &curfail, 1)) { 960 ffs_fserr(fs, ip->i_number, "filesystem full"); 961 uprintf("\n%s: write failed, filesystem " 962 "is full\n", fs->fs_fsmnt); 963 } 964 goto fail; 965 } 966 nb = newb; 967 *allocblk++ = nb; 968 *lbns_remfree++ = lbn; 969 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 970 nbp->b_blkno = fsbtodb(fs, nb); 971 if (flags & BA_CLRBUF) 972 vfs_bio_clrbuf(nbp); 973 if (DOINGSOFTDEP(vp)) 974 softdep_setup_allocindir_page(ip, lbn, bp, 975 indirs[i].in_off, nb, 0, nbp); 976 bap[indirs[i].in_off] = nb; 977 /* 978 * If required, write synchronously, otherwise use 979 * delayed write. 980 */ 981 if (flags & IO_SYNC) { 982 bwrite(bp); 983 } else { 984 if (bp->b_bufsize == fs->fs_bsize) 985 bp->b_flags |= B_CLUSTEROK; 986 bdwrite(bp); 987 } 988 curthread_pflags_restore(saved_inbdflush); 989 *bpp = nbp; 990 return (0); 991 } 992 brelse(bp); 993 /* 994 * If requested clear invalid portions of the buffer. If we 995 * have to do a read-before-write (typical if BA_CLRBUF is set), 996 * try to do some read-ahead in the sequential case to reduce 997 * the number of I/O transactions. 
998 */ 999 if (flags & BA_CLRBUF) { 1000 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 1001 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 1002 error = cluster_read(vp, ip->i_size, lbn, 1003 (int)fs->fs_bsize, NOCRED, 1004 MAXBSIZE, seqcount, gbflags, &nbp); 1005 } else { 1006 error = bread_gb(vp, lbn, (int)fs->fs_bsize, 1007 NOCRED, gbflags, &nbp); 1008 } 1009 if (error) { 1010 brelse(nbp); 1011 goto fail; 1012 } 1013 } else { 1014 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 1015 nbp->b_blkno = fsbtodb(fs, nb); 1016 } 1017 curthread_pflags_restore(saved_inbdflush); 1018 *bpp = nbp; 1019 return (0); 1020 fail: 1021 curthread_pflags_restore(saved_inbdflush); 1022 /* 1023 * If we have failed to allocate any blocks, simply return the error. 1024 * This is the usual case and avoids the need to fsync the file. 1025 */ 1026 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 1027 return (error); 1028 /* 1029 * If we have failed part way through block allocation, we 1030 * have to deallocate any indirect blocks that we have allocated. 1031 * We have to fsync the file before we start to get rid of all 1032 * of its dependencies so that we do not leave them dangling. 1033 * We have to sync it at the end so that the soft updates code 1034 * does not find any untracked changes. Although this is really 1035 * slow, running out of disk space is not expected to be a common 1036 * occurrence. The error return from fsync is ignored as we already 1037 * have an error to return to the user. 1038 * 1039 * XXX Still have to journal the free below 1040 */ 1041 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1042 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 1043 blkp < allocblk; blkp++, lbns_remfree++) { 1044 /* 1045 * We shall not leave the freed blocks on the vnode 1046 * buffer object lists. 
1047 */ 1048 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 1049 if (bp != NULL) { 1050 bp->b_flags |= (B_INVAL | B_RELBUF); 1051 bp->b_flags &= ~B_ASYNC; 1052 brelse(bp); 1053 } 1054 deallocated += fs->fs_bsize; 1055 } 1056 if (allocib != NULL) { 1057 *allocib = 0; 1058 } else if (unwindidx >= 0) { 1059 int r; 1060 1061 r = bread(vp, indirs[unwindidx].in_lbn, 1062 (int)fs->fs_bsize, NOCRED, &bp); 1063 if (r) { 1064 panic("Could not unwind indirect block, error %d", r); 1065 brelse(bp); 1066 } else { 1067 bap = (ufs2_daddr_t *)bp->b_data; 1068 bap[indirs[unwindidx].in_off] = 0; 1069 if (flags & IO_SYNC) { 1070 bwrite(bp); 1071 } else { 1072 if (bp->b_bufsize == fs->fs_bsize) 1073 bp->b_flags |= B_CLUSTEROK; 1074 bdwrite(bp); 1075 } 1076 } 1077 } 1078 if (deallocated) { 1079 #ifdef QUOTA 1080 /* 1081 * Restore user's disk quota because allocation failed. 1082 */ 1083 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 1084 #endif 1085 dp->di_blocks -= btodb(deallocated); 1086 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1087 } 1088 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1089 /* 1090 * After the buffers are invalidated and on-disk pointers are 1091 * cleared, free the blocks. 1092 */ 1093 for (blkp = allociblk; blkp < allocblk; blkp++) { 1094 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 1095 ip->i_number, vp->v_type, NULL); 1096 } 1097 return (error); 1098 } 1099