1 /*- 2 * Copyright (c) 2002 Networks Associates Technology, Inc. 3 * All rights reserved. 4 * 5 * This software was developed for the FreeBSD Project by Marshall 6 * Kirk McKusick and Network Associates Laboratories, the Security 7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9 * research program 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * Copyright (c) 1982, 1986, 1989, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 4. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/bio.h> 68 #include <sys/buf.h> 69 #include <sys/lock.h> 70 #include <sys/mount.h> 71 #include <sys/vnode.h> 72 73 #include <ufs/ufs/quota.h> 74 #include <ufs/ufs/inode.h> 75 #include <ufs/ufs/ufs_extern.h> 76 #include <ufs/ufs/extattr.h> 77 #include <ufs/ufs/ufsmount.h> 78 79 #include <ufs/ffs/fs.h> 80 #include <ufs/ffs/ffs_extern.h> 81 82 /* 83 * Balloc defines the structure of filesystem storage 84 * by allocating the physical blocks on a device given 85 * the inode and the logical block number in a file. 86 * This is the allocation strategy for UFS1. Below is 87 * the allocation strategy for UFS2. 88 */ 89 int 90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, 91 struct ucred *cred, int flags, struct buf **bpp) 92 { 93 struct inode *ip; 94 struct ufs1_dinode *dp; 95 ufs_lbn_t lbn, lastlbn; 96 struct fs *fs; 97 ufs1_daddr_t nb; 98 struct buf *bp, *nbp; 99 struct ufsmount *ump; 100 struct indir indirs[NIADDR + 2]; 101 int deallocated, osize, nsize, num, i, error; 102 ufs2_daddr_t newb; 103 ufs1_daddr_t *bap, pref; 104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 105 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 106 int unwindidx = -1; 107 int saved_inbdflush; 108 static struct timeval lastfail; 109 static int curfail; 110 int gbflags, reclaimed; 111 112 ip = VTOI(vp); 113 dp = ip->i_din1; 114 fs = ip->i_fs; 115 ump = ip->i_ump; 116 lbn = lblkno(fs, startoffset); 117 size = blkoff(fs, startoffset) + size; 118 reclaimed = 0; 119 if (size > fs->fs_bsize) 120 panic("ffs_balloc_ufs1: blk too big"); 121 *bpp = NULL; 122 if (flags & IO_EXT) 123 return (EOPNOTSUPP); 124 if (lbn < 0) 125 return (EFBIG); 126 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; 127 128 if (DOINGSOFTDEP(vp)) 129 softdep_prealloc(vp, MNT_WAIT); 130 /* 131 * If the next write will extend the file into a new block, 132 * and the file is currently composed of a fragment 133 * this fragment has to be extended to be a full block. 134 */ 135 lastlbn = lblkno(fs, ip->i_size); 136 if (lastlbn < NDADDR && lastlbn < lbn) { 137 nb = lastlbn; 138 osize = blksize(fs, ip, nb); 139 if (osize < fs->fs_bsize && osize > 0) { 140 UFS_LOCK(ump); 141 error = ffs_realloccg(ip, nb, dp->di_db[nb], 142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb, 143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, 144 cred, &bp); 145 if (error) 146 return (error); 147 if (DOINGSOFTDEP(vp)) 148 softdep_setup_allocdirect(ip, nb, 149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb], 150 fs->fs_bsize, osize, bp); 151 ip->i_size = smalllblktosize(fs, nb + 1); 152 dp->di_size = ip->i_size; 153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 154 ip->i_flag |= IN_CHANGE | IN_UPDATE; 155 if (flags & IO_SYNC) 156 bwrite(bp); 157 else 158 bawrite(bp); 159 } 160 } 161 /* 162 * The first NDADDR blocks are direct blocks 163 */ 164 if (lbn < NDADDR) { 165 if (flags & BA_METAONLY) 166 panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); 167 nb = dp->di_db[lbn]; 168 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 169 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); 170 if (error) { 171 brelse(bp); 172 return (error); 173 } 174 bp->b_blkno = fsbtodb(fs, nb); 175 *bpp = bp; 176 return (0); 177 } 178 if (nb != 0) { 179 /* 180 * Consider need to reallocate a fragment. 181 */ 182 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 183 nsize = fragroundup(fs, size); 184 if (nsize <= osize) { 185 error = bread(vp, lbn, osize, NOCRED, &bp); 186 if (error) { 187 brelse(bp); 188 return (error); 189 } 190 bp->b_blkno = fsbtodb(fs, nb); 191 } else { 192 UFS_LOCK(ump); 193 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 194 ffs_blkpref_ufs1(ip, lbn, (int)lbn, 195 &dp->di_db[0]), osize, nsize, flags, 196 cred, &bp); 197 if (error) 198 return (error); 199 if (DOINGSOFTDEP(vp)) 200 softdep_setup_allocdirect(ip, lbn, 201 dbtofsb(fs, bp->b_blkno), nb, 202 nsize, osize, bp); 203 } 204 } else { 205 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 206 nsize = fragroundup(fs, size); 207 else 208 nsize = fs->fs_bsize; 209 UFS_LOCK(ump); 210 error = ffs_alloc(ip, lbn, 211 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), 212 nsize, flags, cred, &newb); 213 if (error) 214 return (error); 215 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 216 bp->b_blkno = fsbtodb(fs, newb); 217 if (flags & BA_CLRBUF) 218 vfs_bio_clrbuf(bp); 219 if (DOINGSOFTDEP(vp)) 220 softdep_setup_allocdirect(ip, lbn, newb, 0, 221 nsize, 0, bp); 222 } 223 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 224 ip->i_flag |= IN_CHANGE | IN_UPDATE; 225 *bpp = bp; 226 return (0); 227 } 228 /* 229 * Determine the number of levels of indirection. 230 */ 231 pref = 0; 232 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 233 return(error); 234 #ifdef INVARIANTS 235 if (num < 1) 236 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); 237 #endif 238 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 239 /* 240 * Fetch the first indirect block allocating if necessary. 241 */ 242 --num; 243 nb = dp->di_ib[indirs[0].in_off]; 244 allocib = NULL; 245 allocblk = allociblk; 246 lbns_remfree = lbns; 247 if (nb == 0) { 248 UFS_LOCK(ump); 249 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, 250 (ufs1_daddr_t *)0); 251 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 252 flags, cred, &newb)) != 0) { 253 curthread_pflags_restore(saved_inbdflush); 254 return (error); 255 } 256 pref = newb + fs->fs_frag; 257 nb = newb; 258 *allocblk++ = nb; 259 *lbns_remfree++ = indirs[1].in_lbn; 260 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); 261 bp->b_blkno = fsbtodb(fs, nb); 262 vfs_bio_clrbuf(bp); 263 if (DOINGSOFTDEP(vp)) { 264 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 265 newb, 0, fs->fs_bsize, 0, bp); 266 bdwrite(bp); 267 } else { 268 /* 269 * Write synchronously so that indirect blocks 270 * never point at garbage. 271 */ 272 if (DOINGASYNC(vp)) 273 bdwrite(bp); 274 else if ((error = bwrite(bp)) != 0) 275 goto fail; 276 } 277 allocib = &dp->di_ib[indirs[0].in_off]; 278 *allocib = nb; 279 ip->i_flag |= IN_CHANGE | IN_UPDATE; 280 } 281 /* 282 * Fetch through the indirect blocks, allocating as necessary. 283 */ 284 retry: 285 for (i = 1;;) { 286 error = bread(vp, 287 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 288 if (error) { 289 brelse(bp); 290 goto fail; 291 } 292 bap = (ufs1_daddr_t *)bp->b_data; 293 nb = bap[indirs[i].in_off]; 294 if (i == num) 295 break; 296 i += 1; 297 if (nb != 0) { 298 bqrelse(bp); 299 continue; 300 } 301 UFS_LOCK(ump); 302 /* 303 * If parent indirect has just been allocated, try to cluster 304 * immediately following it. 305 */ 306 if (pref == 0) 307 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, 308 (ufs1_daddr_t *)0); 309 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 310 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 311 brelse(bp); 312 if (++reclaimed == 1) { 313 UFS_LOCK(ump); 314 softdep_request_cleanup(fs, vp, cred, 315 FLUSH_BLOCKS_WAIT); 316 UFS_UNLOCK(ump); 317 goto retry; 318 } 319 if (ppsratecheck(&lastfail, &curfail, 1)) { 320 ffs_fserr(fs, ip->i_number, "filesystem full"); 321 uprintf("\n%s: write failed, filesystem " 322 "is full\n", fs->fs_fsmnt); 323 } 324 goto fail; 325 } 326 pref = newb + fs->fs_frag; 327 nb = newb; 328 *allocblk++ = nb; 329 *lbns_remfree++ = indirs[i].in_lbn; 330 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); 331 nbp->b_blkno = fsbtodb(fs, nb); 332 vfs_bio_clrbuf(nbp); 333 if (DOINGSOFTDEP(vp)) { 334 softdep_setup_allocindir_meta(nbp, ip, bp, 335 indirs[i - 1].in_off, nb); 336 bdwrite(nbp); 337 } else { 338 /* 339 * Write synchronously so that indirect blocks 340 * never point at garbage. 341 */ 342 if ((error = bwrite(nbp)) != 0) { 343 brelse(bp); 344 goto fail; 345 } 346 } 347 bap[indirs[i - 1].in_off] = nb; 348 if (allocib == NULL && unwindidx < 0) 349 unwindidx = i - 1; 350 /* 351 * If required, write synchronously, otherwise use 352 * delayed write. 353 */ 354 if (flags & IO_SYNC) { 355 bwrite(bp); 356 } else { 357 if (bp->b_bufsize == fs->fs_bsize) 358 bp->b_flags |= B_CLUSTEROK; 359 bdwrite(bp); 360 } 361 } 362 /* 363 * If asked only for the indirect block, then return it. 364 */ 365 if (flags & BA_METAONLY) { 366 curthread_pflags_restore(saved_inbdflush); 367 *bpp = bp; 368 return (0); 369 } 370 /* 371 * Get the data block, allocating if necessary. 372 */ 373 if (nb == 0) { 374 UFS_LOCK(ump); 375 /* 376 * If allocating metadata at the front of the cylinder 377 * group and parent indirect block has just been allocated, 378 * then cluster next to it if it is the first indirect in 379 * the file. Otherwise it has been allocated in the metadata 380 * area, so we want to find our own place out in the data area. 381 */ 382 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) 383 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, 384 &bap[0]); 385 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 386 flags | IO_BUFLOCKED, cred, &newb); 387 if (error) { 388 brelse(bp); 389 if (++reclaimed == 1) { 390 UFS_LOCK(ump); 391 softdep_request_cleanup(fs, vp, cred, 392 FLUSH_BLOCKS_WAIT); 393 UFS_UNLOCK(ump); 394 goto retry; 395 } 396 if (ppsratecheck(&lastfail, &curfail, 1)) { 397 ffs_fserr(fs, ip->i_number, "filesystem full"); 398 uprintf("\n%s: write failed, filesystem " 399 "is full\n", fs->fs_fsmnt); 400 } 401 goto fail; 402 } 403 nb = newb; 404 *allocblk++ = nb; 405 *lbns_remfree++ = lbn; 406 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 407 nbp->b_blkno = fsbtodb(fs, nb); 408 if (flags & BA_CLRBUF) 409 vfs_bio_clrbuf(nbp); 410 if (DOINGSOFTDEP(vp)) 411 softdep_setup_allocindir_page(ip, lbn, bp, 412 indirs[i].in_off, nb, 0, nbp); 413 bap[indirs[i].in_off] = nb; 414 /* 415 * If required, write synchronously, otherwise use 416 * delayed write. 417 */ 418 if (flags & IO_SYNC) { 419 bwrite(bp); 420 } else { 421 if (bp->b_bufsize == fs->fs_bsize) 422 bp->b_flags |= B_CLUSTEROK; 423 bdwrite(bp); 424 } 425 curthread_pflags_restore(saved_inbdflush); 426 *bpp = nbp; 427 return (0); 428 } 429 brelse(bp); 430 if (flags & BA_CLRBUF) { 431 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 432 if (seqcount != 0 && 433 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && 434 !(vm_page_count_severe() || buf_dirty_count_severe())) { 435 error = cluster_read(vp, ip->i_size, lbn, 436 (int)fs->fs_bsize, NOCRED, 437 MAXBSIZE, seqcount, gbflags, &nbp); 438 } else { 439 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, 440 gbflags, &nbp); 441 } 442 if (error) { 443 brelse(nbp); 444 goto fail; 445 } 446 } else { 447 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 448 nbp->b_blkno = fsbtodb(fs, nb); 449 } 450 curthread_pflags_restore(saved_inbdflush); 451 *bpp = nbp; 452 return (0); 453 fail: 454 curthread_pflags_restore(saved_inbdflush); 455 /* 456 * If we have failed to allocate any blocks, simply return the error. 457 * This is the usual case and avoids the need to fsync the file. 458 */ 459 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 460 return (error); 461 /* 462 * If we have failed part way through block allocation, we 463 * have to deallocate any indirect blocks that we have allocated. 464 * We have to fsync the file before we start to get rid of all 465 * of its dependencies so that we do not leave them dangling. 466 * We have to sync it at the end so that the soft updates code 467 * does not find any untracked changes. Although this is really 468 * slow, running out of disk space is not expected to be a common 469 * occurrence. The error return from fsync is ignored as we already 470 * have an error to return to the user. 471 * 472 * XXX Still have to journal the free below 473 */ 474 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 475 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 476 blkp < allocblk; blkp++, lbns_remfree++) { 477 /* 478 * We shall not leave the freed blocks on the vnode 479 * buffer object lists. 480 */ 481 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 482 if (bp != NULL) { 483 bp->b_flags |= (B_INVAL | B_RELBUF); 484 bp->b_flags &= ~B_ASYNC; 485 brelse(bp); 486 } 487 deallocated += fs->fs_bsize; 488 } 489 if (allocib != NULL) { 490 *allocib = 0; 491 } else if (unwindidx >= 0) { 492 int r; 493 494 r = bread(vp, indirs[unwindidx].in_lbn, 495 (int)fs->fs_bsize, NOCRED, &bp); 496 if (r) { 497 panic("Could not unwind indirect block, error %d", r); 498 brelse(bp); 499 } else { 500 bap = (ufs1_daddr_t *)bp->b_data; 501 bap[indirs[unwindidx].in_off] = 0; 502 if (flags & IO_SYNC) { 503 bwrite(bp); 504 } else { 505 if (bp->b_bufsize == fs->fs_bsize) 506 bp->b_flags |= B_CLUSTEROK; 507 bdwrite(bp); 508 } 509 } 510 } 511 if (deallocated) { 512 #ifdef QUOTA 513 /* 514 * Restore user's disk quota because allocation failed. 515 */ 516 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 517 #endif 518 dp->di_blocks -= btodb(deallocated); 519 ip->i_flag |= IN_CHANGE | IN_UPDATE; 520 } 521 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 522 /* 523 * After the buffers are invalidated and on-disk pointers are 524 * cleared, free the blocks. 525 */ 526 for (blkp = allociblk; blkp < allocblk; blkp++) { 527 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 528 ip->i_number, vp->v_type, NULL); 529 } 530 return (error); 531 } 532 533 /* 534 * Balloc defines the structure of file system storage 535 * by allocating the physical blocks on a device given 536 * the inode and the logical block number in a file. 537 * This is the allocation strategy for UFS2. Above is 538 * the allocation strategy for UFS1. 539 */ 540 int 541 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, 542 struct ucred *cred, int flags, struct buf **bpp) 543 { 544 struct inode *ip; 545 struct ufs2_dinode *dp; 546 ufs_lbn_t lbn, lastlbn; 547 struct fs *fs; 548 struct buf *bp, *nbp; 549 struct ufsmount *ump; 550 struct indir indirs[NIADDR + 2]; 551 ufs2_daddr_t nb, newb, *bap, pref; 552 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 553 ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; 554 int deallocated, osize, nsize, num, i, error; 555 int unwindidx = -1; 556 int saved_inbdflush; 557 static struct timeval lastfail; 558 static int curfail; 559 int gbflags, reclaimed; 560 561 ip = VTOI(vp); 562 dp = ip->i_din2; 563 fs = ip->i_fs; 564 ump = ip->i_ump; 565 lbn = lblkno(fs, startoffset); 566 size = blkoff(fs, startoffset) + size; 567 reclaimed = 0; 568 if (size > fs->fs_bsize) 569 panic("ffs_balloc_ufs2: blk too big"); 570 *bpp = NULL; 571 if (lbn < 0) 572 return (EFBIG); 573 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; 574 575 if (DOINGSOFTDEP(vp)) 576 softdep_prealloc(vp, MNT_WAIT); 577 578 /* 579 * Check for allocating external data. 580 */ 581 if (flags & IO_EXT) { 582 if (lbn >= NXADDR) 583 return (EFBIG); 584 /* 585 * If the next write will extend the data into a new block, 586 * and the data is currently composed of a fragment 587 * this fragment has to be extended to be a full block. 588 */ 589 lastlbn = lblkno(fs, dp->di_extsize); 590 if (lastlbn < lbn) { 591 nb = lastlbn; 592 osize = sblksize(fs, dp->di_extsize, nb); 593 if (osize < fs->fs_bsize && osize > 0) { 594 UFS_LOCK(ump); 595 error = ffs_realloccg(ip, -1 - nb, 596 dp->di_extb[nb], 597 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 598 &dp->di_extb[0]), osize, 599 (int)fs->fs_bsize, flags, cred, &bp); 600 if (error) 601 return (error); 602 if (DOINGSOFTDEP(vp)) 603 softdep_setup_allocext(ip, nb, 604 dbtofsb(fs, bp->b_blkno), 605 dp->di_extb[nb], 606 fs->fs_bsize, osize, bp); 607 dp->di_extsize = smalllblktosize(fs, nb + 1); 608 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); 609 bp->b_xflags |= BX_ALTDATA; 610 ip->i_flag |= IN_CHANGE; 611 if (flags & IO_SYNC) 612 bwrite(bp); 613 else 614 bawrite(bp); 615 } 616 } 617 /* 618 * All blocks are direct blocks 619 */ 620 if (flags & BA_METAONLY) 621 panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); 622 nb = dp->di_extb[lbn]; 623 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { 624 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, 625 gbflags, &bp); 626 if (error) { 627 brelse(bp); 628 return (error); 629 } 630 bp->b_blkno = fsbtodb(fs, nb); 631 bp->b_xflags |= BX_ALTDATA; 632 *bpp = bp; 633 return (0); 634 } 635 if (nb != 0) { 636 /* 637 * Consider need to reallocate a fragment. 638 */ 639 osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); 640 nsize = fragroundup(fs, size); 641 if (nsize <= osize) { 642 error = bread_gb(vp, -1 - lbn, osize, NOCRED, 643 gbflags, &bp); 644 if (error) { 645 brelse(bp); 646 return (error); 647 } 648 bp->b_blkno = fsbtodb(fs, nb); 649 bp->b_xflags |= BX_ALTDATA; 650 } else { 651 UFS_LOCK(ump); 652 error = ffs_realloccg(ip, -1 - lbn, 653 dp->di_extb[lbn], 654 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 655 &dp->di_extb[0]), osize, nsize, flags, 656 cred, &bp); 657 if (error) 658 return (error); 659 bp->b_xflags |= BX_ALTDATA; 660 if (DOINGSOFTDEP(vp)) 661 softdep_setup_allocext(ip, lbn, 662 dbtofsb(fs, bp->b_blkno), nb, 663 nsize, osize, bp); 664 } 665 } else { 666 if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) 667 nsize = fragroundup(fs, size); 668 else 669 nsize = fs->fs_bsize; 670 UFS_LOCK(ump); 671 error = ffs_alloc(ip, lbn, 672 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), 673 nsize, flags, cred, &newb); 674 if (error) 675 return (error); 676 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); 677 bp->b_blkno = fsbtodb(fs, newb); 678 bp->b_xflags |= BX_ALTDATA; 679 if (flags & BA_CLRBUF) 680 vfs_bio_clrbuf(bp); 681 if (DOINGSOFTDEP(vp)) 682 softdep_setup_allocext(ip, lbn, newb, 0, 683 nsize, 0, bp); 684 } 685 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); 686 ip->i_flag |= IN_CHANGE; 687 *bpp = bp; 688 return (0); 689 } 690 /* 691 * If the next write will extend the file into a new block, 692 * and the file is currently composed of a fragment 693 * this fragment has to be extended to be a full block. 694 */ 695 lastlbn = lblkno(fs, ip->i_size); 696 if (lastlbn < NDADDR && lastlbn < lbn) { 697 nb = lastlbn; 698 osize = blksize(fs, ip, nb); 699 if (osize < fs->fs_bsize && osize > 0) { 700 UFS_LOCK(ump); 701 error = ffs_realloccg(ip, nb, dp->di_db[nb], 702 ffs_blkpref_ufs2(ip, lastlbn, (int)nb, 703 &dp->di_db[0]), osize, (int)fs->fs_bsize, 704 flags, cred, &bp); 705 if (error) 706 return (error); 707 if (DOINGSOFTDEP(vp)) 708 softdep_setup_allocdirect(ip, nb, 709 dbtofsb(fs, bp->b_blkno), 710 dp->di_db[nb], 711 fs->fs_bsize, osize, bp); 712 ip->i_size = smalllblktosize(fs, nb + 1); 713 dp->di_size = ip->i_size; 714 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); 715 ip->i_flag |= IN_CHANGE | IN_UPDATE; 716 if (flags & IO_SYNC) 717 bwrite(bp); 718 else 719 bawrite(bp); 720 } 721 } 722 /* 723 * The first NDADDR blocks are direct blocks 724 */ 725 if (lbn < NDADDR) { 726 if (flags & BA_METAONLY) 727 panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); 728 nb = dp->di_db[lbn]; 729 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 730 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, 731 gbflags, &bp); 732 if (error) { 733 brelse(bp); 734 return (error); 735 } 736 bp->b_blkno = fsbtodb(fs, nb); 737 *bpp = bp; 738 return (0); 739 } 740 if (nb != 0) { 741 /* 742 * Consider need to reallocate a fragment. 743 */ 744 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 745 nsize = fragroundup(fs, size); 746 if (nsize <= osize) { 747 error = bread_gb(vp, lbn, osize, NOCRED, 748 gbflags, &bp); 749 if (error) { 750 brelse(bp); 751 return (error); 752 } 753 bp->b_blkno = fsbtodb(fs, nb); 754 } else { 755 UFS_LOCK(ump); 756 error = ffs_realloccg(ip, lbn, dp->di_db[lbn], 757 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 758 &dp->di_db[0]), osize, nsize, flags, 759 cred, &bp); 760 if (error) 761 return (error); 762 if (DOINGSOFTDEP(vp)) 763 softdep_setup_allocdirect(ip, lbn, 764 dbtofsb(fs, bp->b_blkno), nb, 765 nsize, osize, bp); 766 } 767 } else { 768 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 769 nsize = fragroundup(fs, size); 770 else 771 nsize = fs->fs_bsize; 772 UFS_LOCK(ump); 773 error = ffs_alloc(ip, lbn, 774 ffs_blkpref_ufs2(ip, lbn, (int)lbn, 775 &dp->di_db[0]), nsize, flags, cred, &newb); 776 if (error) 777 return (error); 778 bp = getblk(vp, lbn, nsize, 0, 0, gbflags); 779 bp->b_blkno = fsbtodb(fs, newb); 780 if (flags & BA_CLRBUF) 781 vfs_bio_clrbuf(bp); 782 if (DOINGSOFTDEP(vp)) 783 softdep_setup_allocdirect(ip, lbn, newb, 0, 784 nsize, 0, bp); 785 } 786 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); 787 ip->i_flag |= IN_CHANGE | IN_UPDATE; 788 *bpp = bp; 789 return (0); 790 } 791 /* 792 * Determine the number of levels of indirection. 793 */ 794 pref = 0; 795 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 796 return(error); 797 #ifdef INVARIANTS 798 if (num < 1) 799 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); 800 #endif 801 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); 802 /* 803 * Fetch the first indirect block allocating if necessary. 804 */ 805 --num; 806 nb = dp->di_ib[indirs[0].in_off]; 807 allocib = NULL; 808 allocblk = allociblk; 809 lbns_remfree = lbns; 810 if (nb == 0) { 811 UFS_LOCK(ump); 812 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, 813 (ufs2_daddr_t *)0); 814 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 815 flags, cred, &newb)) != 0) { 816 curthread_pflags_restore(saved_inbdflush); 817 return (error); 818 } 819 pref = newb + fs->fs_frag; 820 nb = newb; 821 *allocblk++ = nb; 822 *lbns_remfree++ = indirs[1].in_lbn; 823 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 824 GB_UNMAPPED); 825 bp->b_blkno = fsbtodb(fs, nb); 826 vfs_bio_clrbuf(bp); 827 if (DOINGSOFTDEP(vp)) { 828 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 829 newb, 0, fs->fs_bsize, 0, bp); 830 bdwrite(bp); 831 } else { 832 /* 833 * Write synchronously so that indirect blocks 834 * never point at garbage. 835 */ 836 if (DOINGASYNC(vp)) 837 bdwrite(bp); 838 else if ((error = bwrite(bp)) != 0) 839 goto fail; 840 } 841 allocib = &dp->di_ib[indirs[0].in_off]; 842 *allocib = nb; 843 ip->i_flag |= IN_CHANGE | IN_UPDATE; 844 } 845 /* 846 * Fetch through the indirect blocks, allocating as necessary. 847 */ 848 retry: 849 for (i = 1;;) { 850 error = bread(vp, 851 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); 852 if (error) { 853 brelse(bp); 854 goto fail; 855 } 856 bap = (ufs2_daddr_t *)bp->b_data; 857 nb = bap[indirs[i].in_off]; 858 if (i == num) 859 break; 860 i += 1; 861 if (nb != 0) { 862 bqrelse(bp); 863 continue; 864 } 865 UFS_LOCK(ump); 866 /* 867 * If parent indirect has just been allocated, try to cluster 868 * immediately following it. 869 */ 870 if (pref == 0) 871 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, 872 (ufs2_daddr_t *)0); 873 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 874 flags | IO_BUFLOCKED, cred, &newb)) != 0) { 875 brelse(bp); 876 if (++reclaimed == 1) { 877 UFS_LOCK(ump); 878 softdep_request_cleanup(fs, vp, cred, 879 FLUSH_BLOCKS_WAIT); 880 UFS_UNLOCK(ump); 881 goto retry; 882 } 883 if (ppsratecheck(&lastfail, &curfail, 1)) { 884 ffs_fserr(fs, ip->i_number, "filesystem full"); 885 uprintf("\n%s: write failed, filesystem " 886 "is full\n", fs->fs_fsmnt); 887 } 888 goto fail; 889 } 890 pref = newb + fs->fs_frag; 891 nb = newb; 892 *allocblk++ = nb; 893 *lbns_remfree++ = indirs[i].in_lbn; 894 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 895 GB_UNMAPPED); 896 nbp->b_blkno = fsbtodb(fs, nb); 897 vfs_bio_clrbuf(nbp); 898 if (DOINGSOFTDEP(vp)) { 899 softdep_setup_allocindir_meta(nbp, ip, bp, 900 indirs[i - 1].in_off, nb); 901 bdwrite(nbp); 902 } else { 903 /* 904 * Write synchronously so that indirect blocks 905 * never point at garbage. 906 */ 907 if ((error = bwrite(nbp)) != 0) { 908 brelse(bp); 909 goto fail; 910 } 911 } 912 bap[indirs[i - 1].in_off] = nb; 913 if (allocib == NULL && unwindidx < 0) 914 unwindidx = i - 1; 915 /* 916 * If required, write synchronously, otherwise use 917 * delayed write. 918 */ 919 if (flags & IO_SYNC) { 920 bwrite(bp); 921 } else { 922 if (bp->b_bufsize == fs->fs_bsize) 923 bp->b_flags |= B_CLUSTEROK; 924 bdwrite(bp); 925 } 926 } 927 /* 928 * If asked only for the indirect block, then return it. 929 */ 930 if (flags & BA_METAONLY) { 931 curthread_pflags_restore(saved_inbdflush); 932 *bpp = bp; 933 return (0); 934 } 935 /* 936 * Get the data block, allocating if necessary. 937 */ 938 if (nb == 0) { 939 UFS_LOCK(ump); 940 /* 941 * If allocating metadata at the front of the cylinder 942 * group and parent indirect block has just been allocated, 943 * then cluster next to it if it is the first indirect in 944 * the file. Otherwise it has been allocated in the metadata 945 * area, so we want to find our own place out in the data area. 946 */ 947 if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) 948 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, 949 &bap[0]); 950 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 951 flags | IO_BUFLOCKED, cred, &newb); 952 if (error) { 953 brelse(bp); 954 if (++reclaimed == 1) { 955 UFS_LOCK(ump); 956 softdep_request_cleanup(fs, vp, cred, 957 FLUSH_BLOCKS_WAIT); 958 UFS_UNLOCK(ump); 959 goto retry; 960 } 961 if (ppsratecheck(&lastfail, &curfail, 1)) { 962 ffs_fserr(fs, ip->i_number, "filesystem full"); 963 uprintf("\n%s: write failed, filesystem " 964 "is full\n", fs->fs_fsmnt); 965 } 966 goto fail; 967 } 968 nb = newb; 969 *allocblk++ = nb; 970 *lbns_remfree++ = lbn; 971 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 972 nbp->b_blkno = fsbtodb(fs, nb); 973 if (flags & BA_CLRBUF) 974 vfs_bio_clrbuf(nbp); 975 if (DOINGSOFTDEP(vp)) 976 softdep_setup_allocindir_page(ip, lbn, bp, 977 indirs[i].in_off, nb, 0, nbp); 978 bap[indirs[i].in_off] = nb; 979 /* 980 * If required, write synchronously, otherwise use 981 * delayed write. 982 */ 983 if (flags & IO_SYNC) { 984 bwrite(bp); 985 } else { 986 if (bp->b_bufsize == fs->fs_bsize) 987 bp->b_flags |= B_CLUSTEROK; 988 bdwrite(bp); 989 } 990 curthread_pflags_restore(saved_inbdflush); 991 *bpp = nbp; 992 return (0); 993 } 994 brelse(bp); 995 /* 996 * If requested clear invalid portions of the buffer. If we 997 * have to do a read-before-write (typical if BA_CLRBUF is set), 998 * try to do some read-ahead in the sequential case to reduce 999 * the number of I/O transactions. 1000 */ 1001 if (flags & BA_CLRBUF) { 1002 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; 1003 if (seqcount != 0 && 1004 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && 1005 !(vm_page_count_severe() || buf_dirty_count_severe())) { 1006 error = cluster_read(vp, ip->i_size, lbn, 1007 (int)fs->fs_bsize, NOCRED, 1008 MAXBSIZE, seqcount, gbflags, &nbp); 1009 } else { 1010 error = bread_gb(vp, lbn, (int)fs->fs_bsize, 1011 NOCRED, gbflags, &nbp); 1012 } 1013 if (error) { 1014 brelse(nbp); 1015 goto fail; 1016 } 1017 } else { 1018 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); 1019 nbp->b_blkno = fsbtodb(fs, nb); 1020 } 1021 curthread_pflags_restore(saved_inbdflush); 1022 *bpp = nbp; 1023 return (0); 1024 fail: 1025 curthread_pflags_restore(saved_inbdflush); 1026 /* 1027 * If we have failed to allocate any blocks, simply return the error. 1028 * This is the usual case and avoids the need to fsync the file. 1029 */ 1030 if (allocblk == allociblk && allocib == NULL && unwindidx == -1) 1031 return (error); 1032 /* 1033 * If we have failed part way through block allocation, we 1034 * have to deallocate any indirect blocks that we have allocated. 1035 * We have to fsync the file before we start to get rid of all 1036 * of its dependencies so that we do not leave them dangling. 1037 * We have to sync it at the end so that the soft updates code 1038 * does not find any untracked changes. Although this is really 1039 * slow, running out of disk space is not expected to be a common 1040 * occurrence. The error return from fsync is ignored as we already 1041 * have an error to return to the user. 1042 * 1043 * XXX Still have to journal the free below 1044 */ 1045 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1046 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns; 1047 blkp < allocblk; blkp++, lbns_remfree++) { 1048 /* 1049 * We shall not leave the freed blocks on the vnode 1050 * buffer object lists. 1051 */ 1052 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT); 1053 if (bp != NULL) { 1054 bp->b_flags |= (B_INVAL | B_RELBUF); 1055 bp->b_flags &= ~B_ASYNC; 1056 brelse(bp); 1057 } 1058 deallocated += fs->fs_bsize; 1059 } 1060 if (allocib != NULL) { 1061 *allocib = 0; 1062 } else if (unwindidx >= 0) { 1063 int r; 1064 1065 r = bread(vp, indirs[unwindidx].in_lbn, 1066 (int)fs->fs_bsize, NOCRED, &bp); 1067 if (r) { 1068 panic("Could not unwind indirect block, error %d", r); 1069 brelse(bp); 1070 } else { 1071 bap = (ufs2_daddr_t *)bp->b_data; 1072 bap[indirs[unwindidx].in_off] = 0; 1073 if (flags & IO_SYNC) { 1074 bwrite(bp); 1075 } else { 1076 if (bp->b_bufsize == fs->fs_bsize) 1077 bp->b_flags |= B_CLUSTEROK; 1078 bdwrite(bp); 1079 } 1080 } 1081 } 1082 if (deallocated) { 1083 #ifdef QUOTA 1084 /* 1085 * Restore user's disk quota because allocation failed. 1086 */ 1087 (void) chkdq(ip, -btodb(deallocated), cred, FORCE); 1088 #endif 1089 dp->di_blocks -= btodb(deallocated); 1090 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1091 } 1092 (void) ffs_syncvnode(vp, MNT_WAIT, 0); 1093 /* 1094 * After the buffers are invalidated and on-disk pointers are 1095 * cleared, free the blocks. 1096 */ 1097 for (blkp = allociblk; blkp < allocblk; blkp++) { 1098 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, 1099 ip->i_number, vp->v_type, NULL); 1100 } 1101 return (error); 1102 } 1103