/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 * Modifications/enhancements:
 *	Copyright (c) 1995 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_cluster.c	8.7 (Berkeley) 2/13/94
 * $Id: vfs_cluster.c,v 1.47 1997/06/15 17:56:49 dyson Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#if defined(CLUSTERDEBUG)
#include <sys/sysctl.h>
#include <sys/kernel.h>
static int rcluster = 0;
SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, "");
#endif

#ifdef notyet_block_reallocation_enabled
static struct cluster_save *
	cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp));
#endif
static struct buf *
	cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn,
		daddr_t blkno, long size, int run, struct buf *fbp));

extern vm_page_t bogus_page;

/*
 * Maximum number of blocks for read-ahead.
 */
#define MAXRA 32

/*
 * This replaces bread.
 */
int
cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	long totread;
	int seqcount;
	struct buf **bpp;
{
	struct buf *bp, *rbp, *reqbp;
	daddr_t blkno, rablkno, origblkno;
	int error, num_ra;
	int i;
	int maxra, racluster;
	long origtotread;

	error = 0;

	/*
	 * Try to limit the amount of read-ahead by a few
	 * ad-hoc parameters.  This needs work!!!
	 */
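	/*
	 * racluster is the number of file system blocks that fit in a
	 * single MAXPHYS transfer; maxra grows with the size of this
	 * request but is clamped to MAXRA blocks and to one eighth of
	 * the buffer cache (nbuf/8).
	 */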
	racluster = MAXPHYS / size;
	maxra = 2 * racluster + (totread / size);
	if (maxra > MAXRA)
		maxra = MAXRA;
	if (maxra > nbuf/8)
		maxra = nbuf/8;

	/*
	 * get the requested block
	 */
	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0);
	origblkno = lblkno;
	origtotread = totread;

	/*
	 * if it is in the cache, then check to see if the reads have been
	 * sequential.  If they have, then try some read-ahead, otherwise
	 * back-off on prospective read-aheads.
	 */
	if (bp->b_flags & B_CACHE) {
		if (!seqcount) {
			return 0;
		} else if ((bp->b_flags & B_RAM) == 0) {
			return 0;
		} else {
			int s;
			struct buf *tbp;
			bp->b_flags &= ~B_RAM;
			/*
			 * We do the spl here so that there is no window
			 * between the incore and the b_usecount increment
			 * below.  We opt to keep the spl out of the loop
			 * for efficiency.
			 */
			s = splbio();
			for (i = 1; i < maxra; i++) {

				if (!(tbp = incore(vp, lblkno + i))) {
					break;
				}

				/*
				 * Set another read-ahead mark so we know
				 * to check again.
				 */
				if (((i % racluster) == (racluster - 1)) ||
				    (i == (maxra - 1)))
					tbp->b_flags |= B_RAM;

#if 0
				if (tbp->b_usecount == 0) {
					/*
					 * Make sure that the soon-to-be used
					 * readaheads are still there.  The
					 * getblk/bqrelse pair will boost the
					 * priority of the buffer.
					 */
					tbp = getblk(vp, lblkno + i, size, 0, 0);
					bqrelse(tbp);
				}
#endif
			}
			splx(s);
			if (i >= maxra) {
				return 0;
			}
			lblkno += i;
		}
		reqbp = bp = NULL;
	} else {
		u_quad_t firstread;
		firstread = (u_quad_t) lblkno * size;
		if (firstread + totread > filesize)
			totread = filesize - firstread;
		if (totread > size) {
			int nblks = 0;
			int ncontigafter;
			while (totread > 0) {
				nblks++;
				totread -= size;
			}
			if (nblks == 1)
				goto single_block_read;
			if (nblks > racluster)
				nblks = racluster;

			error = VOP_BMAP(vp, lblkno, NULL,
				&blkno, &ncontigafter, NULL);
			if (error)
				goto single_block_read;
			if (blkno == -1)
				goto single_block_read;
			if (ncontigafter == 0)
				goto single_block_read;
			if (ncontigafter + 1 < nblks)
				nblks = ncontigafter + 1;

			bp = cluster_rbuild(vp, filesize, lblkno,
				blkno, size, nblks, bp);
			lblkno += nblks;
		} else {
single_block_read:
			/*
			 * if it isn't in the cache, then get a chunk from
			 * disk if sequential, otherwise just get the block.
			 */
			bp->b_flags |= B_READ | B_RAM;
			lblkno += 1;
		}
	}

	/*
	 * if we have been doing sequential I/O, then do some read-ahead
	 */
	rbp = NULL;
	/* if (seqcount && (lblkno < (origblkno + maxra))) { */
	if (seqcount && (lblkno < (origblkno + seqcount))) {
		/*
		 * we now build the read-ahead buffer if it is desirable.
		 */
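		/*
		 * VOP_BMAP both maps lblkno to its physical block and
		 * reports how many contiguous blocks follow it (num_ra).
		 * ntoread is then clamped by seqcount, which is first
		 * raised so that it at least covers the original request
		 * (origtotread).
		 */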
		if (((u_quad_t)(lblkno + 1) * size) <= filesize &&
		    !(error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_ra, NULL)) &&
		    blkno != -1) {
			int nblksread;
			int ntoread = num_ra + 1;
			nblksread = (origtotread + size - 1) / size;
			if (seqcount < nblksread)
				seqcount = nblksread;
			if (seqcount < ntoread)
				ntoread = seqcount;
			if (num_ra) {
				rbp = cluster_rbuild(vp, filesize, lblkno,
					blkno, size, ntoread, NULL);
			} else {
				rbp = getblk(vp, lblkno, size, 0, 0);
				rbp->b_flags |= B_READ | B_ASYNC | B_RAM;
				rbp->b_blkno = blkno;
			}
		}
	}

	/*
	 * handle the synchronous read
	 */
	if (bp) {
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			panic("cluster_read: DONE bp");
		} else {
#if defined(CLUSTERDEBUG)
			if (rcluster)
				printf("S(%d,%d,%d) ",
				    bp->b_lblkno, bp->b_bcount, seqcount);
#endif
			if ((bp->b_flags & B_CLUSTER) == 0)
				vfs_busy_pages(bp, 0);
			error = VOP_STRATEGY(bp);
			curproc->p_stats->p_ru.ru_inblock++;
		}
	}
	/*
	 * and if we have read-aheads, do them too
	 */
	if (rbp) {
		if (error) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else if (rbp->b_flags & B_CACHE) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			bqrelse(rbp);
		} else {
#if defined(CLUSTERDEBUG)
			if (rcluster) {
				if (bp)
					printf("A+(%d,%d,%d,%d) ",
					    rbp->b_lblkno, rbp->b_bcount,
					    rbp->b_lblkno - origblkno,
					    seqcount);
				else
					printf("A(%d,%d,%d,%d) ",
					    rbp->b_lblkno, rbp->b_bcount,
					    rbp->b_lblkno - origblkno,
					    seqcount);
			}
#endif

			if ((rbp->b_flags & B_CLUSTER) == 0)
				vfs_busy_pages(rbp, 0);
			(void) VOP_STRATEGY(rbp);
			curproc->p_stats->p_ru.ru_inblock++;
		}
	}
	if (reqbp)
		return (biowait(reqbp));
	else
		return (error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
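/*
 * The cluster is issued as one physical I/O through a pbuf whose page
 * list is assembled from the component buffers; cluster_callback()
 * below then completes each component buffer when the transfer
 * finishes.
 */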
static struct buf *
cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	struct buf *fbp;
{
	struct buf *bp, *tbp;
	daddr_t bn;
	int i, inc, j;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
	/*
	 * avoid a division
	 */
	while ((u_quad_t) size * (lbn + run) > filesize) {
		--run;
	}

	if (fbp) {
		tbp = fbp;
		tbp->b_flags |= B_READ;
	} else {
		tbp = getblk(vp, lbn, size, 0, 0);
		if (tbp->b_flags & B_CACHE)
			return tbp;
		tbp->b_flags |= B_ASYNC | B_READ | B_RAM;
	}

	tbp->b_blkno = blkno;
	if ((tbp->b_flags & B_MALLOC) ||
	    ((tbp->b_flags & B_VMIO) == 0) || (run <= 1))
		return tbp;

	bp = trypbuf();
	if (bp == NULL)
		return tbp;

	(vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
	bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO;
	bp->b_iodone = cluster_callback;
	bp->b_blkno = blkno;
	bp->b_lblkno = lbn;
	pbgetvp(vp, bp);

	TAILQ_INIT(&bp->b_cluster.cluster_head);

	bp->b_bcount = 0;
	bp->b_bufsize = 0;
	bp->b_npages = 0;

	inc = btodb(size);
	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
		if (i != 0) {
			if ((bp->b_npages * PAGE_SIZE) +
			    round_page(size) > MAXPHYS)
				break;

			if (incore(vp, lbn + i))
				break;

			tbp = getblk(vp, lbn + i, size, 0, 0);

			if ((tbp->b_flags & B_CACHE) ||
			    (tbp->b_flags & B_VMIO) == 0) {
				bqrelse(tbp);
				break;
			}

			for (j = 0; j < tbp->b_npages; j++) {
				if (tbp->b_pages[j]->valid) {
					break;
				}
			}

			if (j != tbp->b_npages) {
				/*
				 * force buffer to be re-constituted later
				 */
				tbp->b_flags |= B_RELBUF;
				brelse(tbp);
				break;
			}

			if ((fbp && (i == 1)) || (i == (run - 1)))
				tbp->b_flags |= B_RAM;
			tbp->b_flags |= B_READ | B_ASYNC;
			if (tbp->b_blkno == tbp->b_lblkno) {
				tbp->b_blkno = bn;
			} else if (tbp->b_blkno != bn) {
				brelse(tbp);
				break;
			}
		}
		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
			tbp, b_cluster.cluster_entry);
		for (j = 0; j < tbp->b_npages; j += 1) {
			vm_page_t m;
			m = tbp->b_pages[j];
			++m->busy;
			++m->object->paging_in_progress;
			if ((bp->b_npages == 0) ||
			    (bp->b_pages[bp->b_npages - 1] != m)) {
				bp->b_pages[bp->b_npages] = m;
				bp->b_npages++;
			}
			if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
				tbp->b_pages[j] = bogus_page;
		}
		bp->b_bcount += tbp->b_bcount;
		bp->b_bufsize += tbp->b_bufsize;
	}

	for (j = 0; j < bp->b_npages; j++) {
		if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
		    VM_PAGE_BITS_ALL)
			bp->b_pages[j] = bogus_page;
	}
	if (bp->b_bufsize > bp->b_kvasize)
		panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n",
		    bp->b_bufsize, bp->b_kvasize);
	bp->b_kvasize = bp->b_bufsize;

	pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
		(vm_page_t *)bp->b_pages, bp->b_npages);
	return (bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
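/*
 * Installed as the b_iodone handler by both cluster_rbuild() and
 * cluster_wbuild(); biodone() on the cluster pbuf invokes it once the
 * physical transfer completes.
 */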
void
cluster_callback(bp)
	struct buf *bp;
{
	struct buf *nbp, *tbp;
	int error = 0;

	/*
	 * Must propagate errors to all the components.
	 */
	if (bp->b_flags & B_ERROR)
		error = bp->b_error;

	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
	/*
	 * Move memory from the large cluster buffer into the component
	 * buffers and mark IO as done on these.
	 */
	for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head);
	     tbp; tbp = nbp) {
		nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry);
		if (error) {
			tbp->b_flags |= B_ERROR;
			tbp->b_error = error;
		} else
			tbp->b_dirtyoff = tbp->b_dirtyend = 0;
		biodone(tbp);
	}
	relpbuf(bp);
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2.	beginning of cluster - begin cluster
 *	3.	middle of a cluster - add to cluster
 *	4.	end of a cluster - asynchronously write cluster
 */
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;
	int lblocksize;
	int async;

	vp = bp->b_vp;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	lblocksize = vp->v_mount->mnt_stat.f_iosize;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) {
		maxclen = MAXPHYS / lblocksize - 1;
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * sought to another point in the file since its last
			 * write, or we have reached our maximum cluster size,
			 * then push the previous cluster.  Otherwise try
			 * reallocating to make it sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
#ifndef notyet_block_reallocation_enabled
			if (((u_quad_t)(lbn + 1) * lblocksize) != filesize ||
			    lbn != vp->v_lastw + 1 ||
			    vp->v_clen <= cursize) {
				if (!async)
					cluster_wbuild(vp, lblocksize,
						vp->v_cstart, cursize);
			}
#else
			if ((lbn + 1) * lblocksize != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				if (!async)
					cluster_wbuild(vp, lblocksize,
						vp->v_cstart, cursize);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					     bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, lblocksize,
					    vp->v_cstart, cursize);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					     bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
#endif /* notyet_block_reallocation_enabled */
		}
		/*
		 * Consider beginning a cluster.  If at end of file, make
		 * cluster as large as possible, otherwise find size of
		 * existing cluster.
		 */
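		/*
		 * VOP_BMAP fills in maxclen with the number of contiguous
		 * blocks that follow this one on disk.  If the mapping
		 * fails, or the block has no disk address yet (-1), just
		 * issue this buffer asynchronously by itself.
		 */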
		if (((u_quad_t) (lbn + 1) * lblocksize) != filesize &&
		    (bp->b_blkno == bp->b_lblkno) &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) ||
		     bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (!async && maxclen == 0) {	/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {	/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		bdwrite(bp);
		cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}


/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
 */
int
cluster_wbuild(vp, size, start_lbn, len)
	struct vnode *vp;
	long size;
	daddr_t start_lbn;
	int len;
{
	struct buf *bp, *tbp;
	int i, j, s;
	int totalwritten = 0;
	int dbsize = btodb(size);

	while (len > 0) {
		s = splbio();
		if (((tbp = gbincore(vp, start_lbn)) == NULL) ||
		    ((tbp->b_flags & (B_INVAL | B_BUSY | B_DELWRI)) != B_DELWRI)) {
			++start_lbn;
			--len;
			splx(s);
			continue;
		}
		bremfree(tbp);
		tbp->b_flags |= B_BUSY;
		tbp->b_flags &= ~B_DONE;
		splx(s);

		/*
		 * Extra memory in the buffer, punt on this buffer.
		 * XXX we could handle this in most cases, but we would
		 * have to push the extra memory down to after our max
		 * possible cluster size and then potentially pull it back
		 * up if the cluster was terminated prematurely--too much
		 * hassle.
		 */
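		/*
		 * The same treatment applies if the buffer is not marked
		 * B_CLUSTEROK, is not a full-sized block, or is the only
		 * block left in the range (len == 1): write it out on its
		 * own.
		 */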
		if (((tbp->b_flags & (B_CLUSTEROK | B_MALLOC)) != B_CLUSTEROK) ||
		    (tbp->b_bcount != tbp->b_bufsize) ||
		    (tbp->b_bcount != size) ||
		    len == 1) {
			totalwritten += tbp->b_bufsize;
			bawrite(tbp);
			++start_lbn;
			--len;
			continue;
		}

		bp = trypbuf();
		if (bp == NULL) {
			totalwritten += tbp->b_bufsize;
			bawrite(tbp);
			++start_lbn;
			--len;
			continue;
		}

		TAILQ_INIT(&bp->b_cluster.cluster_head);
		bp->b_bcount = 0;
		bp->b_bufsize = 0;
		bp->b_npages = 0;
		if (tbp->b_wcred != NOCRED) {
			bp->b_wcred = tbp->b_wcred;
			crhold(bp->b_wcred);
		}

		bp->b_blkno = tbp->b_blkno;
		bp->b_lblkno = tbp->b_lblkno;
		(vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
		bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER |
		    (tbp->b_flags & (B_VMIO | B_NEEDCOMMIT));
		bp->b_iodone = cluster_callback;
		pbgetvp(vp, bp);

		for (i = 0; i < len; ++i, ++start_lbn) {
			if (i != 0) {
				s = splbio();
				if ((tbp = gbincore(vp, start_lbn)) == NULL) {
					splx(s);
					break;
				}

				if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK |
				    B_INVAL | B_BUSY | B_DELWRI |
				    B_NEEDCOMMIT)) !=
				    (B_DELWRI | B_CLUSTEROK |
				    (bp->b_flags & (B_VMIO | B_NEEDCOMMIT)))) {
					splx(s);
					break;
				}

				if (tbp->b_wcred != bp->b_wcred) {
					splx(s);
					break;
				}

				if ((tbp->b_bcount != size) ||
				    ((bp->b_blkno + dbsize * i) != tbp->b_blkno) ||
				    ((tbp->b_npages + bp->b_npages) >
				    (MAXPHYS / PAGE_SIZE))) {
					splx(s);
					break;
				}
				bremfree(tbp);
				tbp->b_flags |= B_BUSY;
				tbp->b_flags &= ~B_DONE;
				splx(s);
			}
			if (tbp->b_flags & B_VMIO) {
				for (j = 0; j < tbp->b_npages; j += 1) {
					vm_page_t m;
					m = tbp->b_pages[j];
					++m->busy;
					++m->object->paging_in_progress;
					if ((bp->b_npages == 0) ||
					    (bp->b_pages[bp->b_npages - 1] != m)) {
						bp->b_pages[bp->b_npages] = m;
						bp->b_npages++;
					}
				}
			}
			bp->b_bcount += size;
			bp->b_bufsize += size;

			--numdirtybuffers;
			tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
			tbp->b_flags |= B_ASYNC;
			s = splbio();
			reassignbuf(tbp, tbp->b_vp);	/* put on clean list */
			++tbp->b_vp->v_numoutput;
			splx(s);
			TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
				tbp, b_cluster.cluster_entry);
		}
		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
			(vm_page_t *) bp->b_pages, bp->b_npages);
		if (bp->b_bufsize > bp->b_kvasize)
			panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n",
			    bp->b_bufsize, bp->b_kvasize);
		bp->b_kvasize = bp->b_bufsize;
		totalwritten += bp->b_bufsize;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bufsize;
		bawrite(bp);

		len -= i;
	}
	return totalwritten;
}

#ifdef notyet_block_reallocation_enabled
/*
 * Collect together all the buffers in a cluster.
 * Plus add one additional buffer.
 */
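/*
 * The returned list is freed by the caller (see cluster_write) with
 * free(buflist, M_SEGMENT) once VOP_REALLOCBLKS has been tried.
 */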
static struct cluster_save *
cluster_collectbufs(vp, last_bp)
	struct vnode *vp;
	struct buf *last_bp;
{
	struct cluster_save *buflist;
	daddr_t lbn;
	int i, len;

	len = vp->v_lastw - vp->v_cstart + 1;
	buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
	    M_SEGMENT, M_WAITOK);
	buflist->bs_nchildren = 0;
	buflist->bs_children = (struct buf **) (buflist + 1);
	for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
		(void) bread(vp, lbn, last_bp->b_bcount, NOCRED,
		    &buflist->bs_children[i]);
	buflist->bs_children[i] = last_bp;
	buflist->bs_nchildren = i + 1;
	return (buflist);
}
#endif /* notyet_block_reallocation_enabled */