/*-
 * Copyright (c) 1993
 *      The Regents of the University of California.  All rights reserved.
 * Modifications/enhancements:
 *      Copyright (c) 1995 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vfs_cluster.c       8.7 (Berkeley) 2/13/94
 * $Id: vfs_cluster.c,v 1.49 1997/11/07 08:53:05 phk Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#if defined(CLUSTERDEBUG)
#include <sys/sysctl.h>
#include <sys/kernel.h>
static int rcluster = 0;
SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, "");
#endif

#ifdef notyet_block_reallocation_enabled
static struct cluster_save *
        cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp));
#endif
static struct buf *
        cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn,
            daddr_t blkno, long size, int run, struct buf *fbp));

extern vm_page_t bogus_page;

/*
 * Maximum number of blocks for read-ahead.
 */
#define MAXRA 32

/*
 * This replaces bread.
 */
int
cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
        struct vnode *vp;
        u_quad_t filesize;
        daddr_t lblkno;
        long size;
        struct ucred *cred;
        long totread;
        int seqcount;
        struct buf **bpp;
{
        struct buf *bp, *rbp, *reqbp;
        daddr_t blkno, origblkno;
        int error, num_ra;
        int i;
        int maxra, racluster;
        long origtotread;

        error = 0;

        /*
         * Try to limit the amount of read-ahead by a few
         * ad-hoc parameters.  This needs work!!!
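         *
         * racluster is the number of file system blocks that fit in one
         * physical transfer (MAXPHYS).  maxra starts at two such clusters
         * plus however many blocks this request spans, and is then clamped
         * both to MAXRA and to one eighth of the buffer cache (nbuf/8).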
         */
        racluster = MAXPHYS/size;
        maxra = 2 * racluster + (totread / size);
        if (maxra > MAXRA)
                maxra = MAXRA;
        if (maxra > nbuf/8)
                maxra = nbuf/8;

        /*
         * get the requested block
         */
        *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0);
        origblkno = lblkno;
        origtotread = totread;

        /*
         * if it is in the cache, then check to see if the reads have been
         * sequential.  If they have, then try some read-ahead, otherwise
         * back-off on prospective read-aheads.
         */
        if (bp->b_flags & B_CACHE) {
                if (!seqcount) {
                        return 0;
                } else if ((bp->b_flags & B_RAM) == 0) {
                        return 0;
                } else {
                        int s;
                        struct buf *tbp;
                        bp->b_flags &= ~B_RAM;
                        /*
                         * We do the spl here so that there is no window
                         * between the incore and the b_usecount increment
                         * below.  We opt to keep the spl out of the loop
                         * for efficiency.
                         */
                        s = splbio();
                        for (i = 1; i < maxra; i++) {

                                if (!(tbp = incore(vp, lblkno+i))) {
                                        break;
                                }

                                /*
                                 * Set another read-ahead mark so we know to
                                 * check again.
                                 */
                                if (((i % racluster) == (racluster - 1)) ||
                                    (i == (maxra - 1)))
                                        tbp->b_flags |= B_RAM;

#if 0
                                if (tbp->b_usecount == 0) {
                                        /*
                                         * Make sure that the soon-to-be used
                                         * readaheads are still there.  The
                                         * getblk/bqrelse pair will boost the
                                         * priority of the buffer.
                                         */
                                        tbp = getblk(vp, lblkno+i, size, 0, 0);
                                        bqrelse(tbp);
                                }
#endif
                        }
                        splx(s);
                        if (i >= maxra) {
                                return 0;
                        }
                        lblkno += i;
                }
                reqbp = bp = NULL;
        } else {
                u_quad_t firstread;
                firstread = (u_quad_t) lblkno * size;
                if (firstread + totread > filesize)
                        totread = filesize - firstread;
                if (totread > size) {
                        int nblks = 0;
                        int ncontigafter;
                        while (totread > 0) {
                                nblks++;
                                totread -= size;
                        }
                        if (nblks == 1)
                                goto single_block_read;
                        if (nblks > racluster)
                                nblks = racluster;

                        error = VOP_BMAP(vp, lblkno, NULL,
                            &blkno, &ncontigafter, NULL);
                        if (error)
                                goto single_block_read;
                        if (blkno == -1)
                                goto single_block_read;
                        if (ncontigafter == 0)
                                goto single_block_read;
                        if (ncontigafter + 1 < nblks)
                                nblks = ncontigafter + 1;

                        bp = cluster_rbuild(vp, filesize, lblkno,
                            blkno, size, nblks, bp);
                        lblkno += nblks;
                } else {
single_block_read:
                        /*
                         * if it isn't in the cache, then get a chunk from
                         * disk if sequential, otherwise just get the block.
                         */
                        bp->b_flags |= B_READ | B_RAM;
                        lblkno += 1;
                }
        }

        /*
         * if we have been doing sequential I/O, then do some read-ahead
         */
        rbp = NULL;
        /* if (seqcount && (lblkno < (origblkno + maxra))) { */
        if (seqcount && (lblkno < (origblkno + seqcount))) {
                /*
                 * we now build the read-ahead buffer if it is desirable.
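                 *
                 * The read-ahead length is the contiguous run reported by
                 * VOP_BMAP (plus the block at lblkno itself), further capped
                 * by seqcount after seqcount has been raised to cover at
                 * least the original request.  A run of blocks is issued as
                 * one cluster via cluster_rbuild(); if nothing contiguous
                 * follows, the single block is read asynchronously instead.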
                 */
                if (((u_quad_t)(lblkno + 1) * size) <= filesize &&
                    !(error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_ra, NULL)) &&
                    blkno != -1) {
                        int nblksread;
                        int ntoread = num_ra + 1;
                        nblksread = (origtotread + size - 1) / size;
                        if (seqcount < nblksread)
                                seqcount = nblksread;
                        if (seqcount < ntoread)
                                ntoread = seqcount;
                        if (num_ra) {
                                rbp = cluster_rbuild(vp, filesize, lblkno,
                                    blkno, size, ntoread, NULL);
                        } else {
                                rbp = getblk(vp, lblkno, size, 0, 0);
                                rbp->b_flags |= B_READ | B_ASYNC | B_RAM;
                                rbp->b_blkno = blkno;
                        }
                }
        }

        /*
         * handle the synchronous read
         */
        if (bp) {
                if (bp->b_flags & (B_DONE | B_DELWRI)) {
                        panic("cluster_read: DONE bp");
                } else {
#if defined(CLUSTERDEBUG)
                        if (rcluster)
                                printf("S(%d,%d,%d) ",
                                    bp->b_lblkno, bp->b_bcount, seqcount);
#endif
                        if ((bp->b_flags & B_CLUSTER) == 0)
                                vfs_busy_pages(bp, 0);
                        error = VOP_STRATEGY(bp);
                        curproc->p_stats->p_ru.ru_inblock++;
                }
        }
        /*
         * and if we have read-aheads, do them too
         */
        if (rbp) {
                if (error) {
                        rbp->b_flags &= ~(B_ASYNC | B_READ);
                        brelse(rbp);
                } else if (rbp->b_flags & B_CACHE) {
                        rbp->b_flags &= ~(B_ASYNC | B_READ);
                        bqrelse(rbp);
                } else {
#if defined(CLUSTERDEBUG)
                        if (rcluster) {
                                if (bp)
                                        printf("A+(%d,%d,%d,%d) ",
                                            rbp->b_lblkno, rbp->b_bcount,
                                            rbp->b_lblkno - origblkno,
                                            seqcount);
                                else
                                        printf("A(%d,%d,%d,%d) ",
                                            rbp->b_lblkno, rbp->b_bcount,
                                            rbp->b_lblkno - origblkno,
                                            seqcount);
                        }
#endif

                        if ((rbp->b_flags & B_CLUSTER) == 0)
                                vfs_busy_pages(rbp, 0);
                        (void) VOP_STRATEGY(rbp);
                        curproc->p_stats->p_ru.ru_inblock++;
                }
        }
        if (reqbp)
                return (biowait(reqbp));
        else
                return (error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
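 *
 * The component buffers keep their identity in the buffer cache; the
 * cluster itself is carried by a pbuf obtained from trypbuf(), whose page
 * array borrows the components' VM pages for the duration of the transfer.
 * Pages that are already fully valid are swapped for bogus_page so the
 * device transfer cannot clobber them, and cluster_callback() completes
 * the individual buffers when the I/O finishes.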
 */
static struct buf *
cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
        struct vnode *vp;
        u_quad_t filesize;
        daddr_t lbn;
        daddr_t blkno;
        long size;
        int run;
        struct buf *fbp;
{
        struct buf *bp, *tbp;
        daddr_t bn;
        int i, inc, j;

#ifdef DIAGNOSTIC
        if (size != vp->v_mount->mnt_stat.f_iosize)
                panic("cluster_rbuild: size %d != filesize %d\n",
                    size, vp->v_mount->mnt_stat.f_iosize);
#endif
        /*
         * avoid a division
         */
        while ((u_quad_t) size * (lbn + run) > filesize) {
                --run;
        }

        if (fbp) {
                tbp = fbp;
                tbp->b_flags |= B_READ;
        } else {
                tbp = getblk(vp, lbn, size, 0, 0);
                if (tbp->b_flags & B_CACHE)
                        return tbp;
                tbp->b_flags |= B_ASYNC | B_READ | B_RAM;
        }

        tbp->b_blkno = blkno;
        if ((tbp->b_flags & B_MALLOC) ||
            ((tbp->b_flags & B_VMIO) == 0) || (run <= 1))
                return tbp;

        bp = trypbuf();
        if (bp == 0)
                return tbp;

        (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
        bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO;
        bp->b_iodone = cluster_callback;
        bp->b_blkno = blkno;
        bp->b_lblkno = lbn;
        pbgetvp(vp, bp);

        TAILQ_INIT(&bp->b_cluster.cluster_head);

        bp->b_bcount = 0;
        bp->b_bufsize = 0;
        bp->b_npages = 0;

        inc = btodb(size);
        for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
                if (i != 0) {
                        if ((bp->b_npages * PAGE_SIZE) +
                            round_page(size) > MAXPHYS)
                                break;

                        if (incore(vp, lbn + i))
                                break;

                        tbp = getblk(vp, lbn + i, size, 0, 0);

                        if ((tbp->b_flags & B_CACHE) ||
                            (tbp->b_flags & B_VMIO) == 0) {
                                bqrelse(tbp);
                                break;
                        }

                        for (j = 0; j < tbp->b_npages; j++) {
                                if (tbp->b_pages[j]->valid) {
                                        break;
                                }
                        }

                        if (j != tbp->b_npages) {
                                /*
                                 * force buffer to be re-constituted later
                                 */
                                tbp->b_flags |= B_RELBUF;
                                brelse(tbp);
                                break;
                        }

                        if ((fbp && (i == 1)) || (i == (run - 1)))
                                tbp->b_flags |= B_RAM;
                        tbp->b_flags |= B_READ | B_ASYNC;
                        if (tbp->b_blkno == tbp->b_lblkno) {
                                tbp->b_blkno = bn;
                        } else if (tbp->b_blkno != bn) {
                                brelse(tbp);
                                break;
                        }
                }
                TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
                    tbp, b_cluster.cluster_entry);
                for (j = 0; j < tbp->b_npages; j += 1) {
                        vm_page_t m;
                        m = tbp->b_pages[j];
                        ++m->busy;
                        ++m->object->paging_in_progress;
                        if ((bp->b_npages == 0) ||
                            (bp->b_pages[bp->b_npages-1] != m)) {
                                bp->b_pages[bp->b_npages] = m;
                                bp->b_npages++;
                        }
                        if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
                                tbp->b_pages[j] = bogus_page;
                }
                bp->b_bcount += tbp->b_bcount;
                bp->b_bufsize += tbp->b_bufsize;
        }

        for (j = 0; j < bp->b_npages; j++) {
                if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
                    VM_PAGE_BITS_ALL)
                        bp->b_pages[j] = bogus_page;
        }
        if (bp->b_bufsize > bp->b_kvasize)
                panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n",
                    bp->b_bufsize, bp->b_kvasize);
        bp->b_kvasize = bp->b_bufsize;

        pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
            (vm_page_t *)bp->b_pages, bp->b_npages);
        return (bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
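 *
 * Any error on the cluster buffer is propagated to each component buffer
 * before biodone() is called on it; the cluster's temporary mapping is
 * then torn down and the pbuf released with relpbuf().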
 */
void
cluster_callback(bp)
        struct buf *bp;
{
        struct buf *nbp, *tbp;
        int error = 0;

        /*
         * Must propagate errors to all the components.
         */
        if (bp->b_flags & B_ERROR)
                error = bp->b_error;

        pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
        /*
         * Move memory from the large cluster buffer into the component
         * buffers and mark IO as done on these.
         */
        for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head);
            tbp; tbp = nbp) {
                nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry);
                if (error) {
                        tbp->b_flags |= B_ERROR;
                        tbp->b_error = error;
                } else
                        tbp->b_dirtyoff = tbp->b_dirtyend = 0;
                biodone(tbp);
        }
        relpbuf(bp);
}

/*
 * Do clustered write for FFS.
 *
 * Three cases:
 *      1. Write is not sequential (write asynchronously)
 *      Write is sequential:
 *      2.      beginning of cluster - begin cluster
 *      3.      middle of a cluster - add to cluster
 *      4.      end of a cluster - asynchronously write cluster
 */
void
cluster_write(bp, filesize)
        struct buf *bp;
        u_quad_t filesize;
{
        struct vnode *vp;
        daddr_t lbn;
        int maxclen, cursize;
        int lblocksize;
        int async;

        vp = bp->b_vp;
        if (vp->v_type == VREG) {
                async = vp->v_mount->mnt_flag & MNT_ASYNC;
                lblocksize = vp->v_mount->mnt_stat.f_iosize;
        } else {
                async = 0;
                lblocksize = bp->b_bufsize;
        }
        lbn = bp->b_lblkno;

        /* Initialize vnode to beginning of file. */
        if (lbn == 0)
                vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

        if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
            (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) {
                maxclen = MAXPHYS / lblocksize - 1;
                if (vp->v_clen != 0) {
                        /*
                         * Next block is not sequential.
                         *
                         * If we are not writing at end of file, the process
                         * seeked to another point in the file since its last
                         * write, or we have reached our maximum cluster size,
                         * then push the previous cluster.  Otherwise try
                         * reallocating to make it sequential.
                         */
                        cursize = vp->v_lastw - vp->v_cstart + 1;
#ifndef notyet_block_reallocation_enabled
                        if (((u_quad_t)(lbn + 1) * lblocksize) != filesize ||
                            lbn != vp->v_lastw + 1 ||
                            vp->v_clen <= cursize) {
                                if (!async)
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                        }
#else
                        if ((lbn + 1) * lblocksize != filesize ||
                            lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
                                if (!async)
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                        } else {
                                struct buf **bpp, **endbp;
                                struct cluster_save *buflist;

                                buflist = cluster_collectbufs(vp, bp);
                                endbp = &buflist->bs_children
                                    [buflist->bs_nchildren - 1];
                                if (VOP_REALLOCBLKS(vp, buflist)) {
                                        /*
                                         * Failed, push the previous cluster.
                                         */
                                        for (bpp = buflist->bs_children;
                                            bpp < endbp; bpp++)
                                                brelse(*bpp);
                                        free(buflist, M_SEGMENT);
                                        cluster_wbuild(vp, lblocksize,
                                            vp->v_cstart, cursize);
                                } else {
                                        /*
                                         * Succeeded, keep building cluster.
                                         */
                                        for (bpp = buflist->bs_children;
                                            bpp <= endbp; bpp++)
                                                bdwrite(*bpp);
                                        free(buflist, M_SEGMENT);
                                        vp->v_lastw = lbn;
                                        vp->v_lasta = bp->b_blkno;
                                        return;
                                }
                        }
#endif /* notyet_block_reallocation_enabled */
                }
                /*
                 * Consider beginning a cluster.  If at end of file, make
                 * cluster as large as possible, otherwise find size of
                 * existing cluster.
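                 *
                 * VOP_BMAP both translates the logical block and reports in
                 * maxclen how many contiguous blocks follow it on disk.  If
                 * a regular file's block has no disk address yet (b_blkno is
                 * still b_lblkno and the bmap fails or returns -1), the
                 * buffer is just written asynchronously and the cluster
                 * state is reset.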
                 */
                if ((vp->v_type == VREG) &&
                    ((u_quad_t) (lbn + 1) * lblocksize) != filesize &&
                    (bp->b_blkno == bp->b_lblkno) &&
                    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) ||
                     bp->b_blkno == -1)) {
                        bawrite(bp);
                        vp->v_clen = 0;
                        vp->v_lasta = bp->b_blkno;
                        vp->v_cstart = lbn + 1;
                        vp->v_lastw = lbn;
                        return;
                }
                vp->v_clen = maxclen;
                if (!async && maxclen == 0) {   /* I/O not contiguous */
                        vp->v_cstart = lbn + 1;
                        bawrite(bp);
                } else {        /* Wait for rest of cluster */
                        vp->v_cstart = lbn;
                        bdwrite(bp);
                }
        } else if (lbn == vp->v_cstart + vp->v_clen) {
                /*
                 * At end of cluster, write it out.
                 */
                bdwrite(bp);
                cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
                vp->v_clen = 0;
                vp->v_cstart = lbn + 1;
        } else
                /*
                 * In the middle of a cluster, so just delay the I/O for now.
                 */
                bdwrite(bp);
        vp->v_lastw = lbn;
        vp->v_lasta = bp->b_blkno;
}


/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
 */
int
cluster_wbuild(vp, size, start_lbn, len)
        struct vnode *vp;
        long size;
        daddr_t start_lbn;
        int len;
{
        struct buf *bp, *tbp;
        int i, j, s;
        int totalwritten = 0;
        int dbsize = btodb(size);

        while (len > 0) {
                s = splbio();
                if (((tbp = gbincore(vp, start_lbn)) == NULL) ||
                    ((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) {
                        ++start_lbn;
                        --len;
                        splx(s);
                        continue;
                }
                bremfree(tbp);
                tbp->b_flags |= B_BUSY;
                tbp->b_flags &= ~B_DONE;
                splx(s);

                /*
                 * Extra memory in the buffer, punt on this buffer.
                 * XXX we could handle this in most cases, but we would
                 * have to push the extra memory down to after our max
                 * possible cluster size and then potentially pull it back
                 * up if the cluster was terminated prematurely--too much
                 * hassle.
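                 *
                 * A buffer is also written out on its own here when it is
                 * not eligible for clustering (B_CLUSTEROK clear or B_MALLOC
                 * set), when its size does not match the file system block
                 * size, or when it is the last block of the requested range.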
                 */
                if (((tbp->b_flags & (B_CLUSTEROK|B_MALLOC)) != B_CLUSTEROK) ||
                    (tbp->b_bcount != tbp->b_bufsize) ||
                    (tbp->b_bcount != size) ||
                    len == 1) {
                        totalwritten += tbp->b_bufsize;
                        bawrite(tbp);
                        ++start_lbn;
                        --len;
                        continue;
                }

                bp = trypbuf();
                if (bp == NULL) {
                        totalwritten += tbp->b_bufsize;
                        bawrite(tbp);
                        ++start_lbn;
                        --len;
                        continue;
                }

                TAILQ_INIT(&bp->b_cluster.cluster_head);
                bp->b_bcount = 0;
                bp->b_bufsize = 0;
                bp->b_npages = 0;
                if (tbp->b_wcred != NOCRED) {
                        bp->b_wcred = tbp->b_wcred;
                        crhold(bp->b_wcred);
                }

                bp->b_blkno = tbp->b_blkno;
                bp->b_lblkno = tbp->b_lblkno;
                (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
                bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER |
                    (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT));
                bp->b_iodone = cluster_callback;
                pbgetvp(vp, bp);

                for (i = 0; i < len; ++i, ++start_lbn) {
                        if (i != 0) {
                                s = splbio();
                                if ((tbp = gbincore(vp, start_lbn)) == NULL) {
                                        splx(s);
                                        break;
                                }

                                if ((tbp->b_flags & (B_VMIO|B_CLUSTEROK|
                                    B_INVAL|B_BUSY|B_DELWRI|B_NEEDCOMMIT)) !=
                                    (B_DELWRI|B_CLUSTEROK|
                                    (bp->b_flags & (B_VMIO|B_NEEDCOMMIT)))) {
                                        splx(s);
                                        break;
                                }

                                if (tbp->b_wcred != bp->b_wcred) {
                                        splx(s);
                                        break;
                                }

                                if ((tbp->b_bcount != size) ||
                                    ((bp->b_blkno + dbsize * i) != tbp->b_blkno) ||
                                    ((tbp->b_npages + bp->b_npages) >
                                     (MAXPHYS / PAGE_SIZE))) {
                                        splx(s);
                                        break;
                                }
                                bremfree(tbp);
                                tbp->b_flags |= B_BUSY;
                                tbp->b_flags &= ~B_DONE;
                                splx(s);
                        }
                        if (tbp->b_flags & B_VMIO) {
                                for (j = 0; j < tbp->b_npages; j += 1) {
                                        vm_page_t m;
                                        m = tbp->b_pages[j];
                                        ++m->busy;
                                        ++m->object->paging_in_progress;
                                        if ((bp->b_npages == 0) ||
                                            (bp->b_pages[bp->b_npages - 1] != m)) {
                                                bp->b_pages[bp->b_npages] = m;
                                                bp->b_npages++;
                                        }
                                }
                        }
                        bp->b_bcount += size;
                        bp->b_bufsize += size;

                        --numdirtybuffers;
                        tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
                        tbp->b_flags |= B_ASYNC;
                        s = splbio();
                        reassignbuf(tbp, tbp->b_vp);    /* put on clean list */
                        ++tbp->b_vp->v_numoutput;
                        splx(s);
                        TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
                            tbp, b_cluster.cluster_entry);
                }
                pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
                    (vm_page_t *) bp->b_pages, bp->b_npages);
                if (bp->b_bufsize > bp->b_kvasize)
                        panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n",
                            bp->b_bufsize, bp->b_kvasize);
                bp->b_kvasize = bp->b_bufsize;
                totalwritten += bp->b_bufsize;
                bp->b_dirtyoff = 0;
                bp->b_dirtyend = bp->b_bufsize;
                bawrite(bp);

                len -= i;
        }
        return totalwritten;
}

#ifdef notyet_block_reallocation_enabled
/*
 * Collect together all the buffers in a cluster.
 * Plus add one additional buffer.
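 *
 * The blocks from v_cstart through v_lastw are read (they should normally
 * already be in the buffer cache as delayed writes) into a malloc'ed
 * cluster_save list, and last_bp is appended as the final child.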
 */
static struct cluster_save *
cluster_collectbufs(vp, last_bp)
        struct vnode *vp;
        struct buf *last_bp;
{
        struct cluster_save *buflist;
        daddr_t lbn;
        int i, len;

        len = vp->v_lastw - vp->v_cstart + 1;
        buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
            M_SEGMENT, M_WAITOK);
        buflist->bs_nchildren = 0;
        buflist->bs_children = (struct buf **) (buflist + 1);
        for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
                (void) bread(vp, lbn, last_bp->b_bcount, NOCRED,
                    &buflist->bs_children[i]);
        buflist->bs_children[i] = last_bp;
        buflist->bs_nchildren = i + 1;
        return (buflist);
}
#endif /* notyet_block_reallocation_enabled */