/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.14 1994/10/15 13:33:06 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

/*
 * Track whether swap is nearly exhausted.  The low/high watermarks
 * (128 and 192 pages worth of blocks) provide hysteresis so that
 * swap_pager_full does not flap as single pages come and go.
 */
static inline void
swapsizecheck()
{
	if (vm_swap_size == 0)
		return;
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
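
/*
 * Life cycle of a swpagerclean ("spc") entry, summarized here for
 * clarity from the code below (the queues themselves are declared
 * above):
 *
 *	swap_pager_free -- an input or output grabs an entry
 *	async write:  the entry is placed on swap_pager_inuse, moved
 *	to swap_pager_done by swap_pager_iodone() at I/O completion,
 *	and finally recycled onto swap_pager_free by swap_pager_clean().
 *	sync I/O:  the entry never visits the inuse/done queues and is
 *	returned to swap_pager_free directly.
 */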

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;
		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				/* free the full cluster-sized kva allocated above */
				kmem_free_wakeup(pager_map, spc->spc_kva,
				    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}

/*
	if (swap_pager_full && (vm_swap_size == 0)) {
		return(NULL);
	}
*/

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
		M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * returns disk block associated with pager and offset
 * additionally, as a side effect returns a flag indicating
 * if the block has been written
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}
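
/*
 * Worked example of the index arithmetic above (illustrative only; it
 * assumes SWB_NPAGES == 8 and PAGE_SIZE == 4096, i.e. each sw_blk_t
 * covers a 32K run of the object -- the real values come from
 * swap_pager.h and the machine configuration):
 *
 *	offset = 0x15000 (page 21 of the object)
 *	block group:   0x15000 / 0x8000             -> sw_blocks[2]
 *	page in group: (0x15000 % 0x8000) / 0x1000  -> slot 5
 *	valid test:    swb_valid & (1 << 5)
 *
 * swb_block[5] then holds the DEV_BSIZE disk block number for that
 * page, or SWB_EMPTY if no swap space has been allocated for it.
 */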

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
#ifdef EXP
	unsigned tmpalloc;
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	if (amount < nblocksfrag) {
		if (rlist_alloc(&swapfrag, amount, rtval))
			return 1;
		if (!rlist_alloc(&swaplist, nblocksfrag, &tmpalloc))
			return 0;
		rlist_free(&swapfrag, tmpalloc + amount, tmpalloc + nblocksfrag - 1);
		*rtval = tmpalloc;
		return 1;
	}
#endif
	if (!rlist_alloc(&swaplist, amount, rtval))
		return 0;
	else
		return 1;
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
#ifdef EXP
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	unsigned tmpalloc;
	if (((to + 1) - from) >= nblocksfrag) {
#endif
		rlist_free(&swaplist, from, to);
#ifdef EXP
		return;
	}
	rlist_free(&swapfrag, from, to);
	while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
		rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag - 1);
	}
#endif
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				vm_swap_size += btodb(PAGE_SIZE);
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	swapsizecheck();
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers;
 * this eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];
				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
		swapsizecheck();
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}
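
/*
 * Concurrency note (added): only one process runs the reclaim scan at
 * a time.  A second caller sleeps on in_reclaim and simply returns
 * when woken, on the assumption that the first caller has already
 * freed whatever could be reclaimed, so retrying immediately would
 * find nothing new.
 */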

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	if (vm_swap_size == 0)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * clear source block before destination object
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);
		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			swapsizecheck();
			*addr = SWB_EMPTY;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
		    &srcvalid);
		int *dstaddrp;
		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid, move its block over to the
			 * destination.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the destination block exists but is not
				 * valid, free it to make room for the source
				 * block.  (If the destination already has a
				 * valid block, it is left alone and the source
				 * block is deallocated below without copying.)
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
					vm_swap_size -= btodb(PAGE_SIZE);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
				if (srcvalid)
					vm_swap_size += btodb(PAGE_SIZE);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			*srcaddrp = SWB_EMPTY;
		}
	}

	swapsizecheck();
	splx(s);

	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				if (bp->swb_valid & (1 << j))
					vm_swap_size += btodb(PAGE_SIZE);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);
	swapsizecheck();

	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}
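
/*
 * Note (added): the entry points below are not normally called
 * directly; the VM system reaches them through the vm_pager_*
 * wrappers, which dispatch via the swappagerops vector declared at
 * the top of this file.  (This description is based on the 4.4BSD
 * pager interface.)
 */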

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count\n");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}
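
/*
 * Return-code conventions used throughout (summary added for clarity,
 * based on how the codes are used in this file and the 4.4BSD
 * vm_pager interface):
 *
 *	VM_PAGER_OK	operation completed successfully
 *	VM_PAGER_PEND	async operation started; completion is reported
 *			through swap_pager_iodone/swap_pager_clean
 *	VM_PAGER_FAIL	page does not exist in the pager, or hard error
 *	VM_PAGER_AGAIN	temporary failure (e.g. no swap space); retry
 */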

static inline int
const swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  this routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;
	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * synchronous writes (async writes use swap_pager_iodone below)
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}

int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);
		if (ix >= swp->sw_nblocks) {
			int j;
			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure the block for the required page exists and is valid */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;
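
	/*
	 * Clustering example (added; purely illustrative): with
	 * count == 5 and reqpage == 2, if page 0's swap block is not
	 * physically contiguous with page 2's, the backward scan frees
	 * page 0 and sets first = 1; if page 4 falls in a different
	 * dmmax region, the forward scan frees it and sets last = 4.
	 * Pages 1..3 are then shifted down, leaving count == 3 and
	 * reqpage == 1 for a single contiguous transfer.
	 */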

	/*
	 * at this point:
	 * "m" is a pointer to the array of vm_page_t's for paging I/O
	 * "count" is the number of vm_page_t entries represented by "m"
	 * "object" is the vm_object_t for I/O
	 * "reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one page
		 * transfer, so we free the other pages that might have been
		 * allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	bgetvp(swapdev_vp, bp);

	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere.  (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in.  We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->flags &= ~PG_CLEAN;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if( count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	if (vm_swap_size == 0) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;
			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small
			 * chunks for small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 * NOTE -- we do not use the physical buffer pool or the
	 * preallocated associated kva's because of the potential for
	 * deadlock.  This is very subtle -- but deadlocks or resource
	 * contention must be avoided on pageouts -- or your system will
	 * sleep (forever)!!!
	 */
/*
	if ( count > 1) {
		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
		if( !kva) {
			for (i = 0; i < count; i++) {
				if( swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
		s = splbio();
		if (curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * if we are setting the valid bit anew,
		 * then diminish the swap free space
		 */
		if ((swb[i]->swb_valid & (1 << off)) == 0)
			vm_swap_size -= btodb(PAGE_SIZE);

		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
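
	/*
	 * Note (added): when VM_PAGER_PEND is returned above, the pages
	 * are still busy and remain attached to the spc on the inuse
	 * queue; the caller must not touch them until swap_pager_iodone()
	 * has moved the spc to the done queue and swap_pager_clean()/
	 * swap_pager_finish() have marked the pages clean and woken any
	 * waiters.
	 */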

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}

	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}
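
/*
 * Note (added): swap_pager_iodone() below runs at I/O completion time
 * (via the b_iodone hook, at splbio) and moves the spc to the done
 * queue, releases buffer resources, and posts wakeups; the pmap
 * unmapping and page flag updates are deferred to swap_pager_clean()
 * and swap_pager_finish(), which run in process context.
 */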
void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("error %d blkno %lu sz %ld ",
		    bp->b_error, (u_long) bp->b_blkno, bp->b_bcount);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
		wakeup((caddr_t) &vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
		wakeup((caddr_t) &vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}