/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.13 1994/10/14 12:26:17 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
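
/*
 * A note on the swpagerclean ("spc") pool: on first use,
 * swap_pager_alloc (below) populates swap_pager_free from the static
 * swcleanlist[] array, giving each entry a preallocated buf header and
 * MAX_PAGEOUT_CLUSTER pages worth of kernel virtual address space.
 * Pageout I/O draws from this fixed pool rather than allocating at
 * I/O time, so a pageout forced by a memory shortage never waits on
 * the very allocators it is trying to replenish (see the deadlock
 * NOTE in swap_pager_output).
 */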

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;
		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
				    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}

	if (swap_pager_full) {
		return (NULL);
	}

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) /
	    btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}
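
	/*
	 * Block-table sizing example (illustrative figures, assuming 4K
	 * pages, 512-byte disk blocks, and SWB_NPAGES == 8): for a 1MB
	 * object, btodb(size) == 2048 and btodb(SWB_NPAGES * PAGE_SIZE)
	 * == 64, so sw_nblocks == 32: one sw_blk per 32K chunk of the
	 * object, each holding up to 8 disk addresses, all SWB_EMPTY
	 * until swap space is actually allocated.
	 */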

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
#ifdef EXP
	unsigned tmpalloc;
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);

	if (amount < nblocksfrag) {
		if (rlist_alloc(&swapfrag, amount, rtval))
			return 1;
		if (!rlist_alloc(&swaplist, nblocksfrag, &tmpalloc))
			return 0;
		rlist_free(&swapfrag, tmpalloc + amount,
		    tmpalloc + nblocksfrag - 1);
		*rtval = tmpalloc;
		return 1;
	}
#endif
	if (!rlist_alloc(&swaplist, amount, rtval))
		return 0;
	else
		return 1;
}
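
/*
 * Typical caller pattern (a sketch of what swap_pager_output does
 * below when it finds an SWB_EMPTY slot; "blk" is the caller's local):
 *
 *	unsigned blk;
 *
 *	if (swap_pager_getswapspace(btodb(PAGE_SIZE), &blk))
 *		swb->swb_block[off] = blk;	(record the disk address)
 *	else
 *		swap_pager_reclaim();		(reclaim, then retry once)
 */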

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
#ifdef EXP
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	unsigned tmpalloc;

	if (((to + 1) - from) >= nblocksfrag) {
#endif
		rlist_free(&swaplist, from, to);
#ifdef EXP
		return;
	}
	rlist_free(&swapfrag, from, to);
	while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
		rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag - 1);
	}
#endif
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr,
			    *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				vm_swap_size += btodb(PAGE_SIZE);
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	swapsizecheck();
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i],
		    reclaims[i] + btodb(PAGE_SIZE) - 1);
		swapsizecheck();
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}
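
/*
 * Note that swap_pager_reclaim keys off the swb_valid bitmask: a disk
 * address whose valid bit is still clear was allocated (usually as
 * part of a cluster by swap_pager_output) but never written, so its
 * blocks can be returned to the free rlist without losing any data.
 * Locked sw_blk entries (swb_locked != 0) are skipped because an I/O
 * may be in flight against them.
 */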

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * clear source block before destination object
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr,
			    *addr + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			swapsizecheck();
			*addr = SWB_EMPTY;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp,
		    i + offset + srcoffset, &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp,
				    i + dstoffset, &dstvalid);
				/*
				 * if the dest has a stale (never written)
				 * allocation, release it; if the dest already
				 * has a valid block, the source block is
				 * deallocated below without copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
					vm_swap_size -= btodb(PAGE_SIZE);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				if (srcvalid)
					vm_swap_size += btodb(PAGE_SIZE);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize;
	    i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);

		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp,
			    *srcaddrp + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			*srcaddrp = SWB_EMPTY;
		}
	}

	swapsizecheck();
	splx(s);

	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}


void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);


	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				if (bp->swb_valid & (1 << j))
					vm_swap_size += btodb(PAGE_SIZE);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);
	swapsizecheck();

	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}
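
/*
 * The pagerops entry points below are thin wrappers: the single-page
 * getpage/putpage variants build one-element page (and rtval) arrays
 * and hand off to swap_pager_input/swap_pager_output, which do the
 * real work of clustering and I/O.
 */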

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];


	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  This routine takes a vm_pager_t
 * as an argument instead of a sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
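
/*
 * Index arithmetic used throughout (illustrative figures, assuming
 * 4K pages and SWB_NPAGES == 8, so each sw_blk maps 0x8000 bytes of
 * the object): for offset 0x9000, swap_pager_block_index yields
 * 0x9000 / 0x8000 == 1 and swap_pager_block_offset yields
 * (0x9000 % 0x8000) / 0x1000 == 1, i.e. slot 1 of sw_blocks[1].
 */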

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * async writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}


int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}


	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;
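
	/*
	 * The two scans above trimmed the request to pages whose swap
	 * blocks are physically contiguous with the faulted page (one
	 * btodb(PAGE_SIZE) stride apart), already written (valid bit
	 * set), and within the same dmmax disk region, so the whole
	 * cluster can be read with a single strategy call.
	 */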

	/*
	 * at this point:
	 * "m" is a pointer to the array of vm_page_t's for paging I/O
	 * "count" is the number of vm_page_t entries represented by "m"
	 * "object" is the vm_object_t for I/O
	 * "reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one
		 * page transfer, so we free the other pages that might
		 * have been allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	bgetvp(swapdev_vp, bp);

	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);
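
	/*
	 * Cleanup: the single-page path above borrowed an spc from
	 * swap_pager_free, while the multi-page path borrowed a
	 * physical buffer via getpbuf().  The code below returns
	 * whichever was used and, for the spc case, wakes any thread
	 * that set SWAP_FREE_NEEDED while the pool was empty.
	 */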

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in.  We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->flags &= ~PG_CLEAN;
				}
				_swap_pager_freespace(swp,
				    m[0]->offset + paging_offset,
				    count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if (count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}
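
			/*
			 * i == SWB_NPAGES here means the sw_blk is still
			 * completely empty, so it is worth allocating a
			 * full cluster of swap blocks at once; otherwise
			 * we fall back to a single page rather than
			 * over-allocate around an existing partial cluster.
			 */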

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small
			 * chunks for small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 * NOTE -- we do not use the physical buffer pool or the
	 * preallocated associated kva's because of the potential for
	 * deadlock.  This is very subtle -- but deadlocks or resource
	 * contention must be avoided on pageouts -- or your system will
	 * sleep (forever) !!!
	 */
/*
	if (count > 1) {
		kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
		if (!kva) {
			for (i = 0; i < count; i++) {
				if (swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
		s = splbio();
		if (curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * mark each page's swap block valid (charging the swap free
	 * space for newly valid blocks) and unlock the swap block
	 * structures
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * if we are setting the valid bit anew,
		 * then diminish the swap free space
		 */
		if ((swb[i]->swb_valid & (1 << off)) == 0)
			vm_swap_size -= btodb(PAGE_SIZE);

		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
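
	/*
	 * An async write returned VM_PAGER_PEND above: its pages stay
	 * busy, and swap_pager_iodone/swap_pager_finish will mark them
	 * clean and wake any waiters when the device completes.  Only
	 * the synchronous case falls through to the wait loop below.
	 */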

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}

	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}
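
/*
 * swap_pager_finish moves the pages of a completed async pageout to
 * their final state: on success they are marked clean, on error they
 * go back on the laundry so the pageout daemon will retry them.
 * Waiters on the pages and on the object's paging_in_progress count
 * are woken here.
 */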
void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}


	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wake up any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("error %d blkno %lu sz %ld ",
		    bp->b_error, (u_long) bp->b_blkno, bp->b_bcount);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
		wakeup((caddr_t) &vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
		wakeup((caddr_t) &vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}