1 /* 2 * Copyright (c) 1994 John S. Dyson 3 * Copyright (c) 1990 University of Utah. 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40 * 41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 42 * $Id: swap_pager.c,v 1.7 1994/08/18 22:36:00 wollman Exp $ 43 */ 44 45 /* 46 * Quick hack to page to dedicated partition(s). 47 * TODO: 48 * Add multiprocessor locks 49 * Deal with async writes in a better fashion 50 */ 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/proc.h> 55 #include <sys/buf.h> 56 #include <sys/vnode.h> 57 #include <sys/malloc.h> 58 59 #include <miscfs/specfs/specdev.h> 60 #include <sys/rlist.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_pager.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/swap_pager.h> 67 68 #ifndef NPENDINGIO 69 #define NPENDINGIO 16 70 #endif 71 72 int nswiodone; 73 extern int vm_pageout_rate_limit; 74 static int cleandone; 75 extern int hz; 76 int swap_pager_full; 77 extern vm_map_t pager_map; 78 extern int vm_swap_size; 79 80 #define MAX_PAGEOUT_CLUSTER 8 81 82 TAILQ_HEAD(swpclean, swpagerclean); 83 84 typedef struct swpagerclean *swp_clean_t; 85 86 struct swpagerclean { 87 TAILQ_ENTRY(swpagerclean) spc_list; 88 int spc_flags; 89 struct buf *spc_bp; 90 sw_pager_t spc_swp; 91 vm_offset_t spc_kva; 92 int spc_count; 93 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 94 } swcleanlist [NPENDINGIO] ; 95 96 97 extern vm_map_t kernel_map; 98 99 /* spc_flags values */ 100 #define SPC_ERROR 0x01 101 102 #define SWB_EMPTY (-1) 103 104 struct swpclean swap_pager_done; /* list of compileted page cleans */ 105 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 106 struct swpclean swap_pager_free; /* list of free pager clean structs */ 107 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 108 struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ 109 110 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 111 int swap_pager_needflags; 112 struct rlist *swapfrag; 113 114 struct pagerlst *swp_qs[]={ 115 &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 116 }; 117 118 int swap_pager_putmulti(); 119 120 struct pagerops swappagerops = { 121 swap_pager_init, 122 swap_pager_alloc, 123 swap_pager_dealloc, 124 swap_pager_getpage, 125 swap_pager_getmulti, 126 swap_pager_putpage, 127 swap_pager_putmulti, 128 swap_pager_haspage 129 }; 130 131 int npendingio = NPENDINGIO; 132 int pendingiowait; 133 int require_swap_init; 134 void swap_pager_finish(); 135 int dmmin, dmmax; 136 extern int vm_page_count; 137 138 struct buf * getpbuf() ; 139 void relpbuf(struct buf *bp) ; 140 141 static inline void swapsizecheck() { 142 if( vm_swap_size < 128*btodb(PAGE_SIZE)) { 143 if( swap_pager_full) 144 printf("swap_pager: out of space\n"); 145 swap_pager_full = 1; 146 } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) 147 swap_pager_full = 0; 148 } 149 150 void 151 swap_pager_init() 152 { 153 dfltpagerops = &swappagerops; 154 155 TAILQ_INIT(&swap_pager_list); 156 TAILQ_INIT(&swap_pager_un_list); 157 158 /* 159 * Initialize clean lists 160 */ 161 TAILQ_INIT(&swap_pager_inuse); 162 TAILQ_INIT(&swap_pager_done); 163 TAILQ_INIT(&swap_pager_free); 164 165 require_swap_init = 1; 166 167 /* 168 * Calculate the swap allocation constants. 169 */ 170 171 dmmin = CLBYTES/DEV_BSIZE; 172 dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; 173 174 } 175 176 /* 177 * Allocate a pager structure and associated resources. 178 * Note that if we are called from the pageout daemon (handle == NULL) 179 * we should not wait for memory as it could resulting in deadlock. 180 */ 181 vm_pager_t 182 swap_pager_alloc(handle, size, prot, offset) 183 caddr_t handle; 184 register vm_size_t size; 185 vm_prot_t prot; 186 vm_offset_t offset; 187 { 188 register vm_pager_t pager; 189 register sw_pager_t swp; 190 int waitok; 191 int i,j; 192 193 if (require_swap_init) { 194 swp_clean_t spc; 195 struct buf *bp; 196 /* 197 * kva's are allocated here so that we dont need to keep 198 * doing kmem_alloc pageables at runtime 199 */ 200 for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { 201 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER); 202 if (!spc->spc_kva) { 203 break; 204 } 205 spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); 206 if (!spc->spc_bp) { 207 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 208 break; 209 } 210 spc->spc_flags = 0; 211 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 212 } 213 require_swap_init = 0; 214 if( size == 0) 215 return(NULL); 216 } 217 218 /* 219 * If this is a "named" anonymous region, look it up and 220 * return the appropriate pager if it exists. 221 */ 222 if (handle) { 223 pager = vm_pager_lookup(&swap_pager_list, handle); 224 if (pager != NULL) { 225 /* 226 * Use vm_object_lookup to gain a reference 227 * to the object and also to remove from the 228 * object cache. 229 */ 230 if (vm_object_lookup(pager) == NULL) 231 panic("swap_pager_alloc: bad object"); 232 return(pager); 233 } 234 } 235 236 if (swap_pager_full) { 237 return(NULL); 238 } 239 240 /* 241 * Pager doesn't exist, allocate swap management resources 242 * and initialize. 243 */ 244 waitok = handle ? M_WAITOK : M_NOWAIT; 245 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 246 if (pager == NULL) 247 return(NULL); 248 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 249 if (swp == NULL) { 250 free((caddr_t)pager, M_VMPAGER); 251 return(NULL); 252 } 253 size = round_page(size); 254 swp->sw_osize = size; 255 swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); 256 swp->sw_blocks = (sw_blk_t) 257 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 258 M_VMPGDATA, waitok); 259 if (swp->sw_blocks == NULL) { 260 free((caddr_t)swp, M_VMPGDATA); 261 free((caddr_t)pager, M_VMPAGER); 262 return(NULL); 263 } 264 265 for (i = 0; i < swp->sw_nblocks; i++) { 266 swp->sw_blocks[i].swb_valid = 0; 267 swp->sw_blocks[i].swb_locked = 0; 268 for (j = 0; j < SWB_NPAGES; j++) 269 swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; 270 } 271 272 swp->sw_poip = 0; 273 if (handle) { 274 vm_object_t object; 275 276 swp->sw_flags = SW_NAMED; 277 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 278 /* 279 * Consistant with other pagers: return with object 280 * referenced. Can't do this with handle == NULL 281 * since it might be the pageout daemon calling. 282 */ 283 object = vm_object_allocate(size); 284 vm_object_enter(object, pager); 285 vm_object_setpager(object, pager, 0, FALSE); 286 } else { 287 swp->sw_flags = 0; 288 TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list); 289 } 290 pager->pg_handle = handle; 291 pager->pg_ops = &swappagerops; 292 pager->pg_type = PG_SWAP; 293 pager->pg_data = (caddr_t)swp; 294 295 return(pager); 296 } 297 298 /* 299 * returns disk block associated with pager and offset 300 * additionally, as a side effect returns a flag indicating 301 * if the block has been written 302 */ 303 304 static int * 305 swap_pager_diskaddr(swp, offset, valid) 306 sw_pager_t swp; 307 vm_offset_t offset; 308 int *valid; 309 { 310 register sw_blk_t swb; 311 int ix; 312 313 if (valid) 314 *valid = 0; 315 ix = offset / (SWB_NPAGES*PAGE_SIZE); 316 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 317 return(FALSE); 318 } 319 swb = &swp->sw_blocks[ix]; 320 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 321 if (valid) 322 *valid = swb->swb_valid & (1<<ix); 323 return &swb->swb_block[ix]; 324 } 325 326 /* 327 * Utility routine to set the valid (written) bit for 328 * a block associated with a pager and offset 329 */ 330 static void 331 swap_pager_setvalid(swp, offset, valid) 332 sw_pager_t swp; 333 vm_offset_t offset; 334 int valid; 335 { 336 register sw_blk_t swb; 337 int ix; 338 339 ix = offset / (SWB_NPAGES*PAGE_SIZE); 340 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) 341 return; 342 343 swb = &swp->sw_blocks[ix]; 344 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 345 if (valid) 346 swb->swb_valid |= (1 << ix); 347 else 348 swb->swb_valid &= ~(1 << ix); 349 return; 350 } 351 352 /* 353 * this routine allocates swap space with a fragmentation 354 * minimization policy. 355 */ 356 int 357 swap_pager_getswapspace( unsigned amount, unsigned *rtval) { 358 unsigned tmpalloc; 359 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 360 if( amount < nblocksfrag) { 361 if( rlist_alloc(&swapfrag, amount, rtval)) 362 return 1; 363 if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc)) 364 return 0; 365 rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1); 366 *rtval = tmpalloc; 367 return 1; 368 } 369 if( !rlist_alloc(&swapmap, amount, rtval)) 370 return 0; 371 else 372 return 1; 373 } 374 375 /* 376 * this routine frees swap space with a fragmentation 377 * minimization policy. 378 */ 379 void 380 swap_pager_freeswapspace( unsigned from, unsigned to) { 381 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 382 unsigned tmpalloc; 383 if( ((to + 1) - from) >= nblocksfrag) { 384 while( (from + nblocksfrag) <= to + 1) { 385 rlist_free(&swapmap, from, from + nblocksfrag - 1); 386 from += nblocksfrag; 387 } 388 } 389 if( from >= to) 390 return; 391 rlist_free(&swapfrag, from, to); 392 while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) { 393 rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1); 394 } 395 } 396 /* 397 * this routine frees swap blocks from a specified pager 398 */ 399 void 400 _swap_pager_freespace(swp, start, size) 401 sw_pager_t swp; 402 vm_offset_t start; 403 vm_offset_t size; 404 { 405 vm_offset_t i; 406 int s; 407 408 s = splbio(); 409 for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) { 410 int valid; 411 int *addr = swap_pager_diskaddr(swp, i, &valid); 412 if (addr && *addr != SWB_EMPTY) { 413 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 414 if( valid) { 415 vm_swap_size += btodb(PAGE_SIZE); 416 swap_pager_setvalid(swp, i, 0); 417 } 418 *addr = SWB_EMPTY; 419 } 420 } 421 swapsizecheck(); 422 splx(s); 423 } 424 425 void 426 swap_pager_freespace(pager, start, size) 427 vm_pager_t pager; 428 vm_offset_t start; 429 vm_offset_t size; 430 { 431 _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); 432 } 433 434 /* 435 * swap_pager_reclaim frees up over-allocated space from all pagers 436 * this eliminates internal fragmentation due to allocation of space 437 * for segments that are never swapped to. It has been written so that 438 * it does not block until the rlist_free operation occurs; it keeps 439 * the queues consistant. 440 */ 441 442 /* 443 * Maximum number of blocks (pages) to reclaim per pass 444 */ 445 #define MAXRECLAIM 256 446 447 void 448 swap_pager_reclaim() 449 { 450 vm_pager_t p; 451 sw_pager_t swp; 452 int i, j, k; 453 int s; 454 int reclaimcount; 455 static int reclaims[MAXRECLAIM]; 456 static int in_reclaim; 457 458 /* 459 * allow only one process to be in the swap_pager_reclaim subroutine 460 */ 461 s = splbio(); 462 if (in_reclaim) { 463 tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); 464 splx(s); 465 return; 466 } 467 in_reclaim = 1; 468 reclaimcount = 0; 469 470 /* for each pager queue */ 471 for (k = 0; swp_qs[k]; k++) { 472 473 p = swp_qs[k]->tqh_first; 474 while (p && (reclaimcount < MAXRECLAIM)) { 475 476 /* 477 * see if any blocks associated with a pager has been 478 * allocated but not used (written) 479 */ 480 swp = (sw_pager_t) p->pg_data; 481 for (i = 0; i < swp->sw_nblocks; i++) { 482 sw_blk_t swb = &swp->sw_blocks[i]; 483 if( swb->swb_locked) 484 continue; 485 for (j = 0; j < SWB_NPAGES; j++) { 486 if (swb->swb_block[j] != SWB_EMPTY && 487 (swb->swb_valid & (1 << j)) == 0) { 488 reclaims[reclaimcount++] = swb->swb_block[j]; 489 swb->swb_block[j] = SWB_EMPTY; 490 if (reclaimcount >= MAXRECLAIM) 491 goto rfinished; 492 } 493 } 494 } 495 p = p->pg_list.tqe_next; 496 } 497 } 498 499 rfinished: 500 501 /* 502 * free the blocks that have been added to the reclaim list 503 */ 504 for (i = 0; i < reclaimcount; i++) { 505 swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); 506 swapsizecheck(); 507 wakeup((caddr_t) &in_reclaim); 508 } 509 510 splx(s); 511 in_reclaim = 0; 512 wakeup((caddr_t) &in_reclaim); 513 } 514 515 516 /* 517 * swap_pager_copy copies blocks from one pager to another and 518 * destroys the source pager 519 */ 520 521 void 522 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) 523 vm_pager_t srcpager; 524 vm_offset_t srcoffset; 525 vm_pager_t dstpager; 526 vm_offset_t dstoffset; 527 vm_offset_t offset; 528 { 529 sw_pager_t srcswp, dstswp; 530 vm_offset_t i; 531 int s; 532 533 srcswp = (sw_pager_t) srcpager->pg_data; 534 dstswp = (sw_pager_t) dstpager->pg_data; 535 536 /* 537 * remove the source pager from the swap_pager internal queue 538 */ 539 s = splbio(); 540 if (srcswp->sw_flags & SW_NAMED) { 541 TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); 542 srcswp->sw_flags &= ~SW_NAMED; 543 } else { 544 TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); 545 } 546 547 while (srcswp->sw_poip) { 548 tsleep((caddr_t)srcswp, PVM, "spgout", 0); 549 } 550 splx(s); 551 552 /* 553 * clean all of the pages that are currently active and finished 554 */ 555 (void) swap_pager_clean(); 556 557 s = splbio(); 558 /* 559 * clear source block before destination object 560 * (release allocated space) 561 */ 562 for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { 563 int valid; 564 int *addr = swap_pager_diskaddr(srcswp, i, &valid); 565 if (addr && *addr != SWB_EMPTY) { 566 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 567 if( valid) 568 vm_swap_size += btodb(PAGE_SIZE); 569 swapsizecheck(); 570 *addr = SWB_EMPTY; 571 } 572 } 573 /* 574 * transfer source to destination 575 */ 576 for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { 577 int srcvalid, dstvalid; 578 int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, 579 &srcvalid); 580 int *dstaddrp; 581 /* 582 * see if the source has space allocated 583 */ 584 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 585 /* 586 * if the source is valid and the dest has no space, then 587 * copy the allocation from the srouce to the dest. 588 */ 589 if (srcvalid) { 590 dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); 591 /* 592 * if the dest already has a valid block, deallocate the 593 * source block without copying. 594 */ 595 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 596 swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); 597 *dstaddrp = SWB_EMPTY; 598 } 599 if (dstaddrp && *dstaddrp == SWB_EMPTY) { 600 *dstaddrp = *srcaddrp; 601 *srcaddrp = SWB_EMPTY; 602 swap_pager_setvalid(dstswp, i + dstoffset, 1); 603 vm_swap_size -= btodb(PAGE_SIZE); 604 } 605 } 606 /* 607 * if the source is not empty at this point, then deallocate the space. 608 */ 609 if (*srcaddrp != SWB_EMPTY) { 610 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 611 if( srcvalid) 612 vm_swap_size += btodb(PAGE_SIZE); 613 *srcaddrp = SWB_EMPTY; 614 } 615 } 616 } 617 618 /* 619 * deallocate the rest of the source object 620 */ 621 for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { 622 int valid; 623 int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); 624 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 625 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 626 if( valid) 627 vm_swap_size += btodb(PAGE_SIZE); 628 *srcaddrp = SWB_EMPTY; 629 } 630 } 631 632 swapsizecheck(); 633 splx(s); 634 635 free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); 636 srcswp->sw_blocks = 0; 637 free((caddr_t)srcswp, M_VMPGDATA); 638 srcpager->pg_data = 0; 639 free((caddr_t)srcpager, M_VMPAGER); 640 641 return; 642 } 643 644 645 void 646 swap_pager_dealloc(pager) 647 vm_pager_t pager; 648 { 649 register int i,j; 650 register sw_blk_t bp; 651 register sw_pager_t swp; 652 int s; 653 654 /* 655 * Remove from list right away so lookups will fail if we 656 * block for pageout completion. 657 */ 658 s = splbio(); 659 swp = (sw_pager_t) pager->pg_data; 660 if (swp->sw_flags & SW_NAMED) { 661 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 662 swp->sw_flags &= ~SW_NAMED; 663 } else { 664 TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); 665 } 666 /* 667 * Wait for all pageouts to finish and remove 668 * all entries from cleaning list. 669 */ 670 671 while (swp->sw_poip) { 672 tsleep((caddr_t)swp, PVM, "swpout", 0); 673 } 674 splx(s); 675 676 677 (void) swap_pager_clean(); 678 679 /* 680 * Free left over swap blocks 681 */ 682 s = splbio(); 683 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { 684 for (j = 0; j < SWB_NPAGES; j++) 685 if (bp->swb_block[j] != SWB_EMPTY) { 686 swap_pager_freeswapspace((unsigned)bp->swb_block[j], 687 (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); 688 if( bp->swb_valid & (1<<j)) 689 vm_swap_size += btodb(PAGE_SIZE); 690 bp->swb_block[j] = SWB_EMPTY; 691 } 692 } 693 splx(s); 694 swapsizecheck(); 695 696 /* 697 * Free swap management resources 698 */ 699 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 700 swp->sw_blocks = 0; 701 free((caddr_t)swp, M_VMPGDATA); 702 pager->pg_data = 0; 703 free((caddr_t)pager, M_VMPAGER); 704 } 705 706 /* 707 * swap_pager_getmulti can get multiple pages. 708 */ 709 int 710 swap_pager_getmulti(pager, m, count, reqpage, sync) 711 vm_pager_t pager; 712 vm_page_t *m; 713 int count; 714 int reqpage; 715 boolean_t sync; 716 { 717 if( reqpage >= count) 718 panic("swap_pager_getmulti: reqpage >= count\n"); 719 return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); 720 } 721 722 /* 723 * swap_pager_getpage gets individual pages 724 */ 725 int 726 swap_pager_getpage(pager, m, sync) 727 vm_pager_t pager; 728 vm_page_t m; 729 boolean_t sync; 730 { 731 vm_page_t marray[1]; 732 733 marray[0] = m; 734 return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); 735 } 736 737 int 738 swap_pager_putmulti(pager, m, c, sync, rtvals) 739 vm_pager_t pager; 740 vm_page_t *m; 741 int c; 742 boolean_t sync; 743 int *rtvals; 744 { 745 int flags; 746 747 if (pager == NULL) { 748 (void) swap_pager_clean(); 749 return VM_PAGER_OK; 750 } 751 752 flags = B_WRITE; 753 if (!sync) 754 flags |= B_ASYNC; 755 756 return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); 757 } 758 759 /* 760 * swap_pager_putpage writes individual pages 761 */ 762 int 763 swap_pager_putpage(pager, m, sync) 764 vm_pager_t pager; 765 vm_page_t m; 766 boolean_t sync; 767 { 768 int flags; 769 vm_page_t marray[1]; 770 int rtvals[1]; 771 772 773 if (pager == NULL) { 774 (void) swap_pager_clean(); 775 return VM_PAGER_OK; 776 } 777 778 marray[0] = m; 779 flags = B_WRITE; 780 if (!sync) 781 flags |= B_ASYNC; 782 783 swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); 784 785 return rtvals[0]; 786 } 787 788 static inline int 789 const swap_pager_block_index(swp, offset) 790 sw_pager_t swp; 791 vm_offset_t offset; 792 { 793 return (offset / (SWB_NPAGES*PAGE_SIZE)); 794 } 795 796 static inline int 797 const swap_pager_block_offset(swp, offset) 798 sw_pager_t swp; 799 vm_offset_t offset; 800 { 801 return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); 802 } 803 804 /* 805 * _swap_pager_haspage returns TRUE if the pager has data that has 806 * been written out. 807 */ 808 static boolean_t 809 _swap_pager_haspage(swp, offset) 810 sw_pager_t swp; 811 vm_offset_t offset; 812 { 813 register sw_blk_t swb; 814 int ix; 815 816 ix = offset / (SWB_NPAGES*PAGE_SIZE); 817 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 818 return(FALSE); 819 } 820 swb = &swp->sw_blocks[ix]; 821 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 822 if (swb->swb_block[ix] != SWB_EMPTY) { 823 if (swb->swb_valid & (1 << ix)) 824 return TRUE; 825 } 826 827 return(FALSE); 828 } 829 830 /* 831 * swap_pager_haspage is the externally accessible version of 832 * _swap_pager_haspage above. this routine takes a vm_pager_t 833 * for an argument instead of sw_pager_t. 834 */ 835 boolean_t 836 swap_pager_haspage(pager, offset) 837 vm_pager_t pager; 838 vm_offset_t offset; 839 { 840 return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); 841 } 842 843 /* 844 * swap_pager_freepage is a convienience routine that clears the busy 845 * bit and deallocates a page. 846 */ 847 static void 848 swap_pager_freepage(m) 849 vm_page_t m; 850 { 851 PAGE_WAKEUP(m); 852 vm_page_free(m); 853 } 854 855 /* 856 * swap_pager_ridpages is a convienience routine that deallocates all 857 * but the required page. this is usually used in error returns that 858 * need to invalidate the "extra" readahead pages. 859 */ 860 static void 861 swap_pager_ridpages(m, count, reqpage) 862 vm_page_t *m; 863 int count; 864 int reqpage; 865 { 866 int i; 867 for (i = 0; i < count; i++) 868 if (i != reqpage) 869 swap_pager_freepage(m[i]); 870 } 871 872 int swapwritecount=0; 873 874 /* 875 * swap_pager_iodone1 is the completion routine for both reads and async writes 876 */ 877 void 878 swap_pager_iodone1(bp) 879 struct buf *bp; 880 { 881 bp->b_flags |= B_DONE; 882 bp->b_flags &= ~B_ASYNC; 883 wakeup((caddr_t)bp); 884 /* 885 if ((bp->b_flags & B_READ) == 0) 886 vwakeup(bp); 887 */ 888 } 889 890 891 int 892 swap_pager_input(swp, m, count, reqpage) 893 register sw_pager_t swp; 894 vm_page_t *m; 895 int count, reqpage; 896 { 897 register struct buf *bp; 898 sw_blk_t swb[count]; 899 register int s; 900 int i; 901 boolean_t rv; 902 vm_offset_t kva, off[count]; 903 swp_clean_t spc; 904 vm_offset_t paging_offset; 905 vm_object_t object; 906 int reqaddr[count]; 907 908 int first, last; 909 int failed; 910 int reqdskregion; 911 912 object = m[reqpage]->object; 913 paging_offset = object->paging_offset; 914 /* 915 * First determine if the page exists in the pager if this is 916 * a sync read. This quickly handles cases where we are 917 * following shadow chains looking for the top level object 918 * with the page. 919 */ 920 if (swp->sw_blocks == NULL) { 921 swap_pager_ridpages(m, count, reqpage); 922 return(VM_PAGER_FAIL); 923 } 924 925 for(i = 0; i < count; i++) { 926 vm_offset_t foff = m[i]->offset + paging_offset; 927 int ix = swap_pager_block_index(swp, foff); 928 if (ix >= swp->sw_nblocks) { 929 int j; 930 if( i <= reqpage) { 931 swap_pager_ridpages(m, count, reqpage); 932 return(VM_PAGER_FAIL); 933 } 934 for(j = i; j < count; j++) { 935 swap_pager_freepage(m[j]); 936 } 937 count = i; 938 break; 939 } 940 941 swb[i] = &swp->sw_blocks[ix]; 942 off[i] = swap_pager_block_offset(swp, foff); 943 reqaddr[i] = swb[i]->swb_block[off[i]]; 944 } 945 946 /* make sure that our required input request is existant */ 947 948 if (reqaddr[reqpage] == SWB_EMPTY || 949 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 950 swap_pager_ridpages(m, count, reqpage); 951 return(VM_PAGER_FAIL); 952 } 953 954 955 reqdskregion = reqaddr[reqpage] / dmmax; 956 957 /* 958 * search backwards for the first contiguous page to transfer 959 */ 960 failed = 0; 961 first = 0; 962 for (i = reqpage - 1; i >= 0; --i) { 963 if ( failed || (reqaddr[i] == SWB_EMPTY) || 964 (swb[i]->swb_valid & (1 << off[i])) == 0 || 965 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 966 ((reqaddr[i] / dmmax) != reqdskregion)) { 967 failed = 1; 968 swap_pager_freepage(m[i]); 969 if (first == 0) 970 first = i + 1; 971 } 972 } 973 /* 974 * search forwards for the last contiguous page to transfer 975 */ 976 failed = 0; 977 last = count; 978 for (i = reqpage + 1; i < count; i++) { 979 if ( failed || (reqaddr[i] == SWB_EMPTY) || 980 (swb[i]->swb_valid & (1 << off[i])) == 0 || 981 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 982 ((reqaddr[i] / dmmax) != reqdskregion)) { 983 failed = 1; 984 swap_pager_freepage(m[i]); 985 if (last == count) 986 last = i; 987 } 988 } 989 990 count = last; 991 if (first != 0) { 992 for (i = first; i < count; i++) { 993 m[i-first] = m[i]; 994 reqaddr[i-first] = reqaddr[i]; 995 off[i-first] = off[i]; 996 } 997 count -= first; 998 reqpage -= first; 999 } 1000 1001 ++swb[reqpage]->swb_locked; 1002 1003 /* 1004 * at this point: 1005 * "m" is a pointer to the array of vm_page_t for paging I/O 1006 * "count" is the number of vm_page_t entries represented by "m" 1007 * "object" is the vm_object_t for I/O 1008 * "reqpage" is the index into "m" for the page actually faulted 1009 */ 1010 1011 spc = NULL; /* we might not use an spc data structure */ 1012 1013 if (count == 1) { 1014 /* 1015 * if a kva has not been allocated, we can only do a one page transfer, 1016 * so we free the other pages that might have been allocated by 1017 * vm_fault. 1018 */ 1019 swap_pager_ridpages(m, count, reqpage); 1020 m[0] = m[reqpage]; 1021 reqaddr[0] = reqaddr[reqpage]; 1022 1023 count = 1; 1024 reqpage = 0; 1025 /* 1026 * get a swap pager clean data structure, block until we get it 1027 */ 1028 if (swap_pager_free.tqh_first == NULL) { 1029 s = splbio(); 1030 if( curproc == pageproc) 1031 (void) swap_pager_clean(); 1032 else 1033 wakeup((caddr_t) &vm_pages_needed); 1034 while (swap_pager_free.tqh_first == NULL) { 1035 swap_pager_needflags |= SWAP_FREE_NEEDED; 1036 tsleep((caddr_t)&swap_pager_free, 1037 PVM, "swpfre", 0); 1038 if( curproc == pageproc) 1039 (void) swap_pager_clean(); 1040 else 1041 wakeup((caddr_t) &vm_pages_needed); 1042 } 1043 splx(s); 1044 } 1045 spc = swap_pager_free.tqh_first; 1046 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1047 kva = spc->spc_kva; 1048 bp = spc->spc_bp; 1049 bzero(bp, sizeof *bp); 1050 bp->b_spc = spc; 1051 } else { 1052 /* 1053 * Get a swap buffer header to perform the IO 1054 */ 1055 bp = getpbuf(); 1056 kva = (vm_offset_t) bp->b_data; 1057 } 1058 1059 /* 1060 * map our page(s) into kva for input 1061 */ 1062 pmap_qenter( kva, m, count); 1063 1064 s = splbio(); 1065 bp->b_flags = B_BUSY | B_READ | B_CALL; 1066 bp->b_iodone = swap_pager_iodone1; 1067 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1068 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1069 crhold(bp->b_rcred); 1070 crhold(bp->b_wcred); 1071 bp->b_un.b_addr = (caddr_t) kva; 1072 bp->b_blkno = reqaddr[0]; 1073 bp->b_bcount = PAGE_SIZE*count; 1074 bp->b_bufsize = PAGE_SIZE*count; 1075 1076 bgetvp( swapdev_vp, bp); 1077 1078 swp->sw_piip++; 1079 1080 /* 1081 * perform the I/O 1082 */ 1083 VOP_STRATEGY(bp); 1084 1085 /* 1086 * wait for the sync I/O to complete 1087 */ 1088 while ((bp->b_flags & B_DONE) == 0) { 1089 tsleep((caddr_t)bp, PVM, "swread", 0); 1090 } 1091 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1092 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1093 1094 --swp->sw_piip; 1095 if (swp->sw_piip == 0) 1096 wakeup((caddr_t) swp); 1097 1098 /* 1099 * relpbuf does this, but we maintain our own buffer 1100 * list also... 1101 */ 1102 if (bp->b_vp) 1103 brelvp(bp); 1104 1105 splx(s); 1106 --swb[reqpage]->swb_locked; 1107 1108 /* 1109 * remove the mapping for kernel virtual 1110 */ 1111 pmap_qremove( kva, count); 1112 1113 if (spc) { 1114 /* 1115 * if we have used an spc, we need to free it. 1116 */ 1117 if( bp->b_rcred != NOCRED) 1118 crfree(bp->b_rcred); 1119 if( bp->b_wcred != NOCRED) 1120 crfree(bp->b_wcred); 1121 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1122 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1123 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1124 wakeup((caddr_t)&swap_pager_free); 1125 } 1126 } else { 1127 /* 1128 * release the physical I/O buffer 1129 */ 1130 relpbuf(bp); 1131 /* 1132 * finish up input if everything is ok 1133 */ 1134 if( rv == VM_PAGER_OK) { 1135 for (i = 0; i < count; i++) { 1136 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1137 m[i]->flags |= PG_CLEAN; 1138 m[i]->flags &= ~PG_LAUNDRY; 1139 if (i != reqpage) { 1140 /* 1141 * whether or not to leave the page activated 1142 * is up in the air, but we should put the page 1143 * on a page queue somewhere. (it already is in 1144 * the object). 1145 * After some emperical results, it is best 1146 * to deactivate the readahead pages. 1147 */ 1148 vm_page_deactivate(m[i]); 1149 1150 /* 1151 * just in case someone was asking for this 1152 * page we now tell them that it is ok to use 1153 */ 1154 m[i]->flags &= ~PG_FAKE; 1155 PAGE_WAKEUP(m[i]); 1156 } 1157 } 1158 if( swap_pager_full) { 1159 _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); 1160 } 1161 } else { 1162 swap_pager_ridpages(m, count, reqpage); 1163 } 1164 } 1165 return(rv); 1166 } 1167 1168 int 1169 swap_pager_output(swp, m, count, flags, rtvals) 1170 register sw_pager_t swp; 1171 vm_page_t *m; 1172 int count; 1173 int flags; 1174 int *rtvals; 1175 { 1176 register struct buf *bp; 1177 sw_blk_t swb[count]; 1178 register int s; 1179 int i, j, ix; 1180 boolean_t rv; 1181 vm_offset_t kva, off, foff; 1182 swp_clean_t spc; 1183 vm_offset_t paging_offset; 1184 vm_object_t object; 1185 int reqaddr[count]; 1186 int failed; 1187 1188 /* 1189 if( count > 1) 1190 printf("off: 0x%x, count: %d\n", m[0]->offset, count); 1191 */ 1192 spc = NULL; 1193 1194 object = m[0]->object; 1195 paging_offset = object->paging_offset; 1196 1197 failed = 0; 1198 for(j=0;j<count;j++) { 1199 foff = m[j]->offset + paging_offset; 1200 ix = swap_pager_block_index(swp, foff); 1201 swb[j] = 0; 1202 if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 1203 rtvals[j] = VM_PAGER_FAIL; 1204 failed = 1; 1205 continue; 1206 } else { 1207 rtvals[j] = VM_PAGER_OK; 1208 } 1209 swb[j] = &swp->sw_blocks[ix]; 1210 ++swb[j]->swb_locked; 1211 if( failed) { 1212 rtvals[j] = VM_PAGER_FAIL; 1213 continue; 1214 } 1215 off = swap_pager_block_offset(swp, foff); 1216 reqaddr[j] = swb[j]->swb_block[off]; 1217 if( reqaddr[j] == SWB_EMPTY) { 1218 int blk; 1219 int tries; 1220 int ntoget; 1221 tries = 0; 1222 s = splbio(); 1223 1224 /* 1225 * if any other pages have been allocated in this block, we 1226 * only try to get one page. 1227 */ 1228 for (i = 0; i < SWB_NPAGES; i++) { 1229 if (swb[j]->swb_block[i] != SWB_EMPTY) 1230 break; 1231 } 1232 1233 1234 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 1235 /* 1236 * this code is alittle conservative, but works 1237 * (the intent of this code is to allocate small chunks 1238 * for small objects) 1239 */ 1240 if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { 1241 ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; 1242 } 1243 1244 retrygetspace: 1245 if (!swap_pager_full && ntoget > 1 && 1246 swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { 1247 1248 for (i = 0; i < ntoget; i++) { 1249 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 1250 swb[j]->swb_valid = 0; 1251 } 1252 1253 reqaddr[j] = swb[j]->swb_block[off]; 1254 } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), 1255 &swb[j]->swb_block[off])) { 1256 /* 1257 * if the allocation has failed, we try to reclaim space and 1258 * retry. 1259 */ 1260 if (++tries == 1) { 1261 swap_pager_reclaim(); 1262 goto retrygetspace; 1263 } 1264 rtvals[j] = VM_PAGER_AGAIN; 1265 failed = 1; 1266 } else { 1267 reqaddr[j] = swb[j]->swb_block[off]; 1268 swb[j]->swb_valid &= ~(1<<off); 1269 } 1270 splx(s); 1271 } 1272 } 1273 1274 /* 1275 * search forwards for the last contiguous page to transfer 1276 */ 1277 failed = 0; 1278 for (i = 0; i < count; i++) { 1279 if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || 1280 (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || 1281 (rtvals[i] != VM_PAGER_OK)) { 1282 failed = 1; 1283 if( rtvals[i] == VM_PAGER_OK) 1284 rtvals[i] = VM_PAGER_AGAIN; 1285 } 1286 } 1287 1288 for(i = 0; i < count; i++) { 1289 if( rtvals[i] != VM_PAGER_OK) { 1290 if( swb[i]) 1291 --swb[i]->swb_locked; 1292 } 1293 } 1294 1295 for(i = 0; i < count; i++) 1296 if( rtvals[i] != VM_PAGER_OK) 1297 break; 1298 1299 if( i == 0) { 1300 return VM_PAGER_AGAIN; 1301 } 1302 1303 count = i; 1304 for(i=0;i<count;i++) { 1305 if( reqaddr[i] == SWB_EMPTY) 1306 printf("I/O to empty block????\n"); 1307 } 1308 1309 /* 1310 */ 1311 1312 /* 1313 * For synchronous writes, we clean up 1314 * all completed async pageouts. 1315 */ 1316 if ((flags & B_ASYNC) == 0) { 1317 swap_pager_clean(); 1318 } 1319 1320 kva = 0; 1321 1322 /* 1323 * we allocate a new kva for transfers > 1 page 1324 * but for transfers == 1 page, the swap_pager_free list contains 1325 * entries that have pre-allocated kva's (for efficiency). 1326 * NOTE -- we do not use the physical buffer pool or the 1327 * preallocated associated kva's because of the potential for 1328 * deadlock. This is very subtile -- but deadlocks or resource 1329 * contention must be avoided on pageouts -- or your system will 1330 * sleep (forever) !!! 1331 */ 1332 /* 1333 if ( count > 1) { 1334 kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE); 1335 if( !kva) { 1336 for (i = 0; i < count; i++) { 1337 if( swb[i]) 1338 --swb[i]->swb_locked; 1339 rtvals[i] = VM_PAGER_AGAIN; 1340 } 1341 return VM_PAGER_AGAIN; 1342 } 1343 } 1344 */ 1345 1346 /* 1347 * get a swap pager clean data structure, block until we get it 1348 */ 1349 if (swap_pager_free.tqh_first == NULL) { 1350 s = splbio(); 1351 if( curproc == pageproc) 1352 (void) swap_pager_clean(); 1353 else 1354 wakeup((caddr_t) &vm_pages_needed); 1355 while (swap_pager_free.tqh_first == NULL) { 1356 swap_pager_needflags |= SWAP_FREE_NEEDED; 1357 tsleep((caddr_t)&swap_pager_free, 1358 PVM, "swpfre", 0); 1359 if( curproc == pageproc) 1360 (void) swap_pager_clean(); 1361 else 1362 wakeup((caddr_t) &vm_pages_needed); 1363 } 1364 splx(s); 1365 } 1366 1367 spc = swap_pager_free.tqh_first; 1368 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1369 1370 kva = spc->spc_kva; 1371 1372 /* 1373 * map our page(s) into kva for I/O 1374 */ 1375 pmap_qenter(kva, m, count); 1376 1377 /* 1378 * get the base I/O offset into the swap file 1379 */ 1380 for(i=0;i<count;i++) { 1381 foff = m[i]->offset + paging_offset; 1382 off = swap_pager_block_offset(swp, foff); 1383 /* 1384 * if we are setting the valid bit anew, 1385 * then diminish the swap free space 1386 */ 1387 if( (swb[i]->swb_valid & (1 << off)) == 0) 1388 vm_swap_size -= btodb(PAGE_SIZE); 1389 1390 /* 1391 * set the valid bit 1392 */ 1393 swb[i]->swb_valid |= (1 << off); 1394 /* 1395 * and unlock the data structure 1396 */ 1397 --swb[i]->swb_locked; 1398 } 1399 1400 s = splbio(); 1401 /* 1402 * Get a swap buffer header and perform the IO 1403 */ 1404 bp = spc->spc_bp; 1405 bzero(bp, sizeof *bp); 1406 bp->b_spc = spc; 1407 1408 bp->b_flags = B_BUSY; 1409 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1410 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1411 if( bp->b_rcred != NOCRED) 1412 crhold(bp->b_rcred); 1413 if( bp->b_wcred != NOCRED) 1414 crhold(bp->b_wcred); 1415 bp->b_data = (caddr_t) kva; 1416 bp->b_blkno = reqaddr[0]; 1417 bgetvp( swapdev_vp, bp); 1418 1419 bp->b_bcount = PAGE_SIZE*count; 1420 bp->b_bufsize = PAGE_SIZE*count; 1421 swapdev_vp->v_numoutput++; 1422 1423 /* 1424 * If this is an async write we set up additional buffer fields 1425 * and place a "cleaning" entry on the inuse queue. 1426 */ 1427 if ( flags & B_ASYNC ) { 1428 spc->spc_flags = 0; 1429 spc->spc_swp = swp; 1430 for(i=0;i<count;i++) 1431 spc->spc_m[i] = m[i]; 1432 spc->spc_count = count; 1433 /* 1434 * the completion routine for async writes 1435 */ 1436 bp->b_flags |= B_CALL; 1437 bp->b_iodone = swap_pager_iodone; 1438 bp->b_dirtyoff = 0; 1439 bp->b_dirtyend = bp->b_bcount; 1440 swp->sw_poip++; 1441 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 1442 } else { 1443 swp->sw_poip++; 1444 bp->b_flags |= B_CALL; 1445 bp->b_iodone = swap_pager_iodone1; 1446 } 1447 /* 1448 * perform the I/O 1449 */ 1450 VOP_STRATEGY(bp); 1451 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { 1452 if ((bp->b_flags & B_DONE) == B_DONE) { 1453 swap_pager_clean(); 1454 } 1455 splx(s); 1456 for(i=0;i<count;i++) { 1457 rtvals[i] = VM_PAGER_PEND; 1458 } 1459 return VM_PAGER_PEND; 1460 } 1461 1462 /* 1463 * wait for the sync I/O to complete 1464 */ 1465 while ((bp->b_flags & B_DONE) == 0) { 1466 tsleep((caddr_t)bp, PVM, "swwrt", 0); 1467 } 1468 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1469 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1470 1471 --swp->sw_poip; 1472 if (swp->sw_poip == 0) 1473 wakeup((caddr_t) swp); 1474 1475 if (bp->b_vp) 1476 brelvp(bp); 1477 1478 splx(s); 1479 1480 /* 1481 * remove the mapping for kernel virtual 1482 */ 1483 pmap_qremove( kva, count); 1484 1485 /* 1486 * if we have written the page, then indicate that the page 1487 * is clean. 1488 */ 1489 if (rv == VM_PAGER_OK) { 1490 for(i=0;i<count;i++) { 1491 if( rtvals[i] == VM_PAGER_OK) { 1492 m[i]->flags |= PG_CLEAN; 1493 m[i]->flags &= ~PG_LAUNDRY; 1494 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1495 /* 1496 * optimization, if a page has been read during the 1497 * pageout process, we activate it. 1498 */ 1499 if ( (m[i]->flags & PG_ACTIVE) == 0 && 1500 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) 1501 vm_page_activate(m[i]); 1502 } 1503 } 1504 } else { 1505 for(i=0;i<count;i++) { 1506 rtvals[i] = rv; 1507 m[i]->flags |= PG_LAUNDRY; 1508 } 1509 } 1510 1511 if( bp->b_rcred != NOCRED) 1512 crfree(bp->b_rcred); 1513 if( bp->b_wcred != NOCRED) 1514 crfree(bp->b_wcred); 1515 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1516 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1517 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1518 wakeup((caddr_t)&swap_pager_free); 1519 } 1520 1521 return(rv); 1522 } 1523 1524 boolean_t 1525 swap_pager_clean() 1526 { 1527 register swp_clean_t spc, tspc; 1528 register int s; 1529 1530 tspc = NULL; 1531 if (swap_pager_done.tqh_first == NULL) 1532 return FALSE; 1533 for (;;) { 1534 s = splbio(); 1535 /* 1536 * Look up and removal from done list must be done 1537 * at splbio() to avoid conflicts with swap_pager_iodone. 1538 */ 1539 while (spc = swap_pager_done.tqh_first) { 1540 pmap_qremove( spc->spc_kva, spc->spc_count); 1541 swap_pager_finish(spc); 1542 TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 1543 goto doclean; 1544 } 1545 1546 /* 1547 * No operations done, thats all we can do for now. 1548 */ 1549 1550 splx(s); 1551 break; 1552 1553 /* 1554 * The desired page was found to be busy earlier in 1555 * the scan but has since completed. 1556 */ 1557 doclean: 1558 if (tspc && tspc == spc) { 1559 tspc = NULL; 1560 } 1561 spc->spc_flags = 0; 1562 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1563 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1564 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1565 wakeup((caddr_t)&swap_pager_free); 1566 } 1567 ++cleandone; 1568 splx(s); 1569 } 1570 1571 return(tspc ? TRUE : FALSE); 1572 } 1573 1574 void 1575 swap_pager_finish(spc) 1576 register swp_clean_t spc; 1577 { 1578 vm_object_t object = spc->spc_m[0]->object; 1579 int i; 1580 1581 if ((object->paging_in_progress -= spc->spc_count) == 0) 1582 thread_wakeup((int) object); 1583 1584 /* 1585 * If no error mark as clean and inform the pmap system. 1586 * If error, mark as dirty so we will try again. 1587 * (XXX could get stuck doing this, should give up after awhile) 1588 */ 1589 if (spc->spc_flags & SPC_ERROR) { 1590 for(i=0;i<spc->spc_count;i++) { 1591 printf("swap_pager_finish: clean of page %x failed\n", 1592 VM_PAGE_TO_PHYS(spc->spc_m[i])); 1593 spc->spc_m[i]->flags |= PG_LAUNDRY; 1594 } 1595 } else { 1596 for(i=0;i<spc->spc_count;i++) { 1597 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); 1598 spc->spc_m[i]->flags |= PG_CLEAN; 1599 } 1600 } 1601 1602 1603 for(i=0;i<spc->spc_count;i++) { 1604 /* 1605 * we wakeup any processes that are waiting on 1606 * these pages. 1607 */ 1608 PAGE_WAKEUP(spc->spc_m[i]); 1609 } 1610 nswiodone -= spc->spc_count; 1611 1612 return; 1613 } 1614 1615 /* 1616 * swap_pager_iodone 1617 */ 1618 void 1619 swap_pager_iodone(bp) 1620 register struct buf *bp; 1621 { 1622 register swp_clean_t spc; 1623 int s; 1624 1625 s = splbio(); 1626 spc = (swp_clean_t) bp->b_spc; 1627 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 1628 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 1629 if (bp->b_flags & B_ERROR) { 1630 spc->spc_flags |= SPC_ERROR; 1631 printf("error %d blkno %d sz %d ", 1632 bp->b_error, bp->b_blkno, bp->b_bcount); 1633 } 1634 1635 /* 1636 if ((bp->b_flags & B_READ) == 0) 1637 vwakeup(bp); 1638 */ 1639 1640 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); 1641 if (bp->b_vp) { 1642 brelvp(bp); 1643 } 1644 if( bp->b_rcred != NOCRED) 1645 crfree(bp->b_rcred); 1646 if( bp->b_wcred != NOCRED) 1647 crfree(bp->b_wcred); 1648 1649 nswiodone += spc->spc_count; 1650 if (--spc->spc_swp->sw_poip == 0) { 1651 wakeup((caddr_t)spc->spc_swp); 1652 } 1653 1654 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1655 swap_pager_inuse.tqh_first == 0) { 1656 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1657 wakeup((caddr_t)&swap_pager_free); 1658 wakeup((caddr_t)&vm_pages_needed); 1659 } 1660 1661 if (vm_pageout_pages_needed) { 1662 wakeup((caddr_t)&vm_pageout_pages_needed); 1663 } 1664 1665 if ((swap_pager_inuse.tqh_first == NULL) || 1666 (cnt.v_free_count < cnt.v_free_min && 1667 nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { 1668 wakeup((caddr_t)&vm_pages_needed); 1669 } 1670 splx(s); 1671 } 1672 1673 /* 1674 * return true if any swap control structures can be allocated 1675 */ 1676 int 1677 swap_pager_ready() { 1678 if( swap_pager_free.tqh_first) 1679 return 1; 1680 else 1681 return 0; 1682 } 1683