1 /* 2 * Copyright (c) 1994 John S. Dyson 3 * Copyright (c) 1990 University of Utah. 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40 * 41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 42 * $Id: swap_pager.c,v 1.6 1994/08/07 13:10:37 davidg Exp $ 43 */ 44 45 /* 46 * Quick hack to page to dedicated partition(s). 47 * TODO: 48 * Add multiprocessor locks 49 * Deal with async writes in a better fashion 50 */ 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/proc.h> 55 #include <sys/buf.h> 56 #include <sys/vnode.h> 57 #include <sys/malloc.h> 58 59 #include <miscfs/specfs/specdev.h> 60 #include <sys/rlist.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_pager.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/swap_pager.h> 67 68 #ifndef NPENDINGIO 69 #define NPENDINGIO 16 70 #endif 71 72 int nswiodone; 73 extern int vm_pageout_rate_limit; 74 static int cleandone; 75 extern int hz; 76 int swap_pager_full; 77 extern vm_map_t pager_map; 78 extern int vm_swap_size; 79 80 #define MAX_PAGEOUT_CLUSTER 8 81 82 TAILQ_HEAD(swpclean, swpagerclean); 83 84 typedef struct swpagerclean *swp_clean_t; 85 86 struct swpagerclean { 87 TAILQ_ENTRY(swpagerclean) spc_list; 88 int spc_flags; 89 struct buf *spc_bp; 90 sw_pager_t spc_swp; 91 vm_offset_t spc_kva; 92 vm_offset_t spc_altkva; 93 int spc_count; 94 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 95 } swcleanlist [NPENDINGIO] ; 96 97 98 extern vm_map_t kernel_map; 99 100 /* spc_flags values */ 101 #define SPC_ERROR 0x01 102 103 #define SWB_EMPTY (-1) 104 105 struct swpclean swap_pager_done; /* list of compileted page cleans */ 106 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 107 struct swpclean swap_pager_free; /* list of free pager clean structs */ 108 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 109 struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ 110 111 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 112 int swap_pager_needflags; 113 struct rlist *swapfrag; 114 115 struct pagerlst *swp_qs[]={ 116 &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 117 }; 118 119 int swap_pager_putmulti(); 120 121 struct pagerops swappagerops = { 122 swap_pager_init, 123 swap_pager_alloc, 124 swap_pager_dealloc, 125 swap_pager_getpage, 126 swap_pager_getmulti, 127 swap_pager_putpage, 128 swap_pager_putmulti, 129 swap_pager_haspage 130 }; 131 132 int npendingio = NPENDINGIO; 133 int pendingiowait; 134 int require_swap_init; 135 void swap_pager_finish(); 136 int dmmin, dmmax; 137 extern int vm_page_count; 138 139 struct buf * getpbuf() ; 140 void relpbuf(struct buf *bp) ; 141 142 static inline void swapsizecheck() { 143 if( vm_swap_size < 128*btodb(PAGE_SIZE)) { 144 if( swap_pager_full) 145 printf("swap_pager: out of space\n"); 146 swap_pager_full = 1; 147 } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) 148 swap_pager_full = 0; 149 } 150 151 void 152 swap_pager_init() 153 { 154 dfltpagerops = &swappagerops; 155 156 TAILQ_INIT(&swap_pager_list); 157 TAILQ_INIT(&swap_pager_un_list); 158 159 /* 160 * Initialize clean lists 161 */ 162 TAILQ_INIT(&swap_pager_inuse); 163 TAILQ_INIT(&swap_pager_done); 164 TAILQ_INIT(&swap_pager_free); 165 166 require_swap_init = 1; 167 168 /* 169 * Calculate the swap allocation constants. 170 */ 171 172 dmmin = CLBYTES/DEV_BSIZE; 173 dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; 174 175 } 176 177 /* 178 * Allocate a pager structure and associated resources. 179 * Note that if we are called from the pageout daemon (handle == NULL) 180 * we should not wait for memory as it could resulting in deadlock. 181 */ 182 vm_pager_t 183 swap_pager_alloc(handle, size, prot, offset) 184 caddr_t handle; 185 register vm_size_t size; 186 vm_prot_t prot; 187 vm_offset_t offset; 188 { 189 register vm_pager_t pager; 190 register sw_pager_t swp; 191 int waitok; 192 int i,j; 193 194 if (require_swap_init) { 195 swp_clean_t spc; 196 struct buf *bp; 197 /* 198 * kva's are allocated here so that we dont need to keep 199 * doing kmem_alloc pageables at runtime 200 */ 201 for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { 202 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE); 203 if (!spc->spc_kva) { 204 break; 205 } 206 spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); 207 if (!spc->spc_bp) { 208 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 209 break; 210 } 211 spc->spc_flags = 0; 212 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 213 } 214 require_swap_init = 0; 215 if( size == 0) 216 return(NULL); 217 } 218 219 /* 220 * If this is a "named" anonymous region, look it up and 221 * return the appropriate pager if it exists. 222 */ 223 if (handle) { 224 pager = vm_pager_lookup(&swap_pager_list, handle); 225 if (pager != NULL) { 226 /* 227 * Use vm_object_lookup to gain a reference 228 * to the object and also to remove from the 229 * object cache. 230 */ 231 if (vm_object_lookup(pager) == NULL) 232 panic("swap_pager_alloc: bad object"); 233 return(pager); 234 } 235 } 236 237 if (swap_pager_full) { 238 return(NULL); 239 } 240 241 /* 242 * Pager doesn't exist, allocate swap management resources 243 * and initialize. 244 */ 245 waitok = handle ? M_WAITOK : M_NOWAIT; 246 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 247 if (pager == NULL) 248 return(NULL); 249 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 250 if (swp == NULL) { 251 free((caddr_t)pager, M_VMPAGER); 252 return(NULL); 253 } 254 size = round_page(size); 255 swp->sw_osize = size; 256 swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); 257 swp->sw_blocks = (sw_blk_t) 258 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 259 M_VMPGDATA, waitok); 260 if (swp->sw_blocks == NULL) { 261 free((caddr_t)swp, M_VMPGDATA); 262 free((caddr_t)pager, M_VMPAGER); 263 return(NULL); 264 } 265 266 for (i = 0; i < swp->sw_nblocks; i++) { 267 swp->sw_blocks[i].swb_valid = 0; 268 swp->sw_blocks[i].swb_locked = 0; 269 for (j = 0; j < SWB_NPAGES; j++) 270 swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; 271 } 272 273 swp->sw_poip = 0; 274 if (handle) { 275 vm_object_t object; 276 277 swp->sw_flags = SW_NAMED; 278 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 279 /* 280 * Consistant with other pagers: return with object 281 * referenced. Can't do this with handle == NULL 282 * since it might be the pageout daemon calling. 283 */ 284 object = vm_object_allocate(size); 285 vm_object_enter(object, pager); 286 vm_object_setpager(object, pager, 0, FALSE); 287 } else { 288 swp->sw_flags = 0; 289 TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list); 290 } 291 pager->pg_handle = handle; 292 pager->pg_ops = &swappagerops; 293 pager->pg_type = PG_SWAP; 294 pager->pg_data = (caddr_t)swp; 295 296 return(pager); 297 } 298 299 /* 300 * returns disk block associated with pager and offset 301 * additionally, as a side effect returns a flag indicating 302 * if the block has been written 303 */ 304 305 static int * 306 swap_pager_diskaddr(swp, offset, valid) 307 sw_pager_t swp; 308 vm_offset_t offset; 309 int *valid; 310 { 311 register sw_blk_t swb; 312 int ix; 313 314 if (valid) 315 *valid = 0; 316 ix = offset / (SWB_NPAGES*PAGE_SIZE); 317 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 318 return(FALSE); 319 } 320 swb = &swp->sw_blocks[ix]; 321 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 322 if (valid) 323 *valid = swb->swb_valid & (1<<ix); 324 return &swb->swb_block[ix]; 325 } 326 327 /* 328 * Utility routine to set the valid (written) bit for 329 * a block associated with a pager and offset 330 */ 331 static void 332 swap_pager_setvalid(swp, offset, valid) 333 sw_pager_t swp; 334 vm_offset_t offset; 335 int valid; 336 { 337 register sw_blk_t swb; 338 int ix; 339 340 ix = offset / (SWB_NPAGES*PAGE_SIZE); 341 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) 342 return; 343 344 swb = &swp->sw_blocks[ix]; 345 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 346 if (valid) 347 swb->swb_valid |= (1 << ix); 348 else 349 swb->swb_valid &= ~(1 << ix); 350 return; 351 } 352 353 /* 354 * this routine allocates swap space with a fragmentation 355 * minimization policy. 356 */ 357 int 358 swap_pager_getswapspace( unsigned amount, unsigned *rtval) { 359 unsigned tmpalloc; 360 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 361 if( amount < nblocksfrag) { 362 if( rlist_alloc(&swapfrag, amount, rtval)) 363 return 1; 364 if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc)) 365 return 0; 366 rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1); 367 *rtval = tmpalloc; 368 return 1; 369 } 370 if( !rlist_alloc(&swapmap, amount, rtval)) 371 return 0; 372 else 373 return 1; 374 } 375 376 /* 377 * this routine frees swap space with a fragmentation 378 * minimization policy. 379 */ 380 void 381 swap_pager_freeswapspace( unsigned from, unsigned to) { 382 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 383 unsigned tmpalloc; 384 if( ((to + 1) - from) >= nblocksfrag) { 385 while( (from + nblocksfrag) <= to + 1) { 386 rlist_free(&swapmap, from, from + nblocksfrag - 1); 387 from += nblocksfrag; 388 } 389 } 390 if( from >= to) 391 return; 392 rlist_free(&swapfrag, from, to); 393 while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) { 394 rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1); 395 } 396 } 397 /* 398 * this routine frees swap blocks from a specified pager 399 */ 400 void 401 _swap_pager_freespace(swp, start, size) 402 sw_pager_t swp; 403 vm_offset_t start; 404 vm_offset_t size; 405 { 406 vm_offset_t i; 407 int s; 408 409 s = splbio(); 410 for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) { 411 int valid; 412 int *addr = swap_pager_diskaddr(swp, i, &valid); 413 if (addr && *addr != SWB_EMPTY) { 414 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 415 if( valid) { 416 vm_swap_size += btodb(PAGE_SIZE); 417 swap_pager_setvalid(swp, i, 0); 418 } 419 *addr = SWB_EMPTY; 420 } 421 } 422 swapsizecheck(); 423 splx(s); 424 } 425 426 void 427 swap_pager_freespace(pager, start, size) 428 vm_pager_t pager; 429 vm_offset_t start; 430 vm_offset_t size; 431 { 432 _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); 433 } 434 435 /* 436 * swap_pager_reclaim frees up over-allocated space from all pagers 437 * this eliminates internal fragmentation due to allocation of space 438 * for segments that are never swapped to. It has been written so that 439 * it does not block until the rlist_free operation occurs; it keeps 440 * the queues consistant. 441 */ 442 443 /* 444 * Maximum number of blocks (pages) to reclaim per pass 445 */ 446 #define MAXRECLAIM 256 447 448 void 449 swap_pager_reclaim() 450 { 451 vm_pager_t p; 452 sw_pager_t swp; 453 int i, j, k; 454 int s; 455 int reclaimcount; 456 static int reclaims[MAXRECLAIM]; 457 static int in_reclaim; 458 459 /* 460 * allow only one process to be in the swap_pager_reclaim subroutine 461 */ 462 s = splbio(); 463 if (in_reclaim) { 464 tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); 465 splx(s); 466 return; 467 } 468 in_reclaim = 1; 469 reclaimcount = 0; 470 471 /* for each pager queue */ 472 for (k = 0; swp_qs[k]; k++) { 473 474 p = swp_qs[k]->tqh_first; 475 while (p && (reclaimcount < MAXRECLAIM)) { 476 477 /* 478 * see if any blocks associated with a pager has been 479 * allocated but not used (written) 480 */ 481 swp = (sw_pager_t) p->pg_data; 482 for (i = 0; i < swp->sw_nblocks; i++) { 483 sw_blk_t swb = &swp->sw_blocks[i]; 484 if( swb->swb_locked) 485 continue; 486 for (j = 0; j < SWB_NPAGES; j++) { 487 if (swb->swb_block[j] != SWB_EMPTY && 488 (swb->swb_valid & (1 << j)) == 0) { 489 reclaims[reclaimcount++] = swb->swb_block[j]; 490 swb->swb_block[j] = SWB_EMPTY; 491 if (reclaimcount >= MAXRECLAIM) 492 goto rfinished; 493 } 494 } 495 } 496 p = p->pg_list.tqe_next; 497 } 498 } 499 500 rfinished: 501 502 /* 503 * free the blocks that have been added to the reclaim list 504 */ 505 for (i = 0; i < reclaimcount; i++) { 506 swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); 507 swapsizecheck(); 508 wakeup((caddr_t) &in_reclaim); 509 } 510 511 splx(s); 512 in_reclaim = 0; 513 wakeup((caddr_t) &in_reclaim); 514 } 515 516 517 /* 518 * swap_pager_copy copies blocks from one pager to another and 519 * destroys the source pager 520 */ 521 522 void 523 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) 524 vm_pager_t srcpager; 525 vm_offset_t srcoffset; 526 vm_pager_t dstpager; 527 vm_offset_t dstoffset; 528 vm_offset_t offset; 529 { 530 sw_pager_t srcswp, dstswp; 531 vm_offset_t i; 532 int s; 533 534 srcswp = (sw_pager_t) srcpager->pg_data; 535 dstswp = (sw_pager_t) dstpager->pg_data; 536 537 /* 538 * remove the source pager from the swap_pager internal queue 539 */ 540 s = splbio(); 541 if (srcswp->sw_flags & SW_NAMED) { 542 TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); 543 srcswp->sw_flags &= ~SW_NAMED; 544 } else { 545 TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); 546 } 547 548 while (srcswp->sw_poip) { 549 tsleep((caddr_t)srcswp, PVM, "spgout", 0); 550 } 551 splx(s); 552 553 /* 554 * clean all of the pages that are currently active and finished 555 */ 556 (void) swap_pager_clean(); 557 558 s = splbio(); 559 /* 560 * clear source block before destination object 561 * (release allocated space) 562 */ 563 for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { 564 int valid; 565 int *addr = swap_pager_diskaddr(srcswp, i, &valid); 566 if (addr && *addr != SWB_EMPTY) { 567 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 568 if( valid) 569 vm_swap_size += btodb(PAGE_SIZE); 570 swapsizecheck(); 571 *addr = SWB_EMPTY; 572 } 573 } 574 /* 575 * transfer source to destination 576 */ 577 for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { 578 int srcvalid, dstvalid; 579 int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, 580 &srcvalid); 581 int *dstaddrp; 582 /* 583 * see if the source has space allocated 584 */ 585 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 586 /* 587 * if the source is valid and the dest has no space, then 588 * copy the allocation from the srouce to the dest. 589 */ 590 if (srcvalid) { 591 dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); 592 /* 593 * if the dest already has a valid block, deallocate the 594 * source block without copying. 595 */ 596 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 597 swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); 598 *dstaddrp = SWB_EMPTY; 599 } 600 if (dstaddrp && *dstaddrp == SWB_EMPTY) { 601 *dstaddrp = *srcaddrp; 602 *srcaddrp = SWB_EMPTY; 603 swap_pager_setvalid(dstswp, i + dstoffset, 1); 604 vm_swap_size -= btodb(PAGE_SIZE); 605 } 606 } 607 /* 608 * if the source is not empty at this point, then deallocate the space. 609 */ 610 if (*srcaddrp != SWB_EMPTY) { 611 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 612 if( srcvalid) 613 vm_swap_size += btodb(PAGE_SIZE); 614 *srcaddrp = SWB_EMPTY; 615 } 616 } 617 } 618 619 /* 620 * deallocate the rest of the source object 621 */ 622 for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { 623 int valid; 624 int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); 625 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 626 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 627 if( valid) 628 vm_swap_size += btodb(PAGE_SIZE); 629 *srcaddrp = SWB_EMPTY; 630 } 631 } 632 633 swapsizecheck(); 634 splx(s); 635 636 free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); 637 srcswp->sw_blocks = 0; 638 free((caddr_t)srcswp, M_VMPGDATA); 639 srcpager->pg_data = 0; 640 free((caddr_t)srcpager, M_VMPAGER); 641 642 return; 643 } 644 645 646 void 647 swap_pager_dealloc(pager) 648 vm_pager_t pager; 649 { 650 register int i,j; 651 register sw_blk_t bp; 652 register sw_pager_t swp; 653 int s; 654 655 /* 656 * Remove from list right away so lookups will fail if we 657 * block for pageout completion. 658 */ 659 s = splbio(); 660 swp = (sw_pager_t) pager->pg_data; 661 if (swp->sw_flags & SW_NAMED) { 662 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 663 swp->sw_flags &= ~SW_NAMED; 664 } else { 665 TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); 666 } 667 /* 668 * Wait for all pageouts to finish and remove 669 * all entries from cleaning list. 670 */ 671 672 while (swp->sw_poip) { 673 tsleep((caddr_t)swp, PVM, "swpout", 0); 674 } 675 splx(s); 676 677 678 (void) swap_pager_clean(); 679 680 /* 681 * Free left over swap blocks 682 */ 683 s = splbio(); 684 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { 685 for (j = 0; j < SWB_NPAGES; j++) 686 if (bp->swb_block[j] != SWB_EMPTY) { 687 swap_pager_freeswapspace((unsigned)bp->swb_block[j], 688 (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); 689 if( bp->swb_valid & (1<<j)) 690 vm_swap_size += btodb(PAGE_SIZE); 691 bp->swb_block[j] = SWB_EMPTY; 692 } 693 } 694 splx(s); 695 swapsizecheck(); 696 697 /* 698 * Free swap management resources 699 */ 700 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 701 swp->sw_blocks = 0; 702 free((caddr_t)swp, M_VMPGDATA); 703 pager->pg_data = 0; 704 free((caddr_t)pager, M_VMPAGER); 705 } 706 707 /* 708 * swap_pager_getmulti can get multiple pages. 709 */ 710 int 711 swap_pager_getmulti(pager, m, count, reqpage, sync) 712 vm_pager_t pager; 713 vm_page_t *m; 714 int count; 715 int reqpage; 716 boolean_t sync; 717 { 718 if( reqpage >= count) 719 panic("swap_pager_getmulti: reqpage >= count\n"); 720 return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); 721 } 722 723 /* 724 * swap_pager_getpage gets individual pages 725 */ 726 int 727 swap_pager_getpage(pager, m, sync) 728 vm_pager_t pager; 729 vm_page_t m; 730 boolean_t sync; 731 { 732 vm_page_t marray[1]; 733 734 marray[0] = m; 735 return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); 736 } 737 738 int 739 swap_pager_putmulti(pager, m, c, sync, rtvals) 740 vm_pager_t pager; 741 vm_page_t *m; 742 int c; 743 boolean_t sync; 744 int *rtvals; 745 { 746 int flags; 747 748 if (pager == NULL) { 749 (void) swap_pager_clean(); 750 return VM_PAGER_OK; 751 } 752 753 flags = B_WRITE; 754 if (!sync) 755 flags |= B_ASYNC; 756 757 return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); 758 } 759 760 /* 761 * swap_pager_putpage writes individual pages 762 */ 763 int 764 swap_pager_putpage(pager, m, sync) 765 vm_pager_t pager; 766 vm_page_t m; 767 boolean_t sync; 768 { 769 int flags; 770 vm_page_t marray[1]; 771 int rtvals[1]; 772 773 774 if (pager == NULL) { 775 (void) swap_pager_clean(); 776 return VM_PAGER_OK; 777 } 778 779 marray[0] = m; 780 flags = B_WRITE; 781 if (!sync) 782 flags |= B_ASYNC; 783 784 swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); 785 786 return rtvals[0]; 787 } 788 789 static inline int 790 const swap_pager_block_index(swp, offset) 791 sw_pager_t swp; 792 vm_offset_t offset; 793 { 794 return (offset / (SWB_NPAGES*PAGE_SIZE)); 795 } 796 797 static inline int 798 const swap_pager_block_offset(swp, offset) 799 sw_pager_t swp; 800 vm_offset_t offset; 801 { 802 return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); 803 } 804 805 /* 806 * _swap_pager_haspage returns TRUE if the pager has data that has 807 * been written out. 808 */ 809 static boolean_t 810 _swap_pager_haspage(swp, offset) 811 sw_pager_t swp; 812 vm_offset_t offset; 813 { 814 register sw_blk_t swb; 815 int ix; 816 817 ix = offset / (SWB_NPAGES*PAGE_SIZE); 818 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 819 return(FALSE); 820 } 821 swb = &swp->sw_blocks[ix]; 822 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 823 if (swb->swb_block[ix] != SWB_EMPTY) { 824 if (swb->swb_valid & (1 << ix)) 825 return TRUE; 826 } 827 828 return(FALSE); 829 } 830 831 /* 832 * swap_pager_haspage is the externally accessible version of 833 * _swap_pager_haspage above. this routine takes a vm_pager_t 834 * for an argument instead of sw_pager_t. 835 */ 836 boolean_t 837 swap_pager_haspage(pager, offset) 838 vm_pager_t pager; 839 vm_offset_t offset; 840 { 841 return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); 842 } 843 844 /* 845 * swap_pager_freepage is a convienience routine that clears the busy 846 * bit and deallocates a page. 847 */ 848 static void 849 swap_pager_freepage(m) 850 vm_page_t m; 851 { 852 PAGE_WAKEUP(m); 853 vm_page_free(m); 854 } 855 856 /* 857 * swap_pager_ridpages is a convienience routine that deallocates all 858 * but the required page. this is usually used in error returns that 859 * need to invalidate the "extra" readahead pages. 860 */ 861 static void 862 swap_pager_ridpages(m, count, reqpage) 863 vm_page_t *m; 864 int count; 865 int reqpage; 866 { 867 int i; 868 for (i = 0; i < count; i++) 869 if (i != reqpage) 870 swap_pager_freepage(m[i]); 871 } 872 873 int swapwritecount=0; 874 875 /* 876 * swap_pager_iodone1 is the completion routine for both reads and async writes 877 */ 878 void 879 swap_pager_iodone1(bp) 880 struct buf *bp; 881 { 882 bp->b_flags |= B_DONE; 883 bp->b_flags &= ~B_ASYNC; 884 wakeup((caddr_t)bp); 885 /* 886 if ((bp->b_flags & B_READ) == 0) 887 vwakeup(bp); 888 */ 889 } 890 891 892 int 893 swap_pager_input(swp, m, count, reqpage) 894 register sw_pager_t swp; 895 vm_page_t *m; 896 int count, reqpage; 897 { 898 register struct buf *bp; 899 sw_blk_t swb[count]; 900 register int s; 901 int i; 902 boolean_t rv; 903 vm_offset_t kva, off[count]; 904 swp_clean_t spc; 905 vm_offset_t paging_offset; 906 vm_object_t object; 907 int reqaddr[count]; 908 909 int first, last; 910 int failed; 911 int reqdskregion; 912 913 object = m[reqpage]->object; 914 paging_offset = object->paging_offset; 915 /* 916 * First determine if the page exists in the pager if this is 917 * a sync read. This quickly handles cases where we are 918 * following shadow chains looking for the top level object 919 * with the page. 920 */ 921 if (swp->sw_blocks == NULL) { 922 swap_pager_ridpages(m, count, reqpage); 923 return(VM_PAGER_FAIL); 924 } 925 926 for(i = 0; i < count; i++) { 927 vm_offset_t foff = m[i]->offset + paging_offset; 928 int ix = swap_pager_block_index(swp, foff); 929 if (ix >= swp->sw_nblocks) { 930 int j; 931 if( i <= reqpage) { 932 swap_pager_ridpages(m, count, reqpage); 933 return(VM_PAGER_FAIL); 934 } 935 for(j = i; j < count; j++) { 936 swap_pager_freepage(m[j]); 937 } 938 count = i; 939 break; 940 } 941 942 swb[i] = &swp->sw_blocks[ix]; 943 off[i] = swap_pager_block_offset(swp, foff); 944 reqaddr[i] = swb[i]->swb_block[off[i]]; 945 } 946 947 /* make sure that our required input request is existant */ 948 949 if (reqaddr[reqpage] == SWB_EMPTY || 950 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 951 swap_pager_ridpages(m, count, reqpage); 952 return(VM_PAGER_FAIL); 953 } 954 955 956 reqdskregion = reqaddr[reqpage] / dmmax; 957 958 /* 959 * search backwards for the first contiguous page to transfer 960 */ 961 failed = 0; 962 first = 0; 963 for (i = reqpage - 1; i >= 0; --i) { 964 if ( failed || (reqaddr[i] == SWB_EMPTY) || 965 (swb[i]->swb_valid & (1 << off[i])) == 0 || 966 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 967 ((reqaddr[i] / dmmax) != reqdskregion)) { 968 failed = 1; 969 swap_pager_freepage(m[i]); 970 if (first == 0) 971 first = i + 1; 972 } 973 } 974 /* 975 * search forwards for the last contiguous page to transfer 976 */ 977 failed = 0; 978 last = count; 979 for (i = reqpage + 1; i < count; i++) { 980 if ( failed || (reqaddr[i] == SWB_EMPTY) || 981 (swb[i]->swb_valid & (1 << off[i])) == 0 || 982 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 983 ((reqaddr[i] / dmmax) != reqdskregion)) { 984 failed = 1; 985 swap_pager_freepage(m[i]); 986 if (last == count) 987 last = i; 988 } 989 } 990 991 count = last; 992 if (first != 0) { 993 for (i = first; i < count; i++) { 994 m[i-first] = m[i]; 995 reqaddr[i-first] = reqaddr[i]; 996 off[i-first] = off[i]; 997 } 998 count -= first; 999 reqpage -= first; 1000 } 1001 1002 ++swb[reqpage]->swb_locked; 1003 1004 /* 1005 * at this point: 1006 * "m" is a pointer to the array of vm_page_t for paging I/O 1007 * "count" is the number of vm_page_t entries represented by "m" 1008 * "object" is the vm_object_t for I/O 1009 * "reqpage" is the index into "m" for the page actually faulted 1010 */ 1011 1012 spc = NULL; /* we might not use an spc data structure */ 1013 1014 if (count == 1) { 1015 /* 1016 * if a kva has not been allocated, we can only do a one page transfer, 1017 * so we free the other pages that might have been allocated by 1018 * vm_fault. 1019 */ 1020 swap_pager_ridpages(m, count, reqpage); 1021 m[0] = m[reqpage]; 1022 reqaddr[0] = reqaddr[reqpage]; 1023 1024 count = 1; 1025 reqpage = 0; 1026 /* 1027 * get a swap pager clean data structure, block until we get it 1028 */ 1029 if (swap_pager_free.tqh_first == NULL) { 1030 s = splbio(); 1031 if( curproc == pageproc) 1032 (void) swap_pager_clean(); 1033 else 1034 wakeup((caddr_t) &vm_pages_needed); 1035 while (swap_pager_free.tqh_first == NULL) { 1036 swap_pager_needflags |= SWAP_FREE_NEEDED; 1037 tsleep((caddr_t)&swap_pager_free, 1038 PVM, "swpfre", 0); 1039 if( curproc == pageproc) 1040 (void) swap_pager_clean(); 1041 else 1042 wakeup((caddr_t) &vm_pages_needed); 1043 } 1044 splx(s); 1045 } 1046 spc = swap_pager_free.tqh_first; 1047 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1048 kva = spc->spc_kva; 1049 bp = spc->spc_bp; 1050 bzero(bp, sizeof *bp); 1051 bp->b_spc = spc; 1052 } else { 1053 /* 1054 * Get a swap buffer header to perform the IO 1055 */ 1056 bp = getpbuf(); 1057 kva = (vm_offset_t) bp->b_data; 1058 } 1059 1060 /* 1061 * map our page(s) into kva for input 1062 */ 1063 pmap_qenter( kva, m, count); 1064 1065 s = splbio(); 1066 bp->b_flags = B_BUSY | B_READ | B_CALL; 1067 bp->b_iodone = swap_pager_iodone1; 1068 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1069 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1070 crhold(bp->b_rcred); 1071 crhold(bp->b_wcred); 1072 bp->b_un.b_addr = (caddr_t) kva; 1073 bp->b_blkno = reqaddr[0]; 1074 bp->b_bcount = PAGE_SIZE*count; 1075 bp->b_bufsize = PAGE_SIZE*count; 1076 1077 bgetvp( swapdev_vp, bp); 1078 1079 swp->sw_piip++; 1080 1081 /* 1082 * perform the I/O 1083 */ 1084 VOP_STRATEGY(bp); 1085 1086 /* 1087 * wait for the sync I/O to complete 1088 */ 1089 while ((bp->b_flags & B_DONE) == 0) { 1090 tsleep((caddr_t)bp, PVM, "swread", 0); 1091 } 1092 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1093 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1094 1095 --swp->sw_piip; 1096 if (swp->sw_piip == 0) 1097 wakeup((caddr_t) swp); 1098 1099 /* 1100 * relpbuf does this, but we maintain our own buffer 1101 * list also... 1102 */ 1103 if (bp->b_vp) 1104 brelvp(bp); 1105 1106 splx(s); 1107 --swb[reqpage]->swb_locked; 1108 1109 /* 1110 * remove the mapping for kernel virtual 1111 */ 1112 pmap_qremove( kva, count); 1113 1114 if (spc) { 1115 /* 1116 * if we have used an spc, we need to free it. 1117 */ 1118 if( bp->b_rcred != NOCRED) 1119 crfree(bp->b_rcred); 1120 if( bp->b_wcred != NOCRED) 1121 crfree(bp->b_wcred); 1122 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1123 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1124 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1125 wakeup((caddr_t)&swap_pager_free); 1126 } 1127 } else { 1128 /* 1129 * release the physical I/O buffer 1130 */ 1131 relpbuf(bp); 1132 /* 1133 * finish up input if everything is ok 1134 */ 1135 if( rv == VM_PAGER_OK) { 1136 for (i = 0; i < count; i++) { 1137 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1138 m[i]->flags |= PG_CLEAN; 1139 m[i]->flags &= ~PG_LAUNDRY; 1140 if (i != reqpage) { 1141 /* 1142 * whether or not to leave the page activated 1143 * is up in the air, but we should put the page 1144 * on a page queue somewhere. (it already is in 1145 * the object). 1146 * After some emperical results, it is best 1147 * to deactivate the readahead pages. 1148 */ 1149 vm_page_deactivate(m[i]); 1150 1151 /* 1152 * just in case someone was asking for this 1153 * page we now tell them that it is ok to use 1154 */ 1155 m[i]->flags &= ~PG_FAKE; 1156 PAGE_WAKEUP(m[i]); 1157 } 1158 } 1159 if( swap_pager_full) { 1160 _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); 1161 } 1162 } else { 1163 swap_pager_ridpages(m, count, reqpage); 1164 } 1165 } 1166 return(rv); 1167 } 1168 1169 int 1170 swap_pager_output(swp, m, count, flags, rtvals) 1171 register sw_pager_t swp; 1172 vm_page_t *m; 1173 int count; 1174 int flags; 1175 int *rtvals; 1176 { 1177 register struct buf *bp; 1178 sw_blk_t swb[count]; 1179 register int s; 1180 int i, j, ix; 1181 boolean_t rv; 1182 vm_offset_t kva, off, foff; 1183 swp_clean_t spc; 1184 vm_offset_t paging_offset; 1185 vm_object_t object; 1186 int reqaddr[count]; 1187 int failed; 1188 1189 /* 1190 if( count > 1) 1191 printf("off: 0x%x, count: %d\n", m[0]->offset, count); 1192 */ 1193 spc = NULL; 1194 1195 object = m[0]->object; 1196 paging_offset = object->paging_offset; 1197 1198 failed = 0; 1199 for(j=0;j<count;j++) { 1200 foff = m[j]->offset + paging_offset; 1201 ix = swap_pager_block_index(swp, foff); 1202 swb[j] = 0; 1203 if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 1204 rtvals[j] = VM_PAGER_FAIL; 1205 failed = 1; 1206 continue; 1207 } else { 1208 rtvals[j] = VM_PAGER_OK; 1209 } 1210 swb[j] = &swp->sw_blocks[ix]; 1211 ++swb[j]->swb_locked; 1212 if( failed) { 1213 rtvals[j] = VM_PAGER_FAIL; 1214 continue; 1215 } 1216 off = swap_pager_block_offset(swp, foff); 1217 reqaddr[j] = swb[j]->swb_block[off]; 1218 if( reqaddr[j] == SWB_EMPTY) { 1219 int blk; 1220 int tries; 1221 int ntoget; 1222 tries = 0; 1223 s = splbio(); 1224 1225 /* 1226 * if any other pages have been allocated in this block, we 1227 * only try to get one page. 1228 */ 1229 for (i = 0; i < SWB_NPAGES; i++) { 1230 if (swb[j]->swb_block[i] != SWB_EMPTY) 1231 break; 1232 } 1233 1234 1235 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 1236 /* 1237 * this code is alittle conservative, but works 1238 * (the intent of this code is to allocate small chunks 1239 * for small objects) 1240 */ 1241 if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { 1242 ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; 1243 } 1244 1245 retrygetspace: 1246 if (!swap_pager_full && ntoget > 1 && 1247 swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { 1248 1249 for (i = 0; i < ntoget; i++) { 1250 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 1251 swb[j]->swb_valid = 0; 1252 } 1253 1254 reqaddr[j] = swb[j]->swb_block[off]; 1255 } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), 1256 &swb[j]->swb_block[off])) { 1257 /* 1258 * if the allocation has failed, we try to reclaim space and 1259 * retry. 1260 */ 1261 if (++tries == 1) { 1262 swap_pager_reclaim(); 1263 goto retrygetspace; 1264 } 1265 rtvals[j] = VM_PAGER_AGAIN; 1266 failed = 1; 1267 } else { 1268 reqaddr[j] = swb[j]->swb_block[off]; 1269 swb[j]->swb_valid &= ~(1<<off); 1270 } 1271 splx(s); 1272 } 1273 } 1274 1275 /* 1276 * search forwards for the last contiguous page to transfer 1277 */ 1278 failed = 0; 1279 for (i = 0; i < count; i++) { 1280 if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || 1281 (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || 1282 (rtvals[i] != VM_PAGER_OK)) { 1283 failed = 1; 1284 if( rtvals[i] == VM_PAGER_OK) 1285 rtvals[i] = VM_PAGER_AGAIN; 1286 } 1287 } 1288 1289 for(i = 0; i < count; i++) { 1290 if( rtvals[i] != VM_PAGER_OK) { 1291 if( swb[i]) 1292 --swb[i]->swb_locked; 1293 } 1294 } 1295 1296 for(i = 0; i < count; i++) 1297 if( rtvals[i] != VM_PAGER_OK) 1298 break; 1299 1300 if( i == 0) { 1301 return VM_PAGER_AGAIN; 1302 } 1303 1304 count = i; 1305 for(i=0;i<count;i++) { 1306 if( reqaddr[i] == SWB_EMPTY) 1307 printf("I/O to empty block????\n"); 1308 } 1309 1310 /* 1311 */ 1312 1313 /* 1314 * For synchronous writes, we clean up 1315 * all completed async pageouts. 1316 */ 1317 if ((flags & B_ASYNC) == 0) { 1318 swap_pager_clean(); 1319 } 1320 1321 kva = 0; 1322 1323 /* 1324 * we allocate a new kva for transfers > 1 page 1325 * but for transfers == 1 page, the swap_pager_free list contains 1326 * entries that have pre-allocated kva's (for efficiency). 1327 * NOTE -- we do not use the physical buffer pool or the 1328 * preallocated associated kva's because of the potential for 1329 * deadlock. This is very subtile -- but deadlocks or resource 1330 * contention must be avoided on pageouts -- or your system will 1331 * sleep (forever) !!! 1332 */ 1333 if ( count > 1) { 1334 kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE); 1335 if( !kva) { 1336 for (i = 0; i < count; i++) { 1337 if( swb[i]) 1338 --swb[i]->swb_locked; 1339 rtvals[i] = VM_PAGER_AGAIN; 1340 } 1341 return VM_PAGER_AGAIN; 1342 } 1343 } 1344 1345 /* 1346 * get a swap pager clean data structure, block until we get it 1347 */ 1348 if (swap_pager_free.tqh_first == NULL) { 1349 s = splbio(); 1350 if( curproc == pageproc) 1351 (void) swap_pager_clean(); 1352 else 1353 wakeup((caddr_t) &vm_pages_needed); 1354 while (swap_pager_free.tqh_first == NULL) { 1355 swap_pager_needflags |= SWAP_FREE_NEEDED; 1356 tsleep((caddr_t)&swap_pager_free, 1357 PVM, "swpfre", 0); 1358 if( curproc == pageproc) 1359 (void) swap_pager_clean(); 1360 else 1361 wakeup((caddr_t) &vm_pages_needed); 1362 } 1363 splx(s); 1364 } 1365 1366 spc = swap_pager_free.tqh_first; 1367 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1368 if( !kva) { 1369 kva = spc->spc_kva; 1370 spc->spc_altkva = 0; 1371 } else { 1372 spc->spc_altkva = kva; 1373 } 1374 1375 /* 1376 * map our page(s) into kva for I/O 1377 */ 1378 pmap_qenter(kva, m, count); 1379 1380 /* 1381 * get the base I/O offset into the swap file 1382 */ 1383 for(i=0;i<count;i++) { 1384 foff = m[i]->offset + paging_offset; 1385 off = swap_pager_block_offset(swp, foff); 1386 /* 1387 * if we are setting the valid bit anew, 1388 * then diminish the swap free space 1389 */ 1390 if( (swb[i]->swb_valid & (1 << off)) == 0) 1391 vm_swap_size -= btodb(PAGE_SIZE); 1392 1393 /* 1394 * set the valid bit 1395 */ 1396 swb[i]->swb_valid |= (1 << off); 1397 /* 1398 * and unlock the data structure 1399 */ 1400 --swb[i]->swb_locked; 1401 } 1402 1403 s = splbio(); 1404 /* 1405 * Get a swap buffer header and perform the IO 1406 */ 1407 bp = spc->spc_bp; 1408 bzero(bp, sizeof *bp); 1409 bp->b_spc = spc; 1410 1411 bp->b_flags = B_BUSY; 1412 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1413 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1414 if( bp->b_rcred != NOCRED) 1415 crhold(bp->b_rcred); 1416 if( bp->b_wcred != NOCRED) 1417 crhold(bp->b_wcred); 1418 bp->b_data = (caddr_t) kva; 1419 bp->b_blkno = reqaddr[0]; 1420 bgetvp( swapdev_vp, bp); 1421 1422 bp->b_bcount = PAGE_SIZE*count; 1423 bp->b_bufsize = PAGE_SIZE*count; 1424 swapdev_vp->v_numoutput++; 1425 1426 /* 1427 * If this is an async write we set up additional buffer fields 1428 * and place a "cleaning" entry on the inuse queue. 1429 */ 1430 if ( flags & B_ASYNC ) { 1431 spc->spc_flags = 0; 1432 spc->spc_swp = swp; 1433 for(i=0;i<count;i++) 1434 spc->spc_m[i] = m[i]; 1435 spc->spc_count = count; 1436 /* 1437 * the completion routine for async writes 1438 */ 1439 bp->b_flags |= B_CALL; 1440 bp->b_iodone = swap_pager_iodone; 1441 bp->b_dirtyoff = 0; 1442 bp->b_dirtyend = bp->b_bcount; 1443 swp->sw_poip++; 1444 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 1445 } else { 1446 swp->sw_poip++; 1447 bp->b_flags |= B_CALL; 1448 bp->b_iodone = swap_pager_iodone1; 1449 } 1450 /* 1451 * perform the I/O 1452 */ 1453 VOP_STRATEGY(bp); 1454 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { 1455 if ((bp->b_flags & B_DONE) == B_DONE) { 1456 swap_pager_clean(); 1457 } 1458 splx(s); 1459 for(i=0;i<count;i++) { 1460 rtvals[i] = VM_PAGER_PEND; 1461 } 1462 return VM_PAGER_PEND; 1463 } 1464 1465 /* 1466 * wait for the sync I/O to complete 1467 */ 1468 while ((bp->b_flags & B_DONE) == 0) { 1469 tsleep((caddr_t)bp, PVM, "swwrt", 0); 1470 } 1471 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1472 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1473 1474 --swp->sw_poip; 1475 if (swp->sw_poip == 0) 1476 wakeup((caddr_t) swp); 1477 1478 if (bp->b_vp) 1479 brelvp(bp); 1480 1481 splx(s); 1482 1483 /* 1484 * remove the mapping for kernel virtual 1485 */ 1486 pmap_qremove( kva, count); 1487 1488 /* 1489 * if we have written the page, then indicate that the page 1490 * is clean. 1491 */ 1492 if (rv == VM_PAGER_OK) { 1493 for(i=0;i<count;i++) { 1494 if( rtvals[i] == VM_PAGER_OK) { 1495 m[i]->flags |= PG_CLEAN; 1496 m[i]->flags &= ~PG_LAUNDRY; 1497 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1498 /* 1499 * optimization, if a page has been read during the 1500 * pageout process, we activate it. 1501 */ 1502 if ( (m[i]->flags & PG_ACTIVE) == 0 && 1503 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) 1504 vm_page_activate(m[i]); 1505 } 1506 } 1507 } else { 1508 for(i=0;i<count;i++) { 1509 rtvals[i] = rv; 1510 m[i]->flags |= PG_LAUNDRY; 1511 } 1512 } 1513 1514 if( spc->spc_altkva) 1515 kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE); 1516 1517 if( bp->b_rcred != NOCRED) 1518 crfree(bp->b_rcred); 1519 if( bp->b_wcred != NOCRED) 1520 crfree(bp->b_wcred); 1521 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1522 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1523 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1524 wakeup((caddr_t)&swap_pager_free); 1525 } 1526 1527 return(rv); 1528 } 1529 1530 boolean_t 1531 swap_pager_clean() 1532 { 1533 register swp_clean_t spc, tspc; 1534 register int s; 1535 1536 tspc = NULL; 1537 if (swap_pager_done.tqh_first == NULL) 1538 return FALSE; 1539 for (;;) { 1540 s = splbio(); 1541 /* 1542 * Look up and removal from done list must be done 1543 * at splbio() to avoid conflicts with swap_pager_iodone. 1544 */ 1545 while (spc = swap_pager_done.tqh_first) { 1546 if( spc->spc_altkva) { 1547 pmap_qremove( spc->spc_altkva, spc->spc_count); 1548 kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE); 1549 spc->spc_altkva = 0; 1550 } else { 1551 pmap_qremove( spc->spc_kva, 1); 1552 } 1553 swap_pager_finish(spc); 1554 TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 1555 goto doclean; 1556 } 1557 1558 /* 1559 * No operations done, thats all we can do for now. 1560 */ 1561 1562 splx(s); 1563 break; 1564 1565 /* 1566 * The desired page was found to be busy earlier in 1567 * the scan but has since completed. 1568 */ 1569 doclean: 1570 if (tspc && tspc == spc) { 1571 tspc = NULL; 1572 } 1573 spc->spc_flags = 0; 1574 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1575 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1576 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1577 wakeup((caddr_t)&swap_pager_free); 1578 } 1579 ++cleandone; 1580 splx(s); 1581 } 1582 1583 return(tspc ? TRUE : FALSE); 1584 } 1585 1586 void 1587 swap_pager_finish(spc) 1588 register swp_clean_t spc; 1589 { 1590 vm_object_t object = spc->spc_m[0]->object; 1591 int i; 1592 1593 if ((object->paging_in_progress -= spc->spc_count) == 0) 1594 thread_wakeup((int) object); 1595 1596 /* 1597 * If no error mark as clean and inform the pmap system. 1598 * If error, mark as dirty so we will try again. 1599 * (XXX could get stuck doing this, should give up after awhile) 1600 */ 1601 if (spc->spc_flags & SPC_ERROR) { 1602 for(i=0;i<spc->spc_count;i++) { 1603 printf("swap_pager_finish: clean of page %x failed\n", 1604 VM_PAGE_TO_PHYS(spc->spc_m[i])); 1605 spc->spc_m[i]->flags |= PG_LAUNDRY; 1606 } 1607 } else { 1608 for(i=0;i<spc->spc_count;i++) { 1609 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); 1610 spc->spc_m[i]->flags |= PG_CLEAN; 1611 } 1612 } 1613 1614 1615 for(i=0;i<spc->spc_count;i++) { 1616 /* 1617 * we wakeup any processes that are waiting on 1618 * these pages. 1619 */ 1620 PAGE_WAKEUP(spc->spc_m[i]); 1621 } 1622 nswiodone -= spc->spc_count; 1623 1624 return; 1625 } 1626 1627 /* 1628 * swap_pager_iodone 1629 */ 1630 void 1631 swap_pager_iodone(bp) 1632 register struct buf *bp; 1633 { 1634 register swp_clean_t spc; 1635 int s; 1636 1637 s = splbio(); 1638 spc = (swp_clean_t) bp->b_spc; 1639 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 1640 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 1641 if (bp->b_flags & B_ERROR) { 1642 spc->spc_flags |= SPC_ERROR; 1643 printf("error %d blkno %d sz %d ", 1644 bp->b_error, bp->b_blkno, bp->b_bcount); 1645 } 1646 1647 /* 1648 if ((bp->b_flags & B_READ) == 0) 1649 vwakeup(bp); 1650 */ 1651 1652 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); 1653 if (bp->b_vp) { 1654 brelvp(bp); 1655 } 1656 if( bp->b_rcred != NOCRED) 1657 crfree(bp->b_rcred); 1658 if( bp->b_wcred != NOCRED) 1659 crfree(bp->b_wcred); 1660 1661 nswiodone += spc->spc_count; 1662 if (--spc->spc_swp->sw_poip == 0) { 1663 wakeup((caddr_t)spc->spc_swp); 1664 } 1665 1666 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1667 swap_pager_inuse.tqh_first == 0) { 1668 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1669 wakeup((caddr_t)&swap_pager_free); 1670 wakeup((caddr_t)&vm_pages_needed); 1671 } 1672 1673 if (vm_pageout_pages_needed) { 1674 wakeup((caddr_t)&vm_pageout_pages_needed); 1675 } 1676 1677 if ((swap_pager_inuse.tqh_first == NULL) || 1678 (cnt.v_free_count < cnt.v_free_min && 1679 nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { 1680 wakeup((caddr_t)&vm_pages_needed); 1681 } 1682 splx(s); 1683 } 1684 1685 /* 1686 * return true if any swap control structures can be allocated 1687 */ 1688 int 1689 swap_pager_ready() { 1690 if( swap_pager_free.tqh_first) 1691 return 1; 1692 else 1693 return 0; 1694 } 1695