/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.15 1994/10/22 02:17:59 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];

extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

static inline void
swapsizecheck()
{
	if (vm_swap_size == 0)
		return;
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
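
/*
 * Illustrative note (example values only; PAGE_SIZE, DEV_BSIZE, CLBYTES
 * and SWB_NPAGES are set elsewhere): with PAGE_SIZE = 4096 and
 * DEV_BSIZE = 512, btodb(PAGE_SIZE) is 8, so swapsizecheck() above
 * latches swap_pager_full on when fewer than 1024 disk blocks (512K)
 * of swap remain and clears it only once more than 1536 blocks (768K)
 * are free again; the gap provides hysteresis so the flag does not
 * flutter at the boundary.  Likewise, with SWB_NPAGES = 8, dmmax comes
 * to 128 disk blocks; the clustering code later in this file refuses
 * to let a single I/O span a dmmax-aligned region.
 */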

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;

		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
				    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}

/*
	if (swap_pager_full && (vm_swap_size == 0)) {
		return (NULL);
	}
*/

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) /
	    btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}
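
	/*
	 * Illustrative note (example values only: PAGE_SIZE = 4096 and
	 * SWB_NPAGES = 8, both set elsewhere): each sw_blk_t then covers
	 * 32K of the object.  A pager offset of 0x15000 (84K) maps to
	 * sw_blocks[84K / 32K] = sw_blocks[2], page slot
	 * (84K % 32K) / 4K = 5, so its disk address lives in
	 * sw_blocks[2].swb_block[5], and bit 5 of swb_valid records
	 * whether that block has ever been written.
	 */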

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * returns the disk block associated with a pager and offset;
 * as a side effect, also returns a flag indicating
 * whether the block has been written
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (NULL);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
#ifdef EXP
	unsigned tmpalloc;
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);

	if (amount < nblocksfrag) {
		if (rlist_alloc(&swapfrag, amount, rtval))
			return 1;
		if (!rlist_alloc(&swaplist, nblocksfrag, &tmpalloc))
			return 0;
		rlist_free(&swapfrag, tmpalloc + amount, tmpalloc + nblocksfrag - 1);
		*rtval = tmpalloc;
		return 1;
	}
#endif
	if (!rlist_alloc(&swaplist, amount, rtval))
		return 0;
	else
		return 1;
}
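
/*
 * Illustrative note on the #ifdef EXP path above (example values only,
 * assuming SWB_NPAGES = 8 and PAGE_SIZE = 4096, so nblocksfrag = 64
 * disk blocks): a sub-cluster request of, say, 8 blocks is first tried
 * against the fragment list swapfrag; on a miss, a whole 64-block
 * cluster is carved from swaplist, the trailing 56 blocks
 * (tmpalloc + 8 .. tmpalloc + 63) are donated to swapfrag, and the
 * leading 8 are returned.  This keeps small allocations from
 * peppering the main resource list with fragments.
 */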

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
#ifdef EXP
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	unsigned tmpalloc;

	if (((to + 1) - from) >= nblocksfrag) {
#endif
		rlist_free(&swaplist, from, to);
#ifdef EXP
		return;
	}
	rlist_free(&swapfrag, from, to);
	while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
		rlist_free(&swaplist, tmpalloc, tmpalloc + nblocksfrag - 1);
	}
#endif
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				vm_swap_size += btodb(PAGE_SIZE);
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	swapsizecheck();
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
	}
	swapsizecheck();

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}
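
/*
 * Descriptive note: swap_pager_reclaim() is invoked from
 * swap_pager_output() below when a swap space allocation fails; that
 * path makes a single reclaim attempt and then retries the allocation
 * before giving up with VM_PAGER_AGAIN.  Concurrent callers simply
 * sleep on in_reclaim and return, letting the first caller's pass
 * stand in for their own.
 */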

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	if (vm_swap_size == 0)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * clear source blocks before the destination offset
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			swapsizecheck();
			*addr = SWB_EMPTY;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
		    &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space, then
			 * copy the allocation from the source to the dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest has an invalid (never written)
				 * block, release it so the source block can be
				 * moved over; a valid dest block is kept, and
				 * the source block is freed below without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
					vm_swap_size -= btodb(PAGE_SIZE);
				}
			}
			/*
			 * if the source is not empty at this point, then deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
				if (srcvalid)
					vm_swap_size += btodb(PAGE_SIZE);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
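
	/*
	 * Descriptive note: the copy proceeds in three passes over the
	 * source.  Above, blocks below (offset + srcoffset) were freed,
	 * then blocks overlapping the destination's range were either
	 * moved to the destination or released.  Below, anything in the
	 * source beyond the destination's extent is freed before the
	 * source pager itself is torn down.
	 */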

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);

		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			*srcaddrp = SWB_EMPTY;
		}
	}

	swapsizecheck();
	splx(s);

	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				if (bp->swb_valid & (1 << j))
					vm_swap_size += btodb(PAGE_SIZE);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);
	swapsizecheck();

	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  This routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
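
/*
 * Descriptive note: swap_pager_freepage() (and swap_pager_ridpages()
 * below) back out pages that vm_fault speculatively allocated for
 * readahead; PAGE_WAKEUP clears the busy bit and notifies any sleepers
 * before the page goes back to the free list.
 */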

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * sync writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}

int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;
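
	/*
	 * Illustrative note (example values only: PAGE_SIZE = 4096, so
	 * btodb(PAGE_SIZE) = 8): if the required page lives at disk
	 * block 1000, the scans above keep a neighbor m[i] only if its
	 * block is exactly 1000 + (i - reqpage) * 8, it has actually
	 * been written (its swb_valid bit is set), and it lies in the
	 * same dmmax-aligned region, so that the whole cluster can be
	 * read with a single contiguous I/O.
	 */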

	/*
	 * at this point:
	 *	"m" is a pointer to the array of vm_page_t's for paging I/O
	 *	"count" is the number of vm_page_t entries represented by "m"
	 *	"object" is the vm_object_t for I/O
	 *	"reqpage" is the index into "m" for the page actually faulted
	 */

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one page transfer,
		 * so we free the other pages that might have been allocated by
		 * vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	bgetvp(swapdev_vp, bp);

	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
		    (long) bp->b_blkno, (long) bp->b_bcount, bp->b_error);
		rv = VM_PAGER_FAIL;
	} else {
		rv = VM_PAGER_OK;
	}
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);
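
	/*
	 * Descriptive note: two cleanup paths follow.  A one-page read
	 * borrowed a preallocated spc (with its own kva and buf header)
	 * and must return it to swap_pager_free; a multi-page read used
	 * a physical buffer from getpbuf(), releases it with relpbuf(),
	 * and finishes the pages inline.
	 */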

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page activated
					 * is up in the air, but we should put the page
					 * on a page queue somewhere (it already is in
					 * the object).  After some empirical results,
					 * it is best to deactivate the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for this
					 * page we now tell them that it is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in.  We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->flags &= ~PG_CLEAN;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if (count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	if (vm_swap_size == 0) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}

	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this block,
			 * we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small chunks
			 * for small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to reclaim space and
				 * retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}
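
	/*
	 * Descriptive note: at this point rtvals[0 .. count-1] are all
	 * VM_PAGER_OK and reqaddr[] describes one physically contiguous
	 * run inside a single dmmax region, so the pageout below can be
	 * issued as one write of count pages starting at reqaddr[0].
	 */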

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 * NOTE -- we do not use the physical buffer pool or the
	 * preallocated associated kva's because of the potential for
	 * deadlock.  This is very subtle -- but deadlocks or resource
	 * contention must be avoided on pageouts -- or your system will
	 * sleep (forever) !!!
	 */
/*
	if (count > 1) {
		kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
		if (!kva) {
			for (i = 0; i < count; i++) {
				if (swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
		s = splbio();
		if (curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * if we are setting the valid bit anew,
		 * then diminish the swap free space
		 */
		if ((swb[i]->swb_valid & (1 << off)) == 0)
			vm_swap_size -= btodb(PAGE_SIZE);

		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;
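
	/*
	 * Descriptive note: the two completion models diverge below.  An
	 * async write queues the spc on swap_pager_inuse and lets
	 * swap_pager_iodone() move it to swap_pager_done, where a later
	 * swap_pager_clean() call finishes the pages; a sync write uses
	 * swap_pager_iodone1(), which merely marks the buffer B_DONE and
	 * wakes the thread sleeping on it.
	 */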

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
		    (long) bp->b_blkno, (long) bp->b_bcount, bp->b_error);
		rv = VM_PAGER_FAIL;
	} else {
		rv = VM_PAGER_OK;
	}
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read during the
				 * pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}

	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the completion routine for async writes
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, (long) bp->b_bcount, bp->b_error);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
		wakeup((caddr_t) &vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
		wakeup((caddr_t) &vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}