1 /* 2 * Copyright (c) 1994 John S. Dyson 3 * Copyright (c) 1990 University of Utah. 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40 * 41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 42 * $Id: swap_pager.c,v 1.4 1994/08/02 07:55:13 davidg Exp $ 43 */ 44 45 /* 46 * Quick hack to page to dedicated partition(s). 47 * TODO: 48 * Add multiprocessor locks 49 * Deal with async writes in a better fashion 50 */ 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/proc.h> 55 #include <sys/buf.h> 56 #include <sys/vnode.h> 57 #include <sys/malloc.h> 58 59 #include <miscfs/specfs/specdev.h> 60 #include <sys/rlist.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_pager.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/swap_pager.h> 67 68 #ifndef NPENDINGIO 69 #define NPENDINGIO 16 70 #endif 71 72 extern int nswbuf; 73 int nswiodone; 74 extern int vm_pageout_rate_limit; 75 static int cleandone; 76 extern int hz; 77 int swap_pager_full; 78 extern vm_map_t pager_map; 79 extern int vm_pageout_pages_needed; 80 extern int vm_swap_size; 81 extern struct vnode *swapdev_vp; 82 83 #define MAX_PAGEOUT_CLUSTER 8 84 85 TAILQ_HEAD(swpclean, swpagerclean); 86 87 typedef struct swpagerclean *swp_clean_t; 88 89 struct swpagerclean { 90 TAILQ_ENTRY(swpagerclean) spc_list; 91 int spc_flags; 92 struct buf *spc_bp; 93 sw_pager_t spc_swp; 94 vm_offset_t spc_kva; 95 vm_offset_t spc_altkva; 96 int spc_count; 97 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 98 } swcleanlist [NPENDINGIO] ; 99 100 101 extern vm_map_t kernel_map; 102 103 /* spc_flags values */ 104 #define SPC_ERROR 0x01 105 106 #define SWB_EMPTY (-1) 107 108 void swap_pager_init(void); 109 vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t); 110 void swap_pager_dealloc(vm_pager_t); 111 boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t); 112 boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t); 113 boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t); 114 boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t); 115 int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int); 116 void swap_pager_iodone(struct buf *); 117 boolean_t swap_pager_clean(); 118 119 extern struct pagerops swappagerops; 120 121 struct swpclean swap_pager_done; /* list of compileted page cleans */ 122 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 123 struct swpclean swap_pager_free; /* list of free pager clean structs */ 124 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 125 struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ 126 127 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 128 int swap_pager_needflags; 129 struct rlist *swapfrag; 130 131 struct pagerlst *swp_qs[]={ 132 &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 133 }; 134 135 int swap_pager_putmulti(); 136 137 struct pagerops swappagerops = { 138 swap_pager_init, 139 swap_pager_alloc, 140 swap_pager_dealloc, 141 swap_pager_getpage, 142 swap_pager_getmulti, 143 swap_pager_putpage, 144 swap_pager_putmulti, 145 swap_pager_haspage 146 }; 147 148 extern int nswbuf; 149 150 int npendingio = NPENDINGIO; 151 int pendingiowait; 152 int require_swap_init; 153 void swap_pager_finish(); 154 int dmmin, dmmax; 155 extern int vm_page_count; 156 157 struct buf * getpbuf() ; 158 void relpbuf(struct buf *bp) ; 159 160 static inline void swapsizecheck() { 161 if( vm_swap_size < 128*btodb(PAGE_SIZE)) { 162 if( swap_pager_full) 163 printf("swap_pager: out of space\n"); 164 swap_pager_full = 1; 165 } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) 166 swap_pager_full = 0; 167 } 168 169 void 170 swap_pager_init() 171 { 172 extern int dmmin, dmmax; 173 174 dfltpagerops = &swappagerops; 175 176 TAILQ_INIT(&swap_pager_list); 177 TAILQ_INIT(&swap_pager_un_list); 178 179 /* 180 * Initialize clean lists 181 */ 182 TAILQ_INIT(&swap_pager_inuse); 183 TAILQ_INIT(&swap_pager_done); 184 TAILQ_INIT(&swap_pager_free); 185 186 require_swap_init = 1; 187 188 /* 189 * Calculate the swap allocation constants. 190 */ 191 192 dmmin = CLBYTES/DEV_BSIZE; 193 dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; 194 195 } 196 197 /* 198 * Allocate a pager structure and associated resources. 199 * Note that if we are called from the pageout daemon (handle == NULL) 200 * we should not wait for memory as it could resulting in deadlock. 201 */ 202 vm_pager_t 203 swap_pager_alloc(handle, size, prot, offset) 204 caddr_t handle; 205 register vm_size_t size; 206 vm_prot_t prot; 207 vm_offset_t offset; 208 { 209 register vm_pager_t pager; 210 register sw_pager_t swp; 211 int waitok; 212 int i,j; 213 214 if (require_swap_init) { 215 swp_clean_t spc; 216 struct buf *bp; 217 /* 218 * kva's are allocated here so that we dont need to keep 219 * doing kmem_alloc pageables at runtime 220 */ 221 for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { 222 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE); 223 if (!spc->spc_kva) { 224 break; 225 } 226 spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); 227 if (!spc->spc_bp) { 228 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 229 break; 230 } 231 spc->spc_flags = 0; 232 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 233 } 234 require_swap_init = 0; 235 if( size == 0) 236 return(NULL); 237 } 238 239 /* 240 * If this is a "named" anonymous region, look it up and 241 * return the appropriate pager if it exists. 242 */ 243 if (handle) { 244 pager = vm_pager_lookup(&swap_pager_list, handle); 245 if (pager != NULL) { 246 /* 247 * Use vm_object_lookup to gain a reference 248 * to the object and also to remove from the 249 * object cache. 250 */ 251 if (vm_object_lookup(pager) == NULL) 252 panic("swap_pager_alloc: bad object"); 253 return(pager); 254 } 255 } 256 257 if (swap_pager_full) { 258 return(NULL); 259 } 260 261 /* 262 * Pager doesn't exist, allocate swap management resources 263 * and initialize. 264 */ 265 waitok = handle ? M_WAITOK : M_NOWAIT; 266 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 267 if (pager == NULL) 268 return(NULL); 269 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 270 if (swp == NULL) { 271 free((caddr_t)pager, M_VMPAGER); 272 return(NULL); 273 } 274 size = round_page(size); 275 swp->sw_osize = size; 276 swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); 277 swp->sw_blocks = (sw_blk_t) 278 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 279 M_VMPGDATA, waitok); 280 if (swp->sw_blocks == NULL) { 281 free((caddr_t)swp, M_VMPGDATA); 282 free((caddr_t)pager, M_VMPAGER); 283 return(NULL); 284 } 285 286 for (i = 0; i < swp->sw_nblocks; i++) { 287 swp->sw_blocks[i].swb_valid = 0; 288 swp->sw_blocks[i].swb_locked = 0; 289 for (j = 0; j < SWB_NPAGES; j++) 290 swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; 291 } 292 293 swp->sw_poip = 0; 294 if (handle) { 295 vm_object_t object; 296 297 swp->sw_flags = SW_NAMED; 298 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 299 /* 300 * Consistant with other pagers: return with object 301 * referenced. Can't do this with handle == NULL 302 * since it might be the pageout daemon calling. 303 */ 304 object = vm_object_allocate(size); 305 vm_object_enter(object, pager); 306 vm_object_setpager(object, pager, 0, FALSE); 307 } else { 308 swp->sw_flags = 0; 309 TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list); 310 } 311 pager->pg_handle = handle; 312 pager->pg_ops = &swappagerops; 313 pager->pg_type = PG_SWAP; 314 pager->pg_data = (caddr_t)swp; 315 316 return(pager); 317 } 318 319 /* 320 * returns disk block associated with pager and offset 321 * additionally, as a side effect returns a flag indicating 322 * if the block has been written 323 */ 324 325 static int * 326 swap_pager_diskaddr(swp, offset, valid) 327 sw_pager_t swp; 328 vm_offset_t offset; 329 int *valid; 330 { 331 register sw_blk_t swb; 332 int ix; 333 334 if (valid) 335 *valid = 0; 336 ix = offset / (SWB_NPAGES*PAGE_SIZE); 337 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 338 return(FALSE); 339 } 340 swb = &swp->sw_blocks[ix]; 341 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 342 if (valid) 343 *valid = swb->swb_valid & (1<<ix); 344 return &swb->swb_block[ix]; 345 } 346 347 /* 348 * Utility routine to set the valid (written) bit for 349 * a block associated with a pager and offset 350 */ 351 static void 352 swap_pager_setvalid(swp, offset, valid) 353 sw_pager_t swp; 354 vm_offset_t offset; 355 int valid; 356 { 357 register sw_blk_t swb; 358 int ix; 359 360 ix = offset / (SWB_NPAGES*PAGE_SIZE); 361 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) 362 return; 363 364 swb = &swp->sw_blocks[ix]; 365 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 366 if (valid) 367 swb->swb_valid |= (1 << ix); 368 else 369 swb->swb_valid &= ~(1 << ix); 370 return; 371 } 372 373 /* 374 * this routine allocates swap space with a fragmentation 375 * minimization policy. 376 */ 377 int 378 swap_pager_getswapspace( unsigned amount, unsigned *rtval) { 379 unsigned tmpalloc; 380 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 381 if( amount < nblocksfrag) { 382 if( rlist_alloc(&swapfrag, amount, rtval)) 383 return 1; 384 if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc)) 385 return 0; 386 rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1); 387 *rtval = tmpalloc; 388 return 1; 389 } 390 if( !rlist_alloc(&swapmap, amount, rtval)) 391 return 0; 392 else 393 return 1; 394 } 395 396 /* 397 * this routine frees swap space with a fragmentation 398 * minimization policy. 399 */ 400 void 401 swap_pager_freeswapspace( unsigned from, unsigned to) { 402 unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE); 403 unsigned tmpalloc; 404 if( ((to + 1) - from) >= nblocksfrag) { 405 while( (from + nblocksfrag) <= to + 1) { 406 rlist_free(&swapmap, from, from + nblocksfrag - 1); 407 from += nblocksfrag; 408 } 409 } 410 if( from >= to) 411 return; 412 rlist_free(&swapfrag, from, to); 413 while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) { 414 rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1); 415 } 416 } 417 /* 418 * this routine frees swap blocks from a specified pager 419 */ 420 void 421 _swap_pager_freespace(swp, start, size) 422 sw_pager_t swp; 423 vm_offset_t start; 424 vm_offset_t size; 425 { 426 vm_offset_t i; 427 int s; 428 429 s = splbio(); 430 for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) { 431 int valid; 432 int *addr = swap_pager_diskaddr(swp, i, &valid); 433 if (addr && *addr != SWB_EMPTY) { 434 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 435 if( valid) { 436 vm_swap_size += btodb(PAGE_SIZE); 437 swap_pager_setvalid(swp, i, 0); 438 } 439 *addr = SWB_EMPTY; 440 } 441 } 442 swapsizecheck(); 443 splx(s); 444 } 445 446 void 447 swap_pager_freespace(pager, start, size) 448 vm_pager_t pager; 449 vm_offset_t start; 450 vm_offset_t size; 451 { 452 _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); 453 } 454 455 /* 456 * swap_pager_reclaim frees up over-allocated space from all pagers 457 * this eliminates internal fragmentation due to allocation of space 458 * for segments that are never swapped to. It has been written so that 459 * it does not block until the rlist_free operation occurs; it keeps 460 * the queues consistant. 461 */ 462 463 /* 464 * Maximum number of blocks (pages) to reclaim per pass 465 */ 466 #define MAXRECLAIM 256 467 468 void 469 swap_pager_reclaim() 470 { 471 vm_pager_t p; 472 sw_pager_t swp; 473 int i, j, k; 474 int s; 475 int reclaimcount; 476 static int reclaims[MAXRECLAIM]; 477 static int in_reclaim; 478 479 /* 480 * allow only one process to be in the swap_pager_reclaim subroutine 481 */ 482 s = splbio(); 483 if (in_reclaim) { 484 tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); 485 splx(s); 486 return; 487 } 488 in_reclaim = 1; 489 reclaimcount = 0; 490 491 /* for each pager queue */ 492 for (k = 0; swp_qs[k]; k++) { 493 494 p = swp_qs[k]->tqh_first; 495 while (p && (reclaimcount < MAXRECLAIM)) { 496 497 /* 498 * see if any blocks associated with a pager has been 499 * allocated but not used (written) 500 */ 501 swp = (sw_pager_t) p->pg_data; 502 for (i = 0; i < swp->sw_nblocks; i++) { 503 sw_blk_t swb = &swp->sw_blocks[i]; 504 if( swb->swb_locked) 505 continue; 506 for (j = 0; j < SWB_NPAGES; j++) { 507 if (swb->swb_block[j] != SWB_EMPTY && 508 (swb->swb_valid & (1 << j)) == 0) { 509 reclaims[reclaimcount++] = swb->swb_block[j]; 510 swb->swb_block[j] = SWB_EMPTY; 511 if (reclaimcount >= MAXRECLAIM) 512 goto rfinished; 513 } 514 } 515 } 516 p = p->pg_list.tqe_next; 517 } 518 } 519 520 rfinished: 521 522 /* 523 * free the blocks that have been added to the reclaim list 524 */ 525 for (i = 0; i < reclaimcount; i++) { 526 swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); 527 swapsizecheck(); 528 wakeup((caddr_t) &in_reclaim); 529 } 530 531 splx(s); 532 in_reclaim = 0; 533 wakeup((caddr_t) &in_reclaim); 534 } 535 536 537 /* 538 * swap_pager_copy copies blocks from one pager to another and 539 * destroys the source pager 540 */ 541 542 void 543 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) 544 vm_pager_t srcpager; 545 vm_offset_t srcoffset; 546 vm_pager_t dstpager; 547 vm_offset_t dstoffset; 548 vm_offset_t offset; 549 { 550 sw_pager_t srcswp, dstswp; 551 vm_offset_t i; 552 int s; 553 554 srcswp = (sw_pager_t) srcpager->pg_data; 555 dstswp = (sw_pager_t) dstpager->pg_data; 556 557 /* 558 * remove the source pager from the swap_pager internal queue 559 */ 560 s = splbio(); 561 if (srcswp->sw_flags & SW_NAMED) { 562 TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); 563 srcswp->sw_flags &= ~SW_NAMED; 564 } else { 565 TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); 566 } 567 568 while (srcswp->sw_poip) { 569 tsleep((caddr_t)srcswp, PVM, "spgout", 0); 570 } 571 splx(s); 572 573 /* 574 * clean all of the pages that are currently active and finished 575 */ 576 (void) swap_pager_clean(); 577 578 s = splbio(); 579 /* 580 * clear source block before destination object 581 * (release allocated space) 582 */ 583 for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { 584 int valid; 585 int *addr = swap_pager_diskaddr(srcswp, i, &valid); 586 if (addr && *addr != SWB_EMPTY) { 587 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 588 if( valid) 589 vm_swap_size += btodb(PAGE_SIZE); 590 swapsizecheck(); 591 *addr = SWB_EMPTY; 592 } 593 } 594 /* 595 * transfer source to destination 596 */ 597 for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { 598 int srcvalid, dstvalid; 599 int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, 600 &srcvalid); 601 int *dstaddrp; 602 /* 603 * see if the source has space allocated 604 */ 605 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 606 /* 607 * if the source is valid and the dest has no space, then 608 * copy the allocation from the srouce to the dest. 609 */ 610 if (srcvalid) { 611 dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); 612 /* 613 * if the dest already has a valid block, deallocate the 614 * source block without copying. 615 */ 616 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 617 swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); 618 *dstaddrp = SWB_EMPTY; 619 } 620 if (dstaddrp && *dstaddrp == SWB_EMPTY) { 621 *dstaddrp = *srcaddrp; 622 *srcaddrp = SWB_EMPTY; 623 swap_pager_setvalid(dstswp, i + dstoffset, 1); 624 vm_swap_size -= btodb(PAGE_SIZE); 625 } 626 } 627 /* 628 * if the source is not empty at this point, then deallocate the space. 629 */ 630 if (*srcaddrp != SWB_EMPTY) { 631 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 632 if( srcvalid) 633 vm_swap_size += btodb(PAGE_SIZE); 634 *srcaddrp = SWB_EMPTY; 635 } 636 } 637 } 638 639 /* 640 * deallocate the rest of the source object 641 */ 642 for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { 643 int valid; 644 int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); 645 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 646 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 647 if( valid) 648 vm_swap_size += btodb(PAGE_SIZE); 649 *srcaddrp = SWB_EMPTY; 650 } 651 } 652 653 swapsizecheck(); 654 splx(s); 655 656 free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); 657 srcswp->sw_blocks = 0; 658 free((caddr_t)srcswp, M_VMPGDATA); 659 srcpager->pg_data = 0; 660 free((caddr_t)srcpager, M_VMPAGER); 661 662 return; 663 } 664 665 666 void 667 swap_pager_dealloc(pager) 668 vm_pager_t pager; 669 { 670 register int i,j; 671 register sw_blk_t bp; 672 register sw_pager_t swp; 673 int s; 674 675 /* 676 * Remove from list right away so lookups will fail if we 677 * block for pageout completion. 678 */ 679 s = splbio(); 680 swp = (sw_pager_t) pager->pg_data; 681 if (swp->sw_flags & SW_NAMED) { 682 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 683 swp->sw_flags &= ~SW_NAMED; 684 } else { 685 TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); 686 } 687 /* 688 * Wait for all pageouts to finish and remove 689 * all entries from cleaning list. 690 */ 691 692 while (swp->sw_poip) { 693 tsleep((caddr_t)swp, PVM, "swpout", 0); 694 } 695 splx(s); 696 697 698 (void) swap_pager_clean(); 699 700 /* 701 * Free left over swap blocks 702 */ 703 s = splbio(); 704 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { 705 for (j = 0; j < SWB_NPAGES; j++) 706 if (bp->swb_block[j] != SWB_EMPTY) { 707 swap_pager_freeswapspace((unsigned)bp->swb_block[j], 708 (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); 709 if( bp->swb_valid & (1<<j)) 710 vm_swap_size += btodb(PAGE_SIZE); 711 bp->swb_block[j] = SWB_EMPTY; 712 } 713 } 714 splx(s); 715 swapsizecheck(); 716 717 /* 718 * Free swap management resources 719 */ 720 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 721 swp->sw_blocks = 0; 722 free((caddr_t)swp, M_VMPGDATA); 723 pager->pg_data = 0; 724 free((caddr_t)pager, M_VMPAGER); 725 } 726 727 /* 728 * swap_pager_getmulti can get multiple pages. 729 */ 730 int 731 swap_pager_getmulti(pager, m, count, reqpage, sync) 732 vm_pager_t pager; 733 vm_page_t *m; 734 int count; 735 int reqpage; 736 boolean_t sync; 737 { 738 if( reqpage >= count) 739 panic("swap_pager_getmulti: reqpage >= count\n"); 740 return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); 741 } 742 743 /* 744 * swap_pager_getpage gets individual pages 745 */ 746 int 747 swap_pager_getpage(pager, m, sync) 748 vm_pager_t pager; 749 vm_page_t m; 750 boolean_t sync; 751 { 752 vm_page_t marray[1]; 753 754 marray[0] = m; 755 return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); 756 } 757 758 int 759 swap_pager_putmulti(pager, m, c, sync, rtvals) 760 vm_pager_t pager; 761 vm_page_t *m; 762 int c; 763 boolean_t sync; 764 int *rtvals; 765 { 766 int flags; 767 768 if (pager == NULL) { 769 (void) swap_pager_clean(); 770 return VM_PAGER_OK; 771 } 772 773 flags = B_WRITE; 774 if (!sync) 775 flags |= B_ASYNC; 776 777 return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); 778 } 779 780 /* 781 * swap_pager_putpage writes individual pages 782 */ 783 int 784 swap_pager_putpage(pager, m, sync) 785 vm_pager_t pager; 786 vm_page_t m; 787 boolean_t sync; 788 { 789 int flags; 790 vm_page_t marray[1]; 791 int rtvals[1]; 792 793 794 if (pager == NULL) { 795 (void) swap_pager_clean(); 796 return VM_PAGER_OK; 797 } 798 799 marray[0] = m; 800 flags = B_WRITE; 801 if (!sync) 802 flags |= B_ASYNC; 803 804 swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); 805 806 return rtvals[0]; 807 } 808 809 static inline int 810 const swap_pager_block_index(swp, offset) 811 sw_pager_t swp; 812 vm_offset_t offset; 813 { 814 return (offset / (SWB_NPAGES*PAGE_SIZE)); 815 } 816 817 static inline int 818 const swap_pager_block_offset(swp, offset) 819 sw_pager_t swp; 820 vm_offset_t offset; 821 { 822 return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); 823 } 824 825 /* 826 * _swap_pager_haspage returns TRUE if the pager has data that has 827 * been written out. 828 */ 829 static boolean_t 830 _swap_pager_haspage(swp, offset) 831 sw_pager_t swp; 832 vm_offset_t offset; 833 { 834 register sw_blk_t swb; 835 int ix; 836 837 ix = offset / (SWB_NPAGES*PAGE_SIZE); 838 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 839 return(FALSE); 840 } 841 swb = &swp->sw_blocks[ix]; 842 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 843 if (swb->swb_block[ix] != SWB_EMPTY) { 844 if (swb->swb_valid & (1 << ix)) 845 return TRUE; 846 } 847 848 return(FALSE); 849 } 850 851 /* 852 * swap_pager_haspage is the externally accessible version of 853 * _swap_pager_haspage above. this routine takes a vm_pager_t 854 * for an argument instead of sw_pager_t. 855 */ 856 boolean_t 857 swap_pager_haspage(pager, offset) 858 vm_pager_t pager; 859 vm_offset_t offset; 860 { 861 return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); 862 } 863 864 /* 865 * swap_pager_freepage is a convienience routine that clears the busy 866 * bit and deallocates a page. 867 */ 868 static void 869 swap_pager_freepage(m) 870 vm_page_t m; 871 { 872 PAGE_WAKEUP(m); 873 vm_page_free(m); 874 } 875 876 /* 877 * swap_pager_ridpages is a convienience routine that deallocates all 878 * but the required page. this is usually used in error returns that 879 * need to invalidate the "extra" readahead pages. 880 */ 881 static void 882 swap_pager_ridpages(m, count, reqpage) 883 vm_page_t *m; 884 int count; 885 int reqpage; 886 { 887 int i; 888 for (i = 0; i < count; i++) 889 if (i != reqpage) 890 swap_pager_freepage(m[i]); 891 } 892 893 int swapwritecount=0; 894 895 /* 896 * swap_pager_iodone1 is the completion routine for both reads and async writes 897 */ 898 void 899 swap_pager_iodone1(bp) 900 struct buf *bp; 901 { 902 bp->b_flags |= B_DONE; 903 bp->b_flags &= ~B_ASYNC; 904 wakeup((caddr_t)bp); 905 /* 906 if ((bp->b_flags & B_READ) == 0) 907 vwakeup(bp); 908 */ 909 } 910 911 912 int 913 swap_pager_input(swp, m, count, reqpage) 914 register sw_pager_t swp; 915 vm_page_t *m; 916 int count, reqpage; 917 { 918 register struct buf *bp; 919 sw_blk_t swb[count]; 920 register int s; 921 int i; 922 boolean_t rv; 923 vm_offset_t kva, off[count]; 924 swp_clean_t spc; 925 vm_offset_t paging_offset; 926 vm_object_t object; 927 int reqaddr[count]; 928 929 int first, last; 930 int failed; 931 int reqdskregion; 932 933 object = m[reqpage]->object; 934 paging_offset = object->paging_offset; 935 /* 936 * First determine if the page exists in the pager if this is 937 * a sync read. This quickly handles cases where we are 938 * following shadow chains looking for the top level object 939 * with the page. 940 */ 941 if (swp->sw_blocks == NULL) { 942 swap_pager_ridpages(m, count, reqpage); 943 return(VM_PAGER_FAIL); 944 } 945 946 for(i = 0; i < count; i++) { 947 vm_offset_t foff = m[i]->offset + paging_offset; 948 int ix = swap_pager_block_index(swp, foff); 949 if (ix >= swp->sw_nblocks) { 950 int j; 951 if( i <= reqpage) { 952 swap_pager_ridpages(m, count, reqpage); 953 return(VM_PAGER_FAIL); 954 } 955 for(j = i; j < count; j++) { 956 swap_pager_freepage(m[j]); 957 } 958 count = i; 959 break; 960 } 961 962 swb[i] = &swp->sw_blocks[ix]; 963 off[i] = swap_pager_block_offset(swp, foff); 964 reqaddr[i] = swb[i]->swb_block[off[i]]; 965 } 966 967 /* make sure that our required input request is existant */ 968 969 if (reqaddr[reqpage] == SWB_EMPTY || 970 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 971 swap_pager_ridpages(m, count, reqpage); 972 return(VM_PAGER_FAIL); 973 } 974 975 976 reqdskregion = reqaddr[reqpage] / dmmax; 977 978 /* 979 * search backwards for the first contiguous page to transfer 980 */ 981 failed = 0; 982 first = 0; 983 for (i = reqpage - 1; i >= 0; --i) { 984 if ( failed || (reqaddr[i] == SWB_EMPTY) || 985 (swb[i]->swb_valid & (1 << off[i])) == 0 || 986 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 987 ((reqaddr[i] / dmmax) != reqdskregion)) { 988 failed = 1; 989 swap_pager_freepage(m[i]); 990 if (first == 0) 991 first = i + 1; 992 } 993 } 994 /* 995 * search forwards for the last contiguous page to transfer 996 */ 997 failed = 0; 998 last = count; 999 for (i = reqpage + 1; i < count; i++) { 1000 if ( failed || (reqaddr[i] == SWB_EMPTY) || 1001 (swb[i]->swb_valid & (1 << off[i])) == 0 || 1002 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 1003 ((reqaddr[i] / dmmax) != reqdskregion)) { 1004 failed = 1; 1005 swap_pager_freepage(m[i]); 1006 if (last == count) 1007 last = i; 1008 } 1009 } 1010 1011 count = last; 1012 if (first != 0) { 1013 for (i = first; i < count; i++) { 1014 m[i-first] = m[i]; 1015 reqaddr[i-first] = reqaddr[i]; 1016 off[i-first] = off[i]; 1017 } 1018 count -= first; 1019 reqpage -= first; 1020 } 1021 1022 ++swb[reqpage]->swb_locked; 1023 1024 /* 1025 * at this point: 1026 * "m" is a pointer to the array of vm_page_t for paging I/O 1027 * "count" is the number of vm_page_t entries represented by "m" 1028 * "object" is the vm_object_t for I/O 1029 * "reqpage" is the index into "m" for the page actually faulted 1030 */ 1031 1032 spc = NULL; /* we might not use an spc data structure */ 1033 1034 if (count == 1) { 1035 /* 1036 * if a kva has not been allocated, we can only do a one page transfer, 1037 * so we free the other pages that might have been allocated by 1038 * vm_fault. 1039 */ 1040 swap_pager_ridpages(m, count, reqpage); 1041 m[0] = m[reqpage]; 1042 reqaddr[0] = reqaddr[reqpage]; 1043 1044 count = 1; 1045 reqpage = 0; 1046 /* 1047 * get a swap pager clean data structure, block until we get it 1048 */ 1049 if (swap_pager_free.tqh_first == NULL) { 1050 s = splbio(); 1051 if( curproc == pageproc) 1052 (void) swap_pager_clean(); 1053 else 1054 wakeup((caddr_t) &vm_pages_needed); 1055 while (swap_pager_free.tqh_first == NULL) { 1056 swap_pager_needflags |= SWAP_FREE_NEEDED; 1057 tsleep((caddr_t)&swap_pager_free, 1058 PVM, "swpfre", 0); 1059 if( curproc == pageproc) 1060 (void) swap_pager_clean(); 1061 else 1062 wakeup((caddr_t) &vm_pages_needed); 1063 } 1064 splx(s); 1065 } 1066 spc = swap_pager_free.tqh_first; 1067 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1068 kva = spc->spc_kva; 1069 bp = spc->spc_bp; 1070 bzero(bp, sizeof *bp); 1071 bp->b_spc = spc; 1072 } else { 1073 /* 1074 * Get a swap buffer header to perform the IO 1075 */ 1076 bp = getpbuf(); 1077 kva = (vm_offset_t) bp->b_data; 1078 } 1079 1080 /* 1081 * map our page(s) into kva for input 1082 */ 1083 pmap_qenter( kva, m, count); 1084 1085 s = splbio(); 1086 bp->b_flags = B_BUSY | B_READ | B_CALL; 1087 bp->b_iodone = swap_pager_iodone1; 1088 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1089 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1090 crhold(bp->b_rcred); 1091 crhold(bp->b_wcred); 1092 bp->b_un.b_addr = (caddr_t) kva; 1093 bp->b_blkno = reqaddr[0]; 1094 bp->b_bcount = PAGE_SIZE*count; 1095 bp->b_bufsize = PAGE_SIZE*count; 1096 1097 bgetvp( swapdev_vp, bp); 1098 1099 swp->sw_piip++; 1100 1101 /* 1102 * perform the I/O 1103 */ 1104 VOP_STRATEGY(bp); 1105 1106 /* 1107 * wait for the sync I/O to complete 1108 */ 1109 while ((bp->b_flags & B_DONE) == 0) { 1110 tsleep((caddr_t)bp, PVM, "swread", 0); 1111 } 1112 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1113 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1114 1115 --swp->sw_piip; 1116 if (swp->sw_piip == 0) 1117 wakeup((caddr_t) swp); 1118 1119 /* 1120 * relpbuf does this, but we maintain our own buffer 1121 * list also... 1122 */ 1123 if (bp->b_vp) 1124 brelvp(bp); 1125 1126 splx(s); 1127 --swb[reqpage]->swb_locked; 1128 1129 /* 1130 * remove the mapping for kernel virtual 1131 */ 1132 pmap_qremove( kva, count); 1133 1134 if (spc) { 1135 /* 1136 * if we have used an spc, we need to free it. 1137 */ 1138 if( bp->b_rcred != NOCRED) 1139 crfree(bp->b_rcred); 1140 if( bp->b_wcred != NOCRED) 1141 crfree(bp->b_wcred); 1142 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1143 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1144 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1145 wakeup((caddr_t)&swap_pager_free); 1146 } 1147 } else { 1148 /* 1149 * release the physical I/O buffer 1150 */ 1151 relpbuf(bp); 1152 /* 1153 * finish up input if everything is ok 1154 */ 1155 if( rv == VM_PAGER_OK) { 1156 for (i = 0; i < count; i++) { 1157 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1158 m[i]->flags |= PG_CLEAN; 1159 m[i]->flags &= ~PG_LAUNDRY; 1160 if (i != reqpage) { 1161 /* 1162 * whether or not to leave the page activated 1163 * is up in the air, but we should put the page 1164 * on a page queue somewhere. (it already is in 1165 * the object). 1166 * After some emperical results, it is best 1167 * to deactivate the readahead pages. 1168 */ 1169 vm_page_deactivate(m[i]); 1170 1171 /* 1172 * just in case someone was asking for this 1173 * page we now tell them that it is ok to use 1174 */ 1175 m[i]->flags &= ~PG_FAKE; 1176 PAGE_WAKEUP(m[i]); 1177 } 1178 } 1179 if( swap_pager_full) { 1180 _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); 1181 } 1182 } else { 1183 swap_pager_ridpages(m, count, reqpage); 1184 } 1185 } 1186 return(rv); 1187 } 1188 1189 int 1190 swap_pager_output(swp, m, count, flags, rtvals) 1191 register sw_pager_t swp; 1192 vm_page_t *m; 1193 int count; 1194 int flags; 1195 int *rtvals; 1196 { 1197 register struct buf *bp; 1198 sw_blk_t swb[count]; 1199 register int s; 1200 int i, j, ix; 1201 boolean_t rv; 1202 vm_offset_t kva, off, foff; 1203 swp_clean_t spc; 1204 vm_offset_t paging_offset; 1205 vm_object_t object; 1206 int reqaddr[count]; 1207 int failed; 1208 1209 /* 1210 if( count > 1) 1211 printf("off: 0x%x, count: %d\n", m[0]->offset, count); 1212 */ 1213 spc = NULL; 1214 1215 object = m[0]->object; 1216 paging_offset = object->paging_offset; 1217 1218 failed = 0; 1219 for(j=0;j<count;j++) { 1220 foff = m[j]->offset + paging_offset; 1221 ix = swap_pager_block_index(swp, foff); 1222 swb[j] = 0; 1223 if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 1224 rtvals[j] = VM_PAGER_FAIL; 1225 failed = 1; 1226 continue; 1227 } else { 1228 rtvals[j] = VM_PAGER_OK; 1229 } 1230 swb[j] = &swp->sw_blocks[ix]; 1231 ++swb[j]->swb_locked; 1232 if( failed) { 1233 rtvals[j] = VM_PAGER_FAIL; 1234 continue; 1235 } 1236 off = swap_pager_block_offset(swp, foff); 1237 reqaddr[j] = swb[j]->swb_block[off]; 1238 if( reqaddr[j] == SWB_EMPTY) { 1239 int blk; 1240 int tries; 1241 int ntoget; 1242 tries = 0; 1243 s = splbio(); 1244 1245 /* 1246 * if any other pages have been allocated in this block, we 1247 * only try to get one page. 1248 */ 1249 for (i = 0; i < SWB_NPAGES; i++) { 1250 if (swb[j]->swb_block[i] != SWB_EMPTY) 1251 break; 1252 } 1253 1254 1255 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 1256 /* 1257 * this code is alittle conservative, but works 1258 * (the intent of this code is to allocate small chunks 1259 * for small objects) 1260 */ 1261 if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { 1262 ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; 1263 } 1264 1265 retrygetspace: 1266 if (!swap_pager_full && ntoget > 1 && 1267 swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { 1268 1269 for (i = 0; i < ntoget; i++) { 1270 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 1271 swb[j]->swb_valid = 0; 1272 } 1273 1274 reqaddr[j] = swb[j]->swb_block[off]; 1275 } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), 1276 &swb[j]->swb_block[off])) { 1277 /* 1278 * if the allocation has failed, we try to reclaim space and 1279 * retry. 1280 */ 1281 if (++tries == 1) { 1282 swap_pager_reclaim(); 1283 goto retrygetspace; 1284 } 1285 rtvals[j] = VM_PAGER_AGAIN; 1286 failed = 1; 1287 } else { 1288 reqaddr[j] = swb[j]->swb_block[off]; 1289 swb[j]->swb_valid &= ~(1<<off); 1290 } 1291 splx(s); 1292 } 1293 } 1294 1295 /* 1296 * search forwards for the last contiguous page to transfer 1297 */ 1298 failed = 0; 1299 for (i = 0; i < count; i++) { 1300 if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || 1301 (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || 1302 (rtvals[i] != VM_PAGER_OK)) { 1303 failed = 1; 1304 if( rtvals[i] == VM_PAGER_OK) 1305 rtvals[i] = VM_PAGER_AGAIN; 1306 } 1307 } 1308 1309 for(i = 0; i < count; i++) { 1310 if( rtvals[i] != VM_PAGER_OK) { 1311 if( swb[i]) 1312 --swb[i]->swb_locked; 1313 } 1314 } 1315 1316 for(i = 0; i < count; i++) 1317 if( rtvals[i] != VM_PAGER_OK) 1318 break; 1319 1320 if( i == 0) { 1321 return VM_PAGER_AGAIN; 1322 } 1323 1324 count = i; 1325 for(i=0;i<count;i++) { 1326 if( reqaddr[i] == SWB_EMPTY) 1327 printf("I/O to empty block????\n"); 1328 } 1329 1330 /* 1331 */ 1332 1333 /* 1334 * For synchronous writes, we clean up 1335 * all completed async pageouts. 1336 */ 1337 if ((flags & B_ASYNC) == 0) { 1338 swap_pager_clean(); 1339 } 1340 1341 kva = 0; 1342 1343 /* 1344 * we allocate a new kva for transfers > 1 page 1345 * but for transfers == 1 page, the swap_pager_free list contains 1346 * entries that have pre-allocated kva's (for efficiency). 1347 * NOTE -- we do not use the physical buffer pool or the 1348 * preallocated associated kva's because of the potential for 1349 * deadlock. This is very subtile -- but deadlocks or resource 1350 * contention must be avoided on pageouts -- or your system will 1351 * sleep (forever) !!! 1352 */ 1353 if ( count > 1) { 1354 kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE); 1355 if( !kva) { 1356 for (i = 0; i < count; i++) { 1357 if( swb[i]) 1358 --swb[i]->swb_locked; 1359 rtvals[i] = VM_PAGER_AGAIN; 1360 } 1361 return VM_PAGER_AGAIN; 1362 } 1363 } 1364 1365 /* 1366 * get a swap pager clean data structure, block until we get it 1367 */ 1368 if (swap_pager_free.tqh_first == NULL) { 1369 /* 1370 if (flags & B_ASYNC) { 1371 for(i=0;i<count;i++) { 1372 rtvals[i] = VM_PAGER_AGAIN; 1373 if( swb[i]) 1374 --swb[i]->swb_locked; 1375 } 1376 return VM_PAGER_AGAIN; 1377 } 1378 */ 1379 1380 s = splbio(); 1381 if( curproc == pageproc) 1382 (void) swap_pager_clean(); 1383 else 1384 wakeup((caddr_t) &vm_pages_needed); 1385 while (swap_pager_free.tqh_first == NULL) { 1386 swap_pager_needflags |= SWAP_FREE_NEEDED; 1387 tsleep((caddr_t)&swap_pager_free, 1388 PVM, "swpfre", 0); 1389 if( curproc == pageproc) 1390 (void) swap_pager_clean(); 1391 else 1392 wakeup((caddr_t) &vm_pages_needed); 1393 } 1394 splx(s); 1395 } 1396 1397 spc = swap_pager_free.tqh_first; 1398 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1399 if( !kva) { 1400 kva = spc->spc_kva; 1401 spc->spc_altkva = 0; 1402 } else { 1403 spc->spc_altkva = kva; 1404 } 1405 1406 /* 1407 * map our page(s) into kva for I/O 1408 */ 1409 pmap_qenter(kva, m, count); 1410 1411 /* 1412 * get the base I/O offset into the swap file 1413 */ 1414 for(i=0;i<count;i++) { 1415 foff = m[i]->offset + paging_offset; 1416 off = swap_pager_block_offset(swp, foff); 1417 /* 1418 * if we are setting the valid bit anew, 1419 * then diminish the swap free space 1420 */ 1421 if( (swb[i]->swb_valid & (1 << off)) == 0) 1422 vm_swap_size -= btodb(PAGE_SIZE); 1423 1424 /* 1425 * set the valid bit 1426 */ 1427 swb[i]->swb_valid |= (1 << off); 1428 /* 1429 * and unlock the data structure 1430 */ 1431 --swb[i]->swb_locked; 1432 } 1433 1434 s = splbio(); 1435 /* 1436 * Get a swap buffer header and perform the IO 1437 */ 1438 bp = spc->spc_bp; 1439 bzero(bp, sizeof *bp); 1440 bp->b_spc = spc; 1441 1442 bp->b_flags = B_BUSY; 1443 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1444 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1445 crhold(bp->b_rcred); 1446 crhold(bp->b_wcred); 1447 bp->b_un.b_addr = (caddr_t) kva; 1448 bp->b_blkno = reqaddr[0]; 1449 bgetvp( swapdev_vp, bp); 1450 1451 bp->b_bcount = PAGE_SIZE*count; 1452 bp->b_bufsize = PAGE_SIZE*count; 1453 swapdev_vp->v_numoutput++; 1454 1455 /* 1456 * If this is an async write we set up additional buffer fields 1457 * and place a "cleaning" entry on the inuse queue. 1458 */ 1459 if ( flags & B_ASYNC ) { 1460 spc->spc_flags = 0; 1461 spc->spc_swp = swp; 1462 for(i=0;i<count;i++) 1463 spc->spc_m[i] = m[i]; 1464 spc->spc_count = count; 1465 /* 1466 * the completion routine for async writes 1467 */ 1468 bp->b_flags |= B_CALL; 1469 bp->b_iodone = swap_pager_iodone; 1470 bp->b_dirtyoff = 0; 1471 bp->b_dirtyend = bp->b_bcount; 1472 swp->sw_poip++; 1473 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 1474 } else { 1475 swp->sw_poip++; 1476 bp->b_flags |= B_CALL; 1477 bp->b_iodone = swap_pager_iodone1; 1478 } 1479 /* 1480 * perform the I/O 1481 */ 1482 VOP_STRATEGY(bp); 1483 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { 1484 if ((bp->b_flags & B_DONE) == B_DONE) { 1485 swap_pager_clean(); 1486 } 1487 splx(s); 1488 for(i=0;i<count;i++) { 1489 rtvals[i] = VM_PAGER_PEND; 1490 } 1491 return VM_PAGER_PEND; 1492 } 1493 1494 /* 1495 * wait for the sync I/O to complete 1496 */ 1497 while ((bp->b_flags & B_DONE) == 0) { 1498 tsleep((caddr_t)bp, PVM, "swwrt", 0); 1499 } 1500 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK; 1501 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1502 1503 --swp->sw_poip; 1504 if (swp->sw_poip == 0) 1505 wakeup((caddr_t) swp); 1506 1507 if (bp->b_vp) 1508 brelvp(bp); 1509 1510 splx(s); 1511 1512 /* 1513 * remove the mapping for kernel virtual 1514 */ 1515 pmap_qremove( kva, count); 1516 1517 /* 1518 * if we have written the page, then indicate that the page 1519 * is clean. 1520 */ 1521 if (rv == VM_PAGER_OK) { 1522 for(i=0;i<count;i++) { 1523 if( rtvals[i] == VM_PAGER_OK) { 1524 m[i]->flags |= PG_CLEAN; 1525 m[i]->flags &= ~PG_LAUNDRY; 1526 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1527 /* 1528 * optimization, if a page has been read during the 1529 * pageout process, we activate it. 1530 */ 1531 if ( (m[i]->flags & PG_ACTIVE) == 0 && 1532 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) 1533 vm_page_activate(m[i]); 1534 } 1535 } 1536 } else { 1537 for(i=0;i<count;i++) { 1538 rtvals[i] = rv; 1539 m[i]->flags |= PG_LAUNDRY; 1540 } 1541 } 1542 1543 if( spc->spc_altkva) 1544 kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE); 1545 1546 if( bp->b_rcred != NOCRED) 1547 crfree(bp->b_rcred); 1548 if( bp->b_wcred != NOCRED) 1549 crfree(bp->b_wcred); 1550 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1551 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1552 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1553 wakeup((caddr_t)&swap_pager_free); 1554 } 1555 1556 return(rv); 1557 } 1558 1559 boolean_t 1560 swap_pager_clean() 1561 { 1562 register swp_clean_t spc, tspc; 1563 register int s; 1564 1565 tspc = NULL; 1566 if (swap_pager_done.tqh_first == NULL) 1567 return FALSE; 1568 for (;;) { 1569 s = splbio(); 1570 /* 1571 * Look up and removal from done list must be done 1572 * at splbio() to avoid conflicts with swap_pager_iodone. 1573 */ 1574 while (spc = swap_pager_done.tqh_first) { 1575 if( spc->spc_altkva) { 1576 pmap_qremove( spc->spc_altkva, spc->spc_count); 1577 kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE); 1578 spc->spc_altkva = 0; 1579 } else { 1580 pmap_qremove( spc->spc_kva, 1); 1581 } 1582 swap_pager_finish(spc); 1583 TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 1584 goto doclean; 1585 } 1586 1587 /* 1588 * No operations done, thats all we can do for now. 1589 */ 1590 1591 splx(s); 1592 break; 1593 1594 /* 1595 * The desired page was found to be busy earlier in 1596 * the scan but has since completed. 1597 */ 1598 doclean: 1599 if (tspc && tspc == spc) { 1600 tspc = NULL; 1601 } 1602 spc->spc_flags = 0; 1603 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1604 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1605 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1606 wakeup((caddr_t)&swap_pager_free); 1607 } 1608 ++cleandone; 1609 splx(s); 1610 } 1611 1612 return(tspc ? TRUE : FALSE); 1613 } 1614 1615 void 1616 swap_pager_finish(spc) 1617 register swp_clean_t spc; 1618 { 1619 vm_object_t object = spc->spc_m[0]->object; 1620 int i; 1621 1622 if ((object->paging_in_progress -= spc->spc_count) == 0) 1623 thread_wakeup((int) object); 1624 1625 /* 1626 * If no error mark as clean and inform the pmap system. 1627 * If error, mark as dirty so we will try again. 1628 * (XXX could get stuck doing this, should give up after awhile) 1629 */ 1630 if (spc->spc_flags & SPC_ERROR) { 1631 for(i=0;i<spc->spc_count;i++) { 1632 printf("swap_pager_finish: clean of page %x failed\n", 1633 VM_PAGE_TO_PHYS(spc->spc_m[i])); 1634 spc->spc_m[i]->flags |= PG_LAUNDRY; 1635 } 1636 } else { 1637 for(i=0;i<spc->spc_count;i++) { 1638 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); 1639 spc->spc_m[i]->flags |= PG_CLEAN; 1640 } 1641 } 1642 1643 1644 for(i=0;i<spc->spc_count;i++) { 1645 /* 1646 * we wakeup any processes that are waiting on 1647 * these pages. 1648 */ 1649 PAGE_WAKEUP(spc->spc_m[i]); 1650 } 1651 nswiodone -= spc->spc_count; 1652 1653 return; 1654 } 1655 1656 /* 1657 * swap_pager_iodone 1658 */ 1659 void 1660 swap_pager_iodone(bp) 1661 register struct buf *bp; 1662 { 1663 register swp_clean_t spc; 1664 int s; 1665 1666 s = splbio(); 1667 spc = (swp_clean_t) bp->b_spc; 1668 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 1669 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 1670 if (bp->b_flags & B_ERROR) { 1671 spc->spc_flags |= SPC_ERROR; 1672 printf("error %d blkno %d sz %d ", 1673 bp->b_error, bp->b_blkno, bp->b_bcount); 1674 } 1675 1676 /* 1677 if ((bp->b_flags & B_READ) == 0) 1678 vwakeup(bp); 1679 */ 1680 1681 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); 1682 if (bp->b_vp) { 1683 brelvp(bp); 1684 } 1685 if( bp->b_rcred != NOCRED) 1686 crfree(bp->b_rcred); 1687 if( bp->b_wcred != NOCRED) 1688 crfree(bp->b_wcred); 1689 1690 nswiodone += spc->spc_count; 1691 if (--spc->spc_swp->sw_poip == 0) { 1692 wakeup((caddr_t)spc->spc_swp); 1693 } 1694 1695 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1696 swap_pager_inuse.tqh_first == 0) { 1697 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1698 wakeup((caddr_t)&swap_pager_free); 1699 wakeup((caddr_t)&vm_pages_needed); 1700 } 1701 1702 if (vm_pageout_pages_needed) { 1703 wakeup((caddr_t)&vm_pageout_pages_needed); 1704 } 1705 1706 if ((swap_pager_inuse.tqh_first == NULL) || 1707 (cnt.v_free_count < cnt.v_free_min && 1708 nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { 1709 wakeup((caddr_t)&vm_pages_needed); 1710 } 1711 splx(s); 1712 } 1713 1714 /* 1715 * return true if any swap control structures can be allocated 1716 */ 1717 int 1718 swap_pager_ready() { 1719 if( swap_pager_free.tqh_first) 1720 return 1; 1721 else 1722 return 0; 1723 } 1724