/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id$
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

extern int nswbuf;
int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_pageout_pages_needed;
extern int vm_swap_size;
extern struct vnode *swapdev_vp;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
    TAILQ_ENTRY(swpagerclean) spc_list;
    int spc_flags;
    struct buf *spc_bp;
    sw_pager_t spc_swp;
    vm_offset_t spc_kva;
    vm_offset_t spc_altkva;
    int spc_count;
    vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

void swap_pager_init(void);
vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
void swap_pager_dealloc(vm_pager_t);
boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
void swap_pager_iodone(struct buf *);
boolean_t swap_pager_clean();

extern struct pagerops swappagerops;

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
    &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
    swap_pager_init,
    swap_pager_alloc,
    swap_pager_dealloc,
    swap_pager_getpage,
    swap_pager_getmulti,
    swap_pager_putpage,
    swap_pager_putmulti,
    swap_pager_haspage
};

extern int nswbuf;

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

struct buf *getpbuf();
void relpbuf(struct buf *bp);

static inline void
swapsizecheck()
{
    if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
        /* report once, on the transition into the full state */
        if (swap_pager_full == 0)
            printf("swap_pager: out of space\n");
        swap_pager_full = 1;
    } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
        swap_pager_full = 0;
}
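/*
 * Aside (illustrative, not part of the driver): swapsizecheck() above is a
 * low/high watermark hysteresis -- the "full" state latches on below 128
 * pages' worth of free swap and only clears once free swap rises past 192
 * pages' worth, so the state does not flap at the boundary.  A minimal
 * standalone sketch of the same idea, with hypothetical names and
 * thresholds, compiled out:
 */
#if 0
static int resource_full;		/* latched out-of-space indicator */

static void
watermark_check(int avail, int lowmark, int highmark)
{
    if (avail < lowmark)
        resource_full = 1;		/* latch on below the low mark */
    else if (avail > highmark)
        resource_full = 0;		/* clear only above the high mark */
}
#endif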
void
swap_pager_init()
{
    extern int dmmin, dmmax;

    dfltpagerops = &swappagerops;

    TAILQ_INIT(&swap_pager_list);
    TAILQ_INIT(&swap_pager_un_list);

    /*
     * Initialize clean lists
     */
    TAILQ_INIT(&swap_pager_inuse);
    TAILQ_INIT(&swap_pager_done);
    TAILQ_INIT(&swap_pager_free);

    require_swap_init = 1;

    /*
     * Calculate the swap allocation constants.
     */
    dmmin = CLBYTES / DEV_BSIZE;
    dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
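/*
 * Worked example for the constants above (assuming 4K pages, a DEV_BSIZE
 * of 512, and SWB_NPAGES == 8; none of these values are asserted by this
 * file): btodb(PAGE_SIZE) == 8, so one sw_blk covers 8 * 8 == 64 disk
 * blocks, dmmin == 8, and dmmax == 128 disk blocks, i.e. 64KB.  The
 * clustering code later in this file never lets a transfer cross a
 * dmmax-sized boundary of the swap area.
 */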
/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
    caddr_t handle;
    register vm_size_t size;
    vm_prot_t prot;
    vm_offset_t offset;
{
    register vm_pager_t pager;
    register sw_pager_t swp;
    int waitok;
    int i, j;

    if (require_swap_init) {
        swp_clean_t spc;
        struct buf *bp;
        /*
         * kva's are allocated here so that we don't need to keep
         * doing kmem_alloc pageables at runtime
         */
        for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
            spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE);
            if (!spc->spc_kva) {
                break;
            }
            spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
            if (!spc->spc_bp) {
                kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
                break;
            }
            spc->spc_flags = 0;
            TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
        }
        require_swap_init = 0;
        if (size == 0)
            return (NULL);
    }

    /*
     * If this is a "named" anonymous region, look it up and
     * return the appropriate pager if it exists.
     */
    if (handle) {
        pager = vm_pager_lookup(&swap_pager_list, handle);
        if (pager != NULL) {
            /*
             * Use vm_object_lookup to gain a reference
             * to the object and also to remove from the
             * object cache.
             */
            if (vm_object_lookup(pager) == NULL)
                panic("swap_pager_alloc: bad object");
            return (pager);
        }
    }

    if (swap_pager_full) {
        return (NULL);
    }

    /*
     * Pager doesn't exist, allocate swap management resources
     * and initialize.
     */
    waitok = handle ? M_WAITOK : M_NOWAIT;
    pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
    if (pager == NULL)
        return (NULL);
    swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
    if (swp == NULL) {
        free((caddr_t) pager, M_VMPAGER);
        return (NULL);
    }
    size = round_page(size);
    swp->sw_osize = size;
    swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
    swp->sw_blocks = (sw_blk_t)
        malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
        M_VMPGDATA, waitok);
    if (swp->sw_blocks == NULL) {
        free((caddr_t) swp, M_VMPGDATA);
        free((caddr_t) pager, M_VMPAGER);
        return (NULL);
    }

    for (i = 0; i < swp->sw_nblocks; i++) {
        swp->sw_blocks[i].swb_valid = 0;
        swp->sw_blocks[i].swb_locked = 0;
        for (j = 0; j < SWB_NPAGES; j++)
            swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
    }

    swp->sw_poip = 0;
    if (handle) {
        vm_object_t object;

        swp->sw_flags = SW_NAMED;
        TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
        /*
         * Consistent with other pagers: return with object
         * referenced.  Can't do this with handle == NULL
         * since it might be the pageout daemon calling.
         */
        object = vm_object_allocate(size);
        vm_object_enter(object, pager);
        vm_object_setpager(object, pager, 0, FALSE);
    } else {
        swp->sw_flags = 0;
        TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
    }
    pager->pg_handle = handle;
    pager->pg_ops = &swappagerops;
    pager->pg_type = PG_SWAP;
    pager->pg_data = (caddr_t) swp;

    return (pager);
}
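/*
 * Note on the sw_nblocks computation above: it is the standard round-up
 * division, equivalent to the howmany() macro from <sys/param.h>.  An
 * illustrative restatement (not used by the code), compiled out:
 */
#if 0
    swp->sw_nblocks = howmany(btodb(size), btodb(SWB_NPAGES * PAGE_SIZE));
#endif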
/*
 * Returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written.
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
    sw_pager_t swp;
    vm_offset_t offset;
    int *valid;
{
    register sw_blk_t swb;
    int ix;

    if (valid)
        *valid = 0;
    ix = offset / (SWB_NPAGES * PAGE_SIZE);
    if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
        return (FALSE);
    }
    swb = &swp->sw_blocks[ix];
    ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
    if (valid)
        *valid = swb->swb_valid & (1 << ix);
    return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
    sw_pager_t swp;
    vm_offset_t offset;
    int valid;
{
    register sw_blk_t swb;
    int ix;

    ix = offset / (SWB_NPAGES * PAGE_SIZE);
    if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
        return;

    swb = &swp->sw_blocks[ix];
    ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
    if (valid)
        swb->swb_valid |= (1 << ix);
    else
        swb->swb_valid &= ~(1 << ix);
    return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
    unsigned tmpalloc;
    unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);

    if (amount < nblocksfrag) {
        if (rlist_alloc(&swapfrag, amount, rtval))
            return 1;
        if (!rlist_alloc(&swapmap, nblocksfrag, &tmpalloc))
            return 0;
        rlist_free(&swapfrag, tmpalloc + amount, tmpalloc + nblocksfrag - 1);
        *rtval = tmpalloc;
        return 1;
    }
    if (!rlist_alloc(&swapmap, amount, rtval))
        return 0;
    else
        return 1;
}
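/*
 * The allocation policy above, restated (sketch only; rlist_alloc and
 * rlist_free are the real <sys/rlist.h> primitives, the step numbering is
 * editorial):
 *
 *   1. a sub-fragment request first tries the private "swapfrag" pool;
 *   2. on a miss, one full fragment (nblocksfrag blocks) is carved out of
 *      the global swapmap, the head is handed back to the caller, and the
 *      unused tail is donated to swapfrag;
 *   3. fragment-sized or larger requests bypass the pool entirely.
 *
 * This keeps small allocations from peppering the global map with
 * sub-fragment holes.
 */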
/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
    unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
    unsigned tmpalloc;

    if (((to + 1) - from) >= nblocksfrag) {
        while ((from + nblocksfrag) <= to + 1) {
            rlist_free(&swapmap, from, from + nblocksfrag - 1);
            from += nblocksfrag;
        }
    }
    if (from >= to)
        return;
    rlist_free(&swapfrag, from, to);
    while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
        rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag - 1);
    }
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
    sw_pager_t swp;
    vm_offset_t start;
    vm_offset_t size;
{
    vm_offset_t i;
    int s;

    s = splbio();
    for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
        int valid;
        int *addr = swap_pager_diskaddr(swp, i, &valid);

        if (addr && *addr != SWB_EMPTY) {
            swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
            if (valid) {
                vm_swap_size += btodb(PAGE_SIZE);
                swap_pager_setvalid(swp, i, 0);
            }
            *addr = SWB_EMPTY;
        }
    }
    swapsizecheck();
    splx(s);
}

void
swap_pager_freespace(pager, start, size)
    vm_pager_t pager;
    vm_offset_t start;
    vm_offset_t size;
{
    _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}
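/*
 * Worked example for swap_pager_freeswapspace() (assuming
 * nblocksfrag == 64): freeing blocks 100..227 (128 blocks) returns the
 * chunks 100..163 and 164..227 straight to swapmap and leaves nothing for
 * the fragment pool; freeing 100..149 (50 blocks) instead lands in
 * swapfrag, after which any full 64-block run that has accumulated there
 * is migrated back to swapmap by the trailing rlist_alloc/rlist_free loop.
 */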
/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
    vm_pager_t p;
    sw_pager_t swp;
    int i, j, k;
    int s;
    int reclaimcount;
    static int reclaims[MAXRECLAIM];
    static int in_reclaim;

    /*
     * allow only one process to be in the swap_pager_reclaim subroutine
     */
    s = splbio();
    if (in_reclaim) {
        tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
        splx(s);
        return;
    }
    in_reclaim = 1;
    reclaimcount = 0;

    /* for each pager queue */
    for (k = 0; swp_qs[k]; k++) {

        p = swp_qs[k]->tqh_first;
        while (p && (reclaimcount < MAXRECLAIM)) {

            /*
             * see if any blocks associated with a pager have been
             * allocated but not used (written)
             */
            swp = (sw_pager_t) p->pg_data;
            for (i = 0; i < swp->sw_nblocks; i++) {
                sw_blk_t swb = &swp->sw_blocks[i];

                if (swb->swb_locked)
                    continue;
                for (j = 0; j < SWB_NPAGES; j++) {
                    if (swb->swb_block[j] != SWB_EMPTY &&
                        (swb->swb_valid & (1 << j)) == 0) {
                        reclaims[reclaimcount++] = swb->swb_block[j];
                        swb->swb_block[j] = SWB_EMPTY;
                        if (reclaimcount >= MAXRECLAIM)
                            goto rfinished;
                    }
                }
            }
            p = p->pg_list.tqe_next;
        }
    }

rfinished:

    /*
     * free the blocks that have been added to the reclaim list
     */
    for (i = 0; i < reclaimcount; i++) {
        swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
        swapsizecheck();
        wakeup((caddr_t) &in_reclaim);
    }

    splx(s);
    in_reclaim = 0;
    wakeup((caddr_t) &in_reclaim);
}
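/*
 * The in_reclaim handshake above is the classic pre-SMP BSD idiom for
 * single-threading a routine: a static flag plus tsleep()/wakeup() on the
 * flag's address, with splbio() keeping interrupts from racing the
 * test-and-set.  A stripped-down sketch of the pattern (hypothetical
 * names, not part of the pager), compiled out:
 */
#if 0
static int busy;

static void
do_once(void)
{
    int s = splbio();

    if (busy) {
        /* someone else is doing the work; wait for them instead */
        tsleep((caddr_t) &busy, PSWP, "once", 0);
        splx(s);
        return;
    }
    busy = 1;

    /* ... the single-threaded work goes here ... */

    busy = 0;
    splx(s);
    wakeup((caddr_t) &busy);	/* release any waiters */
}
#endif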
/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
    vm_pager_t srcpager;
    vm_offset_t srcoffset;
    vm_pager_t dstpager;
    vm_offset_t dstoffset;
    vm_offset_t offset;
{
    sw_pager_t srcswp, dstswp;
    vm_offset_t i;
    int s;

    srcswp = (sw_pager_t) srcpager->pg_data;
    dstswp = (sw_pager_t) dstpager->pg_data;

    /*
     * remove the source pager from the swap_pager internal queue
     */
    s = splbio();
    if (srcswp->sw_flags & SW_NAMED) {
        TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
        srcswp->sw_flags &= ~SW_NAMED;
    } else {
        TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
    }

    while (srcswp->sw_poip) {
        tsleep((caddr_t) srcswp, PVM, "spgout", 0);
    }
    splx(s);

    /*
     * clean all of the pages that are currently active and finished
     */
    (void) swap_pager_clean();

    s = splbio();
    /*
     * clear source block before destination object
     * (release allocated space)
     */
    for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
        int valid;
        int *addr = swap_pager_diskaddr(srcswp, i, &valid);

        if (addr && *addr != SWB_EMPTY) {
            swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
            if (valid)
                vm_swap_size += btodb(PAGE_SIZE);
            swapsizecheck();
            *addr = SWB_EMPTY;
        }
    }
    /*
     * transfer source to destination
     */
    for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
        int srcvalid, dstvalid;
        int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
            &srcvalid);
        int *dstaddrp;

        /*
         * see if the source has space allocated
         */
        if (srcaddrp && *srcaddrp != SWB_EMPTY) {
            /*
             * if the source is valid and the dest has no space, then
             * copy the allocation from the source to the dest.
             */
            if (srcvalid) {
                dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
                /*
                 * if the dest block is allocated but not yet valid,
                 * release it so the source block can be moved in below.
                 * (if the dest already holds valid data, the source
                 * block is simply deallocated further down.)
                 */
                if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
                    swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
                    *dstaddrp = SWB_EMPTY;
                }
                if (dstaddrp && *dstaddrp == SWB_EMPTY) {
                    *dstaddrp = *srcaddrp;
                    *srcaddrp = SWB_EMPTY;
                    swap_pager_setvalid(dstswp, i + dstoffset, 1);
                    vm_swap_size -= btodb(PAGE_SIZE);
                }
            }
            /*
             * if the source is not empty at this point, then deallocate the space.
             */
            if (*srcaddrp != SWB_EMPTY) {
                swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
                if (srcvalid)
                    vm_swap_size += btodb(PAGE_SIZE);
                *srcaddrp = SWB_EMPTY;
            }
        }
    }

    /*
     * deallocate the rest of the source object
     */
    for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
        int valid;
        int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);

        if (srcaddrp && *srcaddrp != SWB_EMPTY) {
            swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
            if (valid)
                vm_swap_size += btodb(PAGE_SIZE);
            *srcaddrp = SWB_EMPTY;
        }
    }

    swapsizecheck();
    splx(s);

    free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
    srcswp->sw_blocks = 0;
    free((caddr_t) srcswp, M_VMPGDATA);
    srcpager->pg_data = 0;
    free((caddr_t) srcpager, M_VMPAGER);

    return;
}
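/*
 * Range summary for swap_pager_copy() above: with source offset
 * (offset + srcoffset) lined up against destination offset 0, the three
 * loops cover, in source-offset terms,
 *
 *   [0, offset + srcoffset)
 *       freed -- a prefix the destination never sees;
 *   [offset + srcoffset, offset + srcoffset + dstswp->sw_osize)
 *       transferred, or freed when the destination page already holds
 *       valid data;
 *   [dstswp->sw_osize + offset + srcoffset, srcswp->sw_osize)
 *       freed -- trailing pages beyond the destination's size.
 */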
void
swap_pager_dealloc(pager)
    vm_pager_t pager;
{
    register int i, j;
    register sw_blk_t bp;
    register sw_pager_t swp;
    int s;

    /*
     * Remove from list right away so lookups will fail if we
     * block for pageout completion.
     */
    s = splbio();
    swp = (sw_pager_t) pager->pg_data;
    if (swp->sw_flags & SW_NAMED) {
        TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
        swp->sw_flags &= ~SW_NAMED;
    } else {
        TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
    }
    /*
     * Wait for all pageouts to finish and remove
     * all entries from cleaning list.
     */
    while (swp->sw_poip) {
        tsleep((caddr_t) swp, PVM, "swpout", 0);
    }
    splx(s);

    (void) swap_pager_clean();

    /*
     * Free left over swap blocks
     */
    s = splbio();
    for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
        for (j = 0; j < SWB_NPAGES; j++)
            if (bp->swb_block[j] != SWB_EMPTY) {
                swap_pager_freeswapspace((unsigned) bp->swb_block[j],
                    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
                if (bp->swb_valid & (1 << j))
                    vm_swap_size += btodb(PAGE_SIZE);
                bp->swb_block[j] = SWB_EMPTY;
            }
    }
    splx(s);
    swapsizecheck();

    /*
     * Free swap management resources
     */
    free((caddr_t) swp->sw_blocks, M_VMPGDATA);
    swp->sw_blocks = 0;
    free((caddr_t) swp, M_VMPGDATA);
    pager->pg_data = 0;
    free((caddr_t) pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
    vm_pager_t pager;
    vm_page_t *m;
    int count;
    int reqpage;
    boolean_t sync;
{
    if (reqpage >= count)
        panic("swap_pager_getmulti: reqpage >= count");
    return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
    vm_pager_t pager;
    vm_page_t m;
    boolean_t sync;
{
    vm_page_t marray[1];

    marray[0] = m;
    return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
    vm_pager_t pager;
    vm_page_t *m;
    int c;
    boolean_t sync;
    int *rtvals;
{
    int flags;

    if (pager == NULL) {
        (void) swap_pager_clean();
        return VM_PAGER_OK;
    }

    flags = B_WRITE;
    if (!sync)
        flags |= B_ASYNC;

    return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
    vm_pager_t pager;
    vm_page_t m;
    boolean_t sync;
{
    int flags;
    vm_page_t marray[1];
    int rtvals[1];

    if (pager == NULL) {
        (void) swap_pager_clean();
        return VM_PAGER_OK;
    }

    marray[0] = m;
    flags = B_WRITE;
    if (!sync)
        flags |= B_ASYNC;

    swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

    return rtvals[0];
}

static inline int const
swap_pager_block_index(swp, offset)
    sw_pager_t swp;
    vm_offset_t offset;
{
    return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int const
swap_pager_block_offset(swp, offset)
    sw_pager_t swp;
    vm_offset_t offset;
{
    return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}
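/*
 * Worked example for the two helpers above (assuming SWB_NPAGES == 8 and
 * 4K pages): object offset 0x13000 yields block index
 * 0x13000 / 0x8000 == 2 and page slot (0x13000 % 0x8000) / 0x1000 == 3, so
 * the page's disk address lives in sw_blocks[2].swb_block[3] and its
 * written-ness in bit (1 << 3) of sw_blocks[2].swb_valid.
 */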
/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
    sw_pager_t swp;
    vm_offset_t offset;
{
    register sw_blk_t swb;
    int ix;

    ix = offset / (SWB_NPAGES * PAGE_SIZE);
    if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
        return (FALSE);
    }
    swb = &swp->sw_blocks[ix];
    ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
    if (swb->swb_block[ix] != SWB_EMPTY) {
        if (swb->swb_valid & (1 << ix))
            return TRUE;
    }

    return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  this routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
    vm_pager_t pager;
    vm_offset_t offset;
{
    return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
    vm_page_t m;
{
    PAGE_WAKEUP(m);
    vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
    vm_page_t *m;
    int count;
    int reqpage;
{
    int i;

    for (i = 0; i < count; i++)
        if (i != reqpage)
            swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * synchronous writes
 */
void
swap_pager_iodone1(bp)
    struct buf *bp;
{
    bp->b_flags |= B_DONE;
    bp->b_flags &= ~B_ASYNC;
    wakeup((caddr_t) bp);
/*
    if ((bp->b_flags & B_READ) == 0)
        vwakeup(bp);
*/
}
int
swap_pager_input(swp, m, count, reqpage)
    register sw_pager_t swp;
    vm_page_t *m;
    int count, reqpage;
{
    register struct buf *bp;
    sw_blk_t swb[count];
    register int s;
    int i;
    boolean_t rv;
    vm_offset_t kva, off[count];
    swp_clean_t spc;
    vm_offset_t paging_offset;
    vm_object_t object;
    int reqaddr[count];

    int first, last;
    int failed;
    int reqdskregion;

    object = m[reqpage]->object;
    paging_offset = object->paging_offset;
    /*
     * First determine if the page exists in the pager if this is
     * a sync read.  This quickly handles cases where we are
     * following shadow chains looking for the top level object
     * with the page.
     */
    if (swp->sw_blocks == NULL) {
        swap_pager_ridpages(m, count, reqpage);
        return (VM_PAGER_FAIL);
    }

    for (i = 0; i < count; i++) {
        vm_offset_t foff = m[i]->offset + paging_offset;
        int ix = swap_pager_block_index(swp, foff);

        if (ix >= swp->sw_nblocks) {
            int j;

            if (i <= reqpage) {
                swap_pager_ridpages(m, count, reqpage);
                return (VM_PAGER_FAIL);
            }
            for (j = i; j < count; j++) {
                swap_pager_freepage(m[j]);
            }
            count = i;
            break;
        }

        swb[i] = &swp->sw_blocks[ix];
        off[i] = swap_pager_block_offset(swp, foff);
        reqaddr[i] = swb[i]->swb_block[off[i]];
    }

    /* make sure the page we actually need is present in the pager */
    if (reqaddr[reqpage] == SWB_EMPTY ||
        (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
        swap_pager_ridpages(m, count, reqpage);
        return (VM_PAGER_FAIL);
    }

    reqdskregion = reqaddr[reqpage] / dmmax;

    /*
     * search backwards for the first contiguous page to transfer
     */
    failed = 0;
    first = 0;
    for (i = reqpage - 1; i >= 0; --i) {
        if (failed || (reqaddr[i] == SWB_EMPTY) ||
            (swb[i]->swb_valid & (1 << off[i])) == 0 ||
            (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
            ((reqaddr[i] / dmmax) != reqdskregion)) {
            failed = 1;
            swap_pager_freepage(m[i]);
            if (first == 0)
                first = i + 1;
        }
    }
    /*
     * search forwards for the last contiguous page to transfer
     */
    failed = 0;
    last = count;
    for (i = reqpage + 1; i < count; i++) {
        if (failed || (reqaddr[i] == SWB_EMPTY) ||
            (swb[i]->swb_valid & (1 << off[i])) == 0 ||
            (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
            ((reqaddr[i] / dmmax) != reqdskregion)) {
            failed = 1;
            swap_pager_freepage(m[i]);
            if (last == count)
                last = i;
        }
    }

    count = last;
    if (first != 0) {
        for (i = first; i < count; i++) {
            m[i - first] = m[i];
            reqaddr[i - first] = reqaddr[i];
            off[i - first] = off[i];
        }
        count -= first;
        reqpage -= first;
    }

    ++swb[reqpage]->swb_locked;

    /*
     * at this point:
     * "m" is a pointer to the array of vm_page_t for paging I/O
     * "count" is the number of vm_page_t entries represented by "m"
     * "object" is the vm_object_t for I/O
     * "reqpage" is the index into "m" for the page actually faulted
     */

    spc = NULL;	/* we might not use an spc data structure */
    kva = 0;

    /*
     * we allocate a new kva for transfers > 1 page
     * but for transfers == 1 page, the swap_pager_free list contains
     * entries that have pre-allocated kva's (for efficiency).
     */
    if (count > 1) {
        kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
    }

    if (!kva) {
        /*
         * if a kva has not been allocated, we can only do a one page transfer,
         * so we free the other pages that might have been allocated by
         * vm_fault.
         */
        swap_pager_ridpages(m, count, reqpage);
        m[0] = m[reqpage];
        reqaddr[0] = reqaddr[reqpage];

        count = 1;
        reqpage = 0;
        /*
         * get a swap pager clean data structure, block until we get it
         */
        if (swap_pager_free.tqh_first == NULL) {
            s = splbio();
            if (curproc == pageproc)
                (void) swap_pager_clean();
            else
                wakeup((caddr_t) &vm_pages_needed);
            while (swap_pager_free.tqh_first == NULL) {
                swap_pager_needflags |= SWAP_FREE_NEEDED;
                tsleep((caddr_t) &swap_pager_free,
                    PVM, "swpfre", 0);
                if (curproc == pageproc)
                    (void) swap_pager_clean();
                else
                    wakeup((caddr_t) &vm_pages_needed);
            }
            splx(s);
        }
        spc = swap_pager_free.tqh_first;
        TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
        kva = spc->spc_kva;
    }

    /*
     * map our page(s) into kva for input
     */
    for (i = 0; i < count; i++) {
        pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
    }
    pmap_update();

    /*
     * Get a swap buffer header and perform the IO
     */
    if (spc) {
        bp = spc->spc_bp;
        bzero(bp, sizeof *bp);
        bp->b_spc = spc;
    } else {
        bp = getpbuf();
    }

    s = splbio();
    bp->b_flags = B_BUSY | B_READ | B_CALL;
    bp->b_iodone = swap_pager_iodone1;
    bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
    bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
    crhold(bp->b_rcred);
    crhold(bp->b_wcred);
    bp->b_un.b_addr = (caddr_t) kva;
    bp->b_blkno = reqaddr[0];
    bp->b_bcount = PAGE_SIZE * count;
    bp->b_bufsize = PAGE_SIZE * count;

/*
    VHOLD(swapdev_vp);
    bp->b_vp = swapdev_vp;
    if (swapdev_vp->v_type == VBLK)
        bp->b_dev = swapdev_vp->v_rdev;
*/
    bgetvp(swapdev_vp, bp);

    swp->sw_piip++;

    /*
     * perform the I/O
     */
    VOP_STRATEGY(bp);

    /*
     * wait for the sync I/O to complete
     */
    while ((bp->b_flags & B_DONE) == 0) {
        tsleep((caddr_t) bp, PVM, "swread", 0);
    }
    rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
    bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

    --swp->sw_piip;
    if (swp->sw_piip == 0)
        wakeup((caddr_t) swp);

    /*
     * relpbuf does this, but we maintain our own buffer
     * list also...
     */
    if (bp->b_vp)
        brelvp(bp);

    splx(s);
    --swb[reqpage]->swb_locked;

    /*
     * remove the mapping for kernel virtual
     */
    pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);

    if (spc) {
        /*
         * if we have used an spc, we need to free it.
         */
        if (bp->b_rcred != NOCRED)
            crfree(bp->b_rcred);
        if (bp->b_wcred != NOCRED)
            crfree(bp->b_wcred);
        TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
        if (swap_pager_needflags & SWAP_FREE_NEEDED) {
            swap_pager_needflags &= ~SWAP_FREE_NEEDED;
            wakeup((caddr_t) &swap_pager_free);
        }
    } else {
        /*
         * free the kernel virtual addresses
         */
        kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
        /*
         * release the physical I/O buffer
         */
        relpbuf(bp);
        /*
         * finish up input if everything is ok
         */
        if (rv == VM_PAGER_OK) {
            for (i = 0; i < count; i++) {
                pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
                m[i]->flags |= PG_CLEAN;
                m[i]->flags &= ~PG_LAUNDRY;
                if (i != reqpage) {
                    /*
                     * whether or not to leave the page activated
                     * is up in the air, but we should put the page
                     * on a page queue somewhere. (it already is in
                     * the object).  After some empirical results,
                     * it is best to deactivate the readahead pages.
                     */
                    vm_page_deactivate(m[i]);

                    /*
                     * just in case someone was asking for this
                     * page we now tell them that it is ok to use
                     */
                    m[i]->flags &= ~PG_FAKE;
                    PAGE_WAKEUP(m[i]);
                }
            }
            if (swap_pager_full) {
                _swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
            }
        } else {
            swap_pager_ridpages(m, count, reqpage);
        }
    }
    return (rv);
}
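/*
 * The backward/forward scans above implement the clustering rule: a
 * readahead page may ride along with the faulted page only if its swap
 * block is (a) allocated, (b) marked valid, (c) exactly page-spaced from
 * the required page's block, and (d) in the same dmmax region.  A
 * self-contained restatement of conditions (a), (c) and (d) -- the
 * validity bit is checked separately against swb_valid -- using a
 * hypothetical helper not used by the code, compiled out:
 */
#if 0
static int
cluster_ok(int addr, int reqaddr, int delta, int dmmax)
{
    return (addr != SWB_EMPTY &&
        addr == reqaddr + delta * btodb(PAGE_SIZE) &&
        addr / dmmax == reqaddr / dmmax);
}
#endif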
int
swap_pager_output(swp, m, count, flags, rtvals)
    register sw_pager_t swp;
    vm_page_t *m;
    int count;
    int flags;
    int *rtvals;
{
    register struct buf *bp;
    sw_blk_t swb[count];
    register int s;
    int i, j, ix;
    boolean_t rv;
    vm_offset_t kva, off, foff;
    swp_clean_t spc;
    vm_offset_t paging_offset;
    vm_object_t object;
    int reqaddr[count];
    int failed;

/*
    if( count > 1)
        printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
    spc = NULL;

    object = m[0]->object;
    paging_offset = object->paging_offset;

    failed = 0;
    for (j = 0; j < count; j++) {
        foff = m[j]->offset + paging_offset;
        ix = swap_pager_block_index(swp, foff);
        swb[j] = 0;
        if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
            rtvals[j] = VM_PAGER_FAIL;
            failed = 1;
            continue;
        } else {
            rtvals[j] = VM_PAGER_OK;
        }
        swb[j] = &swp->sw_blocks[ix];
        ++swb[j]->swb_locked;
        if (failed) {
            rtvals[j] = VM_PAGER_FAIL;
            continue;
        }
        off = swap_pager_block_offset(swp, foff);
        reqaddr[j] = swb[j]->swb_block[off];
        if (reqaddr[j] == SWB_EMPTY) {
            int blk;
            int tries;
            int ntoget;

            tries = 0;
            s = splbio();

            /*
             * if any other pages have been allocated in this block, we
             * only try to get one page.
             */
            for (i = 0; i < SWB_NPAGES; i++) {
                if (swb[j]->swb_block[i] != SWB_EMPTY)
                    break;
            }

            ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
            /*
             * this code is a little conservative, but works
             * (the intent of this code is to allocate small chunks
             * for small objects)
             */
            if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
                ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
            }

retrygetspace:
            if (!swap_pager_full && ntoget > 1 &&
                swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

                for (i = 0; i < ntoget; i++) {
                    swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
                    swb[j]->swb_valid = 0;
                }

                reqaddr[j] = swb[j]->swb_block[off];
            } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
                &swb[j]->swb_block[off])) {
                /*
                 * if the allocation has failed, we try to reclaim space and
                 * retry.
                 */
                if (++tries == 1) {
                    swap_pager_reclaim();
                    goto retrygetspace;
                }
                rtvals[j] = VM_PAGER_AGAIN;
                failed = 1;
            } else {
                reqaddr[j] = swb[j]->swb_block[off];
                swb[j]->swb_valid &= ~(1 << off);
            }
            splx(s);
        }
    }

    /*
     * verify that the blocks are contiguous from the first page on;
     * pages past the first break in contiguity are deferred with
     * VM_PAGER_AGAIN
     */
    failed = 0;
    for (i = 0; i < count; i++) {
        if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
            (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
            (rtvals[i] != VM_PAGER_OK)) {
            failed = 1;
            if (rtvals[i] == VM_PAGER_OK)
                rtvals[i] = VM_PAGER_AGAIN;
        }
    }

    for (i = 0; i < count; i++) {
        if (rtvals[i] != VM_PAGER_OK) {
            if (swb[i])
                --swb[i]->swb_locked;
        }
    }

    for (i = 0; i < count; i++)
        if (rtvals[i] != VM_PAGER_OK)
            break;

    if (i == 0) {
        return VM_PAGER_AGAIN;
    }

    count = i;
    for (i = 0; i < count; i++) {
        if (reqaddr[i] == SWB_EMPTY)
            printf("I/O to empty block????\n");
    }

    /*
     * For synchronous writes, we clean up
     * all completed async pageouts.
     */
    if ((flags & B_ASYNC) == 0) {
        swap_pager_clean();
    }

    kva = 0;

    /*
     * we allocate a new kva for transfers > 1 page
     * but for transfers == 1 page, the swap_pager_free list contains
     * entries that have pre-allocated kva's (for efficiency).
     */
    if (count > 1) {
        kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
        if (!kva) {
            for (i = 0; i < count; i++) {
                if (swb[i])
                    --swb[i]->swb_locked;
                rtvals[i] = VM_PAGER_AGAIN;
            }
            return VM_PAGER_AGAIN;
        }
    }

    /*
     * get a swap pager clean data structure, block until we get it
     */
    if (swap_pager_free.tqh_first == NULL) {
/*
        if (flags & B_ASYNC) {
            for(i=0;i<count;i++) {
                rtvals[i] = VM_PAGER_AGAIN;
                if( swb[i])
                    --swb[i]->swb_locked;
            }
            return VM_PAGER_AGAIN;
        }
*/
        s = splbio();
        if (curproc == pageproc)
            (void) swap_pager_clean();
        else
            wakeup((caddr_t) &vm_pages_needed);
        while (swap_pager_free.tqh_first == NULL) {
            swap_pager_needflags |= SWAP_FREE_NEEDED;
            tsleep((caddr_t) &swap_pager_free,
                PVM, "swpfre", 0);
            if (curproc == pageproc)
                (void) swap_pager_clean();
            else
                wakeup((caddr_t) &vm_pages_needed);
        }
        splx(s);
    }

    spc = swap_pager_free.tqh_first;
    TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
    if (!kva) {
        kva = spc->spc_kva;
        spc->spc_altkva = 0;
    } else {
        spc->spc_altkva = kva;
    }

    /*
     * map our page(s) into kva for I/O
     */
    for (i = 0; i < count; i++) {
        pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
    }
    pmap_update();

    /*
     * get the base I/O offset into the swap file
     */
    for (i = 0; i < count; i++) {
        foff = m[i]->offset + paging_offset;
        off = swap_pager_block_offset(swp, foff);
        /*
         * if we are setting the valid bit anew,
         * then diminish the swap free space
         */
        if ((swb[i]->swb_valid & (1 << off)) == 0)
            vm_swap_size -= btodb(PAGE_SIZE);

        /*
         * set the valid bit
         */
        swb[i]->swb_valid |= (1 << off);
        /*
         * and unlock the data structure
         */
        --swb[i]->swb_locked;
    }

    s = splbio();
    /*
     * Get a swap buffer header and perform the IO
     */
    bp = spc->spc_bp;
    bzero(bp, sizeof *bp);
    bp->b_spc = spc;

    bp->b_flags = B_BUSY;
    bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
    bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
    crhold(bp->b_rcred);
    crhold(bp->b_wcred);
    bp->b_un.b_addr = (caddr_t) kva;
    bp->b_blkno = reqaddr[0];
    bgetvp(swapdev_vp, bp);
/*
    VHOLD(swapdev_vp);
    bp->b_vp = swapdev_vp;
    if (swapdev_vp->v_type == VBLK)
        bp->b_dev = swapdev_vp->v_rdev;
*/
    bp->b_bcount = PAGE_SIZE * count;
    bp->b_bufsize = PAGE_SIZE * count;
    swapdev_vp->v_numoutput++;

    /*
     * If this is an async write we set up additional buffer fields
     * and place a "cleaning" entry on the inuse queue.
     */
    if (flags & B_ASYNC) {
        spc->spc_flags = 0;
        spc->spc_swp = swp;
        for (i = 0; i < count; i++)
            spc->spc_m[i] = m[i];
        spc->spc_count = count;
        /*
         * the completion routine for async writes
         */
        bp->b_flags |= B_CALL;
        bp->b_iodone = swap_pager_iodone;
        bp->b_dirtyoff = 0;
        bp->b_dirtyend = bp->b_bcount;
        swp->sw_poip++;
        TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
    } else {
        swp->sw_poip++;
        bp->b_flags |= B_CALL;
        bp->b_iodone = swap_pager_iodone1;
    }
    /*
     * perform the I/O
     */
    VOP_STRATEGY(bp);
    if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
        if ((bp->b_flags & B_DONE) == B_DONE) {
            swap_pager_clean();
        }
        splx(s);
        for (i = 0; i < count; i++) {
            rtvals[i] = VM_PAGER_PEND;
        }
        return VM_PAGER_PEND;
    }

    /*
     * wait for the sync I/O to complete
     */
    while ((bp->b_flags & B_DONE) == 0) {
        tsleep((caddr_t) bp, PVM, "swwrt", 0);
    }
    rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
    bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

    --swp->sw_poip;
    if (swp->sw_poip == 0)
        wakeup((caddr_t) swp);

    if (bp->b_vp)
        brelvp(bp);

    splx(s);

    /*
     * remove the mapping for kernel virtual
     */
    pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);

    /*
     * if we have written the page, then indicate that the page
     * is clean.
     */
    if (rv == VM_PAGER_OK) {
        for (i = 0; i < count; i++) {
            if (rtvals[i] == VM_PAGER_OK) {
                m[i]->flags |= PG_CLEAN;
                m[i]->flags &= ~PG_LAUNDRY;
                pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
                /*
                 * optimization: if a page has been read during the
                 * pageout process, we activate it.
                 */
                if ((m[i]->flags & PG_ACTIVE) == 0 &&
                    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
                    vm_page_activate(m[i]);
            }
        }
    } else {
        for (i = 0; i < count; i++) {
            rtvals[i] = rv;
            m[i]->flags |= PG_LAUNDRY;
        }
    }

    if (spc->spc_altkva)
        kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);

    if (bp->b_rcred != NOCRED)
        crfree(bp->b_rcred);
    if (bp->b_wcred != NOCRED)
        crfree(bp->b_wcred);
    TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
    if (swap_pager_needflags & SWAP_FREE_NEEDED) {
        swap_pager_needflags &= ~SWAP_FREE_NEEDED;
        wakeup((caddr_t) &swap_pager_free);
    }

    return (rv);
}
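/*
 * Both I/O paths above use the old-style BSD async buffer convention: set
 * B_CALL and point b_iodone at a completion routine, which biodone() then
 * invokes at interrupt time.  A skeleton of the pattern (hypothetical
 * names; error handling omitted), compiled out:
 */
#if 0
static void
sketch_iodone(struct buf *bp)
{
    bp->b_flags |= B_DONE;
    wakeup((caddr_t) bp);		/* resume a synchronous waiter */
}

static void
sketch_write(struct buf *bp)
{
    bp->b_flags = B_BUSY | B_WRITE | B_CALL;
    bp->b_iodone = sketch_iodone;
    VOP_STRATEGY(bp);			/* returns with the I/O in flight */
    while ((bp->b_flags & B_DONE) == 0)
        tsleep((caddr_t) bp, PVM, "sketch", 0);
}
#endif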
boolean_t
swap_pager_clean()
{
    register swp_clean_t spc, tspc;
    register int s;

    tspc = NULL;
    if (swap_pager_done.tqh_first == NULL)
        return FALSE;
    for (;;) {
        s = splbio();
        /*
         * Lookup and removal from done list must be done
         * at splbio() to avoid conflicts with swap_pager_iodone.
         */
        while ((spc = swap_pager_done.tqh_first) != NULL) {
            if (spc->spc_altkva) {
                pmap_remove(vm_map_pmap(pager_map), spc->spc_altkva, spc->spc_altkva + spc->spc_count * PAGE_SIZE);
                kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE);
                spc->spc_altkva = 0;
            } else {
                pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, spc->spc_kva + PAGE_SIZE);
            }
            swap_pager_finish(spc);
            TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
            goto doclean;
        }

        /*
         * No operations done, that's all we can do for now.
         */
        splx(s);
        break;

        /*
         * The desired page was found to be busy earlier in
         * the scan but has since completed.
         */
doclean:
        if (tspc && tspc == spc) {
            tspc = NULL;
        }
        spc->spc_flags = 0;
        TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
        if (swap_pager_needflags & SWAP_FREE_NEEDED) {
            swap_pager_needflags &= ~SWAP_FREE_NEEDED;
            wakeup((caddr_t) &swap_pager_free);
        }
        ++cleandone;
        splx(s);
    }

    return (tspc ? TRUE : FALSE);
}
void
swap_pager_finish(spc)
    register swp_clean_t spc;
{
    vm_object_t object = spc->spc_m[0]->object;
    int i;

    if ((object->paging_in_progress -= spc->spc_count) == 0)
        thread_wakeup((int) object);

    /*
     * If no error, mark as clean and inform the pmap system.
     * If error, mark as dirty so we will try again.
     * (XXX could get stuck doing this, should give up after awhile)
     */
    if (spc->spc_flags & SPC_ERROR) {
        for (i = 0; i < spc->spc_count; i++) {
            printf("swap_pager_finish: clean of page %x failed\n",
                VM_PAGE_TO_PHYS(spc->spc_m[i]));
            spc->spc_m[i]->flags |= PG_LAUNDRY;
        }
    } else {
        for (i = 0; i < spc->spc_count; i++) {
            pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
            spc->spc_m[i]->flags |= PG_CLEAN;
        }
    }

    for (i = 0; i < spc->spc_count; i++) {
        /*
         * we wakeup any processes that are waiting on
         * these pages.
         */
        PAGE_WAKEUP(spc->spc_m[i]);
    }
    nswiodone -= spc->spc_count;

    return;
}
/*
 * swap_pager_iodone is the completion routine for async writes; it moves
 * the cleaning entry from the inuse queue to the done queue.
 */
void
swap_pager_iodone(bp)
    register struct buf *bp;
{
    register swp_clean_t spc;
    int s;

    s = splbio();
    spc = (swp_clean_t) bp->b_spc;
    TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
    TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
    if (bp->b_flags & B_ERROR) {
        spc->spc_flags |= SPC_ERROR;
        printf("error %d blkno %d sz %d ",
            bp->b_error, bp->b_blkno, bp->b_bcount);
    }

/*
    if ((bp->b_flags & B_READ) == 0)
        vwakeup(bp);
*/

    bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_ASYNC);
    if (bp->b_vp) {
        brelvp(bp);
    }
    if (bp->b_rcred != NOCRED)
        crfree(bp->b_rcred);
    if (bp->b_wcred != NOCRED)
        crfree(bp->b_wcred);

    nswiodone += spc->spc_count;
    if (--spc->spc_swp->sw_poip == 0) {
        wakeup((caddr_t) spc->spc_swp);
    }

    if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
        swap_pager_inuse.tqh_first == 0) {
        swap_pager_needflags &= ~SWAP_FREE_NEEDED;
        wakeup((caddr_t) &swap_pager_free);
        wakeup((caddr_t) &vm_pages_needed);
    }

    if (vm_pageout_pages_needed) {
        wakeup((caddr_t) &vm_pageout_pages_needed);
    }

    if ((swap_pager_inuse.tqh_first == NULL) ||
        (cnt.v_free_count < cnt.v_free_min &&
        nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
        wakeup((caddr_t) &vm_pages_needed);
    }
    splx(s);
}

int bswneeded;
/* TAILQ_HEAD(swqueue, buf) bswlist; */
/*
 * allocate a physical buffer
 */
struct buf *
getpbuf()
{
    int s;
    struct buf *bp;

    s = splbio();
    /* get a bp from the swap buffer header pool */
    while ((bp = bswlist.tqh_first) == NULL) {
        bswneeded = 1;
        tsleep((caddr_t) &bswneeded, PVM, "wswbuf", 0);
    }
    TAILQ_REMOVE(&bswlist, bp, b_freelist);

    splx(s);

    bzero(bp, sizeof *bp);
    bp->b_rcred = NOCRED;
    bp->b_wcred = NOCRED;
    return bp;
}
/*
 * allocate a physical buffer, if one is available
 */
struct buf *
trypbuf()
{
    int s;
    struct buf *bp;

    s = splbio();
    if ((bp = bswlist.tqh_first) == NULL) {
        splx(s);
        return NULL;
    }
    TAILQ_REMOVE(&bswlist, bp, b_freelist);
    splx(s);

    bzero(bp, sizeof *bp);
    bp->b_rcred = NOCRED;
    bp->b_wcred = NOCRED;
    return bp;
}

/*
 * release a physical buffer
 */
void
relpbuf(bp)
    struct buf *bp;
{
    int s;

    s = splbio();

    if (bp->b_rcred != NOCRED) {
        crfree(bp->b_rcred);
        bp->b_rcred = NOCRED;
    }
    if (bp->b_wcred != NOCRED) {
        crfree(bp->b_wcred);
        bp->b_wcred = NOCRED;
    }

    if (bp->b_vp)
        brelvp(bp);

    TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);

    if (bswneeded) {
        bswneeded = 0;
        /* must match the channel getpbuf sleeps on */
        wakeup((caddr_t) &bswneeded);
    }
    splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
    if (swap_pager_free.tqh_first)
        return 1;
    else
        return 0;
}
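/*
 * Usage sketch for the physical-buffer pool above (hypothetical caller;
 * the fields set here mirror what swap_pager_input() does for real),
 * compiled out:
 */
#if 0
static void
example_read(daddr_t blkno, caddr_t kva, int size)
{
    struct buf *bp = getpbuf();		/* may sleep until a header frees up */

    bp->b_flags = B_BUSY | B_READ | B_CALL;
    bp->b_iodone = swap_pager_iodone1;
    bp->b_un.b_addr = kva;
    bp->b_blkno = blkno;
    bp->b_bcount = size;
    bp->b_bufsize = size;
    bgetvp(swapdev_vp, bp);
    VOP_STRATEGY(bp);
    while ((bp->b_flags & B_DONE) == 0)
        tsleep((caddr_t) bp, PVM, "exread", 0);
    relpbuf(bp);			/* returns the header, wakes waiters */
}
#endif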