/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	16
#endif

extern int nswbuf;
int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_pageout_pages_needed;
extern int vm_swap_size;
extern struct vnode *swapdev_vp;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	vm_offset_t spc_altkva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

void swap_pager_init(void);
vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
void swap_pager_dealloc(vm_pager_t);
boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
void swap_pager_iodone(struct buf *);
boolean_t swap_pager_clean();

extern struct pagerops swappagerops;

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

struct buf *getpbuf();
void relpbuf(struct buf *bp);

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}
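
/*
 * Illustrative note (editorial, assuming 4K pages and 512-byte disk
 * blocks, so btodb(PAGE_SIZE) == 8): swapsizecheck() above declares
 * swap "full" once fewer than 128 pages worth of blocks remain, and
 * clears the flag only after more than 192 pages worth are free
 * again.  The 64-page gap provides hysteresis so swap_pager_full
 * does not flap when free space hovers near the threshold.
 */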

void
swap_pager_init()
{
	extern int dmmin, dmmax;

	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;

		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}

	/*
	 * If this is a "named" anonymous region, look it up and
	 * return the appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference
			 * to the object and also to remove from the
			 * object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}

	if (swap_pager_full) {
		return (NULL);
	}

	/*
	 * Pager doesn't exist, allocate swap management resources
	 * and initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}

	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL
		 * since it might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * returns disk block associated with pager and offset
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
	unsigned tmpalloc;
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);

	if (amount < nblocksfrag) {
		if (rlist_alloc(&swapfrag, amount, rtval))
			return 1;
		if (!rlist_alloc(&swapmap, nblocksfrag, &tmpalloc))
			return 0;
		rlist_free(&swapfrag, tmpalloc + amount, tmpalloc + nblocksfrag - 1);
		*rtval = tmpalloc;
		return 1;
	}
	if (!rlist_alloc(&swapmap, amount, rtval))
		return 0;
	else
		return 1;
}
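
/*
 * Worked example of the policy above (illustrative; it assumes
 * SWB_NPAGES == 8, 4K pages, and 512-byte disk blocks, so
 * nblocksfrag == btodb(8 * PAGE_SIZE) == 64):  a request for 8 blocks
 * is first tried against the small-fragment list swapfrag.  If that
 * fails, a full 64-block run is taken from swapmap; the caller gets
 * the first 8 blocks (*rtval == tmpalloc) and the remainder,
 * [tmpalloc + 8, tmpalloc + 63], is returned to swapfrag for later
 * small requests.  Requests of nblocksfrag blocks or more bypass the
 * fragment list entirely.
 */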

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
	unsigned nblocksfrag = btodb(SWB_NPAGES * PAGE_SIZE);
	unsigned tmpalloc;

	if (((to + 1) - from) >= nblocksfrag) {
		while ((from + nblocksfrag) <= to + 1) {
			rlist_free(&swapmap, from, from + nblocksfrag - 1);
			from += nblocksfrag;
		}
	}
	if (from >= to)
		return;
	rlist_free(&swapfrag, from, to);
	while (rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
		rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag - 1);
	}
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				vm_swap_size += btodb(PAGE_SIZE);
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	swapsizecheck();
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
		swapsizecheck();
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}
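
/*
 * Note (editorial): the allocated-but-invalid blocks that
 * swap_pager_reclaim() collects come from the clustered allocation in
 * swap_pager_output() below, which grabs space for a whole swblock
 * (SWB_NPAGES pages) at once; pages of the cluster that are never
 * written keep a clear swb_valid bit and can be reclaimed here.
 * Locked swblocks are skipped because an I/O may be in flight on them.
 */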

/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * clear source block before destination object
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			swapsizecheck();
			*addr = SWB_EMPTY;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
		    &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space, then
			 * copy the allocation from the source to the dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest already has a valid block, deallocate the
				 * source block without copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
					vm_swap_size -= btodb(PAGE_SIZE);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
				if (srcvalid)
					vm_swap_size += btodb(PAGE_SIZE);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);

		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
			if (valid)
				vm_swap_size += btodb(PAGE_SIZE);
			*srcaddrp = SWB_EMPTY;
		}
	}

	swapsizecheck();
	splx(s);

	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}


void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				if (bp->swb_valid & (1 << j))
					vm_swap_size += btodb(PAGE_SIZE);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);
	swapsizecheck();

	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count\n");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}

	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}
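
/*
 * Worked example for the two helpers above (illustrative, assuming
 * SWB_NPAGES == 8 and PAGE_SIZE == 4096): each swblock covers
 * 8 * 4K == 32K of the object.  For offset 0x9000 (36K),
 * swap_pager_block_index() returns 36K / 32K == 1 (the second
 * swblock), and swap_pager_block_offset() returns (36K % 32K) / 4K
 * == 1, i.e. slot 1 of swb_block[] and bit 1 of swb_valid.
 */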

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}

	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  This routine takes a vm_pager_t
 * for an argument instead of sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and async writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}


int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}

		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}

	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}

	++swb[reqpage]->swb_locked;

	/*
	 * at this point:
	 *	"m" is a pointer to the array of vm_page_t for paging I/O
	 *	"count" is the number of vm_page_t entries represented by "m"
	 *	"object" is the vm_object_t for I/O
	 *	"reqpage" is the index into "m" for the page actually faulted
	 */
	spc = NULL;	/* we might not use an spc data structure */
	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 */
	if (count > 1) {
		kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
	}

	if (!kva) {
		/*
		 * if a kva has not been allocated, we can only do a one page
		 * transfer, so we free the other pages that might have been
		 * allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
	}

	/*
	 * map our page(s) into kva for input
	 */
	for (i = 0; i < count; i++) {
		pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
	}
	pmap_update();

	/*
	 * Get a swap buffer header and perform the IO
	 */
	if (spc) {
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
	} else {
		bp = getpbuf();
	}

	s = splbio();
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

/*
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
*/
	bgetvp(swapdev_vp, bp);

	swp->sw_piip++;

	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);

	/*
	 * relpbuf does this, but we maintain our own buffer
	 * list also...
	 */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);

	if (spc) {
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
	} else {
		/*
		 * free the kernel virtual addresses
		 */
		kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page activated
					 * is up in the air, but we should put the page
					 * on a page queue somewhere.  (it already is in
					 * the object.)
					 * After some empirical results, it is best
					 * to deactivate the readahead pages.
					 */
					vm_page_deactivate(m[i]);
					m[i]->act_count = 2;

					/*
					 * just in case someone was asking for this
					 * page we now tell them that it is ok to use
					 */
					m[i]->flags &= ~PG_FAKE;
					PAGE_WAKEUP(m[i]);
				}
			}
			if (swap_pager_full) {
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}
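
/*
 * Note (editorial): both the read-clustering checks above and the
 * write-clustering checks in swap_pager_output() below require the
 * candidate blocks to be physically contiguous
 * (reqaddr[0] + i * btodb(PAGE_SIZE)) and to fall in the same dmmax
 * region, presumably so that a single strategy call never spans a
 * swap interleave boundary.
 */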

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

/*
	if (count > 1)
		printf("off: 0x%x, count: %d\n", m[0]->offset, count);
*/
	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this block,
			 * we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works
			 * (the intent of this code is to allocate small chunks
			 * for small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}

retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}

	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}

	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page
	 * but for transfers == 1 page, the swap_pager_free list contains
	 * entries that have pre-allocated kva's (for efficiency).
	 */
	if (count > 1) {
		kva = kmem_alloc_pageable(pager_map, count * PAGE_SIZE);
		if (!kva) {
			for (i = 0; i < count; i++) {
				if (swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL) {
/*
		if (flags & B_ASYNC) {
			for (i = 0; i < count; i++) {
				rtvals[i] = VM_PAGER_AGAIN;
				if (swb[i])
					--swb[i]->swb_locked;
			}
			return VM_PAGER_AGAIN;
		}
*/

		s = splbio();
		if (curproc == pageproc)
			(void) swap_pager_clean();
		else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL) {
			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}

	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
	if (!kva) {
		kva = spc->spc_kva;
		spc->spc_altkva = 0;
	} else {
		spc->spc_altkva = kva;
	}

	/*
	 * map our page(s) into kva for I/O
	 */
	for (i = 0; i < count; i++) {
		pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
	}
	pmap_update();

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * if we are setting the valid bit anew,
		 * then diminish the swap free space
		 */
		if ((swb[i]->swb_valid & (1 << off)) == 0)
			vm_swap_size -= btodb(PAGE_SIZE);

		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	s = splbio();
	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bgetvp(swapdev_vp, bp);
/*
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
*/
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}

	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_CALL | B_DONE);

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		brelvp(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);

	/*
	 * if we have written the page, then indicate that the page
	 * is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				m[i]->flags |= PG_CLEAN;
				m[i]->flags &= ~PG_LAUNDRY;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				/*
				 * optimization: if a page has been read during
				 * the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
			m[i]->flags |= PG_LAUNDRY;
		}
	}

	if (spc->spc_altkva)
		kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}

	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while (spc = swap_pager_done.tqh_first) {
			if (spc->spc_altkva) {
				pmap_remove(vm_map_pmap(pager_map), spc->spc_altkva, spc->spc_altkva + spc->spc_count * PAGE_SIZE);
				kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE);
				spc->spc_altkva = 0;
			} else {
				pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, spc->spc_kva + PAGE_SIZE);
			}
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after a while)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: clean of page %x failed\n",
			    VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_LAUNDRY;
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->flags |= PG_CLEAN;
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on
		 * these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("error %d blkno %d sz %d ",
		    bp->b_error, bp->b_blkno, bp->b_bcount);
	}

/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	bp->b_flags &= ~(B_BUSY | B_WANTED | B_PHYS | B_DIRTY | B_ASYNC);
	if (bp->b_vp) {
		brelvp(bp);
	}
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}

	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
		wakeup((caddr_t) &vm_pages_needed);
	}

	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
	}

	if ((swap_pager_inuse.tqh_first == NULL) ||
	    (cnt.v_free_count < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count >= cnt.v_free_min)) {
		wakeup((caddr_t) &vm_pages_needed);
	}
	splx(s);
}

int bswneeded;
/* TAILQ_HEAD(swqueue, buf) bswlist; */

/*
 * allocate a physical buffer
 */
struct buf *
getpbuf()
{
	int s;
	struct buf *bp;

	s = splbio();
	/* get a bp from the swap buffer header pool */
	while ((bp = bswlist.tqh_first) == NULL) {
		bswneeded = 1;
		tsleep((caddr_t) &bswneeded, PVM, "wswbuf", 0);
	}
	TAILQ_REMOVE(&bswlist, bp, b_freelist);

	splx(s);

	bzero(bp, sizeof *bp);
	bp->b_rcred = NOCRED;
	bp->b_wcred = NOCRED;
	return bp;
}

/*
 * allocate a physical buffer, if one is available
 */
struct buf *
trypbuf()
{
	int s;
	struct buf *bp;

	s = splbio();
	if ((bp = bswlist.tqh_first) == NULL) {
		splx(s);
		return NULL;
	}
	TAILQ_REMOVE(&bswlist, bp, b_freelist);
	splx(s);

	bzero(bp, sizeof *bp);
	bp->b_rcred = NOCRED;
	bp->b_wcred = NOCRED;
	return bp;
}

/*
 * release a physical buffer
 */
void
relpbuf(bp)
	struct buf *bp;
{
	int s;

	s = splbio();

	if (bp->b_rcred != NOCRED) {
		crfree(bp->b_rcred);
		bp->b_rcred = NOCRED;
	}
	if (bp->b_wcred != NOCRED) {
		crfree(bp->b_wcred);
		bp->b_wcred = NOCRED;
	}

	if (bp->b_vp)
		brelvp(bp);

	TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);

	if (bswneeded) {
		bswneeded = 0;
		wakeup((caddr_t) &bswlist);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}
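
/*
 * Illustrative usage sketch (editorial, not part of the original
 * file): a caller that needs a throwaway buffer header for a private
 * transfer would pair the routines above, e.g.
 *
 *	struct buf *bp = getpbuf();	// may sleep on bswneeded
 *	... fill in b_flags, b_blkno, b_bcount, start the I/O ...
 *	relpbuf(bp);			// releases creds, vnode, header
 *
 * trypbuf() is the non-blocking variant and returns NULL when the
 * bswlist pool is empty.
 */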