1 /* 2 * Copyright (c) 1994 John S. Dyson 3 * Copyright (c) 1990 University of Utah. 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 40 * 41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 42 * $Id: swap_pager.c,v 1.17 1994/11/06 09:55:28 davidg Exp $ 43 */ 44 45 /* 46 * Quick hack to page to dedicated partition(s). 47 * TODO: 48 * Add multiprocessor locks 49 * Deal with async writes in a better fashion 50 */ 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/proc.h> 55 #include <sys/buf.h> 56 #include <sys/vnode.h> 57 #include <sys/malloc.h> 58 59 #include <miscfs/specfs/specdev.h> 60 #include <sys/rlist.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_pager.h> 64 #include <vm/vm_page.h> 65 #include <vm/vm_pageout.h> 66 #include <vm/swap_pager.h> 67 68 #ifndef NPENDINGIO 69 #define NPENDINGIO 16 70 #endif 71 72 int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int)); 73 int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *)); 74 75 int nswiodone; 76 extern int vm_pageout_rate_limit; 77 static int cleandone; 78 extern int hz; 79 int swap_pager_full; 80 extern vm_map_t pager_map; 81 extern int vm_swap_size; 82 int no_swap_space=1; 83 struct rlist *swaplist; 84 int nswaplist; 85 86 #define MAX_PAGEOUT_CLUSTER 8 87 88 TAILQ_HEAD(swpclean, swpagerclean); 89 90 typedef struct swpagerclean *swp_clean_t; 91 92 struct swpagerclean { 93 TAILQ_ENTRY(swpagerclean) spc_list; 94 int spc_flags; 95 struct buf *spc_bp; 96 sw_pager_t spc_swp; 97 vm_offset_t spc_kva; 98 int spc_count; 99 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; 100 } swcleanlist [NPENDINGIO] ; 101 102 103 extern vm_map_t kernel_map; 104 105 /* spc_flags values */ 106 #define SPC_ERROR 0x01 107 108 #define SWB_EMPTY (-1) 109 110 struct swpclean swap_pager_done; /* list of compileted page cleans */ 111 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 112 struct swpclean swap_pager_free; /* list of free pager clean structs */ 113 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 114 struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */ 115 116 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ 117 int swap_pager_needflags; 118 struct rlist *swapfrag; 119 120 struct pagerlst *swp_qs[]={ 121 &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0 122 }; 123 124 int swap_pager_putmulti(); 125 126 struct pagerops swappagerops = { 127 swap_pager_init, 128 swap_pager_alloc, 129 swap_pager_dealloc, 130 swap_pager_getpage, 131 swap_pager_getmulti, 132 swap_pager_putpage, 133 swap_pager_putmulti, 134 swap_pager_haspage 135 }; 136 137 int npendingio = NPENDINGIO; 138 int pendingiowait; 139 int require_swap_init; 140 void swap_pager_finish(); 141 int dmmin, dmmax; 142 extern int vm_page_count; 143 144 static inline void swapsizecheck() { 145 if( vm_swap_size < 128*btodb(PAGE_SIZE)) { 146 if( swap_pager_full) 147 printf("swap_pager: out of space\n"); 148 swap_pager_full = 1; 149 } else if( vm_swap_size > 192*btodb(PAGE_SIZE)) 150 swap_pager_full = 0; 151 } 152 153 void 154 swap_pager_init() 155 { 156 dfltpagerops = &swappagerops; 157 158 TAILQ_INIT(&swap_pager_list); 159 TAILQ_INIT(&swap_pager_un_list); 160 161 /* 162 * Initialize clean lists 163 */ 164 TAILQ_INIT(&swap_pager_inuse); 165 TAILQ_INIT(&swap_pager_done); 166 TAILQ_INIT(&swap_pager_free); 167 168 require_swap_init = 1; 169 170 /* 171 * Calculate the swap allocation constants. 172 */ 173 174 dmmin = CLBYTES/DEV_BSIZE; 175 dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2; 176 177 } 178 179 /* 180 * Allocate a pager structure and associated resources. 181 * Note that if we are called from the pageout daemon (handle == NULL) 182 * we should not wait for memory as it could resulting in deadlock. 183 */ 184 vm_pager_t 185 swap_pager_alloc(handle, size, prot, offset) 186 caddr_t handle; 187 register vm_size_t size; 188 vm_prot_t prot; 189 vm_offset_t offset; 190 { 191 register vm_pager_t pager; 192 register sw_pager_t swp; 193 int waitok; 194 int i,j; 195 196 if (require_swap_init) { 197 swp_clean_t spc; 198 struct buf *bp; 199 /* 200 * kva's are allocated here so that we dont need to keep 201 * doing kmem_alloc pageables at runtime 202 */ 203 for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) { 204 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE*MAX_PAGEOUT_CLUSTER); 205 if (!spc->spc_kva) { 206 break; 207 } 208 spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT); 209 if (!spc->spc_bp) { 210 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); 211 break; 212 } 213 spc->spc_flags = 0; 214 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 215 } 216 require_swap_init = 0; 217 if( size == 0) 218 return(NULL); 219 } 220 221 /* 222 * If this is a "named" anonymous region, look it up and 223 * return the appropriate pager if it exists. 224 */ 225 if (handle) { 226 pager = vm_pager_lookup(&swap_pager_list, handle); 227 if (pager != NULL) { 228 /* 229 * Use vm_object_lookup to gain a reference 230 * to the object and also to remove from the 231 * object cache. 232 */ 233 if (vm_object_lookup(pager) == NULL) 234 panic("swap_pager_alloc: bad object"); 235 return(pager); 236 } 237 } 238 239 /* 240 * Pager doesn't exist, allocate swap management resources 241 * and initialize. 242 */ 243 waitok = handle ? M_WAITOK : M_NOWAIT; 244 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 245 if (pager == NULL) 246 return(NULL); 247 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 248 if (swp == NULL) { 249 free((caddr_t)pager, M_VMPAGER); 250 return(NULL); 251 } 252 size = round_page(size); 253 swp->sw_osize = size; 254 swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE); 255 swp->sw_blocks = (sw_blk_t) 256 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 257 M_VMPGDATA, waitok); 258 if (swp->sw_blocks == NULL) { 259 free((caddr_t)swp, M_VMPGDATA); 260 free((caddr_t)pager, M_VMPAGER); 261 return(NULL); 262 } 263 264 for (i = 0; i < swp->sw_nblocks; i++) { 265 swp->sw_blocks[i].swb_valid = 0; 266 swp->sw_blocks[i].swb_locked = 0; 267 for (j = 0; j < SWB_NPAGES; j++) 268 swp->sw_blocks[i].swb_block[j] = SWB_EMPTY; 269 } 270 271 swp->sw_poip = 0; 272 if (handle) { 273 vm_object_t object; 274 275 swp->sw_flags = SW_NAMED; 276 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 277 /* 278 * Consistant with other pagers: return with object 279 * referenced. Can't do this with handle == NULL 280 * since it might be the pageout daemon calling. 281 */ 282 object = vm_object_allocate(size); 283 vm_object_enter(object, pager); 284 vm_object_setpager(object, pager, 0, FALSE); 285 } else { 286 swp->sw_flags = 0; 287 TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list); 288 } 289 pager->pg_handle = handle; 290 pager->pg_ops = &swappagerops; 291 pager->pg_type = PG_SWAP; 292 pager->pg_data = (caddr_t)swp; 293 294 return(pager); 295 } 296 297 /* 298 * returns disk block associated with pager and offset 299 * additionally, as a side effect returns a flag indicating 300 * if the block has been written 301 */ 302 303 static int * 304 swap_pager_diskaddr(swp, offset, valid) 305 sw_pager_t swp; 306 vm_offset_t offset; 307 int *valid; 308 { 309 register sw_blk_t swb; 310 int ix; 311 312 if (valid) 313 *valid = 0; 314 ix = offset / (SWB_NPAGES*PAGE_SIZE); 315 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 316 return(FALSE); 317 } 318 swb = &swp->sw_blocks[ix]; 319 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 320 if (valid) 321 *valid = swb->swb_valid & (1<<ix); 322 return &swb->swb_block[ix]; 323 } 324 325 /* 326 * Utility routine to set the valid (written) bit for 327 * a block associated with a pager and offset 328 */ 329 static void 330 swap_pager_setvalid(swp, offset, valid) 331 sw_pager_t swp; 332 vm_offset_t offset; 333 int valid; 334 { 335 register sw_blk_t swb; 336 int ix; 337 338 ix = offset / (SWB_NPAGES*PAGE_SIZE); 339 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) 340 return; 341 342 swb = &swp->sw_blocks[ix]; 343 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 344 if (valid) 345 swb->swb_valid |= (1 << ix); 346 else 347 swb->swb_valid &= ~(1 << ix); 348 return; 349 } 350 351 /* 352 * this routine allocates swap space with a fragmentation 353 * minimization policy. 354 */ 355 int 356 swap_pager_getswapspace( unsigned amount, unsigned *rtval) { 357 vm_swap_size -= amount; 358 if( !rlist_alloc(&swaplist, amount, rtval)) { 359 vm_swap_size += amount; 360 return 0; 361 } else { 362 swapsizecheck(); 363 return 1; 364 } 365 } 366 367 /* 368 * this routine frees swap space with a fragmentation 369 * minimization policy. 370 */ 371 void 372 swap_pager_freeswapspace( unsigned from, unsigned to) { 373 rlist_free(&swaplist, from, to); 374 vm_swap_size += (to-from)+1; 375 swapsizecheck(); 376 } 377 /* 378 * this routine frees swap blocks from a specified pager 379 */ 380 void 381 _swap_pager_freespace(swp, start, size) 382 sw_pager_t swp; 383 vm_offset_t start; 384 vm_offset_t size; 385 { 386 vm_offset_t i; 387 int s; 388 389 s = splbio(); 390 for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) { 391 int valid; 392 int *addr = swap_pager_diskaddr(swp, i, &valid); 393 if (addr && *addr != SWB_EMPTY) { 394 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 395 if( valid) { 396 swap_pager_setvalid(swp, i, 0); 397 } 398 *addr = SWB_EMPTY; 399 } 400 } 401 splx(s); 402 } 403 404 void 405 swap_pager_freespace(pager, start, size) 406 vm_pager_t pager; 407 vm_offset_t start; 408 vm_offset_t size; 409 { 410 _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size); 411 } 412 413 /* 414 * swap_pager_reclaim frees up over-allocated space from all pagers 415 * this eliminates internal fragmentation due to allocation of space 416 * for segments that are never swapped to. It has been written so that 417 * it does not block until the rlist_free operation occurs; it keeps 418 * the queues consistant. 419 */ 420 421 /* 422 * Maximum number of blocks (pages) to reclaim per pass 423 */ 424 #define MAXRECLAIM 256 425 426 void 427 swap_pager_reclaim() 428 { 429 vm_pager_t p; 430 sw_pager_t swp; 431 int i, j, k; 432 int s; 433 int reclaimcount; 434 static int reclaims[MAXRECLAIM]; 435 static int in_reclaim; 436 437 /* 438 * allow only one process to be in the swap_pager_reclaim subroutine 439 */ 440 s = splbio(); 441 if (in_reclaim) { 442 tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0); 443 splx(s); 444 return; 445 } 446 in_reclaim = 1; 447 reclaimcount = 0; 448 449 /* for each pager queue */ 450 for (k = 0; swp_qs[k]; k++) { 451 452 p = swp_qs[k]->tqh_first; 453 while (p && (reclaimcount < MAXRECLAIM)) { 454 455 /* 456 * see if any blocks associated with a pager has been 457 * allocated but not used (written) 458 */ 459 swp = (sw_pager_t) p->pg_data; 460 for (i = 0; i < swp->sw_nblocks; i++) { 461 sw_blk_t swb = &swp->sw_blocks[i]; 462 if( swb->swb_locked) 463 continue; 464 for (j = 0; j < SWB_NPAGES; j++) { 465 if (swb->swb_block[j] != SWB_EMPTY && 466 (swb->swb_valid & (1 << j)) == 0) { 467 reclaims[reclaimcount++] = swb->swb_block[j]; 468 swb->swb_block[j] = SWB_EMPTY; 469 if (reclaimcount >= MAXRECLAIM) 470 goto rfinished; 471 } 472 } 473 } 474 p = p->pg_list.tqe_next; 475 } 476 } 477 478 rfinished: 479 480 /* 481 * free the blocks that have been added to the reclaim list 482 */ 483 for (i = 0; i < reclaimcount; i++) { 484 swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1); 485 wakeup((caddr_t) &in_reclaim); 486 } 487 488 splx(s); 489 in_reclaim = 0; 490 wakeup((caddr_t) &in_reclaim); 491 } 492 493 494 /* 495 * swap_pager_copy copies blocks from one pager to another and 496 * destroys the source pager 497 */ 498 499 void 500 swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset) 501 vm_pager_t srcpager; 502 vm_offset_t srcoffset; 503 vm_pager_t dstpager; 504 vm_offset_t dstoffset; 505 vm_offset_t offset; 506 { 507 sw_pager_t srcswp, dstswp; 508 vm_offset_t i; 509 int s; 510 511 if( vm_swap_size) 512 no_swap_space = 0; 513 514 if( no_swap_space) 515 return; 516 517 srcswp = (sw_pager_t) srcpager->pg_data; 518 dstswp = (sw_pager_t) dstpager->pg_data; 519 520 /* 521 * remove the source pager from the swap_pager internal queue 522 */ 523 s = splbio(); 524 if (srcswp->sw_flags & SW_NAMED) { 525 TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list); 526 srcswp->sw_flags &= ~SW_NAMED; 527 } else { 528 TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list); 529 } 530 531 while (srcswp->sw_poip) { 532 tsleep((caddr_t)srcswp, PVM, "spgout", 0); 533 } 534 splx(s); 535 536 /* 537 * clean all of the pages that are currently active and finished 538 */ 539 (void) swap_pager_clean(); 540 541 s = splbio(); 542 /* 543 * clear source block before destination object 544 * (release allocated space) 545 */ 546 for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) { 547 int valid; 548 int *addr = swap_pager_diskaddr(srcswp, i, &valid); 549 if (addr && *addr != SWB_EMPTY) { 550 swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1); 551 *addr = SWB_EMPTY; 552 } 553 } 554 /* 555 * transfer source to destination 556 */ 557 for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) { 558 int srcvalid, dstvalid; 559 int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset, 560 &srcvalid); 561 int *dstaddrp; 562 /* 563 * see if the source has space allocated 564 */ 565 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 566 /* 567 * if the source is valid and the dest has no space, then 568 * copy the allocation from the srouce to the dest. 569 */ 570 if (srcvalid) { 571 dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid); 572 /* 573 * if the dest already has a valid block, deallocate the 574 * source block without copying. 575 */ 576 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { 577 swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1); 578 *dstaddrp = SWB_EMPTY; 579 } 580 if (dstaddrp && *dstaddrp == SWB_EMPTY) { 581 *dstaddrp = *srcaddrp; 582 *srcaddrp = SWB_EMPTY; 583 swap_pager_setvalid(dstswp, i + dstoffset, 1); 584 } 585 } 586 /* 587 * if the source is not empty at this point, then deallocate the space. 588 */ 589 if (*srcaddrp != SWB_EMPTY) { 590 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 591 *srcaddrp = SWB_EMPTY; 592 } 593 } 594 } 595 596 /* 597 * deallocate the rest of the source object 598 */ 599 for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) { 600 int valid; 601 int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid); 602 if (srcaddrp && *srcaddrp != SWB_EMPTY) { 603 swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1); 604 *srcaddrp = SWB_EMPTY; 605 } 606 } 607 608 splx(s); 609 610 free((caddr_t)srcswp->sw_blocks, M_VMPGDATA); 611 srcswp->sw_blocks = 0; 612 free((caddr_t)srcswp, M_VMPGDATA); 613 srcpager->pg_data = 0; 614 free((caddr_t)srcpager, M_VMPAGER); 615 616 return; 617 } 618 619 620 void 621 swap_pager_dealloc(pager) 622 vm_pager_t pager; 623 { 624 register int i,j; 625 register sw_blk_t bp; 626 register sw_pager_t swp; 627 int s; 628 629 /* 630 * Remove from list right away so lookups will fail if we 631 * block for pageout completion. 632 */ 633 s = splbio(); 634 swp = (sw_pager_t) pager->pg_data; 635 if (swp->sw_flags & SW_NAMED) { 636 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 637 swp->sw_flags &= ~SW_NAMED; 638 } else { 639 TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list); 640 } 641 /* 642 * Wait for all pageouts to finish and remove 643 * all entries from cleaning list. 644 */ 645 646 while (swp->sw_poip) { 647 tsleep((caddr_t)swp, PVM, "swpout", 0); 648 } 649 splx(s); 650 651 652 (void) swap_pager_clean(); 653 654 /* 655 * Free left over swap blocks 656 */ 657 s = splbio(); 658 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) { 659 for (j = 0; j < SWB_NPAGES; j++) 660 if (bp->swb_block[j] != SWB_EMPTY) { 661 swap_pager_freeswapspace((unsigned)bp->swb_block[j], 662 (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1); 663 bp->swb_block[j] = SWB_EMPTY; 664 } 665 } 666 splx(s); 667 668 /* 669 * Free swap management resources 670 */ 671 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 672 swp->sw_blocks = 0; 673 free((caddr_t)swp, M_VMPGDATA); 674 pager->pg_data = 0; 675 free((caddr_t)pager, M_VMPAGER); 676 } 677 678 /* 679 * swap_pager_getmulti can get multiple pages. 680 */ 681 int 682 swap_pager_getmulti(pager, m, count, reqpage, sync) 683 vm_pager_t pager; 684 vm_page_t *m; 685 int count; 686 int reqpage; 687 boolean_t sync; 688 { 689 if( reqpage >= count) 690 panic("swap_pager_getmulti: reqpage >= count\n"); 691 return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage); 692 } 693 694 /* 695 * swap_pager_getpage gets individual pages 696 */ 697 int 698 swap_pager_getpage(pager, m, sync) 699 vm_pager_t pager; 700 vm_page_t m; 701 boolean_t sync; 702 { 703 vm_page_t marray[1]; 704 705 marray[0] = m; 706 return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0); 707 } 708 709 int 710 swap_pager_putmulti(pager, m, c, sync, rtvals) 711 vm_pager_t pager; 712 vm_page_t *m; 713 int c; 714 boolean_t sync; 715 int *rtvals; 716 { 717 int flags; 718 719 if (pager == NULL) { 720 (void) swap_pager_clean(); 721 return VM_PAGER_OK; 722 } 723 724 flags = B_WRITE; 725 if (!sync) 726 flags |= B_ASYNC; 727 728 return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals); 729 } 730 731 /* 732 * swap_pager_putpage writes individual pages 733 */ 734 int 735 swap_pager_putpage(pager, m, sync) 736 vm_pager_t pager; 737 vm_page_t m; 738 boolean_t sync; 739 { 740 int flags; 741 vm_page_t marray[1]; 742 int rtvals[1]; 743 744 745 if (pager == NULL) { 746 (void) swap_pager_clean(); 747 return VM_PAGER_OK; 748 } 749 750 marray[0] = m; 751 flags = B_WRITE; 752 if (!sync) 753 flags |= B_ASYNC; 754 755 swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals); 756 757 return rtvals[0]; 758 } 759 760 static inline int 761 const swap_pager_block_index(swp, offset) 762 sw_pager_t swp; 763 vm_offset_t offset; 764 { 765 return (offset / (SWB_NPAGES*PAGE_SIZE)); 766 } 767 768 static inline int 769 const swap_pager_block_offset(swp, offset) 770 sw_pager_t swp; 771 vm_offset_t offset; 772 { 773 return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE); 774 } 775 776 /* 777 * _swap_pager_haspage returns TRUE if the pager has data that has 778 * been written out. 779 */ 780 static boolean_t 781 _swap_pager_haspage(swp, offset) 782 sw_pager_t swp; 783 vm_offset_t offset; 784 { 785 register sw_blk_t swb; 786 int ix; 787 788 ix = offset / (SWB_NPAGES*PAGE_SIZE); 789 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 790 return(FALSE); 791 } 792 swb = &swp->sw_blocks[ix]; 793 ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE; 794 if (swb->swb_block[ix] != SWB_EMPTY) { 795 if (swb->swb_valid & (1 << ix)) 796 return TRUE; 797 } 798 799 return(FALSE); 800 } 801 802 /* 803 * swap_pager_haspage is the externally accessible version of 804 * _swap_pager_haspage above. this routine takes a vm_pager_t 805 * for an argument instead of sw_pager_t. 806 */ 807 boolean_t 808 swap_pager_haspage(pager, offset) 809 vm_pager_t pager; 810 vm_offset_t offset; 811 { 812 return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset); 813 } 814 815 /* 816 * swap_pager_freepage is a convienience routine that clears the busy 817 * bit and deallocates a page. 818 */ 819 static void 820 swap_pager_freepage(m) 821 vm_page_t m; 822 { 823 PAGE_WAKEUP(m); 824 vm_page_free(m); 825 } 826 827 /* 828 * swap_pager_ridpages is a convienience routine that deallocates all 829 * but the required page. this is usually used in error returns that 830 * need to invalidate the "extra" readahead pages. 831 */ 832 static void 833 swap_pager_ridpages(m, count, reqpage) 834 vm_page_t *m; 835 int count; 836 int reqpage; 837 { 838 int i; 839 for (i = 0; i < count; i++) 840 if (i != reqpage) 841 swap_pager_freepage(m[i]); 842 } 843 844 int swapwritecount=0; 845 846 /* 847 * swap_pager_iodone1 is the completion routine for both reads and async writes 848 */ 849 void 850 swap_pager_iodone1(bp) 851 struct buf *bp; 852 { 853 bp->b_flags |= B_DONE; 854 bp->b_flags &= ~B_ASYNC; 855 wakeup((caddr_t)bp); 856 /* 857 if ((bp->b_flags & B_READ) == 0) 858 vwakeup(bp); 859 */ 860 } 861 862 863 int 864 swap_pager_input(swp, m, count, reqpage) 865 register sw_pager_t swp; 866 vm_page_t *m; 867 int count, reqpage; 868 { 869 register struct buf *bp; 870 sw_blk_t swb[count]; 871 register int s; 872 int i; 873 boolean_t rv; 874 vm_offset_t kva, off[count]; 875 swp_clean_t spc; 876 vm_offset_t paging_offset; 877 vm_object_t object; 878 int reqaddr[count]; 879 880 int first, last; 881 int failed; 882 int reqdskregion; 883 884 object = m[reqpage]->object; 885 paging_offset = object->paging_offset; 886 /* 887 * First determine if the page exists in the pager if this is 888 * a sync read. This quickly handles cases where we are 889 * following shadow chains looking for the top level object 890 * with the page. 891 */ 892 if (swp->sw_blocks == NULL) { 893 swap_pager_ridpages(m, count, reqpage); 894 return(VM_PAGER_FAIL); 895 } 896 897 for(i = 0; i < count; i++) { 898 vm_offset_t foff = m[i]->offset + paging_offset; 899 int ix = swap_pager_block_index(swp, foff); 900 if (ix >= swp->sw_nblocks) { 901 int j; 902 if( i <= reqpage) { 903 swap_pager_ridpages(m, count, reqpage); 904 return(VM_PAGER_FAIL); 905 } 906 for(j = i; j < count; j++) { 907 swap_pager_freepage(m[j]); 908 } 909 count = i; 910 break; 911 } 912 913 swb[i] = &swp->sw_blocks[ix]; 914 off[i] = swap_pager_block_offset(swp, foff); 915 reqaddr[i] = swb[i]->swb_block[off[i]]; 916 } 917 918 /* make sure that our required input request is existant */ 919 920 if (reqaddr[reqpage] == SWB_EMPTY || 921 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { 922 swap_pager_ridpages(m, count, reqpage); 923 return(VM_PAGER_FAIL); 924 } 925 926 927 reqdskregion = reqaddr[reqpage] / dmmax; 928 929 /* 930 * search backwards for the first contiguous page to transfer 931 */ 932 failed = 0; 933 first = 0; 934 for (i = reqpage - 1; i >= 0; --i) { 935 if ( failed || (reqaddr[i] == SWB_EMPTY) || 936 (swb[i]->swb_valid & (1 << off[i])) == 0 || 937 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 938 ((reqaddr[i] / dmmax) != reqdskregion)) { 939 failed = 1; 940 swap_pager_freepage(m[i]); 941 if (first == 0) 942 first = i + 1; 943 } 944 } 945 /* 946 * search forwards for the last contiguous page to transfer 947 */ 948 failed = 0; 949 last = count; 950 for (i = reqpage + 1; i < count; i++) { 951 if ( failed || (reqaddr[i] == SWB_EMPTY) || 952 (swb[i]->swb_valid & (1 << off[i])) == 0 || 953 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || 954 ((reqaddr[i] / dmmax) != reqdskregion)) { 955 failed = 1; 956 swap_pager_freepage(m[i]); 957 if (last == count) 958 last = i; 959 } 960 } 961 962 count = last; 963 if (first != 0) { 964 for (i = first; i < count; i++) { 965 m[i-first] = m[i]; 966 reqaddr[i-first] = reqaddr[i]; 967 off[i-first] = off[i]; 968 } 969 count -= first; 970 reqpage -= first; 971 } 972 973 ++swb[reqpage]->swb_locked; 974 975 /* 976 * at this point: 977 * "m" is a pointer to the array of vm_page_t for paging I/O 978 * "count" is the number of vm_page_t entries represented by "m" 979 * "object" is the vm_object_t for I/O 980 * "reqpage" is the index into "m" for the page actually faulted 981 */ 982 983 spc = NULL; /* we might not use an spc data structure */ 984 985 if (count == 1) { 986 /* 987 * if a kva has not been allocated, we can only do a one page transfer, 988 * so we free the other pages that might have been allocated by 989 * vm_fault. 990 */ 991 swap_pager_ridpages(m, count, reqpage); 992 m[0] = m[reqpage]; 993 reqaddr[0] = reqaddr[reqpage]; 994 995 count = 1; 996 reqpage = 0; 997 /* 998 * get a swap pager clean data structure, block until we get it 999 */ 1000 if (swap_pager_free.tqh_first == NULL) { 1001 s = splbio(); 1002 if( curproc == pageproc) 1003 (void) swap_pager_clean(); 1004 else 1005 wakeup((caddr_t) &vm_pages_needed); 1006 while (swap_pager_free.tqh_first == NULL) { 1007 swap_pager_needflags |= SWAP_FREE_NEEDED; 1008 tsleep((caddr_t)&swap_pager_free, 1009 PVM, "swpfre", 0); 1010 if( curproc == pageproc) 1011 (void) swap_pager_clean(); 1012 else 1013 wakeup((caddr_t) &vm_pages_needed); 1014 } 1015 splx(s); 1016 } 1017 spc = swap_pager_free.tqh_first; 1018 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1019 kva = spc->spc_kva; 1020 bp = spc->spc_bp; 1021 bzero(bp, sizeof *bp); 1022 bp->b_spc = spc; 1023 } else { 1024 /* 1025 * Get a swap buffer header to perform the IO 1026 */ 1027 bp = getpbuf(); 1028 kva = (vm_offset_t) bp->b_data; 1029 } 1030 1031 /* 1032 * map our page(s) into kva for input 1033 */ 1034 pmap_qenter( kva, m, count); 1035 1036 s = splbio(); 1037 bp->b_flags = B_BUSY | B_READ | B_CALL; 1038 bp->b_iodone = swap_pager_iodone1; 1039 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1040 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1041 crhold(bp->b_rcred); 1042 crhold(bp->b_wcred); 1043 bp->b_un.b_addr = (caddr_t) kva; 1044 bp->b_blkno = reqaddr[0]; 1045 bp->b_bcount = PAGE_SIZE*count; 1046 bp->b_bufsize = PAGE_SIZE*count; 1047 1048 bgetvp( swapdev_vp, bp); 1049 1050 swp->sw_piip++; 1051 1052 cnt.v_swapin++; 1053 cnt.v_swappgsin += count; 1054 /* 1055 * perform the I/O 1056 */ 1057 VOP_STRATEGY(bp); 1058 1059 /* 1060 * wait for the sync I/O to complete 1061 */ 1062 while ((bp->b_flags & B_DONE) == 0) { 1063 tsleep((caddr_t)bp, PVM, "swread", 0); 1064 } 1065 1066 if (bp->b_flags & B_ERROR) { 1067 printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", 1068 bp->b_blkno, bp->b_bcount, bp->b_error); 1069 rv = VM_PAGER_ERROR; 1070 } else { 1071 rv = VM_PAGER_OK; 1072 } 1073 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1074 1075 --swp->sw_piip; 1076 if (swp->sw_piip == 0) 1077 wakeup((caddr_t) swp); 1078 1079 /* 1080 * relpbuf does this, but we maintain our own buffer 1081 * list also... 1082 */ 1083 if (bp->b_vp) 1084 brelvp(bp); 1085 1086 splx(s); 1087 --swb[reqpage]->swb_locked; 1088 1089 /* 1090 * remove the mapping for kernel virtual 1091 */ 1092 pmap_qremove( kva, count); 1093 1094 if (spc) { 1095 /* 1096 * if we have used an spc, we need to free it. 1097 */ 1098 if( bp->b_rcred != NOCRED) 1099 crfree(bp->b_rcred); 1100 if( bp->b_wcred != NOCRED) 1101 crfree(bp->b_wcred); 1102 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1103 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1104 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1105 wakeup((caddr_t)&swap_pager_free); 1106 } 1107 } else { 1108 /* 1109 * release the physical I/O buffer 1110 */ 1111 relpbuf(bp); 1112 /* 1113 * finish up input if everything is ok 1114 */ 1115 if( rv == VM_PAGER_OK) { 1116 for (i = 0; i < count; i++) { 1117 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1118 m[i]->flags |= PG_CLEAN; 1119 m[i]->flags &= ~PG_LAUNDRY; 1120 if (i != reqpage) { 1121 /* 1122 * whether or not to leave the page activated 1123 * is up in the air, but we should put the page 1124 * on a page queue somewhere. (it already is in 1125 * the object). 1126 * After some emperical results, it is best 1127 * to deactivate the readahead pages. 1128 */ 1129 vm_page_deactivate(m[i]); 1130 1131 /* 1132 * just in case someone was asking for this 1133 * page we now tell them that it is ok to use 1134 */ 1135 m[i]->flags &= ~PG_FAKE; 1136 PAGE_WAKEUP(m[i]); 1137 } 1138 } 1139 /* 1140 * If we're out of swap space, then attempt to free 1141 * some whenever pages are brought in. We must clear 1142 * the clean flag so that the page contents will be 1143 * preserved. 1144 */ 1145 if (swap_pager_full) { 1146 for (i = 0; i < count; i++) { 1147 m[i]->flags &= ~PG_CLEAN; 1148 } 1149 _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE); 1150 } 1151 } else { 1152 swap_pager_ridpages(m, count, reqpage); 1153 } 1154 } 1155 return(rv); 1156 } 1157 1158 int 1159 swap_pager_output(swp, m, count, flags, rtvals) 1160 register sw_pager_t swp; 1161 vm_page_t *m; 1162 int count; 1163 int flags; 1164 int *rtvals; 1165 { 1166 register struct buf *bp; 1167 sw_blk_t swb[count]; 1168 register int s; 1169 int i, j, ix; 1170 boolean_t rv; 1171 vm_offset_t kva, off, foff; 1172 swp_clean_t spc; 1173 vm_offset_t paging_offset; 1174 vm_object_t object; 1175 int reqaddr[count]; 1176 int failed; 1177 1178 /* 1179 if( count > 1) 1180 printf("off: 0x%x, count: %d\n", m[0]->offset, count); 1181 */ 1182 if( vm_swap_size) 1183 no_swap_space = 0; 1184 if( no_swap_space) { 1185 for(i=0;i<count;i++) 1186 rtvals[i] = VM_PAGER_FAIL; 1187 return VM_PAGER_FAIL; 1188 } 1189 1190 spc = NULL; 1191 1192 object = m[0]->object; 1193 paging_offset = object->paging_offset; 1194 1195 failed = 0; 1196 for(j=0;j<count;j++) { 1197 foff = m[j]->offset + paging_offset; 1198 ix = swap_pager_block_index(swp, foff); 1199 swb[j] = 0; 1200 if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 1201 rtvals[j] = VM_PAGER_FAIL; 1202 failed = 1; 1203 continue; 1204 } else { 1205 rtvals[j] = VM_PAGER_OK; 1206 } 1207 swb[j] = &swp->sw_blocks[ix]; 1208 ++swb[j]->swb_locked; 1209 if( failed) { 1210 rtvals[j] = VM_PAGER_FAIL; 1211 continue; 1212 } 1213 off = swap_pager_block_offset(swp, foff); 1214 reqaddr[j] = swb[j]->swb_block[off]; 1215 if( reqaddr[j] == SWB_EMPTY) { 1216 int blk; 1217 int tries; 1218 int ntoget; 1219 tries = 0; 1220 s = splbio(); 1221 1222 /* 1223 * if any other pages have been allocated in this block, we 1224 * only try to get one page. 1225 */ 1226 for (i = 0; i < SWB_NPAGES; i++) { 1227 if (swb[j]->swb_block[i] != SWB_EMPTY) 1228 break; 1229 } 1230 1231 1232 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1; 1233 /* 1234 * this code is alittle conservative, but works 1235 * (the intent of this code is to allocate small chunks 1236 * for small objects) 1237 */ 1238 if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) { 1239 ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE; 1240 } 1241 1242 retrygetspace: 1243 if (!swap_pager_full && ntoget > 1 && 1244 swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) { 1245 1246 for (i = 0; i < ntoget; i++) { 1247 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; 1248 swb[j]->swb_valid = 0; 1249 } 1250 1251 reqaddr[j] = swb[j]->swb_block[off]; 1252 } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE), 1253 &swb[j]->swb_block[off])) { 1254 /* 1255 * if the allocation has failed, we try to reclaim space and 1256 * retry. 1257 */ 1258 if (++tries == 1) { 1259 swap_pager_reclaim(); 1260 goto retrygetspace; 1261 } 1262 rtvals[j] = VM_PAGER_AGAIN; 1263 failed = 1; 1264 swap_pager_full = 1; 1265 } else { 1266 reqaddr[j] = swb[j]->swb_block[off]; 1267 swb[j]->swb_valid &= ~(1<<off); 1268 } 1269 splx(s); 1270 } 1271 } 1272 1273 /* 1274 * search forwards for the last contiguous page to transfer 1275 */ 1276 failed = 0; 1277 for (i = 0; i < count; i++) { 1278 if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) || 1279 (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) || 1280 (rtvals[i] != VM_PAGER_OK)) { 1281 failed = 1; 1282 if( rtvals[i] == VM_PAGER_OK) 1283 rtvals[i] = VM_PAGER_AGAIN; 1284 } 1285 } 1286 1287 for(i = 0; i < count; i++) { 1288 if( rtvals[i] != VM_PAGER_OK) { 1289 if( swb[i]) 1290 --swb[i]->swb_locked; 1291 } 1292 } 1293 1294 for(i = 0; i < count; i++) 1295 if( rtvals[i] != VM_PAGER_OK) 1296 break; 1297 1298 if( i == 0) { 1299 return VM_PAGER_AGAIN; 1300 } 1301 1302 count = i; 1303 for(i=0;i<count;i++) { 1304 if( reqaddr[i] == SWB_EMPTY) 1305 printf("I/O to empty block????\n"); 1306 } 1307 1308 /* 1309 */ 1310 1311 /* 1312 * For synchronous writes, we clean up 1313 * all completed async pageouts. 1314 */ 1315 if ((flags & B_ASYNC) == 0) { 1316 swap_pager_clean(); 1317 } 1318 1319 kva = 0; 1320 1321 /* 1322 * we allocate a new kva for transfers > 1 page 1323 * but for transfers == 1 page, the swap_pager_free list contains 1324 * entries that have pre-allocated kva's (for efficiency). 1325 * NOTE -- we do not use the physical buffer pool or the 1326 * preallocated associated kva's because of the potential for 1327 * deadlock. This is very subtile -- but deadlocks or resource 1328 * contention must be avoided on pageouts -- or your system will 1329 * sleep (forever) !!! 1330 */ 1331 /* 1332 if ( count > 1) { 1333 kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE); 1334 if( !kva) { 1335 for (i = 0; i < count; i++) { 1336 if( swb[i]) 1337 --swb[i]->swb_locked; 1338 rtvals[i] = VM_PAGER_AGAIN; 1339 } 1340 return VM_PAGER_AGAIN; 1341 } 1342 } 1343 */ 1344 1345 /* 1346 * get a swap pager clean data structure, block until we get it 1347 */ 1348 if (swap_pager_free.tqh_first == NULL) { 1349 s = splbio(); 1350 if( curproc == pageproc) 1351 (void) swap_pager_clean(); 1352 else 1353 wakeup((caddr_t) &vm_pages_needed); 1354 while (swap_pager_free.tqh_first == NULL) { 1355 swap_pager_needflags |= SWAP_FREE_NEEDED; 1356 tsleep((caddr_t)&swap_pager_free, 1357 PVM, "swpfre", 0); 1358 if( curproc == pageproc) 1359 (void) swap_pager_clean(); 1360 else 1361 wakeup((caddr_t) &vm_pages_needed); 1362 } 1363 splx(s); 1364 } 1365 1366 spc = swap_pager_free.tqh_first; 1367 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1368 1369 kva = spc->spc_kva; 1370 1371 /* 1372 * map our page(s) into kva for I/O 1373 */ 1374 pmap_qenter(kva, m, count); 1375 1376 /* 1377 * get the base I/O offset into the swap file 1378 */ 1379 for(i=0;i<count;i++) { 1380 foff = m[i]->offset + paging_offset; 1381 off = swap_pager_block_offset(swp, foff); 1382 /* 1383 * set the valid bit 1384 */ 1385 swb[i]->swb_valid |= (1 << off); 1386 /* 1387 * and unlock the data structure 1388 */ 1389 --swb[i]->swb_locked; 1390 } 1391 1392 s = splbio(); 1393 /* 1394 * Get a swap buffer header and perform the IO 1395 */ 1396 bp = spc->spc_bp; 1397 bzero(bp, sizeof *bp); 1398 bp->b_spc = spc; 1399 1400 bp->b_flags = B_BUSY; 1401 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1402 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1403 if( bp->b_rcred != NOCRED) 1404 crhold(bp->b_rcred); 1405 if( bp->b_wcred != NOCRED) 1406 crhold(bp->b_wcred); 1407 bp->b_data = (caddr_t) kva; 1408 bp->b_blkno = reqaddr[0]; 1409 bgetvp( swapdev_vp, bp); 1410 1411 bp->b_bcount = PAGE_SIZE*count; 1412 bp->b_bufsize = PAGE_SIZE*count; 1413 swapdev_vp->v_numoutput++; 1414 1415 /* 1416 * If this is an async write we set up additional buffer fields 1417 * and place a "cleaning" entry on the inuse queue. 1418 */ 1419 if ( flags & B_ASYNC ) { 1420 spc->spc_flags = 0; 1421 spc->spc_swp = swp; 1422 for(i=0;i<count;i++) 1423 spc->spc_m[i] = m[i]; 1424 spc->spc_count = count; 1425 /* 1426 * the completion routine for async writes 1427 */ 1428 bp->b_flags |= B_CALL; 1429 bp->b_iodone = swap_pager_iodone; 1430 bp->b_dirtyoff = 0; 1431 bp->b_dirtyend = bp->b_bcount; 1432 swp->sw_poip++; 1433 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 1434 } else { 1435 swp->sw_poip++; 1436 bp->b_flags |= B_CALL; 1437 bp->b_iodone = swap_pager_iodone1; 1438 } 1439 1440 cnt.v_swapout++; 1441 cnt.v_swappgsout += count; 1442 /* 1443 * perform the I/O 1444 */ 1445 VOP_STRATEGY(bp); 1446 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) { 1447 if ((bp->b_flags & B_DONE) == B_DONE) { 1448 swap_pager_clean(); 1449 } 1450 splx(s); 1451 for(i=0;i<count;i++) { 1452 rtvals[i] = VM_PAGER_PEND; 1453 } 1454 return VM_PAGER_PEND; 1455 } 1456 1457 /* 1458 * wait for the sync I/O to complete 1459 */ 1460 while ((bp->b_flags & B_DONE) == 0) { 1461 tsleep((caddr_t)bp, PVM, "swwrt", 0); 1462 } 1463 if (bp->b_flags & B_ERROR) { 1464 printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n", 1465 bp->b_blkno, bp->b_bcount, bp->b_error); 1466 rv = VM_PAGER_ERROR; 1467 } else { 1468 rv = VM_PAGER_OK; 1469 } 1470 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE); 1471 1472 --swp->sw_poip; 1473 if (swp->sw_poip == 0) 1474 wakeup((caddr_t) swp); 1475 1476 if (bp->b_vp) 1477 brelvp(bp); 1478 1479 splx(s); 1480 1481 /* 1482 * remove the mapping for kernel virtual 1483 */ 1484 pmap_qremove( kva, count); 1485 1486 /* 1487 * if we have written the page, then indicate that the page 1488 * is clean. 1489 */ 1490 if (rv == VM_PAGER_OK) { 1491 for(i=0;i<count;i++) { 1492 if( rtvals[i] == VM_PAGER_OK) { 1493 m[i]->flags |= PG_CLEAN; 1494 m[i]->flags &= ~PG_LAUNDRY; 1495 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1496 /* 1497 * optimization, if a page has been read during the 1498 * pageout process, we activate it. 1499 */ 1500 if ( (m[i]->flags & PG_ACTIVE) == 0 && 1501 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))) 1502 vm_page_activate(m[i]); 1503 } 1504 } 1505 } else { 1506 for(i=0;i<count;i++) { 1507 rtvals[i] = rv; 1508 m[i]->flags |= PG_LAUNDRY; 1509 } 1510 } 1511 1512 if( bp->b_rcred != NOCRED) 1513 crfree(bp->b_rcred); 1514 if( bp->b_wcred != NOCRED) 1515 crfree(bp->b_wcred); 1516 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1517 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1518 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1519 wakeup((caddr_t)&swap_pager_free); 1520 } 1521 1522 return(rv); 1523 } 1524 1525 boolean_t 1526 swap_pager_clean() 1527 { 1528 register swp_clean_t spc, tspc; 1529 register int s; 1530 1531 tspc = NULL; 1532 if (swap_pager_done.tqh_first == NULL) 1533 return FALSE; 1534 for (;;) { 1535 s = splbio(); 1536 /* 1537 * Look up and removal from done list must be done 1538 * at splbio() to avoid conflicts with swap_pager_iodone. 1539 */ 1540 while ((spc = swap_pager_done.tqh_first) != 0) { 1541 pmap_qremove( spc->spc_kva, spc->spc_count); 1542 swap_pager_finish(spc); 1543 TAILQ_REMOVE(&swap_pager_done, spc, spc_list); 1544 goto doclean; 1545 } 1546 1547 /* 1548 * No operations done, thats all we can do for now. 1549 */ 1550 1551 splx(s); 1552 break; 1553 1554 /* 1555 * The desired page was found to be busy earlier in 1556 * the scan but has since completed. 1557 */ 1558 doclean: 1559 if (tspc && tspc == spc) { 1560 tspc = NULL; 1561 } 1562 spc->spc_flags = 0; 1563 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 1564 if (swap_pager_needflags & SWAP_FREE_NEEDED) { 1565 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1566 wakeup((caddr_t)&swap_pager_free); 1567 } 1568 ++cleandone; 1569 splx(s); 1570 } 1571 1572 return(tspc ? TRUE : FALSE); 1573 } 1574 1575 void 1576 swap_pager_finish(spc) 1577 register swp_clean_t spc; 1578 { 1579 vm_object_t object = spc->spc_m[0]->object; 1580 int i; 1581 1582 if ((object->paging_in_progress -= spc->spc_count) == 0) 1583 thread_wakeup((int) object); 1584 1585 /* 1586 * If no error mark as clean and inform the pmap system. 1587 * If error, mark as dirty so we will try again. 1588 * (XXX could get stuck doing this, should give up after awhile) 1589 */ 1590 if (spc->spc_flags & SPC_ERROR) { 1591 for(i=0;i<spc->spc_count;i++) { 1592 printf("swap_pager_finish: I/O error, clean of page %lx failed\n", 1593 (u_long)VM_PAGE_TO_PHYS(spc->spc_m[i])); 1594 spc->spc_m[i]->flags |= PG_LAUNDRY; 1595 } 1596 } else { 1597 for(i=0;i<spc->spc_count;i++) { 1598 pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); 1599 spc->spc_m[i]->flags |= PG_CLEAN; 1600 } 1601 } 1602 1603 1604 for(i=0;i<spc->spc_count;i++) { 1605 /* 1606 * we wakeup any processes that are waiting on 1607 * these pages. 1608 */ 1609 PAGE_WAKEUP(spc->spc_m[i]); 1610 } 1611 nswiodone -= spc->spc_count; 1612 1613 return; 1614 } 1615 1616 /* 1617 * swap_pager_iodone 1618 */ 1619 void 1620 swap_pager_iodone(bp) 1621 register struct buf *bp; 1622 { 1623 register swp_clean_t spc; 1624 int s; 1625 1626 s = splbio(); 1627 spc = (swp_clean_t) bp->b_spc; 1628 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 1629 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); 1630 if (bp->b_flags & B_ERROR) { 1631 spc->spc_flags |= SPC_ERROR; 1632 printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d", 1633 (bp->b_flags & B_READ) ? "pagein" : "pageout", 1634 bp->b_error, (u_long)bp->b_blkno, bp->b_bcount); 1635 } 1636 1637 /* 1638 if ((bp->b_flags & B_READ) == 0) 1639 vwakeup(bp); 1640 */ 1641 1642 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC); 1643 if (bp->b_vp) { 1644 brelvp(bp); 1645 } 1646 if( bp->b_rcred != NOCRED) 1647 crfree(bp->b_rcred); 1648 if( bp->b_wcred != NOCRED) 1649 crfree(bp->b_wcred); 1650 1651 nswiodone += spc->spc_count; 1652 if (--spc->spc_swp->sw_poip == 0) { 1653 wakeup((caddr_t)spc->spc_swp); 1654 } 1655 1656 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1657 swap_pager_inuse.tqh_first == 0) { 1658 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1659 wakeup((caddr_t)&swap_pager_free); 1660 wakeup((caddr_t)&vm_pages_needed); 1661 } 1662 1663 if (vm_pageout_pages_needed) { 1664 wakeup((caddr_t)&vm_pageout_pages_needed); 1665 } 1666 1667 if ((swap_pager_inuse.tqh_first == NULL) || 1668 (cnt.v_free_count < cnt.v_free_min && 1669 nswiodone + cnt.v_free_count >= cnt.v_free_min) ) { 1670 wakeup((caddr_t)&vm_pages_needed); 1671 } 1672 splx(s); 1673 } 1674 1675 /* 1676 * return true if any swap control structures can be allocated 1677 */ 1678 int 1679 swap_pager_ready() { 1680 if( swap_pager_free.tqh_first) 1681 return 1; 1682 else 1683 return 0; 1684 } 1685