/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.22 1995/01/09 16:05:33 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
extern int vm_pageout_rate_limit;
static int cleandone;
extern int hz;
int swap_pager_full;
extern vm_map_t pager_map;
extern int vm_swap_size;
int no_swap_space = 1;
struct rlist *swaplist;
int nswaplist;
extern int vm_pio_needed;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


extern vm_map_t kernel_map;

/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
int swap_pager_needflags;
struct rlist *swapfrag;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int pendingiowait;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;
extern int vm_page_count;

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (!swap_pager_full)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */

	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;

}
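/*
 * A worked example of the constants above (illustrative only -- the
 * actual values depend on the machine configuration): with 4K pages,
 * 512-byte disk blocks (DEV_BSIZE) and SWB_NPAGES == 8, btodb(PAGE_SIZE)
 * is 8, so dmmax == 8 * 8 * 2 == 128 disk blocks, i.e. a swap "region"
 * of twice the 8-page cluster size; with CLBYTES == PAGE_SIZE, dmmin
 * is 8 (one page's worth of disk blocks).
 */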
/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;

#if 0
		int desiredpendingio;

		desiredpendingio = cnt.v_page_count / 200 + 2;
		if (desiredpendingio < npendingio)
			npendingio = desiredpendingio;
#endif

		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_NOWAIT);
			if (!spc->spc_bp) {
				kmem_free_wakeup(pager_map, spc->spc_kva,
				    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}
	/*
	 * If this is a "named" anonymous region, look it up and return the
	 * appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference to the
			 * object and also to remove from the object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}
	/*
	 * Pager doesn't exist, allocate swap management resources and
	 * initialize.
	 */
	waitok = handle ? M_WAITOK : M_NOWAIT;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL since it
		 * might be the pageout daemon calling.
		 */
		object = vm_object_allocate(size);
		vm_object_enter(object, pager);
		vm_object_setpager(object, pager, 0, FALSE);
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}
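/*
 * Sizing example for the sw_blocks array allocated above (illustrative;
 * assumes 4K pages and SWB_NPAGES == 8): a 1MB object has btodb(size)
 * == 2048 disk blocks and each swblock maps btodb(8 * 4096) == 64 of
 * them, so sw_nblocks == (2048 + 63) / 64 == 32 -- one entry per
 * 8-page cluster of the object.
 */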
/*
 * Returns the disk block associated with a pager and offset.
 * As a side effect, it also returns a flag indicating whether
 * the block has been written (is valid).
 */
static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (NULL);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(unsigned amount, unsigned *rtval)
{
	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, rtval)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		return 1;
	}
}

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(unsigned from, unsigned to)
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}
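/*
 * Example of the offset decomposition used above (assuming 4K pages
 * and SWB_NPAGES == 8): for offset 0x5000 (20480), the cluster index
 * is 20480 / 32768 == 0 and the page index within the cluster is
 * (20480 % 32768) / 4096 == 5, so the disk address lives in
 * sw_blocks[0].swb_block[5] and its written bit is (swb_valid & (1 << 5)).
 */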
/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 256

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static int reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount++] = swb->swb_block[j];
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i], reclaims[i] + btodb(PAGE_SIZE) - 1);
		wakeup((caddr_t) &in_reclaim);
	}

	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */

void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * clear source block before destination object
	 * (release allocated space)
	 */
	for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(srcswp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(*addr, *addr + btodb(PAGE_SIZE) - 1);
			*addr = SWB_EMPTY;
		}
	}
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
		    &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
				/*
				 * if the dest block is allocated but not
				 * valid, free it so the (valid) source
				 * block can take its place; if the dest
				 * block is already valid, the source block
				 * is simply deallocated below without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(*dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}

	/*
	 * deallocate the rest of the source object
	 */
	for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
		int valid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);

		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			swap_pager_freeswapspace(*srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1);
			*srcaddrp = SWB_EMPTY;
		}
	}

	splx(s);

	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}
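/*
 * Note on the offset arithmetic in swap_pager_copy: destination offset
 * i corresponds to source offset (i + offset + srcoffset), so the three
 * passes above cover, in order, the source range [0, offset + srcoffset),
 * the window that maps into the destination, and the source tail beyond
 * (dstswp->sw_osize + offset + srcoffset).
 */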
void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i, j;
	register sw_blk_t bp;
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */

	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);


	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++)
			if (bp->swb_block[j] != SWB_EMPTY) {
				swap_pager_freeswapspace((unsigned) bp->swb_block[j],
				    (unsigned) bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
				bp->swb_block[j] = SWB_EMPTY;
			}
	}
	splx(s);

	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}
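/*
 * Each swb_block[j] freed above names the first DEV_BSIZE block of one
 * page's swap extent, so a page is released as the disk-block range
 * [blk, blk + btodb(PAGE_SIZE) - 1] -- e.g. 8 512-byte blocks per 4K
 * page (values illustrative; they follow the machine configuration).
 */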
/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];


	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const
swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const
swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}
	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  This routine takes a vm_pager_t
 * as an argument instead of a sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
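/*
 * Illustration of the swb_valid bitmask consulted by
 * _swap_pager_haspage above (assuming SWB_NPAGES == 8): the mask holds
 * one bit per page of the cluster, so a value of 0x05 means only pages
 * 0 and 2 have been written.  A slot that is allocated (swb_block[ix]
 * != SWB_EMPTY) but whose valid bit is clear reads as "no page" here,
 * and such slots are the candidates collected by swap_pager_reclaim.
 */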
/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

int swapwritecount = 0;

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * sync writes (async writes complete through swap_pager_iodone).
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/
}


int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	/*
	 * First determine if the page exists in the pager if this is a sync
	 * read.  This quickly handles cases where we are following shadow
	 * chains looking for the top level object with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the block for our required input request exists */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted.
	 */
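	/*
	 * Worked example of the trimming above: with count == 6 and
	 * reqpage == 3, if page 1 is not contiguous on swap and page 5
	 * fails the forward scan, then pages 0, 1 and 5 are freed,
	 * first == 2 and last == 5, so the array is shifted down by two,
	 * leaving count == 3 and reqpage == 1 for the actual transfer.
	 */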

	spc = NULL;	/* we might not use an spc data structure */

	if (count == 1) {
		/*
		 * if a kva has not been allocated, we can only do a one page
		 * transfer, so we free the other pages that might have been
		 * allocated by vm_fault.
		 */
		swap_pager_ridpages(m, count, reqpage);
		m[0] = m[reqpage];
		reqaddr[0] = reqaddr[reqpage];

		count = 1;
		reqpage = 0;
		/*
		 * get a swap pager clean data structure, block until we get
		 * it
		 */
		if (swap_pager_free.tqh_first == NULL) {
			s = splbio();
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
			while (swap_pager_free.tqh_first == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep((caddr_t) &swap_pager_free,
				    PVM, "swpfre", 0);
				if (curproc == pageproc)
					(void) swap_pager_clean();
				else
					wakeup((caddr_t) &vm_pages_needed);
			}
			splx(s);
		}
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);
	swp->sw_piip++;

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
		    (long) bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	--swp->sw_piip;
	if (swp->sw_piip == 0)
		wakeup((caddr_t) swp);


	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	--swb[reqpage]->swb_locked;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);
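	/*
	 * The two acquisition paths above converge here: the single-page
	 * path recycled a preallocated spc (kva plus buffer header), while
	 * the multi-page path borrowed a physical buffer from getpbuf()
	 * that is handed back via relpbuf() below.
	 */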

	if (spc) {
		if (bp->b_flags & B_WANTED)
			wakeup((caddr_t) bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere.  (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					if ((i == reqpage - 1) || (i == reqpage + 1))
						vm_page_activate(m[i]);
					else
						vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}
			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in.  We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}


			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((m[j]->offset == 0) && (ntoget * PAGE_SIZE > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}
	kva = 0;

	/*
	 * we allocate a new kva for transfers > 1 page but for transfers == 1
	 * page, the swap_pager_free list contains entries that have
	 * pre-allocated kva's (for efficiency).  NOTE -- we do not use the
	 * physical buffer pool or the preallocated associated kva's because
	 * of the potential for deadlock.  This is very subtle -- but
	 * deadlocks or resource contention must be avoided on pageouts -- or
	 * your system will sleep (forever) !!!
	 */
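	/*
	 * Note: the spc acquisition below insists on at least three free
	 * entries before taking one, presumably to keep a reserve so that
	 * concurrent pageouts (and the pageout daemon itself) cannot
	 * exhaust the pool and deadlock; the exact margin appears to be a
	 * tuning choice.
	 */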
/*
	if ( count > 1) {
		kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
		if( !kva) {
			for (i = 0; i < count; i++) {
				if( swb[i])
					--swb[i]->swb_locked;
				rtvals[i] = VM_PAGER_AGAIN;
			}
			return VM_PAGER_AGAIN;
		}
	}
*/

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
		s = splbio();
		if (curproc == pageproc) {
			(void) swap_pager_clean();
/*
			splx(s);
			return VM_PAGER_AGAIN;
*/
		} else
			wakeup((caddr_t) &vm_pages_needed);
		while (swap_pager_free.tqh_first == NULL ||
		    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
		    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
			if (curproc == pageproc &&
			    (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)
				wakeup((caddr_t) &cnt.v_free_count);

			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				wakeup((caddr_t) &vm_pages_needed);
		}
		splx(s);
	}
	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
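	/*
	 * Dispatch note: on the async path the spc is queued on
	 * swap_pager_inuse and VM_PAGER_PEND is returned immediately;
	 * completion is handled by swap_pager_iodone, and the pages are
	 * finally cleaned up when swap_pager_clean drains the done list.
	 * The sync path uses swap_pager_iodone1 and waits here instead.
	 */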
	s = splbio();
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
		    (long) bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t) bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    ((m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(m[i]))))
					vm_page_activate(m[i]);
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
	}
	return (rv);
}
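/*
 * swap_pager_clean drains the swap_pager_done list: for each completed
 * async pageout it unmaps the cluster kva, finishes the pages via
 * swap_pager_finish, and recycles the spc onto the free list, waking
 * any waiters.  The tspc "watched entry" bookkeeping is never armed in
 * this version, so callers treat the return value as void.
 */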
boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */

		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			swap_pager_needflags &= ~SWAP_FREE_NEEDED;
			wakeup((caddr_t) &swap_pager_free);
		}
		++cleandone;
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	if ((object->paging_in_progress -= spc->spc_count) == 0)
		thread_wakeup((int) object);

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}


	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the completion routine for async writes.
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}
/*
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
*/

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t) bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_swp->sw_poip == 0) {
		wakeup((caddr_t) spc->spc_swp);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup((caddr_t) &swap_pager_free);
		wakeup((caddr_t) &vm_pages_needed);
	}
	if (vm_pageout_pages_needed) {
		wakeup((caddr_t) &vm_pageout_pages_needed);
	}
	if ((swap_pager_inuse.tqh_first == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		wakeup((caddr_t) &vm_pages_needed);
	}
	splx(s);
}

/*
 * return true if any swap control structures can be allocated
 */
int
swap_pager_ready()
{
	if (swap_pager_free.tqh_first)
		return 1;
	else
		return 0;
}