/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.39 1995/05/14 03:00:08 davidg Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

int swap_pager_input __P((sw_pager_t, vm_page_t *, int, int));
int swap_pager_output __P((sw_pager_t, vm_page_t *, int, int, int *));

int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
int no_swap_space = 1;
struct rlist *swaplist;
int nswaplist;

#define MAX_PAGEOUT_CLUSTER 8

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	sw_pager_t spc_swp;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY (-1)

struct swpclean swap_pager_done;	/* list of completed page cleans */
struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
struct pagerlst swap_pager_un_list;	/* list of "unnamed" anon pagers */

#define	SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define	SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
int swap_pager_needflags;

struct pagerlst *swp_qs[] = {
	&swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
};

int swap_pager_putmulti();

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_getmulti,
	swap_pager_putpage,
	swap_pager_putmulti,
	swap_pager_haspage
};

int npendingio = NPENDINGIO;
int require_swap_init;
void swap_pager_finish();
int dmmin, dmmax;

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

void
swap_pager_init()
{
	dfltpagerops = &swappagerops;

	TAILQ_INIT(&swap_pager_list);
	TAILQ_INIT(&swap_pager_un_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	require_swap_init = 1;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
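
/*
 * Worked example for the constants above (illustrative; assumes 4K pages,
 * 512-byte disk blocks, and SWB_NPAGES == 8): btodb(PAGE_SIZE) == 8, so
 * dmmax == 8 * 8 * 2 == 128 disk blocks.  dmmax bounds the span of a single
 * contiguous swap transfer; the clustering code below refuses to build an
 * I/O that crosses a dmmax boundary (see the "reqaddr[i] / dmmax" checks in
 * swap_pager_input() and swap_pager_output()).
 */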

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register sw_pager_t swp;
	int waitok;
	int i, j;

	if (require_swap_init) {
		swp_clean_t spc;
		struct buf *bp;

		/*
		 * kva's are allocated here so that we don't need to keep
		 * doing kmem_alloc pageables at runtime
		 */
		for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
			spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			if (!spc->spc_kva) {
				break;
			}
			spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
			if (!spc->spc_bp) {
				/* free the full cluster-sized kva allocated above */
				kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
				break;
			}
			spc->spc_flags = 0;
			TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		}
		require_swap_init = 0;
		if (size == 0)
			return (NULL);
	}
	/*
	 * If this is a "named" anonymous region, look it up and return the
	 * appropriate pager if it exists.
	 */
	if (handle) {
		pager = vm_pager_lookup(&swap_pager_list, handle);
		if (pager != NULL) {
			/*
			 * Use vm_object_lookup to gain a reference to the
			 * object and also to remove from the object cache.
			 */
			if (vm_object_lookup(pager) == NULL)
				panic("swap_pager_alloc: bad object");
			return (pager);
		}
	}
	/*
	 * Pager doesn't exist, allocate swap management resources and
	 * initialize.
	 */
	waitok = handle ? M_WAITOK : M_KERNEL;
	pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, waitok);
	if (pager == NULL)
		return (NULL);
	swp = (sw_pager_t) malloc(sizeof *swp, M_VMPGDATA, waitok);
	if (swp == NULL) {
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	size = round_page(size);
	swp->sw_osize = size;
	swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES * PAGE_SIZE);
	swp->sw_blocks = (sw_blk_t)
	    malloc(swp->sw_nblocks * sizeof(*swp->sw_blocks),
	    M_VMPGDATA, waitok);
	if (swp->sw_blocks == NULL) {
		free((caddr_t) swp, M_VMPGDATA);
		free((caddr_t) pager, M_VMPAGER);
		return (NULL);
	}
	for (i = 0; i < swp->sw_nblocks; i++) {
		swp->sw_blocks[i].swb_valid = 0;
		swp->sw_blocks[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
	}

	swp->sw_poip = 0;
	swp->sw_allocsize = 0;
	if (handle) {
		vm_object_t object;

		swp->sw_flags = SW_NAMED;
		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
		/*
		 * Consistent with other pagers: return with object
		 * referenced.  Can't do this with handle == NULL since it
		 * might be the pageout daemon calling.
		 */
		object = vm_object_allocate(offset + size);
		object->flags &= ~OBJ_INTERNAL;
		vm_object_enter(object, pager);
		object->pager = pager;
	} else {
		swp->sw_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
	}
	pager->pg_handle = handle;
	pager->pg_ops = &swappagerops;
	pager->pg_type = PG_SWAP;
	pager->pg_data = (caddr_t) swp;

	return (pager);
}

/*
 * Returns the disk block associated with a pager and offset; additionally,
 * as a side effect, returns a flag indicating whether the block has been
 * written.
 */
inline static int *
swap_pager_diskaddr(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if ((swp->sw_blocks == NULL) || (ix >= swp->sw_nblocks) ||
	    (offset >= swp->sw_osize)) {
		return (NULL);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(swp, offset, valid)
	sw_pager_t swp;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return;

	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * this routine allocates swap space with a fragmentation
 * minimization policy.
 */
int
swap_pager_getswapspace(sw_pager_t swp, unsigned amount, unsigned *rtval)
{
	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, rtval)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		swp->sw_allocsize += amount;
		return 1;
	}
}
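
/*
 * Illustrative sketch (not part of the original code): how a caller pairs
 * swap_pager_getswapspace() with swap_pager_freeswapspace().  The range
 * passed to the free routine is inclusive, in DEV_BSIZE disk blocks.  The
 * function name below is hypothetical.
 */
#if 0
static void
swap_space_example(sw_pager_t swp)
{
	unsigned blk;

	/* reserve enough disk blocks to back one page */
	if (swap_pager_getswapspace(swp, btodb(PAGE_SIZE), &blk)) {
		/* ... the page would be written to blocks blk .. blk + btodb(PAGE_SIZE) - 1 ... */
		swap_pager_freeswapspace(swp, blk, blk + btodb(PAGE_SIZE) - 1);
	}
}
#endif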

/*
 * this routine frees swap space with a fragmentation
 * minimization policy.
 */
void
swap_pager_freeswapspace(sw_pager_t swp, unsigned from, unsigned to)
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	swp->sw_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * this routine frees swap blocks from a specified pager
 */
void
_swap_pager_freespace(swp, start, size)
	sw_pager_t swp;
	vm_offset_t start;
	vm_offset_t size;
{
	vm_offset_t i;
	int s;

	s = splbio();
	for (i = start; i < round_page(start + size); i += PAGE_SIZE) {
		int valid;
		int *addr = swap_pager_diskaddr(swp, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(swp, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(swp, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

void
swap_pager_freespace(pager, start, size)
	vm_pager_t pager;
	vm_offset_t start;
	vm_offset_t size;
{
	_swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
}

static void
swap_pager_free_swap(swp)
	sw_pager_t swp;
{
	register int i, j;
	register sw_blk_t bp;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (bp->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = bp->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the
				 * current run
				 */
				} else if (bp->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a
				 * new one
				 */
				} else {
					swap_pager_freeswapspace(swp, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = bp->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					bp->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(swp, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}
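
/*
 * Note: swap_pager_free_swap() above coalesces adjacent disk blocks into
 * runs and hands each run to swap_pager_freeswapspace() as a single
 * inclusive range, so a pager backed by mostly-contiguous swap is torn
 * down with a few rlist_free() operations instead of one per page.
 */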

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

void
swap_pager_reclaim()
{
	vm_pager_t p;
	sw_pager_t swp;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		sw_pager_t pager;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		p = swp_qs[k]->tqh_first;
		while (p && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have
			 * been allocated but not used (written)
			 */
			swp = (sw_pager_t) p->pg_data;
			for (i = 0; i < swp->sw_nblocks; i++) {
				sw_blk_t swb = &swp->sw_blocks[i];

				if (swb->swb_locked)
					continue;
				for (j = 0; j < SWB_NPAGES; j++) {
					if (swb->swb_block[j] != SWB_EMPTY &&
					    (swb->swb_valid & (1 << j)) == 0) {
						reclaims[reclaimcount].address = swb->swb_block[j];
						reclaims[reclaimcount++].pager = swp;
						swb->swb_block[j] = SWB_EMPTY;
						if (reclaimcount >= MAXRECLAIM)
							goto rfinished;
					}
				}
			}
			p = p->pg_list.tqe_next;
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].pager, reclaims[i].address,
		    reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup((caddr_t) &in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
void
swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
	vm_pager_t srcpager;
	vm_offset_t srcoffset;
	vm_pager_t dstpager;
	vm_offset_t dstoffset;
	vm_offset_t offset;
{
	sw_pager_t srcswp, dstswp;
	vm_offset_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space)
		return;

	srcswp = (sw_pager_t) srcpager->pg_data;
	origsize = srcswp->sw_allocsize;
	dstswp = (sw_pager_t) dstpager->pg_data;

	/*
	 * remove the source pager from the swap_pager internal queue
	 */
	s = splbio();
	if (srcswp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
		srcswp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
	}

	while (srcswp->sw_poip) {
		tsleep((caddr_t) srcswp, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	(void) swap_pager_clean();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
		int srcvalid, dstvalid;
		int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
		    &srcvalid);
		int *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset,
				    &dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstswp, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstswp->sw_allocsize += btodb(PAGE_SIZE);
					srcswp->sw_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstswp, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcswp, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcswp);

	if (srcswp->sw_allocsize)
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcswp->sw_allocsize, origsize);
	free((caddr_t) srcswp->sw_blocks, M_VMPGDATA);
	srcswp->sw_blocks = 0;
	free((caddr_t) srcswp, M_VMPGDATA);
	srcpager->pg_data = 0;
	free((caddr_t) srcpager, M_VMPAGER);

	return;
}

void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register sw_pager_t swp;
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	s = splbio();
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	} else {
		TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
	}
	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	while (swp->sw_poip) {
		tsleep((caddr_t) swp, PVM, "swpout", 0);
	}
	splx(s);

	(void) swap_pager_clean();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(swp);

	if (swp->sw_allocsize)
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    swp->sw_allocsize);
	/*
	 * Free swap management resources
	 */
	free((caddr_t) swp->sw_blocks, M_VMPGDATA);
	swp->sw_blocks = 0;
	free((caddr_t) swp, M_VMPGDATA);
	pager->pg_data = 0;
	free((caddr_t) pager, M_VMPAGER);
}
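
/*
 * The routines below are the pager entry points that the VM system reaches
 * indirectly through the swappagerops vector declared near the top of this
 * file; callers outside this file normally go through the generic vm_pager
 * wrappers rather than calling swap_pager_getpage() and friends directly.
 */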

/*
 * swap_pager_getmulti can get multiple pages.
 */
int
swap_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	if (reqpage >= count)
		panic("swap_pager_getmulti: reqpage >= count");
	return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * swap_pager_getpage gets individual pages
 */
int
swap_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	marray[0] = m;
	return swap_pager_input((sw_pager_t) pager->pg_data, marray, 1, 0);
}

int
swap_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	int flags;

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	return swap_pager_output((sw_pager_t) pager->pg_data, m, c, flags, rtvals);
}

/*
 * swap_pager_putpage writes individual pages
 */
int
swap_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	int flags;
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL) {
		(void) swap_pager_clean();
		return VM_PAGER_OK;
	}
	marray[0] = m;
	flags = B_WRITE;
	if (!sync)
		flags |= B_ASYNC;

	swap_pager_output((sw_pager_t) pager->pg_data, marray, 1, flags, rtvals);

	return rtvals[0];
}

static inline int
const
swap_pager_block_index(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return (offset / (SWB_NPAGES * PAGE_SIZE));
}

static inline int
const
swap_pager_block_offset(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	return ((offset % (PAGE_SIZE * SWB_NPAGES)) / PAGE_SIZE);
}

/*
 * _swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
_swap_pager_haspage(swp, offset)
	sw_pager_t swp;
	vm_offset_t offset;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / (SWB_NPAGES * PAGE_SIZE);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
		return (FALSE);
	}
	swb = &swp->sw_blocks[ix];
	ix = (offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix))
			return TRUE;
	}
	return (FALSE);
}

/*
 * swap_pager_haspage is the externally accessible version of
 * _swap_pager_haspage above.  This routine takes a vm_pager_t
 * as an argument instead of a sw_pager_t.
 */
boolean_t
swap_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
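
/*
 * Worked example for the two index helpers above (illustrative; assumes
 * 4K pages and SWB_NPAGES == 8, so each sw_blk_t maps a 32K span): for
 * offset 0x9000, swap_pager_block_index() yields 0x9000 / 0x8000 == 1
 * (the second sw_blk_t), and swap_pager_block_offset() yields
 * (0x9000 % 0x8000) / 0x1000 == 1, i.e. swb_block[1] and valid bit (1 << 1).
 */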

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and async writes
 */
void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup((caddr_t) bp);
}


int
swap_pager_input(swp, m, count, reqpage)
	register sw_pager_t swp;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = object->paging_offset;
	sequential = (m[reqpage]->offset == (object->last_read + PAGE_SIZE));
	/*
	 * First determine if the page exists in the pager if this is a sync
	 * read.  This quickly handles cases where we are following shadow
	 * chains looking for the top level object with the page.
	 */
	if (swp->sw_blocks == NULL) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	for (i = 0; i < count; i++) {
		vm_offset_t foff = m[i]->offset + paging_offset;
		int ix = swap_pager_block_index(swp, foff);

		if (ix >= swp->sw_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &swp->sw_blocks[ix];
		off[i] = swap_pager_block_offset(swp, foff);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the block backing the requested page is present */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;
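
	/*
	 * The two scans above trimmed the cluster to pages whose swap blocks
	 * are disk-contiguous with the requested page and fall in the same
	 * dmmax region, so the transfer below can be issued as a single
	 * strategy call starting at reqaddr[0].
	 */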
"count" is the number of vm_page_t entries represented 1007 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index 1008 * into "m" for the page actually faulted 1009 */ 1010 1011 spc = NULL; /* we might not use an spc data structure */ 1012 1013 if ((count == 1) && (swap_pager_free.tqh_first != NULL)) { 1014 /* 1015 * if a kva has not been allocated, we can only do a one page 1016 * transfer, so we free the other pages that might have been 1017 * allocated by vm_fault. 1018 */ 1019 swap_pager_ridpages(m, count, reqpage); 1020 m[0] = m[reqpage]; 1021 reqaddr[0] = reqaddr[reqpage]; 1022 1023 count = 1; 1024 reqpage = 0; 1025 /* 1026 * get a swap pager clean data structure, block until we get 1027 * it 1028 */ 1029 if (swap_pager_free.tqh_first == NULL) { 1030 s = splbio(); 1031 if (curproc == pageproc) 1032 (void) swap_pager_clean(); 1033 else 1034 pagedaemon_wakeup(); 1035 while (swap_pager_free.tqh_first == NULL) { 1036 swap_pager_needflags |= SWAP_FREE_NEEDED; 1037 if (curproc == pageproc) 1038 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT; 1039 tsleep((caddr_t) &swap_pager_free, 1040 PVM, "swpfre", 0); 1041 if (curproc == pageproc) 1042 (void) swap_pager_clean(); 1043 else 1044 pagedaemon_wakeup(); 1045 } 1046 splx(s); 1047 } 1048 spc = swap_pager_free.tqh_first; 1049 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 1050 kva = spc->spc_kva; 1051 bp = spc->spc_bp; 1052 bzero(bp, sizeof *bp); 1053 bp->b_spc = spc; 1054 bp->b_vnbufs.le_next = NOLIST; 1055 } else { 1056 /* 1057 * Get a swap buffer header to perform the IO 1058 */ 1059 bp = getpbuf(); 1060 kva = (vm_offset_t) bp->b_data; 1061 } 1062 1063 /* 1064 * map our page(s) into kva for input 1065 */ 1066 pmap_qenter(kva, m, count); 1067 1068 bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING; 1069 bp->b_iodone = swap_pager_iodone1; 1070 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 1071 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1072 crhold(bp->b_rcred); 1073 crhold(bp->b_wcred); 1074 bp->b_un.b_addr = (caddr_t) kva; 1075 bp->b_blkno = reqaddr[0]; 1076 bp->b_bcount = PAGE_SIZE * count; 1077 bp->b_bufsize = PAGE_SIZE * count; 1078 1079 pbgetvp(swapdev_vp, bp); 1080 swp->sw_piip++; 1081 1082 cnt.v_swapin++; 1083 cnt.v_swappgsin += count; 1084 /* 1085 * perform the I/O 1086 */ 1087 VOP_STRATEGY(bp); 1088 1089 /* 1090 * wait for the sync I/O to complete 1091 */ 1092 s = splbio(); 1093 while ((bp->b_flags & B_DONE) == 0) { 1094 tsleep((caddr_t) bp, PVM, "swread", 0); 1095 } 1096 1097 if (bp->b_flags & B_ERROR) { 1098 printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", 1099 bp->b_blkno, bp->b_bcount, bp->b_error); 1100 rv = VM_PAGER_ERROR; 1101 } else { 1102 rv = VM_PAGER_OK; 1103 } 1104 1105 --swp->sw_piip; 1106 if (swp->sw_piip == 0) 1107 wakeup((caddr_t) swp); 1108 1109 1110 /* 1111 * relpbuf does this, but we maintain our own buffer list also... 1112 */ 1113 if (bp->b_vp) 1114 pbrelvp(bp); 1115 1116 splx(s); 1117 --swb[reqpage]->swb_locked; 1118 1119 /* 1120 * remove the mapping for kernel virtual 1121 */ 1122 pmap_qremove(kva, count); 1123 1124 if (spc) { 1125 m[reqpage]->object->last_read = m[reqpage]->offset; 1126 if (bp->b_flags & B_WANTED) 1127 wakeup((caddr_t) bp); 1128 /* 1129 * if we have used an spc, we need to free it. 
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup((caddr_t) &swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere.  (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->offset;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever pages are brought in.  We must clear
			 * the clean flag so that the page contents will be
			 * preserved.
			 */
			if (swap_pager_full) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				_swap_pager_freespace(swp, m[0]->offset + paging_offset, count * PAGE_SIZE);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_output(swp, m, count, flags, rtvals)
	register sw_pager_t swp;
	vm_page_t *m;
	int count;
	int flags;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, foff;
	swp_clean_t spc;
	vm_offset_t paging_offset;
	vm_object_t object;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_offset = object->paging_offset;

	failed = 0;
	for (j = 0; j < count; j++) {
		foff = m[j]->offset + paging_offset;
		ix = swap_pager_block_index(swp, foff);
		swb[j] = 0;
		if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &swp->sw_blocks[ix];
		++swb[j]->swb_locked;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(swp, foff);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			int blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}
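
			/*
			 * If the scan above ran off the end (i == SWB_NPAGES),
			 * the whole sw_blk_t is still empty, so we try to
			 * reserve a full SWB_NPAGES cluster of contiguous
			 * disk blocks; otherwise we fall back to a single
			 * page so we don't over-allocate a partly used block.
			 */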
			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks
			 * for small objects)
			 */
			if ((foff == 0) &&
			    ((ntoget * PAGE_SIZE) > object->size)) {
				ntoget = (object->size + (PAGE_SIZE - 1)) / PAGE_SIZE;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(swp, ntoget * btodb(PAGE_SIZE), &blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(swp, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY)
			printf("I/O to empty block????\n");
	}
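
	/*
	 * At this point m[0..count-1] all have rtvals[i] == VM_PAGER_OK and
	 * are backed by one contiguous run of disk blocks starting at
	 * reqaddr[0] within a single dmmax region; pages that did not fit
	 * the run were trimmed off above and reported as VM_PAGER_AGAIN.
	 */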

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		swap_pager_clean();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free.tqh_first == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
		s = splbio();
		if (curproc == pageproc) {
			(void) swap_pager_clean();
#if 0
			splx(s);
			return VM_PAGER_AGAIN;
#endif
		} else
			pagedaemon_wakeup();
		while (swap_pager_free.tqh_first == NULL ||
		    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
		    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
			if (curproc == pageproc) {
				swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
				if ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved)
					wakeup((caddr_t) &cnt.v_free_count);
			}

			swap_pager_needflags |= SWAP_FREE_NEEDED;
			tsleep((caddr_t) &swap_pager_free,
			    PVM, "swpfre", 0);
			if (curproc == pageproc)
				(void) swap_pager_clean();
			else
				pagedaemon_wakeup();
		}
		splx(s);
	}
	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset + paging_offset;
		off = swap_pager_block_offset(swp, foff);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		--swb[i]->swb_locked;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;
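
	/*
	 * Lifecycle of an spc from here on: for async writes it moves to the
	 * inuse queue, swap_pager_iodone() shifts it to the done queue at
	 * interrupt time, and swap_pager_clean() finally recycles it onto
	 * the free queue; synchronous writes skip the queues and return the
	 * spc directly at the bottom of this function.
	 */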

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (flags & B_ASYNC) {
		spc->spc_flags = 0;
		spc->spc_swp = swp;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		swp->sw_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if ((flags & (B_READ | B_ASYNC)) == B_ASYNC) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_clean();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	--swp->sw_poip;
	if (swp->sw_poip == 0)
		wakeup((caddr_t) swp);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t) bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization, if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->flags & PG_ACTIVE) == 0 &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup((caddr_t) &swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}

boolean_t
swap_pager_clean()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return FALSE;
	for (;;) {
		s = splbio();
		/*
		 * Lookup and removal from the done list must be done at
		 * splbio() to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup((caddr_t) &swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return (tspc ? TRUE : FALSE);
}

void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		thread_wakeup((int) object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 &&
			    ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
"pagein" : "pageout", 1658 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error); 1659 } 1660 1661 if (bp->b_vp) 1662 pbrelvp(bp); 1663 1664 if (bp->b_flags & B_WANTED) 1665 wakeup((caddr_t) bp); 1666 1667 if (bp->b_rcred != NOCRED) 1668 crfree(bp->b_rcred); 1669 if (bp->b_wcred != NOCRED) 1670 crfree(bp->b_wcred); 1671 1672 nswiodone += spc->spc_count; 1673 if (--spc->spc_swp->sw_poip == 0) { 1674 wakeup((caddr_t) spc->spc_swp); 1675 } 1676 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1677 swap_pager_inuse.tqh_first == 0) { 1678 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1679 wakeup((caddr_t) &swap_pager_free); 1680 } 1681 1682 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) { 1683 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT; 1684 pagedaemon_wakeup(); 1685 } 1686 1687 if (vm_pageout_pages_needed) { 1688 wakeup((caddr_t) &vm_pageout_pages_needed); 1689 vm_pageout_pages_needed = 0; 1690 } 1691 if ((swap_pager_inuse.tqh_first == NULL) || 1692 ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && 1693 nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { 1694 pagedaemon_wakeup(); 1695 } 1696 splx(s); 1697 } 1698 1699 /* 1700 * return true if any swap control structures can be allocated 1701 */ 1702 int 1703 swap_pager_ready() 1704 { 1705 if (swap_pager_free.tqh_first) 1706 return 1; 1707 else 1708 return 0; 1709 } 1710