/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id: swap_pager.c,v 1.62 1996/03/03 21:11:05 dyson Exp $
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
struct rlisthdr swaplist;

#define MAX_PAGEOUT_CLUSTER 16

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;

static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY (-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define SWAP_FREE_NEEDED 0x1	/* need a swap block */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};
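
/*
 * Overview note (summary added for clarity, derived from the code
 * below): a swpagerclean entry cycles through the three swpclean lists
 * above.  It starts on swap_pager_free, moves to swap_pager_inuse
 * while an async pageout is in flight, is moved to swap_pager_done by
 * swap_pager_iodone(), and is finally recycled back onto
 * swap_pager_free by swap_pager_sync().
 */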

/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
	swap_pager_alloc __P((void *handle, vm_size_t size,
			      vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
	swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
				int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	swap_pager_sync __P((void));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio = NPENDINGIO;
static int dmmin;
int dmmax;

static __pure int
		swap_pager_block_index __P((vm_pindex_t pindex)) __pure2;
static __pure int
		swap_pager_block_offset __P((vm_pindex_t pindex)) __pure2;
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
					 vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_freepage __P((vm_page_t m));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
					      unsigned int from,
					      unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
					     unsigned int amount,
					     daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
					 int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
					 vm_offset_t offset, int valid));
static void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = CLBYTES / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
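
/*
 * Worked example of the constants above (illustrative only; the actual
 * values depend on the machine configuration): with 4K pages,
 * DEV_BSIZE == 512, CLBYTES == PAGE_SIZE and SWB_NPAGES == 8, this
 * gives dmmin = 4096 / 512 = 8 disk blocks per page and
 * dmmax = btodb(8 * 4096) * 2 = 64 * 2 = 128 disk blocks per swap
 * interleave region.
 */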

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * KVAs are allocated here so that we don't need to keep doing
	 * pageable kmem allocations at runtime.
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			/* free the full cluster-sized allocation made above */
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	}
}

int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}
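
/*
 * Worked example of the nblocks rounding above (illustrative only):
 * with SWB_NPAGES == 8, an object of 20 pages gets
 * (20 + 8 - 1) / 8 == 3 sw_blk structures, the last one only
 * partially used.
 */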

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a
			 * disaster.  Probably quite rare, but it is yet
			 * another reason to just rip support of "named
			 * anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
			    OFF_TO_IDX(offset + PAGE_SIZE - 1) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
		    OFF_TO_IDX(offset + PAGE_SIZE - 1) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * Returns the disk block associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid).
 */
inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (NULL);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset.
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}

/*
 * This routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}
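
/*
 * Illustrative example of the pindex mapping used by
 * swap_pager_diskaddr() and swap_pager_setvalid() above (assuming
 * SWB_NPAGES == 8): page index 19 is described by
 * swp_blocks[19 / 8].swb_block[19 % 8], i.e. slot 3 of sw_blk 2, and
 * its "written" state is bit (1 << 3) of that sw_blk's swb_valid.
 */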

/*
 * This routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * This routine frees swap blocks from a specified pager.
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the
				 * current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a
				 * new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}
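
/*
 * Example of the run coalescing above (illustrative, assuming
 * btodb(PAGE_SIZE) == 8): three pages at disk blocks 100, 108 and 116
 * form a single run, so one swap_pager_freeswapspace(object, 100, 123)
 * call replaces three separate frees.
 */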

/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * Allow only one process to be in the swap_pager_reclaim subroutine.
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = swp_qs[k]->tqh_first;
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * See if any blocks associated with a pager have been
			 * allocated but not used (written).
			 */
			if (object->paging_in_progress == 0) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = object->pager_object_list.tqe_next;
		}
	}

rfinished:

	/*
	 * Free the blocks that have been added to the reclaim list.
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager.
 */
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * Remove the source object from the swap_pager internal queue.
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * Clean all of the pages that are currently active and finished.
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
		    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * If the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * If the dest block is not valid but still
				 * has space allocated, free the stale block
				 * so the source block can be moved over; if
				 * the dest block is already valid, the source
				 * block is simply deallocated below without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * If the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);


	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}

static inline __pure int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static inline __pure int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}
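
/*
 * Note (summary added for clarity): consecutive page slots map to
 * contiguous disk blocks only when they were allocated as a cluster,
 * which is what the before/after scans in swap_pager_haspage() below
 * check for.
 */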

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out.
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  It is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * synchronous writes.
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}

static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	int rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that the required input request exists */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;
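
	/*
	 * Illustrative note (assuming dmmax == 128): disk blocks 0..127
	 * form region 0, blocks 128..255 region 1, and so on.  Clustered
	 * I/O is not allowed to span such a region boundary, which is
	 * what the reqdskregion comparisons below enforce.
	 */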
	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * At this point: "m" is a pointer to the array of vm_page_t's for
	 * paging I/O, "count" is the number of vm_page_t entries represented
	 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
	 * index into "m" for the page actually faulted.
	 */
	spc = NULL;	/* we might not use an spc data structure */

	if ((count == 1) && (swap_pager_free.tqh_first != NULL)) {
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swread", 0);
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * Whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * Just in case someone was asking for
					 * this page, we now tell them that it
					 * is ok to use.
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->pindex;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in.  We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
			 */
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	int rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * If any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * This code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks
			 * for small objects).
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * If the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * Get a swap pager clean data structure, blocking until we get it.
	 */
	if (swap_pager_free.tqh_first == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
	    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
		s = splbio();
		if (curproc == pageproc) {
	retryfree:
			/*
			 * The pageout daemon needs a swap control block.
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT | SWAP_FREE_NEEDED;
			/*
			 * If it does not get one within a short time, then
			 * there is a potential deadlock, so we go on trying
			 * to free pages.  It is important to block here as
			 * opposed to returning, thereby allowing the pageout
			 * daemon to continue.  It is likely that the pageout
			 * daemon will start suboptimally reclaiming vnode
			 * backed pages if we don't block.  Since the I/O
			 * subsystem is probably already fully utilized, we
			 * might as well wait.
			 */
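			/*
			 * (With the traditional hz == 100 the tsleep below
			 * times out after hz / 5 == 20 ticks, i.e. about
			 * 200 ms.)
			 */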
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz / 5)) {
				swap_pager_sync();
				if (swap_pager_free.tqh_first == NULL ||
				    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
				    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
				/*
				 * We make sure that pageouts aren't taking up
				 * all of the free swap control blocks.
				 */
				swap_pager_sync();
				if (swap_pager_free.tqh_first == NULL ||
				    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
				    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
					goto retryfree;
				}
			}
		} else {
			pagedaemon_wakeup();
			while (swap_pager_free.tqh_first == NULL ||
			    swap_pager_free.tqh_first->spc_list.tqe_next == NULL ||
			    swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = swap_pager_free.tqh_first;
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
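	/*
	 * (Summary added for clarity: on the async path the pages are
	 * tracked in the spc so that swap_pager_iodone() and
	 * swap_pager_finish() can mark them clean later; the sync path
	 * below simply waits for B_DONE.)
	 */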
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * Optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}

static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (swap_pager_done.tqh_first == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = swap_pager_done.tqh_first) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		wakeup(object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX we could get stuck doing
	 * this; we should give up after a while.)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->queue != PQ_ACTIVE) &&
			    ((spc->spc_m[i]->flags & PG_WANTED) ||
			    pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone is the completion routine for async pageouts.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	nswiodone += spc->spc_count;
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    swap_pager_inuse.tqh_first == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}

	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	if ((swap_pager_inuse.tqh_first == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}