/*
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
 *
 *	@(#)swap_pager.c	8.9 (Berkeley) 3/21/94
 * $Id$
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>
#include <sys/rlist.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifndef NPENDINGIO
#define NPENDINGIO	10
#endif

static int nswiodone;
int swap_pager_full;
extern int vm_swap_size;
static int no_swap_space = 1;
struct rlisthdr swaplist;

#define MAX_PAGEOUT_CLUSTER 16

TAILQ_HEAD(swpclean, swpagerclean);

typedef struct swpagerclean *swp_clean_t;
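
/*
 * A swap pager clean structure tracks one in-flight pageout: its buf,
 * the KVA window the pages are mapped into, and the pages themselves
 * (at most MAX_PAGEOUT_CLUSTER of them).  Each structure cycles through
 * the free -> inuse -> done queues declared below.
 */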
static struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;
	int spc_flags;
	struct buf *spc_bp;
	vm_object_t spc_object;
	vm_offset_t spc_kva;
	int spc_count;
	vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
} swcleanlist[NPENDINGIO];


/* spc_flags values */
#define SPC_ERROR	0x01

#define SWB_EMPTY	(-1)

/* list of completed page cleans */
static struct swpclean swap_pager_done;

/* list of pending page cleans */
static struct swpclean swap_pager_inuse;

/* list of free pager clean structs */
static struct swpclean swap_pager_free;
int swap_pager_free_count;

/* list of "named" anon region objects */
static struct pagerlst swap_pager_object_list;

/* list of "unnamed" anon region objects */
struct pagerlst swap_pager_un_object_list;

#define SWAP_FREE_NEEDED	0x1	/* need a swap block */
#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
static int swap_pager_needflags;

static struct pagerlst *swp_qs[] = {
	&swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
};

/*
 * pagerops for OBJT_SWAP - "swap pager".
 */
static vm_object_t
	swap_pager_alloc __P((void *handle, vm_size_t size,
			      vm_prot_t prot, vm_ooffset_t offset));
static void	swap_pager_dealloc __P((vm_object_t object));
static boolean_t
	swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
				int *before, int *after));
static int	swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void	swap_pager_init __P((void));
static void	swap_pager_sync __P((void));

struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpages,
	swap_pager_putpages,
	swap_pager_haspage,
	swap_pager_sync
};

static int npendingio = NPENDINGIO;
static int dmmin;
int dmmax;

static int	swap_pager_block_index __P((vm_pindex_t pindex));
static int	swap_pager_block_offset __P((vm_pindex_t pindex));
static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
					 vm_pindex_t pindex, int *valid));
static void	swap_pager_finish __P((swp_clean_t spc));
static void	swap_pager_freepage __P((vm_page_t m));
static void	swap_pager_free_swap __P((vm_object_t object));
static void	swap_pager_freeswapspace __P((vm_object_t object,
					      unsigned int from,
					      unsigned int to));
static int	swap_pager_getswapspace __P((vm_object_t object,
					     unsigned int amount,
					     daddr_t *rtval));
static void	swap_pager_iodone __P((struct buf *));
static void	swap_pager_iodone1 __P((struct buf *bp));
static void	swap_pager_reclaim __P((void));
static void	swap_pager_ridpages __P((vm_page_t *m, int count,
					 int reqpage));
static void	swap_pager_setvalid __P((vm_object_t object,
					 vm_offset_t offset, int valid));
static void	swapsizecheck __P((void));

#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))

static inline void
swapsizecheck()
{
	if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
		if (swap_pager_full == 0)
			printf("swap_pager: out of swap space\n");
		swap_pager_full = 1;
	} else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
		swap_pager_full = 0;
}

static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
	 */
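	/*
	 * For example, assuming 4K pages and 512-byte disk blocks
	 * (DEV_BSIZE), dmmin comes out to 8 blocks (one page) and,
	 * with SWB_NPAGES of 8, dmmax to btodb(8 * 4096) * 2 == 128.
	 */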
	dmmin = PAGE_SIZE / DEV_BSIZE;
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}

void
swap_pager_swap_init()
{
	swp_clean_t spc;
	struct buf *bp;
	int i;

	/*
	 * KVAs are allocated here so that we don't need to keep doing
	 * pageable kmem allocations at runtime.
	 */
	for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
		spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
		if (!spc->spc_kva) {
			break;
		}
		spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
		if (!spc->spc_bp) {
			kmem_free_wakeup(pager_map, spc->spc_kva,
			    PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
			break;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
	}
}
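
/*
 * Allocate the per-object swap metadata: an array of sw_blk structures,
 * each covering SWB_NPAGES pages, sized by ceiling division (e.g. with
 * SWB_NPAGES of 8, a 17-page object needs 3 sw_blks).  Returns 0 on
 * success, 1 if the malloc failed.
 */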
int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}

/*
 * Allocate an object and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as that could result in a deadlock.
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a
			 * disaster.  Probably quite rare, but it is yet
			 * another reason to just rip support of "named
			 * anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
			    OFF_TO_IDX(offset + PAGE_MASK) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
		    OFF_TO_IDX(offset + PAGE_MASK) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}

/*
 * Returns the disk block address associated with a pager and offset;
 * additionally, as a side effect, returns a flag indicating
 * whether the block has been written (is valid).
 */
inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (NULL);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;

	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset % SWB_NPAGES;
	if (valid)
		swb->swb_valid |= (1 << ix);
	else
		swb->swb_valid &= ~(1 << ix);
	return;
}
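
/*
 * Page index to swap-block mapping, assuming SWB_NPAGES is 8:
 * pindex 19 lives in swp_blocks[19 / 8] == swp_blocks[2], in slot
 * 19 % 8 == 3, and its written bit is (swb_valid & (1 << 3)).
 */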
/*
 * This routine allocates swap space with a fragmentation
 * minimization policy.
 */
static int
swap_pager_getswapspace(object, amount, rtval)
	vm_object_t object;
	unsigned int amount;
	daddr_t *rtval;
{
	unsigned location;

	vm_swap_size -= amount;
	if (!rlist_alloc(&swaplist, amount, &location)) {
		vm_swap_size += amount;
		return 0;
	} else {
		swapsizecheck();
		object->un_pager.swp.swp_allocsize += amount;
		*rtval = location;
		return 1;
	}
}

/*
 * This routine frees swap space with a fragmentation
 * minimization policy.
 */
static void
swap_pager_freeswapspace(object, from, to)
	vm_object_t object;
	unsigned int from;
	unsigned int to;
{
	rlist_free(&swaplist, from, to);
	vm_swap_size += (to - from) + 1;
	object->un_pager.swp.swp_allocsize -= (to - from) + 1;
	swapsizecheck();
}

/*
 * This routine frees swap blocks from a specified pager.
 */
void
swap_pager_freespace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
			*addr = SWB_EMPTY;
		}
	}
	splx(s);
}

/*
 * Same as freespace, but don't free the swap blocks -- just clear the
 * valid bits so the pages will be demand-zeroed (DMZ) next time.
 */
void
swap_pager_dmzspace(object, start, size)
	vm_object_t object;
	vm_pindex_t start;
	vm_size_t size;
{
	vm_pindex_t i;
	int s;

	s = splbio();
	for (i = start; i < start + size; i += 1) {
		int valid;
		daddr_t *addr = swap_pager_diskaddr(object, i, &valid);

		if (addr && *addr != SWB_EMPTY) {
			if (valid) {
				swap_pager_setvalid(object, i, 0);
			}
		}
	}
	splx(s);
}
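
/*
 * Release every remaining swap block of an object back to the free
 * list, coalescing adjacent blocks into runs so that
 * swap_pager_freeswapspace is called once per contiguous run rather
 * than once per page.
 */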
static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	int first_block = 0, block_count = 0;
	int s;

	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included in the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					    (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
		    (unsigned) first_block + block_count - 1);
	}
	splx(s);
}


/*
 * swap_pager_reclaim frees up over-allocated space from all pagers.
 * This eliminates internal fragmentation due to allocation of space
 * for segments that are never swapped to.  It has been written so that
 * it does not block until the rlist_free operation occurs; it keeps
 * the queues consistent.
 */

/*
 * Maximum number of blocks (pages) to reclaim per pass
 */
#define MAXRECLAIM 128

static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager have been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
			    (object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}


/*
 * swap_pager_copy copies blocks from one pager to another and
 * destroys the source pager
 */
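/*
 * For each destination page index i, the matching source page index
 * is i + offset + srcoffset.  Blocks are moved, not copied on disk,
 * and any source blocks left over afterwards are freed.
 */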
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
		    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
				    &dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
					    *dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
				    *srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}

static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */
	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);

	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}

static inline int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}

static inline int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}

/*
 * swap_pager_haspage returns TRUE if the pager has data that has
 * been written out (i.e. a valid swap block exists for the page).
 */
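/*
 * On success, *before and *after report how many immediately adjacent
 * pages are also valid and disk-contiguous.  Assuming 4K pages and
 * 512-byte disk blocks, page ix - 1 is contiguous iff its block number
 * is exactly btodb(PAGE_SIZE) == 8 less than page ix's.
 */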
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;

			if (before) {
				for (tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				for (tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE / DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  This is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
 */
static void
swap_pager_ridpages(m, count, reqpage)
	vm_page_t *m;
	int count;
	int reqpage;
{
	int i;

	for (i = 0; i < count; i++)
		if (i != reqpage)
			swap_pager_freepage(m[i]);
}

/*
 * swap_pager_iodone1 is the completion routine for both reads and
 * synchronous writes.
 */
static void
swap_pager_iodone1(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	bp->b_flags &= ~B_ASYNC;
	wakeup(bp);
}
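
/*
 * swap_pager_getpages reads the required page and, when the neighboring
 * pages are valid and contiguous on swap (within one dmmax region), as
 * many of them as possible in a single I/O.  Pages that cannot be part
 * of the cluster are freed before the transfer starts.
 */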
static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request is existent */
	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	++swb[reqpage]->swb_locked;

	/*
	 * at this point:
	 *	"m" is a pointer to the array of vm_page_t's for paging I/O
	 *	"count" is the number of vm_page_t entries represented by "m"
	 *	"object" is the vm_object_t for I/O
	 *	"reqpage" is the index into "m" for the page actually faulted
	 */
	spc = NULL;
	if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) {
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz * 20)) {
			printf("swap_pager: indefinite wait buffer: device: %d, blkno: %d, size: %d\n",
			    bp->b_dev, bp->b_blkno, bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
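	/*
	 * Two cleanup paths follow: if an spc was used (single-page read),
	 * the buffer is returned to the swap pager's own free list below;
	 * otherwise the pbuf must be released via relpbuf().
	 */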
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
#if notneeded
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
#endif
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
#if notneeded
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
#endif
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere.  (it already is in the
					 * object).  After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count - 1]->pindex;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in.  We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
			 */
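			/*
			 * SWAPLOW is true when fewer than 512 pages' worth
			 * of swap blocks (512 * btodb(PAGE_SIZE)) remain.
			 */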
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}

int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	if (vm_swap_size)
		no_swap_space = 0;
	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
			    &swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry.
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * verify that the pages to write are contiguous on swap; fail
	 * everything after the first discontinuity or earlier failure.
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
		    (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	for (i = 0; i < count; i++) {
		if (rtvals[i] != VM_PAGER_OK) {
			if (swb[i])
				--swb[i]->swb_locked;
		}
	}

	for (i = 0; i < count; i++)
		if (rtvals[i] != VM_PAGER_OK)
			break;

	if (i == 0) {
		return VM_PAGER_AGAIN;
	}
	count = i;
	for (i = 0; i < count; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
			    m[i]->pindex, i);
		}
	}
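
	/*
	 * A clean structure (and its buffer) is needed for the write.  The
	 * pool is allowed to run down to a reserve of 3 entries: the pageout
	 * daemon retries with timeouts below so it can keep making progress,
	 * while ordinary processes simply sleep until an spc is freed.
	 */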
	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it
	 */
	if (swap_pager_free_count <= 3) {
		s = splbio();
		if (curproc == pageproc) {
	retryfree:
			/*
			 * pageout daemon needs a swap control block
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT | SWAP_FREE_NEEDED;
			/*
			 * if it does not get one within a short time, then
			 * there is a potential deadlock, so we go on trying
			 * to free pages.  It is important to block here as
			 * opposed to returning, thereby allowing the pageout
			 * daemon to continue.  It is likely that the pageout
			 * daemon will start suboptimally reclaiming vnode
			 * backed pages if we don't block.  Since the I/O
			 * subsystem is probably already fully utilized, we
			 * might as well wait.
			 */
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz / 5)) {
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
				/*
				 * we make sure that pageouts aren't taking up
				 * all of the free swap control blocks.
				 */
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					goto retryfree;
				}
			}
		} else {
			pagedaemon_wakeup();
			while (swap_pager_free_count <= 3) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = TAILQ_FIRST(&swap_pager_free);
	if (spc == NULL)
		panic("swap_pager_putpages: free queue is empty, %d expected\n", swap_pager_free_count);
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
	swap_pager_free_count--;

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, m, count);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = 0; i < count; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		for (i = 0; i < count; i++)
			spc->spc_m[i] = m[i];
		spc->spc_count = count;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = 0; i < count; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = 0; i < count; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization: if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED | PG_REFERENCED)) ||
					pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = 0; i < count; i++) {
			rtvals[i] = rv;
		}
	}

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}
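
/*
 * Drain the done queue: for each completed async pageout, unmap its
 * pages from the pager KVA, finish it via swap_pager_finish, and return
 * the clean structure to the free list, waking anyone waiting for one.
 */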
static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	if (TAILQ_FIRST(&swap_pager_done) == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED | SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}

static void
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m[0]->object;
	int i;

	object->paging_in_progress -= spc->spc_count;
	if ((object->paging_in_progress == 0) &&
	    (object->flags & OBJ_PIPWNT)) {
		object->flags &= ~OBJ_PIPWNT;
		wakeup(object);
	}

	/*
	 * If no error, mark as clean and inform the pmap system.  If error,
	 * mark as dirty so we will try again.  (XXX could get stuck doing
	 * this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		for (i = 0; i < spc->spc_count; i++) {
			printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
			    (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i]));
		}
	} else {
		for (i = 0; i < spc->spc_count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
			spc->spc_m[i]->dirty = 0;
			if ((spc->spc_m[i]->queue != PQ_ACTIVE) &&
			    ((spc->spc_m[i]->flags & PG_WANTED) ||
				pmap_ts_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i]))))
				vm_page_activate(spc->spc_m[i]);
		}
	}

	for (i = 0; i < spc->spc_count; i++) {
		/*
		 * we wakeup any processes that are waiting on these pages.
		 */
		PAGE_WAKEUP(spc->spc_m[i]);
	}
	nswiodone -= spc->spc_count;

	return;
}

/*
 * swap_pager_iodone
 */
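/*
 * Completion handler for async pageouts, called at interrupt time.
 * It moves the clean structure from the inuse queue to the done queue
 * and issues wakeups; final cleanup is deferred to swap_pager_sync().
 */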
"pagein" : "pageout", 1625 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error); 1626 } 1627 1628 if (bp->b_vp) 1629 pbrelvp(bp); 1630 1631 /* 1632 if (bp->b_flags & B_WANTED) 1633 */ 1634 wakeup(bp); 1635 1636 if (bp->b_rcred != NOCRED) 1637 crfree(bp->b_rcred); 1638 if (bp->b_wcred != NOCRED) 1639 crfree(bp->b_wcred); 1640 1641 nswiodone += spc->spc_count; 1642 if (--spc->spc_object->un_pager.swp.swp_poip == 0) { 1643 wakeup(spc->spc_object); 1644 } 1645 if ((swap_pager_needflags & SWAP_FREE_NEEDED) || 1646 TAILQ_FIRST(&swap_pager_inuse) == 0) { 1647 swap_pager_needflags &= ~SWAP_FREE_NEEDED; 1648 wakeup(&swap_pager_free); 1649 } 1650 1651 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) { 1652 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT; 1653 pagedaemon_wakeup(); 1654 } 1655 1656 if (vm_pageout_pages_needed) { 1657 wakeup(&vm_pageout_pages_needed); 1658 vm_pageout_pages_needed = 0; 1659 } 1660 if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) || 1661 ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && 1662 nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { 1663 pagedaemon_wakeup(); 1664 } 1665 splx(s); 1666 } 1667